pax_global_header00006660000000000000000000000064146605634320014523gustar00rootroot0000000000000052 comment=759855dd52717c62d035d45885a6fb6b357328b1 ewah_bool_utils-1.2.2/000077500000000000000000000000001466056343200147045ustar00rootroot00000000000000ewah_bool_utils-1.2.2/.github/000077500000000000000000000000001466056343200162445ustar00rootroot00000000000000ewah_bool_utils-1.2.2/.github/dependabot.yml000066400000000000000000000002551466056343200210760ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: github-actions directory: /.github/workflows schedule: interval: monthly groups: actions: patterns: - '*' ewah_bool_utils-1.2.2/.github/workflows/000077500000000000000000000000001466056343200203015ustar00rootroot00000000000000ewah_bool_utils-1.2.2/.github/workflows/bleeding-edge.yaml000066400000000000000000000041441466056343200236430ustar00rootroot00000000000000name: CI (bleeding edge) on: push: branches: - main pull_request: paths: - .github/workflows/bleeding-edge.yaml schedule: # run this every Wednesday at 3 am UTC - cron: 0 3 * * 3 workflow_dispatch: jobs: build: runs-on: ubuntu-latest name: tests with bleeding-edge crucial deps timeout-minutes: 60 concurrency: # auto-cancel any in-progress job *on the same branch* group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true steps: - name: Checkout repo uses: actions/checkout@v4 - name: Set up Python (newest testable version) uses: actions/setup-python@v5 with: python-version: 3.13-dev - uses: yezz123/setup-uv@v4 with: uv-version: 0.2.33 uv-venv: .venv - name: Install dependencies run: | uv pip install --upgrade setuptools wheel uv pip install --pre --no-build numpy \ --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple uv pip install --pre Cython - name: Build # --no-build-isolation is used to guarantee that build time dependencies # are not installed by pip as specified from pyproject.toml, hence we get # to use the dev version of numpy at build 
time. run: | uv pip install -e . --no-build-isolation uv pip install --upgrade --pre pytest - run: uv pip list - name: Run Tests run: | pytest -vvv --color=yes create-issue: if: ${{ failure() && github.event_name == 'schedule' }} needs: [build] permissions: issues: write runs-on: ubuntu-latest name: Create issue on failure steps: - name: Create issue on failure uses: imjohnbo/issue-bot@v3 with: title: 'TST: Upcoming dependency test failures' body: | The weekly build with future dependencies has failed. Check the logs https://github.com/${{github.repository}}/actions/runs/${{github.run_id}} pinned: false close-previous: false env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ewah_bool_utils-1.2.2/.github/workflows/build-test.yaml000066400000000000000000000026441466056343200232470ustar00rootroot00000000000000name: Build and Test on: push: branches: - main pull_request: workflow_dispatch: defaults: run: shell: bash jobs: build: strategy: # run all tests even if e.g. image tests fail early fail-fast: false matrix: os: - ubuntu-latest python-version: - '3.9' - '3.10' - '3.11' - '3.12' # Test all on ubuntu, test ends on macos and windows include: - os: macos-latest python-version: '3.9' - os: windows-latest python-version: '3.9' - os: macos-latest python-version: '3.12' - os: windows-latest python-version: '3.12' - os: ubuntu-20.04 python-version: '3.9' deps: minimal runs-on: ${{ matrix.os }} steps: - name: Checkout repo uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - uses: yezz123/setup-uv@v4 with: uv-version: 0.2.33 uv-venv: .venv - run: uv pip install --requirement test_requirements.txt - if: matrix.deps == 'minimal' run: | uv pip compile pyproject.toml --resolution=lowest-direct | uv pip install -r - - name: Build run: uv pip install --editable "." 
- run: uv pip list - name: Run Tests run: pytest --color=yes ewah_bool_utils-1.2.2/.github/workflows/wheels.yaml000066400000000000000000000054071466056343200224620ustar00rootroot00000000000000name: Build wheels and publish to PyPI on: push: branches: - main tags: - v* pull_request: paths: - .github/workflows/wheels.yaml workflow_dispatch: jobs: build_wheels: name: Build wheels on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: matrix: os: - ubuntu-latest - windows-latest - macos-13 # x86_64 - macos-14 # arm64 fail-fast: false steps: - name: Checkout repo uses: actions/checkout@v4 - uses: yezz123/setup-uv@v4 with: uv-version: 0.2.33 uv-venv: .venv - name: Build wheels for CPython uses: pypa/cibuildwheel@v2.20.0 with: output-dir: dist env: CIBW_TEST_COMMAND: > uv pip install -r {project}/test_requirements.txt && pytest --pyargs ewah_bool_utils --color=yes - uses: actions/upload-artifact@v4 with: name: wheels-${{ matrix.os }} path: ./dist/*.whl build_sdist: name: Build source distribution runs-on: ubuntu-latest steps: - name: Checkout repo uses: actions/checkout@v4 - name: Build sdist run: pipx run build --sdist - name: Upload sdist uses: actions/upload-artifact@v4 with: name: sdist path: dist/*.tar.gz check_manifest: name: Check MANIFEST.in runs-on: ubuntu-latest steps: - name: Checkout repo uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: 3.x - uses: yezz123/setup-uv@v4 with: uv-version: 0.2.33 uv-venv: .venv - name: install check-manifest run: uv pip install check-manifest - name: Install build time dependencies shell: bash run: | uv pip install "Cython>=3.0" uv pip install numpy>=2.0.0 uv pip install --upgrade wheel uv pip install --upgrade setuptools - name: Build shell: bash run: uv pip install --no-build-isolation . 
- name: run check-manifest run: check-manifest -vvv deploy: name: Publish to PyPI needs: [build_wheels, build_sdist, check_manifest] runs-on: ubuntu-latest # upload to PyPI on every tag starting with 'v' if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') steps: - name: Download sdist uses: actions/download-artifact@v4 with: name: sdist path: dist - name: Download wheels uses: actions/download-artifact@v4 with: path: dist pattern: wheels-* merge-multiple: true - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@v1.9.0 with: user: __token__ password: ${{ secrets.pypi_token }} ewah_bool_utils-1.2.2/.gitignore000066400000000000000000000024751466056343200167040ustar00rootroot00000000000000# Cython generated files ewah_bool_utils/ewah_bool_wrap.cpp ewah_bool_utils/morton_utils.c ewah_bool_utils/_testing.cpp temp.linux-x86_64-3.6 # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: #*.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # dotenv .env # virtualenv .venv venv/ ENV/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ # IDE settings .vscode/ ewah_bool_utils-1.2.2/.pre-commit-config.yaml000066400000000000000000000022051466056343200211640ustar00rootroot00000000000000# pre-commit 1.1.0 is required for `exclude` # however `minimum_pre_commit_version` itself requires 1.15.0 minimum_pre_commit_version: 1.15.0 exclude: ^(docs/conf.py|ewah_bool_utils/__init__.py) ci: autofix_prs: false autoupdate_schedule: quarterly repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.6.0 hooks: - id: no-commit-to-branch - id: debug-statements - id: check-merge-conflict - id: trailing-whitespace - id: end-of-file-fixer - id: check-toml - id: check-shebang-scripts-are-executable - id: check-executables-have-shebangs - repo: https://github.com/psf/black-pre-commit-mirror rev: 24.4.2 hooks: - id: black - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.5.0 hooks: - id: ruff args: [--fix] - repo: https://github.com/pre-commit/pygrep-hooks rev: v1.10.0 hooks: - id: rst-backticks - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks rev: v2.13.0 hooks: - id: pretty-format-yaml args: [--autofix] - repo: https://github.com/MarcoGorelli/cython-lint rev: v0.16.2 hooks: - id: cython-lint args: [--no-pycodestyle] 
ewah_bool_utils-1.2.2/AUTHORS.rst000066400000000000000000000003641466056343200165660ustar00rootroot00000000000000======= Credits ======= Development Lead ---------------- * Matthew Turk * Meagan Lang * Navaneeth Suresh Contributors ------------ None yet. Why not be the first? ewah_bool_utils-1.2.2/CONTRIBUTING.rst000066400000000000000000000067161466056343200173570ustar00rootroot00000000000000.. highlight:: shell ============ Contributing ============ Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given. You can contribute in many ways: Types of Contributions ---------------------- Report Bugs ~~~~~~~~~~~ Report bugs at https://github.com/yt-project/ewah_bool_utils/issues. If you are reporting a bug, please include: * Your operating system name and version. * Any details about your local setup that might be helpful in troubleshooting. * Detailed steps to reproduce the bug. Fix Bugs ~~~~~~~~ Look through the GitHub issues for bugs. Anything tagged with "bug" and "help wanted" is open to whoever wants to implement it. Implement Features ~~~~~~~~~~~~~~~~~~ Look through the GitHub issues for features. Anything tagged with "enhancement" and "help wanted" is open to whoever wants to implement it. Write Documentation ~~~~~~~~~~~~~~~~~~~ EWAH Bool Utils could always use more documentation, whether as part of the official EWAH Bool Utils docs, in docstrings, or even on the web in blog posts, articles, and such. Submit Feedback ~~~~~~~~~~~~~~~ The best way to send feedback is to file an issue at https://github.com/yt-project/ewah_bool_utils/issues. If you are proposing a feature: * Explain in detail how it would work. * Keep the scope as narrow as possible, to make it easier to implement. * Remember that this is a volunteer-driven project, and that contributions are welcome :) Get Started! ------------ Ready to contribute? Here's how to set up ``ewah_bool_utils`` for local development. 1. 
Fork the ``ewah_bool_utils`` repo on GitHub. 2. Clone your fork locally:: $ git clone git@github.com:your_name_here/ewah_bool_utils.git 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: $ mkvirtualenv ewah_bool_utils $ cd ewah_bool_utils/ $ python setup.py develop 4. Create a branch for local development:: $ git checkout -b name-of-your-bugfix-or-feature Now you can make your changes locally. 5. When you're done making changes, check that your changes pass flake8 and the tests, including testing other Python versions with tox:: $ flake8 ewah_bool_utils tests $ python setup.py test or pytest $ tox To get flake8 and tox, just pip install them into your virtualenv. 6. Commit your changes and push your branch to GitHub:: $ git add . $ git commit -m "Your detailed description of your changes." $ git push origin name-of-your-bugfix-or-feature 7. Submit a pull request through the GitHub website. Pull Request Guidelines ----------------------- Before you submit a pull request, check that it meets these guidelines: 1. The pull request should include tests. 2. If the pull request adds functionality, the docs should be updated. Put your new functionality into a function with a docstring, and add the feature to the list in README.rst. 3. Check https://travis-ci.com/yt-project/ewah_bool_utils/pull_requests and make sure that the tests pass for all supported Python versions. Tips ---- To run a subset of tests:: $ pytest tests.test_ewah_bool_utils Deploying --------- A reminder for the maintainers on how to deploy. Make sure all your changes are committed (including an entry in HISTORY.rst). Then run:: $ bump2version patch # possible: major / minor / patch $ git push $ git push --tags Travis will then deploy to PyPI if tests pass. 
ewah_bool_utils-1.2.2/HISTORY.rst000066400000000000000000000030511466056343200165760ustar00rootroot00000000000000======= History ======= 1.0.2 (2022-12-22) ------------------ * BLD: lower runtime requirement for numpy from 1.17.5 to 1.17.3 by @neutrinoceros in https://github.com/yt-project/ewah_bool_utils/pull/21 1.0.1 (2022-12-22) ------------------ * BUG: Fix build on i386 by @neutrinoceros in https://github.com/yt-project/ewah_bool_utils/pull/15 * BUG: fix downstream linking by @neutrinoceros in https://github.com/yt-project/ewah_bool_utils/pull/16 * ENH: add a get_include() function to facilitate downstream integration by @neutrinoceros in https://github.com/yt-project/ewah_bool_utils/pull/18 1.0.0 (2022-12-22) ------------------ * Update EWAH from upstream lemire/EWAHBoolArray by @themousepotato in https://github.com/yt-project/ewah_bool_utils/pull/3 * TST: revive CI by @neutrinoceros in https://github.com/yt-project/ewah_bool_utils/pull/6 * MNT: bootstrap pre-commit by @neutrinoceros in https://github.com/yt-project/ewah_bool_utils/pull/5 * TST: expand test matrix by @neutrinoceros in https://github.com/yt-project/ewah_bool_utils/pull/10 * BLD: explicitly require wheel as a buildtime dependency by @neutrinoceros in https://github.com/yt-project/ewah_bool_utils/pull/11 * TST: finalize testing on MacOS by @neutrinoceros in https://github.com/yt-project/ewah_bool_utils/pull/12 * MNT: update code from yt by @neutrinoceros in https://github.com/yt-project/ewah_bool_utils/pull/4 * REL: add wheels+publish workflow by @neutrinoceros in https://github.com/yt-project/ewah_bool_utils/pull/13 0.1.0 (2020-06-08) ------------------ * First release on PyPI. ewah_bool_utils-1.2.2/LICENSE000066400000000000000000000030001466056343200157020ustar00rootroot00000000000000 BSD License Copyright (c) 2020, Matthew Turk, Meagan Lang, Navaneeth Suresh All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
ewah_bool_utils-1.2.2/MANIFEST.in000066400000000000000000000011261466056343200164420ustar00rootroot00000000000000include AUTHORS.rst include CONTRIBUTING.rst include HISTORY.rst include LICENSE include README.rst include *.py include *.txt include ewah_bool_utils/cpp/*.h include ewah_bool_utils/cpp/LICENSE include ewah_bool_utils/cpp/README recursive-include ewah_bool_utils *.pxd recursive-include ewah_bool_utils *.pyx recursive-include ewah_bool_utils/tests * exclude ewah_bool_utils/_testing.cpp exclude ewah_bool_utils/ewah_bool_wrap.cpp exclude ewah_bool_utils/morton_utils.c recursive-include docs *.rst conf.py Makefile make.bat exclude .pre-commit-config.yaml recursive-exclude */__pycache__ * ewah_bool_utils-1.2.2/README.rst000066400000000000000000000011661466056343200163770ustar00rootroot00000000000000=============== EWAH Bool Utils =============== .. image:: https://img.shields.io/pypi/v/ewah_bool_utils.svg :target: https://pypi.python.org/pypi/ewah_bool_utils EWAH Bool Array utils for yt Free software: BSD license Features -------- * EWAH Bool Array compression stores integer arrays efficient in memory. * Can be used for indexing arrays. Credits ------- This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. .. _Cookiecutter: https://github.com/audreyr/cookiecutter .. _`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage ewah_bool_utils-1.2.2/docs/000077500000000000000000000000001466056343200156345ustar00rootroot00000000000000ewah_bool_utils-1.2.2/docs/Makefile000066400000000000000000000011501466056343200172710ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = python -msphinx SPHINXPROJ = ewah_bool_utils SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". 
help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ewah_bool_utils-1.2.2/docs/authors.rst000066400000000000000000000000341466056343200200500ustar00rootroot00000000000000.. include:: ../AUTHORS.rst ewah_bool_utils-1.2.2/docs/conf.py000077500000000000000000000115461466056343200171450ustar00rootroot00000000000000#!/usr/bin/env python # # ewah_bool_utils documentation build configuration file, created by # sphinx-quickstart on Fri Jun 9 13:47:02 2017. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. # If extensions (or modules to document with autodoc) are in another # directory, add these directories to sys.path here. If the directory is # relative to the documentation root, use os.path.abspath to make it # absolute, like shown here. # import os import sys sys.path.insert(0, os.path.abspath("..")) import ewah_bool_utils # -- General configuration --------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = ".rst" # The master toctree document. 
master_doc = "index" # General information about the project. project = "EWAH Bool Utils" copyright = "2020, Matthew Turk, Meagan Lang, Navaneeth Suresh" author = "Matthew Turk, Meagan Lang, Navaneeth Suresh" # The version info for the project you're documenting, acts as replacement # for |version| and |release|, also used in various other places throughout # the built documents. # # The short X.Y version. version = "1.2" # The full version, including alpha/beta/rc tags. release = "1.2.2" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False # -- Options for HTML output ------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = "alabaster" # Theme options are theme-specific and customize the look and feel of a # theme further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] # -- Options for HTMLHelp output --------------------------------------- # Output file base name for HTML help builder. 
htmlhelp_basename = "ewah_bool_utilsdoc" # -- Options for LaTeX output ------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass # [howto, manual, or own class]). latex_documents = [ ( master_doc, "ewah_bool_utils.tex", "EWAH Bool Utils Documentation", "Navaneeth Suresh", "manual", ), ] # -- Options for manual page output ------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, "ewah_bool_utils", "EWAH Bool Utils Documentation", [author], 1) ] # -- Options for Texinfo output ---------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( master_doc, "ewah_bool_utils", "EWAH Bool Utils Documentation", author, "ewah_bool_utils", "One line description of project.", "Miscellaneous", ), ] ewah_bool_utils-1.2.2/docs/contributing.rst000066400000000000000000000000411466056343200210700ustar00rootroot00000000000000.. include:: ../CONTRIBUTING.rst ewah_bool_utils-1.2.2/docs/history.rst000066400000000000000000000000341466056343200200640ustar00rootroot00000000000000.. include:: ../HISTORY.rst ewah_bool_utils-1.2.2/docs/index.rst000066400000000000000000000004701466056343200174760ustar00rootroot00000000000000Welcome to EWAH Bool Utils's documentation! ====================================== .. 
toctree:: :maxdepth: 2 :caption: Contents: readme installation usage modules contributing authors history Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ewah_bool_utils-1.2.2/docs/installation.rst000066400000000000000000000022421466056343200210670ustar00rootroot00000000000000.. highlight:: shell ============ Installation ============ Stable release -------------- To install EWAH Bool Utils, run this command in your terminal: .. code-block:: console $ pip install ewah_bool_utils This is the preferred method to install EWAH Bool Utils, as it will always install the most recent stable release. If you don't have `pip`_ installed, this `Python installation guide`_ can guide you through the process. .. _pip: https://pip.pypa.io .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ From sources ------------ The sources for EWAH Bool Utils can be downloaded from the `Github repo`_. You can either clone the public repository: .. code-block:: console $ git clone git://github.com/yt-project/ewah_bool_utils Or download the `tarball`_: .. code-block:: console $ curl -OJL https://github.com/yt-project/ewah_bool_utils/tarball/master Once you have a copy of the source, you can install it with: .. code-block:: console $ python setup.py install .. _Github repo: https://github.com/yt-project/ewah_bool_utils .. _tarball: https://github.com/yt-project/ewah_bool_utils/tarball/master ewah_bool_utils-1.2.2/docs/make.bat000066400000000000000000000014111466056343200172360ustar00rootroot00000000000000@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=python -msphinx ) set SOURCEDIR=. set BUILDDIR=_build set SPHINXPROJ=ewah_bool_utils if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The Sphinx module was not found. 
Make sure you have Sphinx installed, echo.then set the SPHINXBUILD environment variable to point to the full echo.path of the 'sphinx-build' executable. Alternatively you may add the echo.Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd ewah_bool_utils-1.2.2/docs/readme.rst000066400000000000000000000000331466056343200176170ustar00rootroot00000000000000.. include:: ../README.rst ewah_bool_utils-1.2.2/docs/usage.rst000066400000000000000000000015171466056343200174760ustar00rootroot00000000000000===== Usage ===== Set from an integer numpy array:: from ewah_bool_utils.ewah_bool_array cimport ewah_bool_array cdef ewah_bool_array *ewah_array ewah_array = new ewah_bool_array() for i2 in range(numpy_array.shape[0]): i1 = numpy_array[i2] ewah_array[0].set(i1) Unset an EWAH array:: from libcpp.vector cimport vector from ewah_bool_utils.ewah_bool_array cimport ewah_bool_array import numpy as np cdef vector[size_t] vec vec = ewah_array[0].toArray() numpy_array = np.array(vec, 'uint64') Find the number of collisions between two EWAH arrays:: from ewah_bool_utils.ewah_bool_array cimport ewah_bool_array cdef ewah_bool_array ewah_array_keys cdef int ncoll ewah_array2[0].logicaland(ewah_array1[0], ewah_array_keys[0]) ncoll = ewah_array_keys[0].numberOfOnes() ewah_bool_utils-1.2.2/ewah_bool_utils/000077500000000000000000000000001466056343200200635ustar00rootroot00000000000000ewah_bool_utils-1.2.2/ewah_bool_utils/__init__.py000066400000000000000000000004231466056343200221730ustar00rootroot00000000000000"""Top-level package for EWAH Bool Utils.""" from ewah_bool_utils.ewah_bool_wrap import * def get_include(): """ Returns the directory that contains ewah*.h headers """ import os return os.path.abspath(os.path.join(os.path.dirname(__file__), "cpp")) 
ewah_bool_utils-1.2.2/ewah_bool_utils/_testing.pyx000066400000000000000000000061701466056343200224450ustar00rootroot00000000000000# Cython interface for ewah_bool_utils tests cimport cython cimport numpy as np from libcpp.vector cimport vector from .ewah_bool_array cimport ewah_bool_array import numpy as np cdef np.uint64_t FLAG = ~(0) ctypedef fused dtype_t: np.float32_t np.float64_t ctypedef fused int_t: np.int32_t np.int64_t cdef class Index: cdef void* ewah_array def __cinit__(self): cdef ewah_bool_array *ewah_array = new ewah_bool_array() self.ewah_array = ewah_array cdef bint _get(self, np.uint64_t i1): cdef ewah_bool_array *ewah_array = self.ewah_array return ewah_array[0].get(i1) def get(self, i1): return self._get(i1) cdef void _set(self, np.uint64_t i1): cdef ewah_bool_array *ewah_array = self.ewah_array ewah_array[0].set(i1) def set(self, i1): return self._set(i1) @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) def select(self, dtype_t[:] np_array): cdef ewah_bool_array *ewah_array = self.ewah_array cdef int i, j cdef np.ndarray[dtype_t, ndim=1] out out = np.empty(ewah_array[0].numberOfOnes()) j = 0 for i in range(ewah_array[0].sizeInBits()): if ewah_array[0].get(i): out[j] = np_array[i] j += 1 return out @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) def set_from(self, int_t[:] ids): cdef ewah_bool_array *ewah_array = self.ewah_array cdef np.uint64_t i cdef np.uint64_t last = 0 for i in range(ids.shape[0]): if ids[i] < last: raise RuntimeError self._set(ids[i]) print('Set from %s array and ended up with %s bytes' % ( ids.size, ewah_array[0].sizeInBytes())) cpdef np.uint64_t[:] ewah_set_and_unset(int_t[:] arr): cdef ewah_bool_array *ewah_arr cdef vector[size_t] vec ewah_arr = new ewah_bool_array() for i2 in range(arr.shape[0]): i1 = arr[i2] ewah_arr[0].set(i1) vec = ewah_arr[0].toArray() np_arr = np.array(vec, 'uint64') return np_arr 
cpdef int find_ewah_collisions(int_t[:] arr1, int_t[:] arr2): cdef ewah_bool_array *ewah_arr1 cdef ewah_bool_array *ewah_arr2 cdef ewah_bool_array *ewah_arr_keys cdef int ncoll ewah_arr1 = new ewah_bool_array() ewah_arr2 = new ewah_bool_array() ewah_arr_keys = new ewah_bool_array() for i2 in range(arr1.shape[0]): i1 = arr1[i2] ewah_arr1[0].set(i1) for j2 in range(arr2.shape[0]): j1 = arr2[j2] ewah_arr2[0].set(j1) ewah_arr2[0].logicaland(ewah_arr1[0], ewah_arr_keys[0]) ncoll = ewah_arr_keys[0].numberOfOnes() return ncoll cpdef dtype_t[:] make_and_select_from_ewah_index(dtype_t[:] arr, int_t[:] np_idx): cdef Index idx = Index() idx.set_from(np_idx) out_arr = idx.select(arr) return out_arr ewah_bool_utils-1.2.2/ewah_bool_utils/cpp/000077500000000000000000000000001466056343200206455ustar00rootroot00000000000000ewah_bool_utils-1.2.2/ewah_bool_utils/cpp/LICENSE000066400000000000000000000240411466056343200216530ustar00rootroot00000000000000Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: You must give any other recipients of the Work or Derivative Works a copy of this License; and You must cause any modified files to carry prominent notices stating that You changed the files; and You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. 
Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. 
Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
ewah_bool_utils-1.2.2/ewah_bool_utils/cpp/README000066400000000000000000000003731466056343200215300ustar00rootroot00000000000000This code was bundled from the EWAHBoolArray project by Daniel Lemire, available at: https://github.com/lemire/EWAHBoolArray Currently this is at revision 88b25a3345b82353ccd97a7de6064e6c179a7cc2 This code is available under the Apache2.0 license. ewah_bool_utils-1.2.2/ewah_bool_utils/cpp/boolarray.h000066400000000000000000000323421466056343200230140ustar00rootroot00000000000000/** * This code is released under the * Apache License Version 2.0 http://www.apache.org/licenses/. * * (c) Daniel Lemire, http://lemire.me/en/ */ #ifndef BOOLARRAY_H #define BOOLARRAY_H #include #include #include // mostly for Microsoft compilers #include #include #include #include #include "ewahutil.h" namespace ewah { /** * A dynamic bitset implementation. (without compression). */ template class BoolArray { public: BoolArray(const size_t n, const uword initval = 0) : buffer(n / wordinbits + (n % wordinbits == 0 ? 0 : 1), initval), sizeinbits(n) {} BoolArray() : buffer(), sizeinbits(0) {} BoolArray(const BoolArray &ba) : buffer(ba.buffer), sizeinbits(ba.sizeinbits) {} static BoolArray bitmapOf(size_t n, ...) { BoolArray ans; va_list vl; va_start(vl, n); for (size_t i = 0; i < n; i++) { ans.set(static_cast(va_arg(vl, int))); } va_end(vl); return ans; } size_t sizeInBytes() const { return buffer.size() * sizeof(uword); } void read(std::istream &in) { sizeinbits = 0; in.read(reinterpret_cast(&sizeinbits), sizeof(sizeinbits)); buffer.resize(sizeinbits / wordinbits + (sizeinbits % wordinbits == 0 ? 
0 : 1)); if (buffer.size() == 0) return; in.read(reinterpret_cast(&buffer[0]), static_cast(buffer.size() * sizeof(uword))); } void readBuffer(std::istream &in, const size_t size) { buffer.resize(size); sizeinbits = size * sizeof(uword) * 8; if (buffer.empty()) return; in.read(reinterpret_cast(&buffer[0]), buffer.size() * sizeof(uword)); } void setSizeInBits(const size_t sizeib) { sizeinbits = sizeib; } void write(std::ostream &out) { write(out, sizeinbits); } void write(std::ostream &out, const size_t numberofbits) const { const size_t size = numberofbits / wordinbits + (numberofbits % wordinbits == 0 ? 0 : 1); out.write(reinterpret_cast(&numberofbits), sizeof(numberofbits)); if (numberofbits == 0) return; out.write(reinterpret_cast(&buffer[0]), static_cast(size * sizeof(uword))); } void writeBuffer(std::ostream &out, const size_t numberofbits) const { const size_t size = numberofbits / wordinbits + (numberofbits % wordinbits == 0 ? 0 : 1); if (size == 0) return; #ifdef EWAHASSERT assert(buffer.size() >= size); #endif out.write(reinterpret_cast(&buffer[0]), size * sizeof(uword)); } size_t sizeOnDisk() const { size_t size = sizeinbits / wordinbits + (sizeinbits % wordinbits == 0 ? 
0 : 1); return sizeof(sizeinbits) + size * sizeof(uword); } BoolArray &operator=(const BoolArray &x) { this->buffer = x.buffer; this->sizeinbits = x.sizeinbits; return *this; } bool operator==(const BoolArray &x) const { if (sizeinbits != x.sizeinbits) return false; for (size_t k = 0; k < buffer.size(); ++k) if (buffer[k] != x.buffer[k]) return false; return true; } bool operator!=(const BoolArray &x) const { return !operator==(x); } void setWord(const size_t pos, const uword val) { #ifdef EWAHASSERT assert(pos < buffer.size()); #endif buffer[pos] = val; } void addWord(const uword val) { if (sizeinbits % wordinbits != 0) throw std::invalid_argument("you probably didn't want to do this"); sizeinbits += wordinbits; buffer.push_back(val); } uword getWord(const size_t pos) const { #ifdef EWAHASSERT assert(pos < buffer.size()); #endif return buffer[pos]; } /** * set to true (whether it was already set to true or not) */ void set(const size_t pos) { if (pos >= sizeinbits) padWithZeroes(pos + 1); buffer[pos / wordinbits] |= static_cast((static_cast(1) << (pos % wordinbits))); } /** * set to false (whether it was already set to false or not) * */ void unset(const size_t pos) { if (pos < sizeinbits) buffer[pos / wordinbits] &= ~(static_cast(1) << (pos % wordinbits)); } /** * true of false? (set or unset) */ bool get(const size_t pos) const { #ifdef EWAHASSERT assert(pos / wordinbits < buffer.size()); #endif return (buffer[pos / wordinbits] & (static_cast(1) << (pos % wordinbits))) != 0; } /** * set all bits to 0 */ void reset() { if (buffer.size() > 0) memset(&buffer[0], 0, sizeof(uword) * buffer.size()); sizeinbits = 0; } size_t sizeInBits() const { return sizeinbits; } ~BoolArray() {} /** * Computes the logical and and writes to the provided BoolArray (out). * The current bitmaps is unchanged. 
*/ void logicaland(const BoolArray &ba, BoolArray &out) const { if (ba.buffer.size() < buffer.size()) out.setToSize(ba); else out.setToSize(*this); for (size_t i = 0; i < out.buffer.size(); ++i) out.buffer[i] = buffer[i] & ba.buffer[i]; } /** * Computes the logical and and return the result. * The current bitmaps is unchanged. */ BoolArray logicaland(const BoolArray &a) const { BoolArray answer; logicaland(a, answer); return answer; } void inplace_logicaland(const BoolArray &ba) { if (ba.buffer.size() < buffer.size()) setToSize(ba); for (size_t i = 0; i < buffer.size(); ++i) buffer[i] = buffer[i] & ba.buffer[i]; } /** * Computes the logical andnot and writes to the provided BoolArray (out). * The current bitmaps is unchanged. */ void logicalandnot(const BoolArray &ba, BoolArray &out) const { out.setToSize(*this); size_t upto = out.buffer.size() < ba.buffer.size() ? out.buffer.size() : ba.buffer.size(); for (size_t i = 0; i < upto; ++i) out.buffer[i] = static_cast(buffer[i] & (~ba.buffer[i])); for (size_t i = upto; i < out.buffer.size(); ++i) out.buffer[i] = buffer[i]; out.clearBogusBits(); } /** * Computes the logical andnot and return the result. * The current bitmaps is unchanged. */ BoolArray logicalandnot(const BoolArray &a) const { BoolArray answer; logicalandnot(a, answer); return answer; } void inplace_logicalandnot(const BoolArray &ba) { size_t upto = buffer.size() < ba.buffer.size() ? buffer.size() : ba.buffer.size(); for (size_t i = 0; i < upto; ++i) buffer[i] = buffer[i] & (~ba.buffer[i]); clearBogusBits(); } /** * Computes the logical or and writes to the provided BoolArray (out). * The current bitmaps is unchanged. 
*/ void logicalor(const BoolArray &ba, BoolArray &out) const { const BoolArray *smallest; const BoolArray *largest; if (ba.buffer.size() > buffer.size()) { smallest = this; largest = &ba; out.setToSize(ba); } else { smallest = &ba; largest = this; out.setToSize(*this); } for (size_t i = 0; i < smallest->buffer.size(); ++i) out.buffer[i] = buffer[i] | ba.buffer[i]; for (size_t i = smallest->buffer.size(); i < largest->buffer.size(); ++i) out.buffer[i] = largest->buffer[i]; } /** * Computes the logical or and return the result. * The current bitmaps is unchanged. */ BoolArray logicalor(const BoolArray &a) const { BoolArray answer; logicalor(a, answer); return answer; } void inplace_logicalor(const BoolArray &ba) { logicalor(ba, *this); } /** * Computes the logical xor and writes to the provided BoolArray (out). * The current bitmaps is unchanged. */ void logicalxor(const BoolArray &ba, BoolArray &out) const { const BoolArray *smallest; const BoolArray *largest; if (ba.buffer.size() > buffer.size()) { smallest = this; largest = &ba; out.setToSize(ba); } else { smallest = &ba; largest = this; out.setToSize(*this); } for (size_t i = 0; i < smallest->buffer.size(); ++i) out.buffer[i] = buffer[i] ^ ba.buffer[i]; for (size_t i = smallest->buffer.size(); i < largest->buffer.size(); ++i) out.buffer[i] = largest->buffer[i]; } /** * Computes the logical xor and return the result. * The current bitmaps is unchanged. */ BoolArray logicalxor(const BoolArray &a) const { BoolArray answer; logicalxor(a, answer); return answer; } void inplace_logicalxor(const BoolArray &ba) { logicalxor(ba, *this); } /** * Computes the logical not and writes to the provided BoolArray (out). * The current bitmaps is unchanged. */ void logicalnot(BoolArray &out) const { out.setToSize(*this); for (size_t i = 0; i < buffer.size(); ++i) out.buffer[i] = ~buffer[i]; out.clearBogusBits(); } /** * Computes the logical not and return the result. * The current bitmaps is unchanged. 
*/ BoolArray logicalandnot() const { BoolArray answer; logicalnot(answer); return answer; } void inplace_logicalnot() { for (size_t i = 0; i < buffer.size(); ++i) buffer[i] = ~buffer[i]; clearBogusBits(); } /** * Returns the number of bits set to the value 1. * The running time complexity is proportional to the * size of the bitmap. * * This is sometimes called the cardinality. */ size_t numberOfOnes() const { size_t count = 0; for (size_t i = 0; i < buffer.size(); ++i) { count += countOnes((UWORD)buffer[i]); } return count; } inline void printout(std::ostream &o = std::cout) { for (size_t k = 0; k < sizeinbits; ++k) o << get(k) << " "; o << std::endl; } /** * Make sure the two bitmaps have the same size (padding with zeroes * if necessary). It has constant running time complexity. */ void makeSameSize(BoolArray &a) { if (a.sizeinbits < sizeinbits) a.padWithZeroes(sizeinbits); else if (sizeinbits < a.sizeinbits) padWithZeroes(a.sizeinbits); } /** * Make sure the current bitmap has the size of the provided bitmap. */ void setToSize(const BoolArray &a) { sizeinbits = a.sizeinbits; buffer.resize(a.buffer.size()); } /** * make sure the size of the array is totalbits bits by padding with zeroes. 
* returns the number of words added (storage cost increase) */ size_t padWithZeroes(const size_t totalbits) { size_t currentwordsize = (sizeinbits + wordinbits - 1) / wordinbits; size_t neededwordsize = (totalbits + wordinbits - 1) / wordinbits; #ifdef EWAHASSERT assert(neededwordsize >= currentwordsize); #endif buffer.resize(neededwordsize); sizeinbits = totalbits; return static_cast(neededwordsize - currentwordsize); } void append(const BoolArray &a); enum { wordinbits = sizeof(uword) * 8 }; std::vector toArray() const { std::vector ans; for (size_t k = 0; k < buffer.size(); ++k) { uword myword = buffer[k]; while (myword != 0) { uint32_t ntz = numberOfTrailingZeros(myword); ans.push_back(sizeof(uword) * 8 * k + ntz); myword ^= (static_cast(1) << ntz); } } return ans; } /** * Transform into a string that presents a list of set bits. * The running time is linear in the size of the bitmap. */ operator std::string() const { std::stringstream ss; ss << *this; return ss.str(); } friend std::ostream &operator<<(std::ostream &out, const BoolArray &a) { std::vector v = a.toArray(); out << "{"; for (std::vector::const_iterator i = v.begin(); i != v.end();) { out << *i; ++i; if (i != v.end()) out << ","; } out << "}"; return out; return (out << static_cast(a)); } private: void clearBogusBits() { if ((sizeinbits % wordinbits) != 0) { const uword maskbogus = static_cast((static_cast(1) << (sizeinbits % wordinbits)) - 1); buffer[buffer.size() - 1] &= maskbogus; } } std::vector buffer; size_t sizeinbits; }; /** * computes the logical or (union) between "n" bitmaps (referenced by a * pointer). * The answer gets written out in container. This might be faster than calling * logicalor n-1 times. 
*/ template void fast_logicalor_tocontainer(size_t n, const BoolArray **inputs, BoolArray &container) { if (n == 0) { container.reset(); return; } container = *inputs[0]; for (size_t i = 0; i < n; i++) { container.inplace_logicalor(*inputs[i]); } } /** * computes the logical or (union) between "n" bitmaps (referenced by a * pointer). * Returns the answer. This might be faster than calling * logicalor n-1 times. */ template BoolArray fast_logicalor(size_t n, const BoolArray **inputs) { BoolArray answer; fast_logicalor_tocontainer(n, inputs, answer); return answer; } template void BoolArray::append(const BoolArray &a) { if (sizeinbits % wordinbits == 0) { buffer.insert(buffer.end(), a.buffer.begin(), a.buffer.end()); } else { throw std::invalid_argument( "Cannot append if parent does not meet boundary"); } sizeinbits += a.sizeinbits; } } // namespace ewah #endif ewah_bool_utils-1.2.2/ewah_bool_utils/cpp/ewah-inl.h000066400000000000000000001623671466056343200225410ustar00rootroot00000000000000#ifndef EWAH_INL_H #define EWAH_INL_H #include "ewah.h" namespace ewah { /** * computes the logical or (union) between "n" bitmaps (referenced by a * pointer). * The answer gets written out in container. This might be faster than calling * logicalor n-1 times. */ template void fast_logicalor_tocontainer(size_t n, const EWAHBoolArray **inputs, EWAHBoolArray &container); /** * computes the logical or (union) between "n" bitmaps (referenced by a * pointer). * Returns the answer. This might be faster than calling * logicalor n-1 times. */ template EWAHBoolArray fast_logicalor(size_t n, const EWAHBoolArray **inputs) { EWAHBoolArray answer; fast_logicalor_tocontainer(n, inputs, answer); return answer; } /** * Iterate over words of bits from a compressed bitmap. */ template class EWAHBoolArrayIterator { public: /** * is there a new word? */ bool hasNext() const { return pointer < myparent.size(); } /** * return next word. 
*/ uword next() { uword returnvalue; if (compressedwords < rl) { ++compressedwords; if (b) returnvalue = notzero; else returnvalue = zero; } else { ++literalwords; ++pointer; returnvalue = myparent[pointer]; } if ((compressedwords == rl) && (literalwords == lw)) { ++pointer; if (pointer < myparent.size()) readNewRunningLengthWord(); } return returnvalue; } EWAHBoolArrayIterator(const EWAHBoolArrayIterator &other) : pointer(other.pointer), myparent(other.myparent), compressedwords(other.compressedwords), literalwords(other.literalwords), rl(other.rl), lw(other.lw), b(other.b) {} static const uword zero = 0; static const uword notzero = static_cast(~zero); private: EWAHBoolArrayIterator(const std::vector &parent); void readNewRunningLengthWord(); friend class EWAHBoolArray; size_t pointer; const std::vector &myparent; uword compressedwords; uword literalwords; uword rl, lw; bool b; }; /** * Used to go through the set bits. Not optimally fast, but convenient. */ template class EWAHBoolArraySetBitForwardIterator { public: typedef std::forward_iterator_tag iterator_category; typedef size_t *pointer; typedef size_t &reference_type; typedef size_t value_type; typedef ptrdiff_t difference_type; typedef EWAHBoolArraySetBitForwardIterator type_of_iterator; /** * Provides the location of the set bit. 
*/ inline size_t operator*() const { return answer; } bool operator<(const type_of_iterator &o) const { if (!o.hasValue) return true; if (!hasValue) return false; return answer < o.answer; } bool operator<=(const type_of_iterator &o) const { if (!o.hasValue) return true; if (!hasValue) return false; return answer <= o.answer; } bool operator>(const type_of_iterator &o) const { return !((*this) <= o); } bool operator>=(const type_of_iterator &o) const { return !((*this) < o); } EWAHBoolArraySetBitForwardIterator &operator++() { //++i if (hasNext) next(); else hasValue = false; return *this; } EWAHBoolArraySetBitForwardIterator operator++(int) { // i++ EWAHBoolArraySetBitForwardIterator old(*this); if (hasNext) next(); else hasValue = false; return old; } bool operator==(const EWAHBoolArraySetBitForwardIterator &o) const { if ((!hasValue) && (!o.hasValue)) return true; return (hasValue == o.hasValue) && (answer == o.answer); } bool operator!=(const EWAHBoolArraySetBitForwardIterator &o) const { return !(*this == o); } static EWAHBoolArraySetBitForwardIterator &end() { static EWAHBoolArraySetBitForwardIterator e; return e; } EWAHBoolArraySetBitForwardIterator(const std::vector *parent, size_t startpointer = 0) : word(0), position(0), runningLength(0), literalPosition(0), wordPosition(startpointer), wordLength(0), buffer(parent), hasNext(false), hasValue(false), answer(0) { if (wordPosition < buffer->size()) { setRunningLengthWord(); hasNext = moveToNext(); if (hasNext) { next(); hasValue = true; } } } EWAHBoolArraySetBitForwardIterator() : word(0), position(0), runningLength(0), literalPosition(0), wordPosition(0), wordLength(0), buffer(NULL), hasNext(false), hasValue(false), answer(0) {} inline bool runningHasNext() const { return position < runningLength; } inline bool literalHasNext() { while (word == 0 && wordPosition < wordLength) { word = (*buffer)[wordPosition++]; literalPosition = position; position += WORD_IN_BITS; } return word != 0; } inline void 
setRunningLengthWord() { uword rlw = (*buffer)[wordPosition]; runningLength = (size_t)WORD_IN_BITS * RunningLengthWord::getRunningLength(rlw) + position; if (!RunningLengthWord::getRunningBit(rlw)) { position = runningLength; } wordPosition++; // point to first literal word wordLength = static_cast(wordPosition + RunningLengthWord::getNumberOfLiteralWords(rlw)); } inline bool moveToNext() { while (!runningHasNext() && !literalHasNext()) { if (wordPosition >= buffer->size()) { return false; } setRunningLengthWord(); } return true; } void next() { // update answer if (runningHasNext()) { answer = position++; if (runningHasNext()) return; } else { uword t = static_cast(word & (~word + 1)); answer = literalPosition + countOnes((UWORD)(t - 1)); word ^= t; } hasNext = moveToNext(); } enum { WORD_IN_BITS = sizeof(uword) * 8 }; uword word; // lit word size_t position; size_t runningLength; size_t literalPosition; size_t wordPosition; // points to word in buffer uword wordLength; const std::vector *buffer; bool hasNext; bool hasValue; size_t answer; }; /** * This object is returned by the compressed bitmap as a * statistical descriptor. 
*/ class BitmapStatistics { public: BitmapStatistics() : totalliteral(0), totalcompressed(0), runningwordmarker(0), maximumofrunningcounterreached(0) {} size_t getCompressedSize() const { return totalliteral + runningwordmarker; } size_t getUncompressedSize() const { return totalliteral + totalcompressed; } size_t getNumberOfDirtyWords() const { return totalliteral; } size_t getNumberOfCleanWords() const { return totalcompressed; } size_t getNumberOfMarkers() const { return runningwordmarker; } size_t getOverRuns() const { return maximumofrunningcounterreached; } size_t totalliteral; size_t totalcompressed; size_t runningwordmarker; size_t maximumofrunningcounterreached; }; template bool EWAHBoolArray::set(size_t i) { if (i < sizeinbits) return false; const size_t dist = (i + wordinbits) / wordinbits - (sizeinbits + wordinbits - 1) / wordinbits; sizeinbits = i + 1; if (dist > 0) { // easy if (dist > 1) { fastaddStreamOfEmptyWords(false, dist - 1); } addLiteralWord( static_cast(static_cast(1) << (i % wordinbits))); return true; } RunningLengthWord lastRunningLengthWord(buffer[lastRLW]); if (lastRunningLengthWord.getNumberOfLiteralWords() == 0) { lastRunningLengthWord.setRunningLength( static_cast(lastRunningLengthWord.getRunningLength() - 1)); addLiteralWord( static_cast(static_cast(1) << (i % wordinbits))); return true; } buffer[buffer.size() - 1] |= static_cast(static_cast(1) << (i % wordinbits)); // check if we just completed a stream of 1s if (buffer[buffer.size() - 1] == static_cast(~0)) { // we remove the last dirty word buffer[buffer.size() - 1] = 0; buffer.resize(buffer.size() - 1); lastRunningLengthWord.setNumberOfLiteralWords(static_cast( lastRunningLengthWord.getNumberOfLiteralWords() - 1)); // next we add one clean word addEmptyWord(true); } return true; } template void EWAHBoolArray::inplace_logicalnot() { size_t pointer(0), lastrlw(0); while (pointer < buffer.size()) { RunningLengthWord rlw(buffer[pointer]); lastrlw = pointer; // we save this up if 
(rlw.getRunningBit()) rlw.setRunningBit(false); else rlw.setRunningBit(true); ++pointer; for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) { buffer[pointer] = static_cast(~buffer[pointer]); ++pointer; } } if (sizeinbits % wordinbits != 0) { RunningLengthWord rlw(buffer[lastrlw]); const uword maskbogus = static_cast((static_cast(1) << (sizeinbits % wordinbits)) - 1); if (rlw.getNumberOfLiteralWords() > 0) { // easy case buffer[lastrlw + 1 + rlw.getNumberOfLiteralWords() - 1] &= maskbogus; } else { rlw.setRunningLength(rlw.getRunningLength() - 1); addLiteralWord(maskbogus); } } } template size_t EWAHBoolArray::numberOfWords() const { size_t tot(0); size_t pointer(0); while (pointer < buffer.size()) { ConstRunningLengthWord rlw(buffer[pointer]); tot += rlw.size(); pointer += 1 + rlw.getNumberOfLiteralWords(); } return tot; } template void EWAHBoolArray::assertWordCount(std::string message) const { #ifdef EWAHASSERT size_t tot = numberOfWords(); size_t expected = (sizeinbits + wordinbits - 1) / wordinbits; if (expected != tot) { std::cerr << "[assertWordCount] wordinbits " << wordinbits << std::endl; std::cerr << "[assertWordCount] sizeinbits " << sizeinbits << std::endl; std::cerr << "[assertWordCount] " << message << std::endl; std::cerr << "[assertWordCount] number of words " << tot << std::endl; std::cerr << "[assertWordCount] expected number of words " << expected << std::endl; debugprintout(); throw std::runtime_error("bug"); } #endif } template void EWAHBoolArray::correctWordCount() { size_t tot = numberOfWords(); size_t expected = (sizeinbits + wordinbits - 1) / wordinbits; if (expected != tot) { if (tot < expected) { fastaddStreamOfEmptyWords(false, expected - tot); } else { RunningLengthWord lastRunningLengthWord(buffer[lastRLW]); lastRunningLengthWord.setRunningLength(static_cast( lastRunningLengthWord.getRunningLength() + expected - tot)); } } } template size_t EWAHBoolArray::numberOfOnes() const { size_t tot(0); size_t pointer(0); while (pointer < 
buffer.size()) { ConstRunningLengthWord rlw(buffer[pointer]); if (rlw.getRunningBit()) { tot += static_cast(rlw.getRunningLength() * wordinbits); } ++pointer; for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) { tot += countOnes((UWORD)buffer[pointer]); ++pointer; } } return tot; } template std::vector EWAHBoolArray::toArray() const { std::vector ans; size_t pos(0); size_t pointer(0); const size_t buffersize = buffer.size(); while (pointer < buffersize) { ConstRunningLengthWord rlw(buffer[pointer]); const size_t productofrl = static_cast(rlw.getRunningLength() * wordinbits); if (rlw.getRunningBit()) { size_t upper_limit = pos + productofrl; for (; pos < upper_limit; ++pos) { ans.push_back(pos); } } else { pos += productofrl; } ++pointer; const size_t rlwlw = rlw.getNumberOfLiteralWords(); for (size_t k = 0; k < rlwlw; ++k) { uword myword = buffer[pointer]; while (myword != 0) { uint64_t t = myword & (~myword + 1); uint32_t r = numberOfTrailingZeros(t); ans.push_back(pos + r); myword ^= t; } pos += wordinbits; ++pointer; } } return ans; } template void EWAHBoolArray::logicalnot(EWAHBoolArray &x) const { x.reset(); x.buffer.reserve(buffer.size()); EWAHBoolArrayRawIterator i = this->raw_iterator(); if (!i.hasNext()) return; // nothing to do while (true) { BufferedRunningLengthWord &rlw = i.next(); if (i.hasNext()) { if (rlw.getRunningLength() > 0) x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(), rlw.getRunningLength()); if (rlw.getNumberOfLiteralWords() > 0) { const uword *dw = i.dirtyWords(); for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) { x.addLiteralWord(~dw[k]); } } } else { if (rlw.getNumberOfLiteralWords() == 0) { if ((this->sizeinbits % wordinbits != 0) && !rlw.getRunningBit()) { if (rlw.getRunningLength() > 1) x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(), rlw.getRunningLength() - 1); const uword maskbogus = static_cast((static_cast(1) << (this->sizeinbits % wordinbits)) - 1); x.addLiteralWord(maskbogus); break; } else { if 
(rlw.getRunningLength() > 0) x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(), rlw.getRunningLength()); break; } } if (rlw.getRunningLength() > 0) x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(), rlw.getRunningLength()); const uword *dw = i.dirtyWords(); for (size_t k = 0; k + 1 < rlw.getNumberOfLiteralWords(); ++k) { x.addLiteralWord(~dw[k]); } const uword maskbogus = (this->sizeinbits % wordinbits != 0) ? static_cast((static_cast(1) << (this->sizeinbits % wordinbits)) - 1) : ~static_cast(0); x.addLiteralWord(static_cast((~dw[rlw.getNumberOfLiteralWords() - 1]) & maskbogus)); break; } } x.sizeinbits = this->sizeinbits; } template size_t EWAHBoolArray::addWord(const uword newdata, const uint32_t bitsthatmatter) { sizeinbits += bitsthatmatter; if (newdata == 0) { return addEmptyWord(0); } else if (newdata == static_cast(~0)) { return addEmptyWord(1); } else { return addLiteralWord(newdata); } } template inline void EWAHBoolArray::writeBuffer(std::ostream &out) const { if (!buffer.empty()) out.write(reinterpret_cast(&buffer[0]), sizeof(uword) * buffer.size()); } template inline void EWAHBoolArray::readBuffer(std::istream &in, const size_t buffersize) { buffer.resize(buffersize); if (buffersize > 0) in.read(reinterpret_cast(&buffer[0]), sizeof(uword) * buffersize); } template size_t EWAHBoolArray::write(std::ostream &out, const bool savesizeinbits) const { size_t written = 0; if (savesizeinbits) { uint64_t sb = static_cast(sizeinbits); out.write(reinterpret_cast(&sb), sizeof(sb)); written += sizeof(uint64_t); } const size_t buffersize = buffer.size(); uint64_t bs = static_cast(buffersize); out.write(reinterpret_cast(&bs), sizeof(bs)); written += sizeof(uint64_t); if (buffersize > 0) { out.write(reinterpret_cast(&buffer[0]), static_cast(sizeof(uword) * buffersize)); written += sizeof(uword) * buffersize; } return written; } template size_t EWAHBoolArray::write(char *out, size_t capacity, const bool savesizeinbits) const { size_t written = 0; if (savesizeinbits) { 
uint64_t sb = static_cast(sizeinbits); if (capacity < sizeof(sb)) return 0; capacity -= sizeof(sb); memcpy(out, &sb, sizeof(sb)); out += sizeof(sb); written += sizeof(uint64_t); } const size_t buffersize = buffer.size(); uint64_t bs = static_cast(buffersize); if (capacity < sizeof(bs)) return 0; capacity -= sizeof(bs); memcpy(out, &buffersize, sizeof(bs)); out += sizeof(bs); written += sizeof(uint64_t); if (buffersize > 0) { if (capacity < sizeof(uword) * buffersize) return 0; memcpy(out, &buffer[0], sizeof(uword) * buffersize); written += sizeof(uword) * buffersize; } return written; } template size_t EWAHBoolArray::read(std::istream &in, const bool savesizeinbits) { size_t read = 0; if (savesizeinbits) { uint64_t tmp; in.read(reinterpret_cast(&tmp), sizeof(tmp)); read += sizeof(tmp); sizeinbits = static_cast(tmp); } else { sizeinbits = 0; } size_t buffersize(0); uint64_t tmp; in.read(reinterpret_cast(&tmp), sizeof(tmp)); read += sizeof(tmp); buffersize = static_cast(tmp); buffer.resize(buffersize); if (buffersize > 0) { in.read(reinterpret_cast(&buffer[0]), static_cast(sizeof(uword) * buffersize)); read += sizeof(uword) * buffersize; } return read; } template size_t EWAHBoolArray::read(const char *in, size_t capacity, const bool savesizeinbits) { size_t read = 0; if (savesizeinbits) { uint64_t tmp; if (capacity < sizeof(tmp)) return 0; capacity -= sizeof(tmp); memcpy(reinterpret_cast(&tmp), in, sizeof(tmp)); read += sizeof(tmp); in += sizeof(tmp); sizeinbits = static_cast(tmp); } else { sizeinbits = 0; } size_t buffersize(0); uint64_t tmp; if (capacity < sizeof(uint64_t)) return 0; capacity -= sizeof(uint64_t); memcpy(reinterpret_cast(&tmp), in, sizeof(uint64_t)); in += sizeof(uint64_t); read += sizeof(uint64_t); buffersize = static_cast(tmp); buffer.resize(buffersize); if (buffersize > 0) { if (capacity < sizeof(uword) * buffersize) return 0; memcpy(&buffer[0], in, sizeof(uword) * buffersize); read += sizeof(uword) * buffersize; } return read; } template size_t 
EWAHBoolArray::addLiteralWord(const uword newdata) { RunningLengthWord lastRunningLengthWord(buffer[lastRLW]); uword numbersofar = lastRunningLengthWord.getNumberOfLiteralWords(); if (numbersofar >= RunningLengthWord::largestliteralcount) { // 0x7FFF) { buffer.push_back(0); lastRLW = buffer.size() - 1; RunningLengthWord lastRunningLengthWord2(buffer[lastRLW]); lastRunningLengthWord2.setNumberOfLiteralWords(1); buffer.push_back(newdata); return 2; } lastRunningLengthWord.setNumberOfLiteralWords( static_cast(numbersofar + 1)); buffer.push_back(newdata); return 1; } template size_t EWAHBoolArray::padWithZeroes(const size_t totalbits) { size_t wordsadded = 0; if (totalbits <= sizeinbits) return wordsadded; size_t missingbits = totalbits - sizeinbits; RunningLengthWord rlw(buffer[lastRLW]); if (rlw.getNumberOfLiteralWords() > 0) { // Consume trailing zeroes of trailing literal word (past sizeinbits) size_t remain = sizeinbits % wordinbits; if (remain > 0) // Is last word partial? { size_t avail = wordinbits - remain; if (avail > 0) { if (missingbits > avail) { missingbits -= avail; } else { missingbits = 0; } sizeinbits += avail; } } } if (missingbits > 0) { size_t wordstoadd = missingbits / wordinbits; if ((missingbits % wordinbits) != 0) ++wordstoadd; wordsadded = addStreamOfEmptyWords(false, wordstoadd); } sizeinbits = totalbits; return wordsadded; } /** * This is a low-level iterator. 
*/ template class EWAHBoolArrayRawIterator { public: EWAHBoolArrayRawIterator(const EWAHBoolArray &p) : pointer(0), myparent(&p.getBuffer()), rlw((*myparent)[pointer], this) {} EWAHBoolArrayRawIterator(const EWAHBoolArrayRawIterator &o) : pointer(o.pointer), myparent(o.myparent), rlw(o.rlw) {} bool hasNext() const { return pointer < myparent->size(); } BufferedRunningLengthWord &next() { rlw.read((*myparent)[pointer]); pointer = static_cast(pointer + rlw.getNumberOfLiteralWords() + 1); return rlw; } const uword *dirtyWords() const { return myparent->data() + static_cast(pointer - rlw.getNumberOfLiteralWords()); } EWAHBoolArrayRawIterator &operator=(const EWAHBoolArrayRawIterator &other) { pointer = other.pointer; myparent = other.myparent; rlw = other.rlw; return *this; } size_t pointer; const std::vector *myparent; BufferedRunningLengthWord rlw; EWAHBoolArrayRawIterator(); }; template EWAHBoolArrayIterator EWAHBoolArray::uncompress() const { return EWAHBoolArrayIterator(buffer); } template EWAHBoolArrayRawIterator EWAHBoolArray::raw_iterator() const { return EWAHBoolArrayRawIterator(*this); } template bool EWAHBoolArray::operator==(const EWAHBoolArray &x) const { EWAHBoolArrayRawIterator i = x.raw_iterator(); EWAHBoolArrayRawIterator j = raw_iterator(); if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens... return (i.hasNext() == false) && (j.hasNext() == false); } // at this point, this should be safe: BufferedRunningLengthWord &rlwi = i.next(); BufferedRunningLengthWord &rlwj = j.next(); while ((rlwi.size() > 0) && (rlwj.size() > 0)) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); BufferedRunningLengthWord &prey = i_is_prey ? rlwi : rlwj; BufferedRunningLengthWord &predator = i_is_prey ? rlwj : rlwi; size_t index = 0; const bool nonzero = ((!predator.getRunningBit()) ? 
prey.nonzero_discharge(predator.getRunningLength(), index) : prey.nonzero_dischargeNegated(predator.getRunningLength(), index)); if (nonzero) { return false; } if (predator.getRunningLength() - index > 0) { if (predator.getRunningBit()) { return false; } } predator.discardRunningWordsWithReload(); } const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (size_t k = 0; k < nbre_literal; ++k) if ((rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)) != 0) return false; rlwi.discardLiteralWordsWithReload(nbre_literal); rlwj.discardLiteralWordsWithReload(nbre_literal); } } const bool i_remains = rlwi.size() > 0; BufferedRunningLengthWord &remaining = i_remains ? rlwi : rlwj; return !remaining.nonzero_discharge(); } template void EWAHBoolArray::swap(EWAHBoolArray &x) { buffer.swap(x.buffer); size_t tmp = x.sizeinbits; x.sizeinbits = sizeinbits; sizeinbits = tmp; tmp = x.lastRLW; x.lastRLW = lastRLW; lastRLW = tmp; } template void EWAHBoolArray::append(const EWAHBoolArray &x) { if (sizeinbits % wordinbits == 0) { // hoping for the best? sizeinbits += x.sizeinbits; ConstRunningLengthWord lRLW(buffer[lastRLW]); if ((lRLW.getRunningLength() == 0) && (lRLW.getNumberOfLiteralWords() == 0)) { // it could be that the running length word is empty, in such a case, // we want to get rid of it! lastRLW = x.lastRLW + buffer.size() - 1; buffer.resize(buffer.size() - 1); buffer.insert(buffer.end(), x.buffer.begin(), x.buffer.end()); } else { lastRLW = x.lastRLW + buffer.size(); buffer.insert(buffer.end(), x.buffer.begin(), x.buffer.end()); } } else { std::stringstream ss; ss << "This should really not happen! You are trying to append to a bitmap " "having a fractional number of words, that is, " << static_cast(sizeinbits) << " bits with a word size in bits of " << static_cast(wordinbits) << ". "; ss << "Size of the bitmap being appended: " << x.sizeinbits << " bits." 
<< std::endl; throw std::invalid_argument(ss.str()); } } template EWAHBoolArrayIterator::EWAHBoolArrayIterator( const std::vector &parent) : pointer(0), myparent(parent), compressedwords(0), literalwords(0), rl(0), lw(0), b(0) { if (pointer < myparent.size()) readNewRunningLengthWord(); } template void EWAHBoolArrayIterator::readNewRunningLengthWord() { literalwords = 0; compressedwords = 0; ConstRunningLengthWord rlw(myparent[pointer]); rl = rlw.getRunningLength(); lw = rlw.getNumberOfLiteralWords(); b = rlw.getRunningBit(); if ((rl == 0) && (lw == 0)) { if (pointer < myparent.size() - 1) { ++pointer; readNewRunningLengthWord(); } else { pointer = myparent.size(); } } } template BoolArray EWAHBoolArray::toBoolArray() const { BoolArray ans(sizeinbits); EWAHBoolArrayIterator i = uncompress(); size_t counter = 0; while (i.hasNext()) { ans.setWord(counter++, i.next()); } return ans; } template template void EWAHBoolArray::appendSetBits(container &out, const size_t offset) const { size_t pointer(0); size_t currentoffset(offset); if (RESERVEMEMORY) out.reserve(buffer.size() + 64); // trading memory for speed. 
const size_t buffersize = buffer.size(); while (pointer < buffersize) { ConstRunningLengthWord rlw(buffer[pointer]); const size_t productofrl = static_cast(rlw.getRunningLength() * wordinbits); if (rlw.getRunningBit()) { const size_t upper_limit = currentoffset + productofrl; for (; currentoffset < upper_limit; ++currentoffset) { out.push_back(currentoffset); } } else { currentoffset += productofrl; } ++pointer; const size_t rlwlw = rlw.getNumberOfLiteralWords(); for (uword k = 0; k < rlwlw; ++k) { uword currentword = buffer[pointer]; while (currentword != 0) { uword t = static_cast(currentword & (~currentword+1)); uint32_t r = numberOfTrailingZeros(t); out.push_back(currentoffset + r); currentword ^= t; } currentoffset += wordinbits; ++pointer; } } } template bool EWAHBoolArray::operator!=(const EWAHBoolArray &x) const { return !(*this == x); } template bool EWAHBoolArray::operator==(const BoolArray &x) const { // could be more efficient return (this->toBoolArray() == x); } template bool EWAHBoolArray::operator!=(const BoolArray &x) const { // could be more efficient return (this->toBoolArray() != x); } template size_t EWAHBoolArray::addStreamOfEmptyWords(const bool v, size_t number) { if (number == 0) return 0; sizeinbits += number * wordinbits; size_t wordsadded = 0; if ((RunningLengthWord::getRunningBit(buffer[lastRLW]) != v) && (RunningLengthWord::size(buffer[lastRLW]) == 0)) { RunningLengthWord::setRunningBit(buffer[lastRLW], v); } else if ((RunningLengthWord::getNumberOfLiteralWords( buffer[lastRLW]) != 0) || (RunningLengthWord::getRunningBit(buffer[lastRLW]) != v)) { buffer.push_back(0); ++wordsadded; lastRLW = buffer.size() - 1; if (v) RunningLengthWord::setRunningBit(buffer[lastRLW], v); } const uword runlen = RunningLengthWord::getRunningLength(buffer[lastRLW]); const uword whatwecanadd = number < static_cast( RunningLengthWord::largestrunninglengthcount - runlen) ? 
static_cast(number) : static_cast( RunningLengthWord::largestrunninglengthcount - runlen); RunningLengthWord::setRunningLength( buffer[lastRLW], static_cast(runlen + whatwecanadd)); number -= static_cast(whatwecanadd); while (number >= RunningLengthWord::largestrunninglengthcount) { buffer.push_back(0); ++wordsadded; lastRLW = buffer.size() - 1; if (v) RunningLengthWord::setRunningBit(buffer[lastRLW], v); RunningLengthWord::setRunningLength( buffer[lastRLW], RunningLengthWord::largestrunninglengthcount); number -= static_cast( RunningLengthWord::largestrunninglengthcount); } if (number > 0) { buffer.push_back(0); ++wordsadded; lastRLW = buffer.size() - 1; if (v) RunningLengthWord::setRunningBit(buffer[lastRLW], v); RunningLengthWord::setRunningLength(buffer[lastRLW], static_cast(number)); } return wordsadded; } template void EWAHBoolArray::fastaddStreamOfEmptyWords(const bool v, size_t number) { if (number == 0) return; if ((RunningLengthWord::getRunningBit(buffer[lastRLW]) != v) && (RunningLengthWord::size(buffer[lastRLW]) == 0)) { RunningLengthWord::setRunningBit(buffer[lastRLW], v); } else if ((RunningLengthWord::getNumberOfLiteralWords( buffer[lastRLW]) != 0) || (RunningLengthWord::getRunningBit(buffer[lastRLW]) != v)) { buffer.push_back(0); lastRLW = buffer.size() - 1; if (v) RunningLengthWord::setRunningBit(buffer[lastRLW], v); } const uword runlen = RunningLengthWord::getRunningLength(buffer[lastRLW]); const uword whatwecanadd = number < static_cast( RunningLengthWord::largestrunninglengthcount - runlen) ? 
static_cast(number) : static_cast( RunningLengthWord::largestrunninglengthcount - runlen); RunningLengthWord::setRunningLength( buffer[lastRLW], static_cast(runlen + whatwecanadd)); number -= static_cast(whatwecanadd); while (number >= RunningLengthWord::largestrunninglengthcount) { buffer.push_back(0); lastRLW = buffer.size() - 1; if (v) RunningLengthWord::setRunningBit(buffer[lastRLW], v); RunningLengthWord::setRunningLength( buffer[lastRLW], RunningLengthWord::largestrunninglengthcount); number -= static_cast( RunningLengthWord::largestrunninglengthcount); } if (number > 0) { buffer.push_back(0); lastRLW = buffer.size() - 1; if (v) RunningLengthWord::setRunningBit(buffer[lastRLW], v); RunningLengthWord::setRunningLength(buffer[lastRLW], static_cast(number)); } } template size_t EWAHBoolArray::addStreamOfDirtyWords(const uword *v, const size_t number) { if (number == 0) return 0; uword rlw = buffer[lastRLW]; size_t NumberOfLiteralWords = RunningLengthWord::getNumberOfLiteralWords(rlw); if (NumberOfLiteralWords + number <= RunningLengthWord::largestliteralcount) { RunningLengthWord::setNumberOfLiteralWords( rlw, static_cast(NumberOfLiteralWords + number)); buffer[lastRLW] = rlw; sizeinbits += number * wordinbits; buffer.insert(buffer.end(), v, v + number); return number; } // we proceed the long way size_t howmanywecanadd = RunningLengthWord::largestliteralcount - NumberOfLiteralWords; RunningLengthWord::setNumberOfLiteralWords( rlw, RunningLengthWord::largestliteralcount); buffer[lastRLW] = rlw; buffer.insert(buffer.end(), v, v + howmanywecanadd); size_t wordadded = howmanywecanadd; sizeinbits += howmanywecanadd * wordinbits; buffer.push_back(0); lastRLW = buffer.size() - 1; ++wordadded; wordadded += addStreamOfDirtyWords(v + howmanywecanadd, number - howmanywecanadd); return wordadded; } template void EWAHBoolArray::fastaddStreamOfDirtyWords(const uword *v, const size_t number) { if (number == 0) return; uword rlw = buffer[lastRLW]; size_t NumberOfLiteralWords = 
RunningLengthWord::getNumberOfLiteralWords(rlw); if (NumberOfLiteralWords + number <= RunningLengthWord::largestliteralcount) { RunningLengthWord::setNumberOfLiteralWords( rlw, static_cast(NumberOfLiteralWords + number)); buffer[lastRLW] = rlw; for (size_t i = 0; i < number; ++i) buffer.push_back(v[i]); // buffer.insert(buffer.end(), v, v+number); // seems slower than push_back? return; } // we proceed the long way size_t howmanywecanadd = RunningLengthWord::largestliteralcount - NumberOfLiteralWords; RunningLengthWord::setNumberOfLiteralWords( rlw, RunningLengthWord::largestliteralcount); buffer[lastRLW] = rlw; for (size_t i = 0; i < howmanywecanadd; ++i) buffer.push_back(v[i]); // buffer.insert(buffer.end(), v, v+howmanywecanadd);// seems slower than // push_back? buffer.push_back(0); lastRLW = buffer.size() - 1; fastaddStreamOfDirtyWords(v + howmanywecanadd, number - howmanywecanadd); } template size_t EWAHBoolArray::addStreamOfNegatedDirtyWords(const uword *v, const size_t number) { if (number == 0) return 0; uword rlw = buffer[lastRLW]; size_t NumberOfLiteralWords = RunningLengthWord::getNumberOfLiteralWords(rlw); if (NumberOfLiteralWords + number <= RunningLengthWord::largestliteralcount) { RunningLengthWord::setNumberOfLiteralWords( rlw, static_cast(NumberOfLiteralWords + number)); buffer[lastRLW] = rlw; sizeinbits += number * wordinbits; for (size_t k = 0; k < number; ++k) buffer.push_back(~v[k]); return number; } // we proceed the long way size_t howmanywecanadd = RunningLengthWord::largestliteralcount - NumberOfLiteralWords; RunningLengthWord::setNumberOfLiteralWords( rlw, RunningLengthWord::largestliteralcount); buffer[lastRLW] = rlw; for (size_t k = 0; k < howmanywecanadd; ++k) buffer.push_back(~v[k]); size_t wordadded = howmanywecanadd; sizeinbits += howmanywecanadd * wordinbits; buffer.push_back(0); lastRLW = buffer.size() - 1; ++wordadded; wordadded += addStreamOfDirtyWords(v + howmanywecanadd, number - howmanywecanadd); return wordadded; } template 
size_t EWAHBoolArray::addEmptyWord(const bool v) { RunningLengthWord lastRunningLengthWord(buffer[lastRLW]); const bool noliteralword = (lastRunningLengthWord.getNumberOfLiteralWords() == 0); // first, if the last running length word is empty, we align it // this uword runlen = lastRunningLengthWord.getRunningLength(); if ((noliteralword) && (runlen == 0)) { lastRunningLengthWord.setRunningBit(v); } if ((noliteralword) && (lastRunningLengthWord.getRunningBit() == v) && (runlen < RunningLengthWord::largestrunninglengthcount)) { lastRunningLengthWord.setRunningLength(static_cast(runlen + 1)); return 0; } else { // we have to start anew buffer.push_back(0); lastRLW = buffer.size() - 1; RunningLengthWord lastRunningLengthWord2(buffer[lastRLW]); lastRunningLengthWord2.setRunningBit(v); lastRunningLengthWord2.setRunningLength(1); return 1; } } template void fast_logicalor_tocontainer(size_t n, const EWAHBoolArray **inputs, EWAHBoolArray &container) { class EWAHBoolArrayPtr { public: EWAHBoolArrayPtr(const EWAHBoolArray *p, bool o) : ptr(p), own(o) {} const EWAHBoolArray *ptr; bool own; // whether to clean bool operator<(const EWAHBoolArrayPtr &o) const { return o.ptr->sizeInBytes() < ptr->sizeInBytes(); // backward on purpose } }; if (n == 0) { container.reset(); return; } if (n == 1) { container = *inputs[0]; return; } std::priority_queue pq; for (size_t i = 0; i < n; i++) { // could use emplace pq.push(EWAHBoolArrayPtr(inputs[i], false)); } while (pq.size() > 2) { EWAHBoolArrayPtr x1 = pq.top(); pq.pop(); EWAHBoolArrayPtr x2 = pq.top(); pq.pop(); EWAHBoolArray *buffer = new EWAHBoolArray(); x1.ptr->logicalor(*x2.ptr, *buffer); if (x1.own) { delete x1.ptr; } if (x2.own) { delete x2.ptr; } pq.push(EWAHBoolArrayPtr(buffer, true)); } EWAHBoolArrayPtr x1 = pq.top(); pq.pop(); EWAHBoolArrayPtr x2 = pq.top(); pq.pop(); x1.ptr->logicalor(*x2.ptr, container); if (x1.own) { delete x1.ptr; } if (x2.own) { delete x2.ptr; } } template void EWAHBoolArray::logicalor(const 
EWAHBoolArray &a, EWAHBoolArray &container) const { container.reset(); if (RESERVEMEMORY) container.buffer.reserve(buffer.size() + a.buffer.size()); EWAHBoolArrayRawIterator i = a.raw_iterator(); EWAHBoolArrayRawIterator j = raw_iterator(); if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens... container.setSizeInBits(sizeInBits()); return; } // at this point, this should be safe: BufferedRunningLengthWord &rlwi = i.next(); BufferedRunningLengthWord &rlwj = j.next(); while ((rlwi.size() > 0) && (rlwj.size() > 0)) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); BufferedRunningLengthWord &prey = i_is_prey ? rlwi : rlwj; BufferedRunningLengthWord &predator = i_is_prey ? rlwj : rlwi; if (predator.getRunningBit()) { container.fastaddStreamOfEmptyWords(true, predator.getRunningLength()); prey.discardFirstWordsWithReload(predator.getRunningLength()); } else { const size_t index = prey.discharge(container, predator.getRunningLength()); container.fastaddStreamOfEmptyWords(false, predator.getRunningLength() - index); } predator.discardRunningWordsWithReload(); } const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (size_t k = 0; k < nbre_literal; ++k) { container.addWord(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k)); } rlwi.discardLiteralWordsWithReload(nbre_literal); rlwj.discardLiteralWordsWithReload(nbre_literal); } } const bool i_remains = rlwi.size() > 0; BufferedRunningLengthWord &remaining = i_remains ? rlwi : rlwj; remaining.discharge(container); container.setSizeInBits(sizeInBits() > a.sizeInBits() ? 
sizeInBits() : a.sizeInBits()); } template size_t EWAHBoolArray::logicalorcount(const EWAHBoolArray &a) const { size_t answer = 0; EWAHBoolArrayRawIterator i = a.raw_iterator(); EWAHBoolArrayRawIterator j = raw_iterator(); if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens... return 0; } // at this point, this should be safe: BufferedRunningLengthWord &rlwi = i.next(); BufferedRunningLengthWord &rlwj = j.next(); while ((rlwi.size() > 0) && (rlwj.size() > 0)) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); BufferedRunningLengthWord &prey = i_is_prey ? rlwi : rlwj; BufferedRunningLengthWord &predator = i_is_prey ? rlwj : rlwi; if (predator.getRunningBit()) { answer += predator.getRunningLength() * wordinbits; prey.discardFirstWordsWithReload(predator.getRunningLength()); } else { // const size_t index = prey.dischargeCount(predator.getRunningLength(), &answer); } predator.discardRunningWordsWithReload(); } const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (size_t k = 0; k < nbre_literal; ++k) { answer += countOnes( (uword)(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k))); } rlwi.discardLiteralWordsWithReload(nbre_literal); rlwj.discardLiteralWordsWithReload(nbre_literal); } } const bool i_remains = rlwi.size() > 0; BufferedRunningLengthWord &remaining = i_remains ? rlwi : rlwj; answer += remaining.dischargeCount(); return answer; } template void EWAHBoolArray::logicalxor(const EWAHBoolArray &a, EWAHBoolArray &container) const { container.reset(); if (RESERVEMEMORY) container.buffer.reserve(buffer.size() + a.buffer.size()); EWAHBoolArrayRawIterator i = a.raw_iterator(); EWAHBoolArrayRawIterator j = raw_iterator(); if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens... 
container.setSizeInBits(sizeInBits()); return; } // at this point, this should be safe: BufferedRunningLengthWord &rlwi = i.next(); BufferedRunningLengthWord &rlwj = j.next(); while ((rlwi.size() > 0) && (rlwj.size() > 0)) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); BufferedRunningLengthWord &prey = i_is_prey ? rlwi : rlwj; BufferedRunningLengthWord &predator = i_is_prey ? rlwj : rlwi; const size_t index = (!predator.getRunningBit()) ? prey.discharge(container, predator.getRunningLength()) : prey.dischargeNegated(container, predator.getRunningLength()); container.fastaddStreamOfEmptyWords(predator.getRunningBit(), predator.getRunningLength() - index); predator.discardRunningWordsWithReload(); } const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (size_t k = 0; k < nbre_literal; ++k) container.addWord(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); rlwi.discardLiteralWordsWithReload(nbre_literal); rlwj.discardLiteralWordsWithReload(nbre_literal); } } const bool i_remains = rlwi.size() > 0; BufferedRunningLengthWord &remaining = i_remains ? rlwi : rlwj; remaining.discharge(container); container.setSizeInBits(sizeInBits() > a.sizeInBits() ? 
sizeInBits() : a.sizeInBits()); } template size_t EWAHBoolArray::logicalxorcount(const EWAHBoolArray &a) const { EWAHBoolArrayRawIterator i = a.raw_iterator(); EWAHBoolArrayRawIterator j = raw_iterator(); if (!i.hasNext()) return a.numberOfOnes(); if (!j.hasNext()) return this->numberOfOnes(); size_t answer = 0; // at this point, this should be safe: BufferedRunningLengthWord &rlwi = i.next(); BufferedRunningLengthWord &rlwj = j.next(); while ((rlwi.size() > 0) && (rlwj.size() > 0)) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); BufferedRunningLengthWord &prey = i_is_prey ? rlwi : rlwj; BufferedRunningLengthWord &predator = i_is_prey ? rlwj : rlwi; size_t index; if (predator.getRunningBit()) { index = prey.dischargeCountNegated(predator.getRunningLength(), &answer); } else { index = prey.dischargeCount(predator.getRunningLength(), &answer); } if (predator.getRunningBit()) answer += (predator.getRunningLength() - index) * wordinbits; predator.discardRunningWordsWithReload(); } const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (size_t k = 0; k < nbre_literal; ++k) { answer += countOnes( (uword)(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k))); } rlwi.discardLiteralWordsWithReload(nbre_literal); rlwj.discardLiteralWordsWithReload(nbre_literal); } } const bool i_remains = rlwi.size() > 0; BufferedRunningLengthWord &remaining = i_remains ? rlwi : rlwj; answer += remaining.dischargeCount(); return answer; } template void EWAHBoolArray::logicaland(const EWAHBoolArray &a, EWAHBoolArray &container) const { container.reset(); if (RESERVEMEMORY) container.buffer.reserve(buffer.size() > a.buffer.size() ? 
buffer.size() : a.buffer.size()); EWAHBoolArrayRawIterator i = a.raw_iterator(); EWAHBoolArrayRawIterator j = raw_iterator(); if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens... container.setSizeInBits(sizeInBits()); return; } // at this point, this should be safe: BufferedRunningLengthWord &rlwi = i.next(); BufferedRunningLengthWord &rlwj = j.next(); while ((rlwi.size() > 0) && (rlwj.size() > 0)) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); BufferedRunningLengthWord &prey(i_is_prey ? rlwi : rlwj); BufferedRunningLengthWord &predator(i_is_prey ? rlwj : rlwi); if (!predator.getRunningBit()) { container.fastaddStreamOfEmptyWords(false, predator.getRunningLength()); prey.discardFirstWordsWithReload(predator.getRunningLength()); } else { const size_t index = prey.discharge(container, predator.getRunningLength()); container.fastaddStreamOfEmptyWords(false, predator.getRunningLength() - index); } predator.discardRunningWordsWithReload(); } const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (size_t k = 0; k < nbre_literal; ++k) { container.addWord(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); } rlwi.discardLiteralWordsWithReload(nbre_literal); rlwj.discardLiteralWordsWithReload(nbre_literal); } } BufferedRunningLengthWord &remain = rlwj.size() > 0 ? rlwj : rlwi; while(remain.size() > 0) { container.addStreamOfEmptyWords(false, remain.size()); if (!remain.next()) { break; } } container.setSizeInBits(sizeInBits() > a.sizeInBits() ? sizeInBits() : a.sizeInBits()); container.assertWordCount("logicaland"); } template void EWAHBoolArray::logicalandnot(const EWAHBoolArray &a, EWAHBoolArray &container) const { container.reset(); if (RESERVEMEMORY) container.buffer.reserve(buffer.size() > a.buffer.size() ? 
buffer.size() : a.buffer.size()); EWAHBoolArrayRawIterator i = raw_iterator(); EWAHBoolArrayRawIterator j = a.raw_iterator(); if (!j.hasNext()) { // the other fellow is empty container = *this; // just copy, stupidly, the data return; } if (!(i.hasNext())) { // hopefully this never happens... container.setSizeInBits(sizeInBits()); return; } // at this point, this should be safe: BufferedRunningLengthWord &rlwi = i.next(); BufferedRunningLengthWord &rlwj = j.next(); while ((rlwi.size() > 0) && (rlwj.size() > 0)) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); BufferedRunningLengthWord &prey(i_is_prey ? rlwi : rlwj); BufferedRunningLengthWord &predator(i_is_prey ? rlwj : rlwi); if (((predator.getRunningBit()) && (i_is_prey)) || ((!predator.getRunningBit()) && (!i_is_prey))) { container.fastaddStreamOfEmptyWords(false, predator.getRunningLength()); prey.discardFirstWordsWithReload(predator.getRunningLength()); } else if (i_is_prey) { const size_t index = prey.discharge(container, predator.getRunningLength()); container.fastaddStreamOfEmptyWords(false, predator.getRunningLength() - index); } else { const size_t index = prey.dischargeNegated(container, predator.getRunningLength()); container.fastaddStreamOfEmptyWords(true, predator.getRunningLength() - index); } predator.discardRunningWordsWithReload(); } const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (size_t k = 0; k < nbre_literal; ++k) { container.addWord(static_cast(rlwi.getLiteralWordAt(k) & ~rlwj.getLiteralWordAt(k))); } rlwi.discardLiteralWordsWithReload(nbre_literal); rlwj.discardLiteralWordsWithReload(nbre_literal); } } if(rlwi.size() > 0) { rlwi.discharge(container); container.setSizeInBits(sizeInBits()); } else { while(rlwj.size() > 0) { container.addStreamOfEmptyWords(false, rlwj.size()); if (!rlwj.next()) { break; } } 
container.setSizeInBits(a.sizeInBits()); } container.assertWordCount("logicalandnot"); } template size_t EWAHBoolArray::logicalandnotcount(const EWAHBoolArray &a) const { EWAHBoolArrayRawIterator i = raw_iterator(); EWAHBoolArrayRawIterator j = a.raw_iterator(); if (!j.hasNext()) { // the other fellow is empty return this->numberOfOnes(); } if (!(i.hasNext())) { // hopefully this never happens... return 0; } size_t answer = 0; // at this point, this should be safe: BufferedRunningLengthWord &rlwi = i.next(); BufferedRunningLengthWord &rlwj = j.next(); while ((rlwi.size() > 0) && (rlwj.size() > 0)) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); BufferedRunningLengthWord &prey(i_is_prey ? rlwi : rlwj); BufferedRunningLengthWord &predator(i_is_prey ? rlwj : rlwi); if (((predator.getRunningBit()) && (i_is_prey)) || ((!predator.getRunningBit()) && (!i_is_prey))) { prey.discardFirstWordsWithReload(predator.getRunningLength()); } else if (i_is_prey) { prey.dischargeCount(predator.getRunningLength(), &answer); } else { const size_t index = prey.dischargeCountNegated(predator.getRunningLength(), &answer); answer += (predator.getRunningLength() - index) * wordinbits; } predator.discardRunningWordsWithReload(); } const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (size_t k = 0; k < nbre_literal; ++k) { answer += countOnes( (uword)(rlwi.getLiteralWordAt(k) & (~rlwj.getLiteralWordAt(k)))); } rlwi.discardLiteralWordsWithReload(nbre_literal); rlwj.discardLiteralWordsWithReload(nbre_literal); } } const bool i_remains = rlwi.size() > 0; if (i_remains) { answer += rlwi.dischargeCount(); } return answer; } template size_t EWAHBoolArray::logicalandcount(const EWAHBoolArray &a) const { EWAHBoolArrayRawIterator i = a.raw_iterator(); EWAHBoolArrayRawIterator j = raw_iterator(); if (!(i.hasNext() and 
j.hasNext())) { // hopefully this never happens... return 0; } size_t answer = 0; // at this point, this should be safe: BufferedRunningLengthWord &rlwi = i.next(); BufferedRunningLengthWord &rlwj = j.next(); while ((rlwi.size() > 0) && (rlwj.size() > 0)) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); BufferedRunningLengthWord &prey(i_is_prey ? rlwi : rlwj); BufferedRunningLengthWord &predator(i_is_prey ? rlwj : rlwi); if (!predator.getRunningBit()) { prey.discardFirstWordsWithReload(predator.getRunningLength()); } else { // const size_t index = prey.dischargeCount(predator.getRunningLength(), &answer); } predator.discardRunningWordsWithReload(); } const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (size_t k = 0; k < nbre_literal; ++k) { answer += countOnes( (uword)(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k))); } rlwi.discardLiteralWordsWithReload(nbre_literal); rlwj.discardLiteralWordsWithReload(nbre_literal); } } return answer; } template bool EWAHBoolArray::intersects(const EWAHBoolArray &a) const { EWAHBoolArrayRawIterator i = a.raw_iterator(); EWAHBoolArrayRawIterator j = raw_iterator(); if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens... return false; } // at this point, this should be safe: BufferedRunningLengthWord &rlwi = i.next(); BufferedRunningLengthWord &rlwj = j.next(); while ((rlwi.size() > 0) && (rlwj.size() > 0)) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); BufferedRunningLengthWord &prey(i_is_prey ? rlwi : rlwj); BufferedRunningLengthWord &predator(i_is_prey ? 
rlwj : rlwi); if (!predator.getRunningBit()) { prey.discardFirstWordsWithReload(predator.getRunningLength()); } else { size_t index = 0; bool isnonzero = prey.nonzero_discharge(predator.getRunningLength(), index); if (isnonzero) return true; } predator.discardRunningWordsWithReload(); } const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (size_t k = 0; k < nbre_literal; ++k) { if ((rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)) != 0) return true; } rlwi.discardLiteralWordsWithReload(nbre_literal); rlwj.discardLiteralWordsWithReload(nbre_literal); } } return false; } template BitmapStatistics EWAHBoolArray::computeStatistics() const { BitmapStatistics bs; EWAHBoolArrayRawIterator i = raw_iterator(); while (i.hasNext()) { BufferedRunningLengthWord &brlw(i.next()); ++bs.runningwordmarker; bs.totalliteral += brlw.getNumberOfLiteralWords(); bs.totalcompressed += brlw.getRunningLength(); if (brlw.getRunningLength() == RunningLengthWord::largestrunninglengthcount) { ++bs.maximumofrunningcounterreached; } } return bs; } template void EWAHBoolArray::debugprintout() const { std::cout << "==printing out EWAHBoolArray==" << std::endl; std::cout << "Number of compressed words: " << buffer.size() << std::endl; std::cout << "Size in bits: " << sizeinbits << std::endl; size_t pointer = 0; while (pointer < buffer.size()) { ConstRunningLengthWord rlw(buffer[pointer]); bool b = rlw.getRunningBit(); const uword rl = rlw.getRunningLength(); const uword lw = rlw.getNumberOfLiteralWords(); std::cout << "pointer = " << pointer << " running bit=" << b << " running length=" << rl << " lit. words=" << lw << std::endl; for (uword j = 0; j < lw; ++j) { const uword &w = buffer[pointer + j + 1]; std::cout << toBinaryString(w) << std::endl; } pointer += lw + 1; } std::cout << "==END==" << std::endl; } template size_t EWAHBoolArray::sizeOnDisk(const bool savesizeinbits) const { return (savesizeinbits ? 
sizeof(uint64_t) : 0) + sizeof(uint64_t) + sizeof(uword) * buffer.size(); } } // namespace ewah #endif ewah_bool_utils-1.2.2/ewah_bool_utils/cpp/ewah.h000066400000000000000000000516721466056343200217550ustar00rootroot00000000000000/** * This code is released under the * Apache License Version 2.0 http://www.apache.org/licenses/. * * (c) Daniel Lemire, http://lemire.me/en/ * with contributions from Zarian Waheed and others. */ #ifndef EWAH_H #define EWAH_H #include #include #include #include "boolarray.h" #include "ewahutil.h" #include "runninglengthword.h" namespace ewah { template class EWAHBoolArrayIterator; template class EWAHBoolArraySetBitForwardIterator; class BitmapStatistics; template class EWAHBoolArrayRawIterator; /** * This class is a compressed bitmap. * This is where compression * happens. * The underlying data structure is an STL vector. */ template class EWAHBoolArray { public: EWAHBoolArray() : buffer(1, 0), sizeinbits(0), lastRLW(0) {} static EWAHBoolArray bitmapOf(size_t n, ...) { EWAHBoolArray ans; va_list vl; va_start(vl, n); for (size_t i = 0; i < n; i++) { ans.set(static_cast(va_arg(vl, int))); } va_end(vl); return ans; } /** * Recover wasted memory usage. Fit buffers to the actual data. */ void trim() { buffer.shrink_to_fit(); } /** * Query the value of bit i. This runs in time proportional to * the size of the bitmap. This is not meant to be use in * a performance-sensitive context. * * (This implementation is based on zhenjl's Go version of JavaEWAH.) 
* */ bool get(const size_t pos) const { if (pos >= static_cast(sizeinbits)) return false; const size_t wordpos = pos / wordinbits; size_t WordChecked = 0; EWAHBoolArrayRawIterator j = raw_iterator(); while (j.hasNext()) { BufferedRunningLengthWord &rle = j.next(); WordChecked += static_cast(rle.getRunningLength()); if (wordpos < WordChecked) return rle.getRunningBit(); if (wordpos < WordChecked + rle.getNumberOfLiteralWords()) { const uword w = j.dirtyWords()[wordpos - WordChecked]; return (w & (static_cast(1) << (pos % wordinbits))) != 0; } WordChecked += static_cast(rle.getNumberOfLiteralWords()); } return false; } /** * Returns true if no bit is set. */ bool empty() const { size_t pointer(0); while (pointer < buffer.size()) { ConstRunningLengthWord rlw(buffer[pointer]); if (rlw.getRunningBit()) { if (rlw.getRunningLength() > 0) return false; } ++pointer; for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) { if (buffer[pointer] != 0) return false; ++pointer; } } return true; } /** * Set the ith bit to true (starting at zero). * Auto-expands the bitmap. It has constant running time complexity. * Note that you must set the bits in increasing order: * set(1), set(2) is ok; set(2), set(1) is not ok. * set(100), set(100) is also not ok. * * Note: by design EWAH is not an updatable data structure in * the sense that once bit 1000 is set, you cannot change the value * of bits 0 to 1000. * * Returns true if the value of the bit was changed, and false otherwise. * (In practice, if you set the bits in strictly increasing order, it * should always return true.) */ bool set(size_t i); /** * Transform into a string that presents a list of set bits. * The running time is linear in the compressed size of the bitmap. 
*/ operator std::string() const { std::stringstream ss; ss << *this; return ss.str(); } friend std::ostream &operator<<(std::ostream &out, const EWAHBoolArray &a) { out << "{"; for (EWAHBoolArray::const_iterator i = a.begin(); i != a.end();) { out << *i; ++i; if (i != a.end()) out << ","; } out << "}"; return out; } /** * Make sure the two bitmaps have the same size (padding with zeroes * if necessary). It has constant running time complexity. * * This is useful when calling "logicalnot" functions. * * This can an adverse effect of performance, especially when computing * intersections. */ void makeSameSize(EWAHBoolArray &a) { if (a.sizeinbits < sizeinbits) a.padWithZeroes(sizeinbits); else if (sizeinbits < a.sizeinbits) padWithZeroes(a.sizeinbits); } enum { RESERVEMEMORY = true }; // for speed typedef EWAHBoolArraySetBitForwardIterator const_iterator; /** * Returns an iterator that can be used to access the position of the * set bits. The running time complexity of a full scan is proportional to the * number * of set bits: be aware that if you have long strings of 1s, this can be * very inefficient. * * It can be much faster to use the toArray method if you want to * retrieve the set bits. */ const_iterator begin() const { return EWAHBoolArraySetBitForwardIterator(&buffer); } /** * Basically a bogus iterator that can be used together with begin() * for constructions such as for(EWAHBoolArray::iterator i = b.begin(); * i!=b.end(); ++i) {} */ const_iterator &end() const { return EWAHBoolArraySetBitForwardIterator::end(); } /** * Retrieve the set bits. Can be much faster than iterating through * the set bits with an iterator. */ std::vector toArray() const; /** * computes the logical and with another compressed bitmap * answer goes into container * Running time complexity is proportional to the sum of the compressed * bitmap sizes. * * The sizeInBits() of the result is equal to the maximum that of the current * bitmap's sizeInBits() and that of a.sizeInBits(). 
*/ void logicaland(const EWAHBoolArray &a, EWAHBoolArray &container) const; /** * computes the logical and with another compressed bitmap * Return the answer * Running time complexity is proportional to the sum of the compressed * bitmap sizes. * * The sizeInBits() of the result is equal to the maximum that of the current * bitmap's sizeInBits() and that of a.sizeInBits(). */ EWAHBoolArray logicaland(const EWAHBoolArray &a) const { EWAHBoolArray answer; logicaland(a, answer); return answer; } /** * calls logicaland */ EWAHBoolArray operator&(const EWAHBoolArray &a) const { return logicaland(a); } /** * computes the logical and with another compressed bitmap * answer goes into container * Running time complexity is proportional to the sum of the compressed * bitmap sizes. * * The sizeInBits() of the result should be equal to that of the current * bitmap irrespective of a.sizeInBits(). * */ void logicalandnot(const EWAHBoolArray &a, EWAHBoolArray &container) const; /** * calls logicalandnot */ EWAHBoolArray operator-(const EWAHBoolArray &a) const { return logicalandnot(a); } /** * computes the logical and not with another compressed bitmap * Return the answer * Running time complexity is proportional to the sum of the compressed * bitmap sizes. * * The sizeInBits() of the result should be equal to that of the current * bitmap irrespective of a.sizeInBits(). * */ EWAHBoolArray logicalandnot(const EWAHBoolArray &a) const { EWAHBoolArray answer; logicalandnot(a, answer); return answer; } /** * tests whether the bitmaps "intersect" (have at least one 1-bit at the same * position). This function does not modify the existing bitmaps. * It is faster than calling logicaland. */ bool intersects(const EWAHBoolArray &a) const; /** * computes the logical or with another compressed bitmap * answer goes into container * Running time complexity is proportional to the sum of the compressed * bitmap sizes. * * If you have many bitmaps, see fast_logicalor_tocontainer. 
* * The sizeInBits() of the result is equal to the maximum that of the current * bitmap's sizeInBits() and that of a.sizeInBits(). */ void logicalor(const EWAHBoolArray &a, EWAHBoolArray &container) const; /** * computes the size (in number of set bits) of the logical or with another * compressed bitmap * Running time complexity is proportional to the sum of the compressed * bitmap sizes. */ size_t logicalorcount(const EWAHBoolArray &a) const; /** * computes the size (in number of set bits) of the logical and with another * compressed bitmap * Running time complexity is proportional to the sum of the compressed * bitmap sizes. */ size_t logicalandcount(const EWAHBoolArray &a) const; /** * computes the size (in number of set bits) of the logical and not with * another compressed bitmap * Running time complexity is proportional to the sum of the compressed * bitmap sizes. */ size_t logicalandnotcount(const EWAHBoolArray &a) const; /** * computes the size (in number of set bits) of the logical xor with another * compressed bitmap * Running time complexity is proportional to the sum of the compressed * bitmap sizes. */ size_t logicalxorcount(const EWAHBoolArray &a) const; /** * computes the logical or with another compressed bitmap * Return the answer * Running time complexity is proportional to the sum of the compressed * bitmap sizes. * * If you have many bitmaps, see fast_logicalor. * * The sizeInBits() of the result is equal to the maximum that of the current * bitmap's sizeInBits() and that of a.sizeInBits(). */ EWAHBoolArray logicalor(const EWAHBoolArray &a) const { EWAHBoolArray answer; logicalor(a, answer); return answer; } /** * calls logicalor */ EWAHBoolArray operator|(const EWAHBoolArray &a) const { return logicalor(a); } /** * computes the logical xor with another compressed bitmap * answer goes into container * Running time complexity is proportional to the sum of the compressed * bitmap sizes. 
* * The sizeInBits() of the result is equal to the maximum that of the current * bitmap's sizeInBits() and that of a.sizeInBits(). */ void logicalxor(const EWAHBoolArray &a, EWAHBoolArray &container) const; /** * computes the logical xor with another compressed bitmap * Return the answer * Running time complexity is proportional to the sum of the compressed * bitmap sizes. * * The sizeInBits() of the result is equal to the maximum that of the current * bitmap's sizeInBits() and that of a.sizeInBits(). */ EWAHBoolArray logicalxor(const EWAHBoolArray &a) const { EWAHBoolArray answer; logicalxor(a, answer); return answer; } /** * calls logicalxor */ EWAHBoolArray operator^(const EWAHBoolArray &a) const { return logicalxor(a); } /** * clear the content of the bitmap. It does not * release the memory. */ void reset() { buffer.clear(); buffer.push_back(0); sizeinbits = 0; lastRLW = 0; } /** * convenience method. * * returns the number of words added (storage cost increase) */ inline size_t addWord(const uword newdata, const uint32_t bitsthatmatter = 8 * sizeof(uword)); inline void printout(std::ostream &o = std::cout) { toBoolArray().printout(o); } /** * Prints a verbose description of the content of the compressed bitmap. */ void debugprintout() const; /** * Return the size in bits of this bitmap (this refers * to the uncompressed size in bits). * * You can increase it with padWithZeroes() */ inline size_t sizeInBits() const { return sizeinbits; } /** * Return the size of the buffer in bytes. This * is equivalent to the storage cost, minus some overhead. * See sizeOnDisk to get the actual storage cost with overhead. */ inline size_t sizeInBytes() const { return buffer.size() * sizeof(uword); } /** * same as addEmptyWord, but you can do several in one shot! 
* returns the number of words added (storage cost increase) */ size_t addStreamOfEmptyWords(const bool v, size_t number); /** * add a stream of dirty words, returns the number of words added * (storage cost increase) */ size_t addStreamOfDirtyWords(const uword *v, const size_t number); /** * add a stream of dirty words, each one negated, returns the number of words * added * (storage cost increase) */ size_t addStreamOfNegatedDirtyWords(const uword *v, const size_t number); /** * make sure the size of the array is totalbits bits by padding with zeroes. * returns the number of words added (storage cost increase). * * This is useful when calling "logicalnot" functions. * * This can an adverse effect of performance, especially when computing * intersections. * */ size_t padWithZeroes(const size_t totalbits); /** * Compute the size on disk assuming that it was saved using * the method "write". */ size_t sizeOnDisk(const bool savesizeinbits = true) const; /** * Save this bitmap to a stream. The file format is * | sizeinbits | buffer length | buffer content| * the sizeinbits part can be omitted if "savesizeinbits=false". * Both sizeinbits and buffer length are saved using the uint64_t data * type. * Returns how many bytes were handed out to the stream. */ size_t write(std::ostream &out, const bool savesizeinbits = true) const; /** * same as write(std::ostream...), except that you provide a char pointer * and a "capacity" (in bytes). The function never writes at or beyond * "out+capacity". If the storage needed exceeds the given capacity, the value * zero is returned: it should be considered an error. Otherwise, the number * of bytes copied is returned. */ size_t write(char *out, size_t capacity, const bool savesizeinbits = true) const; /** * This only writes the content of the buffer (see write()) method. * It is for advanced users. */ void writeBuffer(std::ostream &out) const; /** * size (in words) of the underlying STL vector. 
*/ size_t bufferSize() const { return buffer.size(); } /** * this is the counterpart to the write method. * if you set savesizeinbits=false, then you are responsible * for setting the value of the attribute sizeinbits (see method * setSizeInBits). * * Returns how many bytes were queried from the stream. */ size_t read(std::istream &in, const bool savesizeinbits = true); /** * same as read(std::istream...), except that you provide a char pointer * and a "capacity" (in bytes). The function never reads at or beyond * "in+capacity". If the detected storage exceeds the given capacity, the * value zero is returned: it should be considered an error. Otherwise, the * number of bytes read is returned. */ size_t read(const char *in, size_t capacity, const bool savesizeinbits = true); /** * read the buffer from a stream, see method writeBuffer. * this is for advanced users. */ void readBuffer(std::istream &in, const size_t buffersize); /** * We define two EWAHBoolArray as being equal if they have the same set bits. * Alternatively, B1==B2 if and only if cardinality(B1 XOR B2) ==0. */ bool operator==(const EWAHBoolArray &x) const; /** * We define two EWAHBoolArray as being different if they do not have the same * set bits. * Alternatively, B1!=B2 if and only if cardinality(B1 XOR B2) >0. */ bool operator!=(const EWAHBoolArray &x) const; bool operator==(const BoolArray &x) const; bool operator!=(const BoolArray &x) const; /** * Iterate over the uncompressed words. * Can be considerably faster than begin()/end(). * Running time complexity of a full scan is proportional to the * uncompressed size of the bitmap. */ EWAHBoolArrayIterator uncompress() const; /** * To iterate over the compressed data. * Can be faster than any other iterator. * Running time complexity of a full scan is proportional to the * compressed size of the bitmap. */ EWAHBoolArrayRawIterator raw_iterator() const; /** * Appends the content of some other compressed bitmap * at the end of the current bitmap. 
*/ void append(const EWAHBoolArray &x); /** * For research purposes. This computes the number of * dirty words and the number of compressed words. */ BitmapStatistics computeStatistics() const; /** * For convenience, this fully uncompresses the bitmap. * Not fast! */ BoolArray toBoolArray() const; /** * Convert to a list of positions of "set" bits. * The recommended container is vector. * * See also toArray(). */ template void appendSetBits(container &out, const size_t offset = 0) const; /** * Returns a vector containing the position of the set * bits in increasing order. This just calls "toArray". */ std::vector toVector() const { return toArray(); } /** * Returns the number of bits set to the value 1. * The running time complexity is proportional to the * compressed size of the bitmap. * * This is sometimes called the cardinality. */ size_t numberOfOnes() const; /** * Swap the content of this bitmap with another bitmap. * No copying is done. (Running time complexity is constant.) */ void swap(EWAHBoolArray &x); const std::vector &getBuffer() const { return buffer; } enum { wordinbits = sizeof(uword) * 8 }; /** * Please don't copy your bitmaps! The running time * complexity of a copy is the size of the compressed bitmap. **/ EWAHBoolArray(const EWAHBoolArray &other) : buffer(other.buffer), sizeinbits(other.sizeinbits), lastRLW(other.lastRLW) {} /** * Copies the content of one bitmap onto another. Running time complexity * is proportional to the size of the compressed bitmap. * please, never hard-copy this object. Use the swap method if you must. */ EWAHBoolArray &operator=(const EWAHBoolArray &x) { buffer = x.buffer; sizeinbits = x.sizeinbits; lastRLW = x.lastRLW; return *this; } /** * Move constructor. */ EWAHBoolArray(EWAHBoolArray &&other) : buffer(std::move(other.buffer)), sizeinbits(other.sizeinbits), lastRLW(other.lastRLW) {} /** * Move assignment operator. 
*/
  EWAHBoolArray &operator=(EWAHBoolArray &&x) {
    // Only the buffer is moved; sizeinbits and lastRLW are copied and x's
    // copies of them are left as they were.
    buffer = std::move(x.buffer);
    sizeinbits = x.sizeinbits;
    lastRLW = x.lastRLW;
    return *this;
  }

  /**
   * This is equivalent to the operator =. It is used
   * to keep in mind that assignment can be expensive.
   *
   * If you don't mind copying the bitmap (performance-wise), use this!
   */
  void expensive_copy(const EWAHBoolArray &x) {
    buffer = x.buffer;
    sizeinbits = x.sizeinbits;
    lastRLW = x.lastRLW;
  }

  /**
   * Write the logical not of this bitmap in the provided container.
   *
   * This function takes into account the sizeInBits value.
   * You may need to call "padWithZeroes" to adjust the sizeInBits.
   */
  void logicalnot(EWAHBoolArray &x) const;

  /**
   * Return the logical not of this bitmap as a new bitmap
   * (calls the container version above on a fresh bitmap).
   *
   * This function takes into account the sizeInBits value.
   * You may need to call "padWithZeroes" to adjust the sizeInBits.
   */
  EWAHBoolArray logicalnot() const {
    EWAHBoolArray answer;
    logicalnot(answer);
    return answer;
  }

  /**
   * Apply the logical not operation on this bitmap.
   * Running time complexity is proportional to the compressed size of the
   * bitmap.
   *
   * NOTE(review): this comment used to say "The current bitmap is not
   * modified", which contradicts the in-place name and the non-const, void,
   * argument-less signature; presumably this bitmap is overwritten with its
   * negation. Confirm against the definition.
   *
   * This function takes into account the sizeInBits value.
   * You may need to call "padWithZeroes" to adjust the sizeInBits.
   **/
  void inplace_logicalnot();

  /**
   * set size in bits. This does not affect the compressed size. It
   * runs in constant time. This should not normally be used, except
   * as part of a deserialization process.
   */
  inline void setSizeInBits(const size_t size) { sizeinbits = size; }

  /**
   * Like addStreamOfEmptyWords but does not return the cost increase,
   * does not update sizeinbits
   */
  inline void fastaddStreamOfEmptyWords(const bool v, size_t number);

  /**
   * Like addStreamOfDirtyWords but does not return the cost increase,
   * does not update sizeinbits.
*/ inline void fastaddStreamOfDirtyWords(const uword *v, const size_t number); private: void assertWordCount(std::string message) const; void correctWordCount(); size_t numberOfWords() const; // private because does not increment the size in bits // returns the number of words added (storage cost increase) inline size_t addLiteralWord(const uword newdata); // private because does not increment the size in bits // returns the number of words added (storage cost increase) size_t addEmptyWord(const bool v); // this second version "might" be faster if you hate OOP. // in my tests, it turned out to be slower! // private because does not increment the size in bits // inline void addEmptyWordStaticCalls(bool v); std::vector buffer; size_t sizeinbits; size_t lastRLW; }; } // namespace ewah #include "ewah-inl.h" #endif ewah_bool_utils-1.2.2/ewah_bool_utils/cpp/ewahutil.h000066400000000000000000000117301466056343200226420ustar00rootroot00000000000000/** * This code is released under the * Apache License Version 2.0 http://www.apache.org/licenses/. * * (c) Daniel Lemire, http://lemire.me/en/ * * Some code from the public domain tuklib. 
*/ #ifndef EWAHUTIL_H #define EWAHUTIL_H #include // mostly for Microsoft compilers #include #include // part of Visual Studio 2010 and better #include #include #include #include #include #include #include #include #include #include #ifdef _MSC_VER #include #endif #if ((ULONG_MAX) == (UINT_MAX)) #define UWORD uint32_t #else #define UWORD uint64_t #endif namespace ewah { static inline uint32_t ctz64(uint64_t n) { #if defined(__GNUC__) && UINT_MAX >= UINT32_MAX && ULLONG_MAX >= UINT64_MAX return static_cast(__builtin_ctzll(n)); #elif defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 && \ ULONG_MAX >= UINT64_MAX uint32_t i; _BitScanForward64((unsigned long *)&i, n); return i; #else uint32_t i = 1; if ((n & static_cast(4294967295)) == 0) { n >>= 32; i += 32; } if ((n & static_cast(0x0000FFFFUL)) == 0) { n >>= 16; i += 16; } if ((n & static_cast(0x000000FFUL)) == 0) { n >>= 8; i += 8; } if ((n & static_cast(0x0000000FUL)) == 0) { n >>= 4; i += 4; } if ((n & static_cast(0x00000003UL)) == 0) { n >>= 2; i += 2; } i -= (n & 0x1); return i; #endif } static inline uint32_t ctz32(uint32_t n) { #if defined(__GNUC__) && UINT_MAX >= UINT32_MAX return static_cast(__builtin_ctz(n)); #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) uint32_t i; __asm__("bsfl %1, %0" : "=r"(i) : "rm"(n)); return i; #elif defined(_MSC_VER) && _MSC_VER >= 1400 uint32_t i; _BitScanForward((unsigned long *)&i, n); return i; #else uint32_t i = 1; if ((n & static_cast(0x0000FFFF)) == 0) { n >>= 16; i += 16; } if ((n & static_cast(0x000000FF)) == 0) { n >>= 8; i += 8; } if ((n & static_cast(0x0000000F)) == 0) { n >>= 4; i += 4; } if ((n & static_cast(0x00000003)) == 0) { n >>= 2; i += 2; } i -= (n & 1); return i; #endif } static inline uint32_t ctz16(uint16_t n) { #if defined(__GNUC__) && UINT_MAX >= UINT32_MAX return static_cast(__builtin_ctz(n)); #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) uint32_t i; __asm__("bsfl %1, %0" : "=r"(i) : "rm"(n)); return 
i; #elif defined(_MSC_VER) && _MSC_VER >= 1400 uint32_t i; _BitScanForward((unsigned long *)&i, n); return i; #else uint32_t i = 1; if ((n & static_cast(0x000000FF)) == 0) { n >>= 8; i += 8; } if ((n & static_cast(0x0000000F)) == 0) { n >>= 4; i += 4; } if ((n & static_cast(0x00000003)) == 0) { n >>= 2; i += 2; } i -= (n & 1); return i; #endif } #ifdef __GNUC__ /** * count the number of bits set to one (32 bit version) */ inline uint32_t countOnes(uint32_t x) { return static_cast(__builtin_popcount(x)); } #elif defined(_MSC_VER) && _MSC_VER >= 1400 && !defined(_M_ARM)&& !defined(_M_ARM64) inline uint32_t countOnes(uint32_t x) { return __popcnt(x); } #else inline uint32_t countOnes(uint32_t v) { v = v - ((v >> 1) & 0x55555555); v = (v & 0x33333333) + ((v >> 2) & 0x33333333); return static_cast((((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24); } #endif #ifdef __GNUC__ /** * count the number of bits set to one (64 bit version) */ inline uint32_t countOnes(uint64_t x) { return static_cast(__builtin_popcountll(x)); } #elif defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 && !defined(_M_ARM64) inline uint32_t countOnes(uint64_t x) { return static_cast(__popcnt64(static_cast<__int64>(x))); } #else inline uint32_t countOnes(uint64_t v) { v = v - ((v >> 1) & 0x5555555555555555); v = (v & 0x3333333333333333) + ((v >> 2) & 0x3333333333333333); v = ((v + (v >> 4)) & 0x0F0F0F0F0F0F0F0F); return static_cast((v * (0x0101010101010101)) >> 56); } #endif inline uint32_t countOnes(uint16_t v) { return countOnes(static_cast(v)); } inline uint32_t numberOfTrailingZeros(uint32_t x) { if (x == 0) return 32; return ctz32(x); } inline uint32_t numberOfTrailingZeros(uint64_t x) { if (x == 0) return 64; return ctz64(x); } inline uint32_t numberOfTrailingZeros(uint16_t x) { if (x == 0) return 16; return ctz16(x); } /** * Returns the binary representation of a binary word. 
*/ template std::string toBinaryString(const uword w) { std::ostringstream convert; for (uint32_t k = 0; k < sizeof(uword) * 8; ++k) { if (w & (static_cast(1) << k)) convert << "1"; else convert << "0"; } return convert.str(); } } // namespace ewah #endif ewah_bool_utils-1.2.2/ewah_bool_utils/cpp/runninglengthword.h000066400000000000000000000376071466056343200246110ustar00rootroot00000000000000/** * This code is released under the * Apache License Version 2.0 http://www.apache.org/licenses/. * * (c) Daniel Lemire, http://lemire.me/en/ */ #ifndef RUNNINGLENGTHWORD_H_ #define RUNNINGLENGTHWORD_H_ #include namespace ewah { /** * For expert users. * This class is used to represent a special type of word storing * a run length. It is defined by the Enhanced Word Aligned Hybrid (EWAH) * format. You don't normally need to access this class. */ template class RunningLengthWord { public: RunningLengthWord(uword &data) : mydata(data) {} RunningLengthWord(const RunningLengthWord &rlw) : mydata(rlw.mydata) {} RunningLengthWord &operator=(const RunningLengthWord &rlw) { mydata = rlw.mydata; return *this; } /** * Which bit is being repeated? */ bool getRunningBit() const { return mydata & static_cast(1); } /** * how many words should be filled by the running bit */ static inline bool getRunningBit(uword data) { return data & static_cast(1); } /** * how many words should be filled by the running bit */ uword getRunningLength() const { return static_cast((mydata >> 1) & largestrunninglengthcount); } /** * followed by how many literal words? */ static inline uword getRunningLength(uword data) { return static_cast((data >> 1) & largestrunninglengthcount); } /** * followed by how many literal words? 
*/ uword getNumberOfLiteralWords() const { return static_cast(mydata >> (1 + runninglengthbits)); } /** * Total of getRunningLength() and getNumberOfLiteralWords() */ uword size() const { return static_cast(getRunningLength() + getNumberOfLiteralWords()); } /** * Total of getRunningLength() and getNumberOfLiteralWords() */ static inline uword size(uword data) { return static_cast(getRunningLength(data) + getNumberOfLiteralWords(data)); } /** * followed by how many literal words? */ static inline uword getNumberOfLiteralWords(uword data) { return static_cast(data >> (1 + runninglengthbits)); } /** * running length of which type of bits */ void setRunningBit(bool b) { if (b) mydata |= static_cast(1); else mydata &= static_cast(~1); } void discardFirstWords(uword x) { const uword rl(getRunningLength()); if (rl >= x) { setRunningLength(rl - x); return; } x -= rl; setRunningLength(0); setNumberOfLiteralWords(getNumberOfLiteralWords() - x); } /** * running length of which type of bits */ static inline void setRunningBit(uword &data, bool b) { if (b) data |= static_cast(1); else data &= static_cast(~1); } void setRunningLength(uword l) { mydata |= shiftedlargestrunninglengthcount; mydata &= static_cast((l << 1) | notshiftedlargestrunninglengthcount); } // static call for people who hate objects static inline void setRunningLength(uword &data, uword l) { data |= shiftedlargestrunninglengthcount; data &= static_cast((l << 1) | notshiftedlargestrunninglengthcount); } void setNumberOfLiteralWords(uword l) { mydata |= notrunninglengthplusrunningbit; mydata &= static_cast((l << (runninglengthbits + 1)) | runninglengthplusrunningbit); } // static call for people who hate objects static inline void setNumberOfLiteralWords(uword &data, uword l) { data |= notrunninglengthplusrunningbit; data &= static_cast(l << (runninglengthbits + 1)) | runninglengthplusrunningbit; } static const uint32_t runninglengthbits = sizeof(uword) * 4; static const uint32_t literalbits = sizeof(uword) * 8 
- 1 - runninglengthbits; static const uword largestliteralcount = (static_cast(1) << literalbits) - 1; static const uword largestrunninglengthcount = (static_cast(1) << runninglengthbits) - 1; static const uword shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; static const uword notshiftedlargestrunninglengthcount = static_cast(~shiftedlargestrunninglengthcount); static const uword runninglengthplusrunningbit = (static_cast(1) << (runninglengthbits + 1)) - 1; static const uword notrunninglengthplusrunningbit = static_cast(~runninglengthplusrunningbit); static const uword notlargestrunninglengthcount = static_cast(~largestrunninglengthcount); uword &mydata; }; /** * Same as RunningLengthWord, except that the values cannot be modified. */ template class ConstRunningLengthWord { public: ConstRunningLengthWord() : mydata(0) {} ConstRunningLengthWord(const uword data) : mydata(data) {} ConstRunningLengthWord(const ConstRunningLengthWord &rlw) : mydata(rlw.mydata) {} /** * Which bit is being repeated? */ bool getRunningBit() const { return mydata & static_cast(1); } /** * how many words should be filled by the running bit */ uword getRunningLength() const { return static_cast( (mydata >> 1) & RunningLengthWord::largestrunninglengthcount); } /** * followed by how many literal words? */ uword getNumberOfLiteralWords() const { return static_cast( mydata >> (1 + RunningLengthWord::runninglengthbits)); } /** * Total of getRunningLength() and getNumberOfLiteralWords() */ uword size() const { return getRunningLength() + getNumberOfLiteralWords(); } uword mydata; }; template class EWAHBoolArray; template class EWAHBoolArrayRawIterator; /** * Same as RunningLengthWord, except that the values are buffered for quick * access. 
*/
template <class uword> class BufferedRunningLengthWord {
public:
  enum { wordinbits = sizeof(uword) * 8 };

  // Decodes the three fields of a marker word once and keeps them as members.
  BufferedRunningLengthWord(const uword &data,
                            EWAHBoolArrayRawIterator<uword> *p)
      : RunningBit(data & static_cast<uword>(1)),
        RunningLength(static_cast<uword>(
            (data >> 1) & RunningLengthWord<uword>::largestrunninglengthcount)),
        NumberOfLiteralWords(static_cast<uword>(
            data >> (1 + RunningLengthWord<uword>::runninglengthbits))),
        parent(p) {}

  // NOTE(review): this reads p.parent, but RunningLengthWord as declared in
  // this header has no `parent` member; presumably this constructor is never
  // instantiated -- confirm before relying on it.
  BufferedRunningLengthWord(const RunningLengthWord<uword> &p)
      : RunningBit(p.mydata & static_cast<uword>(1)),
        RunningLength((p.mydata >> 1) &
                      RunningLengthWord<uword>::largestrunninglengthcount),
        NumberOfLiteralWords(p.mydata >>
                             (1 + RunningLengthWord<uword>::runninglengthbits)),
        parent(p.parent) {}

  // Writes everything that remains (runs and literal words) to the container,
  // advancing with next() until it reports there is nothing more.
  void discharge(EWAHBoolArray<uword> &container) {
    while (size() > 0) {
      // first run
      size_t pl = getRunningLength();
      container.fastaddStreamOfEmptyWords(getRunningBit(), pl);
      size_t pd = getNumberOfLiteralWords();
      writeLiteralWords(pd, container);
      if (!next())
        break;
    }
  }

  // Returns the number of set bits in everything that remains.
  size_t dischargeCount() {
    size_t answer = 0;
    while (size() > 0) {
      // first run
      if (getRunningBit()) {
        answer += wordinbits * getRunningLength();
      }
      size_t pd = getNumberOfLiteralWords();
      for (size_t i = 0; i < pd; ++i)
        answer += countOnes((uword)getLiteralWordAt(i));
      if (!next())
        break;
    }
    return answer;
  }

  // Returns the number of unset bits in everything that remains, i.e. the set
  // bits of the negation.
  size_t dischargeCountNegated() {
    size_t answer = 0;
    while (size() > 0) {
      // first run
      if (!getRunningBit()) {
        answer += wordinbits * getRunningLength();
      }
      size_t pd = getNumberOfLiteralWords();
      for (size_t i = 0; i < pd; ++i)
        answer += countOnes((uword)(~getLiteralWordAt(i)));
      if (!next())
        break;
    }
    return answer;
  }

  // Symbolically write out up to max words, returns how many were written,
  // and adds to *count the number of set bits among them. (*count is only
  // ever incremented; callers are expected to initialise it.)
  size_t dischargeCount(size_t max, size_t *count) {
    size_t index = 0;
    while (true) {
      if (index + RunningLength > max) {
        // The run alone exceeds the budget: consume only part of it.
        const size_t offset = max - index;
        if (getRunningBit())
          *count += offset * wordinbits;
        RunningLength -= offset;
        return max;
      }
      if (getRunningBit())
        *count += RunningLength * wordinbits;
      index += RunningLength;
      if (NumberOfLiteralWords + index > max) {
        // The literal words exceed the budget: consume only the first ones.
        const size_t offset = max - index;
        for (size_t i = 0; i < offset; ++i)
          *count += countOnes((uword)getLiteralWordAt(i));
        RunningLength = 0;
        NumberOfLiteralWords -= offset;
        return max;
      }
      for (size_t i = 0; i < NumberOfLiteralWords; ++i)
        *count += countOnes((uword)getLiteralWordAt(i));
      index += NumberOfLiteralWords;
      if (!next())
        break;
    }
    return index;
  }

  // Same as dischargeCount(max, count), but counts the set bits of the
  // negated words (zero runs count fully, literal words are complemented).
  size_t dischargeCountNegated(size_t max, size_t *count) {
    size_t index = 0;
    while (true) {
      if (index + RunningLength > max) {
        const size_t offset = max - index;
        if (!getRunningBit())
          *count += offset * wordinbits;
        RunningLength -= offset;
        return max;
      }
      if (!getRunningBit())
        *count += RunningLength * wordinbits;
      index += RunningLength;
      if (NumberOfLiteralWords + index > max) {
        const size_t offset = max - index;
        for (size_t i = 0; i < offset; ++i)
          *count += countOnes((uword)(~getLiteralWordAt(i)));
        RunningLength = 0;
        NumberOfLiteralWords -= offset;
        return max;
      }
      for (size_t i = 0; i < NumberOfLiteralWords; ++i)
        *count += countOnes((uword)(~getLiteralWordAt(i)));
      index += NumberOfLiteralWords;
      if (!next())
        break;
    }
    return index;
  }

  // Returns true at the first non-empty run of ones or the first marker that
  // carries literal words; otherwise consumes everything and returns false.
  bool nonzero_discharge() {
    while (size() > 0) {
      // first run
      size_t pl = getRunningLength();
      if ((pl > 0) && (getRunningBit()))
        return true;
      size_t pd = getNumberOfLiteralWords();
      if (pd > 0)
        return true;
      discardFirstWordsWithReload(static_cast<uword>(pl + pd));
    }
    return false;
  }

  // Write out up to max words, returns how many were written
  size_t discharge(EWAHBoolArray<uword> &container, size_t max) {
    size_t index = 0;
    while (true) {
      if (index + RunningLength > max) {
        const size_t offset = max - index;
        container.fastaddStreamOfEmptyWords(getRunningBit(), offset);
        RunningLength = static_cast<uword>(RunningLength - offset);
        return max;
      }
      container.fastaddStreamOfEmptyWords(getRunningBit(), RunningLength);
      index += RunningLength;
      if (NumberOfLiteralWords + index > max) {
        const size_t offset = max - index;
writeLiteralWords(offset, container); RunningLength = 0; NumberOfLiteralWords = static_cast(NumberOfLiteralWords - offset); return max; } writeLiteralWords(NumberOfLiteralWords, container); index += NumberOfLiteralWords; if (!next()) break; } return index; } bool nonzero_discharge(size_t max, size_t &index) { index = 0; while ((index < max) && (size() > 0)) { // first run size_t pl = getRunningLength(); if (index + pl > max) { pl = max - index; } if ((getRunningBit()) && (pl > 0)) return true; index += pl; size_t pd = getNumberOfLiteralWords(); if (pd + index > max) { pd = max - index; } if (pd > 0) return true; discardFirstWordsWithReload(static_cast(pl + pd)); } return false; } // Write out up to max words, returns how many were written size_t dischargeNegated(EWAHBoolArray &container, size_t max) { // todo: could be optimized further size_t index = 0; while ((index < max) && (size() > 0)) { // first run size_t pl = getRunningLength(); if (index + pl > max) { pl = max - index; } container.fastaddStreamOfEmptyWords(!getRunningBit(), pl); index += pl; size_t pd = getNumberOfLiteralWords(); if (pd + index > max) { pd = max - index; } writeNegatedLiteralWords(pd, container); discardFirstWordsWithReload(static_cast(pl + pd)); index += pd; } return index; } bool nonzero_dischargeNegated(size_t max, size_t &index) { while ((index < max) && (size() > 0)) { // first run size_t pl = getRunningLength(); if (index + pl > max) { pl = max - index; } if ((!getRunningBit()) && (pl > 0)) return true; index += pl; size_t pd = getNumberOfLiteralWords(); if (pd + index > max) { pd = max - index; } if (pd > 0) return true; discardFirstWordsWithReload(static_cast(pl + pd)); index += pd; } return false; } uword getLiteralWordAt(size_t index) { return parent->dirtyWords()[index]; } void writeLiteralWords(size_t numWords, EWAHBoolArray &container) { container.fastaddStreamOfDirtyWords(parent->dirtyWords(), numWords); } void writeNegatedLiteralWords(size_t numWords, EWAHBoolArray 
&container) { container.addStreamOfNegatedDirtyWords(parent->dirtyWords(), numWords); } void discardRunningWords() { RunningLength = 0; } void discardRunningWordsWithReload() { RunningLength = 0; if (NumberOfLiteralWords == 0) next(); } bool next() { if (!parent->hasNext()) { NumberOfLiteralWords = 0; RunningLength = 0; return false; } parent->next(); return true; } void read(const uword &data) { RunningBit = data & static_cast(1); RunningLength = static_cast( (data >> 1) & RunningLengthWord::largestrunninglengthcount); NumberOfLiteralWords = static_cast( data >> (1 + RunningLengthWord::runninglengthbits)); } /** * Which bit is being repeated? */ bool getRunningBit() const { return RunningBit; } void discardFirstWords(uword x) { if (RunningLength >= x) { RunningLength = static_cast(RunningLength - x); return; } x = static_cast(x - RunningLength); RunningLength = 0; NumberOfLiteralWords = static_cast(NumberOfLiteralWords - x); } /** * how many words should be filled by the running bit (see previous method) */ uword getRunningLength() const { return RunningLength; } /** * followed by how many literal words? */ uword getNumberOfLiteralWords() const { return NumberOfLiteralWords; } /** * Total of getRunningLength() and getNumberOfLiteralWords() */ uword size() const { return static_cast(RunningLength + NumberOfLiteralWords); } friend std::ostream &operator<<(std::ostream &out, const BufferedRunningLengthWord &a) { out << "{RunningBit:" << a.RunningBit << ",RunningLength:" << a.RunningLength << ",NumberOfLiteralWords:" << a.NumberOfLiteralWords << "}"; return out; } void discardLiteralWordsWithReload(uword x) { assert(NumberOfLiteralWords >= x); NumberOfLiteralWords -= x; if (NumberOfLiteralWords == 0) next(); } void discardFirstWordsWithReload(uword x) { while (x > 0) { if (RunningLength > x) { RunningLength = static_cast(RunningLength - x); return; } x = static_cast(x - RunningLength); RunningLength = 0; size_t toDiscard = x > NumberOfLiteralWords ? 
NumberOfLiteralWords : x; NumberOfLiteralWords = static_cast(NumberOfLiteralWords - toDiscard); x = static_cast(x - toDiscard); if ((x > 0) || (size() == 0)) { if (!next()) break; } } } private: bool RunningBit; uword RunningLength; uword NumberOfLiteralWords; EWAHBoolArrayRawIterator *parent; }; } // namespace ewah #endif /* RUNNINGLENGTHWORD_H_ */ ewah_bool_utils-1.2.2/ewah_bool_utils/ewah_bool_array.pxd000066400000000000000000000070471466056343200237450ustar00rootroot00000000000000""" Wrapper for EWAH Bool Array: https://github.com/lemire/EWAHBoolArray """ from libc.stdint cimport uint32_t, uint64_t from libcpp cimport bool from libcpp.map cimport map as cmap from libcpp.string cimport string from libcpp.vector cimport vector # Streams req for c++ IO cdef extern from "" namespace "std": cdef cppclass ostream[T]: pass cdef extern from "" namespace "std": cdef cppclass istream[T]: pass cdef extern from "" namespace "std": cdef cppclass stringstream: stringstream() except + string str() ostream write(char *, size_t) istream read(char *, size_t) bint eof() cdef extern from "ewah.h" namespace "ewah": cppclass EWAHBoolArraySetBitForwardIterator[uword]: # EWAHBoolArraySetBitForwardIterator() EWAHBoolArraySetBitForwardIterator(const EWAHBoolArraySetBitForwardIterator &o) size_t operator*() EWAHBoolArraySetBitForwardIterator &operator++() bint operator==(EWAHBoolArraySetBitForwardIterator &x) bint operator!=(EWAHBoolArraySetBitForwardIterator &x) # ctypedef EWAHBoolArraySetBitForwardIterator[unsigned long long] const_iterator cdef cppclass EWAHBoolArray[uword]: # We are going to skip the varargs here; it is too tricky to assemble. 
bint get(const size_t pos) bint set(size_t i) void makeSameSize(EWAHBoolArray &a) vector[size_t] toArray() void logicaland(EWAHBoolArray &a, EWAHBoolArray &container) void logicalor(EWAHBoolArray &a, EWAHBoolArray &container) void logicalxor(EWAHBoolArray &a, EWAHBoolArray &container) bint intersects(EWAHBoolArray &a) void reset() size_t sizeInBits() size_t sizeInBytes() bint operator==(EWAHBoolArray &x) bint operator!=(EWAHBoolArray &x) void append(EWAHBoolArray &x) # Recommended container is "vector[size_t]" void appendRowIDs[container](container &out, const size_t offset) void appendSetBits[container](container &out, const size_t offset) size_t numberOfOnes() void logicalnot(EWAHBoolArray &x) void inplace_logicalnot() void swap(EWAHBoolArray &x) void read(stringstream &incoming, bint savesizeinbits) void readBuffer(stringstream &incoming, const size_t buffersize) void write(stringstream &out, bint savesizeinbits) void writeBuffer(stringstream &out) size_t addWord(uword newdata) vector[uword] &getBuffer() # const_iterator begin() # const_iterator end() EWAHBoolArraySetBitForwardIterator begin() EWAHBoolArraySetBitForwardIterator end() cdef extern from "boolarray.h" namespace "ewah": cppclass BoolArray[uword]: void setSizeInBits(size_t sizeib) void set(size_t pos) void unset(size_t pos) bool get(size_t pos) void reset() size_t sizeInBits() size_t sizeInBytes() size_t numberOfOnes() void inplace_logicalxor(BoolArray &other) void inplace_logicalnot() size_t padWithZeroes(size_t totalbits) uword getWord(size_t pos) size_t wordinbits cimport cython cimport numpy as np ctypedef np.uint32_t ewah_word_type ctypedef EWAHBoolArray[ewah_word_type] ewah_bool_array ctypedef EWAHBoolArraySetBitForwardIterator[ewah_word_type] ewah_bool_iterator ctypedef vector[size_t] bitset_array ctypedef cmap[np.uint64_t, ewah_bool_array] ewah_map ctypedef stringstream sstream ctypedef BoolArray[ewah_word_type] bool_array 
ewah_bool_utils-1.2.2/ewah_bool_utils/ewah_bool_wrap.pxd000066400000000000000000000171101466056343200235700ustar00rootroot00000000000000cimport numpy as np from libcpp.pair cimport pair from libcpp.set cimport set as cset from libcpp.vector cimport vector from ewah_bool_utils.ewah_bool_array cimport ( ewah_bool_array, ewah_bool_iterator, ewah_map, sstream, ) ctypedef bint bitarrtype ctypedef pair[np.uint64_t, np.uint64_t] ind_pair cdef class FileBitmasks: cdef np.uint32_t nfiles cdef ewah_map** ewah_coll cdef ewah_bool_array** ewah_keys cdef ewah_bool_array** ewah_refn cdef void _reset(self) cdef bint _iseq(self, FileBitmasks solf) cdef BoolArrayCollection _get_bitmask(self, np.uint32_t ifile) cdef tuple _find_collisions(self, BoolArrayCollection coll, bint verbose=*) cdef tuple _find_collisions_coarse(self, BoolArrayCollection coll, bint verbose=*, file_list=*) cdef tuple _find_collisions_refined(self, BoolArrayCollection coll, bint verbose=*) cdef void _set(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2=*) cdef void _set_coarse(self, np.uint32_t ifile, np.uint64_t i1) cdef void _set_refined(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2) cdef void _set_coarse_array(self, np.uint32_t ifile, np.uint8_t[:] arr) cdef void _set_refined_array(self, np.uint32_t ifile, np.uint64_t mi1, np.uint8_t[:] arr) cdef void _set_refined_index_array(self, np.uint32_t ifile, np.int64_t nsub_mi, np.ndarray[np.uint64_t, ndim=1] sub_mi1, np.ndarray[np.uint64_t, ndim=1] sub_mi2) cdef void _set_map(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2) cdef void _set_refn(self, np.uint32_t ifile, np.uint64_t i1) cdef bint _get(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2=*) cdef bint _get_coarse(self, np.uint32_t ifile, np.uint64_t i1) cdef void _get_coarse_array(self, np.uint32_t ifile, np.uint64_t imax, np.uint8_t[:] arr) except * cdef bint _isref(self, np.uint32_t ifile, np.uint64_t i) cdef np.uint64_t _count_total(self, np.uint32_t ifile) cdef 
np.uint64_t _count_refined(self, np.uint32_t ifile) cdef np.uint64_t _count_coarse(self, np.uint32_t ifile) cdef void _append(self, np.uint32_t ifile, BoolArrayCollection solf) cdef bint _intersects(self, np.uint32_t ifile, BoolArrayCollection solf) cdef void _logicalxor(self, np.uint32_t ifile, BoolArrayCollection solf, BoolArrayCollection out) cdef void _logicaland(self, np.uint32_t ifile, BoolArrayCollection solf, BoolArrayCollection out) cdef void _select_contaminated(self, np.uint32_t ifile, BoolArrayCollection mask, np.uint8_t[:] out, np.uint8_t[:] secondary_files, BoolArrayCollection mask2=*) cdef void _select_uncontaminated(self, np.uint32_t ifile, BoolArrayCollection mask, np.uint8_t[:] out, BoolArrayCollection mask2=*) cdef bytes _dumps(self, np.uint32_t ifile) cdef bint _loads(self, np.uint32_t ifile, bytes s) cdef bint _check(self) cdef class BoolArrayCollection: cdef ewah_map* ewah_coll cdef ewah_bool_array* ewah_keys cdef ewah_bool_array* ewah_refn cdef ewah_bool_array* ewah_coar cdef void _reset(self) cdef int _richcmp(self, BoolArrayCollection solf, int op) except -1 cdef void _set(self, np.uint64_t i1, np.uint64_t i2=*) cdef void _set_coarse(self, np.uint64_t i1) cdef void _set_refined(self, np.uint64_t i1, np.uint64_t i2) cdef void _set_coarse_array(self, np.uint8_t[:] arr) cdef void _set_refined_array(self, np.uint64_t mi1, np.uint8_t[:] arr) cdef void _set_map(self, np.uint64_t i1, np.uint64_t i2) cdef void _set_refn(self, np.uint64_t i1) cdef bint _get(self, np.uint64_t i1, np.uint64_t i2=*) cdef bint _get_coarse(self, np.uint64_t i1) cdef void _get_coarse_array(self, np.uint64_t imax, np.uint8_t[:] arr) except * cdef bint _contains(self, np.uint64_t i) cdef bint _isref(self, np.uint64_t i) cdef void _ewah_coarse(self) cdef np.uint64_t _count_total(self) cdef np.uint64_t _count_refined(self) cdef np.uint64_t _count_coarse(self) cdef void _append(self, BoolArrayCollection solf) cdef void _logicalor(self, BoolArrayCollection solf, 
BoolArrayCollection out) cdef bint _intersects(self, BoolArrayCollection solf) cdef void _logicalxor(self, BoolArrayCollection solf, BoolArrayCollection out) cdef void _logicaland(self, BoolArrayCollection solf, BoolArrayCollection out) cdef void _select_contaminated(self, BoolArrayCollection mask, np.uint8_t[:] out, BoolArrayCollection mask2=*) cdef void _select_uncontaminated(self, BoolArrayCollection mask, np.uint8_t[:] out, BoolArrayCollection mask2=*) cdef void _get_ghost_zones(self, int ngz, int order1, int order2, bint periodicity[3], BoolArrayCollection out_ewah, bint coarse_ghosts=*) cdef bytes _dumps(self) cdef bint _loads(self, bytes s) cdef bint _check(self) cdef class BoolArrayCollectionUncompressed: cdef int nele1 cdef int nele2 cdef ewah_map* ewah_coll cdef bitarrtype* ewah_keys cdef bitarrtype* ewah_refn cdef void _set(self, np.uint64_t i1, np.uint64_t i2=*) cdef void _set_coarse(self, np.uint64_t i1) cdef void _set_refined(self, np.uint64_t i1, np.uint64_t i2) cdef void _set_coarse_array(self, np.uint8_t[:] arr) cdef void _set_coarse_array_ptr(self, np.uint8_t *arr) cdef void _set_refined_array(self, np.uint64_t mi1, np.uint8_t[:] arr) cdef void _set_refined_array_ptr(self, np.uint64_t mi1, np.uint8_t *arr) cdef void _set_map(self, np.uint64_t i1, np.uint64_t i2) cdef void _set_refn(self, np.uint64_t i1) cdef bint _get(self, np.uint64_t i1, np.uint64_t i2=*) cdef bint _get_coarse(self, np.uint64_t i1) cdef bint _isref(self, np.uint64_t i) cdef np.uint64_t _count_total(self) cdef np.uint64_t _count_refined(self) cdef void _append(self, BoolArrayCollectionUncompressed solf) cdef bint _intersects(self, BoolArrayCollectionUncompressed solf) cdef void _compress(self, BoolArrayCollection solf) cdef class SparseUnorderedBitmaskSet: cdef cset[np.uint64_t] entries cdef void _set(self, np.uint64_t ind) cdef void _fill(self, np.uint8_t[:] mask) cdef void _fill_ewah(self, BoolArrayCollection mm) cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm) 
cdef void _reset(self) cdef to_array(self) cdef class SparseUnorderedBitmaskVector: cdef int total cdef vector[np.uint64_t] entries cdef void _set(self, np.uint64_t ind) cdef void _fill(self, np.uint8_t[:] mask) cdef void _fill_ewah(self, BoolArrayCollection mm) cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm) cdef void _reset(self) cdef to_array(self) cdef void _remove_duplicates(self) cdef void _prune(self) cdef class SparseUnorderedRefinedBitmaskSet: cdef cset[ind_pair] entries cdef void _set(self, np.uint64_t ind1, np.uint64_t ind2) cdef void _fill(self, np.uint8_t[:] mask1, np.uint8_t[:]) cdef void _fill_ewah(self, BoolArrayCollection mm) cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm) cdef void _reset(self) cdef to_array(self) cdef class SparseUnorderedRefinedBitmaskVector: cdef int total cdef vector[ind_pair] entries cdef void _set(self, np.uint64_t ind1, np.uint64_t ind2) cdef void _fill(self, np.uint8_t[:] mask1, np.uint8_t[:]) cdef void _fill_ewah(self, BoolArrayCollection mm) cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm) cdef void _reset(self) cdef to_array(self) cdef void _remove_duplicates(self) cdef void _prune(self) ewah_bool_utils-1.2.2/ewah_bool_utils/ewah_bool_wrap.pyx000066400000000000000000002205531466056343200236240ustar00rootroot00000000000000""" Wrapper for EWAH Bool Array: https://github.com/lemire/EWAHBoolArray """ import struct from cython.operator cimport dereference, preincrement from libc.stdlib cimport free, malloc from libcpp.algorithm cimport sort from libcpp.map cimport map as cmap import numpy as np cimport cython cimport numpy as np from ewah_bool_utils.morton_utils cimport morton_neighbors_coarse, morton_neighbors_refined cdef extern from "" namespace "std" nogil: Iter unique[Iter](Iter first, Iter last) cdef np.uint64_t FLAG = ~(0) cdef np.uint64_t MAX_VECTOR_SIZE = 1e7 ctypedef cmap[np.uint64_t, ewah_bool_array] ewahmap ctypedef cmap[np.uint64_t, ewah_bool_array].iterator ewahmap_it 
ctypedef pair[np.uint64_t, ewah_bool_array] ewahmap_p cdef class FileBitmasks: def __cinit__(self, np.uint32_t nfiles): cdef int i self.nfiles = nfiles self.ewah_keys = malloc(nfiles*sizeof(ewah_bool_array*)) self.ewah_refn = malloc(nfiles*sizeof(ewah_bool_array*)) self.ewah_coll = malloc(nfiles*sizeof(ewah_map*)) for i in range(nfiles): self.ewah_keys[i] = new ewah_bool_array() self.ewah_refn[i] = new ewah_bool_array() self.ewah_coll[i] = new ewah_map() cdef void _reset(self): cdef np.int32_t ifile for ifile in range(self.nfiles): self.ewah_keys[ifile].reset() self.ewah_refn[ifile].reset() self.ewah_coll[ifile].clear() cdef bint _iseq(self, FileBitmasks solf): cdef np.int32_t ifile cdef ewah_bool_array* arr1 cdef ewah_bool_array* arr2 cdef ewahmap *map1 cdef ewahmap *map2 cdef ewahmap_p pair1, pair2 cdef ewahmap_it it_map1, it_map2 if self.nfiles != solf.nfiles: return 0 for ifile in range(self.nfiles): # Keys arr1 = ( self.ewah_keys)[ifile] arr2 = ( solf.ewah_keys)[ifile] if arr1[0] != arr2[0]: return 0 # Refn arr1 = ( self.ewah_refn)[ifile] arr2 = ( solf.ewah_refn)[ifile] if arr1[0] != arr2[0]: return 0 # Map map1 = ( self.ewah_coll)[ifile] map2 = ( solf.ewah_coll)[ifile] for pair1 in map1[0]: it_map2 = map2[0].find(pair1.first) if it_map2 == map2[0].end(): return 0 if pair1.second != dereference(it_map2).second: return 0 for pair2 in map2[0]: it_map1 = map1[0].find(pair2.first) if it_map1 == map1[0].end(): return 0 if pair2.second != dereference(it_map1).second: return 0 # Match return 1 def iseq(self, solf): return self._iseq(solf) cdef BoolArrayCollection _get_bitmask(self, np.uint32_t ifile): cdef BoolArrayCollection out = BoolArrayCollection() cdef ewah_bool_array **ewah_keys = self.ewah_keys cdef ewah_bool_array **ewah_refn = self.ewah_refn cdef ewah_map **ewah_coll = self.ewah_coll # This version actually copies arrays, which can be costly cdef ewah_bool_array *ewah_keys_out = out.ewah_keys cdef ewah_bool_array *ewah_refn_out = out.ewah_refn cdef ewah_map 
*ewah_coll_out = out.ewah_coll ewah_keys_out[0] = ewah_keys[ifile][0] ewah_refn_out[0] = ewah_refn[ifile][0] ewah_coll_out[0] = ewah_coll[ifile][0] # This version only copies pointers which can lead to deallocation of # the source when the copy is deleted. # out.ewah_keys = ewah_keys[ifile] # out.ewah_refn = ewah_refn[ifile] # out.ewah_coll = ewah_coll[ifile] return out cdef tuple _find_collisions(self, BoolArrayCollection coll, bint verbose = 0): cdef tuple cc, cr cc = self._find_collisions_coarse(coll, verbose) cr = self._find_collisions_refined(coll, verbose) return cc, cr cdef tuple _find_collisions_coarse(self, BoolArrayCollection coll, bint verbose = 0, file_list = None): cdef np.int32_t ifile cdef ewah_bool_array arr_two, arr_swap, arr_keys, arr_refn cdef ewah_bool_array* iarr cdef ewah_bool_array* coll_keys cdef ewah_bool_array* coll_refn coll_keys = ( coll.ewah_keys) coll_refn = ( coll.ewah_refn) if file_list is None: file_list = range(self.nfiles) for ifile in file_list: iarr = (self.ewah_keys)[ifile] arr_keys.logicaland(iarr[0], arr_two) arr_keys.logicalor(iarr[0], arr_swap) arr_keys.swap(arr_swap) arr_refn.logicalor(arr_two, arr_swap) arr_refn.swap(arr_swap) coll_keys[0].swap(arr_keys) coll_refn[0].swap(arr_refn) # Print cdef int nc, nm nc = coll_refn[0].numberOfOnes() nm = coll_keys[0].numberOfOnes() cdef tuple nout = (nc, nm) if verbose == 1: print("{: 10d}/{: 10d} collisions at coarse refinement. ({: 10.5f}%)".format(nc,nm,100.0*float(nc)/nm)) return nout cdef tuple _find_collisions_refined(self, BoolArrayCollection coll, bint verbose = 0): cdef np.int32_t ifile cdef ewah_bool_array iarr, arr_two, arr_swap cdef ewah_bool_array* coll_refn cdef cmap[np.uint64_t, ewah_bool_array] map_keys, map_refn cdef cmap[np.uint64_t, ewah_bool_array]* coll_coll cdef cmap[np.uint64_t, ewah_bool_array]* map_bitmask coll_refn = coll.ewah_refn if coll_refn[0].numberOfOnes() == 0: if verbose == 1: print("{: 10d}/{: 10d} collisions at refined refinement. 
({: 10.5f}%)".format(0,0,0)) return (0,0) coll_coll = coll.ewah_coll for ifile in range(self.nfiles): map_bitmask = ( self.ewah_coll)[ifile] for it_mi1 in map_bitmask[0]: mi1 = it_mi1.first iarr = it_mi1.second map_keys[mi1].logicaland(iarr, arr_two) map_keys[mi1].logicalor(iarr, arr_swap) map_keys[mi1].swap(arr_swap) map_refn[mi1].logicalor(arr_two, arr_swap) map_refn[mi1].swap(arr_swap) coll_coll[0] = map_refn # Count cdef int nc, nm nc = 0 nm = 0 for it_mi1 in map_refn: mi1 = it_mi1.first iarr = it_mi1.second nc += iarr.numberOfOnes() iarr = map_keys[mi1] nm += iarr.numberOfOnes() cdef tuple nout = (nc, nm) # Print if verbose == 1: if nm == 0: print("{: 10d}/{: 10d} collisions at refined refinement. ({: 10.5f}%)".format(nc,nm,0.0)) else: print("{: 10d}/{: 10d} collisions at refined refinement. ({: 10.5f}%)".format(nc,nm,100.0*float(nc)/nm)) return nout cdef void _set(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2 = FLAG): cdef ewah_bool_array *ewah_keys = ( self.ewah_keys)[ifile] cdef ewah_bool_array *ewah_refn = ( self.ewah_refn)[ifile] cdef ewah_map *ewah_coll = ( self.ewah_coll)[ifile] ewah_keys[0].set(i1) if i2 != FLAG: ewah_refn[0].set(i1) ewah_coll[0][i1].set(i2) cdef void _set_coarse(self, np.uint32_t ifile, np.uint64_t i1): cdef ewah_bool_array *ewah_keys = ( self.ewah_keys)[ifile] ewah_keys[0].set(i1) cdef void _set_refined(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2): cdef ewah_bool_array *ewah_refn = ( self.ewah_refn)[ifile] cdef ewah_map *ewah_coll = ( self.ewah_coll)[ifile] ewah_refn[0].set(i1) ewah_coll[0][i1].set(i2) @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) cdef void _set_coarse_array(self, np.uint32_t ifile, np.uint8_t[:] arr): cdef ewah_bool_array *ewah_keys = ( self.ewah_keys)[ifile] cdef np.uint64_t i1 for i1 in range(arr.shape[0]): if arr[i1] == 1: ewah_keys[0].set(i1) @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) 
@cython.initializedcheck(False) cdef void _set_refined_array(self, np.uint32_t ifile, np.uint64_t i1, np.uint8_t[:] arr): cdef ewah_bool_array *ewah_refn = ( self.ewah_refn)[ifile] cdef ewah_map *ewah_coll = ( self.ewah_coll)[ifile] cdef np.uint64_t i2 for i2 in range(arr.shape[0]): if arr[i2] == 1: ewah_refn[0].set(i1) ewah_coll[0][i1].set(i2) @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) cdef void _set_refined_index_array(self, np.uint32_t ifile, np.int64_t nsub_mi, np.ndarray[np.uint64_t, ndim=1] sub_mi1, np.ndarray[np.uint64_t, ndim=1] sub_mi2): cdef np.ndarray[np.int64_t, ndim=1] ind = np.lexsort((sub_mi2[:nsub_mi], sub_mi1[:nsub_mi])) cdef np.int64_t i, p cdef BoolArrayCollection temp if self._count_refined(ifile) == 0: # Add to file bitmask in order for i in range(nsub_mi): p = ind[i] self._set_refined(ifile, sub_mi1[p], sub_mi2[p]) else: # Add to dummy bitmask in order, then combine temp = BoolArrayCollection() for i in range(nsub_mi): p = ind[i] temp._set_coarse(sub_mi1[p]) temp._set_refined(sub_mi1[p], sub_mi2[p]) self._append(ifile, temp) cdef void _set_map(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2): cdef ewah_map *ewah_coll = ( self.ewah_coll)[ifile] ewah_coll[0][i1].set(i2) cdef void _set_refn(self, np.uint32_t ifile, np.uint64_t i1): cdef ewah_bool_array *ewah_refn = ( self.ewah_refn)[ifile] ewah_refn[0].set(i1) cdef bint _get(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2 = FLAG): cdef ewah_bool_array *ewah_keys = ( self.ewah_keys)[ifile] cdef ewah_bool_array *ewah_refn = ( self.ewah_refn)[ifile] cdef ewah_map *ewah_coll = ( self.ewah_coll)[ifile] if (ewah_keys[0].get(i1) == 0): return 0 if (i2 == FLAG) or (ewah_refn[0].get(i1) == 0): return 1 return ewah_coll[0][i1].get(i2) cdef bint _get_coarse(self, np.uint32_t ifile, np.uint64_t i1): cdef ewah_bool_array *ewah_keys = ( self.ewah_keys)[ifile] return ewah_keys[0].get(i1) @cython.boundscheck(False) 
@cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) cdef void _get_coarse_array(self, np.uint32_t ifile, np.uint64_t imax, np.uint8_t[:] arr) except *: cdef ewah_bool_array *ewah_keys = ( self.ewah_keys)[ifile] cdef ewah_bool_iterator *iter_set = new ewah_bool_iterator(ewah_keys[0].begin()) cdef ewah_bool_iterator *iter_end = new ewah_bool_iterator(ewah_keys[0].end()) cdef np.uint64_t iset while iter_set[0] != iter_end[0]: iset = dereference(iter_set[0]) if iset >= imax: raise IndexError("Index {} exceedes max {}.".format(iset, imax)) arr[iset] = 1 preincrement(iter_set[0]) cdef bint _isref(self, np.uint32_t ifile, np.uint64_t i): cdef ewah_bool_array *ewah_refn = ( self.ewah_refn)[ifile] return ewah_refn[0].get(i) def count_coarse(self, ifile): return self._count_coarse(ifile) def count_total(self, ifile): return self._count_total(ifile) def count_refined(self, ifile): return self._count_refined(ifile) cdef np.uint64_t _count_coarse(self, np.uint32_t ifile): return self._count_total(ifile) - self._count_refined(ifile) cdef np.uint64_t _count_total(self, np.uint32_t ifile): cdef ewah_bool_array *ewah_keys = ( self.ewah_keys)[ifile] cdef np.uint64_t out = ewah_keys[0].numberOfOnes() return out cdef np.uint64_t _count_refined(self, np.uint32_t ifile): cdef ewah_bool_array *ewah_refn = ( self.ewah_refn)[ifile] cdef np.uint64_t out = ewah_refn[0].numberOfOnes() return out def append(self, np.uint32_t ifile, BoolArrayCollection solf): if solf is None: return self._append(ifile, solf) cdef void _append(self, np.uint32_t ifile, BoolArrayCollection solf): cdef ewah_bool_array *ewah_keys1 = ( self.ewah_keys)[ifile] cdef ewah_bool_array *ewah_refn1 = ( self.ewah_refn)[ifile] cdef ewah_map *ewah_coll1 = ( self.ewah_coll)[ifile] cdef ewah_bool_array *ewah_keys2 = solf.ewah_keys cdef ewah_bool_array *ewah_refn2 = solf.ewah_refn cdef ewahmap *ewah_coll2 = solf.ewah_coll cdef ewahmap_it it_map1, it_map2 cdef ewah_bool_array swap, mi1_ewah1, mi1_ewah2 
cdef np.uint64_t mi1 # Keys ewah_keys1[0].logicalor(ewah_keys2[0], swap) ewah_keys1[0].swap(swap) # Refined ewah_refn1[0].logicalor(ewah_refn2[0], swap) ewah_refn1[0].swap(swap) # Map it_map2 = ewah_coll2[0].begin() while it_map2 != ewah_coll2[0].end(): mi1 = dereference(it_map2).first mi1_ewah2 = dereference(it_map2).second it_map1 = ewah_coll1[0].find(mi1) if it_map1 == ewah_coll1[0].end(): ewah_coll1[0][mi1] = mi1_ewah2 else: mi1_ewah1 = dereference(it_map1).second mi1_ewah1.logicalor(mi1_ewah2, swap) mi1_ewah1.swap(swap) preincrement(it_map2) cdef bint _intersects(self, np.uint32_t ifile, BoolArrayCollection solf): cdef ewah_bool_array *ewah_keys1 = ( self.ewah_keys)[ifile] cdef ewah_bool_array *ewah_refn1 = ( self.ewah_refn)[ifile] cdef ewah_map *ewah_coll1 = ( self.ewah_coll)[ifile] cdef ewah_bool_array *ewah_keys2 = solf.ewah_keys cdef ewah_bool_array *ewah_refn2 = solf.ewah_refn cdef ewahmap *ewah_coll2 = solf.ewah_coll cdef ewahmap_it it_map1, it_map2 cdef ewah_bool_array mi1_ewah1, mi1_ewah2 cdef np.uint64_t mi1 cdef ewah_bool_array ewah_coar1, ewah_coar2 # No intersection if ewah_keys1[0].intersects(ewah_keys2[0]) == 0: return 0 # Intersection at coarse level ewah_keys1[0].logicalxor(ewah_refn1[0],ewah_coar1) ewah_keys2[0].logicalxor(ewah_refn2[0],ewah_coar2) if ewah_coar1.intersects(ewah_keys2[0]) == 1: return 1 if ewah_coar2.intersects(ewah_keys1[0]) == 1: return 1 # Intersection at refined level if ewah_refn1[0].intersects(ewah_refn2[0]) == 1: it_map1 = ewah_coll1[0].begin() while (it_map1 != ewah_coll1[0].end()): mi1 = dereference(it_map1).first it_map2 = ewah_coll2[0].find(mi1) if it_map2 != ewah_coll2[0].end(): mi1_ewah1 = dereference(it_map1).second mi1_ewah2 = dereference(it_map2).second if mi1_ewah1.intersects(mi1_ewah2): return 1 preincrement(it_map1) return 0 cdef void _logicalxor(self, np.uint32_t ifile, BoolArrayCollection solf, BoolArrayCollection out): cdef ewah_bool_array *ewah_keys1 = ( self.ewah_keys)[ifile] cdef ewah_bool_array 
*ewah_refn1 = ( self.ewah_refn)[ifile] cdef ewah_map *ewah_coll1 = ( self.ewah_coll)[ifile] cdef ewah_bool_array *ewah_keys2 = solf.ewah_keys cdef ewah_bool_array *ewah_refn2 = solf.ewah_refn cdef ewahmap *ewah_coll2 = solf.ewah_coll cdef ewah_bool_array *ewah_keys_out = out.ewah_keys cdef ewah_bool_array *ewah_refn_out = out.ewah_refn cdef ewah_map *ewah_coll_out = out.ewah_coll cdef ewahmap_it it_map1, it_map2 cdef ewah_bool_array mi1_ewah1, mi1_ewah2, swap cdef np.uint64_t mi1 # Keys ewah_keys1[0].logicalxor(ewah_keys2[0],ewah_keys_out[0]) # Refn ewah_refn1[0].logicalxor(ewah_refn2[0],ewah_refn_out[0]) # Coll it_map1 = ewah_coll1[0].begin() while (it_map1 != ewah_coll1[0].end()): mi1 = dereference(it_map1).first mi1_ewah1 = dereference(it_map1).second it_map2 = ewah_coll2[0].find(mi1) if it_map2 == ewah_coll2[0].end(): ewah_coll_out[0][mi1] = mi1_ewah1 else: mi1_ewah2 = dereference(it_map2).second mi1_ewah1.logicalxor(mi1_ewah2, swap) ewah_coll_out[0][mi1] = swap preincrement(it_map1) it_map2 = ewah_coll2[0].begin() while (it_map2 != ewah_coll2[0].end()): mi1 = dereference(it_map2).first mi1_ewah2 = dereference(it_map2).second it_map1 = ewah_coll1[0].find(mi1) if it_map1 == ewah_coll1[0].end(): ewah_coll_out[0][mi1] = mi1_ewah2 preincrement(it_map2) def logicalxor(self, ifile, solf, out): return self._logicalxor(ifile, solf, out) cdef void _logicaland(self, np.uint32_t ifile, BoolArrayCollection solf, BoolArrayCollection out): cdef ewah_bool_array *ewah_keys1 = ( self.ewah_keys)[ifile] cdef ewah_bool_array *ewah_refn1 = ( self.ewah_refn)[ifile] cdef ewah_map *ewah_coll1 = ( self.ewah_coll)[ifile] cdef ewah_bool_array *ewah_keys2 = solf.ewah_keys cdef ewah_bool_array *ewah_refn2 = solf.ewah_refn cdef ewahmap *ewah_coll2 = solf.ewah_coll cdef ewah_bool_array *ewah_keys_out = out.ewah_keys cdef ewah_bool_array *ewah_refn_out = out.ewah_refn cdef ewah_map *ewah_coll_out = out.ewah_coll cdef ewahmap_it it_map1, it_map2 cdef ewah_bool_array mi1_ewah1, mi1_ewah2, swap 
cdef np.uint64_t mi1 # Keys ewah_keys1[0].logicaland(ewah_keys2[0],ewah_keys_out[0]) # Refn ewah_refn1[0].logicaland(ewah_refn2[0],ewah_refn_out[0]) # Coll if ewah_refn_out[0].numberOfOnes() > 0: it_map1 = ewah_coll1[0].begin() while (it_map1 != ewah_coll1[0].end()): mi1 = dereference(it_map1).first it_map2 = ewah_coll2[0].find(mi1) if it_map2 != ewah_coll2[0].end(): mi1_ewah1 = dereference(it_map1).second mi1_ewah2 = dereference(it_map2).second mi1_ewah1.logicaland(mi1_ewah2, swap) ewah_coll_out[0][mi1] = swap preincrement(it_map1) def logicaland(self, ifile, solf, out): return self._logicaland(ifile, solf, out) cdef void _select_contaminated(self, np.uint32_t ifile, BoolArrayCollection mask, np.uint8_t[:] out, np.uint8_t[:] secondary_files, BoolArrayCollection mask2 = None): # Fill mask at indices owned by this file that are also contaminated by # other files. cdef ewah_bool_array *ewah_refn = ( self.ewah_refn)[ifile] cdef ewah_bool_array ewah_mask cdef ewah_bool_array *ewah_mask1 cdef ewah_bool_array *ewah_mask2 cdef ewah_bool_array ewah_slct cdef ewah_bool_array *ewah_file cdef np.uint64_t iset # Merge masks as necessary if mask2 is None: ewah_mask = ( mask.ewah_keys)[0] else: ewah_mask1 = mask.ewah_keys ewah_mask2 = mask2.ewah_keys ewah_mask1[0].logicalor(ewah_mask2[0],ewah_mask) # Get just refined cells owned by this file ewah_mask.logicaland(ewah_refn[0], ewah_slct) # Set array values cdef ewah_bool_iterator *iter_set = new ewah_bool_iterator(ewah_slct.begin()) cdef ewah_bool_iterator *iter_end = new ewah_bool_iterator(ewah_slct.end()) while iter_set[0] != iter_end[0]: iset = dereference(iter_set[0]) out[iset] = 1 preincrement(iter_set[0]) # Find files that intersect this one cdef np.uint32_t isfile for isfile in range(self.nfiles): if isfile == ifile: continue ewah_file = ( self.ewah_keys)[isfile] if ewah_slct.intersects(ewah_file[0]) == 1: secondary_files[isfile] = 1 cdef void _select_uncontaminated(self, np.uint32_t ifile, BoolArrayCollection mask, 
np.uint8_t[:] out, BoolArrayCollection mask2 = None): # Fill mask at indices that are owned by this file and no other. cdef ewah_bool_array *ewah_keys = ( self.ewah_keys)[ifile] cdef ewah_bool_array *ewah_refn = ( self.ewah_refn)[ifile] cdef ewah_bool_array ewah_mask cdef ewah_bool_array *ewah_mask1 cdef ewah_bool_array *ewah_mask2 cdef ewah_bool_array ewah_slct cdef ewah_bool_array ewah_coar cdef np.uint64_t iset # Merge masks if necessary if mask2 is None: ewah_mask = ( mask.ewah_keys)[0] else: ewah_mask1 = mask.ewah_keys ewah_mask2 = mask2.ewah_keys ewah_mask1[0].logicalor(ewah_mask2[0],ewah_mask) # Get coarse cells owned by this file ewah_keys[0].logicalxor(ewah_refn[0],ewah_coar) ewah_coar.logicaland(ewah_mask,ewah_slct) # Set array elements cdef ewah_bool_iterator *iter_set = new ewah_bool_iterator(ewah_slct.begin()) cdef ewah_bool_iterator *iter_end = new ewah_bool_iterator(ewah_slct.end()) while iter_set[0] != iter_end[0]: iset = dereference(iter_set[0]) out[iset] = 1 preincrement(iter_set[0]) cdef bytes _dumps(self, np.uint32_t ifile): # TODO: write word size cdef sstream ss cdef ewah_bool_array *ewah_keys = ( self.ewah_keys)[ifile] cdef ewah_bool_array *ewah_refn = ( self.ewah_refn)[ifile] cdef ewah_map *ewah_coll = ( self.ewah_coll)[ifile] cdef ewahmap_it it_map cdef np.uint64_t nrefn, mi1 cdef ewah_bool_array mi1_ewah # Write mi1 ewah & refinement ewah ewah_keys[0].write(ss,1) ewah_refn[0].write(ss,1) # Number of refined bool arrays nrefn = (ewah_refn[0].numberOfOnes()) ss.write( &nrefn, sizeof(nrefn)) # Loop over refined bool arrays it_map = ewah_coll[0].begin() while it_map != ewah_coll[0].end(): mi1 = dereference(it_map).first mi1_ewah = dereference(it_map).second ss.write( &mi1, sizeof(mi1)) mi1_ewah.write(ss,1) preincrement(it_map) # Return type cast python bytes string return ss.str() cdef bint _loads(self, np.uint32_t ifile, bytes s): # TODO: write word size cdef sstream ss cdef ewah_bool_array *ewah_keys = ( self.ewah_keys)[ifile] cdef 
ewah_bool_array *ewah_refn = ( self.ewah_refn)[ifile] cdef ewah_map *ewah_coll = ( self.ewah_coll)[ifile] cdef np.uint64_t nrefn, mi1 nrefn = mi1 = 0 # Write string to string stream if len(s) == 0: return 1 ss.write(s, len(s)) # Read keys and refinement arrays ewah_keys[0].read(ss,1) if ss.eof(): return 1 ewah_refn[0].read(ss,1) # Read and check number of refined cells ss.read( (&nrefn), sizeof(nrefn)) if nrefn != ewah_refn[0].numberOfOnes(): raise Exception("Error in read. File indicates {} refinements, but bool array has {}.".format(nrefn,ewah_refn[0].numberOfOnes())) # Loop over refined cells for _ in range(nrefn): ss.read( (&mi1), sizeof(mi1)) if ss.eof(): return 1 ewah_coll[0][mi1].read(ss,1) # or... #mi1_ewah.read(ss,1) #ewah_coll[0][mi1].swap(mi1_ewah) return 1 cdef bint _check(self): cdef np.uint32_t ifile cdef ewah_bool_array *ewah_keys cdef ewah_bool_array *ewah_refn cdef ewah_bool_array tmp1, tmp2 cdef np.uint64_t nchk cdef str msg # Check individual files for ifile in range(self.nfiles): ewah_keys = ( self.ewah_keys)[ifile] ewah_refn = ( self.ewah_refn)[ifile] # Check that there are not any refn that are not keys ewah_keys[0].logicalxor(ewah_refn[0], tmp1) ewah_refn[0].logicaland(tmp1, tmp2) nchk = tmp2.numberOfOnes() if nchk > 0: msg = "File {}: There are {} refined cells that are not set on coarse level.".format(ifile,nchk) print(msg) return 0 # raise Exception(msg) return 1 def check(self): return self._check() def __dealloc__(self): for ifile in range(self.nfiles): del self.ewah_keys[ifile] del self.ewah_refn[ifile] del self.ewah_coll[ifile] def print_info(self, ifile, prefix=''): print("{}{: 8d} coarse, {: 8d} refined, {: 8d} total".format( prefix, self._count_coarse(ifile), self._count_refined(ifile), self._count_total(ifile))) cdef class BoolArrayCollection: def __cinit__(self): self.ewah_keys = new ewah_bool_array() self.ewah_refn = new ewah_bool_array() self.ewah_coar = new ewah_bool_array() self.ewah_coll = new ewah_map() cdef void 
_reset(self): self.ewah_keys[0].reset() self.ewah_refn[0].reset() self.ewah_coar[0].reset() self.ewah_coll[0].clear() cdef int _richcmp(self, BoolArrayCollection solf, int op) except -1: cdef ewah_bool_array *arr1 cdef ewah_bool_array *arr2 cdef ewahmap *map1 cdef ewahmap *map2 cdef ewahmap_it it_map1, it_map2 # == if op == 2: # Keys arr1 = self.ewah_keys arr2 = solf.ewah_keys if arr1[0] != arr2[0]: return 0 # Refn arr1 = self.ewah_refn arr2 = solf.ewah_refn if arr1[0] != arr2[0]: return 0 # Map map1 = self.ewah_coll map2 = solf.ewah_coll it_map1 = map1[0].begin() while (it_map1 != map1[0].end()): it_map2 = map2[0].find(dereference(it_map1).first) if it_map2 == map2[0].end(): return 0 if dereference(it_map1).second != dereference(it_map2).second: return 0 preincrement(it_map1) it_map2 =map2[0].begin() while (it_map2 != map2[0].end()): it_map1 = map1[0].find(dereference(it_map2).first) if it_map1 == map1[0].end(): return 0 if dereference(it_map2).second != dereference(it_map1).second: return 0 preincrement(it_map2) # Match return 1 # != elif op == 3: if self._richcmp(solf, 2) == 1: return 0 return 1 else: return -1 # options = ['<','<=','==','!=','>','>='] # raise NotImplementedError("Operator {} is not yet implemented.".format(options[op])) def __richcmp__(BoolArrayCollection self, BoolArrayCollection solf, int op): if self._richcmp(solf, op) == 1: return True else: return False cdef void _set(self, np.uint64_t i1, np.uint64_t i2 = FLAG): cdef ewah_bool_array *ewah_keys = self.ewah_keys cdef ewah_bool_array *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll ewah_keys[0].set(i1) # Note the 0 here, for dereferencing if i2 != FLAG: ewah_refn[0].set(i1) ewah_coll[0][i1].set(i2) def set(self, i1, i2 = FLAG): self._set(i1, i2) @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) def set_from(self, np.uint64_t[:] ids): cdef ewah_bool_array *ewah_keys = self.ewah_keys cdef np.uint64_t i cdef 
np.uint64_t last = 0 for i in range(ids.shape[0]): if ids[i] < last: raise RuntimeError self._set(ids[i]) last = ids[i] print("Set from %s array and ended up with %s bytes" % ( ids.size, ewah_keys[0].sizeInBytes())) cdef void _set_coarse(self, np.uint64_t i1): cdef ewah_bool_array *ewah_keys = self.ewah_keys ewah_keys[0].set(i1) def set_coarse(self, i1): return self._set_coarse(i1) cdef void _set_refined(self, np.uint64_t i1, np.uint64_t i2): cdef ewah_bool_array *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll # Note the 0 here, for dereferencing ewah_refn[0].set(i1) ewah_coll[0][i1].set(i2) @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) cdef void _set_coarse_array(self, np.uint8_t[:] arr): cdef ewah_bool_array *ewah_keys = self.ewah_keys cdef np.uint64_t i1 for i1 in range(arr.shape[0]): if arr[i1] == 1: ewah_keys[0].set(i1) # self._set_coarse(i1) @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) cdef void _set_refined_array(self, np.uint64_t i1, np.uint8_t[:] arr): cdef ewah_bool_array *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll cdef np.uint64_t i2 for i2 in range(arr.shape[0]): if arr[i2] == 1: ewah_refn[0].set(i1) ewah_coll[0][i1].set(i2) # self._set_refined(i1, i2) def set_refined(self, i1, i2): return self._set_refined(i1, i2) cdef void _set_map(self, np.uint64_t i1, np.uint64_t i2): cdef ewah_map *ewah_coll = self.ewah_coll ewah_coll[0][i1].set(i2) def set_map(self, i1, i2): self._set_map(i1, i2) cdef void _set_refn(self, np.uint64_t i1): cdef ewah_bool_array *ewah_refn = self.ewah_refn ewah_refn[0].set(i1) def set_refn(self, i1): self._set_refn(i1) cdef bint _get(self, np.uint64_t i1, np.uint64_t i2 = FLAG): cdef ewah_bool_array *ewah_keys = self.ewah_keys cdef ewah_bool_array *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll # Note the 0 here, for dereferencing if 
(ewah_keys[0].get(i1) == 0): return 0 if (ewah_refn[0].get(i1) == 0) or (i2 == FLAG): return 1 return ewah_coll[0][i1].get(i2) def get(self, i1, i2 = FLAG): return self._get(i1, i2) cdef bint _get_coarse(self, np.uint64_t i1): cdef ewah_bool_array *ewah_keys = self.ewah_keys return ewah_keys[0].get(i1) def get_coarse(self, i1): return self._get_coarse(i1) @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) cdef void _get_coarse_array(self, np.uint64_t imax, np.uint8_t[:] arr) except *: cdef ewah_bool_array *ewah_keys = self.ewah_keys cdef ewah_bool_iterator *iter_set = new ewah_bool_iterator(ewah_keys[0].begin()) cdef ewah_bool_iterator *iter_end = new ewah_bool_iterator(ewah_keys[0].end()) cdef np.uint64_t iset while iter_set[0] != iter_end[0]: iset = dereference(iter_set[0]) if iset >= imax: raise IndexError("Index {} exceedes max {}.".format(iset, imax)) arr[iset] = 1 preincrement(iter_set[0]) def get_coarse_array(self, imax, arr): return self._get_coarse_array(imax, arr) cdef bint _contains(self, np.uint64_t i): cdef ewah_bool_array *ewah_keys = self.ewah_keys return ewah_keys[0].get(i) def contains(self, np.uint64_t i): return self._contains(i) cdef bint _isref(self, np.uint64_t i): cdef ewah_bool_array *ewah_refn = self.ewah_refn return ewah_refn[0].get(i) def isref(self, np.uint64_t i): return self._isref(i) cdef void _ewah_coarse(self): cdef ewah_bool_array *ewah_keys = self.ewah_keys cdef ewah_bool_array *ewah_refn = self.ewah_refn cdef ewah_bool_array *ewah_coar = self.ewah_coar ewah_coar[0].reset() ewah_keys[0].logicalxor(ewah_refn[0],ewah_coar[0]) return def ewah_coarse(self): return self._ewah_coarse() cdef np.uint64_t _count_total(self): cdef ewah_bool_array *ewah_keys = self.ewah_keys cdef np.uint64_t out = ewah_keys.numberOfOnes() return out def count_total(self): return self._count_total() cdef np.uint64_t _count_refined(self): cdef ewah_bool_array *ewah_refn = self.ewah_refn cdef np.uint64_t 
out = ewah_refn.numberOfOnes() return out def count_refined(self): return self._count_refined() cdef np.uint64_t _count_coarse(self): self._ewah_coarse() cdef ewah_bool_array *ewah_coar = self.ewah_coar cdef np.uint64_t out = ewah_coar.numberOfOnes() return out def count_coarse(self): return self._count_coarse() cdef void _logicalor(self, BoolArrayCollection solf, BoolArrayCollection out): cdef ewah_bool_array *ewah_keys1 = self.ewah_keys cdef ewah_bool_array *ewah_refn1 = self.ewah_refn cdef ewahmap *ewah_coll1 = self.ewah_coll cdef ewah_bool_array *ewah_keys2 = solf.ewah_keys cdef ewah_bool_array *ewah_refn2 = solf.ewah_refn cdef ewahmap *ewah_coll2 = solf.ewah_coll cdef ewah_bool_array *ewah_keys3 = out.ewah_keys cdef ewah_bool_array *ewah_refn3 = out.ewah_refn cdef ewahmap *ewah_coll3 = out.ewah_coll cdef ewahmap_it it_map1, it_map2 cdef ewah_bool_array mi1_ewah1, mi1_ewah2 cdef np.uint64_t mi1 # Keys ewah_keys1[0].logicalor(ewah_keys2[0], ewah_keys3[0]) # Refined ewah_refn1[0].logicalor(ewah_refn2[0], ewah_refn3[0]) # Map it_map1 = ewah_coll1[0].begin() while it_map1 != ewah_coll1[0].end(): mi1 = dereference(it_map1).first mi1_ewah1 = dereference(it_map1).second ewah_coll3[0][mi1] = mi1_ewah1 preincrement(it_map1) it_map2 = ewah_coll2[0].begin() while it_map2 != ewah_coll2[0].end(): mi1 = dereference(it_map2).first mi1_ewah2 = dereference(it_map2).second it_map1 = ewah_coll1[0].find(mi1) if it_map1 != ewah_coll1[0].end(): mi1_ewah1 = dereference(it_map1).second mi1_ewah1.logicalor(mi1_ewah2, ewah_coll3[0][mi1]) else: ewah_coll3[0][mi1] = mi1_ewah2 preincrement(it_map2) cdef void _append(self, BoolArrayCollection solf): cdef ewah_bool_array *ewah_keys1 = self.ewah_keys cdef ewah_bool_array *ewah_refn1 = self.ewah_refn cdef ewahmap *ewah_coll1 = self.ewah_coll cdef ewah_bool_array *ewah_keys2 = solf.ewah_keys cdef ewah_bool_array *ewah_refn2 = solf.ewah_refn cdef ewahmap *ewah_coll2 = solf.ewah_coll cdef ewahmap_it it_map1, it_map2 cdef ewah_bool_array swap, 
mi1_ewah1, mi1_ewah2 cdef np.uint64_t mi1 # Keys ewah_keys1[0].logicalor(ewah_keys2[0], swap) ewah_keys1[0].swap(swap) # Refined ewah_refn1[0].logicalor(ewah_refn2[0], swap) ewah_refn1[0].swap(swap) # Map it_map2 = ewah_coll2[0].begin() while it_map2 != ewah_coll2[0].end(): mi1 = dereference(it_map2).first mi1_ewah2 = dereference(it_map2).second it_map1 = ewah_coll1[0].find(mi1) if it_map1 == ewah_coll1[0].end(): ewah_coll1[0][mi1] = mi1_ewah2 else: mi1_ewah1 = dereference(it_map1).second mi1_ewah1.logicalor(mi1_ewah2, swap) mi1_ewah1.swap(swap) preincrement(it_map2) def append(self, solf): return self._append(solf) cdef bint _intersects(self, BoolArrayCollection solf): cdef ewah_bool_array *ewah_keys1 = self.ewah_keys cdef ewah_bool_array *ewah_refn1 = self.ewah_refn cdef ewahmap *ewah_coll1 = self.ewah_coll cdef ewah_bool_array *ewah_keys2 = solf.ewah_keys cdef ewah_bool_array *ewah_refn2 = solf.ewah_refn cdef ewahmap *ewah_coll2 = solf.ewah_coll cdef ewahmap_it it_map1, it_map2 cdef ewah_bool_array mi1_ewah1, mi1_ewah2 cdef np.uint64_t mi1 cdef ewah_bool_array ewah_coar1, ewah_coar2 # No intersection if ewah_keys1[0].intersects(ewah_keys2[0]) == 0: return 0 # Intersection at coarse level ewah_keys1[0].logicalxor(ewah_refn1[0],ewah_coar1) ewah_keys2[0].logicalxor(ewah_refn2[0],ewah_coar2) if ewah_coar1.intersects(ewah_keys2[0]) == 1: return 1 if ewah_coar2.intersects(ewah_keys1[0]) == 1: return 1 # Intersection at refined level if ewah_refn1[0].intersects(ewah_refn2[0]) == 1: it_map1 = ewah_coll1[0].begin() while (it_map1 != ewah_coll1[0].end()): mi1 = dereference(it_map1).first it_map2 = ewah_coll2[0].find(mi1) if it_map2 != ewah_coll2[0].end(): mi1_ewah1 = dereference(it_map1).second mi1_ewah2 = dereference(it_map2).second if mi1_ewah1.intersects(mi1_ewah2): return 1 preincrement(it_map1) return 0 cdef void _logicalxor(self, BoolArrayCollection solf, BoolArrayCollection out): cdef ewah_bool_array *ewah_keys1 = self.ewah_keys cdef ewah_bool_array *ewah_refn1 = 
self.ewah_refn cdef ewah_map *ewah_coll1 = self.ewah_coll cdef ewah_bool_array *ewah_keys2 = solf.ewah_keys cdef ewah_bool_array *ewah_refn2 = solf.ewah_refn cdef ewahmap *ewah_coll2 = solf.ewah_coll cdef ewah_bool_array *ewah_keys_out = out.ewah_keys cdef ewah_bool_array *ewah_refn_out = out.ewah_refn cdef ewah_map *ewah_coll_out = out.ewah_coll cdef ewahmap_it it_map1, it_map2 cdef ewah_bool_array mi1_ewah1, mi1_ewah2, swap cdef np.uint64_t mi1 # Keys ewah_keys1[0].logicalxor(ewah_keys2[0],ewah_keys_out[0]) # Refn ewah_refn1[0].logicalxor(ewah_refn2[0],ewah_refn_out[0]) # Coll it_map1 = ewah_coll1[0].begin() while (it_map1 != ewah_coll1[0].end()): mi1 = dereference(it_map1).first mi1_ewah1 = dereference(it_map1).second it_map2 = ewah_coll2[0].find(mi1) if it_map2 == ewah_coll2[0].end(): ewah_coll_out[0][mi1] = mi1_ewah1 else: mi1_ewah2 = dereference(it_map2).second mi1_ewah1.logicalxor(mi1_ewah2, swap) ewah_coll_out[0][mi1] = swap preincrement(it_map1) it_map2 = ewah_coll2[0].begin() while (it_map2 != ewah_coll2[0].end()): mi1 = dereference(it_map2).first mi1_ewah2 = dereference(it_map2).second it_map1 = ewah_coll1[0].find(mi1) if it_map1 == ewah_coll1[0].end(): ewah_coll_out[0][mi1] = mi1_ewah2 preincrement(it_map2) def logicalxor(self, solf, out): return self._logicalxor(solf, out) cdef void _logicaland(self, BoolArrayCollection solf, BoolArrayCollection out): cdef ewah_bool_array *ewah_keys1 = self.ewah_keys cdef ewah_bool_array *ewah_refn1 = self.ewah_refn cdef ewah_map *ewah_coll1 = self.ewah_coll cdef ewah_bool_array *ewah_keys2 = solf.ewah_keys cdef ewah_bool_array *ewah_refn2 = solf.ewah_refn cdef ewahmap *ewah_coll2 = solf.ewah_coll cdef ewah_bool_array *ewah_keys_out = out.ewah_keys cdef ewah_bool_array *ewah_refn_out = out.ewah_refn cdef ewah_map *ewah_coll_out = out.ewah_coll cdef ewahmap_it it_map1, it_map2 cdef ewah_bool_array mi1_ewah1, mi1_ewah2, swap cdef np.uint64_t mi1 # Keys ewah_keys1[0].logicaland(ewah_keys2[0],ewah_keys_out[0]) # Refn 
ewah_refn1[0].logicaland(ewah_refn2[0],ewah_refn_out[0]) # Coll if ewah_refn_out[0].numberOfOnes() > 0: it_map1 = ewah_coll1[0].begin() while (it_map1 != ewah_coll1[0].end()): mi1 = dereference(it_map1).first mi1_ewah1 = dereference(it_map1).second it_map2 = ewah_coll2[0].find(mi1) if it_map2 != ewah_coll2[0].end(): mi1_ewah2 = dereference(it_map2).second mi1_ewah1.logicaland(mi1_ewah2, swap) ewah_coll_out[0][mi1] = swap preincrement(it_map1) def logicaland(self, solf, out): return self._logicaland(solf, out) cdef void _select_contaminated(self, BoolArrayCollection mask, np.uint8_t[:] out, BoolArrayCollection mask2 = None): cdef ewah_bool_array *ewah_refn = self.ewah_refn cdef ewah_bool_array ewah_mask cdef ewah_bool_array *ewah_mask1 cdef ewah_bool_array *ewah_mask2 if mask2 is None: ewah_mask = ( mask.ewah_keys)[0] else: ewah_mask1 = mask.ewah_keys ewah_mask2 = mask2.ewah_keys ewah_mask1[0].logicalor(ewah_mask2[0],ewah_mask) cdef ewah_bool_array ewah_slct ewah_refn[0].logicaland(ewah_mask,ewah_slct) cdef np.uint64_t iset cdef ewah_bool_iterator *iter_set = new ewah_bool_iterator(ewah_slct.begin()) cdef ewah_bool_iterator *iter_end = new ewah_bool_iterator(ewah_slct.end()) while iter_set[0] != iter_end[0]: iset = dereference(iter_set[0]) out[iset] = 1 preincrement(iter_set[0]) cdef void _select_uncontaminated(self, BoolArrayCollection mask, np.uint8_t[:] out, BoolArrayCollection mask2 = None): cdef ewah_bool_array *ewah_keys = self.ewah_keys cdef ewah_bool_array *ewah_refn = self.ewah_refn cdef ewah_bool_array ewah_mask cdef ewah_bool_array *ewah_mask1 cdef ewah_bool_array *ewah_mask2 if mask2 is None: ewah_mask = ( mask.ewah_keys)[0] else: ewah_mask1 = mask.ewah_keys ewah_mask2 = mask2.ewah_keys ewah_mask1[0].logicalor(ewah_mask2[0],ewah_mask) cdef ewah_bool_array ewah_slct cdef ewah_bool_array ewah_coar ewah_keys[0].logicalxor(ewah_refn[0],ewah_coar) ewah_coar.logicaland(ewah_mask,ewah_slct) cdef np.uint64_t iset cdef ewah_bool_iterator *iter_set = new 
ewah_bool_iterator(ewah_slct.begin()) cdef ewah_bool_iterator *iter_end = new ewah_bool_iterator(ewah_slct.end()) while iter_set[0] != iter_end[0]: iset = dereference(iter_set[0]) out[iset] = 1 preincrement(iter_set[0]) cdef void _get_ghost_zones(self, int ngz, int order1, int order2, bint periodicity[3], BoolArrayCollection out_ewah, bint coarse_ghosts = 0): cdef ewah_bool_array *ewah_keys = self.ewah_keys cdef ewah_bool_array *ewah_refn = self.ewah_refn cdef ewahmap *ewah_coll = self.ewah_coll cdef ewah_bool_iterator *iter_set1 = new ewah_bool_iterator(ewah_keys.begin()) cdef ewah_bool_iterator *iter_end1 = new ewah_bool_iterator(ewah_keys.end()) cdef ewah_bool_iterator *iter_set2 cdef ewah_bool_iterator *iter_end2 cdef np.uint64_t max_index1 = (1 << order1) cdef np.uint64_t max_index2 = (1 << order2) cdef np.uint64_t nele1 = (max_index1**3) cdef np.uint64_t nele2 = (max_index2**3) cdef BoolArrayCollectionUncompressed temp_bool = BoolArrayCollectionUncompressed(nele1, nele2) cdef BoolArrayCollectionUncompressed out_bool = BoolArrayCollectionUncompressed(nele1, nele2) cdef np.uint64_t mi1, mi2, mi1_n, mi2_n cdef np.uint32_t ntot, i cdef void* pointers[7] pointers[0] = malloc( sizeof(np.int32_t) * (2*ngz+1)*3) pointers[1] = malloc( sizeof(np.uint64_t) * (2*ngz+1)*3) pointers[2] = malloc( sizeof(np.uint64_t) * (2*ngz+1)*3) pointers[3] = malloc( sizeof(np.uint64_t) * (2*ngz+1)**3) pointers[4] = malloc( sizeof(np.uint64_t) * (2*ngz+1)**3) pointers[5] = malloc( sizeof(np.uint8_t) * nele1) pointers[6] = malloc( sizeof(np.uint8_t) * nele2) cdef np.uint32_t[:,:] index = pointers[0] cdef np.uint64_t[:,:] ind1_n = pointers[1] cdef np.uint64_t[:,:] ind2_n = pointers[2] cdef np.uint64_t[:] neighbor_list1 = pointers[3] cdef np.uint64_t[:] neighbor_list2 = pointers[4] cdef np.uint8_t *bool_keys = pointers[5] cdef np.uint8_t *bool_coll = pointers[6] cdef SparseUnorderedRefinedBitmaskSet list_coll = SparseUnorderedRefinedBitmaskSet() for i in range(nele1): bool_keys[i] = 0 while 
iter_set1[0] != iter_end1[0]: mi1 = dereference(iter_set1[0]) if (coarse_ghosts == 1) or (ewah_refn[0].get(mi1) == 0): # Coarse neighbors ntot = morton_neighbors_coarse(mi1, max_index1, periodicity, ngz, index, ind1_n, neighbor_list1) for i in range(ntot): mi1_n = neighbor_list1[i] if ewah_keys[0].get(mi1_n) == 0: bool_keys[mi1_n] = 1 else: for i in range(nele2): bool_coll[i] = 0 # Refined neighbors iter_set2 = new ewah_bool_iterator(ewah_coll[0][mi1].begin()) iter_end2 = new ewah_bool_iterator(ewah_coll[0][mi1].end()) while iter_set2[0] != iter_end2[0]: mi2 = dereference(iter_set2[0]) ntot = morton_neighbors_refined(mi1, mi2, max_index1, max_index2, periodicity, ngz, index, ind1_n, ind2_n, neighbor_list1, neighbor_list2) for i in range(ntot): mi1_n = neighbor_list1[i] mi2_n = neighbor_list2[i] if mi1_n == mi1: if ewah_coll[0][mi1].get(mi2_n) == 0: bool_keys[mi1_n] = 1 bool_coll[mi2_n] = 1 else: if ewah_refn[0].get(mi1_n) == 1: if ewah_coll[0][mi1_n].get(mi2_n) == 0: bool_keys[mi1_n] = 1 list_coll._set(mi1_n, mi2_n) else: if ewah_keys[0].get(mi1_n) == 0: bool_keys[mi1_n] = 1 preincrement(iter_set2[0]) # Add to running list temp_bool._set_refined_array_ptr(mi1, bool_coll) preincrement(iter_set1[0]) # Set keys out_bool._set_coarse_array_ptr(bool_keys) list_coll._fill_bool(out_bool) out_bool._append(temp_bool) out_bool._compress(out_ewah) # Free things for i in range(7): free(pointers[i]) cdef bytes _dumps(self): # TODO: write word size cdef sstream ss cdef ewah_bool_array *ewah_keys = self.ewah_keys cdef ewah_bool_array *ewah_refn = self.ewah_refn cdef ewahmap *ewah_coll = self.ewah_coll cdef ewahmap_it it_map cdef np.uint64_t nrefn, mi1 cdef ewah_bool_array mi1_ewah # Write mi1 ewah & refinement ewah ewah_keys[0].write(ss,1) ewah_refn[0].write(ss,1) # Number of refined bool arrays nrefn = (ewah_refn[0].numberOfOnes()) ss.write( &nrefn, sizeof(nrefn)) # Loop over refined bool arrays it_map = ewah_coll[0].begin() while it_map != ewah_coll[0].end(): mi1 = 
dereference(it_map).first mi1_ewah = dereference(it_map).second ss.write( &mi1, sizeof(mi1)) mi1_ewah.write(ss,1) preincrement(it_map) # Return type cast python bytes string return ss.str() def dumps(self): return self._dumps() cdef bint _loads(self, bytes s): # TODO: write word size cdef sstream ss cdef ewah_bool_array *ewah_keys = self.ewah_keys cdef ewah_bool_array *ewah_refn = self.ewah_refn cdef ewahmap *ewah_coll = self.ewah_coll cdef np.uint64_t nrefn, mi1 nrefn = mi1 = 0 # Write string to string stream if len(s) == 0: return 1 ss.write(s, len(s)) # Read keys and refinement arrays if ss.eof(): return 1 ewah_keys[0].read(ss,1) if ss.eof(): return 1 ewah_refn[0].read(ss,1) # Read and check number of refined cells if ss.eof(): return 1 ss.read( (&nrefn), sizeof(nrefn)) if nrefn != ewah_refn[0].numberOfOnes(): raise Exception("Error in read. File indicates {} refinements, but bool array has {}.".format(nrefn,ewah_refn[0].numberOfOnes())) # Loop over refined cells for _ in range(nrefn): ss.read( (&mi1), sizeof(mi1)) if ss.eof(): # A brief note about why we do this! # In previous versions of the EWAH code, which were more # susceptible to issues with differences in sizes of size_t # etc, the ewah_coll.read would use instance variables as # destinations; these were initialized to zero. In recent # versions, it uses (uninitialized) temporary variables. We # were passing in streams that were already at EOF - so the # uninitialized memory would not be written to, and it would # retain the previous values, which would invariably be really # really big! So we do a check for EOF here to make sure we're # not up to no good. break ewah_coll[0][mi1].read(ss,1) # or... 
#mi1_ewah.read(ss,1) #ewah_coll[0][mi1].swap(mi1_ewah) return 1 def loads(self, s): return self._loads(s) def save(self, fname): cdef bytes serial_BAC f = open(fname,'wb') serial_BAC = self._dumps() f.write(struct.pack('Q',len(serial_BAC))) f.write(serial_BAC) f.close() def load(self, fname): cdef np.uint64_t size_serial cdef bint flag_read f = open(fname,'rb') size_serial, = struct.unpack('Q',f.read(struct.calcsize('Q'))) flag_read = self._loads(f.read(size_serial)) f.close() return flag_read cdef bint _check(self): cdef ewah_bool_array *ewah_keys = self.ewah_keys cdef ewah_bool_array *ewah_refn = self.ewah_refn cdef ewah_bool_array tmp1, tmp2 cdef np.uint64_t nchk cdef str msg # Check that there are not any refn that are not keys ewah_keys[0].logicalxor(ewah_refn[0], tmp1) ewah_refn[0].logicaland(tmp1, tmp2) nchk = tmp2.numberOfOnes() if nchk > 0: msg = "There are {} refined cells that are not set on coarse level.".format(nchk) print(msg) return 0 # raise Exception(msg) return 1 def __dealloc__(self): cdef ewah_bool_array *ewah_keys = self.ewah_keys cdef ewah_bool_array *ewah_refn = self.ewah_refn cdef ewah_bool_array *ewah_coar = self.ewah_coar cdef ewah_map *ewah_coll = self.ewah_coll del ewah_keys del ewah_refn del ewah_coar del ewah_coll def print_info(self, prefix=''): print("{}{: 8d} coarse, {: 8d} refined, {: 8d} total".format(prefix, self._count_coarse(), self._count_refined(), self._count_total())) cdef class BoolArrayCollectionUncompressed: def __cinit__(self, np.uint64_t nele1, np.uint64_t nele2): self.nele1 = nele1 self.nele2 = nele2 self.ewah_coll = new ewah_map() cdef np.uint64_t i self.ewah_keys = malloc(sizeof(bitarrtype)*nele1) self.ewah_refn = malloc(sizeof(bitarrtype)*nele1) for i in range(nele1): self.ewah_keys[i] = 0 self.ewah_refn[i] = 0 def reset(self): self.__dealloc__() self.__init__(self.nele1,self.nele2) cdef void _compress(self, BoolArrayCollection solf): cdef np.uint64_t i cdef ewah_bool_array *ewah_keys = solf.ewah_keys cdef 
ewah_bool_array *ewah_refn = solf.ewah_refn cdef bitarrtype *bool_keys = self.ewah_keys cdef bitarrtype *bool_refn = self.ewah_refn for i in range(self.nele1): if bool_keys[i] == 1: ewah_keys[0].set(i) if bool_refn[i] == 1: ewah_refn[0].set(i) cdef ewah_map *ewah_coll1 = self.ewah_coll cdef ewah_map *ewah_coll2 = solf.ewah_coll ewah_coll2[0] = ewah_coll1[0] cdef void _set(self, np.uint64_t i1, np.uint64_t i2 = FLAG): cdef bitarrtype *ewah_keys = self.ewah_keys cdef bitarrtype *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll ewah_keys[i1] = 1 # Note the 0 here, for dereferencing if i2 != FLAG: ewah_refn[i1] = 1 ewah_coll[0][i1].set(i2) cdef void _set_coarse(self, np.uint64_t i1): cdef bitarrtype *ewah_keys = self.ewah_keys ewah_keys[i1] = 1 cdef void _set_refined(self, np.uint64_t i1, np.uint64_t i2): cdef bitarrtype *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll # Note the 0 here, for dereferencing ewah_refn[i1] = 1 ewah_coll[0][i1].set(i2) @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) cdef void _set_coarse_array(self, np.uint8_t[:] arr): cdef bitarrtype *ewah_keys = self.ewah_keys cdef np.uint64_t i1 for i1 in range(arr.shape[0]): if arr[i1] == 1: ewah_keys[i1] = 1 @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) cdef void _set_coarse_array_ptr(self, np.uint8_t *arr): # TODO: memcpy? 
cdef bitarrtype *ewah_keys = self.ewah_keys cdef np.uint64_t i1 for i1 in range(self.nele1): if arr[i1] == 1: ewah_keys[i1] = 1 @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) cdef void _set_refined_array(self, np.uint64_t i1, np.uint8_t[:] arr): cdef bitarrtype *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll cdef np.uint64_t i2 for i2 in range(arr.shape[0]): if arr[i2] == 1: ewah_refn[i1] = 1 ewah_coll[0][i1].set(i2) @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) cdef void _set_refined_array_ptr(self, np.uint64_t i1, np.uint8_t *arr): cdef bitarrtype *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll cdef np.uint64_t i2 cdef ewah_bool_array *barr = &ewah_coll[0][i1] for i2 in range(self.nele2): if arr[i2] == 1: ewah_refn[i1] = 1 barr.set(i2) cdef void _set_map(self, np.uint64_t i1, np.uint64_t i2): cdef ewah_map *ewah_coll = self.ewah_coll ewah_coll[0][i1].set(i2) cdef void _set_refn(self, np.uint64_t i1): cdef bitarrtype *ewah_refn = self.ewah_refn ewah_refn[i1] = 1 cdef bint _get(self, np.uint64_t i1, np.uint64_t i2 = FLAG): cdef bitarrtype *ewah_keys = self.ewah_keys cdef bitarrtype *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll # Note the 0 here, for dereferencing if ewah_keys[i1] == 0: return 0 if (ewah_refn[i1] == 0) or (i2 == FLAG): return 1 return ewah_coll[0][i1].get(i2) cdef bint _get_coarse(self, np.uint64_t i1): cdef bitarrtype *ewah_keys = self.ewah_keys return ewah_keys[i1] # if (ewah_keys[i1] == 0): return 0 # return 1 cdef bint _isref(self, np.uint64_t i): cdef bitarrtype *ewah_refn = self.ewah_refn return ewah_refn[i] cdef np.uint64_t _count_total(self): cdef bitarrtype *ewah_keys = self.ewah_keys cdef np.uint64_t i cdef np.uint64_t out = 0 for i in range(self.nele1): out += ewah_keys[i] return out cdef np.uint64_t _count_refined(self): cdef bitarrtype *ewah_refn 
= self.ewah_refn cdef np.uint64_t i cdef np.uint64_t out = 0 for i in range(self.nele1): out += ewah_refn[i] return out cdef void _append(self, BoolArrayCollectionUncompressed solf): cdef bitarrtype *ewah_keys1 = self.ewah_keys cdef bitarrtype *ewah_refn1 = self.ewah_refn cdef bitarrtype *ewah_keys2 = solf.ewah_keys cdef bitarrtype *ewah_refn2 = solf.ewah_refn cdef ewahmap *ewah_coll1 = self.ewah_coll cdef ewahmap *ewah_coll2 = solf.ewah_coll cdef ewahmap_it it_map1, it_map2 cdef ewah_bool_array swap, mi1_ewah1, mi1_ewah2 cdef np.uint64_t mi1 # TODO: Check if nele1 is equal? # Keys for mi1 in range(solf.nele1): if ewah_keys2[mi1] == 1: ewah_keys1[mi1] = 1 # Refined for mi1 in range(solf.nele1): if ewah_refn2[mi1] == 1: ewah_refn1[mi1] = 1 # Map it_map2 = ewah_coll2[0].begin() while it_map2 != ewah_coll2[0].end(): mi1 = dereference(it_map2).first mi1_ewah2 = dereference(it_map2).second it_map1 = ewah_coll1[0].find(mi1) if it_map1 == ewah_coll1[0].end(): ewah_coll1[0][mi1] = mi1_ewah2 else: mi1_ewah1 = dereference(it_map1).second mi1_ewah1.logicalor(mi1_ewah2, swap) mi1_ewah1.swap(swap) preincrement(it_map2) cdef bint _intersects(self, BoolArrayCollectionUncompressed solf): cdef bitarrtype *ewah_keys1 = self.ewah_keys cdef bitarrtype *ewah_refn1 = self.ewah_refn cdef bitarrtype *ewah_keys2 = solf.ewah_keys cdef bitarrtype *ewah_refn2 = solf.ewah_refn cdef ewahmap *ewah_coll1 = self.ewah_coll cdef ewahmap *ewah_coll2 = solf.ewah_coll cdef ewahmap_it it_map1, it_map2 cdef ewah_bool_array mi1_ewah1, mi1_ewah2 cdef np.uint64_t mi1 # No intersection for mi1 in range(self.nele1): if (ewah_keys1[mi1] == 1) and (ewah_keys2[mi1] == 1): break if (mi1 < self.nele1): return 0 mi1 = self.nele1 # This is to get rid of a warning # Intersection at refined level for mi1 in range(self.nele1): if (ewah_refn1[mi1] == 1) and (ewah_refn2[mi1] == 1): it_map1 = ewah_coll1[0].begin() while (it_map1 != ewah_coll1[0].end()): mi1 = dereference(it_map1).first it_map2 = ewah_coll2[0].find(mi1) if 
it_map2 != ewah_coll2[0].end(): mi1_ewah1 = dereference(it_map1).second mi1_ewah2 = dereference(it_map2).second if mi1_ewah1.intersects(mi1_ewah2): return 1 preincrement(it_map1) break # Intersection at coarse level or refined inside coarse if mi1 == self.nele1: return 1 return 0 def __dealloc__(self): cdef bitarrtype *ewah_keys = self.ewah_keys cdef bitarrtype *ewah_refn = self.ewah_refn free(ewah_keys) free(ewah_refn) cdef ewah_map *ewah_coll = self.ewah_coll del ewah_coll def print_info(self, prefix=''): cdef np.uint64_t nrefn = self._count_refined() cdef np.uint64_t nkeys = self._count_total() print("{}{: 8d} coarse, {: 8d} refined, {: 8d} total".format(prefix, nkeys - nrefn, nrefn, nkeys)) # Vector version cdef class SparseUnorderedBitmaskVector: def __cinit__(self): self.total = 0 cdef void _set(self, np.uint64_t ind): self.entries.push_back(ind) self.total += 1 def set(self, ind): self._set(ind) cdef void _fill(self, np.uint8_t[:] mask): cdef np.uint64_t i, ind for i in range(self.entries.size()): ind = self.entries[i] mask[ind] = 1 cdef void _fill_ewah(self, BoolArrayCollection mm): self._remove_duplicates() cdef np.uint64_t i, ind for i in range(self.entries.size()): ind = self.entries[i] mm._set_coarse(ind) cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm): self._remove_duplicates() cdef np.uint64_t i, ind for i in range(self.entries.size()): ind = self.entries[i] mm._set_coarse(ind) cdef void _reset(self): self.entries.erase(self.entries.begin(), self.entries.end()) self.total = 0 cdef to_array(self): self._remove_duplicates() cdef np.ndarray[np.uint64_t, ndim=1] rv rv = np.empty(self.entries.size(), dtype='uint64') for i in range(self.entries.size()): rv[i] = self.entries[i] return rv cdef void _remove_duplicates(self): cdef vector[np.uint64_t].iterator last sort(self.entries.begin(), self.entries.end()) last = unique(self.entries.begin(), self.entries.end()) self.entries.erase(last, self.entries.end()) cdef void _prune(self): if self.total 
# Set version
cdef class SparseUnorderedBitmaskSet:
    # Sparse collection of coarse indices backed by a C++ set, so repeated
    # insertions of the same index are stored only once.

    cdef void _set(self, np.uint64_t ind):
        # Record a single index.
        self.entries.insert(ind)

    def set(self, ind):
        # Python-visible wrapper around _set.
        self._set(ind)

    cdef void _fill(self, np.uint8_t[:] mask):
        # Flag every stored index in a dense uint8 mask.
        for index in self.entries:
            mask[index] = 1

    cdef void _fill_ewah(self, BoolArrayCollection mm):
        # Mark every stored index as a coarse cell of a compressed collection.
        for index in self.entries:
            mm._set_coarse(index)

    cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm):
        # Mark every stored index as a coarse cell of an uncompressed collection.
        for index in self.entries:
            mm._set_coarse(index)

    cdef void _reset(self):
        # Drop all stored indices.
        self.entries.clear()

    cdef to_array(self):
        # Copy the stored indices, in set iteration order, into a new
        # 1-D uint64 array.
        cdef np.uint64_t ind
        cdef np.ndarray[np.uint64_t, ndim=1] rv
        rv = np.empty(self.entries.size(), dtype='uint64')
        i = 0
        for ind in self.entries:
            rv[i] = ind
            i += 1
        return rv

    def __dealloc__(self):
        self.entries.clear()
# Set-backed variant holding (ind1, ind2) pairs
cdef class SparseUnorderedRefinedBitmaskSet:
    cdef void _set(self, np.uint64_t ind1, np.uint64_t ind2):
        # Pack the two indices into one pair and store it.
        cdef ind_pair key
        key.first = ind1
        key.second = ind2
        self.entries.insert(key)

    def set(self, ind1, ind2):
        """Python-visible wrapper around the C-level ``_set``."""
        self._set(ind1, ind2)

    cdef void _fill(self, np.uint8_t[:] mask1, np.uint8_t[:] mask2):
        # Flag the first member of each pair in mask1 and the second in mask2.
        for pair_ in self.entries:
            mask1[pair_.first] = 1
            mask2[pair_.second] = 1

    cdef void _fill_ewah(self, BoolArrayCollection mm):
        # Forward every stored pair to the collection's refined setter.
        for pair_ in self.entries:
            mm._set_refined(pair_.first, pair_.second)

    cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm):
        # Same as _fill_ewah, for the uncompressed collection type.
        for pair_ in self.entries:
            mm._set_refined(pair_.first, pair_.second)

    cdef void _reset(self):
        # Drop every stored pair.
        self.entries.clear()

    cdef to_array(self):
        # Copy the stored pairs into a new (n_entries, 2) uint64 array:
        # column 0 holds pair.first, column 1 holds pair.second.
        cdef np.uint64_t row
        cdef np.ndarray[np.uint64_t, ndim=2] out
        out = np.empty((self.entries.size(),2),dtype='uint64')
        row = 0
        for pair_ in self.entries:
            out[row,0] = pair_.first
            out[row,1] = pair_.second
            row += 1
        return out

    def __dealloc__(self):
        self.entries.clear()
# NOTE(review): in this copy everything between the first `<<` of the encoder
# and the first `>>` of the decoder was lost, fusing the two functions into a
# fragment that does not compile. The missing statements and the decoder
# header below are reconstructed from the XSHIFT/YSHIFT/ZSHIFT constants, the
# surviving p[1]/p[2] lines and the call sites decode_morton_64bit(mi, ind)
# with `np.uint64_t ind[3]` -- confirm against the upstream file.

@cython.cdivision(True)
cdef inline np.uint64_t encode_morton_64bit(np.uint64_t x_ind, np.uint64_t y_ind, np.uint64_t z_ind):
    # Build one Morton index from three per-axis indices: each index is
    # spread so two zero bits separate its bits (only the low 21 bits are
    # kept by spread_64bits_by2), then shifted into its own bit lane.
    cdef np.uint64_t mi
    mi = 0
    mi |= spread_64bits_by2(z_ind)<<ZSHIFT
    mi |= spread_64bits_by2(y_ind)<<YSHIFT
    mi |= spread_64bits_by2(x_ind)<<XSHIFT
    return mi

@cython.cdivision(True)
cdef inline void decode_morton_64bit(np.uint64_t mi, np.uint64_t *p):
    # Inverse of encode_morton_64bit: writes the x, y and z indices into
    # p[0], p[1] and p[2]. `p` must point at storage for three values.
    p[0] = compact_64bits_by2(mi>>XSHIFT)
    p[1] = compact_64bits_by2(mi>>YSHIFT)
    p[2] = compact_64bits_by2(mi>>ZSHIFT)
# Enumerate the neighbours, within +/- nn refined cells per axis, of the cell
# addressed by the (coarse, refined) Morton index pair (mi1, mi2).
#
# Inputs
#   mi1, mi2      Morton indices decoded into per-axis indices ind1 / ind2.
#   max_index1/2  per-axis index limits used for the bounds checks and wraps.
#   periodicity   per-axis flag; when set, a coarse index that leaves
#                 [0, max_index1) is wrapped instead of being skipped.
#   nn            number of cells to step in each direction along each axis.
# Scratch / outputs (all written by this function)
#   index         index[c, k] is the row j of the c-th valid offset on axis k.
#   ind1_n/ind2_n per-offset, per-axis coarse / refined indices.
#   neighbors1/2  encoded Morton indices of every neighbour found.
# Returns the number of neighbours written to neighbors1 / neighbors2.
#
# NOTE(review): nn, ind1, ind2 and max_index* are unsigned while adv, maj, rem
# and adv1 are int64. The negative-offset branches (`range(-nn, ...)`,
# `adv < 0`, `adv // max_index2`) only behave as intended if that arithmetic
# is carried out signed. No explicit casts are visible in this copy --
# confirm against the upstream file.
@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef np.uint32_t morton_neighbors_refined(np.uint64_t mi1, np.uint64_t mi2,
                                          np.uint64_t max_index1, np.uint64_t max_index2,
                                          bint periodicity[3], np.uint32_t nn,
                                          np.uint32_t[:,:] index,
                                          np.uint64_t[:,:] ind1_n,
                                          np.uint64_t[:,:] ind2_n,
                                          np.uint64_t[:] neighbors1,
                                          np.uint64_t[:] neighbors2):
    cdef np.uint32_t ntot = 0
    cdef np.uint64_t ind1[3]
    cdef np.uint64_t ind2[3]
    cdef np.uint32_t count[3]
    cdef np.uint32_t origin[3]
    cdef np.int64_t adv, maj, rem, adv1
    cdef int i, j, k, ii, ij, ik
    # count[k]: number of valid offsets recorded so far on axis k;
    # origin[k]: position within index[:, k] of the zero offset.
    for i in range(3):
        count[i] = 0
        origin[i] = 0
    # Get indices
    decode_morton_64bit(mi1,ind1)
    decode_morton_64bit(mi2,ind2)
    # Determine which directions are valid
    # j is the row used in ind1_n / ind2_n, i the offset applied on each axis.
    for j,i in enumerate(range(-nn,(nn+1))):
        if i == 0:
            # Zero offset: the cell itself, valid on every axis.
            for k in range(3):
                ind1_n[j,k] = ind1[k]
                ind2_n[j,k] = ind2[k]
                index[count[k],k] = j
                origin[k] = count[k]
                count[k] += 1
        else:
            for k in range(3):
                adv = (ind2[k] + i)
                maj = adv // (max_index2)
                rem = adv % (max_index2)
                if adv < 0:
                    # Stepped below refined index 0: move to a lower coarse index.
                    adv1 = (ind1[k] + (maj-1))
                    if adv1 < 0:
                        if periodicity[k]:
                            # Wrap the coarse index back into range.
                            while adv1 < 0:
                                adv1 += max_index1
                            ind1_n[j,k] = adv1
                        else:
                            # Outside a non-periodic axis: do not record this
                            # offset for axis k.
                            continue
                    else:
                        ind1_n[j,k] = adv1
                    # Wrap the refined index into the non-negative range.
                    while adv < 0:
                        adv += max_index2
                    ind2_n[j,k] = adv
                elif adv >= max_index2:
                    # Stepped past the last refined index: carry maj into the
                    # coarse index and keep the remainder as the refined index.
                    adv1 = (ind1[k] + maj)
                    if adv1 >= max_index1:
                        if periodicity[k]:
                            ind1_n[j,k] = (adv1 % max_index1)
                        else:
                            continue
                    else:
                        ind1_n[j,k] = adv1
                    ind2_n[j,k] = rem
                else:
                    # Same coarse index; only the refined index changes.
                    ind1_n[j,k] = ind1[k]
                    ind2_n[j,k] = (adv)
                # Record row j as a valid offset for axis k.
                index[count[k],k] = j
                count[k] += 1
    # Iterate over every combination of valid per-axis offsets
    for ii in range(count[0]):
        i = index[ii,0]
        for ij in range(count[1]):
            j = index[ij,1]
            for ik in range(count[2]):
                k = index[ik,2]
                # Skip the combination where all three axes use the zero offset.
                if (ii != origin[0]) or (ij != origin[1]) or (ik != origin[2]):
                    neighbors1[ntot] = encode_morton_64bit(ind1_n[i,0], ind1_n[j,1], ind1_n[k,2])
                    neighbors2[ntot] = encode_morton_64bit(ind2_n[i,0], ind2_n[j,1], ind2_n[k,2])
                    ntot += 1
    return ntot
print("Process completed in %f seconds" % (end - start)) assert np.all(np.asarray(out_array)) == np.all(out_array_gt) ewah_bool_utils-1.2.2/pyproject.toml000066400000000000000000000040331466056343200176200ustar00rootroot00000000000000[build-system] # keep in sync with wheels.yaml requires = [ "setuptools>=61.2", "Cython>=3.0", "numpy>=2.0.0", ] [project] name = "ewah_bool_utils" version = "1.2.2" description = "EWAH Bool Array utils for yt" authors = [ { name = "Matthew Turk" }, { name = "Meagan Lang" }, { name = "Navaneeth Suresh" }, ] requires-python =">=3.9" dependencies = [ # match the absolute oldest version of numpy with *any* # level of support for our minimal Python requirement "numpy>=1.19.3, <3", ] classifiers = [ "Intended Audience :: Developers", "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: BSD License", "Programming Language :: Cython", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] [project.license] text = "BSD" [project.readme] file = "README.rst" content-type = "text/x-rst" [project.urls] Homepage = "https://github.com/yt-project/ewah_bool_utils" [tool.setuptools] license-files = [ "LICENSE", ] [tool.black] line-length = 88 include = '\.pyi?$' exclude = ''' /( \.eggs | \.git | \.hg | \.mypy_cache | \.tox | \.venv | _build | buck-out | build | dist )/ ''' [tool.ruff.lint] exclude = ["*__init__.py"] ignore = ["E501"] select = [ "E", "F", "W", "C4", # flake8-comprehensions "B", # flake8-bugbear "YTT", # flake8-2020 "I", # isort "UP", # pyupgrade ] [tool.ruff.lint.isort] combine-as-imports = true [tool.cibuildwheel] build = "cp39-* cp310-* cp311-* cp312-* cp313-*" build-frontend = "build[uv]" build-verbosity = 1 test-skip = "*-musllinux*" test-command = [ "uv pip install -r {project}/test_requirements.txt", "pytest --pyargs 
import os
from distutils.ccompiler import get_default_compiler

import numpy as np
from Cython.Build import cythonize
from setuptools import Extension, setup

from setupext import check_for_openmp

# Only pass OpenMP flags when the probe in setupext reports support.
if check_for_openmp():
    omp_args = ["-fopenmp"]
else:
    omp_args = None

# Request C++11 only from GCC/Clang-style compilers. MSVC's /std switch starts
# at c++14 (which is also its default), so "/std:c++11" is an unknown option
# there; C++11 features are available without any flag.
if get_default_compiler() == "msvc":
    cpp11_args = []
else:
    cpp11_args = ["-std=c++11"]

# libm must be linked explicitly everywhere except Windows.
if os.name == "nt":
    std_libs = []
else:
    std_libs = ["m"]

define_macros = [
    ("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION"),
    # keep in sync with runtime requirements (pyproject.toml)
    ("NPY_TARGET_VERSION", "NPY_1_19_API_VERSION"),
]

extensions = [
    Extension(
        "ewah_bool_utils.ewah_bool_wrap",
        ["ewah_bool_utils/ewah_bool_wrap.pyx"],
        define_macros=define_macros,
        include_dirs=["ewah_bool_utils", "ewah_bool_utils/cpp", np.get_include()],
        language="c++",
        extra_compile_args=cpp11_args,
    ),
    Extension(
        "ewah_bool_utils.morton_utils",
        ["ewah_bool_utils/morton_utils.pyx"],
        define_macros=define_macros,
        extra_compile_args=omp_args,
        extra_link_args=omp_args,
        libraries=std_libs,
        include_dirs=[np.get_include()],
    ),
    Extension(
        "ewah_bool_utils._testing",
        ["ewah_bool_utils/_testing.pyx"],
        include_dirs=["ewah_bool_utils", "ewah_bool_utils/cpp", np.get_include()],
        define_macros=define_macros,
        extra_compile_args=["-O3"],
        language="c++",
    ),
]


setup(
    ext_modules=cythonize(
        extensions,
        compiler_directives={"language_level": 3},
    ),
)
@contextlib.contextmanager
def stdchannel_redirected(stdchannel, dest_filename):
    """
    A context manager to temporarily redirect stdout or stderr

    e.g.:

    with stdchannel_redirected(sys.stderr, os.devnull):
        if compiler.has_function('clock_gettime', libraries=['rt']):
            libraries.append('rt')

    Code adapted from https://stackoverflow.com/a/17752455/1382869

    Parameters
    ----------
    stdchannel : file-like object exposing ``fileno()``
        The channel whose underlying file descriptor is redirected.
    dest_filename : str
        File opened in ``"w"`` mode that receives the output while the
        context is active.

    Raises
    ------
    OSError
        Whatever ``os.dup``, ``open`` or ``os.dup2`` raise; the original
        error is propagated unchanged.
    """
    # Bind both names before the try block so the cleanup below can always
    # test them, even when os.dup() or open() is the call that fails.
    saved_fd = None
    dest_file = None
    try:
        saved_fd = os.dup(stdchannel.fileno())
        dest_file = open(dest_filename, "w")
        os.dup2(dest_file.fileno(), stdchannel.fileno())
        yield
    finally:
        try:
            if saved_fd is not None:
                # Point the channel back at its original target, then release
                # the duplicate so no descriptor is leaked per use.
                os.dup2(saved_fd, stdchannel.fileno())
                os.close(saved_fd)
        finally:
            if dest_file is not None:
                dest_file.close()
using_openmp = True else: warnings.warn( "Unexpected number of lines from output of test " f"OpenMP program (output was {output})", stacklevel=1, ) using_openmp = False else: warnings.warn( f"Unexpected output from test OpenMP program (output was {output})", stacklevel=1, ) using_openmp = False except (CompileError, LinkError): using_openmp = False finally: os.chdir(start_dir) if using_openmp: warnings.warn("Using OpenMP to compile parallel extensions", stacklevel=1) else: warnings.warn( "Unable to compile OpenMP test program so Cython\n" "extensions will be compiled without parallel support", stacklevel=1, ) return using_openmp ewah_bool_utils-1.2.2/test_requirements.txt000066400000000000000000000000161466056343200212240ustar00rootroot00000000000000pytest>=6.2.4