pax_global_header00006660000000000000000000000064143402553010014507gustar00rootroot0000000000000052 comment=b9a1546133959298c56eee686932dbb18ff80f7a bitshuffle-0.5.1/000077500000000000000000000000001434025530100136455ustar00rootroot00000000000000bitshuffle-0.5.1/.github/000077500000000000000000000000001434025530100152055ustar00rootroot00000000000000bitshuffle-0.5.1/.github/dependabot.yml000066400000000000000000000003711434025530100200360ustar00rootroot00000000000000# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly" bitshuffle-0.5.1/.github/workflows/000077500000000000000000000000001434025530100172425ustar00rootroot00000000000000bitshuffle-0.5.1/.github/workflows/flake8_cython.cfg000066400000000000000000000003331434025530100224600ustar00rootroot00000000000000[flake8] filename=*.pyx,*.pxd select=E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 show_source=True bitshuffle-0.5.1/.github/workflows/flake8_python.cfg000066400000000000000000000000651434025530100224770ustar00rootroot00000000000000[flake8] ignore=E501,E203,W503,E266 show_source=True bitshuffle-0.5.1/.github/workflows/install_hdf5.sh000066400000000000000000000005241434025530100221530ustar00rootroot00000000000000HDF5_VERSION=$1 # Download and install HDF5 $HDF5_VERSION from source for building wheels curl https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-${HDF5_VERSION%.*}/hdf5-$HDF5_VERSION/src/hdf5-$HDF5_VERSION.tar.gz -O -s tar -xzf hdf5-$HDF5_VERSION.tar.gz cd hdf5-$HDF5_VERSION ./configure --prefix=/usr/local make -j 2 make install cd .. 
bitshuffle-0.5.1/.github/workflows/lint.yml000066400000000000000000000013071434025530100207340ustar00rootroot00000000000000name: bitshuffle-ci-build on: pull_request: branches: - master push: branches: - master jobs: lint-code: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Set up Python 3.10 uses: actions/setup-python@v4 with: python-version: "3.10" - name: Install pip dependencies run: | pip install black flake8 - name: Run flake8 run: | flake8 --config $GITHUB_WORKSPACE/.github/workflows/flake8_python.cfg bitshuffle tests flake8 --config $GITHUB_WORKSPACE/.github/workflows/flake8_cython.cfg bitshuffle tests - name: Check code with black run: black --check . bitshuffle-0.5.1/.github/workflows/main.yml000066400000000000000000000025531434025530100207160ustar00rootroot00000000000000name: bitshuffle-ci-build on: pull_request: branches: - master push: branches: - master jobs: run-tests: strategy: matrix: python-version: ["3.6", "3.7", "3.10"] os: [ubuntu-latest, macos-latest] exclude: - os: macos-latest python-version: "3.6" runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 - name: Install apt dependencies if: ${{ matrix.os == 'ubuntu-latest' }} run: | sudo apt-get install -y libhdf5-serial-dev hdf5-tools pkg-config - name: Install homebrew dependencies if: ${{ matrix.os == 'macos-latest' }} run: | brew install hdf5 pkg-config - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install h5py if: ${{ matrix.os == 'macos-latest' }} run: | pip install h5py - name: Install pip dependencies run: | pip install Cython pip install -r requirements.txt pip install pytest # Pull in ZSTD repo git submodule update --init # Installing the plugin to arbitrary directory to check the install script. python setup.py install --h5plugin --h5plugin-dir ~/hdf5/lib --zstd - name: Run tests run: pytest -v . 
bitshuffle-0.5.1/.github/workflows/wheels.yml000066400000000000000000000063101434025530100212540ustar00rootroot00000000000000name: Build bitshuffle wheels and upload to PyPI on: workflow_dispatch: release: types: - published jobs: build_wheels: name: Build wheels on ${{ matrix.os }} and hdf5-${{ matrix.hdf5 }} runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, macos-latest] hdf5: ["1.10.7"] steps: # Checkout bitshuffle - uses: actions/checkout@v3 # Build wheels for linux and x86 platforms - name: Build wheels uses: pypa/cibuildwheel@v2.11.2 with: output-dir: ./wheelhouse-hdf5-${{ matrix.hdf5}} env: CIBW_SKIP: "pp* *musllinux* cp311-macosx*" CIBW_ARCHS: "x86_64" CIBW_BEFORE_ALL: | chmod +x .github/workflows/install_hdf5.sh .github/workflows/install_hdf5.sh ${{ matrix.hdf5 }} git submodule update --init # Only build Haswell wheels on x86 for compatibility CIBW_ENVIRONMENT: > LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib CPATH=/usr/local/include ENABLE_ZSTD=1 BITSHUFFLE_ARCH=haswell CIBW_TEST_REQUIRES: pytest # Install different version of HDF5 for unit tests to ensure the # wheels are independent of HDF5 installation # CIBW_BEFORE_TEST: | # chmod +x .github/workflows/install_hdf5.sh # .github/workflows/install_hdf5.sh 1.8.11 # Run units tests but disable test_h5plugin.py CIBW_TEST_COMMAND: pytest {package}/tests # The Github runners for macOS don't support AVX2 instructions and so the tests will fail with SIGILL, so skip them CIBW_TEST_SKIP: "*macosx*" # Package wheels and host on CI - uses: actions/upload-artifact@v3 with: path: ./wheelhouse-hdf5-${{ matrix.hdf5 }}/*.whl build_sdist: name: Build source distribution strategy: matrix: python-version: ["3.8"] runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Install apt dependencies run: | sudo apt-get install -y libhdf5-serial-dev hdf5-tools pkg-config - name: Install Python uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install pip dependencies 
run: | pip install -r requirements.txt - name: Build sdist run: python setup.py sdist - uses: actions/upload-artifact@v3 with: path: dist/*.tar.gz # Upload to PyPI upload_pypi: needs: [build_wheels, build_sdist] runs-on: ubuntu-latest # Upload to PyPI on every tag # if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') # Alternatively, to publish when a GitHub Release is created, use the following rule: if: github.event_name == 'release' && github.event.action == 'published' steps: - uses: actions/download-artifact@v3 with: name: artifact path: dist - uses: pypa/gh-action-pypi-publish@v1.5.1 with: user: __token__ password: ${{ secrets.pypi_password }} # To test: repository_url: https://test.pypi.org/legacy/ bitshuffle-0.5.1/.gitignore000066400000000000000000000012371434025530100156400ustar00rootroot00000000000000## C # Object files *.o *.ko *.obj *.elf # Libraries *.lib *.a # Shared objects (inc. Windows DLLs) *.dll *.so *.so.* *.dylib # Executables *.exe *.out *.app *.i*86 *.x86_64 *.hex ## Python *.py[cod] # C extensions *.so # Packages *.egg *.egg-info dist build eggs parts bin var sdist develop-eggs .installed.cfg lib lib64 __pycache__ # Installer logs pip-log.txt # Unit test / coverage reports .coverage .tox nosetests.xml # Translations *.mo # Mr Developer .mr.developer.cfg .project .pydevproject # Documentation builds doc/_build doc/generated ## Editor files and backups. *.swp *.swo # Generated files bitshuffle/ext.c bitshuffle/h5.c # ItelliJ .idea bitshuffle-0.5.1/.gitmodules000066400000000000000000000001101434025530100160120ustar00rootroot00000000000000[submodule "zstd"] path = zstd url = https://github.com/facebook/zstd bitshuffle-0.5.1/LICENSE000066400000000000000000000021741434025530100146560ustar00rootroot00000000000000Bitshuffle - Filter for improving compression of typed binary data. 
Copyright (c) 2014 Kiyoshi Masui (kiyo@physics.ubc.ca) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. bitshuffle-0.5.1/MANIFEST.in000066400000000000000000000003461434025530100154060ustar00rootroot00000000000000recursive-include src *.h *.c recursive-include bitshuffle *.pyx recursive-include lz4 *.h *.c recursive-include lzf *.h *.c include setup.cfg.example include LICENSE include README.rst include requirements.txt exclude setup.cfg bitshuffle-0.5.1/README.rst000066400000000000000000000250201434025530100153330ustar00rootroot00000000000000========== Bitshuffle ========== Filter for improving compression of typed binary data. Bitshuffle is an algorithm that rearranges typed, binary data for improving compression, as well as a python/C package that implements this algorithm within the Numpy framework. The library can be used along side HDF5 to compress and decompress datasets and is integrated through the `dynamically loaded filters`_ framework. Bitshuffle is HDF5 filter number ``32008``. 
Algorithmically, Bitshuffle is closely related to HDF5's `Shuffle filter`_ except it operates at the bit level instead of the byte level. Arranging a typed data array in to a matrix with the elements as the rows and the bits within the elements as the columns, Bitshuffle "transposes" the matrix, such that all the least-significant-bits are in a row, etc. This transpose is performed within blocks of data roughly 8 kB long [1]_. This does not in itself compress data, only rearranges it for more efficient compression. To perform the actual compression you will need a compression library. Bitshuffle has been designed to be well matched to Marc Lehmann's LZF_ as well as LZ4_ and ZSTD_. Note that because Bitshuffle modifies the data at the bit level, sophisticated entropy reducing compression libraries such as GZIP and BZIP are unlikely to achieve significantly better compression than simpler and faster duplicate-string-elimination algorithms such as LZF, LZ4 and ZSTD. Bitshuffle thus includes routines (and HDF5 filter options) to apply LZ4 and ZSTD compression to each block after shuffling [2]_. The Bitshuffle algorithm relies on neighbouring elements of a dataset being highly correlated to improve data compression. Any correlations that span at least 24 elements of the dataset may be exploited to improve compression. Bitshuffle was designed with performance in mind. On most machines the time required for Bitshuffle+LZ4 is insignificant compared to the time required to read or write the compressed data to disk. Because it is able to exploit the SSE and AVX instruction sets present on modern Intel and AMD processors, on these machines compression is only marginally slower than an out-of-cache memory copy. On modern x86 processors you can expect Bitshuffle to have a throughput of roughly 1 byte per clock cycle, and on the Haswell generation of Intel processors (2013) and later, you can expect up to 2 bytes per clock cycle. 
In addition, Bitshuffle is parallelized using OpenMP. As a bonus, Bitshuffle ships with a dynamically loaded version of `h5py`'s LZF compression filter, such that the filter can be transparently used outside of python and in command line utilities such as ``h5dump``. .. [1] Chosen to fit comfortably within L1 cache as well as to be well matched to the window of the LZF compression library. .. [2] Compared with applying bitshuffle to the full dataset and then applying LZ4/ZSTD compression, this has the tremendous advantage that the block is already in the L1 cache. .. _`dynamically loaded filters`: http://www.hdfgroup.org/HDF5/doc/Advanced/DynamicallyLoadedFilters/HDF5DynamicallyLoadedFilters.pdf .. _`Shuffle filter`: http://www.hdfgroup.org/HDF5/doc_resource/H5Shuffle_Perf.pdf .. _LZF: http://oldhome.schmorp.de/marc/liblzf.html .. _LZ4: https://code.google.com/p/lz4/ .. _ZSTD: https://github.com/facebook/zstd Applications ------------ Bitshuffle might be right for your application if: - You need to compress typed binary data. - Your data is arranged such that adjacent elements over the fastest varying index of your dataset are similar (highly correlated). - A special case of the previous point is if you are only exercising a subset of the bits in your data-type, as is often true of integer data. - You need both high compression ratios and high performance. Comparing Bitshuffle to other compression algorithms and HDF5 filters: - Bitshuffle is less general than many other compression algorithms. To achieve good compression ratios, consecutive elements of your data must be highly correlated. - For the right datasets, Bitshuffle is one of the few compression algorithms that promises both high throughput and high compression ratios. - Bitshuffle should have roughly the same throughput as Shuffle, but may obtain higher compression ratios. 
- The MAFISC_ filter actually includes something similar to Bitshuffle as one of its prefilters. However, MAFISC's emphasis is on obtaining high compression ratios at all costs, sacrificing throughput. .. _MAFISC: http://wr.informatik.uni-hamburg.de/research/projects/icomex/mafisc Installation for Python ----------------------- In most cases bitshuffle can be installed by `pip`:: pip install bitshuffle On Linux and macOS x86_64 platforms binary wheels are available; on other platforms a source build will be performed. The binary wheels are built with AVX2 support and will only run on processors that support these instructions (most processors from 2015 onwards, i.e. Intel Haswell, AMD Excavator and later). On an unsupported processor these builds of bitshuffle will crash with `SIGILL`. To run on unsupported x86_64 processors, or target newer instructions such as AVX512, you should perform a build from source. This can be forced by giving pip the `--no-binary=bitshuffle` option. Source installation requires python 2.7+ or 3.3+, HDF5 1.8.4 or later, HDF5 for python (h5py), Numpy and Cython. Bitshuffle is linked against HDF5. Using the dynamically loaded HDF5 filter requires HDF5 1.8.11 or later. For total control, bitshuffle can be built using `python setup.py`. If ZSTD support is to be enabled the ZSTD repo needs to be pulled into bitshuffle before installation with:: git submodule update --init To build and install bitshuffle:: python setup.py install [--h5plugin [--h5plugin-dir=spam] --zstd] To get finer control of installation options, including whether to compile with OpenMP multi-threading and the target microarchitecture, copy the ``setup.cfg.example`` to ``setup.cfg`` and edit the values therein. 
If using the dynamically loaded HDF5 filter (which gives you access to the Bitshuffle and LZF filters outside of python), set the environment variable ``HDF5_PLUGIN_PATH`` to the value of ``--h5plugin-dir`` or use HDF5's default search location of ``/usr/local/hdf5/lib/plugin``. ZSTD support is enabled with ``--zstd``. If you get an error about missing source files when building the extensions, try upgrading setuptools. There is a weird bug where setuptools prior to 0.7 doesn't work properly with Cython in some cases. .. _source: http://docs.h5py.org/en/latest/build.html#source-installation Usage from Python ----------------- The `bitshuffle` module contains routines for shuffling and unshuffling Numpy arrays. If installed with the dynamically loaded filter plugins, Bitshuffle can be used in conjunction with HDF5 both inside and outside of python, in the same way as any other filter; simply by specifying the filter number ``32008``. Otherwise the filter will be available only within python and only after importing `bitshuffle.h5`. Reading Bitshuffle encoded datasets will be transparent. The filter can be added to new datasets either through the `h5py` low level interface or through the convenience functions provided in `bitshuffle.h5`. See the docstrings and unit tests for examples. For `h5py` version 2.5.0 and later Bitshuffle can be added to new datasets through the high level interface, as in the example below. The compression algorithm can be configured using the `filter_opts` in `bitshuffle.h5.create_dataset()`. LZ4 is chosen with: `(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)` and ZSTD with: `(BLOCK_SIZE, h5.H5_COMPRESS_ZSTD, COMP_LVL)`. See `test_h5filter.py` for an example. 
Example h5py ------------ :: import h5py import numpy import bitshuffle.h5 print(h5py.__version__) # >= '2.5.0' f = h5py.File(filename, "w") # block_size = 0 lets Bitshuffle choose its value block_size = 0 dataset = f.create_dataset( "data", (100, 100, 100), compression=bitshuffle.h5.H5FILTER, compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4), dtype='float32', ) # create some random data array = numpy.random.rand(100, 100, 100) array = array.astype('float32') dataset[:] = array f.close() Usage from C ------------ If you wish to use Bitshuffle in your C program and would prefer not to use the HDF5 dynamically loaded filter, the C library in the ``src/`` directory is self-contained and complete. Usage from Java --------------- You can use Bitshuffle even in Java and the routines for shuffling and unshuffling are ported into `snappy-java`_. To use the routines, you need to add the following dependency to your pom.xml:: <dependency> <groupId>org.xerial.snappy</groupId> <artifactId>snappy-java</artifactId> <version>1.1.3-M1</version> </dependency> First, import org.xerial.snappy.BitShuffle in your Java code:: import org.xerial.snappy.BitShuffle; Then, you use them like this:: int[] data = new int[] {1, 3, 34, 43, 34}; byte[] shuffledData = BitShuffle.bitShuffle(data); int[] result = BitShuffle.bitUnShuffleIntArray(shuffledData); .. _`snappy-java`: https://github.com/xerial/snappy-java Rust HDF5 plugin ---------------- If you wish to open HDF5 files compressed with bitshuffle in your Rust program, there is a `Rust binding`_ for it. In your Cargo.toml:: [dependencies] ... hdf5-bitshuffle = "0.9" ... To register the plugin in your code:: use hdf5_bitshuffle::register_bitshuffle_plugin; fn main() { register_bitshuffle_plugin(); } .. 
_`Rust binding`: https://docs.rs/hdf5-bitshuffle/latest/hdf5_bitshuffle/ Anaconda -------- The conda package can be built via:: conda build conda-recipe For Best Results ---------------- Here are a few tips to help you get the most out of Bitshuffle: - For multi-dimensional datasets, order your data such that the fastest varying dimension is the one over which your data is most correlated (i.e. has values that change the least), or fake this using chunks. - To achieve the highest throughput, use a data type that is 64 *bytes* or smaller. If you have a very large compound data type, consider adding a dimension to your datasets instead. - To make full use of the SSE2 instruction set, use a data type whose size is a multiple of 2 bytes. For the AVX2 instruction set, use a data type whose size is a multiple of 4 bytes. Citing Bitshuffle ----------------- Bitshuffle was initially described in http://dx.doi.org/10.1016/j.ascom.2015.07.002, pre-print available at http://arxiv.org/abs/1503.00638. bitshuffle-0.5.1/bitshuffle/000077500000000000000000000000001434025530100160005ustar00rootroot00000000000000bitshuffle-0.5.1/bitshuffle/__init__.py000066400000000000000000000016141434025530100201130ustar00rootroot00000000000000# flake8: noqa """ Filter for improving compression of typed binary data. 
Functions ========= using_NEON using_SSE2 using_AVX2 using_AVX512 bitshuffle bitunshuffle compress_lz4 decompress_lz4 compress_zstd decompress_zstd """ from __future__ import absolute_import from bitshuffle.ext import ( __version__, __zstd__, bitshuffle, bitunshuffle, using_NEON, using_SSE2, using_AVX2, using_AVX512, compress_lz4, decompress_lz4, ) # Import ZSTD API if enabled zstd_api = [] if __zstd__: from bitshuffle.ext import ( compress_zstd, decompress_zstd, ) zstd_api += ["compress_zstd", "decompress_zstd"] __all__ = [ "__version__", "bitshuffle", "bitunshuffle", "using_NEON", "using_SSE2", "using_AVX2", "using_AVX512", "compress_lz4", "decompress_lz4", ] + zstd_api bitshuffle-0.5.1/bitshuffle/ext.pyx000066400000000000000000000467031434025530100173540ustar00rootroot00000000000000""" Wrappers for public and private bitshuffle routines """ from __future__ import absolute_import, division, print_function, unicode_literals import numpy as np cimport numpy as np cimport cython np.import_array() # Repeat each calculation this many times. For timing. 
cdef int REPEATC = 1 # cdef int REPEATC = 32 REPEAT = REPEATC cdef extern from b"bitshuffle.h": int bshuf_using_NEON() int bshuf_using_SSE2() int bshuf_using_AVX2() int bshuf_using_AVX512() int bshuf_bitshuffle(void *A, void *B, int size, int elem_size, int block_size) nogil int bshuf_bitunshuffle(void *A, void *B, int size, int elem_size, int block_size) nogil int bshuf_compress_lz4_bound(int size, int elem_size, int block_size) int bshuf_compress_lz4(void *A, void *B, int size, int elem_size, int block_size) nogil int bshuf_decompress_lz4(void *A, void *B, int size, int elem_size, int block_size) nogil IF ZSTD_SUPPORT: int bshuf_compress_zstd_bound(int size, int elem_size, int block_size) int bshuf_compress_zstd(void *A, void *B, int size, int elem_size, int block_size, const int comp_lvl) nogil int bshuf_decompress_zstd(void *A, void *B, int size, int elem_size, int block_size) nogil int BSHUF_VERSION_MAJOR int BSHUF_VERSION_MINOR int BSHUF_VERSION_POINT __version__ = "%d.%d.%d" % (BSHUF_VERSION_MAJOR, BSHUF_VERSION_MINOR, BSHUF_VERSION_POINT) IF ZSTD_SUPPORT: __zstd__ = True ELSE: __zstd__ = False # Prototypes from bitshuffle.c cdef extern int bshuf_copy(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_byte_elem_scal(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_byte_elem_SSE(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_byte_elem_NEON(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_bit_byte_scal(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_bit_byte_SSE(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_bit_byte_NEON(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_bit_byte_AVX(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_bit_byte_AVX512(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_bitrow_eight(void *A, void *B, int size, int elem_size) cdef extern 
int bshuf_trans_bit_elem_AVX512(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_bit_elem_AVX(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_bit_elem_SSE(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_bit_elem_NEON(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_bit_elem_scal(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_byte_bitrow_SSE(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_byte_bitrow_NEON(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_byte_bitrow_AVX(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_byte_bitrow_scal(void *A, void *B, int size, int elem_size) cdef extern int bshuf_shuffle_bit_eightelem_scal(void *A, void *B, int size, int elem_size) cdef extern int bshuf_shuffle_bit_eightelem_SSE(void *A, void *B, int size, int elem_size) cdef extern int bshuf_shuffle_bit_eightelem_NEON(void *A, void *B, int size, int elem_size) cdef extern int bshuf_shuffle_bit_eightelem_AVX(void *A, void *B, int size, int elem_size) cdef extern int bshuf_shuffle_bit_eightelem_AVX512(void *A, void *B, int size, int elem_size) cdef extern int bshuf_untrans_bit_elem_SSE(void *A, void *B, int size, int elem_size) cdef extern int bshuf_untrans_bit_elem_NEON(void *A, void *B, int size, int elem_size) cdef extern int bshuf_untrans_bit_elem_AVX(void *A, void *B, int size, int elem_size) cdef extern int bshuf_untrans_bit_elem_AVX512(void *A, void *B, int size, int elem_size) cdef extern int bshuf_untrans_bit_elem_scal(void *A, void *B, int size, int elem_size) cdef extern int bshuf_trans_bit_elem(void *A, void *B, int size, int elem_size) cdef extern int bshuf_untrans_bit_elem(void *A, void *B, int size, int elem_size) ctypedef int (*Cfptr) (void *A, void *B, int size, int elem_size) def using_NEON(): """Whether compiled using Arm NEON instructions.""" if bshuf_using_NEON(): return True else: 
return False def using_SSE2(): """Whether compiled using SSE2 instructions.""" if bshuf_using_SSE2(): return True else: return False def using_AVX2(): """Whether compiled using AVX2 instructions.""" if bshuf_using_AVX2(): return True else: return False def using_AVX512(): """Whether compiled using AVX512 instructions.""" if bshuf_using_AVX512(): return True else: return False def _setup_arr(arr): shape = tuple(arr.shape) if not arr.flags['C_CONTIGUOUS']: msg = "Input array must be C-contiguous." raise ValueError(msg) size = arr.size dtype = arr.dtype itemsize = dtype.itemsize out = np.empty(shape, dtype=dtype) return out, size, itemsize @cython.boundscheck(False) @cython.wraparound(False) cdef _wrap_C_fun(Cfptr fun, np.ndarray arr): """Wrap a C function with standard call signature.""" cdef int ii, size, itemsize, count=0 cdef np.ndarray out out, size, itemsize = _setup_arr(arr) cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat arr_flat = arr.view(np.uint8).ravel() cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat out_flat = out.view(np.uint8).ravel() cdef void* arr_ptr = &arr_flat[0] cdef void* out_ptr = &out_flat[0] for ii in range(REPEATC): count = fun(arr_ptr, out_ptr, size, itemsize) if count < 0: msg = "Failed. Error code %d." excp = RuntimeError(msg % count, count) raise excp return out def copy(np.ndarray arr not None): """Copies the data. For testing and profiling purposes. """ return _wrap_C_fun(&bshuf_copy, arr) def trans_byte_elem_scal(np.ndarray arr not None): """Transpose bytes within words but not bits. """ return _wrap_C_fun(&bshuf_trans_byte_elem_scal, arr) def trans_byte_elem_SSE(np.ndarray arr not None): """Transpose bytes within array elements. 
""" return _wrap_C_fun(&bshuf_trans_byte_elem_SSE, arr) def trans_byte_elem_NEON(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_byte_elem_NEON, arr) def trans_bit_byte_scal(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_bit_byte_scal, arr) def trans_bit_byte_SSE(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_bit_byte_SSE, arr) def trans_bit_byte_NEON(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_bit_byte_NEON, arr) def trans_bit_byte_AVX(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_bit_byte_AVX, arr) def trans_bit_byte_AVX512(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_bit_byte_AVX512, arr) def trans_bitrow_eight(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_bitrow_eight, arr) def trans_bit_elem_AVX512(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_bit_elem_AVX512, arr) def trans_bit_elem_AVX(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_bit_elem_AVX, arr) def trans_bit_elem_scal(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_bit_elem_scal, arr) def trans_bit_elem_SSE(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_bit_elem_SSE, arr) def trans_bit_elem_NEON(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_bit_elem_NEON, arr) def trans_byte_bitrow_SSE(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_byte_bitrow_SSE, arr) def trans_byte_bitrow_NEON(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_byte_bitrow_NEON, arr) def trans_byte_bitrow_AVX(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_byte_bitrow_AVX, arr) def trans_byte_bitrow_scal(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_byte_bitrow_scal, arr) def shuffle_bit_eightelem_scal(np.ndarray arr not None): return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_scal, arr) def shuffle_bit_eightelem_SSE(np.ndarray arr not None): return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_SSE, arr) def shuffle_bit_eightelem_NEON(np.ndarray arr not None): 
return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_NEON, arr) def shuffle_bit_eightelem_AVX(np.ndarray arr not None): return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_AVX, arr) def shuffle_bit_eightelem_AVX512(np.ndarray arr not None): return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_AVX512, arr) def untrans_bit_elem_SSE(np.ndarray arr not None): return _wrap_C_fun(&bshuf_untrans_bit_elem_SSE, arr) def untrans_bit_elem_NEON(np.ndarray arr not None): return _wrap_C_fun(&bshuf_untrans_bit_elem_NEON, arr) def untrans_bit_elem_AVX(np.ndarray arr not None): return _wrap_C_fun(&bshuf_untrans_bit_elem_AVX, arr) def untrans_bit_elem_AVX512(np.ndarray arr not None): return _wrap_C_fun(&bshuf_untrans_bit_elem_AVX512, arr) def untrans_bit_elem_scal(np.ndarray arr not None): return _wrap_C_fun(&bshuf_untrans_bit_elem_scal, arr) def trans_bit_elem(np.ndarray arr not None): return _wrap_C_fun(&bshuf_trans_bit_elem, arr) def untrans_bit_elem(np.ndarray arr not None): return _wrap_C_fun(&bshuf_untrans_bit_elem, arr) @cython.boundscheck(False) @cython.wraparound(False) def bitshuffle(np.ndarray arr not None, int block_size=0): """Bitshuffle an array. Output array is the same shape and data type as input array but underlying buffer has been bitshuffled. Parameters ---------- arr : numpy array Data to ne processed. block_size : positive integer Block size in number of elements. By default, block size is chosen automatically. Returns ------- out : numpy array Array with the same shape as input but underlying data has been bitshuffled. 
""" cdef int ii, size, itemsize, count=0 cdef np.ndarray out out, size, itemsize = _setup_arr(arr) cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat arr_flat = arr.view(np.uint8).ravel() cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat out_flat = out.view(np.uint8).ravel() cdef void* arr_ptr = &arr_flat[0] cdef void* out_ptr = &out_flat[0] with nogil: for ii in range(REPEATC): count = bshuf_bitshuffle(arr_ptr, out_ptr, size, itemsize, block_size) if count < 0: msg = "Failed. Error code %d." excp = RuntimeError(msg % count, count) raise excp return out @cython.boundscheck(False) @cython.wraparound(False) def bitunshuffle(np.ndarray arr not None, int block_size=0): """Bitshuffle an array. Output array is the same shape and data type as input array but underlying buffer has been un-bitshuffled. Parameters ---------- arr : numpy array Data to ne processed. block_size : positive integer Block size in number of elements. Must match value used for shuffling. Returns ------- out : numpy array Array with the same shape as input but underlying data has been un-bitshuffled. """ cdef int ii, size, itemsize, count=0 cdef np.ndarray out out, size, itemsize = _setup_arr(arr) cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat arr_flat = arr.view(np.uint8).ravel() cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat out_flat = out.view(np.uint8).ravel() cdef void* arr_ptr = &arr_flat[0] cdef void* out_ptr = &out_flat[0] with nogil: for ii in range(REPEATC): count = bshuf_bitunshuffle(arr_ptr, out_ptr, size, itemsize, block_size) if count < 0: msg = "Failed. Error code %d." excp = RuntimeError(msg % count, count) raise excp return out @cython.boundscheck(False) @cython.wraparound(False) def compress_lz4(np.ndarray arr not None, int block_size=0): """Bitshuffle then compress an array using LZ4. Parameters ---------- arr : numpy array Data to ne processed. block_size : positive integer Block size in number of elements. 
By default, block size is chosen automatically. Returns ------- out : array with np.uint8 data type Buffer holding compressed data. """ cdef int ii, size, itemsize, count=0 shape = (arr.shape[i] for i in range(arr.ndim)) if not arr.flags['C_CONTIGUOUS']: msg = "Input array must be C-contiguous." raise ValueError(msg) size = arr.size dtype = arr.dtype itemsize = dtype.itemsize max_out_size = bshuf_compress_lz4_bound(size, itemsize, block_size) cdef np.ndarray out out = np.empty(max_out_size, dtype=np.uint8) cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat arr_flat = arr.view(np.uint8).ravel() cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat out_flat = out.view(np.uint8).ravel() cdef void* arr_ptr = &arr_flat[0] cdef void* out_ptr = &out_flat[0] with nogil: for ii in range(REPEATC): count = bshuf_compress_lz4(arr_ptr, out_ptr, size, itemsize, block_size) if count < 0: msg = "Failed. Error code %d." excp = RuntimeError(msg % count, count) raise excp return out[:count] @cython.boundscheck(False) @cython.wraparound(False) def decompress_lz4(np.ndarray arr not None, shape, dtype, int block_size=0): """Decompress a buffer using LZ4 then bitunshuffle it yielding an array. Parameters ---------- arr : numpy array Input data to be decompressed. shape : tuple of integers Shape of the output (decompressed array). Must match the shape of the original data array before compression. dtype : numpy dtype Datatype of the output array. Must match the data type of the original data array before compression. block_size : positive integer Block size in number of elements. Must match value used for compression. Returns ------- out : numpy array with shape *shape* and data type *dtype* Decompressed data. """ cdef int ii, size, itemsize, count=0 if not arr.flags['C_CONTIGUOUS']: msg = "Input array must be C-contiguous." 
raise ValueError(msg) size = np.prod(shape) itemsize = dtype.itemsize cdef np.ndarray out out = np.empty(tuple(shape), dtype=dtype) cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat arr_flat = arr.view(np.uint8).ravel() cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat out_flat = out.view(np.uint8).ravel() cdef void* arr_ptr = &arr_flat[0] cdef void* out_ptr = &out_flat[0] with nogil: for ii in range(REPEATC): count = bshuf_decompress_lz4(arr_ptr, out_ptr, size, itemsize, block_size) if count < 0: msg = "Failed. Error code %d." excp = RuntimeError(msg % count, count) raise excp if count != arr.size: msg = "Decompressed different number of bytes than input buffer size." msg += "Input buffer %d, decompressed %d." % (arr.size, count) raise RuntimeError(msg, count) return out IF ZSTD_SUPPORT: @cython.boundscheck(False) @cython.wraparound(False) def compress_zstd(np.ndarray arr not None, int block_size=0, int comp_lvl=1): """Bitshuffle then compress an array using ZSTD. Parameters ---------- arr : numpy array Data to be processed. block_size : positive integer Block size in number of elements. By default, block size is chosen automatically. comp_lvl : positive integer Compression level applied by ZSTD Returns ------- out : array with np.uint8 data type Buffer holding compressed data. """ cdef int ii, size, itemsize, count=0 shape = (arr.shape[i] for i in range(arr.ndim)) if not arr.flags['C_CONTIGUOUS']: msg = "Input array must be C-contiguous." 
raise ValueError(msg) size = arr.size dtype = arr.dtype itemsize = dtype.itemsize max_out_size = bshuf_compress_zstd_bound(size, itemsize, block_size) cdef np.ndarray out out = np.empty(max_out_size, dtype=np.uint8) cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat arr_flat = arr.view(np.uint8).ravel() cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat out_flat = out.view(np.uint8).ravel() cdef void* arr_ptr = &arr_flat[0] cdef void* out_ptr = &out_flat[0] with nogil: for ii in range(REPEATC): count = bshuf_compress_zstd(arr_ptr, out_ptr, size, itemsize, block_size, comp_lvl) if count < 0: msg = "Failed. Error code %d." excp = RuntimeError(msg % count, count) raise excp return out[:count] @cython.boundscheck(False) @cython.wraparound(False) def decompress_zstd(np.ndarray arr not None, shape, dtype, int block_size=0): """Decompress a buffer using ZSTD then bitunshuffle it yielding an array. Parameters ---------- arr : numpy array Input data to be decompressed. shape : tuple of integers Shape of the output (decompressed array). Must match the shape of the original data array before compression. dtype : numpy dtype Datatype of the output array. Must match the data type of the original data array before compression. block_size : positive integer Block size in number of elements. Must match value used for compression. Returns ------- out : numpy array with shape *shape* and data type *dtype* Decompressed data. """ cdef int ii, size, itemsize, count=0 if not arr.flags['C_CONTIGUOUS']: msg = "Input array must be C-contiguous." 
raise ValueError(msg) size = np.prod(shape) itemsize = dtype.itemsize cdef np.ndarray out out = np.empty(tuple(shape), dtype=dtype) cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat arr_flat = arr.view(np.uint8).ravel() cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat out_flat = out.view(np.uint8).ravel() cdef void* arr_ptr = &arr_flat[0] cdef void* out_ptr = &out_flat[0] with nogil: for ii in range(REPEATC): count = bshuf_decompress_zstd(arr_ptr, out_ptr, size, itemsize, block_size) if count < 0: msg = "Failed. Error code %d." excp = RuntimeError(msg % count, count) raise excp if count != arr.size: msg = "Decompressed different number of bytes than input buffer size." msg += "Input buffer %d, decompressed %d." % (arr.size, count) raise RuntimeError(msg, count) return out bitshuffle-0.5.1/bitshuffle/h5.pyx000066400000000000000000000170571434025530100170700ustar00rootroot00000000000000""" HDF5 support for Bitshuffle. To read a dataset that uses the Bitshuffle filter using h5py, simply import this module (unless you have installed the Bitshuffle dynamically loaded filter, in which case importing this module is unnecessary). To create a new dataset that includes the Bitshuffle filter, use one of the convenience functions provided. Constants ========= H5FILTER : The Bitshuffle HDF5 filter integer identifier. H5_COMPRESS_LZ4 : Filter option flag for LZ4 compression. H5_COMPRESS_ZSTD : Filter option flag for ZSTD compression. 
Functions ========= create_dataset create_bitshuffle_lzf_dataset create_bitshuffle_compressed_dataset Examples ======== >>> import numpy as np >>> import h5py >>> import bitshuffle.h5 >>> shape = (123, 456) >>> chunks = (10, 456) >>> dtype = np.float64 >>> f = h5py.File("tmp_test.h5") >>> bitshuffle.h5.create_bitshuffle_compressed_dataset( f, "some_data", shape, dtype, chunks) >>> f["some_data"][:] = 42 """ from __future__ import absolute_import, division, print_function, unicode_literals import sys import numpy import h5py from h5py import h5d, h5fd, h5s, h5t, h5p, h5z, defs, filters cimport cython cdef extern from b"bshuf_h5filter.h": int bshuf_register_h5filter() int BSHUF_H5FILTER int BSHUF_H5_COMPRESS_LZ4 int BSHUF_H5_COMPRESS_ZSTD cdef extern int init_filter(const char* libname) cdef int LZF_FILTER = 32000 H5FILTER = BSHUF_H5FILTER H5_COMPRESS_LZ4 = BSHUF_H5_COMPRESS_LZ4 H5_COMPRESS_ZSTD = BSHUF_H5_COMPRESS_ZSTD # Init HDF5 dynamic loading with HDF5 library used by h5py if not sys.platform.startswith('win'): if sys.version_info[0] >= 3: libs = [bytes(h5d.__file__, encoding='utf-8'), bytes(h5fd.__file__, encoding='utf-8'), bytes(h5s.__file__, encoding='utf-8'), bytes(h5t.__file__, encoding='utf-8'), bytes(h5p.__file__, encoding='utf-8'), bytes(h5z.__file__, encoding='utf-8'), bytes(defs.__file__, encoding='utf-8')] else: libs = [h5d.__file__, h5fd.__file__, h5s.__file__, h5t.__file__, h5p.__file__, h5z.__file__, defs.__file__] # Ensure all symbols are loaded success = -1 for lib in libs: success = init_filter(lib) if success == 0: break if success == -1: raise RuntimeError("Failed to load all HDF5 symbols using these libs: {}".format(libs)) def register_h5_filter(): ret = bshuf_register_h5filter() if ret < 0: raise RuntimeError("Failed to register bitshuffle HDF5 filter.", ret) register_h5_filter() def create_dataset(parent, name, shape, dtype, chunks=None, maxshape=None, fillvalue=None, track_times=None, filter_pipeline=(), filter_flags=None, 
filter_opts=None): """Create a dataset with an arbitrary filter pipeline. Return a new low-level dataset identifier. Much of this code is copied from h5py, but couldn't reuse much code due to unstable API. """ if hasattr(filter_pipeline, "__getitem__"): filter_pipeline = list(filter_pipeline) else: filter_pipeline = [filter_pipeline] filter_flags = [filter_flags] filter_opts = [filter_opts] nfilters = len(filter_pipeline) if filter_flags is None: filter_flags = [None] * nfilters if filter_opts is None: filter_opts = [None] * nfilters if not len(filter_flags) == nfilters or not len(filter_opts) == nfilters: msg = "Supplied incompatible number of filters, flags, and options." raise ValueError(msg) shape = tuple(shape) tmp_shape = maxshape if maxshape is not None else shape # Validate chunk shape chunks_larger = (numpy.array([ not i>=j for i, j in zip(tmp_shape, chunks) if i is not None])).any() if isinstance(chunks, tuple) and chunks_larger: errmsg = ("Chunk shape must not be greater than data shape in any " "dimension. 
{} is not compatible with {}".format(chunks, shape)) raise ValueError(errmsg) if isinstance(dtype, h5py.Datatype): # Named types are used as-is tid = dtype.id dtype = tid.dtype # Following code needs this else: # Validate dtype dtype = numpy.dtype(dtype) tid = h5t.py_create(dtype, logical=1) if shape == (): if any((chunks, filter_pipeline)): raise TypeError("Scalar datasets don't support chunk/filter options") if maxshape and maxshape != (): raise TypeError("Scalar datasets cannot be extended") return h5p.create(h5p.DATASET_CREATE) def rq_tuple(tpl, name): """Check if chunks/maxshape match dataset rank""" if tpl in (None, True): return try: tpl = tuple(tpl) except TypeError: raise TypeError('"%s" argument must be None or a sequence object' % name) if len(tpl) != len(shape): raise ValueError('"%s" must have same rank as dataset shape' % name) rq_tuple(chunks, 'chunks') rq_tuple(maxshape, 'maxshape') if (chunks is True) or (chunks is None and filter_pipeline): chunks = filters.guess_chunk(shape, maxshape, dtype.itemsize) if maxshape is True: maxshape = (None,)*len(shape) dcpl = h5p.create(h5p.DATASET_CREATE) if chunks is not None: dcpl.set_chunk(chunks) dcpl.set_fill_time(h5d.FILL_TIME_ALLOC) # prevent resize glitch if fillvalue is not None: fillvalue = numpy.array(fillvalue) dcpl.set_fill_value(fillvalue) if track_times in (True, False): dcpl.set_obj_track_times(track_times) elif track_times is not None: raise TypeError("track_times must be either True or False") for ii in range(nfilters): this_filter = filter_pipeline[ii] this_flags = filter_flags[ii] this_opts = filter_opts[ii] if this_flags is None: this_flags = 0 if this_opts is None: this_opts = () dcpl.set_filter(this_filter, this_flags, this_opts) if maxshape is not None: maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape) sid = h5s.create_simple(shape, maxshape) dset_id = h5d.create(parent.id, name, tid, sid, dcpl=dcpl) return dset_id def create_bitshuffle_lzf_dataset(parent, name, 
shape, dtype, chunks=None, maxshape=None, fillvalue=None, track_times=None): """Create dataset with a filter pipeline including bitshuffle and LZF""" filter_pipeline = [H5FILTER, LZF_FILTER] dset_id = create_dataset(parent, name, shape, dtype, chunks=chunks, filter_pipeline=filter_pipeline, maxshape=maxshape, fillvalue=fillvalue, track_times=track_times) return dset_id def create_bitshuffle_compressed_dataset(parent, name, shape, dtype, chunks=None, maxshape=None, fillvalue=None, track_times=None): """Create dataset with bitshuffle+internal LZ4 compression.""" filter_pipeline = [H5FILTER, ] filter_opts = [(0, H5_COMPRESS_LZ4)] dset_id = create_dataset(parent, name, shape, dtype, chunks=chunks, filter_pipeline=filter_pipeline, filter_opts=filter_opts, maxshape=maxshape, fillvalue=fillvalue, track_times=track_times) return dset_id bitshuffle-0.5.1/conda-recipe/000077500000000000000000000000001434025530100161765ustar00rootroot00000000000000bitshuffle-0.5.1/conda-recipe/bld.bat000066400000000000000000000001131434025530100174220ustar00rootroot00000000000000SET CONDA_HOME=%PREFIX% "%PYTHON%" setup.py install if errorlevel 1 exit 1 bitshuffle-0.5.1/conda-recipe/build.sh000066400000000000000000000001361434025530100176310ustar00rootroot00000000000000export CONDA_HOME=$PREFIX $PYTHON setup.py install # Python command to install the script bitshuffle-0.5.1/conda-recipe/meta.yaml000066400000000000000000000007671434025530100200220ustar00rootroot00000000000000package: name: bitshuffle version: 0.2.1 source: # git_url: https://github.com/kiyo-masui/bitshuffle.git # git_rev: 0.2.1 path: .. patches: - setup.py.patch requirements: build: - python - setuptools - cython - numpy - h5py - hdf5 run: - python - numpy - h5py - cython about: home: https://github.com/kiyo-masui/bitshuffle/blob/master/setup.py summary: "bitshuffle library." 
bitshuffle-0.5.1/conda-recipe/setup.py.patch000066400000000000000000000007671434025530100210200ustar00rootroot00000000000000--- setup.py 2016-01-19 16:56:12.954563000 +0100 +++ xxx.py 2016-01-19 16:56:00.817087000 +0100 @@ -40,8 +40,8 @@ # Copied from h5py. # TODO, figure out what the canonacal way to do this should be. -INCLUDE_DIRS = [] -LIBRARY_DIRS = [] +INCLUDE_DIRS = [os.environ['CONDA_HOME'] + '/include'] +LIBRARY_DIRS = [os.environ['CONDA_HOME'] + '/lib'] if sys.platform == 'darwin': # putting here both macports and homebrew paths will generate # "ld: warning: dir not found" at the linking phase bitshuffle-0.5.1/lz4/000077500000000000000000000000001434025530100143565ustar00rootroot00000000000000bitshuffle-0.5.1/lz4/LICENSE000066400000000000000000000024371434025530100153710ustar00rootroot00000000000000LZ4 Library Copyright (c) 2011-2016, Yann Collet All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. bitshuffle-0.5.1/lz4/README.md000066400000000000000000000027621434025530100156440ustar00rootroot00000000000000LZ4 - Library Files ================================ The __lib__ directory contains several files, but you don't necessarily need them all. To integrate fast LZ4 compression/decompression into your program, you basically just need "**lz4.c**" and "**lz4.h**". For more compression at the cost of compression speed (while preserving decompression speed), use **lz4hc** on top of regular lz4. `lz4hc` only provides compression functions. It also needs `lz4` to compile properly. If you want to produce files or data streams compatible with `lz4` command line utility, use **lz4frame**. This library encapsulates lz4-compressed blocks into the [official interoperable frame format]. In order to work properly, lz4frame needs lz4 and lz4hc, and also **xxhash**, which provides error detection algorithm. (_Advanced stuff_ : It's possible to hide xxhash symbols into a local namespace. This is what `liblz4` does, to avoid symbol duplication in case a user program would link to several libraries containing xxhash symbols.) A more complex "lz4frame_static.h" is also provided, although its usage is not recommended. It contains definitions which are not guaranteed to remain stable within future versions. Use for static linking ***only***. The other files are not source code. 
There are : - LICENSE : contains the BSD license text - Makefile : script to compile or install lz4 library (static or dynamic) - liblz4.pc.in : for pkg-config (make install) [official interoperable frame format]: ../lz4_Frame_format.md bitshuffle-0.5.1/lz4/lz4.c000066400000000000000000003151771434025530100152510ustar00rootroot00000000000000/* LZ4 - Fast LZ compression algorithm Copyright (C) 2011-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
You can contact the author at : - LZ4 homepage : http://www.lz4.org - LZ4 source repository : https://github.com/lz4/lz4 */ /*-************************************ * Tuning parameters **************************************/ /* * LZ4_HEAPMODE : * Select how default compression functions will allocate memory for their hash table, * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). */ #ifndef LZ4_HEAPMODE # define LZ4_HEAPMODE 0 #endif /* * LZ4_ACCELERATION_DEFAULT : * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 */ #define LZ4_ACCELERATION_DEFAULT 1 /* * LZ4_ACCELERATION_MAX : * Any "acceleration" value higher than this threshold * get treated as LZ4_ACCELERATION_MAX instead (fix #876) */ #define LZ4_ACCELERATION_MAX 65537 /*-************************************ * CPU Feature Detection **************************************/ /* LZ4_FORCE_MEMORY_ACCESS * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. * The below switch allow to select different access method for improved performance. * Method 0 (default) : use `memcpy()`. Safe and portable. * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. * Method 2 : direct access. This method is portable but violate C standard. * It can generate buggy code on targets which assembly generation depends on alignment. * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
* Prefer these methods in priority order (0 > 1 > 2) */ #ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ # if defined(__GNUC__) && \ ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) # define LZ4_FORCE_MEMORY_ACCESS 2 # elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) # define LZ4_FORCE_MEMORY_ACCESS 1 # endif #endif /* * LZ4_FORCE_SW_BITCOUNT * Define this parameter if your target system or compiler does not support hardware bit count */ #if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */ # undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */ # define LZ4_FORCE_SW_BITCOUNT #endif /*-************************************ * Dependency **************************************/ /* * LZ4_SRC_INCLUDED: * Amalgamation flag, whether lz4.c is included */ #ifndef LZ4_SRC_INCLUDED # define LZ4_SRC_INCLUDED 1 #endif #ifndef LZ4_STATIC_LINKING_ONLY #define LZ4_STATIC_LINKING_ONLY #endif #ifndef LZ4_DISABLE_DEPRECATE_WARNINGS #define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */ #endif #define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */ #include "lz4.h" /* see also "memory routines" below */ /*-************************************ * Compiler Options **************************************/ #if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */ # include /* only present in VS2005+ */ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #endif /* _MSC_VER */ #ifndef LZ4_FORCE_INLINE # ifdef _MSC_VER /* Visual Studio */ # define LZ4_FORCE_INLINE static __forceinline # else # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ # ifdef __GNUC__ # define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) # else # define 
LZ4_FORCE_INLINE static inline # endif # else # define LZ4_FORCE_INLINE static # endif /* __STDC_VERSION__ */ # endif /* _MSC_VER */ #endif /* LZ4_FORCE_INLINE */ /* LZ4_FORCE_O2 and LZ4_FORCE_INLINE * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, * together with a simple 8-byte copy loop as a fall-back path. * However, this optimization hurts the decompression speed by >30%, * because the execution does not go to the optimized loop * for typical compressible data, and all of the preamble checks * before going to the fall-back path become useless overhead. * This optimization happens only with the -O3 flag, and -O2 generates * a simple 8-byte copy loop. * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8 * functions are annotated with __attribute__((optimize("O2"))), * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute * of LZ4_wildCopy8 does not affect the compression speed. */ #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) # define LZ4_FORCE_O2 __attribute__((optimize("O2"))) # undef LZ4_FORCE_INLINE # define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) #else # define LZ4_FORCE_O2 #endif #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) # define expect(expr,value) (__builtin_expect ((expr),(value)) ) #else # define expect(expr,value) (expr) #endif #ifndef likely #define likely(expr) expect((expr) != 0, 1) #endif #ifndef unlikely #define unlikely(expr) expect((expr) != 0, 0) #endif /* Should the alignment test prove unreliable, for some reason, * it can be disabled by setting LZ4_ALIGN_TEST to 0 */ #ifndef LZ4_ALIGN_TEST /* can be externally provided */ # define LZ4_ALIGN_TEST 1 #endif /*-************************************ * Memory routines **************************************/ #ifdef LZ4_USER_MEMORY_FUNCTIONS /* memory management functions can 
be customized by user project. * Below functions must exist somewhere in the Project * and be available at link time */ void* LZ4_malloc(size_t s); void* LZ4_calloc(size_t n, size_t s); void LZ4_free(void* p); # define ALLOC(s) LZ4_malloc(s) # define ALLOC_AND_ZERO(s) LZ4_calloc(1,s) # define FREEMEM(p) LZ4_free(p) #else # include /* malloc, calloc, free */ # define ALLOC(s) malloc(s) # define ALLOC_AND_ZERO(s) calloc(1,s) # define FREEMEM(p) free(p) #endif #include /* memset, memcpy */ #define MEM_INIT(p,v,s) memset((p),(v),(s)) /*-************************************ * Common Constants **************************************/ #define MINMATCH 4 #define WILDCOPYLENGTH 8 #define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ #define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ #define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ #define FASTLOOP_SAFE_DISTANCE 64 static const int LZ4_minLength = (MFLIMIT+1); #define KB *(1 <<10) #define MB *(1 <<20) #define GB *(1U<<30) #define LZ4_DISTANCE_ABSOLUTE_MAX 65535 #if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */ # error "LZ4_DISTANCE_MAX is too big : must be <= 65535" #endif #define ML_BITS 4 #define ML_MASK ((1U<=1) # include #else # ifndef assert # define assert(condition) ((void)0) # endif #endif #define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */ #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) # include static int g_debuglog_enable = 1; # define DEBUGLOG(l, ...) { \ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ fprintf(stderr, __FILE__ ": "); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, " \n"); \ } } #else # define DEBUGLOG(l, ...) 
{} /* disabled */ #endif static int LZ4_isAligned(const void* ptr, size_t alignment) { return ((size_t)ptr & (alignment -1)) == 0; } /*-************************************ * Types **************************************/ #include #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) # include typedef uint8_t BYTE; typedef uint16_t U16; typedef uint32_t U32; typedef int32_t S32; typedef uint64_t U64; typedef uintptr_t uptrval; #else # if UINT_MAX != 4294967295UL # error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4" # endif typedef unsigned char BYTE; typedef unsigned short U16; typedef unsigned int U32; typedef signed int S32; typedef unsigned long long U64; typedef size_t uptrval; /* generally true, except OpenVMS-64 */ #endif #if defined(__x86_64__) typedef U64 reg_t; /* 64-bits in x32 mode */ #else typedef size_t reg_t; /* 32-bits in x32 mode */ #endif typedef enum { notLimited = 0, limitedOutput = 1, fillOutput = 2 } limitedOutput_directive; /*-************************************ * Reading and writing into memory **************************************/ /** * LZ4 relies on memcpy with a constant size being inlined. In freestanding * environments, the compiler can't assume the implementation of memcpy() is * standard compliant, so it can't apply its specialized memcpy() inlining * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze * memcpy() as if it were standard compliant, so it can inline it in freestanding * environments. This is needed when decompressing the Linux Kernel, for example. 
*/ #if defined(__GNUC__) && (__GNUC__ >= 4) #define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) #else #define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) #endif static unsigned LZ4_isLittleEndian(void) { const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ return one.c[0]; } #if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) /* lie to the compiler about data alignment; use with caution */ static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } #elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ /* currently only defined for gcc and icc */ typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign; static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; } static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } #else /* safe and portable access using memcpy() */ static U16 LZ4_read16(const void* memPtr) { U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; } static U32 LZ4_read32(const void* memPtr) { U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; } static reg_t LZ4_read_ARCH(const void* memPtr) { reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; } static void LZ4_write16(void* memPtr, U16 
value) { LZ4_memcpy(memPtr, &value, sizeof(value)); } static void LZ4_write32(void* memPtr, U32 value) { LZ4_memcpy(memPtr, &value, sizeof(value)); } #endif /* LZ4_FORCE_MEMORY_ACCESS */ static U16 LZ4_readLE16(const void* memPtr) { if (LZ4_isLittleEndian()) { return LZ4_read16(memPtr); } else { const BYTE* p = (const BYTE*)memPtr; return (U16)((U16)p[0] + (p[1]<<8)); } } static void LZ4_writeLE16(void* memPtr, U16 value) { if (LZ4_isLittleEndian()) { LZ4_write16(memPtr, value); } else { BYTE* p = (BYTE*)memPtr; p[0] = (BYTE) value; p[1] = (BYTE)(value>>8); } } /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ LZ4_FORCE_INLINE void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) { BYTE* d = (BYTE*)dstPtr; const BYTE* s = (const BYTE*)srcPtr; BYTE* const e = (BYTE*)dstEnd; do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d= 16. */ LZ4_FORCE_INLINE void LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) { BYTE* d = (BYTE*)dstPtr; const BYTE* s = (const BYTE*)srcPtr; BYTE* const e = (BYTE*)dstEnd; do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d= dstPtr + MINMATCH * - there is at least 8 bytes available to write after dstEnd */ LZ4_FORCE_INLINE void LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) { BYTE v[8]; assert(dstEnd >= dstPtr + MINMATCH); switch(offset) { case 1: MEM_INIT(v, *srcPtr, 8); break; case 2: LZ4_memcpy(v, srcPtr, 2); LZ4_memcpy(&v[2], srcPtr, 2); LZ4_memcpy(&v[4], v, 4); break; case 4: LZ4_memcpy(v, srcPtr, 4); LZ4_memcpy(&v[4], srcPtr, 4); break; default: LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset); return; } LZ4_memcpy(dstPtr, v, 8); dstPtr += 8; while (dstPtr < dstEnd) { LZ4_memcpy(dstPtr, v, 8); dstPtr += 8; } } #endif /*-************************************ * Common functions **************************************/ static unsigned LZ4_NbCommonBytes (reg_t val) { assert(val != 0); if 
(LZ4_isLittleEndian()) { if (sizeof(val) == 8) { # if defined(_MSC_VER) && (_MSC_VER >= 1800) && defined(_M_AMD64) && !defined(LZ4_FORCE_SW_BITCOUNT) /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */ return (unsigned)_tzcnt_u64(val) >> 3; # elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) unsigned long r = 0; _BitScanForward64(&r, (U64)val); return (unsigned)r >> 3; # elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ !defined(LZ4_FORCE_SW_BITCOUNT) return (unsigned)__builtin_ctzll((U64)val) >> 3; # else const U64 m = 0x0101010101010101ULL; val ^= val - 1; return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56); # endif } else /* 32 bits */ { # if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) unsigned long r; _BitScanForward(&r, (U32)val); return (unsigned)r >> 3; # elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) return (unsigned)__builtin_ctz((U32)val) >> 3; # else const U32 m = 0x01010101; return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; # endif } } else /* Big Endian CPU */ { if (sizeof(val)==8) { # if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) return (unsigned)__builtin_clzll((U64)val) >> 3; # else #if 1 /* this method is probably faster, * but adds a 128 bytes lookup table */ static const unsigned char ctz7_tab[128] = { 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 
1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, }; U64 const mask = 0x0101010101010101ULL; U64 const t = (((val >> 8) - mask) | val) & mask; return ctz7_tab[(t * 0x0080402010080402ULL) >> 57]; #else /* this method doesn't consume memory space like the previous one, * but it contains several branches, * that may end up slowing execution */ static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits. Just to avoid some static analyzer complaining about shift by 32 on 32-bits target. Note that this code path is never triggered in 32-bits mode. */ unsigned r; if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; } if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } r += (!val); return r; #endif # endif } else /* 32 bits */ { # if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ !defined(LZ4_FORCE_SW_BITCOUNT) return (unsigned)__builtin_clz((U32)val) >> 3; # else val >>= 8; val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | (val + 0x00FF0000)) >> 24; return (unsigned)val ^ 3; # endif } } } #define STEPSIZE sizeof(reg_t) LZ4_FORCE_INLINE unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) { const BYTE* const pStart = pIn; if (likely(pIn < pInLimit-(STEPSIZE-1))) { reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; } else { return LZ4_NbCommonBytes(diff); } } while (likely(pIn < pInLimit-(STEPSIZE-1))) { reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; } pIn += LZ4_NbCommonBytes(diff); return (unsigned)(pIn - pStart); } if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; } if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; } if ((pIn compression run slower on incompressible data */ /*-************************************ * Local Structures and types 
**************************************/ typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; /** * This enum distinguishes several different modes of accessing previous * content in the stream. * * - noDict : There is no preceding content. * - withPrefix64k : Table entries up to ctx->dictSize before the current blob * blob being compressed are valid and refer to the preceding * content (of length ctx->dictSize), which is available * contiguously preceding in memory the content currently * being compressed. * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere * else in memory, starting at ctx->dictionary with length * ctx->dictSize. * - usingDictCtx : Like usingExtDict, but everything concerning the preceding * content is in a separate context, pointed to by * ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table * entries in the current context that refer to positions * preceding the beginning of the current compression are * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx * ->dictSize describe the location and size of the preceding * content, and matches are found by looking in the ctx * ->dictCtx->hashTable. 
*/ typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; /*-************************************ * Local Utils **************************************/ int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; } int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; } /*-************************************ * Internal Definitions used in Tests **************************************/ #if defined (__cplusplus) extern "C" { #endif int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize); int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const void* dictStart, size_t dictSize); #if defined (__cplusplus) } #endif /*-****************************** * Compression functions ********************************/ LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType) { if (tableType == byU16) return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); else return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); } LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType) { const U32 hashLog = (tableType == byU16) ? 
LZ4_HASHLOG+1 : LZ4_HASHLOG; if (LZ4_isLittleEndian()) { const U64 prime5bytes = 889523592379ULL; return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); } else { const U64 prime8bytes = 11400714785074694791ULL; return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); } } LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType) { if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType); return LZ4_hash4(LZ4_read32(p), tableType); } LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) { switch (tableType) { default: /* fallthrough */ case clearedTable: { /* illegal! */ assert(0); return; } case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; } case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; } case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; } } } LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType) { switch (tableType) { default: /* fallthrough */ case clearedTable: /* fallthrough */ case byPtr: { /* illegal! */ assert(0); return; } case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; } case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; } } } LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase) { switch (tableType) { case clearedTable: { /* illegal! 
*/ assert(0); return; } case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } } } LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) { U32 const h = LZ4_hashPosition(p, tableType); LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); } /* LZ4_getIndexOnHash() : * Index of match position registered in hash table. * hash position must be calculated by using base+index, or dictBase+index. * Assumption 1 : only valid if tableType == byU32 or byU16. * Assumption 2 : h is presumed valid (within limits of hash table) */ LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType) { LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2); if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; assert(h < (1U << (LZ4_MEMORY_USAGE-2))); return hashTable[h]; } if (tableType == byU16) { const U16* const hashTable = (const U16*) tableBase; assert(h < (1U << (LZ4_MEMORY_USAGE-1))); return hashTable[h]; } assert(0); return 0; /* forbidden case */ } static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase) { if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; } if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; } { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ } LZ4_FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, const void* tableBase, tableType_t tableType, const BYTE* srcBase) { U32 const h = LZ4_hashPosition(p, tableType); return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); } 
/* LZ4_prepareTable() :
 * make the hash table of `cctx` usable for a new, independent compression
 * of `inputSize` bytes with layout `tableType`.
 * The table is either wiped or kept as is, then any dictionary / history
 * reference is dropped from the context.
 */
LZ4_FORCE_INLINE void
LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
                 const int inputSize,
                 const tableType_t tableType)
{
    tableType_t const previousType = (tableType_t)cctx->tableType;

    /* A table that hasn't been used is guaranteed to be zeroed out,
     * hence safe to use whatever the mode.
     * Otherwise, decide whether it can be left as is or must be reset. */
    if (previousType != clearedTable) {
        int resetRequired;
        assert(inputSize >= 0);

        resetRequired = (previousType != tableType);
        if (!resetRequired && (tableType == byU16)) {
            resetRequired = (cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU);
        }
        if (!resetRequired && (tableType == byU32)) {
            resetRequired = (cctx->currentOffset > 1 GB);
        }
        if (!resetRequired) {
            resetRequired = (tableType == byPtr) || (inputSize >= 4 KB);
        }

        if (resetRequired) {
            DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
            MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
            cctx->currentOffset = 0;
            cctx->tableType = (U32)clearedTable;
        } else {
            DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
        }
    }

    /* Inserting a gap, so that all previous entries end up
     * > LZ4_DISTANCE_MAX back, is faster than compressing without a gap.
     * Compressing with currentOffset == 0 is faster still,
     * so that case is left untouched. */
    if ((tableType == byU32) && (cctx->currentOffset != 0)) {
        DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
        cctx->currentOffset += 64 KB;
    }

    /* Finally, clear history */
    cctx->dictSize = 0;
    cctx->dictionary = NULL;
    cctx->dictCtx = NULL;
}

/** LZ4_compress_generic() :
 *  inlined, to ensure branches are decided at compilation time.
* Presumed already validated at this stage: * - source != NULL * - inputSize > 0 */ LZ4_FORCE_INLINE int LZ4_compress_generic_validated( LZ4_stream_t_internal* const cctx, const char* const source, char* const dest, const int inputSize, int *inputConsumed, /* only written when outputDirective == fillOutput */ const int maxOutputSize, const limitedOutput_directive outputDirective, const tableType_t tableType, const dict_directive dictDirective, const dictIssue_directive dictIssue, const int acceleration) { int result; const BYTE* ip = (const BYTE*) source; U32 const startIndex = cctx->currentOffset; const BYTE* base = (const BYTE*) source - startIndex; const BYTE* lowLimit; const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx; const BYTE* const dictionary = dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary; const U32 dictSize = dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize; const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with index in current context */ int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx); U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */ const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary; const BYTE* anchor = (const BYTE*) source; const BYTE* const iend = ip + inputSize; const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1; const BYTE* const matchlimit = iend - LASTLITERALS; /* the dictCtx currentOffset is indexed on the start of the dictionary, * while a dictionary in the current context precedes the currentOffset */ const BYTE* dictBase = !dictionary ? NULL : (dictDirective == usingDictCtx) ? 
dictionary + dictSize - dictCtx->currentOffset : dictionary + dictSize - startIndex; BYTE* op = (BYTE*) dest; BYTE* const olimit = op + maxOutputSize; U32 offset = 0; U32 forwardH; DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType); assert(ip != NULL); /* If init conditions are not met, we don't have to mark stream * as having dirty context, since no action was taken yet */ if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */ if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */ if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */ assert(acceleration >= 1); lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0); /* Update context state */ if (dictDirective == usingDictCtx) { /* Subsequent linked blocks can't use the dictionary. */ /* Instead, they use the block we just compressed. 
*/ cctx->dictCtx = NULL; cctx->dictSize = (U32)inputSize; } else { cctx->dictSize += (U32)inputSize; } cctx->currentOffset += (U32)inputSize; cctx->tableType = (U32)tableType; if (inputSizehashTable, tableType, base); ip++; forwardH = LZ4_hashPosition(ip, tableType); /* Main Loop */ for ( ; ; ) { const BYTE* match; BYTE* token; const BYTE* filledIp; /* Find a match */ if (tableType == byPtr) { const BYTE* forwardIp = ip; int step = 1; int searchMatchNb = acceleration << LZ4_skipTrigger; do { U32 const h = forwardH; ip = forwardIp; forwardIp += step; step = (searchMatchNb++ >> LZ4_skipTrigger); if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; assert(ip < mflimitPlusOne); match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base); forwardH = LZ4_hashPosition(forwardIp, tableType); LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base); } while ( (match+LZ4_DISTANCE_MAX < ip) || (LZ4_read32(match) != LZ4_read32(ip)) ); } else { /* byU32, byU16 */ const BYTE* forwardIp = ip; int step = 1; int searchMatchNb = acceleration << LZ4_skipTrigger; do { U32 const h = forwardH; U32 const current = (U32)(forwardIp - base); U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); assert(matchIndex <= current); assert(forwardIp - base < (ptrdiff_t)(2 GB - 1)); ip = forwardIp; forwardIp += step; step = (searchMatchNb++ >> LZ4_skipTrigger); if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; assert(ip < mflimitPlusOne); if (dictDirective == usingDictCtx) { if (matchIndex < startIndex) { /* there was no match, try the dictionary */ assert(tableType == byU32); matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); match = dictBase + matchIndex; matchIndex += dictDelta; /* make dictCtx index comparable with current context */ lowLimit = dictionary; } else { match = base + matchIndex; lowLimit = (const BYTE*)source; } } else if (dictDirective==usingExtDict) { if (matchIndex < startIndex) { DEBUGLOG(7, "extDict candidate: 
matchIndex=%5u < startIndex=%5u", matchIndex, startIndex); assert(startIndex - matchIndex >= MINMATCH); match = dictBase + matchIndex; lowLimit = dictionary; } else { match = base + matchIndex; lowLimit = (const BYTE*)source; } } else { /* single continuous memory segment */ match = base + matchIndex; } forwardH = LZ4_hashPosition(forwardIp, tableType); LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex); if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */ assert(matchIndex < current); if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX)) && (matchIndex+LZ4_DISTANCE_MAX < current)) { continue; } /* too far */ assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */ if (LZ4_read32(match) == LZ4_read32(ip)) { if (maybe_extMem) offset = current - matchIndex; break; /* match found */ } } while(1); } /* Catch up */ filledIp = ip; while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; } /* Encode Literals */ { unsigned const litLength = (unsigned)(ip - anchor); token = op++; if ((outputDirective == limitedOutput) && /* Check output buffer overflow */ (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) { return 0; /* cannot compress within `dst` budget. 
Stored indexes in hash table are nonetheless fine */ } if ((outputDirective == fillOutput) && (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) { op--; goto _last_literals; } if (litLength >= RUN_MASK) { int len = (int)(litLength - RUN_MASK); *token = (RUN_MASK<= 255 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } else *token = (BYTE)(litLength< olimit)) { /* the match was too close to the end, rewind and go to last literals */ op = token; goto _last_literals; } /* Encode Offset */ if (maybe_extMem) { /* static test */ DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source)); assert(offset <= LZ4_DISTANCE_MAX && offset > 0); LZ4_writeLE16(op, (U16)offset); op+=2; } else { DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match)); assert(ip-match <= LZ4_DISTANCE_MAX); LZ4_writeLE16(op, (U16)(ip - match)); op+=2; } /* Encode MatchLength */ { unsigned matchCode; if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx) && (lowLimit==dictionary) /* match within extDict */ ) { const BYTE* limit = ip + (dictEnd-match); assert(dictEnd > match); if (limit > matchlimit) limit = matchlimit; matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); ip += (size_t)matchCode + MINMATCH; if (ip==limit) { unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit); matchCode += more; ip += more; } DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH); } else { matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); ip += (size_t)matchCode + MINMATCH; DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH); } if ((outputDirective) && /* Check output buffer overflow */ (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) { if (outputDirective == fillOutput) { /* Match description too long : reduce it */ U32 newMatchCode = 15 /* in token 
*/ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255; ip -= matchCode - newMatchCode; assert(newMatchCode < matchCode); matchCode = newMatchCode; if (unlikely(ip <= filledIp)) { /* We have already filled up to filledIp so if ip ends up less than filledIp * we have positions in the hash table beyond the current position. This is * a problem if we reuse the hash table. So we have to remove these positions * from the hash table. */ const BYTE* ptr; DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip)); for (ptr = ip; ptr <= filledIp; ++ptr) { U32 const h = LZ4_hashPosition(ptr, tableType); LZ4_clearHash(h, cctx->hashTable, tableType); } } } else { assert(outputDirective == limitedOutput); return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ } } if (matchCode >= ML_MASK) { *token += ML_MASK; matchCode -= ML_MASK; LZ4_write32(op, 0xFFFFFFFF); while (matchCode >= 4*255) { op+=4; LZ4_write32(op, 0xFFFFFFFF); matchCode -= 4*255; } op += matchCode / 255; *op++ = (BYTE)(matchCode % 255); } else *token += (BYTE)(matchCode); } /* Ensure we have enough space for the last literals. 
*/ assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit)); anchor = ip; /* Test end of chunk */ if (ip >= mflimitPlusOne) break; /* Fill table */ LZ4_putPosition(ip-2, cctx->hashTable, tableType, base); /* Test next position */ if (tableType == byPtr) { match = LZ4_getPosition(ip, cctx->hashTable, tableType, base); LZ4_putPosition(ip, cctx->hashTable, tableType, base); if ( (match+LZ4_DISTANCE_MAX >= ip) && (LZ4_read32(match) == LZ4_read32(ip)) ) { token=op++; *token=0; goto _next_match; } } else { /* byU32, byU16 */ U32 const h = LZ4_hashPosition(ip, tableType); U32 const current = (U32)(ip-base); U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); assert(matchIndex < current); if (dictDirective == usingDictCtx) { if (matchIndex < startIndex) { /* there was no match, try the dictionary */ matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); match = dictBase + matchIndex; lowLimit = dictionary; /* required for match length counter */ matchIndex += dictDelta; } else { match = base + matchIndex; lowLimit = (const BYTE*)source; /* required for match length counter */ } } else if (dictDirective==usingExtDict) { if (matchIndex < startIndex) { match = dictBase + matchIndex; lowLimit = dictionary; /* required for match length counter */ } else { match = base + matchIndex; lowLimit = (const BYTE*)source; /* required for match length counter */ } } else { /* single memory segment */ match = base + matchIndex; } LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); assert(matchIndex < current); if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1) && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 
1 : (matchIndex+LZ4_DISTANCE_MAX >= current)) && (LZ4_read32(match) == LZ4_read32(ip)) ) { token=op++; *token=0; if (maybe_extMem) offset = current - matchIndex; DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i", (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source)); goto _next_match; } } /* Prepare next loop */ forwardH = LZ4_hashPosition(++ip, tableType); } _last_literals: /* Encode Last Literals */ { size_t lastRun = (size_t)(iend - anchor); if ( (outputDirective) && /* Check output buffer overflow */ (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) { if (outputDirective == fillOutput) { /* adapt lastRun to fill 'dst' */ assert(olimit >= op); lastRun = (size_t)(olimit-op) - 1/*token*/; lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/ } else { assert(outputDirective == limitedOutput); return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ } } DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun); if (lastRun >= RUN_MASK) { size_t accumulator = lastRun - RUN_MASK; *op++ = RUN_MASK << ML_BITS; for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; *op++ = (BYTE) accumulator; } else { *op++ = (BYTE)(lastRun< 0); DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result); return result; } /** LZ4_compress_generic() : * inlined, to ensure branches are decided at compilation time; * takes care of src == (NULL, 0) * and forward the rest to LZ4_compress_generic_validated */ LZ4_FORCE_INLINE int LZ4_compress_generic( LZ4_stream_t_internal* const cctx, const char* const src, char* const dst, const int srcSize, int *inputConsumed, /* only written when outputDirective == fillOutput */ const int dstCapacity, const limitedOutput_directive outputDirective, const tableType_t tableType, const dict_directive dictDirective, const dictIssue_directive dictIssue, const int acceleration) { DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, 
dstCapacity=%i", srcSize, dstCapacity); if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */ if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */ if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */ DEBUGLOG(5, "Generating an empty block"); assert(outputDirective == notLimited || dstCapacity >= 1); assert(dst != NULL); dst[0] = 0; if (outputDirective == fillOutput) { assert (inputConsumed != NULL); *inputConsumed = 0; } return 1; } assert(src != NULL); return LZ4_compress_generic_validated(cctx, src, dst, srcSize, inputConsumed, /* only written into if outputDirective == fillOutput */ dstCapacity, outputDirective, tableType, dictDirective, dictIssue, acceleration); } int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) { LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse; assert(ctx != NULL); if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; if (maxOutputSize >= LZ4_compressBound(inputSize)) { if (inputSize < LZ4_64Klimit) { return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration); } else { const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); } } else { if (inputSize < LZ4_64Klimit) { return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); } else { const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? 
byPtr : byU32; return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration); } } } /** * LZ4_compress_fast_extState_fastReset() : * A variant of LZ4_compress_fast_extState(). * * Using this variant avoids an expensive initialization step. It is only safe * to call if the state buffer is known to be correctly initialized already * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of * "correctly initialized"). */ int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) { LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse; if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; if (dstCapacity >= LZ4_compressBound(srcSize)) { if (srcSize < LZ4_64Klimit) { const tableType_t tableType = byU16; LZ4_prepareTable(ctx, srcSize, tableType); if (ctx->currentOffset) { return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration); } else { return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); } } else { const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? 
byPtr : byU32; LZ4_prepareTable(ctx, srcSize, tableType); return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); } } else { if (srcSize < LZ4_64Klimit) { const tableType_t tableType = byU16; LZ4_prepareTable(ctx, srcSize, tableType); if (ctx->currentOffset) { return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration); } else { return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); } } else { const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; LZ4_prepareTable(ctx, srcSize, tableType); return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); } } } int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) { int result; #if (LZ4_HEAPMODE) LZ4_stream_t* ctxPtr = ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ if (ctxPtr == NULL) return 0; #else LZ4_stream_t ctx; LZ4_stream_t* const ctxPtr = &ctx; #endif result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); #if (LZ4_HEAPMODE) FREEMEM(ctxPtr); #endif return result; } int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize) { return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1); } /* Note!: This function leaves the stream in an unclean/broken state! * It is not safe to subsequently use the same state with a _fastReset() or * _continue() call without resetting it. 
*/ static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize) { void* const s = LZ4_initStream(state, sizeof (*state)); assert(s != NULL); (void)s; if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */ return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1); } else { if (*srcSizePtr < LZ4_64Klimit) { return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1); } else { tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1); } } } int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) { #if (LZ4_HEAPMODE) LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ if (ctx == NULL) return 0; #else LZ4_stream_t ctxBody; LZ4_stream_t* ctx = &ctxBody; #endif int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize); #if (LZ4_HEAPMODE) FREEMEM(ctx); #endif return result; } /*-****************************** * Streaming functions ********************************/ LZ4_stream_t* LZ4_createStream(void) { LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ DEBUGLOG(4, "LZ4_createStream %p", lz4s); if (lz4s == NULL) return NULL; LZ4_initStream(lz4s, sizeof(*lz4s)); return lz4s; } static size_t LZ4_stream_t_alignment(void) { #if LZ4_ALIGN_TEST typedef struct { char c; LZ4_stream_t t; } t_a; return sizeof(t_a) - sizeof(LZ4_stream_t); #else return 1; /* effectively disabled */ #endif } LZ4_stream_t* 
LZ4_initStream (void* buffer, size_t size) { DEBUGLOG(5, "LZ4_initStream"); if (buffer == NULL) { return NULL; } if (size < sizeof(LZ4_stream_t)) { return NULL; } if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL; MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal)); return (LZ4_stream_t*)buffer; } /* resetStream is now deprecated, * prefer initStream() which is more general */ void LZ4_resetStream (LZ4_stream_t* LZ4_stream) { DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream); MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal)); } void LZ4_resetStream_fast(LZ4_stream_t* ctx) { LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32); } int LZ4_freeStream (LZ4_stream_t* LZ4_stream) { if (!LZ4_stream) return 0; /* support free on NULL */ DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream); FREEMEM(LZ4_stream); return (0); } #define HASH_UNIT sizeof(reg_t) int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) { LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse; const tableType_t tableType = byU32; const BYTE* p = (const BYTE*)dictionary; const BYTE* const dictEnd = p + dictSize; const BYTE* base; DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict); /* It's necessary to reset the context, * and not just continue it with prepareTable() * to avoid any risk of generating overflowing matchIndex * when compressing using this dictionary */ LZ4_resetStream(LZ4_dict); /* We always increment the offset by 64 KB, since, if the dict is longer, * we truncate it to the last 64k, and if it's shorter, we still want to * advance by a whole window length so we can provide the guarantee that * there are only valid offsets in the window, which allows an optimization * in LZ4_compress_fast_continue() where it uses noDictIssue even when the * dictionary isn't a full 64k. 
*/ dict->currentOffset += 64 KB; if (dictSize < (int)HASH_UNIT) { return 0; } if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; base = dictEnd - dict->currentOffset; dict->dictionary = p; dict->dictSize = (U32)(dictEnd - p); dict->tableType = (U32)tableType; while (p <= dictEnd-HASH_UNIT) { LZ4_putPosition(p, dict->hashTable, tableType, base); p+=3; } return (int)dict->dictSize; } void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) { const LZ4_stream_t_internal* dictCtx = dictionaryStream == NULL ? NULL : &(dictionaryStream->internal_donotuse); DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)", workingStream, dictionaryStream, dictCtx != NULL ? dictCtx->dictSize : 0); if (dictCtx != NULL) { /* If the current offset is zero, we will never look in the * external dictionary context, since there is no value a table * entry can take that indicate a miss. In that case, we need * to bump the offset to something non-zero. */ if (workingStream->internal_donotuse.currentOffset == 0) { workingStream->internal_donotuse.currentOffset = 64 KB; } /* Don't actually attach an empty dictionary. 
*/ if (dictCtx->dictSize == 0) { dictCtx = NULL; } } workingStream->internal_donotuse.dictCtx = dictCtx; } static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize) { assert(nextSize >= 0); if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */ /* rescale hash table */ U32 const delta = LZ4_dict->currentOffset - 64 KB; const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; int i; DEBUGLOG(4, "LZ4_renormDictT"); for (i=0; ihashTable[i] < delta) LZ4_dict->hashTable[i]=0; else LZ4_dict->hashTable[i] -= delta; } LZ4_dict->currentOffset = 64 KB; if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; } } int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) { const tableType_t tableType = byU32; LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse; const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize; DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize); LZ4_renormDictT(streamPtr, inputSize); /* avoid index overflow */ if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; /* invalidate tiny dictionaries */ if ( (streamPtr->dictSize-1 < 4-1) /* intentional underflow */ && (dictEnd != (const BYTE*)source) ) { DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary); streamPtr->dictSize = 0; streamPtr->dictionary = (const BYTE*)source; dictEnd = (const BYTE*)source; } /* Check overlapping input/dictionary space */ { const BYTE* sourceEnd = (const BYTE*) source + inputSize; if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) { streamPtr->dictSize = (U32)(dictEnd - sourceEnd); if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; if 
(streamPtr->dictSize < 4) streamPtr->dictSize = 0; streamPtr->dictionary = dictEnd - streamPtr->dictSize; } } /* prefix mode : source data follows dictionary */ if (dictEnd == (const BYTE*)source) { if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration); else return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration); } /* external dictionary mode */ { int result; if (streamPtr->dictCtx) { /* We depend here on the fact that dictCtx'es (produced by * LZ4_loadDict) guarantee that their tables contain no references * to offsets between dictCtx->currentOffset - 64 KB and * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe * to use noDictIssue even when the dict isn't a full 64 KB. */ if (inputSize > 4 KB) { /* For compressing large blobs, it is faster to pay the setup * cost to copy the dictionary's tables into the active context, * so that the compression loop is only looking into one table. 
*/ LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr)); result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); } else { result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration); } } else { if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration); } else { result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); } } streamPtr->dictionary = (const BYTE*)source; streamPtr->dictSize = (U32)inputSize; return result; } } /* Hidden debug function, to force-test external dictionary mode */ int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize) { LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse; int result; LZ4_renormDictT(streamPtr, srcSize); if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1); } else { result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); } streamPtr->dictionary = (const BYTE*)source; streamPtr->dictSize = (U32)srcSize; return result; } /*! LZ4_saveDict() : * If previously compressed data block is not guaranteed to remain available at its memory location, * save it into a safer place (char* safeBuffer). * Note : you don't need to call LZ4_loadDict() afterwards, * dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue(). 
* Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. */ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) { LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */ if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; } if (safeBuffer == NULL) assert(dictSize == 0); if (dictSize > 0) memmove(safeBuffer, previousDictEnd - dictSize, dictSize); dict->dictionary = (const BYTE*)safeBuffer; dict->dictSize = (U32)dictSize; return dictSize; } /*-******************************* * Decompression functions ********************************/ typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; #undef MIN #define MIN(a,b) ( (a) < (b) ? (a) : (b) ) /* Read the variable-length literal or match length. * * ip - pointer to use as input. * lencheck - end ip. Return an error if ip advances >= lencheck. * loop_check - check ip >= lencheck in body of loop. Returns loop_error if so. * initial_check - check ip >= lencheck before start of loop. Returns initial_error if so. * error (output) - error code. Should be set to 0 before call. */ typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error; LZ4_FORCE_INLINE unsigned read_variable_length(const BYTE**ip, const BYTE* lencheck, int loop_check, int initial_check, variable_length_error* error) { U32 length = 0; U32 s; if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ *error = initial_error; return length; } do { s = **ip; (*ip)++; length += s; if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ *error = loop_error; return length; } } while (s==255); return length; } /*! 
LZ4_decompress_generic() : * This generic decompression function covers all use cases. * It shall be instantiated several times, using different sets of directives. * Note that it is important for performance that this function really get inlined, * in order to remove useless branches during compilation optimization. */ LZ4_FORCE_INLINE int LZ4_decompress_generic( const char* const src, char* const dst, int srcSize, int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */ earlyEnd_directive partialDecoding, /* full, partial */ dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ const BYTE* const dictStart, /* only if dict==usingExtDict */ const size_t dictSize /* note : = 0 if noDict */ ) { if (src == NULL) { return -1; } { const BYTE* ip = (const BYTE*) src; const BYTE* const iend = ip + srcSize; BYTE* op = (BYTE*) dst; BYTE* const oend = op + outputSize; BYTE* cpy; const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize; const int safeDecode = (endOnInput==endOnInputSize); const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); /* Set up the "end" pointers for the shortcut. */ const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/; const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/; const BYTE* match; size_t offset; unsigned token; size_t length; DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); /* Special cases */ assert(lowPrefix <= op); if ((endOnInput) && (unlikely(outputSize==0))) { /* Empty output buffer */ if (partialDecoding) return 0; return ((srcSize==1) && (*ip==0)) ? 0 : -1; } if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 
1 : -1); } if ((endOnInput) && unlikely(srcSize==0)) { return -1; } /* Currently the fast loop shows a regression on qualcomm arm chips. */ #if LZ4_FAST_DEC_LOOP if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { DEBUGLOG(6, "skip fast decode loop"); goto safe_decode; } /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */ while (1) { /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ assert(oend - op >= FASTLOOP_SAFE_DISTANCE); if (endOnInput) { assert(ip < iend); } token = *ip++; length = token >> ML_BITS; /* literal length */ assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ /* decode literal length */ if (length == RUN_MASK) { variable_length_error error = ok; length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); if (error == initial_error) { goto _output_error; } if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ /* copy literals */ cpy = op+length; LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); if (endOnInput) { /* LZ4_decompress_safe() */ if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; } LZ4_wildCopy32(op, ip, cpy); } else { /* LZ4_decompress_fast() */ if (cpy>oend-8) { goto safe_literal_copy; } LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : * it doesn't know input length, and only relies on end-of-block properties */ } ip += length; op = cpy; } else { cpy = op+length; if (endOnInput) { /* LZ4_decompress_safe() */ DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length); /* We don't need to check oend, since we check it once for each loop below */ if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; } /* Literals can only be 14, but hope compilers optimize if we copy by 
a register size */ LZ4_memcpy(op, ip, 16); } else { /* LZ4_decompress_fast() */ /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : * it doesn't know input length, and relies on end-of-block properties */ LZ4_memcpy(op, ip, 8); if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); } } ip += length; op = cpy; } /* get offset */ offset = LZ4_readLE16(ip); ip+=2; match = op - offset; assert(match <= op); /* get matchlength */ length = token & ML_MASK; if (length == ML_MASK) { variable_length_error error = ok; if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); if (error != ok) { goto _output_error; } if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ length += MINMATCH; if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { goto safe_match_copy; } } else { length += MINMATCH; if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { goto safe_match_copy; } /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */ if ((dict == withPrefix64k) || (match >= lowPrefix)) { if (offset >= 8) { assert(match >= lowPrefix); assert(match <= op); assert(op + 18 <= oend); LZ4_memcpy(op, match, 8); LZ4_memcpy(op+8, match+8, 8); LZ4_memcpy(op+16, match+16, 2); op += length; continue; } } } if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ /* match starting within external dictionary */ if ((dict==usingExtDict) && (match < lowPrefix)) { if (unlikely(op+length > oend-LASTLITERALS)) { if (partialDecoding) { DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd"); length = MIN(length, (size_t)(oend-op)); } else { goto _output_error; /* end-of-block condition violated */ } } if (length <= (size_t)(lowPrefix-match)) { /* match fits entirely within external dictionary : just copy */ 
memmove(op, dictEnd - (lowPrefix-match), length); op += length; } else { /* match stretches into both external dictionary and current block */ size_t const copySize = (size_t)(lowPrefix - match); size_t const restSize = length - copySize; LZ4_memcpy(op, dictEnd - copySize, copySize); op += copySize; if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ BYTE* const endOfMatch = op + restSize; const BYTE* copyFrom = lowPrefix; while (op < endOfMatch) { *op++ = *copyFrom++; } } else { LZ4_memcpy(op, lowPrefix, restSize); op += restSize; } } continue; } /* copy match within block */ cpy = op + length; assert((op <= oend) && (oend-op >= 32)); if (unlikely(offset<16)) { LZ4_memcpy_using_offset(op, match, cpy, offset); } else { LZ4_wildCopy32(op, match, cpy); } op = cpy; /* wildcopy correction */ } safe_decode: #endif /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */ while (1) { token = *ip++; length = token >> ML_BITS; /* literal length */ assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ /* A two-stage shortcut for the most common case: * 1) If the literal length is 0..14, and there is enough space, * enter the shortcut and copy 16 bytes on behalf of the literals * (in the fast mode, only 8 bytes can be safely copied this way). * 2) Further if the match length is 4..18, copy 18 bytes in a similar * manner; but we ensure that there's enough space in the output for * those 18 bytes earlier, upon entering the shortcut (in other words, * there is a combined check for both stages). */ if ( (endOnInput ? length != RUN_MASK : length <= 8) /* strictly "less than" on input, to re-enter the loop with at least one byte */ && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) { /* Copy the literals */ LZ4_memcpy(op, ip, endOnInput ? 16 : 8); op += length; ip += length; /* The second stage: prepare for match copying, decode full info. * If it doesn't work out, the info won't be wasted. 
*/ length = token & ML_MASK; /* match length */ offset = LZ4_readLE16(ip); ip += 2; match = op - offset; assert(match <= op); /* check overflow */ /* Do not deal with overlapping matches. */ if ( (length != ML_MASK) && (offset >= 8) && (dict==withPrefix64k || match >= lowPrefix) ) { /* Copy the match. */ LZ4_memcpy(op + 0, match + 0, 8); LZ4_memcpy(op + 8, match + 8, 8); LZ4_memcpy(op +16, match +16, 2); op += length + MINMATCH; /* Both stages worked, load the next token. */ continue; } /* The second stage didn't work out, but the info is ready. * Propel it right to the point of match copying. */ goto _copy_match; } /* decode literal length */ if (length == RUN_MASK) { variable_length_error error = ok; length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); if (error == initial_error) { goto _output_error; } if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ } /* copy literals */ cpy = op+length; #if LZ4_FAST_DEC_LOOP safe_literal_copy: #endif LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) ) || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) { /* We've either hit the input parsing restriction or the output parsing restriction. * In the normal scenario, decoding a full block, it must be the last sequence, * otherwise it's an error (invalid input or dimensions). * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow. */ if (partialDecoding) { /* Since we are partial decoding we may be in this block because of the output parsing * restriction, which is not valid since the output buffer is allowed to be undersized. 
*/ assert(endOnInput); DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end") DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length); DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op)); DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip)); /* Finishing in the middle of a literals segment, * due to lack of input. */ if (ip+length > iend) { length = (size_t)(iend-ip); cpy = op + length; } /* Finishing in the middle of a literals segment, * due to lack of output space. */ if (cpy > oend) { cpy = oend; assert(op<=oend); length = (size_t)(oend-op); } } else { /* We must be on the last sequence because of the parsing limitations so check * that we exactly regenerate the original size (must be exact when !endOnInput). */ if ((!endOnInput) && (cpy != oend)) { goto _output_error; } /* We must be on the last sequence (or invalid) because of the parsing limitations * so check that we exactly consume the input and don't overrun the output buffer. */ if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { DEBUGLOG(6, "should have been last run of literals") DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend); DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend); goto _output_error; } } memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */ ip += length; op += length; /* Necessarily EOF when !partialDecoding. * When partialDecoding, it is EOF if we've either * filled the output buffer or * can't proceed with reading an offset for following match. 
*/ if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) { break; } } else { LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */ ip += length; op = cpy; } /* get offset */ offset = LZ4_readLE16(ip); ip+=2; match = op - offset; /* get matchlength */ length = token & ML_MASK; _copy_match: if (length == ML_MASK) { variable_length_error error = ok; length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); if (error != ok) goto _output_error; if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ } length += MINMATCH; #if LZ4_FAST_DEC_LOOP safe_match_copy: #endif if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ /* match starting within external dictionary */ if ((dict==usingExtDict) && (match < lowPrefix)) { if (unlikely(op+length > oend-LASTLITERALS)) { if (partialDecoding) length = MIN(length, (size_t)(oend-op)); else goto _output_error; /* doesn't respect parsing restriction */ } if (length <= (size_t)(lowPrefix-match)) { /* match fits entirely within external dictionary : just copy */ memmove(op, dictEnd - (lowPrefix-match), length); op += length; } else { /* match stretches into both external dictionary and current block */ size_t const copySize = (size_t)(lowPrefix - match); size_t const restSize = length - copySize; LZ4_memcpy(op, dictEnd - copySize, copySize); op += copySize; if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ BYTE* const endOfMatch = op + restSize; const BYTE* copyFrom = lowPrefix; while (op < endOfMatch) *op++ = *copyFrom++; } else { LZ4_memcpy(op, lowPrefix, restSize); op += restSize; } } continue; } assert(match >= lowPrefix); /* copy match within block */ cpy = op + length; /* partialDecoding : may end anywhere within the block */ assert(op<=oend); if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { size_t const mlen = MIN(length, 
(size_t)(oend-op)); const BYTE* const matchEnd = match + mlen; BYTE* const copyEnd = op + mlen; if (matchEnd > op) { /* overlap copy */ while (op < copyEnd) { *op++ = *match++; } } else { LZ4_memcpy(op, match, mlen); } op = copyEnd; if (op == oend) { break; } continue; } if (unlikely(offset<8)) { LZ4_write32(op, 0); /* silence msan warning when offset==0 */ op[0] = match[0]; op[1] = match[1]; op[2] = match[2]; op[3] = match[3]; match += inc32table[offset]; LZ4_memcpy(op+4, match, 4); match -= dec64table[offset]; } else { LZ4_memcpy(op, match, 8); match += 8; } op += 8; if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ if (op < oCopyLimit) { LZ4_wildCopy8(op, match, oCopyLimit); match += oCopyLimit - op; op = oCopyLimit; } while (op < cpy) { *op++ = *match++; } } else { LZ4_memcpy(op, match, 8); if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } } op = cpy; /* wildcopy correction */ } /* end of decoding */ if (endOnInput) { DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst)); return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ } else { return (int) (((const char*)ip)-src); /* Nb of input bytes read */ } /* Overflow error detected */ _output_error: return (int) (-(((const char*)ip)-src))-1; } } /*===== Instantiate the API decoding functions. 
=====*/ LZ4_FORCE_O2 int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, decode_full_block, noDict, (BYTE*)dest, NULL, 0); } LZ4_FORCE_O2 int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) { dstCapacity = MIN(targetOutputSize, dstCapacity); return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, endOnInputSize, partial_decode, noDict, (BYTE*)dst, NULL, 0); } LZ4_FORCE_O2 int LZ4_decompress_fast(const char* source, char* dest, int originalSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, decode_full_block, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 0); } /*===== Instantiate a few more decoding cases, used more than once. =====*/ LZ4_FORCE_O2 /* Exported, an obsolete API function. */ int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, decode_full_block, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 0); } /* Another obsolete API function, paired with the previous one. */ int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) { /* LZ4_decompress_fast doesn't validate match offsets, * and thus serves well with any prefixed dictionary. 
*/ return LZ4_decompress_fast(source, dest, originalSize); } LZ4_FORCE_O2 static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, size_t prefixSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, decode_full_block, noDict, (BYTE*)dest-prefixSize, NULL, 0); } LZ4_FORCE_O2 int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, decode_full_block, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); } LZ4_FORCE_O2 static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, decode_full_block, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); } /* The "double dictionary" mode, for use with e.g. ring buffers: the first part * of the dictionary is passed as prefix, and the second via dictStart + dictSize. * These routines are used only once, in LZ4_decompress_*_continue(). 
*/ LZ4_FORCE_INLINE int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize, size_t prefixSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, decode_full_block, usingExtDict, (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); } LZ4_FORCE_INLINE int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize, size_t prefixSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, decode_full_block, usingExtDict, (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); } /*===== streaming decompression functions =====*/ LZ4_streamDecode_t* LZ4_createStreamDecode(void) { LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t)); LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal)); /* A compilation error here means LZ4_STREAMDECODESIZE is not large enough */ return lz4s; } int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) { if (LZ4_stream == NULL) { return 0; } /* support free on NULL */ FREEMEM(LZ4_stream); return 0; } /*! LZ4_setStreamDecode() : * Use this function to instruct where to find the dictionary. * This function is not necessary if previous data is still available where it was decoded. * Loading a size of 0 is allowed (same effect as no dictionary). * @return : 1 if OK, 0 if error */ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; lz4sd->prefixSize = (size_t) dictSize; lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; lz4sd->externalDict = NULL; lz4sd->extDictSize = 0; return 1; } /*! 
LZ4_decoderRingBufferSize() : * when setting a ring buffer for streaming decompression (optional scenario), * provides the minimum size of this ring buffer * to be compatible with any source respecting maxBlockSize condition. * Note : in a ring buffer scenario, * blocks are presumed decompressed next to each other. * When not enough space remains for next block (remainingSize < maxBlockSize), * decoding resumes from beginning of ring buffer. * @return : minimum ring buffer size, * or 0 if there is an error (invalid maxBlockSize). */ int LZ4_decoderRingBufferSize(int maxBlockSize) { if (maxBlockSize < 0) return 0; if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0; if (maxBlockSize < 16) maxBlockSize = 16; return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); } /* *_continue() : These decoding functions allow decompression of multiple blocks in "streaming" mode. Previously decoded blocks must still be available at the memory position where they were decoded. If it's not possible, save the relevant part of decoded data into a safe buffer, and indicate where it stands using LZ4_setStreamDecode() */ LZ4_FORCE_O2 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; int result; if (lz4sd->prefixSize == 0) { /* The first call, no dictionary yet. */ assert(lz4sd->extDictSize == 0); result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); if (result <= 0) return result; lz4sd->prefixSize = (size_t)result; lz4sd->prefixEnd = (BYTE*)dest + result; } else if (lz4sd->prefixEnd == (BYTE*)dest) { /* They're rolling the current segment. 
*/ if (lz4sd->prefixSize >= 64 KB - 1) result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); else if (lz4sd->extDictSize == 0) result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, lz4sd->prefixSize); else result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize, lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize += (size_t)result; lz4sd->prefixEnd += result; } else { /* The buffer wraps around, or they're switching to another buffer. */ lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = (size_t)result; lz4sd->prefixEnd = (BYTE*)dest + result; } return result; } LZ4_FORCE_O2 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; int result; assert(originalSize >= 0); if (lz4sd->prefixSize == 0) { assert(lz4sd->extDictSize == 0); result = LZ4_decompress_fast(source, dest, originalSize); if (result <= 0) return result; lz4sd->prefixSize = (size_t)originalSize; lz4sd->prefixEnd = (BYTE*)dest + originalSize; } else if (lz4sd->prefixEnd == (BYTE*)dest) { if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0) result = LZ4_decompress_fast(source, dest, originalSize); else result = LZ4_decompress_fast_doubleDict(source, dest, originalSize, lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize += (size_t)originalSize; lz4sd->prefixEnd += originalSize; } else { lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; result = 
LZ4_decompress_fast_extDict(source, dest, originalSize, lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = (size_t)originalSize; lz4sd->prefixEnd = (BYTE*)dest + originalSize; } return result; } /* Advanced decoding functions : *_usingDict() : These decoding functions work the same as "_continue" ones, the dictionary must be explicitly provided within parameters */ int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) { if (dictSize==0) return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); if (dictStart+dictSize == dest) { if (dictSize >= 64 KB - 1) { return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); } assert(dictSize >= 0); return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize); } assert(dictSize >= 0); return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize); } int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) { if (dictSize==0 || dictStart+dictSize == dest) return LZ4_decompress_fast(source, dest, originalSize); assert(dictSize >= 0); return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize); } /*=************************************************* * Obsolete Functions ***************************************************/ /* obsolete compression functions */ int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4_compress_default(source, dest, inputSize, maxOutputSize); } int LZ4_compress(const char* src, char* dest, int srcSize) { return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize)); } int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) { 
return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); } int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); } int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity) { return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1); } int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) { return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); } /* These decompression functions are deprecated and should no longer be used. They are only provided here for compatibility with older user programs. - LZ4_uncompress is totally equivalent to LZ4_decompress_fast - LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe */ int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); } int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } /* Obsolete Streaming functions */ int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; } int LZ4_resetStreamState(void* state, char* inputBuffer) { (void)inputBuffer; LZ4_resetStream((LZ4_stream_t*)state); return 0; } void* LZ4_create (char* inputBuffer) { (void)inputBuffer; return LZ4_createStream(); } char* LZ4_slideInputBuffer (void* state) { /* avoid const char * -> char * conversion warning */ return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary; } #endif /* LZ4_COMMONDEFS_ONLY */ bitshuffle-0.5.1/lz4/lz4.h000066400000000000000000001176351434025530100152550ustar00rootroot00000000000000/* * LZ4 - Fast LZ compression algorithm * Header File * Copyright (C) 2011-present, Yann Collet. 
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - LZ4 homepage : http://www.lz4.org - LZ4 source repository : https://github.com/lz4/lz4 */ #if defined (__cplusplus) extern "C" { #endif #ifndef LZ4_H_2983827168210 #define LZ4_H_2983827168210 /* --- Dependency --- */ #include /* size_t */ /** Introduction LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core, scalable with multi-cores CPU. It features an extremely fast decoder, with speed in multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. The LZ4 compression library provides in-memory compression and decompression functions. It gives full buffer control to user. 
Compression can be done in:
    - a single step (described as Simple Functions)
    - a single step, reusing a context (described in Advanced Functions)
    - unbounded multiple steps (described as Streaming compression)

  lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
  Decompressing such a compressed block requires additional metadata.
  Exact metadata depends on exact decompression function.
  For the typical case of LZ4_decompress_safe(),
  metadata includes block's compressed size, and maximum bound of decompressed size.
  Each application is free to encode and pass such metadata in whichever way it wants.

  lz4.h only handles blocks; it cannot generate Frames.

  Blocks are different from Frames (doc/lz4_Frame_format.md).
  Frames bundle both blocks and metadata in a specified manner.
  Embedding metadata is required for compressed data to be self-contained and portable.
  Frame format is delivered through a companion API, declared in lz4frame.h.
  The `lz4` CLI can only manage frames.
*/

/*^***************************************************************
*  Export parameters
*****************************************************************/
/*
*  LZ4_DLL_EXPORT :
*  Enable exporting of functions when building a Windows DLL
*  LZ4LIB_VISIBILITY :
*  Control library symbols visibility.
*/ #ifndef LZ4LIB_VISIBILITY # if defined(__GNUC__) && (__GNUC__ >= 4) # define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default"))) # else # define LZ4LIB_VISIBILITY # endif #endif #if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1) # define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY #elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1) # define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ #else # define LZ4LIB_API LZ4LIB_VISIBILITY #endif /*------ Version ------*/ #define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ #define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */ #define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) #define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE #define LZ4_QUOTE(str) #str #define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str) #define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version */ LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version */ /*-************************************ * Tuning parameter **************************************/ /*! * LZ4_MEMORY_USAGE : * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) * Increasing memory usage improves compression ratio. * Reduced memory usage may improve speed, thanks to better cache locality. * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ #ifndef LZ4_MEMORY_USAGE # define LZ4_MEMORY_USAGE 14 #endif /*-************************************ * Simple Functions **************************************/ /*! 
LZ4_compress_default() :
 *  Compresses 'srcSize' bytes from buffer 'src'
 *  into already allocated 'dst' buffer of size 'dstCapacity'.
 *  Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
 *  It also runs faster, so it's a recommended setting.
 *  If the function cannot compress 'src' into a more limited 'dst' budget,
 *  compression stops *immediately*, and the function result is zero.
 *  In which case, 'dst' content is undefined (invalid).
 *      srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
 *      dstCapacity : size of buffer 'dst' (which must be already allocated)
 *     @return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
 *                or 0 if compression fails
 * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
 */
LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);

/*! LZ4_decompress_safe() :
 *  compressedSize : is the exact complete size of the compressed block.
 *  dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
 * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
 *           If destination buffer is not large enough, decoding will stop and output an error code (negative value).
 *           If the source stream is detected malformed, the function will stop decoding and return a negative result.
 * Note 1 : This function is protected against malicious data packets :
 *          it will never write outside 'dst' buffer, nor read outside 'source' buffer,
 *          even if the compressed block is maliciously modified to order the decoder to do these actions.
 *          In such case, the decoder stops immediately, and considers the compressed block malformed.
 * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
* The implementation is free to send / store / derive this information in whichever way is most beneficial. * If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead. */ LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity); /*-************************************ * Advanced Functions **************************************/ #define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ #define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) /*! LZ4_compressBound() : Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible) This function is primarily useful for memory allocation purposes (destination buffer size). Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example). Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize) inputSize : max supported value is LZ4_MAX_INPUT_SIZE return : maximum output size in a "worst case" scenario or 0, if input size is incorrect (too large or negative) */ LZ4LIB_API int LZ4_compressBound(int inputSize); /*! LZ4_compress_fast() : Same as LZ4_compress_default(), but allows selection of "acceleration" factor. The larger the acceleration value, the faster the algorithm, but also the lesser the compression. It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed. An acceleration value of "1" is the same as regular LZ4_compress_default() Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c). Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c). */ LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); /*! 
LZ4_compress_fast_extState() :
 *  Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
 *  Use LZ4_sizeofState() to know how much memory must be allocated,
 *  and allocate it on 8-bytes boundaries (using `malloc()` typically).
 *  Then, provide this buffer as `void* state` to compression function.
 */
LZ4LIB_API int LZ4_sizeofState(void);
LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);


/*! LZ4_compress_destSize() :
 *  Reverse the logic : compresses as much data as possible from 'src' buffer
 *  into already allocated buffer 'dst', of size >= 'targetDstSize'.
 *  This function either compresses the entire 'src' content into 'dst' if it's large enough,
 *  or fill 'dst' buffer completely with as much data as possible from 'src'.
 *  note: acceleration parameter is fixed to "default".
 *
 * *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'.
 *               New value is necessarily <= input value.
 * @return : Nb bytes written into 'dst' (necessarily <= targetDstSize)
 *           or 0 if compression fails.
 *
 * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
 *        the produced compressed content could, in specific circumstances,
 *        require to be decompressed into a destination buffer larger
 *        by at least 1 byte than the content to decompress.
 *        If an application uses `LZ4_compress_destSize()`,
 *        it's highly recommended to update liblz4 to v1.9.2 or better.
 *        If this can't be done or ensured,
 *        the receiving decompression function should provide
 *        a dstCapacity which is > decompressedSize, by at least 1 byte.
 *        See https://github.com/lz4/lz4/issues/859 for details
 */
LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);


/*! LZ4_decompress_safe_partial() :
 *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
 *  into destination buffer 'dst' of size 'dstCapacity'.
*  Up to 'targetOutputSize' bytes will be decoded.
 *  The function stops decoding on reaching this objective.
 *  This can be useful to boost performance
 *  whenever only the beginning of a block is required.
 *
 * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
 *           If source stream is detected malformed, function returns a negative result.
 *
 * Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
 *
 * Note 2 : targetOutputSize must be <= dstCapacity
 *
 * Note 3 : this function effectively stops decoding on reaching targetOutputSize,
 *          so dstCapacity is kind of redundant.
 *          This is because in older versions of this function,
 *          decoding operation would still write complete sequences.
 *          Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
 *          it could write more bytes, though only up to dstCapacity.
 *          Some "margin" used to be required for this operation to work properly.
 *          Thankfully, this is no longer necessary.
 *          The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
 *
 * Note 4 : If srcSize is the exact size of the block,
 *          then targetOutputSize can be any value,
 *          including larger than the block's decompressed size.
 *          The function will, at most, generate block's decompressed size.
 *
 * Note 5 : If srcSize is _larger_ than block's compressed size,
 *          then targetOutputSize **MUST** be <= block's decompressed size.
 *          Otherwise, *silent corruption will occur*.
 */
LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);


/*-*********************************************
*  Streaming Compression Functions
***********************************************/
typedef union LZ4_stream_u LZ4_stream_t;  /* incomplete type (defined later) */

/* Creation / release of an LZ4_stream_t.
 * NOTE(review): the meaning of LZ4_freeStream()'s int result is not documented in this header - confirm against lz4.c. */
LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
LZ4LIB_API int           LZ4_freeStream (LZ4_stream_t* streamPtr);

/*!
LZ4_resetStream_fast() : v1.9.0+
 *  Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
 *  (e.g., LZ4_compress_fast_continue()).
 *
 *  An LZ4_stream_t must be initialized once before usage.
 *  This is automatically done when created by LZ4_createStream().
 *  However, should the LZ4_stream_t be simply declared on stack (for example),
 *  it's necessary to initialize it first, using LZ4_initStream().
 *
 *  After init, start any new stream with LZ4_resetStream_fast().
 *  A same LZ4_stream_t can be re-used multiple times consecutively
 *  and compress multiple streams,
 *  provided that it starts each new stream with LZ4_resetStream_fast().
 *
 *  LZ4_resetStream_fast() is much faster than LZ4_initStream(),
 *  but is not compatible with memory regions containing garbage data.
 *
 *  Note: it's only useful to call LZ4_resetStream_fast()
 *        in the context of streaming compression.
 *        The *extState* functions perform their own resets.
 *        Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
 */
LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);

/*! LZ4_loadDict() :
 *  Use this function to reference a static dictionary into LZ4_stream_t.
 *  The dictionary must remain available during compression.
 *  LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
 *  The same dictionary will have to be loaded on decompression side for successful decoding.
 *  Dictionaries are useful for better compression of small data (KB range).
 *  While LZ4 accepts any input as dictionary,
 *  results are generally better when using Zstandard's Dictionary Builder.
 *  Loading a size of 0 is allowed, and is the same as reset.
 * @return : loaded dictionary size, in bytes (necessarily <= 64 KB)
 */
LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);

/*! LZ4_compress_fast_continue() :
 *  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
 * 'dst' buffer must be already allocated.
*  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
 *
 * @return : size of compressed block
 *           or 0 if there is an error (typically, cannot fit into 'dst').
 *
 *  Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
 *           Each block has precise boundaries.
 *           Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
 *           It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
 *
 *  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory !
 *
 *  Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
 *           Make sure that buffers are separated, by at least one byte.
 *           This construction ensures that each block only depends on previous block.
 *
 *  Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
 *
 *  Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
 */
LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);

/*! LZ4_saveDict() :
 *  If last 64KB data cannot be guaranteed to remain available at its current memory location,
 *  save it into a safer place (char* safeBuffer).
 *  This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
 *  but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
 * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
 */
LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);


/*-**********************************************
*  Streaming Decompression Functions
*  Bufferless synchronous API
************************************************/
/* only the union tag is named at this point, so LZ4_streamDecode_t is used through pointers in the declarations that follow */
typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* tracking context */

/*!
LZ4_createStreamDecode() and LZ4_freeStreamDecode() : * creation / destruction of streaming decompression tracking context. * A tracking context can be re-used multiple times. */ LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void); LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); /*! LZ4_setStreamDecode() : * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. * Use this function to start decompression of a new stream of blocks. * A dictionary can optionally be set. Use NULL or size 0 for a reset order. * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. * @return : 1 if OK, 0 if error */ LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); /*! LZ4_decoderRingBufferSize() : v1.8.2+ * Note : in a ring buffer scenario (optional), * blocks are presumed decompressed next to each other * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize), * at which stage it resumes from beginning of ring buffer. * When setting such a ring buffer for streaming decompression, * provides the minimum size of this ring buffer * to be compatible with any source respecting maxBlockSize condition. * @return : minimum ring buffer size, * or 0 if there is an error (invalid maxBlockSize). */ LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize); #define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */ /*! LZ4_decompress_*_continue() : * These decoding functions allow decompression of consecutive blocks in "streaming" mode. * A block is an unsplittable entity, it must be presented entirely to a decompression function. * Decompression functions only accepts one block at a time. 
*  The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
 *  If less than 64KB of data has been decoded, all the data must be present.
 *
 *  Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
 *  - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
 *    maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
 *    In which case, encoding and decoding buffers do not need to be synchronized.
 *    Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
 *  - Synchronized mode :
 *    Decompression buffer size is _exactly_ the same as compression buffer size,
 *    and follows exactly same update rule (block boundaries at same positions),
 *    and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
 *    _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
 *  - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
 *    In which case, encoding and decoding buffers do not need to be synchronized,
 *    and encoding ring buffer can have any size, including small ones ( < 64 KB).
 *
 *  Whenever these conditions are not possible,
 *  save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
 *  then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
*/
/* takes the tracking context first, then the source / destination buffers and their sizes */
LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);


/*! LZ4_decompress_*_usingDict() :
 *  These decoding functions work the same as
 *  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
 *  They are stand-alone, and don't need an LZ4_streamDecode_t structure.
* Dictionary is presumed stable : it must remain accessible and unmodified during decompression. * Performance tip : Decompression speed can be substantially increased * when dst == dictStart + dictSize. */ LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize); #endif /* LZ4_H_2983827168210 */ /*^************************************* * !!!!!! STATIC LINKING ONLY !!!!!! ***************************************/ /*-**************************************************************************** * Experimental section * * Symbols declared in this section must be considered unstable. Their * signatures or semantics may change, or they may be removed altogether in the * future. They are therefore only safe to depend on when the caller is * statically linked against the library. * * To protect against unsafe usage, not only are the declarations guarded, * the definitions are hidden by default * when building LZ4 as a shared/dynamic library. * * In order to access these declarations, * define LZ4_STATIC_LINKING_ONLY in your application * before including LZ4's headers. * * In order to make their implementations accessible dynamically, you must * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library. ******************************************************************************/ #ifdef LZ4_STATIC_LINKING_ONLY #ifndef LZ4_STATIC_3504398509 #define LZ4_STATIC_3504398509 #ifdef LZ4_PUBLISH_STATIC_FUNCTIONS #define LZ4LIB_STATIC_API LZ4LIB_API #else #define LZ4LIB_STATIC_API #endif /*! LZ4_compress_fast_extState_fastReset() : * A variant of LZ4_compress_fast_extState(). * * Using this variant avoids an expensive initialization step. * It is only safe to call if the state buffer is known to be correctly initialized already * (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized"). 
*  From a high level, the difference is that
 *  this function initializes the provided state with a call to something like LZ4_resetStream_fast()
 *  while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
 */
LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);

/*! LZ4_attach_dictionary() :
 *  This is an experimental API that allows
 *  efficient use of a static dictionary many times.
 *
 *  Rather than re-loading the dictionary buffer into a working context before
 *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
 *  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
 *  in which the working stream references the dictionary stream in-place.
 *
 *  Several assumptions are made about the state of the dictionary stream.
 *  Currently, only streams which have been prepared by LZ4_loadDict() should
 *  be expected to work.
 *
 *  Alternatively, the provided dictionaryStream may be NULL,
 *  in which case any existing dictionary stream is unset.
 *
 *  If a dictionary is provided, it replaces any pre-existing stream history.
 *  The dictionary contents are the only history that can be referenced and
 *  logically immediately precede the data compressed in the first subsequent
 *  compression call.
 *
 *  The dictionary will only remain attached to the working stream through the
 *  first compression call, at the end of which it is cleared. The dictionary
 *  stream (and source buffer) must remain in-place / accessible / unchanged
 *  through the completion of the first compression call on the stream.
 */
LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);


/*! In-place compression and decompression
 *
 * It's possible to have input and output sharing the same buffer,
 * for highly constrained memory environments.
* In both cases, it requires input to lie at the end of the buffer,
 * and decompression to start at beginning of the buffer.
 * Buffer size must feature some margin, hence be larger than final size.
 *
 * |<------------------------buffer--------------------------------->|
 *                             |<-----------compressed data--------->|
 * |<-----------decompressed size------------------>|
 *                                                  |<----margin---->|
 *
 * This technique is more useful for decompression,
 * since decompressed size is typically larger,
 * and margin is short.
 *
 * In-place decompression will work inside any buffer
 * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
 * This presumes that decompressedSize > compressedSize.
 * Otherwise, it means compression actually expanded data,
 * and it would be more efficient to store such data with a flag indicating it's not compressed.
 * This can happen when data is not compressible (already compressed, or encrypted).
 *
 * For in-place compression, margin is larger, as it must be able to cope with both
 * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
 * and data expansion, which can happen when input is not compressible.
 * As a consequence, buffer size requirements are much higher,
 * and memory savings offered by in-place compression are more limited.
 *
 * There are ways to limit this cost for compression :
 * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
 *   Note that it is a compile-time constant, so all compressions will apply this limit.
 *   Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
 *   so it's a reasonable trick when inputs are known to be small.
 * - Require the compressor to deliver a "maximum compressed size".
 *   This is the `dstCapacity` parameter in `LZ4_compress*()`.
 *   When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
 *   in which case, the return code will be 0 (zero).
* The caller must be ready for these cases to happen, * and typically design a backup scheme to send data uncompressed. * The combination of both techniques can significantly reduce * the amount of margin required for in-place compression. * * In-place compression can work in any buffer * which size is >= (maxCompressedSize) * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success. * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX, * so it's possible to reduce memory requirements by playing with them. */ #define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32) #define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */ #ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ # define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ #endif #define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */ #define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */ #endif /* LZ4_STATIC_3504398509 */ #endif /* LZ4_STATIC_LINKING_ONLY */ #ifndef LZ4_H_98237428734687 #define LZ4_H_98237428734687 /*-************************************************************ * Private Definitions ************************************************************** * Do not use these definitions directly. * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`. 
* Accessing members will expose user code to API and/or ABI break in future versions of the library. **************************************************************/ #define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) #define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) #define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */ #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) # include typedef int8_t LZ4_i8; typedef uint8_t LZ4_byte; typedef uint16_t LZ4_u16; typedef uint32_t LZ4_u32; #else typedef signed char LZ4_i8; typedef unsigned char LZ4_byte; typedef unsigned short LZ4_u16; typedef unsigned int LZ4_u32; #endif typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; struct LZ4_stream_t_internal { LZ4_u32 hashTable[LZ4_HASH_SIZE_U32]; LZ4_u32 currentOffset; LZ4_u32 tableType; const LZ4_byte* dictionary; const LZ4_stream_t_internal* dictCtx; LZ4_u32 dictSize; }; typedef struct { const LZ4_byte* externalDict; size_t extDictSize; const LZ4_byte* prefixEnd; size_t prefixSize; } LZ4_streamDecode_t_internal; /*! LZ4_stream_t : * Do not use below internal definitions directly ! * Declare or allocate an LZ4_stream_t instead. * LZ4_stream_t can also be created using LZ4_createStream(), which is recommended. * The structure definition can be convenient for static allocation * (on stack, or as part of larger structure). * Init this structure with LZ4_initStream() before first use. * note : only use this definition in association with static linking ! * this definition is not API/ABI safe, and may change in future versions. */ #define LZ4_STREAMSIZE 16416 /* static size, for inter-version compatibility */ #define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*)) union LZ4_stream_u { void* table[LZ4_STREAMSIZE_VOIDP]; LZ4_stream_t_internal internal_donotuse; }; /* previously typedef'd to LZ4_stream_t */ /*! LZ4_initStream() : v1.9.0+ * An LZ4_stream_t structure must be initialized at least once. 
* This is automatically done when invoking LZ4_createStream(), * but it's not when the structure is simply declared on stack (for example). * * Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t. * It can also initialize any arbitrary buffer of sufficient size, * and will @return a pointer of proper type upon initialization. * * Note : initialization fails if size and alignment conditions are not respected. * In which case, the function will @return NULL. * Note2: An LZ4_stream_t structure guarantees correct alignment and size. * Note3: Before v1.9.0, use LZ4_resetStream() instead */ LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size); /*! LZ4_streamDecode_t : * information structure to track an LZ4 stream during decompression. * init this structure using LZ4_setStreamDecode() before first use. * note : only use in association with static linking ! * this definition is not API/ABI safe, * and may change in a future version ! */ #define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ ) #define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long)) union LZ4_streamDecode_u { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; LZ4_streamDecode_t_internal internal_donotuse; } ; /* previously typedef'd to LZ4_streamDecode_t */ /*-************************************ * Obsolete Functions **************************************/ /*! Deprecation warnings * * Deprecated functions make the compiler generate a warning when invoked. * This is meant to invite users to update their source code. * Should deprecation warnings be a problem, it is generally possible to disable them, * typically with -Wno-deprecated-declarations for gcc * or _CRT_SECURE_NO_WARNINGS in Visual. * * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS * before including the header file. 
*/ #ifdef LZ4_DISABLE_DEPRECATE_WARNINGS # define LZ4_DEPRECATED(message) /* disable deprecation warnings */ #else # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ # define LZ4_DEPRECATED(message) [[deprecated(message)]] # elif defined(_MSC_VER) # define LZ4_DEPRECATED(message) __declspec(deprecated(message)) # elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) # define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) # elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) # define LZ4_DEPRECATED(message) __attribute__((deprecated)) # else # pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") # define LZ4_DEPRECATED(message) /* disabled */ # endif #endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */ /*! Obsolete compression functions (since v1.7.3) */ LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); /*! 
Obsolete decompression functions (since v1.8.0) */ LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); /* Obsolete streaming functions (since v1.7.0) * degraded functionality; do not use! * * In order to perform streaming compression, these functions depended on data * that is no longer tracked in the state. They have been preserved as well as * possible: using them will still produce a correct output. However, they don't * actually retain any history between compression calls. The compression ratio * achieved will therefore be no better than compressing each chunk * independently. */ LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer); LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void); LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer); LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state); /*! Obsolete streaming decoding functions (since v1.7.0) */ LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); /*! Obsolete LZ4_decompress_fast variants (since v1.9.0) : * These functions used to be faster than LZ4_decompress_safe(), * but this is no longer the case. They are now slower. * This is because LZ4_decompress_fast() doesn't know the input size, * and therefore must progress more cautiously into the input buffer to not read beyond the end of block. 
* On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability. * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated. * * The last remaining LZ4_decompress_fast() specificity is that * it can decompress a block without knowing its compressed size. * Such functionality can be achieved in a more secure manner * by employing LZ4_decompress_safe_partial(). * * Parameters: * originalSize : is the uncompressed size to regenerate. * `dst` must be already allocated, its size must be >= 'originalSize' bytes. * @return : number of bytes read from source buffer (== compressed size). * The function expects to finish at block's end exactly. * If the source stream is detected malformed, the function stops decoding and returns a negative result. * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer. * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds. * Also, since match offsets are not validated, match reads from 'src' may underflow too. * These issues never happen if input (compressed) data is correct. * But they may happen if input data is invalid (error or intentional tampering). * As a consequence, use these functions in trusted environments with trusted data **only**. */ LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); LZ4_DEPRECATED("This function is deprecated and unsafe. 
Consider using LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); /*! LZ4_resetStream() : * An LZ4_stream_t structure must be initialized at least once. * This is done with LZ4_initStream(), or LZ4_resetStream(). * Consider switching to LZ4_initStream(), * invoking LZ4_resetStream() will trigger deprecation warnings in the future. */ LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr); #endif /* LZ4_H_98237428734687 */ #if defined (__cplusplus) } #endif bitshuffle-0.5.1/lzf/000077500000000000000000000000001434025530100144405ustar00rootroot00000000000000bitshuffle-0.5.1/lzf/LICENSE.txt000066400000000000000000000030361434025530100162650ustar00rootroot00000000000000Copyright Notice and Statement for LZF filter Copyright (c) 2008-2009 Andrew Collette http://h5py.alfven.org All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: a. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. b. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. c. Neither the name of the author nor the names of contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. bitshuffle-0.5.1/lzf/README.txt000066400000000000000000000050671434025530100161460ustar00rootroot00000000000000=============================== LZF filter for HDF5, revision 3 =============================== The LZF filter provides high-speed compression with acceptable compression performance, resulting in much faster performance than DEFLATE, at the cost of a slightly lower compression ratio. It's appropriate for large datasets of low to moderate complexity, for which some compression is much better than none, but for which the speed of DEFLATE is unacceptable. This filter has been tested against HDF5 versions 1.6.5 through 1.8.3. It is released under the BSD license (see LICENSE.txt for details). Using the filter from HDF5 -------------------------- There is exactly one new public function declared in lzf_filter.h, with the following signature: int register_lzf(void) Calling this will register the filter with the HDF5 library. A non-negative return value indicates success. If the registration fails, an error is pushed onto the current error stack and a negative value is returned. It's strongly recommended to use the SHUFFLE filter with LZF, as it's cheap, supported by all current versions of HDF5, and can significantly improve the compression ratio. An example C program ("example.c") is included which demonstrates the proper use of the filter. 
Compiling --------- The filter consists of a single .c file and header, along with an embedded version of the LZF compression library. Since the filter is stateless, it's recommended to statically link the entire thing into your program; for example: $ gcc -O2 -lhdf5 lzf/*.c lzf_filter.c myprog.c -o myprog It can also be built as a shared library, although you will have to install the resulting library somewhere the runtime linker can find it: $ gcc -O2 -lhdf5 -fPIC -shared lzf/*.c lzf_filter.c -o liblzf_filter.so A similar procedure should be used for building C++ code. As in these examples, using option -O1 or higher is strongly recommended for increased performance. Contact ------- This filter is maintained as part of the HDF5 for Python (h5py) project. The goal of h5py is to provide access to the majority of the HDF5 C API and feature set from Python. The most recent version of h5py (1.1) includes the LZF filter by default. * Downloads and bug tracker: http://h5py.googlecode.com * Main web site and documentation: http://h5py.alfven.org * Contact email: h5py at alfven dot org History of changes ------------------ Revision 3 (6/25/09) Fix issue with changed filter struct definition under HDF5 1.8.3. Revision 2 Minor speed enhancement. Revision 1 Initial release. bitshuffle-0.5.1/lzf/README_bitshuffle.txt000066400000000000000000000003151434025530100203500ustar00rootroot00000000000000The LZF filter for HDF5 is part of the h5py project (http://h5py.alfven.org). The version included with bitshuffle is from version 2.3 of h5py with no modifications other than the addition of this README. bitshuffle-0.5.1/lzf/example.c000066400000000000000000000052661434025530100162500ustar00rootroot00000000000000/* Copyright (C) 2009 Andrew Collette http://h5py.alfven.org License: BSD (see LICENSE.txt) Example program demonstrating use of the LZF filter from C code. To compile this program: h5cc -DH5_USE_16_API lzf/*.c lzf_filter.c example.c -o example To run: $ ./example Success! 
$ h5ls -v test_lzf.hdf5 Opened "test_lzf.hdf5" with sec2 driver. dset Dataset {100/100, 100/100, 100/100} Location: 0:1:0:976 Links: 1 Modified: 2009-02-15 16:35:11 PST Chunks: {1, 100, 100} 40000 bytes Storage: 4000000 logical bytes, 174288 allocated bytes, 2295.05% utilization Filter-0: shuffle-2 OPT {4} Filter-1: lzf-32000 OPT {1, 261, 40000} Type: native float */ #include #include "hdf5.h" #include "lzf_filter.h" #define SIZE 100*100*100 #define SHAPE {100,100,100} #define CHUNKSHAPE {1,100,100} int main(){ static float data[SIZE]; static float data_out[SIZE]; const hsize_t shape[] = SHAPE; const hsize_t chunkshape[] = CHUNKSHAPE; int r, i; int return_code = 1; hid_t fid, sid, dset, plist = 0; for(i=0; i0) H5Dclose(dset); if(sid>0) H5Sclose(sid); if(plist>0) H5Pclose(plist); if(fid>0) H5Fclose(fid); return return_code; } bitshuffle-0.5.1/lzf/lzf/000077500000000000000000000000001434025530100152335ustar00rootroot00000000000000bitshuffle-0.5.1/lzf/lzf/lzf.h000066400000000000000000000104751434025530100162060ustar00rootroot00000000000000/* * Copyright (c) 2000-2008 Marc Alexander Lehmann * * Redistribution and use in source and binary forms, with or without modifica- * tion, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * * Alternatively, the contents of this file may be used under the terms of * the GNU General Public License ("GPL") version 2 or any later version, * in which case the provisions of the GPL are applicable instead of * the above. If you wish to allow the use of your version of this file * only under the terms of the GPL and not to allow others to use your * version of this file under the BSD license, indicate your decision * by deleting the provisions above and replace them with the notice * and other provisions required by the GPL. If you do not delete the * provisions above, a recipient may use your version of this file under * either the BSD or the GPL. */ #ifndef LZF_H #define LZF_H /*********************************************************************** ** ** lzf -- an extremely fast/free compression/decompression-method ** http://liblzf.plan9.de/ ** ** This algorithm is believed to be patent-free. ** ***********************************************************************/ #define LZF_VERSION 0x0105 /* 1.5, API version */ /* * Compress in_len bytes stored at the memory block starting at * in_data and write the result to out_data, up to a maximum length * of out_len bytes. 
*
 * If the output buffer is not large enough or any error occurs return 0,
 * otherwise return the number of bytes used, which might be considerably
 * more than in_len (but less than 104% of the original size), so it
 * makes sense to always use out_len == in_len - 1, to ensure _some_
 * compression, and store the data uncompressed otherwise (with a flag, of
 * course).
 *
 * lzf_compress might use different algorithms on different systems and
 * even different runs, thus might result in different compressed strings
 * depending on the phase of the moon or similar factors. However, all
 * these strings are architecture-independent and will result in the
 * original data when decompressed using lzf_decompress.
 *
 * The buffers must not be overlapping.
 *
 * If the option LZF_STATE_ARG is enabled, an extra argument must be
 * supplied which is not reflected in this header file. Refer to lzfP.h
 * and lzf_c.c.
 *
 */
unsigned int
lzf_compress (const void *const in_data,  unsigned int in_len,
              void             *out_data, unsigned int out_len);

/*
 * Decompress data compressed with some version of the lzf_compress
 * function and stored at location in_data and length in_len. The result
 * will be stored at out_data up to a maximum of out_len characters.
 *
 * If the output buffer is not large enough to hold the decompressed
 * data, a 0 is returned and errno is set to E2BIG. Otherwise the number
 * of decompressed bytes (i.e. the original length of the data) is
 * returned.
 *
 * If an error in the compressed data is detected, a zero is returned and
 * errno is set to EINVAL.
 *
 * This function is very fast, about as fast as a copying loop.
*/ unsigned int lzf_decompress (const void *const in_data, unsigned int in_len, void *out_data, unsigned int out_len); #endif bitshuffle-0.5.1/lzf/lzf/lzfP.h000066400000000000000000000125101434025530100163160ustar00rootroot00000000000000/* * Copyright (c) 2000-2007 Marc Alexander Lehmann * * Redistribution and use in source and binary forms, with or without modifica- * tion, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * * Alternatively, the contents of this file may be used under the terms of * the GNU General Public License ("GPL") version 2 or any later version, * in which case the provisions of the GPL are applicable instead of * the above. 
If you wish to allow the use of your version of this file * only under the terms of the GPL and not to allow others to use your * version of this file under the BSD license, indicate your decision * by deleting the provisions above and replace them with the notice * and other provisions required by the GPL. If you do not delete the * provisions above, a recipient may use your version of this file under * either the BSD or the GPL. */ #ifndef LZFP_h #define LZFP_h #define STANDALONE 1 /* at the moment, this is ok. */ #ifndef STANDALONE # include "lzf.h" #endif /* * Size of hashtable is (1 << HLOG) * sizeof (char *) * decompression is independent of the hash table size * the difference between 15 and 14 is very small * for small blocks (and 14 is usually a bit faster). * For a low-memory/faster configuration, use HLOG == 13; * For best compression, use 15 or 16 (or more, up to 23). */ #ifndef HLOG # define HLOG 17 /* Avoid pathological case at HLOG=16 A.C. 2/15/09 */ #endif /* * Sacrifice very little compression quality in favour of compression speed. * This gives almost the same compression as the default code, and is * (very roughly) 15% faster. This is the preferred mode of operation. */ #ifndef VERY_FAST # define VERY_FAST 1 #endif /* * Sacrifice some more compression quality in favour of compression speed. * (roughly 1-2% worse compression for large blocks and * 9-10% for small, redundant, blocks and >>20% better speed in both cases) * In short: when in need for speed, enable this for binary data, * possibly disable this for text data. */ #ifndef ULTRA_FAST # define ULTRA_FAST 1 #endif /* * Unconditionally aligning does not cost very much, so do it if unsure */ #ifndef STRICT_ALIGN # define STRICT_ALIGN !(defined(__i386) || defined (__amd64)) #endif /* * You may choose to pre-set the hash table (might be faster on some * modern cpus and large (>>64k) blocks, and also makes compression * deterministic/repeatable when the configuration otherwise is the same). 
*/
#ifndef INIT_HTAB
# define INIT_HTAB 0
#endif

/* =======================================================================
   Changing things below this line may break the HDF5 LZF filter.
   A.C. 2/15/09
   =======================================================================
*/

/*
 * Avoid assigning values to the errno variable? For some embedding purposes
 * (the Linux kernel, for example), this is necessary. NOTE: this breaks
 * the documentation in lzf.h.
 */
#ifndef AVOID_ERRNO
# define AVOID_ERRNO 0
#endif

/*
 * Whether to pass the LZF_STATE variable as argument, or allocate it
 * on the stack. For small-stack environments, define this to 1.
 * NOTE: this breaks the prototype in lzf.h.
 */
#ifndef LZF_STATE_ARG
# define LZF_STATE_ARG 0
#endif

/*
 * Whether to add extra checks for input validity in lzf_decompress
 * and return EINVAL if the input stream has been corrupted. This
 * only shields against overflowing the input buffer and will not
 * detect most corrupted streams.
 * This check is not normally noticeable on modern hardware
 * (<1% slowdown), but might slow down older cpus considerably.
 */
#ifndef CHECK_INPUT
# define CHECK_INPUT 1
#endif

/*****************************************************************************/
/* nothing should be changed below */

/* Byte type used throughout the compressor and decompressor. */
typedef unsigned char u8;

/* Hash table type: (1 << HLOG) pointers to const bytes. */
typedef const u8 *LZF_STATE[1 << (HLOG)];

#if !STRICT_ALIGN
/* for unaligned accesses we need a 16 bit datatype.
*/ # include # if USHRT_MAX == 65535 typedef unsigned short u16; # elif UINT_MAX == 65535 typedef unsigned int u16; # else # undef STRICT_ALIGN # define STRICT_ALIGN 1 # endif #endif #if ULTRA_FAST # if defined(VERY_FAST) # undef VERY_FAST # endif #endif #if INIT_HTAB # ifdef __cplusplus # include # else # include # endif #endif #endif bitshuffle-0.5.1/lzf/lzf/lzf_c.c000066400000000000000000000214571434025530100165050ustar00rootroot00000000000000/* * Copyright (c) 2000-2008 Marc Alexander Lehmann * * Redistribution and use in source and binary forms, with or without modifica- * tion, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * * Alternatively, the contents of this file may be used under the terms of * the GNU General Public License ("GPL") version 2 or any later version, * in which case the provisions of the GPL are applicable instead of * the above. 
If you wish to allow the use of your version of this file * only under the terms of the GPL and not to allow others to use your * version of this file under the BSD license, indicate your decision * by deleting the provisions above and replace them with the notice * and other provisions required by the GPL. If you do not delete the * provisions above, a recipient may use your version of this file under * either the BSD or the GPL. */ #include "lzfP.h" #define HSIZE (1 << (HLOG)) /* * don't play with this unless you benchmark! * decompression is not dependent on the hash function * the hashing function might seem strange, just believe me * it works ;) */ #ifndef FRST # define FRST(p) (((p[0]) << 8) | p[1]) # define NEXT(v,p) (((v) << 8) | p[2]) # if ULTRA_FAST # define IDX(h) ((( h >> (3*8 - HLOG)) - h ) & (HSIZE - 1)) # elif VERY_FAST # define IDX(h) ((( h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) # else # define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) # endif #endif /* * IDX works because it is very similar to a multiplicative hash, e.g. * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1)) * the latter is also quite fast on newer CPUs, and compresses similarly. 
*
 * the next one is also quite good, albeit slow ;)
 * (int)(cos(h & 0xffffff) * 1e6)
 */

#if 0
/* original lzv-like hash function, much worse and thus slower */
# define FRST(p) (p[0] << 5) ^ p[1]
# define NEXT(v,p) ((v) << 5) ^ p[2]
# define IDX(h) ((h) & (HSIZE - 1))
#endif

/* Limits used by the encoder below:
 * MAX_LIT - a literal run is flushed once it holds this many bytes,
 * MAX_OFF - a match is only accepted when its offset is below this value,
 * MAX_REF - upper bound applied to the match length search. */
#define        MAX_LIT        (1 <<  5)
#define        MAX_OFF        (1 << 13)
#define        MAX_REF        ((1 << 8) + (1 << 3))

#if __GNUC__ >= 3
# define expect(expr,value)         __builtin_expect ((expr),(value))
# define inline                     inline
#else
# define expect(expr,value)         (expr)
# define inline                     static
#endif

#define expect_false(expr) expect ((expr) != 0, 0)
#define expect_true(expr)  expect ((expr) != 0, 1)

/*
 * compressed format
 *
 * 000LLLLL <L+1 literal bytes>  ; literal
 * LLLooooo oooooooo ; backref L
 * 111ooooo LLLLLLLL oooooooo ; backref L+7
 *
 */

/*
 * Compress in_len bytes at in_data into out_data, writing at most out_len
 * bytes.
 *
 * in_data/in_len   - input buffer and its length in bytes.
 * out_data/out_len - output buffer and its capacity in bytes.
 * htab             - hash table; only a parameter when LZF_STATE_ARG is set,
 *                    otherwise it is a local variable.
 *
 * Returns the number of bytes written to out_data, or 0 when in_len or
 * out_len is 0 or when the output buffer is too small.
 *
 * NOTE(review): FRST (ip) reads ip[0] and ip[1] after only in_len != 0 has
 * been checked - confirm callers never pass in_len == 1.
 */
unsigned int
lzf_compress (const void *const in_data, unsigned int in_len,
              void *out_data, unsigned int out_len
#if LZF_STATE_ARG
              , LZF_STATE htab
#endif
              )
{
#if !LZF_STATE_ARG
  LZF_STATE htab;
#endif
  const u8 **hslot;
  const u8 *ip = (const u8 *)in_data;
        u8 *op = (u8 *)out_data;
  const u8 *in_end  = ip + in_len;
        u8 *out_end = op + out_len;
  const u8 *ref;

  /* off requires a type wide enough to hold a general pointer difference.
   * ISO C doesn't have that (size_t might not be enough and ptrdiff_t only
   * works for differences within a single object). We also assume that
   * no bit pattern traps. Since the only platform that is both non-POSIX
   * and fails to support both assumptions is windows 64 bit, we make a
   * special workaround for it.
   */
#if ( defined (WIN32) && defined (_M_X64) ) || defined (_WIN64)
  unsigned _int64 off; /* workaround for missing POSIX compliance */
#else
  unsigned long off;
#endif
  unsigned int hval;
  int lit;

  if (!in_len || !out_len)
    return 0;

  /* The table is only cleared when INIT_HTAB is set; otherwise slots hold
   * whatever was there before, and the range tests in the match condition
   * below are what reject such values. */
#if INIT_HTAB
  memset (htab, 0, sizeof (htab));
# if 0
  for (hslot = htab; hslot < htab + HSIZE; hslot++)
    *hslot++ = ip;
# endif
#endif

  /* op is advanced past one byte that is reserved for the literal-run
   * header; it is filled in ("stop run") once the run length is known. */
  lit = 0; op++; /* start run */

  hval = FRST (ip);
  while (ip < in_end - 2)
    {
      /* hash the bytes at ip and swap the slot's previous pointer for ip */
      hval = NEXT (hval, ip);
      hslot = htab + IDX (hval);
      ref = *hslot; *hslot = ip;

      if (1
#if INIT_HTAB
          && ref < ip /* the next test will actually take care of this, but this is faster */
#endif
          && (off = ip - ref - 1) < MAX_OFF
          && ip + 4 < in_end
          && ref > (u8 *)in_data
#if STRICT_ALIGN
          && ref[0] == ip[0]
          && ref[1] == ip[1]
          && ref[2] == ip[2]
#else
          && *(u16 *)ref == *(u16 *)ip
          && ref[2] == ip[2]
#endif
        )
        {
          /* match found at *ref++ */
          unsigned int len = 2;
          unsigned int maxlen = in_end - ip - len;
          maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;

          if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */
            if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */
              return 0;

          op [- lit - 1] = lit - 1; /* stop run */
          op -= !lit; /* undo run if length is zero */

          /* extend the match: 16 unrolled comparisons when enough input
           * remains, then a bounded byte-by-byte loop */
          for (;;)
            {
              if (expect_true (maxlen > 16))
                {
                  len++; if (ref [len] != ip [len]) break;
                  len++; if (ref [len] != ip [len]) break;
                  len++; if (ref [len] != ip [len]) break;
                  len++; if (ref [len] != ip [len]) break;

                  len++; if (ref [len] != ip [len]) break;
                  len++; if (ref [len] != ip [len]) break;
                  len++; if (ref [len] != ip [len]) break;
                  len++; if (ref [len] != ip [len]) break;

                  len++; if (ref [len] != ip [len]) break;
                  len++; if (ref [len] != ip [len]) break;
                  len++; if (ref [len] != ip [len]) break;
                  len++; if (ref [len] != ip [len]) break;

                  len++; if (ref [len] != ip [len]) break;
                  len++; if (ref [len] != ip [len]) break;
                  len++; if (ref [len] != ip [len]) break;
                  len++; if (ref [len] != ip [len]) break;
                }

              do
                len++;
              while (len < maxlen && ref[len] == ip[len]);

              break;
            }

          len -= 2; /* len is now #octets - 1 */
          ip++;

          /* short form: 3 length bits in the first byte; long form: the
           * length bits are all ones and (len - 7) follows in its own byte */
          if (len < 7)
            {
              *op++ = (off >> 8) + (len << 5);
            }
          else
            {
              *op++ = (off >> 8) + (  7 << 5);
              *op++ = len - 7;
            }

          *op++ = off;
          lit = 0; op++; /* start run */

          ip += len + 1;

          if (expect_false (ip >= in_end - 2))
            break;

          /* re-seed the hash table after the match; the fast modes only
           * enter the last one or two positions, the default mode enters
           * every position covered by the match */
#if ULTRA_FAST || VERY_FAST
          --ip;
# if VERY_FAST && !ULTRA_FAST
          --ip;
# endif
          hval = FRST (ip);

          hval = NEXT (hval, ip);
          htab[IDX (hval)] = ip;
          ip++;

# if VERY_FAST && !ULTRA_FAST
          hval = NEXT (hval, ip);
          htab[IDX (hval)] = ip;
          ip++;
# endif
#else
          ip -= len + 1;

          do
            {
              hval = NEXT (hval, ip);
              htab[IDX (hval)] = ip;
              ip++;
            }
          while (len--);
#endif
        }
      else
        {
          /* one more literal byte we must copy */
          if (expect_false (op >= out_end))
            return 0;

          lit++; *op++ = *ip++;

          if (expect_false (lit == MAX_LIT))
            {
              op [- lit - 1] = lit - 1; /* stop run */
              lit = 0; op++; /* start run */
            }
        }
    }

  if (op + 3 > out_end) /* at most 3 bytes can be missing here */
    return 0;

  /* copy the remaining input bytes as literals */
  while (ip < in_end)
    {
      lit++; *op++ = *ip++;

      if (expect_false (lit == MAX_LIT))
        {
          op [- lit - 1] = lit - 1; /* stop run */
          lit = 0; op++; /* start run */
        }
    }

  op [- lit - 1] = lit - 1; /* end run */
  op -= !lit; /* undo run if length is zero */

  return op - (u8 *)out_data;
}

bitshuffle-0.5.1/lzf/lzf/lzf_d.c000066400000000000000000000105051434025530100164760ustar00rootroot00000000000000/* * Copyright (c) 2000-2007 Marc Alexander Lehmann * * Redistribution and use in source and binary forms, with or without modifica- * tion, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * * Alternatively, the contents of this file may be used under the terms of * the GNU General Public License ("GPL") version 2 or any later version, * in which case the provisions of the GPL are applicable instead of * the above. If you wish to allow the use of your version of this file * only under the terms of the GPL and not to allow others to use your * version of this file under the BSD license, indicate your decision * by deleting the provisions above and replace them with the notice * and other provisions required by the GPL. If you do not delete the * provisions above, a recipient may use your version of this file under * either the BSD or the GPL. */ #include "lzfP.h" #if AVOID_ERRNO # define SET_ERRNO(n) #else # include # define SET_ERRNO(n) errno = (n) #endif /* ASM is slower than C in HDF5 tests -- A.C. 
2/5/09
#ifndef __STRICT_ANSI__
#ifndef H5PY_DISABLE_LZF_ASM
#if (__i386 || __amd64) && __GNUC__ >= 3
# define lzf_movsb(dst, src, len)                \
   asm ("rep movsb"                              \
        : "=D" (dst),  "=S" (src),  "=c" (len)   \
        :  "0" (dst),   "1" (src),   "2" (len));
#endif
#endif
#endif
*/

/*
 * Decompress the LZF stream in in_data (in_len bytes) into out_data, which
 * has room for out_len bytes.
 *
 * Returns the number of bytes written to out_data. On failure returns 0 and
 * reports the reason through SET_ERRNO:
 *   E2BIG  - the decoded data does not fit in out_len bytes
 *   EINVAL - the stream is malformed (a back reference points before
 *            out_data, or, when CHECK_INPUT is set, the input is truncated)
 *
 * Stream format as decoded below: each item starts with a control byte.
 *   ctrl <  32 : literal run, the next (ctrl + 1) bytes are copied verbatim.
 *   ctrl >= 32 : back reference. The top 3 bits give the length (the value 7
 *                means "add the following byte"), the low 5 bits are the
 *                high part of the distance and the next byte its low 8 bits.
 *                (length + 2) bytes are copied from earlier output.
 *
 * NOTE: the outer loop is do/while, so the first control byte is read before
 * in_len is examined; callers are expected to pass in_len > 0.
 */
unsigned int
lzf_decompress (const void *const in_data,  unsigned int in_len,
                void             *out_data, unsigned int out_len)
{
  u8 const *ip = (const u8 *)in_data;
  u8       *op = (u8 *)out_data;
  u8 const *const in_end  = ip + in_len;
  u8       *const out_end = op + out_len;

  do
    {
      unsigned int ctrl = *ip++;

      if (ctrl < (1 << 5)) /* literal run */
        {
          /* stored run length is (count - 1) */
          ctrl++;

          if (op + ctrl > out_end)
            {
              SET_ERRNO (E2BIG);
              return 0;
            }

#if CHECK_INPUT
          /* the literal bytes themselves must be present in the input */
          if (ip + ctrl > in_end)
            {
              SET_ERRNO (EINVAL);
              return 0;
            }
#endif

#ifdef lzf_movsb
          lzf_movsb (op, ip, ctrl);
#else
          do
            *op++ = *ip++;
          while (--ctrl);
#endif
        }
      else /* back reference */
        {
          unsigned int len = ctrl >> 5;

          /* high 5 bits of the distance; the "- 1" makes distance 0 mean
             "the byte just written" */
          u8 *ref = op - ((ctrl & 0x1f) << 8) - 1;

#if CHECK_INPUT
          if (ip >= in_end)
            {
              SET_ERRNO (EINVAL);
              return 0;
            }
#endif
          if (len == 7)
            {
              /* extended length: one more byte is added to the length */
              len += *ip++;
#if CHECK_INPUT
              if (ip >= in_end)
                {
                  SET_ERRNO (EINVAL);
                  return 0;
                }
#endif
            }

          /* low 8 bits of the distance */
          ref -= *ip++;

          /* the copy below writes len + 2 bytes */
          if (op + len + 2 > out_end)
            {
              SET_ERRNO (E2BIG);
              return 0;
            }

          /* reject references that point before the start of the output */
          if (ref < (u8 *)out_data)
            {
              SET_ERRNO (EINVAL);
              return 0;
            }

#ifdef lzf_movsb
          len += 2;
          lzf_movsb (op, ref, len);
#else
          /* byte-wise copy on purpose: source and destination may overlap */
          *op++ = *ref++;
          *op++ = *ref++;

          do
            *op++ = *ref++;
          while (--len);
#endif
        }
    }
  while (ip < in_end);

  return op - (u8 *)out_data;
}

bitshuffle-0.5.1/lzf/lzf_filter.c000066400000000000000000000154201434025530100167460ustar00rootroot00000000000000/***** Preamble block *********************************************************
*
* This file is part of h5py, a low-level Python interface to the HDF5 library.
* * Copyright (C) 2008 Andrew Collette * http://h5py.alfven.org * License: BSD (See LICENSE.txt for full license) * * $Date$ * ****** End preamble block ****************************************************/ /* Implements an LZF filter module for HDF5, using the BSD-licensed library by Marc Alexander Lehmann (http://www.goof.com/pcg/marc/liblzf.html). No Python-specific code is used. The filter behaves like the DEFLATE filter, in that it is called for every type and space, and returns 0 if the data cannot be compressed. The only public function is (int) register_lzf(void), which passes on the result from H5Zregister. */ #include #include #include #include "hdf5.h" #include "lzf/lzf.h" #include "lzf_filter.h" /* Our own versions of H5Epush_sim, as it changed in 1.8 */ #if H5_VERS_MAJOR == 1 && H5_VERS_MINOR < 7 #define PUSH_ERR(func, minor, str) H5Epush(__FILE__, func, __LINE__, H5E_PLINE, minor, str) #define H5PY_GET_FILTER H5Pget_filter_by_id #else #define PUSH_ERR(func, minor, str) H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str) #define H5PY_GET_FILTER(a,b,c,d,e,f,g) H5Pget_filter_by_id2(a,b,c,d,e,f,g,NULL) #endif /* Deal with the mutiple definitions for H5Z_class_t. Note: Only HDF5 1.6 and 1.8 are supported. 
    (1) The old class should always be used for HDF5 1.6
    (2) The new class should always be used for HDF5 1.8 < 1.8.3
    (3) The old class should be used for HDF5 1.8 >= 1.8.3 only if the
        macro H5_USE_16_API is set
*/

/* H5PY_H5Z_NEWCLS is 1 when the versioned H5Z_class_t layout (with the
   leading version field and encoder/decoder flags) must be used. */
#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 8 && (H5_VERS_RELEASE < 3 || !H5_USE_16_API)
#define H5PY_H5Z_NEWCLS 1
#else
#define H5PY_H5Z_NEWCLS 0
#endif

/* Filter callback: compresses or decompresses one chunk. */
size_t lzf_filter(unsigned flags, size_t cd_nelmts,
		    const unsigned cd_values[], size_t nbytes,
		    size_t *buf_size, void **buf);

/* "set_local" callback: stores per-dataset parameters in the DCPL. */
herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space);


/* Try to register the filter, passing on the HDF5 return value.

   Builds the H5Z_class_t descriptor in whichever layout the HDF5 headers in
   use expect (see H5PY_H5Z_NEWCLS above) and hands it to H5Zregister.
   On failure an error is pushed on the HDF5 error stack and the negative
   value returned by H5Zregister is passed back to the caller. */
int register_lzf(void){

    int retval;

#if H5PY_H5Z_NEWCLS
    H5Z_class_t filter_class = {
        H5Z_CLASS_T_VERS,                       /* struct version */
        (H5Z_filter_t)(H5PY_FILTER_LZF),        /* filter id */
        1, 1,                                   /* encoder / decoder present */
        "lzf",                                  /* filter name */
        NULL,                                   /* no "can apply" callback */
        (H5Z_set_local_func_t)(lzf_set_local),
        (H5Z_func_t)(lzf_filter)
    };
#else
    H5Z_class_t filter_class = {
        (H5Z_filter_t)(H5PY_FILTER_LZF),        /* filter id */
        "lzf",                                  /* filter name */
        NULL,                                   /* no "can apply" callback */
        (H5Z_set_local_func_t)(lzf_set_local),
        (H5Z_func_t)(lzf_filter)
    };
#endif

    retval = H5Zregister(&filter_class);
    if(retval<0){
        PUSH_ERR("register_lzf", H5E_CANTREGISTER, "Can't register LZF filter");
    }
    return retval;
}

/*  Filter setup.  Records the following inside the DCPL:

    1.  If version information is not present, set slots 0 and 1 to the filter
        revision and LZF API version, respectively.

    2. Compute the chunk size in bytes and store it in slot 2.
*/
herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space){

    int ndims;
    int i;
    herr_t r;

    unsigned int bufsize;
    hsize_t chunkdims[32];

    unsigned int flags;
    size_t nelements = 8;
    unsigned values[] = {0,0,0,0,0,0,0,0};

    /* Fetch the filter's current client-data values from the DCPL */
    r = H5PY_GET_FILTER(dcpl, H5PY_FILTER_LZF, &flags, &nelements, values, 0, NULL);
    if(r<0) return -1;

    if(nelements < 3) nelements = 3;  /* First 3 slots reserved.  If any higher
                                      slots are used, preserve the contents.
*/ /* It seems the H5Z_FLAG_REVERSE flag doesn't work here, so we have to be careful not to clobber any existing version info */ if(values[0]==0) values[0] = H5PY_FILTER_LZF_VERSION; if(values[1]==0) values[1] = LZF_VERSION; ndims = H5Pget_chunk(dcpl, 32, chunkdims); if(ndims<0) return -1; if(ndims>32){ PUSH_ERR("lzf_set_local", H5E_CALLBACK, "Chunk rank exceeds limit"); return -1; } bufsize = H5Tget_size(type); if(bufsize==0) return -1; for(i=0;i=3)&&(cd_values[2]!=0)){ outbuf_size = cd_values[2]; /* Precomputed buffer guess */ }else{ outbuf_size = (*buf_size); } #ifdef H5PY_LZF_DEBUG fprintf(stderr, "Decompress %d chunk w/buffer %d\n", nbytes, outbuf_size); #endif while(!status){ free(outbuf); outbuf = malloc(outbuf_size); if(outbuf == NULL){ PUSH_ERR("lzf_filter", H5E_CALLBACK, "Can't allocate decompression buffer"); goto failed; } status = lzf_decompress(*buf, nbytes, outbuf, outbuf_size); if(!status){ /* compression failed */ if(errno == E2BIG){ outbuf_size += (*buf_size); #ifdef H5PY_LZF_DEBUG fprintf(stderr, " Too small: %d\n", outbuf_size); #endif } else if(errno == EINVAL) { PUSH_ERR("lzf_filter", H5E_CALLBACK, "Invalid data for LZF decompression"); goto failed; } else { PUSH_ERR("lzf_filter", H5E_CALLBACK, "Unknown LZF decompression error"); goto failed; } } /* if !status */ } /* while !status */ } /* compressing vs decompressing */ if(status != 0){ free(*buf); *buf = outbuf; *buf_size = outbuf_size; return status; /* Size of compressed/decompressed data */ } failed: free(outbuf); return 0; } /* End filter function */ bitshuffle-0.5.1/lzf/lzf_filter.h000066400000000000000000000015521434025530100167540ustar00rootroot00000000000000/***** Preamble block ********************************************************* * * This file is part of h5py, a low-level Python interface to the HDF5 library. 
* * Copyright (C) 2008 Andrew Collette * http://h5py.alfven.org * License: BSD (See LICENSE.txt for full license) * * $Date$ * ****** End preamble block ****************************************************/ #ifndef H5PY_LZF_H #define H5PY_LZF_H #ifdef __cplusplus extern "C" { #endif /* Filter revision number, starting at 1 */ #define H5PY_FILTER_LZF_VERSION 4 /* Filter ID registered with the HDF Group as of 2/6/09. For maintenance requests, contact the filter author directly. */ #define H5PY_FILTER_LZF 32000 /* Register the filter with the library. Returns a negative value on failure, and a non-negative value on success. */ int register_lzf(void); #ifdef __cplusplus } #endif #endif bitshuffle-0.5.1/pyproject.toml000066400000000000000000000003371434025530100165640ustar00rootroot00000000000000# Include dependencies when building wheels on cibuildwheel [build-system] requires = [ "setuptools>=0.7", "Cython>=0.19", "oldest-supported-numpy", "h5py>=2.4.0", ] build-backend = "setuptools.build_meta" bitshuffle-0.5.1/requirements.txt000066400000000000000000000001061434025530100171260ustar00rootroot00000000000000# Order matters setuptools>=0.7 Cython>=0.19 numpy>=1.6.1 h5py>=2.4.0 bitshuffle-0.5.1/setup.cfg.example000066400000000000000000000005201434025530100171150ustar00rootroot00000000000000[install] # These control the installation of the hdf5 dynamically loaded filter plugin. h5plugin = 0 h5plugin-dir = /usr/local/hdf5/lib/plugin [build_ext] # Whether to compile with OpenMP multi-threading. Default is system dependent: # False on OSX (since the clang compiler does not yet support OpenMP) and True # otherwise. omp = 1 bitshuffle-0.5.1/setup.py000066400000000000000000000347371434025530100153750ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function # I didn't import unicode_literals. They break setuptools or Cython in python # 2.7, but python 3 seems to be happy with them. 
import glob import os from os import path from setuptools import setup, Extension from setuptools.command.build_ext import build_ext as build_ext_ from setuptools.command.develop import develop as develop_ from setuptools.command.install import install as install_ from Cython.Compiler.Main import default_options import shutil import subprocess import sys import platform VERSION_MAJOR = 0 VERSION_MINOR = 5 VERSION_POINT = 1 # Define ZSTD macro for cython compilation default_options["compile_time_env"] = {"ZSTD_SUPPORT": False} # Only unset in the 'release' branch and in tags. VERSION_DEV = None VERSION = "%d.%d.%d" % (VERSION_MAJOR, VERSION_MINOR, VERSION_POINT) if VERSION_DEV: VERSION = VERSION + ".dev%d" % VERSION_DEV COMPILE_FLAGS = ["-O3", "-ffast-math", "-std=c99"] # Cython breaks strict aliasing rules. COMPILE_FLAGS += ["-fno-strict-aliasing"] COMPILE_FLAGS += ["-fPIC"] COMPILE_FLAGS_MSVC = ["/Ox", "/fp:fast"] MACROS = [ ("BSHUF_VERSION_MAJOR", VERSION_MAJOR), ("BSHUF_VERSION_MINOR", VERSION_MINOR), ("BSHUF_VERSION_POINT", VERSION_POINT), ] H5PLUGINS_DEFAULT = "/usr/local/hdf5/lib/plugin" # OSX's clang compiler does not support OpenMP. 
if sys.platform == "darwin": OMP_DEFAULT = False else: OMP_DEFAULT = True # Build against the native architecture unless overridden by an environment variable # This can also be overridden by a direct command line argument, or a `setup.cfg` entry # This option is needed for the cibuildwheel action if "BITSHUFFLE_ARCH" in os.environ: MARCH_DEFAULT = os.environ["BITSHUFFLE_ARCH"] else: MARCH_DEFAULT = "native" FALLBACK_CONFIG = { "include_dirs": [], "library_dirs": [], "libraries": [], "extra_compile_args": [], "extra_link_args": [], } if "HDF5_DIR" in os.environ: FALLBACK_CONFIG["include_dirs"] += [os.environ["HDF5_DIR"] + "/include"] # macports FALLBACK_CONFIG["library_dirs"] += [os.environ["HDF5_DIR"] + "/lib"] # macports elif sys.platform == "darwin": # putting here both macports and homebrew paths will generate # "ld: warning: dir not found" at the linking phase FALLBACK_CONFIG["include_dirs"] += ["/opt/local/include"] # macports FALLBACK_CONFIG["library_dirs"] += ["/opt/local/lib"] # macports FALLBACK_CONFIG["include_dirs"] += ["/usr/local/include"] # homebrew FALLBACK_CONFIG["library_dirs"] += ["/usr/local/lib"] # homebrew elif sys.platform.startswith("freebsd"): FALLBACK_CONFIG["include_dirs"] += ["/usr/local/include"] # homebrew FALLBACK_CONFIG["library_dirs"] += ["/usr/local/lib"] # homebrew FALLBACK_CONFIG["include_dirs"] = [ d for d in FALLBACK_CONFIG["include_dirs"] if path.isdir(d) ] FALLBACK_CONFIG["library_dirs"] = [ d for d in FALLBACK_CONFIG["library_dirs"] if path.isdir(d) ] FALLBACK_CONFIG["extra_compile_args"] = ["-DH5_BUILT_AS_DYNAMIC_LIB"] def pkgconfig(*packages, **kw): config = kw.setdefault("config", {}) optional_args = kw.setdefault("optional", "") flag_map = { "include_dirs": ["--cflags-only-I", 2], "library_dirs": ["--libs-only-L", 2], "libraries": ["--libs-only-l", 2], "extra_compile_args": ["--cflags-only-other", 0], "extra_link_args": ["--libs-only-other", 0], } for package in packages: try: subprocess.check_output(["pkg-config", 
package]) except (subprocess.CalledProcessError, OSError): print( "Can't find %s with pkg-config fallback to " "static config" % package ) for distutils_key in flag_map: config.setdefault(distutils_key, []).extend( FALLBACK_CONFIG[distutils_key] ) config["libraries"].append(package) else: for distutils_key, (pkg_option, n) in flag_map.items(): items = ( subprocess.check_output( ["pkg-config", optional_args, pkg_option, package] ) .decode("utf8") .split() ) opt = config.setdefault(distutils_key, []) opt.extend([i[n:] for i in items]) return config zstd_headers = ["zstd/lib/zstd.h"] zstd_lib = ["zstd/lib/"] zstd_sources = glob.glob("zstd/lib/common/*.c") zstd_sources += glob.glob("zstd/lib/compress/*.c") zstd_sources += glob.glob("zstd/lib/decompress/*.c") ext_bshuf = Extension( "bitshuffle.ext", sources=[ "bitshuffle/ext.pyx", "src/bitshuffle.c", "src/bitshuffle_core.c", "src/iochain.c", "lz4/lz4.c", ], include_dirs=["src/", "lz4/"], depends=["src/bitshuffle.h", "src/bitshuffle_core.h", "src/iochain.h", "lz4/lz4.h"], libraries=[], define_macros=MACROS, ) h5filter = Extension( "bitshuffle.h5", sources=[ "bitshuffle/h5.pyx", "src/bshuf_h5filter.c", "src/bitshuffle.c", "src/bitshuffle_core.c", "src/iochain.c", "lz4/lz4.c", ], depends=[ "src/bitshuffle.h", "src/bitshuffle_core.h", "src/iochain.h", "src/bshuf_h5filter.h", "lz4/lz4.h", ], define_macros=MACROS + [("H5_USE_18_API", None)], **pkgconfig("hdf5", config=dict(include_dirs=["src/", "lz4/"])) ) if not sys.platform.startswith("win"): h5filter.sources.append("src/hdf5_dl.c") h5filter.libraries.remove("hdf5") filter_plugin = Extension( "bitshuffle.plugin.libh5bshuf", sources=[ "src/bshuf_h5plugin.c", "src/bshuf_h5filter.c", "src/bitshuffle.c", "src/bitshuffle_core.c", "src/iochain.c", "lz4/lz4.c", ], depends=[ "src/bitshuffle.h", "src/bitshuffle_core.h", "src/iochain.h", "src/bshuf_h5filter.h", "lz4/lz4.h", ], define_macros=MACROS, **pkgconfig("hdf5", config=dict(include_dirs=["src/", "lz4/"])) ) lzf_plugin = 
Extension( "bitshuffle.plugin.libh5LZF", sources=[ "src/lzf_h5plugin.c", "lzf/lzf_filter.c", "lzf/lzf/lzf_c.c", "lzf/lzf/lzf_d.c", ], depends=["lzf/lzf_filter.h", "lzf/lzf/lzf.h", "lzf/lzf/lzfP.h"], **pkgconfig("hdf5", config=dict(include_dirs=["lzf/", "lzf/lzf/"])) ) EXTENSIONS = [ ext_bshuf, ] # Check for HDF5 support HDF5_FILTER_SUPPORT = False CPATHS = os.environ["CPATH"].split(":") if "CPATH" in os.environ else [] for p in ["/usr/include"] + pkgconfig("hdf5")["include_dirs"] + CPATHS: if os.path.exists(os.path.join(p, "hdf5.h")): HDF5_FILTER_SUPPORT = True if HDF5_FILTER_SUPPORT: EXTENSIONS.append(h5filter) # Check for plugin hdf5 plugin support (hdf5 >= 1.8.11) HDF5_PLUGIN_SUPPORT = False CPATHS = os.environ["CPATH"].split(":") if "CPATH" in os.environ else [] for p in ["/usr/include"] + pkgconfig("hdf5")["include_dirs"] + CPATHS: if os.path.exists(os.path.join(p, "H5PLextern.h")): HDF5_PLUGIN_SUPPORT = True if HDF5_PLUGIN_SUPPORT: EXTENSIONS.extend([filter_plugin, lzf_plugin]) # For enabling ZSTD support when building wheels # This needs to be done after all Extensions have been added to EXTENSIONS if "ENABLE_ZSTD" in os.environ: default_options["compile_time_env"] = {"ZSTD_SUPPORT": True} for ext in EXTENSIONS: if ext.name in [ "bitshuffle.ext", "bitshuffle.h5", "bitshuffle.plugin.libh5bshuf", ]: ext.sources += zstd_sources ext.include_dirs += zstd_lib ext.depends += zstd_headers ext.define_macros += [("ZSTD_SUPPORT", 1)] class develop(develop_): def run(self): # Dummy directory for copying build plugins. if not path.isdir("bitshuffle/plugin"): os.mkdir("bitshuffle/plugin") develop_.run(self) # Custom installation to include installing dynamic filters. class install(install_): user_options = install_.user_options + [ ("h5plugin", None, "Install HDF5 filter plugins for use outside of python."), ( "h5plugin-dir=", None, "Where to install filter plugins. Default %s." 
% H5PLUGINS_DEFAULT, ), ("zstd", None, "Install ZSTD support."), ] def initialize_options(self): install_.initialize_options(self) self.h5plugin = False self.zstd = False self.h5plugin_dir = H5PLUGINS_DEFAULT def finalize_options(self): install_.finalize_options(self) if self.h5plugin not in ("0", "1", True, False): raise ValueError("Invalid h5plugin argument. Must be '0' or '1'.") self.h5plugin = int(self.h5plugin) self.h5plugin_dir = path.abspath(self.h5plugin_dir) self.zstd = self.zstd # Add ZSTD files and macro to extensions if ZSTD enabled if self.zstd: default_options["compile_time_env"] = {"ZSTD_SUPPORT": True} for ext in EXTENSIONS: if ext.name in [ "bitshuffle.ext", "bitshuffle.h5", "bitshuffle.plugin.libh5bshuf", ]: ext.sources += zstd_sources ext.include_dirs += zstd_lib ext.depends += zstd_headers ext.define_macros += [("ZSTD_SUPPORT", 1)] def run(self): install_.run(self) if self.h5plugin: if not HDF5_PLUGIN_SUPPORT: print("HDF5 < 1.8.11, not installing filter plugins.") return plugin_build = path.join(self.build_lib, "bitshuffle", "plugin") try: os.makedirs(self.h5plugin_dir) except OSError as e: if e.args[0] == 17: # Directory already exists, this is fine. pass else: raise plugin_libs = glob.glob(path.join(plugin_build, "*")) for plugin_lib in plugin_libs: plugin_name = path.split(plugin_lib)[1] shutil.copy2(plugin_lib, path.join(self.h5plugin_dir, plugin_name)) print("Installed HDF5 filter plugins to %s" % self.h5plugin_dir) # Command line or site.cfg specification of OpenMP. class build_ext(build_ext_): user_options = build_ext_.user_options + [ ( "omp=", None, "Whether to compile with OpenMP threading. Default" " on current system is %s." % str(OMP_DEFAULT), ), ( "march=", None, "Generate instructions for a specific machine type. Default is %s." 
% MARCH_DEFAULT, ), ] boolean_options = build_ext_.boolean_options + ["omp"] def initialize_options(self): build_ext_.initialize_options(self) self.omp = OMP_DEFAULT self.march = MARCH_DEFAULT def finalize_options(self): # For some reason this gets run twice. Careful to print messages and # add arguments only one time. build_ext_.finalize_options(self) if self.omp not in ("0", "1", True, False): raise ValueError("Invalid omp argument. Mut be '0' or '1'.") self.omp = int(self.omp) import numpy as np ext_bshuf.include_dirs.append(np.get_include()) # Required only by old version of setuptools < 18.0 from Cython.Build import cythonize self.extensions = cythonize(self.extensions) for ext in self.extensions: ext._needs_stub = False def build_extensions(self): c = self.compiler.compiler_type # Set compiler flags including architecture if self.compiler.compiler_type == "msvc": openmpflag = "/openmp" compileflags = COMPILE_FLAGS_MSVC else: openmpflag = "-fopenmp" archi = platform.machine() if archi in ("i386", "x86_64"): compileflags = COMPILE_FLAGS + ["-march=%s" % self.march] else: compileflags = COMPILE_FLAGS + ["-mcpu=%s" % self.march] if archi == "ppc64le": compileflags = COMPILE_FLAGS + ["-DNO_WARN_X86_INTRINSICS"] if self.omp not in ("0", "1", True, False): raise ValueError("Invalid omp argument. Mut be '0' or '1'.") self.omp = int(self.omp) # Add the appropriate OpenMP flags if needed if self.omp: if not hasattr(self, "_printed_omp_message"): self._printed_omp_message = True print("\n#################################") print("# Compiling with OpenMP support #") print("#################################\n") # More portable to pass -fopenmp to linker. 
# self.libraries += ['gomp'] compileflags += [openmpflag] linkflags = [openmpflag] else: linkflags = [] # Add the compile/link options to each extension for e in self.extensions: e.extra_compile_args = list(set(e.extra_compile_args).union(compileflags)) e.extra_link_args = list(set(e.extra_link_args).union(linkflags)) build_ext_.build_extensions(self) # Don't install numpy/cython/hdf5 if not needed for cmd in ["sdist", "clean", "--help", "--help-commands", "--version"]: if cmd in sys.argv: setup_requires = [] break else: setup_requires = ["Cython>=0.19", "numpy>=1.6.1"] with open("requirements.txt") as f: requires = f.read().splitlines() requires = [r.split()[0] for r in requires] with open("README.rst") as r: long_description = r.read() # TODO hdf5 support should be an "extra". Figure out how to set this up. setup( name="bitshuffle", version=VERSION, packages=["bitshuffle", "bitshuffle"], scripts=[], ext_modules=EXTENSIONS, cmdclass={"build_ext": build_ext, "install": install, "develop": develop}, setup_requires=setup_requires, install_requires=requires, # extras_require={'H5': ["h5py"]}, package_data={"": ["data/*"]}, # metadata for upload to PyPI author="Kiyoshi Wesley Masui", author_email="kiyo@physics.ubc.ca", description="Bitshuffle filter for improving typed data compression.", long_description=long_description, license="MIT", url="https://github.com/kiyo-masui/bitshuffle", download_url=("https://github.com/kiyo-masui/bitshuffle/tarball/%s" % VERSION), keywords=["compression", "hdf5", "numpy"], ) bitshuffle-0.5.1/src/000077500000000000000000000000001434025530100144345ustar00rootroot00000000000000bitshuffle-0.5.1/src/bitshuffle.c000066400000000000000000000202411434025530100167320ustar00rootroot00000000000000/* * Bitshuffle - Filter for improving compression of typed binary data. * * Author: Kiyoshi Masui * Website: http://www.github.com/kiyo-masui/bitshuffle * Created: 2014 * * See LICENSE file for details about copyright and rights to use. 
* */ #include "bitshuffle.h" #include "bitshuffle_core.h" #include "bitshuffle_internals.h" #include "lz4.h" #ifdef ZSTD_SUPPORT #include "zstd.h" #endif #include #include // Macros. #define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) { \ free(buf); return count - 1000; } /* Bitshuffle and compress a single block. */ int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \ const size_t size, const size_t elem_size, const int option) { int64_t nbytes, count; void *tmp_buf_bshuf; void *tmp_buf_lz4; size_t this_iter; const void *in; void *out; tmp_buf_bshuf = malloc(size * elem_size); if (tmp_buf_bshuf == NULL) return -1; int dst_capacity = LZ4_compressBound(size * elem_size); tmp_buf_lz4 = malloc(dst_capacity); if (tmp_buf_lz4 == NULL){ free(tmp_buf_bshuf); return -1; } in = ioc_get_in(C_ptr, &this_iter); ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size)); count = bshuf_trans_bit_elem(in, tmp_buf_bshuf, size, elem_size); if (count < 0) { free(tmp_buf_lz4); free(tmp_buf_bshuf); return count; } nbytes = LZ4_compress_default((const char*) tmp_buf_bshuf, (char*) tmp_buf_lz4, size * elem_size, dst_capacity); free(tmp_buf_bshuf); CHECK_ERR_FREE_LZ(nbytes, tmp_buf_lz4); out = ioc_get_out(C_ptr, &this_iter); ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + nbytes + 4)); bshuf_write_uint32_BE(out, nbytes); memcpy((char *) out + 4, tmp_buf_lz4, nbytes); free(tmp_buf_lz4); return nbytes + 4; } /* Decompress and bitunshuffle a single block. 
*/ int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr, const size_t size, const size_t elem_size, const int option) { int64_t nbytes, count; void *out, *tmp_buf; const void *in; size_t this_iter; int32_t nbytes_from_header; in = ioc_get_in(C_ptr, &this_iter); nbytes_from_header = bshuf_read_uint32_BE(in); ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + nbytes_from_header + 4)); out = ioc_get_out(C_ptr, &this_iter); ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + size * elem_size)); tmp_buf = malloc(size * elem_size); if (tmp_buf == NULL) return -1; nbytes = LZ4_decompress_safe((const char*) in + 4, (char *) tmp_buf, nbytes_from_header, size * elem_size); CHECK_ERR_FREE_LZ(nbytes, tmp_buf); if (nbytes != size * elem_size) { free(tmp_buf); return -91; } nbytes = nbytes_from_header; count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size); CHECK_ERR_FREE(count, tmp_buf); nbytes += 4; free(tmp_buf); return nbytes; } #ifdef ZSTD_SUPPORT /* Bitshuffle and compress a single block. 
*/ int64_t bshuf_compress_zstd_block(ioc_chain *C_ptr, \ const size_t size, const size_t elem_size, const int comp_lvl) { int64_t nbytes, count; void *tmp_buf_bshuf; void *tmp_buf_zstd; size_t this_iter; const void *in; void *out; tmp_buf_bshuf = malloc(size * elem_size); if (tmp_buf_bshuf == NULL) return -1; size_t tmp_buf_zstd_size = ZSTD_compressBound(size * elem_size); tmp_buf_zstd = malloc(tmp_buf_zstd_size); if (tmp_buf_zstd == NULL){ free(tmp_buf_bshuf); return -1; } in = ioc_get_in(C_ptr, &this_iter); ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size)); count = bshuf_trans_bit_elem(in, tmp_buf_bshuf, size, elem_size); if (count < 0) { free(tmp_buf_zstd); free(tmp_buf_bshuf); return count; } nbytes = ZSTD_compress(tmp_buf_zstd, tmp_buf_zstd_size, (const void*)tmp_buf_bshuf, size * elem_size, comp_lvl); free(tmp_buf_bshuf); CHECK_ERR_FREE_LZ(nbytes, tmp_buf_zstd); out = ioc_get_out(C_ptr, &this_iter); ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + nbytes + 4)); bshuf_write_uint32_BE(out, nbytes); memcpy((char *) out + 4, tmp_buf_zstd, nbytes); free(tmp_buf_zstd); return nbytes + 4; } /* Decompress and bitunshuffle a single block. 
*/ int64_t bshuf_decompress_zstd_block(ioc_chain *C_ptr, const size_t size, const size_t elem_size, const int option) { int64_t nbytes, count; void *out, *tmp_buf; const void *in; size_t this_iter; int32_t nbytes_from_header; in = ioc_get_in(C_ptr, &this_iter); nbytes_from_header = bshuf_read_uint32_BE(in); ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + nbytes_from_header + 4)); out = ioc_get_out(C_ptr, &this_iter); ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + size * elem_size)); tmp_buf = malloc(size * elem_size); if (tmp_buf == NULL) return -1; nbytes = ZSTD_decompress(tmp_buf, size * elem_size, (void *)((char *) in + 4), nbytes_from_header); CHECK_ERR_FREE_LZ(nbytes, tmp_buf); if (nbytes != size * elem_size) { free(tmp_buf); return -91; } nbytes = nbytes_from_header; count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size); CHECK_ERR_FREE(count, tmp_buf); nbytes += 4; free(tmp_buf); return nbytes; } #endif // ZSTD_SUPPORT /* ---- Public functions ---- * * See header file for description and usage. * */ size_t bshuf_compress_lz4_bound(const size_t size, const size_t elem_size, size_t block_size) { size_t bound, leftover; if (block_size == 0) { block_size = bshuf_default_block_size(elem_size); } if (block_size % BSHUF_BLOCKED_MULT) return -81; // Note that each block gets a 4 byte header. // Size of full blocks. bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size); // Size of partial blocks, if any. leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4; // Size of uncompressed data not fitting into any blocks. 
    // (these trailing elements are counted at their raw size)
    bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
    return bound;
}


/* Compress *size* elements block by block with bshuf_compress_lz4_block.
 * The result of bshuf_blocked_wrap_fun is returned unchanged. */
int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size) {
    return bshuf_blocked_wrap_fun(&bshuf_compress_lz4_block, in, out, size,
            elem_size, block_size, 0/*option*/);
}


/* Decompress *size* elements block by block with
 * bshuf_decompress_lz4_block. The result of bshuf_blocked_wrap_fun is
 * returned unchanged. */
int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size) {
    return bshuf_blocked_wrap_fun(&bshuf_decompress_lz4_block, in, out, size,
            elem_size, block_size, 0/*option*/);
}


#ifdef ZSTD_SUPPORT
/* Upper bound on the output size of bshuf_compress_zstd.
 *
 * NOTE: the return type is size_t, so the "-81" returned for a block_size
 * that is not a multiple of BSHUF_BLOCKED_MULT reaches the caller as a very
 * large positive value rather than as a negative number. */
size_t bshuf_compress_zstd_bound(const size_t size,
        const size_t elem_size, size_t block_size) {

    size_t bound, leftover;

    // 0 selects the default block size for this element size.
    if (block_size == 0) {
        block_size = bshuf_default_block_size(elem_size);
    }
    if (block_size % BSHUF_BLOCKED_MULT) return -81;

    // Note that each block gets a 4 byte header.
    // Size of full blocks.
    bound = (ZSTD_compressBound(block_size * elem_size) + 4) * (size / block_size);
    // Size of partial blocks, if any.
    leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
    if (leftover) bound += ZSTD_compressBound(leftover * elem_size) + 4;
    // Size of uncompressed data not fitting into any blocks.
    bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
    return bound;
}


/* Compress *size* elements block by block with bshuf_compress_zstd_block;
 * *comp_lvl* is handed to every block as its option argument. */
int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size, const int comp_lvl) {
    return bshuf_blocked_wrap_fun(&bshuf_compress_zstd_block, in, out, size,
            elem_size, block_size, comp_lvl);
}


/* Decompress *size* elements block by block with
 * bshuf_decompress_zstd_block. The result of bshuf_blocked_wrap_fun is
 * returned unchanged. */
int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size) {
    return bshuf_blocked_wrap_fun(&bshuf_decompress_zstd_block, in, out, size,
            elem_size, block_size, 0/*option*/);
}
#endif // ZSTD_SUPPORT
bitshuffle-0.5.1/src/bitshuffle.h000066400000000000000000000136441434025530100167500ustar00rootroot00000000000000/*
 * Bitshuffle - Filter for improving compression of typed binary data.
* * This file is part of Bitshuffle * Author: Kiyoshi Masui * Website: http://www.github.com/kiyo-masui/bitshuffle * Created: 2014 * * See LICENSE file for details about copyright and rights to use. * * * Header File * * Worker routines return an int64_t which is the number of bytes processed * if positive or an error code if negative. * * Error codes: * -1 : Failed to allocate memory. * -11 : Missing SSE. * -12 : Missing AVX. * -80 : Input size not a multiple of 8. * -81 : block_size not multiple of 8. * -91 : Decompression error, wrong number of bytes processed. * -1YYY : Error internal to compression routine with error code -YYY. */ #ifndef BITSHUFFLE_H #define BITSHUFFLE_H #include #include "bitshuffle_core.h" #ifdef __cplusplus extern "C" { #endif /* * ---- LZ4 Interface ---- */ /* ---- bshuf_compress_lz4_bound ---- * * Bound on size of data compressed with *bshuf_compress_lz4*. * * Parameters * ---------- * size : number of elements in input * elem_size : element size of typed data * block_size : Process in blocks of this many elements. Pass 0 to * select automatically (recommended). * * Returns * ------- * Bound on compressed data size. * */ size_t bshuf_compress_lz4_bound(const size_t size, const size_t elem_size, size_t block_size); /* ---- bshuf_compress_lz4 ---- * * Bitshuffled and compress the data using LZ4. * * Transpose within elements, in blocks of data of *block_size* elements then * compress the blocks using LZ4. In the output buffer, each block is prefixed * by a 4 byte integer giving the compressed size of that block. * * Output buffer must be large enough to hold the compressed data. This could * be in principle substantially larger than the input buffer. Use the routine * *bshuf_compress_lz4_bound* to get an upper limit. * * Parameters * ---------- * in : input buffer, must be of size * elem_size bytes * out : output buffer, must be large enough to hold data. 
 *  size : number of elements in input
 *  elem_size : element size of typed data
 *  block_size : Process in blocks of this many elements. Pass 0 to
 *  select automatically (recommended).
 *
 * Returns
 * -------
 *  number of bytes used in output buffer, negative error-code if failed.
 *
 */
int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, const size_t
        elem_size, size_t block_size);


/* ---- bshuf_decompress_lz4 ----
 *
 * Undo compression and bitshuffling.
 *
 * Decompress data then un-bitshuffle it in blocks of *block_size* elements.
 *
 * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size*
 * must match the parameters used to compress the data.
 *
 * Parameters
 * ----------
 *  in : input buffer
 *  out : output buffer, must be of size * elem_size bytes
 *  size : number of elements in the uncompressed data (i.e. in *out*)
 *  elem_size : element size of typed data
 *  block_size : Process in blocks of this many elements. Pass 0 to
 *  select automatically (recommended).
 *
 * Returns
 * -------
 *  number of bytes consumed in *input* buffer, negative error-code if failed.
 *
 */
int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
        const size_t elem_size, size_t block_size);

/*
 * ---- ZSTD Interface ----
 */

#ifdef ZSTD_SUPPORT
/* ---- bshuf_compress_zstd_bound ----
 *
 * Bound on size of data compressed with *bshuf_compress_zstd*.
 *
 * Parameters
 * ----------
 *  size : number of elements in input
 *  elem_size : element size of typed data
 *  block_size : Process in blocks of this many elements. Pass 0 to
 *  select automatically (recommended).
 *
 * Returns
 * -------
 *  Bound on compressed data size.
 *
 */
size_t bshuf_compress_zstd_bound(const size_t size,
        const size_t elem_size, size_t block_size);

/* ---- bshuf_compress_zstd ----
 *
 * Bitshuffle and compress the data using ZSTD.
 *
 * Transpose within elements, in blocks of data of *block_size* elements then
 * compress the blocks using ZSTD.
In the output buffer, each block is prefixed * by a 4 byte integer giving the compressed size of that block. * * Output buffer must be large enough to hold the compressed data. This could * be in principle substantially larger than the input buffer. Use the routine * *bshuf_compress_zstd_bound* to get an upper limit. * * Parameters * ---------- * in : input buffer, must be of size * elem_size bytes * out : output buffer, must be large enough to hold data. * size : number of elements in input * elem_size : element size of typed data * block_size : Process in blocks of this many elements. Pass 0 to * select automatically (recommended). * comp_lvl : compression level applied * * Returns * ------- * number of bytes used in output buffer, negative error-code if failed. * */ int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size, const size_t elem_size, size_t block_size, const int comp_lvl); /* ---- bshuf_decompress_zstd ---- * * Undo compression and bitshuffling. * * Decompress data then un-bitshuffle it in blocks of *block_size* elements. * * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size* * must patch the parameters used to compress the data. * * Parameters * ---------- * in : input buffer * out : output buffer, must be of size * elem_size bytes * size : number of elements in input * elem_size : element size of typed data * block_size : Process in blocks of this many elements. Pass 0 to * select automatically (recommended). * * Returns * ------- * number of bytes consumed in *input* buffer, negative error-code if failed. * */ int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size, const size_t elem_size, size_t block_size); #endif // ZSTD_SUPPORT #ifdef __cplusplus } // extern "C" #endif #endif // BITSHUFFLE_H bitshuffle-0.5.1/src/bitshuffle_core.c000066400000000000000000001774741434025530100177670ustar00rootroot00000000000000/* * Bitshuffle - Filter for improving compression of typed binary data. 
* * Author: Kiyoshi Masui * Website: http://www.github.com/kiyo-masui/bitshuffle * Created: 2014 * * See LICENSE file for details about copyright and rights to use. * */ #include "bitshuffle_core.h" #include "bitshuffle_internals.h" #include #include #if defined(__AVX512F__) && defined (__AVX512BW__) && defined(__AVX2__) && defined(__SSE2__) #define USEAVX512 #endif #if defined(__AVX2__) && defined (__SSE2__) #define USEAVX2 #endif #if defined(__SSE2__) || defined(NO_WARN_X86_INTRINSICS) #define USESSE2 #endif #if defined(__ARM_NEON__) || (__ARM_NEON) #ifdef __aarch64__ #define USEARMNEON #endif #endif // Conditional includes for SSE2 and AVX2. #ifdef USEAVX2 #include #elif defined USESSE2 #include #elif defined USEARMNEON #include #endif #if defined(_OPENMP) && defined(_MSC_VER) typedef int64_t omp_size_t; #else typedef size_t omp_size_t; #endif // Macros. #define CHECK_MULT_EIGHT(n) if (n % 8) return -80; #define MAX(X,Y) ((X) > (Y) ? (X) : (Y)) /* ---- Functions indicating compile time instruction set. ---- */ int bshuf_using_NEON(void) { #ifdef USEARMNEON return 1; #else return 0; #endif } int bshuf_using_SSE2(void) { #ifdef USESSE2 return 1; #else return 0; #endif } int bshuf_using_AVX2(void) { #ifdef USEAVX2 return 1; #else return 0; #endif } int bshuf_using_AVX512(void) { #ifdef USEAVX512 return 1; #else return 0; #endif } /* ---- Worker code not requiring special instruction sets. ---- * * The following code does not use any x86 specific vectorized instructions * and should compile on any machine * */ /* Transpose 8x8 bit array packed into a single quadword *x*. * *t* is workspace. 
*/ #define TRANS_BIT_8X8(x, t) { \ t = (x ^ (x >> 7)) & 0x00AA00AA00AA00AALL; \ x = x ^ t ^ (t << 7); \ t = (x ^ (x >> 14)) & 0x0000CCCC0000CCCCLL; \ x = x ^ t ^ (t << 14); \ t = (x ^ (x >> 28)) & 0x00000000F0F0F0F0LL; \ x = x ^ t ^ (t << 28); \ } /* Transpose 8x8 bit array along the diagonal from upper right to lower left */ #define TRANS_BIT_8X8_BE(x, t) { \ t = (x ^ (x >> 9)) & 0x0055005500550055LL; \ x = x ^ t ^ (t << 9); \ t = (x ^ (x >> 18)) & 0x0000333300003333LL; \ x = x ^ t ^ (t << 18); \ t = (x ^ (x >> 36)) & 0x000000000F0F0F0FLL; \ x = x ^ t ^ (t << 36); \ } /* Transpose of an array of arbitrarily typed elements. */ #define TRANS_ELEM_TYPE(in, out, lda, ldb, type_t) { \ size_t ii, jj, kk; \ const type_t* in_type = (const type_t*) in; \ type_t* out_type = (type_t*) out; \ for(ii = 0; ii + 7 < lda; ii += 8) { \ for(jj = 0; jj < ldb; jj++) { \ for(kk = 0; kk < 8; kk++) { \ out_type[jj*lda + ii + kk] = \ in_type[ii*ldb + kk * ldb + jj]; \ } \ } \ } \ for(ii = lda - lda % 8; ii < lda; ii ++) { \ for(jj = 0; jj < ldb; jj++) { \ out_type[jj*lda + ii] = in_type[ii*ldb + jj]; \ } \ } \ } /* Memory copy with bshuf call signature. For testing and profiling. */ int64_t bshuf_copy(const void* in, void* out, const size_t size, const size_t elem_size) { const char* in_b = (const char*) in; char* out_b = (char*) out; memcpy(out_b, in_b, size * elem_size); return size * elem_size; } /* Transpose bytes within elements, starting partway through input. */ int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t size, const size_t elem_size, const size_t start) { size_t ii, jj, kk; const char* in_b = (const char*) in; char* out_b = (char*) out; CHECK_MULT_EIGHT(start); if (size > start) { // ii loop separated into 2 loops so the compiler can unroll // the inner one. 
for (ii = start; ii + 7 < size; ii += 8) { for (jj = 0; jj < elem_size; jj++) { for (kk = 0; kk < 8; kk++) { out_b[jj * size + ii + kk] = in_b[ii * elem_size + kk * elem_size + jj]; } } } for (ii = size - size % 8; ii < size; ii ++) { for (jj = 0; jj < elem_size; jj++) { out_b[jj * size + ii] = in_b[ii * elem_size + jj]; } } } return size * elem_size; } /* Transpose bytes within elements. */ int64_t bshuf_trans_byte_elem_scal(const void* in, void* out, const size_t size, const size_t elem_size) { return bshuf_trans_byte_elem_remainder(in, out, size, elem_size, 0); } /* Transpose bits within bytes. */ int64_t bshuf_trans_bit_byte_remainder(const void* in, void* out, const size_t size, const size_t elem_size, const size_t start_byte) { const uint64_t* in_b = (const uint64_t*) in; uint8_t* out_b = (uint8_t*) out; uint64_t x, t; size_t ii, kk; size_t nbyte = elem_size * size; size_t nbyte_bitrow = nbyte / 8; uint64_t e=1; const int little_endian = *(uint8_t *) &e == 1; const size_t bit_row_skip = little_endian ? nbyte_bitrow : -nbyte_bitrow; const int64_t bit_row_offset = little_endian ? 0 : 7 * nbyte_bitrow; CHECK_MULT_EIGHT(nbyte); CHECK_MULT_EIGHT(start_byte); for (ii = start_byte / 8; ii < nbyte_bitrow; ii ++) { x = in_b[ii]; if (little_endian) { TRANS_BIT_8X8(x, t); } else { TRANS_BIT_8X8_BE(x, t); } for (kk = 0; kk < 8; kk ++) { out_b[bit_row_offset + kk * bit_row_skip + ii] = x; x = x >> 8; } } return size * elem_size; } /* Transpose bits within bytes. */ int64_t bshuf_trans_bit_byte_scal(const void* in, void* out, const size_t size, const size_t elem_size) { return bshuf_trans_bit_byte_remainder(in, out, size, elem_size, 0); } /* General transpose of an array, optimized for large element sizes. 
*/ int64_t bshuf_trans_elem(const void* in, void* out, const size_t lda, const size_t ldb, const size_t elem_size) { size_t ii, jj; const char* in_b = (const char*) in; char* out_b = (char*) out; for(ii = 0; ii < lda; ii++) { for(jj = 0; jj < ldb; jj++) { memcpy(&out_b[(jj*lda + ii) * elem_size], &in_b[(ii*ldb + jj) * elem_size], elem_size); } } return lda * ldb * elem_size; } /* Transpose rows of shuffled bits (size / 8 bytes) within groups of 8. */ int64_t bshuf_trans_bitrow_eight(const void* in, void* out, const size_t size, const size_t elem_size) { size_t nbyte_bitrow = size / 8; CHECK_MULT_EIGHT(size); return bshuf_trans_elem(in, out, 8, elem_size, nbyte_bitrow); } /* Transpose bits within elements. */ int64_t bshuf_trans_bit_elem_scal(const void* in, void* out, const size_t size, const size_t elem_size) { int64_t count; void *tmp_buf; CHECK_MULT_EIGHT(size); tmp_buf = malloc(size * elem_size); if (tmp_buf == NULL) return -1; count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); CHECK_ERR_FREE(count, tmp_buf); count = bshuf_trans_bit_byte_scal(out, tmp_buf, size, elem_size); CHECK_ERR_FREE(count, tmp_buf); count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); free(tmp_buf); return count; } /* For data organized into a row for each bit (8 * elem_size rows), transpose * the bytes. */ int64_t bshuf_trans_byte_bitrow_scal(const void* in, void* out, const size_t size, const size_t elem_size) { size_t ii, jj, kk, nbyte_row; const char *in_b; char *out_b; in_b = (const char*) in; out_b = (char*) out; nbyte_row = size / 8; CHECK_MULT_EIGHT(size); for (jj = 0; jj < elem_size; jj++) { for (ii = 0; ii < nbyte_row; ii++) { for (kk = 0; kk < 8; kk++) { out_b[ii * 8 * elem_size + jj * 8 + kk] = \ in_b[(jj * 8 + kk) * nbyte_row + ii]; } } } return size * elem_size; } /* Shuffle bits within the bytes of eight element blocks. 
*/ int64_t bshuf_shuffle_bit_eightelem_scal(const void* in, void* out, \ const size_t size, const size_t elem_size) { const char *in_b; char *out_b; uint64_t x, t; size_t ii, jj, kk; size_t nbyte, out_index; uint64_t e=1; const int little_endian = *(uint8_t *) &e == 1; const size_t elem_skip = little_endian ? elem_size : -elem_size; const uint64_t elem_offset = little_endian ? 0 : 7 * elem_size; CHECK_MULT_EIGHT(size); in_b = (const char*) in; out_b = (char*) out; nbyte = elem_size * size; for (jj = 0; jj < 8 * elem_size; jj += 8) { for (ii = 0; ii + 8 * elem_size - 1 < nbyte; ii += 8 * elem_size) { x = *((uint64_t*) &in_b[ii + jj]); if (little_endian) { TRANS_BIT_8X8(x, t); } else { TRANS_BIT_8X8_BE(x, t); } for (kk = 0; kk < 8; kk++) { out_index = ii + jj / 8 + elem_offset + kk * elem_skip; *((uint8_t*) &out_b[out_index]) = x; x = x >> 8; } } } return size * elem_size; } /* Untranspose bits within elements. */ int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size, const size_t elem_size) { int64_t count; void *tmp_buf; CHECK_MULT_EIGHT(size); tmp_buf = malloc(size * elem_size); if (tmp_buf == NULL) return -1; count = bshuf_trans_byte_bitrow_scal(in, tmp_buf, size, elem_size); CHECK_ERR_FREE(count, tmp_buf); count = bshuf_shuffle_bit_eightelem_scal(tmp_buf, out, size, elem_size); free(tmp_buf); return count; } /* ---- Worker code that uses Arm NEON ---- * * The following code makes use of the Arm NEON instruction set. * NEON technology is the implementation of the ARM Advanced Single * Instruction Multiple Data (SIMD) extension. * The NEON unit is the component of the processor that executes SIMD instructions. * It is also called the NEON Media Processing Engine (MPE). * */ #ifdef USEARMNEON /* Transpose bytes within elements for 16 bit elements. 
*/
/* Handles 16 elements (32 input bytes) per iteration: two 16-byte loads,
 * four rounds of zip1/zip2, then one 16-byte store into each of the two
 * output byte rows (rows are *size* bytes apart). Elements past the last
 * full group of 16 are delegated to bshuf_trans_byte_elem_remainder, whose
 * return value is returned. */
int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) {

    size_t ii;
    const char *in_b = (const char*) in;
    char *out_b = (char*) out;
    int8x16_t a0, b0, a1, b1;

    for (ii=0; ii + 15 < size; ii += 16) {
        a0 = vld1q_s8(in_b + 2*ii + 0*16);
        b0 = vld1q_s8(in_b + 2*ii + 1*16);

        a1 = vzip1q_s8(a0, b0);
        b1 = vzip2q_s8(a0, b0);

        a0 = vzip1q_s8(a1, b1);
        b0 = vzip2q_s8(a1, b1);

        a1 = vzip1q_s8(a0, b0);
        b1 = vzip2q_s8(a0, b0);

        a0 = vzip1q_s8(a1, b1);
        b0 = vzip2q_s8(a1, b1);

        vst1q_s8(out_b + 0*size + ii, a0);
        vst1q_s8(out_b + 1*size + ii, b0);
    }
    // size - size % 16 is a multiple of 8, as the remainder routine requires.
    return bshuf_trans_byte_elem_remainder(in, out, size, 2,
            size - size % 16);
}


/* Transpose bytes within elements for 32 bit elements.
 *
 * Handles 16 elements (64 input bytes) per iteration: four 16-byte loads,
 * three rounds of byte zips, one round of 64-bit zips, then one 16-byte
 * store into each of the four output byte rows. The tail is delegated to
 * bshuf_trans_byte_elem_remainder, whose return value is returned. */
int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) {

    size_t ii;
    const char *in_b;
    char *out_b;
    in_b = (const char*) in;
    out_b = (char*) out;
    int8x16_t a0, b0, c0, d0, a1, b1, c1, d1;
    int64x2_t a2, b2, c2, d2;

    for (ii=0; ii + 15 < size; ii += 16) {
        a0 = vld1q_s8(in_b + 4*ii + 0*16);
        b0 = vld1q_s8(in_b + 4*ii + 1*16);
        c0 = vld1q_s8(in_b + 4*ii + 2*16);
        d0 = vld1q_s8(in_b + 4*ii + 3*16);

        a1 = vzip1q_s8(a0, b0);
        b1 = vzip2q_s8(a0, b0);
        c1 = vzip1q_s8(c0, d0);
        d1 = vzip2q_s8(c0, d0);

        a0 = vzip1q_s8(a1, b1);
        b0 = vzip2q_s8(a1, b1);
        c0 = vzip1q_s8(c1, d1);
        d0 = vzip2q_s8(c1, d1);

        a1 = vzip1q_s8(a0, b0);
        b1 = vzip2q_s8(a0, b0);
        c1 = vzip1q_s8(c0, d0);
        d1 = vzip2q_s8(c0, d0);

        // Final round pairs 64-bit halves across the two register groups.
        a2 = vzip1q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1));
        b2 = vzip2q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1));
        c2 = vzip1q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1));
        d2 = vzip2q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1));

        vst1q_s64((int64_t *) (out_b + 0*size + ii), a2);
        vst1q_s64((int64_t *) (out_b + 1*size + ii), b2);
        vst1q_s64((int64_t *) (out_b + 2*size + ii), c2);
        vst1q_s64((int64_t *) (out_b + 3*size + ii), d2);
    }
    return bshuf_trans_byte_elem_remainder(in, out, size, 4,
            size - size % 16);
}


/* Transpose bytes within elements
for 64 bit elements. */
/* Handles 16 elements (128 input bytes) per iteration: eight 16-byte loads,
 * two rounds of byte zips, one round of 32-bit zips, one round of 64-bit
 * zips, then one 16-byte store into each of the eight output byte rows.
 * The tail is delegated to bshuf_trans_byte_elem_remainder, whose return
 * value is returned. */
int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) {

    size_t ii;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
    int8x16_t a0, b0, c0, d0, e0, f0, g0, h0;
    int8x16_t a1, b1, c1, d1, e1, f1, g1, h1;

    for (ii=0; ii + 15 < size; ii += 16) {
        a0 = vld1q_s8(in_b + 8*ii + 0*16);
        b0 = vld1q_s8(in_b + 8*ii + 1*16);
        c0 = vld1q_s8(in_b + 8*ii + 2*16);
        d0 = vld1q_s8(in_b + 8*ii + 3*16);
        e0 = vld1q_s8(in_b + 8*ii + 4*16);
        f0 = vld1q_s8(in_b + 8*ii + 5*16);
        g0 = vld1q_s8(in_b + 8*ii + 6*16);
        h0 = vld1q_s8(in_b + 8*ii + 7*16);

        a1 = vzip1q_s8 (a0, b0);
        b1 = vzip2q_s8 (a0, b0);
        c1 = vzip1q_s8 (c0, d0);
        d1 = vzip2q_s8 (c0, d0);
        e1 = vzip1q_s8 (e0, f0);
        f1 = vzip2q_s8 (e0, f0);
        g1 = vzip1q_s8 (g0, h0);
        h1 = vzip2q_s8 (g0, h0);

        a0 = vzip1q_s8 (a1, b1);
        b0 = vzip2q_s8 (a1, b1);
        c0 = vzip1q_s8 (c1, d1);
        d0 = vzip2q_s8 (c1, d1);
        e0 = vzip1q_s8 (e1, f1);
        f0 = vzip2q_s8 (e1, f1);
        g0 = vzip1q_s8 (g1, h1);
        h0 = vzip2q_s8 (g1, h1);

        a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0));
        b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0));
        c1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0));
        d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0));
        e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0));
        f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0));
        g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0));
        h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0));

        a0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1));
        b0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1));
        c0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1));
        d0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1));
        e0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1));
        f0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1));
        g0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1));
        h0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1));

        vst1q_s8(out_b + 0*size + ii, a0);
        vst1q_s8(out_b + 1*size + ii, b0);
        vst1q_s8(out_b + 2*size + ii, c0);
        vst1q_s8(out_b + 3*size + ii, d0);
        vst1q_s8(out_b + 4*size + ii, e0);
        vst1q_s8(out_b + 5*size + ii, f0);
        vst1q_s8(out_b + 6*size + ii, g0);
        vst1q_s8(out_b + 7*size + ii, h0);
    }
    return bshuf_trans_byte_elem_remainder(in, out, size, 8,
            size - size % 16);
}


/* Transpose bytes within elements using best NEON algorithm available.
 *
 * Dispatches on *elem_size*: 1 is a plain copy, 2/4/8 use the dedicated
 * NEON kernels, sizes that are not a multiple of 4 use the scalar routine,
 * and remaining multiples of 4 or 8 are transposed hierarchically through a
 * scratch buffer.
 *
 * Returns the count reported by the routine that did the work, or -1 if the
 * scratch buffer cannot be allocated.
 */
int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    int64_t count;

    // Trivial cases: power of 2 bytes.
    switch (elem_size) {
        case 1:
            count = bshuf_copy(in, out, size, elem_size);
            return count;
        case 2:
            count = bshuf_trans_byte_elem_NEON_16(in, out, size);
            return count;
        case 4:
            count = bshuf_trans_byte_elem_NEON_32(in, out, size);
            return count;
        case 8:
            count = bshuf_trans_byte_elem_NEON_64(in, out, size);
            return count;
    }

    // Worst case: odd number of bytes. Turns out that this is faster for
    // (odd * 2) byte elements as well (hence % 4).
    if (elem_size % 4) {
        count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
        return count;
    }

    // Multiple of power of 2: transpose hierarchically.
    // Three steps per branch: (1) transpose the size x nchunk_elem array of
    // machine words from in to out, (2) byte-transpose those words with the
    // matching NEON kernel into tmp_buf, (3) regroup the resulting byte rows
    // back into out. Only the count from step (2) is returned; the return
    // value of bshuf_trans_elem is not used.
    {
        size_t nchunk_elem;
        void* tmp_buf = malloc(size * elem_size);
        if (tmp_buf == NULL) return -1;

        if ((elem_size % 8) == 0) {
            nchunk_elem = elem_size / 8;
            TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t);
            count = bshuf_trans_byte_elem_NEON_64(out, tmp_buf,
                    size * nchunk_elem);
            bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size);
        } else if ((elem_size % 4) == 0) {
            nchunk_elem = elem_size / 4;
            TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t);
            count = bshuf_trans_byte_elem_NEON_32(out, tmp_buf,
                    size * nchunk_elem);
            bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size);
        } else {
            // Not used since scalar algorithm is faster.
            nchunk_elem = elem_size / 2;
            TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t);
            count = bshuf_trans_byte_elem_NEON_16(out, tmp_buf,
                    size * nchunk_elem);
            bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size);
        }

        free(tmp_buf);
        return count;
    }
}


/* Creates a mask made up of the most significant
 * bit of each byte of 'input'
 *
 * Bit k of the result is the top bit of lane k, for k = 0..15. The SSE2
 * code path in this file uses _mm_movemask_epi8 in the same role.
 */
int32_t move_byte_mask_neon(uint8x16_t input) {

    return (  ((input[0] & 0x80) >> 7)          | (((input[1] & 0x80) >> 7) << 1)   | (((input[2] & 0x80) >> 7) << 2)   | (((input[3] & 0x80) >> 7) << 3)
            | (((input[4] & 0x80) >> 7) << 4)   | (((input[5] & 0x80) >> 7) << 5)   | (((input[6] & 0x80) >> 7) << 6)   | (((input[7] & 0x80) >> 7) << 7)
            | (((input[8] & 0x80) >> 7) << 8)   | (((input[9] & 0x80) >> 7) << 9)   | (((input[10] & 0x80) >> 7) << 10) | (((input[11] & 0x80) >> 7) << 11)
            | (((input[12] & 0x80) >> 7) << 12) | (((input[13] & 0x80) >> 7) << 13) | (((input[14] & 0x80) >> 7) << 14) | (((input[15] & 0x80) >> 7) << 15)
           );
}

/* Transpose bits within bytes.
*/
/* Handles 16 input bytes per iteration. For each of the eight bit
 * positions, starting with the most significant, it collects that bit from
 * all 16 bytes into a 16-bit mask and stores the mask into bit row
 * (7 - kk) of *out*, where each bit row is nbyte / 8 bytes long. Bytes past
 * the last full group of 16 are delegated to
 * bshuf_trans_bit_byte_remainder.
 *
 * Returns the remainder routine's result, or -80 if size * elem_size is not
 * a multiple of 8.
 */
int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    size_t ii, kk;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
    uint16_t* out_ui16;

    int64_t count;

    size_t nbyte = elem_size * size;

    CHECK_MULT_EIGHT(nbyte);

    int16x8_t xmm;
    int32_t bt;

    for (ii = 0; ii + 15 < nbyte; ii += 16) {
        xmm = vld1q_s16((int16_t *) (in_b + ii));
        for (kk = 0; kk < 8; kk++) {
            bt = move_byte_mask_neon((uint8x16_t) xmm);
            // Shift every lane left by one so the next lower bit of each
            // byte becomes that byte's top bit for the next pass.
            xmm = vshlq_n_s16(xmm, 1);
            // NOTE(review): 16-bit store through a cast of a char buffer;
            // presumably relies on little-endian byte order and tolerated
            // unaligned access -- confirm.
            out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
            *out_ui16 = bt;
        }
    }
    count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
            nbyte - nbyte % 16);
    return count;
}


/* Transpose bits within elements.
 *
 * Runs three passes: NEON byte transpose (in -> out), NEON bit transpose
 * within bytes (out -> scratch), then bit-row regrouping (scratch -> out).
 *
 * Returns the count from the final pass, a negative error code propagated
 * from an earlier pass, -80 if *size* is not a multiple of 8, or -1 if the
 * scratch buffer cannot be allocated.
 */
int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    int64_t count;

    CHECK_MULT_EIGHT(size);

    void* tmp_buf = malloc(size * elem_size);
    if (tmp_buf == NULL) return -1;

    count = bshuf_trans_byte_elem_NEON(in, out, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    count = bshuf_trans_bit_byte_NEON(out, tmp_buf, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size);

    free(tmp_buf);

    return count;
}


/* For data organized into a row for each bit (8 * elem_size rows), transpose
 * the bytes.
*/
/* Input is read as nrows = 8 * elem_size rows of nbyte_row = size / 8
 * bytes. Rows are taken eight at a time; within those, 16 columns at a time
 * are transposed in registers and written as sixteen 8-byte stores, one per
 * output row (output rows are nrows bytes apart). Columns past the last
 * full group of 16 are copied bytewise.
 *
 * Returns size * elem_size, or -80 if *size* is not a multiple of 8.
 */
int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    size_t ii, jj;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;

    CHECK_MULT_EIGHT(size);

    size_t nrows = 8 * elem_size;
    size_t nbyte_row = size / 8;

    int8x16_t a0, b0, c0, d0, e0, f0, g0, h0;
    int8x16_t a1, b1, c1, d1, e1, f1, g1, h1;
    int64x1_t *as, *bs, *cs, *ds, *es, *fs, *gs, *hs;

    for (ii = 0; ii + 7 < nrows; ii += 8) {
        for (jj = 0; jj + 15 < nbyte_row; jj += 16) {
            a0 = vld1q_s8(in_b + (ii + 0)*nbyte_row + jj);
            b0 = vld1q_s8(in_b + (ii + 1)*nbyte_row + jj);
            c0 = vld1q_s8(in_b + (ii + 2)*nbyte_row + jj);
            d0 = vld1q_s8(in_b + (ii + 3)*nbyte_row + jj);
            e0 = vld1q_s8(in_b + (ii + 4)*nbyte_row + jj);
            f0 = vld1q_s8(in_b + (ii + 5)*nbyte_row + jj);
            g0 = vld1q_s8(in_b + (ii + 6)*nbyte_row + jj);
            h0 = vld1q_s8(in_b + (ii + 7)*nbyte_row + jj);

            a1 = vzip1q_s8(a0, b0);
            b1 = vzip1q_s8(c0, d0);
            c1 = vzip1q_s8(e0, f0);
            d1 = vzip1q_s8(g0, h0);
            e1 = vzip2q_s8(a0, b0);
            f1 = vzip2q_s8(c0, d0);
            g1 = vzip2q_s8(e0, f0);
            h1 = vzip2q_s8(g0, h0);

            a0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1));
            b0= (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1));
            c0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1));
            d0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1));
            e0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1));
            f0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1));
            g0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1));
            h0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1));

            a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0));
            b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0));
            c1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0));
            d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0));
            e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0));
            f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0));
            g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0));
            h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0));

            // View each 128-bit register as two 64-bit halves so each half
            // can be stored to its own output row.
            as = (int64x1_t *) &a1;
            bs = (int64x1_t *) &b1;
            cs = (int64x1_t *) &c1;
            ds = (int64x1_t *) &d1;
            es = (int64x1_t *) &e1;
            fs = (int64x1_t *) &f1;
            gs = (int64x1_t *) &g1;
            hs = (int64x1_t *) &h1;

            vst1_s64((int64_t *)(out_b + (jj + 0) * nrows + ii), *as);
            vst1_s64((int64_t *)(out_b + (jj + 1) * nrows + ii), *(as + 1));
            vst1_s64((int64_t *)(out_b + (jj + 2) * nrows + ii), *bs);
            vst1_s64((int64_t *)(out_b + (jj + 3) * nrows + ii), *(bs + 1));
            vst1_s64((int64_t *)(out_b + (jj + 4) * nrows + ii), *cs);
            vst1_s64((int64_t *)(out_b + (jj + 5) * nrows + ii), *(cs + 1));
            vst1_s64((int64_t *)(out_b + (jj + 6) * nrows + ii), *ds);
            vst1_s64((int64_t *)(out_b + (jj + 7) * nrows + ii), *(ds + 1));
            vst1_s64((int64_t *)(out_b + (jj + 8) * nrows + ii), *es);
            vst1_s64((int64_t *)(out_b + (jj + 9) * nrows + ii), *(es + 1));
            vst1_s64((int64_t *)(out_b + (jj + 10) * nrows + ii), *fs);
            vst1_s64((int64_t *)(out_b + (jj + 11) * nrows + ii), *(fs + 1));
            vst1_s64((int64_t *)(out_b + (jj + 12) * nrows + ii), *gs);
            vst1_s64((int64_t *)(out_b + (jj + 13) * nrows + ii), *(gs + 1));
            vst1_s64((int64_t *)(out_b + (jj + 14) * nrows + ii), *hs);
            vst1_s64((int64_t *)(out_b + (jj + 15) * nrows + ii), *(hs + 1));
        }
        // Scalar tail for the last nbyte_row % 16 columns of these 8 rows.
        for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) {
            out_b[jj * nrows + ii + 0] = in_b[(ii + 0)*nbyte_row + jj];
            out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj];
            out_b[jj * nrows + ii + 2] = in_b[(ii + 2)*nbyte_row + jj];
            out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj];
            out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj];
            out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj];
            out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj];
            out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj];
        }
    }
    return size * elem_size;
}


/* Shuffle bits within the bytes of eight element blocks.
 *
 * Odd element sizes are handed to the scalar routine. Otherwise each block
 * of eight elements is read 16 bytes at a time; for each of the eight bit
 * positions, starting with the most significant, a 16-bit mask of that bit
 * is written into element (7 - kk) of the block.
 *
 * Returns size * elem_size, or -80 if *size* is not a multiple of 8.
 */
int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    CHECK_MULT_EIGHT(size);

    // With a bit of care, this could be written such that such that it is
    // in_buf = out_buf safe.
    const char* in_b = (const char*) in;
    uint16_t* out_ui16 = (uint16_t*) out;

    size_t ii, jj, kk;
    size_t nbyte = elem_size * size;

    int16x8_t xmm;
    int32_t bt;

    if (elem_size % 2) {
        // The scalar routine's return value is not checked here.
        bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size);
    } else {
        for (ii = 0; ii + 8 * elem_size - 1 < nbyte;
                ii += 8 * elem_size) {
            for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) {
                xmm = vld1q_s16((int16_t *) &in_b[ii + jj]);
                for (kk = 0; kk < 8; kk++) {
                    bt = move_byte_mask_neon((uint8x16_t) xmm);
                    xmm = vshlq_n_s16(xmm, 1);
                    // ind is a byte offset; it is even because elem_size is
                    // even here and jj is a multiple of 16.
                    size_t ind = (ii + jj / 8 + (7 - kk) * elem_size);
                    out_ui16[ind / 2] = bt;
                }
            }
        }
    }
    return size * elem_size;
}


/* Untranspose bits within elements.
*/
/* Runs two passes through a scratch buffer: NEON byte transpose of the bit
 * rows (in -> scratch), then NEON bit shuffle within eight-element blocks
 * (scratch -> out).
 *
 * Returns the count from the final pass, a negative error code propagated
 * from the first pass, -80 if *size* is not a multiple of 8, or -1 if the
 * scratch buffer cannot be allocated.
 */
int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    int64_t count;

    CHECK_MULT_EIGHT(size);

    void* tmp_buf = malloc(size * elem_size);
    if (tmp_buf == NULL) return -1;

    count = bshuf_trans_byte_bitrow_NEON(in, tmp_buf, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    count =  bshuf_shuffle_bit_eightelem_NEON(tmp_buf, out, size, elem_size);

    free(tmp_buf);

    return count;
}

#else // #ifdef USEARMNEON

/* Stubs compiled when USEARMNEON is not defined. Each keeps the NEON
 * routine's name and signature and returns -13 without touching its
 * arguments.
 * NOTE(review): -13 is presumably the "missing NEON" counterpart of the
 * -11 (missing SSE) and -12 (missing AVX) codes listed in bitshuffle.h;
 * it is not listed there -- confirm and document. */

int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -13;
}


int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -13;
}


int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -13;
}


int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -13;
}


int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -13;
}


int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) {
    return -13;
}


int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) {
    return -13;
}


int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) {
    return -13;
}


int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -13;
}

#endif




/* ---- Worker code that uses SSE2 ----
 *
 * The following code makes use of the SSE2 instruction set and specialized
 * 16 byte registers. The SSE2 instructions are present on modern x86
 * processors. The first Intel processor microarchitecture supporting SSE2 was
 * Pentium 4 (2000).
 *
 */

#ifdef USESSE2

/* Transpose bytes within elements for 16 bit elements.
*/
/* Handles 16 elements (32 input bytes) per iteration: two unaligned 16-byte
 * loads, four rounds of unpacklo/unpackhi on bytes, then one 16-byte store
 * into each of the two output byte rows (rows are *size* bytes apart).
 * Elements past the last full group of 16 are delegated to
 * bshuf_trans_byte_elem_remainder, whose return value is returned. */
int64_t bshuf_trans_byte_elem_SSE_16(const void* in, void* out, const size_t size) {

    size_t ii;
    const char *in_b = (const char*) in;
    char *out_b = (char*) out;
    __m128i a0, b0, a1, b1;

    for (ii=0; ii + 15 < size; ii += 16) {
        a0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 0*16]);
        b0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 1*16]);

        a1 = _mm_unpacklo_epi8(a0, b0);
        b1 = _mm_unpackhi_epi8(a0, b0);

        a0 = _mm_unpacklo_epi8(a1, b1);
        b0 = _mm_unpackhi_epi8(a1, b1);

        a1 = _mm_unpacklo_epi8(a0, b0);
        b1 = _mm_unpackhi_epi8(a0, b0);

        a0 = _mm_unpacklo_epi8(a1, b1);
        b0 = _mm_unpackhi_epi8(a1, b1);

        _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0);
        _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0);
    }
    // size - size % 16 is a multiple of 8, as the remainder routine requires.
    return bshuf_trans_byte_elem_remainder(in, out, size, 2,
            size - size % 16);
}


/* Transpose bytes within elements for 32 bit elements.
 *
 * Handles 16 elements (64 input bytes) per iteration: four 16-byte loads,
 * three rounds of byte unpacks, one round of 64-bit unpacks, then one
 * 16-byte store into each of the four output byte rows. The tail is
 * delegated to bshuf_trans_byte_elem_remainder. */
int64_t bshuf_trans_byte_elem_SSE_32(const void* in, void* out, const size_t size) {

    size_t ii;
    const char *in_b;
    char *out_b;
    in_b = (const char*) in;
    out_b = (char*) out;
    __m128i a0, b0, c0, d0, a1, b1, c1, d1;

    for (ii=0; ii + 15 < size; ii += 16) {
        a0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 0*16]);
        b0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 1*16]);
        c0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 2*16]);
        d0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 3*16]);

        a1 = _mm_unpacklo_epi8(a0, b0);
        b1 = _mm_unpackhi_epi8(a0, b0);
        c1 = _mm_unpacklo_epi8(c0, d0);
        d1 = _mm_unpackhi_epi8(c0, d0);

        a0 = _mm_unpacklo_epi8(a1, b1);
        b0 = _mm_unpackhi_epi8(a1, b1);
        c0 = _mm_unpacklo_epi8(c1, d1);
        d0 = _mm_unpackhi_epi8(c1, d1);

        a1 = _mm_unpacklo_epi8(a0, b0);
        b1 = _mm_unpackhi_epi8(a0, b0);
        c1 = _mm_unpacklo_epi8(c0, d0);
        d1 = _mm_unpackhi_epi8(c0, d0);

        // Final round pairs 64-bit halves across the two register groups.
        a0 = _mm_unpacklo_epi64(a1, c1);
        b0 = _mm_unpackhi_epi64(a1, c1);
        c0 = _mm_unpacklo_epi64(b1, d1);
        d0 = _mm_unpackhi_epi64(b1, d1);

        _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0);
        _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0);
        _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0);
        _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0);
    }
    return bshuf_trans_byte_elem_remainder(in, out, size, 4,
            size - size % 16);
}


/* Transpose bytes within elements for 64 bit elements.
 *
 * Handles 16 elements (128 input bytes) per iteration: eight 16-byte loads,
 * two rounds of byte unpacks, one round of 32-bit unpacks, one round of
 * 64-bit unpacks, then one 16-byte store into each of the eight output byte
 * rows. The tail is delegated to bshuf_trans_byte_elem_remainder. */
int64_t bshuf_trans_byte_elem_SSE_64(const void* in, void* out, const size_t size) {

    size_t ii;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
    __m128i a0, b0, c0, d0, e0, f0, g0, h0;
    __m128i a1, b1, c1, d1, e1, f1, g1, h1;

    for (ii=0; ii + 15 < size; ii += 16) {
        a0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 0*16]);
        b0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 1*16]);
        c0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 2*16]);
        d0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 3*16]);
        e0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 4*16]);
        f0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 5*16]);
        g0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 6*16]);
        h0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 7*16]);

        a1 = _mm_unpacklo_epi8(a0, b0);
        b1 = _mm_unpackhi_epi8(a0, b0);
        c1 = _mm_unpacklo_epi8(c0, d0);
        d1 = _mm_unpackhi_epi8(c0, d0);
        e1 = _mm_unpacklo_epi8(e0, f0);
        f1 = _mm_unpackhi_epi8(e0, f0);
        g1 = _mm_unpacklo_epi8(g0, h0);
        h1 = _mm_unpackhi_epi8(g0, h0);

        a0 = _mm_unpacklo_epi8(a1, b1);
        b0 = _mm_unpackhi_epi8(a1, b1);
        c0 = _mm_unpacklo_epi8(c1, d1);
        d0 = _mm_unpackhi_epi8(c1, d1);
        e0 = _mm_unpacklo_epi8(e1, f1);
        f0 = _mm_unpackhi_epi8(e1, f1);
        g0 = _mm_unpacklo_epi8(g1, h1);
        h0 = _mm_unpackhi_epi8(g1, h1);

        a1 = _mm_unpacklo_epi32(a0, c0);
        b1 = _mm_unpackhi_epi32(a0, c0);
        c1 = _mm_unpacklo_epi32(b0, d0);
        d1 = _mm_unpackhi_epi32(b0, d0);
        e1 = _mm_unpacklo_epi32(e0, g0);
        f1 = _mm_unpackhi_epi32(e0, g0);
        g1 = _mm_unpacklo_epi32(f0, h0);
        h1 = _mm_unpackhi_epi32(f0, h0);

        a0 = _mm_unpacklo_epi64(a1, e1);
        b0 = _mm_unpackhi_epi64(a1, e1);
        c0 = _mm_unpacklo_epi64(b1, f1);
        d0 = _mm_unpackhi_epi64(b1, f1);
        e0 = _mm_unpacklo_epi64(c1, g1);
        f0 = _mm_unpackhi_epi64(c1, g1);
        g0 = _mm_unpacklo_epi64(d1, h1);
        h0 = _mm_unpackhi_epi64(d1, h1);

        _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0);
        _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0);
        _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0);
        _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0);
        _mm_storeu_si128((__m128i *) &out_b[4*size + ii], e0);
        _mm_storeu_si128((__m128i *) &out_b[5*size + ii], f0);
        _mm_storeu_si128((__m128i *) &out_b[6*size + ii], g0);
        _mm_storeu_si128((__m128i *) &out_b[7*size + ii], h0);
    }
    return bshuf_trans_byte_elem_remainder(in, out, size, 8,
            size - size % 16);
}


/* Transpose bytes within elements using best SSE algorithm available.
 *
 * Dispatches on *elem_size*: 1 is a plain copy, 2/4/8 use the dedicated
 * SSE kernels, sizes that are not a multiple of 4 use the scalar routine,
 * and remaining multiples of 4 or 8 are transposed hierarchically through a
 * scratch buffer.
 *
 * Returns the count reported by the routine that did the work, or -1 if the
 * scratch buffer cannot be allocated.
 */
int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    int64_t count;

    // Trivial cases: power of 2 bytes.
    switch (elem_size) {
        case 1:
            count = bshuf_copy(in, out, size, elem_size);
            return count;
        case 2:
            count = bshuf_trans_byte_elem_SSE_16(in, out, size);
            return count;
        case 4:
            count = bshuf_trans_byte_elem_SSE_32(in, out, size);
            return count;
        case 8:
            count = bshuf_trans_byte_elem_SSE_64(in, out, size);
            return count;
    }

    // Worst case: odd number of bytes. Turns out that this is faster for
    // (odd * 2) byte elements as well (hence % 4).
    if (elem_size % 4) {
        count = bshuf_trans_byte_elem_scal(in, out, size, elem_size);
        return count;
    }

    // Multiple of power of 2: transpose hierarchically.
    // Three steps per branch: (1) transpose the size x nchunk_elem array of
    // machine words from in to out, (2) byte-transpose those words with the
    // matching SSE kernel into tmp_buf, (3) regroup the resulting byte rows
    // back into out. Only the count from step (2) is returned; the return
    // value of bshuf_trans_elem is not used.
    {
        size_t nchunk_elem;
        void* tmp_buf = malloc(size * elem_size);
        if (tmp_buf == NULL) return -1;

        if ((elem_size % 8) == 0) {
            nchunk_elem = elem_size / 8;
            TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t);
            count = bshuf_trans_byte_elem_SSE_64(out, tmp_buf,
                    size * nchunk_elem);
            bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size);
        } else if ((elem_size % 4) == 0) {
            nchunk_elem = elem_size / 4;
            TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t);
            count = bshuf_trans_byte_elem_SSE_32(out, tmp_buf,
                    size * nchunk_elem);
            bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size);
        } else {
            // Not used since scalar algorithm is faster.
            nchunk_elem = elem_size / 2;
            TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t);
            count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf,
                    size * nchunk_elem);
            bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size);
        }

        free(tmp_buf);
        return count;
    }
}


/* Transpose bits within bytes.
 *
 * Handles 16 input bytes per iteration. For each of the eight bit
 * positions, starting with the most significant, _mm_movemask_epi8 collects
 * that bit from all 16 bytes into a 16-bit mask, which is stored into bit
 * row (7 - kk) of *out* (each bit row is nbyte / 8 bytes long). Bytes past
 * the last full group of 16 are delegated to
 * bshuf_trans_bit_byte_remainder.
 *
 * Returns the remainder routine's result, or -80 if size * elem_size is not
 * a multiple of 8.
 */
int64_t bshuf_trans_bit_byte_SSE(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    size_t ii, kk;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
    uint16_t* out_ui16;

    int64_t count;

    size_t nbyte = elem_size * size;

    CHECK_MULT_EIGHT(nbyte);

    __m128i xmm;
    int32_t bt;

    for (ii = 0; ii + 15 < nbyte; ii += 16) {
        xmm = _mm_loadu_si128((__m128i *) &in_b[ii]);
        for (kk = 0; kk < 8; kk++) {
            bt = _mm_movemask_epi8(xmm);
            // Shift every 16-bit lane left by one so the next lower bit of
            // each byte becomes that byte's top bit for the next pass.
            xmm = _mm_slli_epi16(xmm, 1);
            out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
            *out_ui16 = bt;
        }
    }
    count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
            nbyte - nbyte % 16);
    return count;
}


/* Transpose bits within elements.
 *
 * Runs three passes: SSE byte transpose (in -> out), SSE bit transpose
 * within bytes (out -> scratch), then bit-row regrouping (scratch -> out).
 *
 * Returns the count from the final pass, a negative error code propagated
 * from an earlier pass, -80 if *size* is not a multiple of 8, or -1 if the
 * scratch buffer cannot be allocated.
 */
int64_t bshuf_trans_bit_elem_SSE(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    int64_t count;

    CHECK_MULT_EIGHT(size);

    void* tmp_buf = malloc(size * elem_size);
    if (tmp_buf == NULL) return -1;

    count = bshuf_trans_byte_elem_SSE(in, out, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    count = bshuf_trans_bit_byte_SSE(out, tmp_buf, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size);

    free(tmp_buf);

    return count;
}


/* For data organized into a row for each bit (8 * elem_size rows), transpose
 * the bytes.
*/
/* Layout handled below: the input is read as nrows = 8 * elem_size rows of
 * nbyte_row = size / 8 bytes each, and output byte (jj, ii) is written at
 * out_b[jj * nrows + ii].  Tiles of 8 rows by 16 columns go through the SSE
 * path; the columns left over (nbyte_row % 16) are copied byte by byte.
 * Returns size * elem_size. */
int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    size_t ii, jj;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;

    // Macro defined outside this span; presumably rejects sizes that are not
    // a multiple of eight -- TODO confirm.
    CHECK_MULT_EIGHT(size);

    size_t nrows = 8 * elem_size;
    size_t nbyte_row = size / 8;

    __m128i a0, b0, c0, d0, e0, f0, g0, h0;
    __m128i a1, b1, c1, d1, e1, f1, g1, h1;
    __m128 *as, *bs, *cs, *ds, *es, *fs, *gs, *hs;

    for (ii = 0; ii + 7 < nrows; ii += 8) {
        for (jj = 0; jj + 15 < nbyte_row; jj += 16) {
            // 16 bytes from each of eight consecutive rows.
            a0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 0)*nbyte_row + jj]);
            b0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 1)*nbyte_row + jj]);
            c0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 2)*nbyte_row + jj]);
            d0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 3)*nbyte_row + jj]);
            e0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 4)*nbyte_row + jj]);
            f0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 5)*nbyte_row + jj]);
            g0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 6)*nbyte_row + jj]);
            h0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 7)*nbyte_row + jj]);


            // Byte interleave: low halves into a1..d1, high halves into
            // e1..h1.
            a1 = _mm_unpacklo_epi8(a0, b0);
            b1 = _mm_unpacklo_epi8(c0, d0);
            c1 = _mm_unpacklo_epi8(e0, f0);
            d1 = _mm_unpacklo_epi8(g0, h0);
            e1 = _mm_unpackhi_epi8(a0, b0);
            f1 = _mm_unpackhi_epi8(c0, d0);
            g1 = _mm_unpackhi_epi8(e0, f0);
            h1 = _mm_unpackhi_epi8(g0, h0);


            // 16 bit interleave.
            a0 = _mm_unpacklo_epi16(a1, b1);
            b0 = _mm_unpacklo_epi16(c1, d1);
            c0 = _mm_unpackhi_epi16(a1, b1);
            d0 = _mm_unpackhi_epi16(c1, d1);
            e0 = _mm_unpacklo_epi16(e1, f1);
            f0 = _mm_unpacklo_epi16(g1, h1);
            g0 = _mm_unpackhi_epi16(e1, f1);
            h0 = _mm_unpackhi_epi16(g1, h1);


            // 32 bit interleave: each register now carries two 8 byte
            // output pieces, written out separately below.
            a1 = _mm_unpacklo_epi32(a0, b0);
            b1 = _mm_unpackhi_epi32(a0, b0);
            c1 = _mm_unpacklo_epi32(c0, d0);
            d1 = _mm_unpackhi_epi32(c0, d0);
            e1 = _mm_unpacklo_epi32(e0, f0);
            f1 = _mm_unpackhi_epi32(e0, f0);
            g1 = _mm_unpacklo_epi32(g0, h0);
            h1 = _mm_unpackhi_epi32(g0, h0);

            // We don't have a storeh instruction for integers, so interpret
            // as a float. Have a storel (_mm_storel_epi64).
            as = (__m128 *) &a1;
            bs = (__m128 *) &b1;
            cs = (__m128 *) &c1;
            ds = (__m128 *) &d1;
            es = (__m128 *) &e1;
            fs = (__m128 *) &f1;
            gs = (__m128 *) &g1;
            hs = (__m128 *) &h1;

            // Low 8 bytes of each register go to the even output columns ...
            _mm_storel_pi((__m64 *) &out_b[(jj + 0) * nrows + ii], *as);
            _mm_storel_pi((__m64 *) &out_b[(jj + 2) * nrows + ii], *bs);
            _mm_storel_pi((__m64 *) &out_b[(jj + 4) * nrows + ii], *cs);
            _mm_storel_pi((__m64 *) &out_b[(jj + 6) * nrows + ii], *ds);
            _mm_storel_pi((__m64 *) &out_b[(jj + 8) * nrows + ii], *es);
            _mm_storel_pi((__m64 *) &out_b[(jj + 10) * nrows + ii], *fs);
            _mm_storel_pi((__m64 *) &out_b[(jj + 12) * nrows + ii], *gs);
            _mm_storel_pi((__m64 *) &out_b[(jj + 14) * nrows + ii], *hs);

            // ... and the high 8 bytes to the odd ones.
            _mm_storeh_pi((__m64 *) &out_b[(jj + 1) * nrows + ii], *as);
            _mm_storeh_pi((__m64 *) &out_b[(jj + 3) * nrows + ii], *bs);
            _mm_storeh_pi((__m64 *) &out_b[(jj + 5) * nrows + ii], *cs);
            _mm_storeh_pi((__m64 *) &out_b[(jj + 7) * nrows + ii], *ds);
            _mm_storeh_pi((__m64 *) &out_b[(jj + 9) * nrows + ii], *es);
            _mm_storeh_pi((__m64 *) &out_b[(jj + 11) * nrows + ii], *fs);
            _mm_storeh_pi((__m64 *) &out_b[(jj + 13) * nrows + ii], *gs);
            _mm_storeh_pi((__m64 *) &out_b[(jj + 15) * nrows + ii], *hs);
        }
        // Scalar tail for the columns the 16 wide loop did not reach.
        for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) {
            out_b[jj * nrows + ii + 0] = in_b[(ii + 0)*nbyte_row + jj];
            out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj];
            out_b[jj * nrows + ii + 2] = in_b[(ii + 2)*nbyte_row + jj];
            out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj];
            out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj];
            out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj];
            out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj];
            out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj];
        }
    }
    return size * elem_size;
}


/* Shuffle bits within the bytes of eight element blocks.
*/
/* Details: the input is walked in blocks of 8 * elem_size bytes.  Within a
 * block, each 16 byte group is reduced to eight 16 bit masks (one per bit
 * position, most significant first) which are stored at byte index
 * ii + jj / 8 + (7 - kk) * elem_size of the output.  Odd element sizes are
 * handed to the scalar routine instead.  Returns size * elem_size. */
int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    CHECK_MULT_EIGHT(size);

    // With a bit of care, this could be written such that it is
    // in_buf = out_buf safe.
    const char* in_b = (const char*) in;
    uint16_t* out_ui16 = (uint16_t*) out;

    size_t ii, jj, kk;
    size_t nbyte = elem_size * size;

    __m128i xmm;
    int32_t bt;

    if (elem_size % 2) {
        // NOTE(review): the scalar routine's return value is discarded, so
        // an error from it would not reach the caller.
        bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size);
    } else {
        for (ii = 0; ii + 8 * elem_size - 1 < nbyte;
                ii += 8 * elem_size) {
            for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) {
                xmm = _mm_loadu_si128((__m128i *) &in_b[ii + jj]);
                for (kk = 0; kk < 8; kk++) {
                    bt = _mm_movemask_epi8(xmm);
                    xmm = _mm_slli_epi16(xmm, 1);
                    size_t ind = (ii + jj / 8 + (7 - kk) * elem_size);
                    // `ind` is a byte index; the output is addressed as
                    // 16 bit words, hence the division by two.
                    out_ui16[ind / 2] = bt;
                }
            }
        }
    }
    return size * elem_size;
}


/* Untranspose bits within elements.
 *
 * Two stages through a temporary buffer of size * elem_size bytes:
 * bshuf_trans_byte_bitrow_SSE (in -> tmp_buf) followed by
 * bshuf_shuffle_bit_eightelem_SSE (tmp_buf -> out).  Returns -1 when the
 * buffer cannot be allocated. */
int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    int64_t count;

    CHECK_MULT_EIGHT(size);

    void* tmp_buf = malloc(size * elem_size);
    if (tmp_buf == NULL) return -1;

    count = bshuf_trans_byte_bitrow_SSE(in, tmp_buf, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    count =  bshuf_shuffle_bit_eightelem_SSE(tmp_buf, out, size, elem_size);

    free(tmp_buf);

    return count;
}

#else // #ifdef USESSE2

/* Placeholders used when USESSE2 is not defined: every SSE entry point
 * exists but returns -11 without touching its arguments. */

int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -11;
}


int64_t bshuf_trans_bit_elem_SSE(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -11;
}


int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -11;
}


int64_t bshuf_trans_bit_byte_SSE(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -11;
}


int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -11;
}


int64_t bshuf_trans_byte_elem_SSE_64(const void* in, void* out, const size_t size) {
    return -11;
}


int64_t bshuf_trans_byte_elem_SSE_32(const void* in, void* out, const size_t size) {
    return -11;
}


int64_t bshuf_trans_byte_elem_SSE_16(const void* in, void* out, const size_t size) {
    return -11;
}


int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -11;
}


#endif // #ifdef USESSE2


/* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */

/* ---- Worker code that uses AVX2 ----
 *
 * The following code makes use of the AVX2 instruction set and specialized
 * 32 byte registers. The AVX2 instructions are present on newer x86
 * processors. The first Intel processor microarchitecture supporting AVX2 was
 * Haswell (2013).
 *
 */

#ifdef USEAVX2

/* Transpose bits within bytes.
 *
 * Same scheme as the 16 byte SSE routine but 32 bytes per step: eight
 * movemask/shift rounds each yield a 32 bit mask, stored at byte offset
 * ((7 - kk) * nbyte + ii) / 8.  Bytes past the last multiple of 32 go to
 * bshuf_trans_bit_byte_remainder (defined outside this span). */
int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    size_t ii, kk;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
    int32_t* out_i32;

    size_t nbyte = elem_size * size;

    int64_t count;

    __m256i ymm;
    int32_t bt;

    for (ii = 0; ii + 31 < nbyte; ii += 32) {
        ymm = _mm256_loadu_si256((__m256i *) &in_b[ii]);
        for (kk = 0; kk < 8; kk++) {
            bt = _mm256_movemask_epi8(ymm);
            ymm = _mm256_slli_epi16(ymm, 1);
            out_i32 = (int32_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
            *out_i32 = bt;
        }
    }
    count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
            nbyte - nbyte % 32);
    return count;
}


/* Transpose bits within elements.
*/
/* Same three stages as the SSE variant, with the bit-within-byte step done
 * by bshuf_trans_bit_byte_AVX.  The byte transpose still uses the SSE
 * routine.  Returns -1 if the temporary buffer cannot be allocated. */
int64_t bshuf_trans_bit_elem_AVX(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    int64_t count;

    CHECK_MULT_EIGHT(size);

    void* tmp_buf = malloc(size * elem_size);
    if (tmp_buf == NULL) return -1;

    count = bshuf_trans_byte_elem_SSE(in, out, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    count = bshuf_trans_bit_byte_AVX(out, tmp_buf, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size);

    free(tmp_buf);

    return count;
}


/* For data organized into a row for each bit (8 * elem_size rows), transpose
 * the bytes.
 *
 * Element sizes that are not a multiple of 4 are forwarded to the SSE
 * routine.  Otherwise tiles of 32 rows (four groups of eight) by 32 columns
 * are transposed in registers; columns past the last multiple of 32 are
 * copied byte by byte at the end.  Returns size * elem_size on the AVX
 * path. */
int64_t bshuf_trans_byte_bitrow_AVX(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    size_t hh, ii, jj, kk, mm;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;

    CHECK_MULT_EIGHT(size);

    size_t nrows = 8 * elem_size;
    size_t nbyte_row = size / 8;

    if (elem_size % 4) return bshuf_trans_byte_bitrow_SSE(in, out, size,
            elem_size);

    __m256i ymm_0[8];
    __m256i ymm_1[8];
    // Holds the partially transposed registers of the four 8 row groups
    // (index hh) until they can be combined.
    __m256i ymm_storeage[8][4];

    for (jj = 0; jj + 31 < nbyte_row; jj += 32) {
        for (ii = 0; ii + 3 < elem_size; ii += 4) {
            for (hh = 0; hh < 4; hh ++) {

                // Load 32 bytes from each of eight consecutive rows.
                for (kk = 0; kk < 8; kk ++){
                    ymm_0[kk] = _mm256_loadu_si256((__m256i *) &in_b[
                            (ii * 8 + hh * 8 + kk) * nbyte_row + jj]);
                }

                // Byte interleave.
                for (kk = 0; kk < 4; kk ++){
                    ymm_1[kk] = _mm256_unpacklo_epi8(ymm_0[kk * 2],
                            ymm_0[kk * 2 + 1]);
                    ymm_1[kk + 4] = _mm256_unpackhi_epi8(ymm_0[kk * 2],
                            ymm_0[kk * 2 + 1]);
                }

                // 16 bit interleave.
                for (kk = 0; kk < 2; kk ++){
                    for (mm = 0; mm < 2; mm ++){
                        ymm_0[kk * 4 + mm] = _mm256_unpacklo_epi16(
                                ymm_1[kk * 4 + mm * 2],
                                ymm_1[kk * 4 + mm * 2 + 1]);
                        ymm_0[kk * 4 + mm + 2] = _mm256_unpackhi_epi16(
                                ymm_1[kk * 4 + mm * 2],
                                ymm_1[kk * 4 + mm * 2 + 1]);
                    }
                }

                // 32 bit interleave.
                for (kk = 0; kk < 4; kk ++){
                    ymm_1[kk * 2] = _mm256_unpacklo_epi32(ymm_0[kk * 2],
                            ymm_0[kk * 2 + 1]);
                    ymm_1[kk * 2 + 1] = _mm256_unpackhi_epi32(ymm_0[kk * 2],
                            ymm_0[kk * 2 + 1]);
                }

                for (kk = 0; kk < 8; kk ++){
                    ymm_storeage[kk][hh] = ymm_1[kk];
                }
            }

            // Combine the four row groups: 64 bit interleave, then a
            // 128 bit lane permute, then four 32 byte stores.
            for (mm = 0; mm < 8; mm ++) {

                for (kk = 0; kk < 4; kk ++){
                    ymm_0[kk] = ymm_storeage[mm][kk];
                }

                ymm_1[0] = _mm256_unpacklo_epi64(ymm_0[0], ymm_0[1]);
                ymm_1[1] = _mm256_unpacklo_epi64(ymm_0[2], ymm_0[3]);
                ymm_1[2] = _mm256_unpackhi_epi64(ymm_0[0], ymm_0[1]);
                ymm_1[3] = _mm256_unpackhi_epi64(ymm_0[2], ymm_0[3]);

                // Control 32 (0x20) and 49 (0x31) select which 128 bit lane
                // of each source is taken; see the intrinsic's documentation
                // for the exact encoding.
                ymm_0[0] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 32);
                ymm_0[1] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 32);
                ymm_0[2] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 49);
                ymm_0[3] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 49);

                _mm256_storeu_si256((__m256i *) &out_b[
                        (jj + mm * 2 + 0 * 16) * nrows + ii * 8], ymm_0[0]);
                _mm256_storeu_si256((__m256i *) &out_b[
                        (jj + mm * 2 + 0 * 16 + 1) * nrows + ii * 8], ymm_0[1]);
                _mm256_storeu_si256((__m256i *) &out_b[
                        (jj + mm * 2 + 1 * 16) * nrows + ii * 8], ymm_0[2]);
                _mm256_storeu_si256((__m256i *) &out_b[
                        (jj + mm * 2 + 1 * 16 + 1) * nrows + ii * 8], ymm_0[3]);
            }
        }
    }
    // Scalar tail: every row, for the columns the 32 wide loop skipped.
    for (ii = 0; ii < nrows; ii ++ ) {
        for (jj = nbyte_row - nbyte_row % 32; jj < nbyte_row; jj ++) {
            out_b[jj * nrows + ii] = in_b[ii * nbyte_row + jj];
        }
    }
    return size * elem_size;
}


/* Shuffle bits within the bytes of eight element blocks.
 *
 * Element sizes that are not a multiple of 4 are forwarded to the SSE
 * routine (whose result is returned).  Otherwise each 32 byte group is
 * reduced to eight 32 bit masks written at byte index
 * ii + jj / 8 + (7 - kk) * elem_size.  Returns size * elem_size on the AVX
 * path. */
int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    CHECK_MULT_EIGHT(size);

    // With a bit of care, this could be written such that it is
    // in_buf = out_buf safe.
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;

    size_t ii, jj, kk;
    size_t nbyte = elem_size * size;

    __m256i ymm;
    int32_t bt;

    if (elem_size % 4) {
        return bshuf_shuffle_bit_eightelem_SSE(in, out, size, elem_size);
    } else {
        // Note the loop order: the offset inside a block (jj) is the outer
        // loop, the block start (ii) the inner one.
        for (jj = 0; jj + 31 < 8 * elem_size; jj += 32) {
            for (ii = 0; ii + 8 * elem_size - 1 < nbyte;
                    ii += 8 * elem_size) {
                ymm = _mm256_loadu_si256((__m256i *) &in_b[ii + jj]);
                for (kk = 0; kk < 8; kk++) {
                    bt = _mm256_movemask_epi8(ymm);
                    ymm = _mm256_slli_epi16(ymm, 1);
                    size_t ind = (ii + jj / 8 + (7 - kk) * elem_size);
                    * (int32_t *) &out_b[ind] = bt;
                }
            }
        }
    }
    return size * elem_size;
}


/* Untranspose bits within elements.
 *
 * bshuf_trans_byte_bitrow_AVX (in -> tmp_buf) followed by
 * bshuf_shuffle_bit_eightelem_AVX (tmp_buf -> out).  Returns -1 when the
 * temporary buffer cannot be allocated. */
int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    int64_t count;

    CHECK_MULT_EIGHT(size);

    void* tmp_buf = malloc(size * elem_size);
    if (tmp_buf == NULL) return -1;

    count = bshuf_trans_byte_bitrow_AVX(in, tmp_buf, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    count =  bshuf_shuffle_bit_eightelem_AVX(tmp_buf, out, size, elem_size);

    free(tmp_buf);
    return count;
}


#else // #ifdef USEAVX2

/* Placeholders used when USEAVX2 is not defined: each returns -12. */

int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -12;
}


int64_t bshuf_trans_bit_elem_AVX(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -12;
}


int64_t bshuf_trans_byte_bitrow_AVX(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -12;
}


int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -12;
}


int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -12;
}

#endif // #ifdef USEAVX2

#ifdef USEAVX512

/* Transpose bits within bytes.
*/
/* Three tiers: 64 byte groups with AVX-512, then (starting at the last
 * multiple of 64) 32 byte groups with AVX2, then whatever is left is handed
 * to bshuf_trans_bit_byte_remainder (defined outside this span).  In both
 * vector tiers, eight rounds each produce one mask per bit position, stored
 * at byte offset ((7 - kk) * nbyte + ii) / 8. */
int64_t bshuf_trans_bit_byte_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    size_t ii, kk;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
    size_t nbyte = elem_size * size;
    int64_t count;

    int64_t* out_i64;
    __m512i zmm;
    __mmask64 bt;
    if (nbyte >= 64) {
        const __m512i mask = _mm512_set1_epi8(0);

        for (ii = 0; ii + 63 < nbyte; ii += 64) {
            zmm = _mm512_loadu_si512((__m512i *) &in_b[ii]);
            for (kk = 0; kk < 8; kk++) {
                // Compare each byte against zero with predicate 1.  In the
                // Intel intrinsics this is the signed less-than predicate,
                // so the mask collects the top bit of every byte, playing
                // the role movemask has in the narrower variants.
                bt = _mm512_cmp_epi8_mask(zmm, mask, 1);
                zmm = _mm512_slli_epi16(zmm, 1);
                out_i64 = (int64_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
                *out_i64 = (int64_t)bt;
            }
        }
    }

    __m256i ymm;
    int32_t bt32;
    int32_t* out_i32;
    size_t start = nbyte - nbyte % 64;
    // At most one 32 byte group can remain after the 64 byte loop.
    for (ii = start; ii + 31 < nbyte; ii += 32) {
        ymm = _mm256_loadu_si256((__m256i *) &in_b[ii]);
        for (kk = 0; kk < 8; kk++) {
            bt32 = _mm256_movemask_epi8(ymm);
            ymm = _mm256_slli_epi16(ymm, 1);
            out_i32 = (int32_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
            *out_i32 = bt32;
        }
    }


    // nbyte % 64 % 32 equals nbyte % 32: the remainder starts after the
    // last full 32 byte group.
    count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
            nbyte - nbyte % 64 % 32);

    return count;
}


/* Transpose bits within elements.
 *
 * Same three stages as the SSE/AVX variants, with the bit-within-byte step
 * done by bshuf_trans_bit_byte_AVX512.  Returns -1 if the temporary buffer
 * cannot be allocated. */
int64_t bshuf_trans_bit_elem_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    int64_t count;

    CHECK_MULT_EIGHT(size);

    void* tmp_buf = malloc(size * elem_size);
    if (tmp_buf == NULL) return -1;

    count = bshuf_trans_byte_elem_SSE(in, out, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    count = bshuf_trans_bit_byte_AVX512(out, tmp_buf, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size);

    free(tmp_buf);

    return count;

}

/* Shuffle bits within the bytes of eight element blocks.
 *
 * Element sizes that are not a multiple of 8 are forwarded to the AVX
 * routine (whose result is returned).  Otherwise each 64 byte group is
 * reduced to eight 64 bit masks written at byte index
 * ii + jj / 8 + (7 - kk) * elem_size.  Returns size * elem_size on the
 * AVX-512 path. */
int64_t bshuf_shuffle_bit_eightelem_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    CHECK_MULT_EIGHT(size);

    // With a bit of care, this could be written such that it is
    // in_buf = out_buf safe.
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;

    size_t ii, jj, kk;
    size_t nbyte = elem_size * size;

    __m512i zmm;
    __mmask64 bt;

    if (elem_size % 8) {
        return bshuf_shuffle_bit_eightelem_AVX(in, out, size, elem_size);
    } else {
        const __m512i mask = _mm512_set1_epi8(0);
        for (jj = 0; jj + 63 < 8 * elem_size; jj += 64) {
            for (ii = 0; ii + 8 * elem_size - 1 < nbyte;
                    ii += 8 * elem_size) {
                zmm = _mm512_loadu_si512((__m512i *) &in_b[ii + jj]);
                for (kk = 0; kk < 8; kk++) {
                    bt = _mm512_cmp_epi8_mask(zmm, mask, 1);
                    zmm = _mm512_slli_epi16(zmm, 1);
                    size_t ind = (ii + jj / 8 + (7 - kk) * elem_size);
                    * (int64_t *) &out_b[ind] = bt;
                }
            }
        }

    }
    return size * elem_size;
}

/* Untranspose bits within elements.
 *
 * bshuf_trans_byte_bitrow_AVX (in -> tmp_buf; there is no AVX-512 version of
 * that step in this span) followed by bshuf_shuffle_bit_eightelem_AVX512
 * (tmp_buf -> out).  Returns -1 when the temporary buffer cannot be
 * allocated. */
int64_t bshuf_untrans_bit_elem_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {

    int64_t count;

    CHECK_MULT_EIGHT(size);

    void* tmp_buf = malloc(size * elem_size);
    if (tmp_buf == NULL) return -1;

    count = bshuf_trans_byte_bitrow_AVX(in, tmp_buf, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    count =  bshuf_shuffle_bit_eightelem_AVX512(tmp_buf, out, size, elem_size);

    free(tmp_buf);
    return count;
}

#else // #ifdef USEAVX512

/* Placeholders used when USEAVX512 is not defined: each returns -14. */

int64_t bshuf_trans_bit_byte_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -14;
}

int64_t bshuf_trans_bit_elem_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -14;
}

int64_t bshuf_shuffle_bit_eightelem_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -14;
}

int64_t bshuf_untrans_bit_elem_AVX512(const void* in, void* out, const size_t size,
         const size_t elem_size) {
    return -14;
}

#endif

/* ---- Drivers selecting best instruction set at compile time.
---- */ int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, const size_t elem_size) { int64_t count; #ifdef USEAVX512 count = bshuf_trans_bit_elem_AVX512(in, out, size, elem_size); #elif defined USEAVX2 count = bshuf_trans_bit_elem_AVX(in, out, size, elem_size); #elif defined(USESSE2) count = bshuf_trans_bit_elem_SSE(in, out, size, elem_size); #elif defined(USEARMNEON) count = bshuf_trans_bit_elem_NEON(in, out, size, elem_size); #else count = bshuf_trans_bit_elem_scal(in, out, size, elem_size); #endif return count; } int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size, const size_t elem_size) { int64_t count; #ifdef USEAVX512 count = bshuf_untrans_bit_elem_AVX512(in, out, size, elem_size); #elif defined USEAVX2 count = bshuf_untrans_bit_elem_AVX(in, out, size, elem_size); #elif defined(USESSE2) count = bshuf_untrans_bit_elem_SSE(in, out, size, elem_size); #elif defined(USEARMNEON) count = bshuf_untrans_bit_elem_NEON(in, out, size, elem_size); #else count = bshuf_untrans_bit_elem_scal(in, out, size, elem_size); #endif return count; } /* ---- Wrappers for implementing blocking ---- */ /* Wrap a function for processing a single block to process an entire buffer in * parallel. 
*/ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, \ const size_t size, const size_t elem_size, size_t block_size, const int option) { omp_size_t ii = 0; int64_t err = 0; int64_t count, cum_count=0; size_t last_block_size; size_t leftover_bytes; size_t this_iter; char *last_in; char *last_out; ioc_chain C; ioc_init(&C, in, out); if (block_size == 0) { block_size = bshuf_default_block_size(elem_size); } if (block_size % BSHUF_BLOCKED_MULT) return -81; #if defined(_OPENMP) #pragma omp parallel for schedule(dynamic, 1) \ private(count) reduction(+ : cum_count) #endif for (ii = 0; ii < (omp_size_t)( size / block_size ); ii ++) { count = fun(&C, block_size, elem_size, option); if (count < 0) err = count; cum_count += count; } last_block_size = size % block_size; last_block_size = last_block_size - last_block_size % BSHUF_BLOCKED_MULT; if (last_block_size) { count = fun(&C, last_block_size, elem_size, option); if (count < 0) err = count; cum_count += count; } if (err < 0) return err; leftover_bytes = size % BSHUF_BLOCKED_MULT * elem_size; //this_iter; last_in = (char *) ioc_get_in(&C, &this_iter); ioc_set_next_in(&C, &this_iter, (void *) (last_in + leftover_bytes)); last_out = (char *) ioc_get_out(&C, &this_iter); ioc_set_next_out(&C, &this_iter, (void *) (last_out + leftover_bytes)); memcpy(last_out, last_in, leftover_bytes); ioc_destroy(&C); return cum_count + leftover_bytes; } /* Bitshuffle a single block. */ int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \ const size_t size, const size_t elem_size, const int option) { size_t this_iter; const void *in; void *out; int64_t count; in = ioc_get_in(C_ptr, &this_iter); ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size)); out = ioc_get_out(C_ptr, &this_iter); ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + size * elem_size)); count = bshuf_trans_bit_elem(in, out, size, elem_size); return count; } /* Bitunshuffle a single block. 
*/ int64_t bshuf_bitunshuffle_block(ioc_chain* C_ptr, \ const size_t size, const size_t elem_size, const int option) { size_t this_iter; const void *in; void *out; int64_t count; in = ioc_get_in(C_ptr, &this_iter); ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size)); out = ioc_get_out(C_ptr, &this_iter); ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + size * elem_size)); count = bshuf_untrans_bit_elem(in, out, size, elem_size); return count; } /* Write a 64 bit unsigned integer to a buffer in big endian order. */ void bshuf_write_uint64_BE(void* buf, uint64_t num) { int ii; uint8_t* b = (uint8_t*) buf; uint64_t pow28 = 1 << 8; for (ii = 7; ii >= 0; ii--) { b[ii] = num % pow28; num = num / pow28; } } /* Read a 64 bit unsigned integer from a buffer big endian order. */ uint64_t bshuf_read_uint64_BE(void* buf) { int ii; uint8_t* b = (uint8_t*) buf; uint64_t num = 0, pow28 = 1 << 8, cp = 1; for (ii = 7; ii >= 0; ii--) { num += b[ii] * cp; cp *= pow28; } return num; } /* Write a 32 bit unsigned integer to a buffer in big endian order. */ void bshuf_write_uint32_BE(void* buf, uint32_t num) { int ii; uint8_t* b = (uint8_t*) buf; uint32_t pow28 = 1 << 8; for (ii = 3; ii >= 0; ii--) { b[ii] = num % pow28; num = num / pow28; } } /* Read a 32 bit unsigned integer from a buffer big endian order. */ uint32_t bshuf_read_uint32_BE(const void* buf) { int ii; uint8_t* b = (uint8_t*) buf; uint32_t num = 0, pow28 = 1 << 8, cp = 1; for (ii = 3; ii >= 0; ii--) { num += b[ii] * cp; cp *= pow28; } return num; } /* ---- Public functions ---- * * See header file for description and usage. * */ size_t bshuf_default_block_size(const size_t elem_size) { // This function needs to be absolutely stable between versions. // Otherwise encoded data will not be decodable. size_t block_size = BSHUF_TARGET_BLOCK_SIZE_B / elem_size; // Ensure it is a required multiple. 
block_size = (block_size / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; return MAX(block_size, BSHUF_MIN_RECOMMEND_BLOCK); } int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size, const size_t elem_size, size_t block_size) { return bshuf_blocked_wrap_fun(&bshuf_bitshuffle_block, in, out, size, elem_size, block_size, 0/*option*/); } int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size, const size_t elem_size, size_t block_size) { return bshuf_blocked_wrap_fun(&bshuf_bitunshuffle_block, in, out, size, elem_size, block_size, 0/*option*/); } #undef TRANS_BIT_8X8 #undef TRANS_ELEM_TYPE #undef MAX #undef CHECK_MULT_EIGHT #undef CHECK_ERR_FREE #undef USESSE2 #undef USEAVX2 bitshuffle-0.5.1/src/bitshuffle_core.h000066400000000000000000000106651434025530100177600ustar00rootroot00000000000000/* * Bitshuffle - Filter for improving compression of typed binary data. * * This file is part of Bitshuffle * Author: Kiyoshi Masui * Website: http://www.github.com/kiyo-masui/bitshuffle * Created: 2014 * * See LICENSE file for details about copyright and rights to use. * * * Header File * * Worker routines return an int64_t which is the number of bytes processed * if positive or an error code if negative. * * Error codes: * -1 : Failed to allocate memory. * -11 : Missing SSE. * -12 : Missing AVX. * -13 : Missing Arm Neon. * -14 : Missing AVX512. * -80 : Input size not a multiple of 8. * -81 : block_size not multiple of 8. * -91 : Decompression error, wrong number of bytes processed. * -1YYY : Error internal to compression routine with error code -YYY. 
*/ #ifndef BITSHUFFLE_CORE_H #define BITSHUFFLE_CORE_H // We assume GNU g++ defining `__cplusplus` has stdint.h #if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus) #include #else typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; typedef signed int int32_t; typedef unsigned long long uint64_t; typedef long long int64_t; #endif #include // These are usually set in the setup.py. #ifndef BSHUF_VERSION_MAJOR #define BSHUF_VERSION_MAJOR 0 #define BSHUF_VERSION_MINOR 4 #define BSHUF_VERSION_POINT 0 #endif #ifdef __cplusplus extern "C" { #endif /* --- bshuf_using_SSE2 ---- * * Whether routines where compiled with the SSE2 instruction set. * * Returns * ------- * 1 if using SSE2, 0 otherwise. * */ int bshuf_using_SSE2(void); /* ---- bshuf_using_NEON ---- * * Whether routines where compiled with the NEON instruction set. * * Returns * ------- * 1 if using NEON, 0 otherwise. * */ int bshuf_using_NEON(void); /* ---- bshuf_using_AVX2 ---- * * Whether routines where compiled with the AVX2 instruction set. * * Returns * ------- * 1 if using AVX2, 0 otherwise. * */ int bshuf_using_AVX2(void); /* ---- bshuf_using_AVX512 ---- * * Whether routines where compiled with the AVX512 instruction set. * * Returns * ------- * 1 if using AVX512, 0 otherwise. * */ int bshuf_using_AVX512(void); /* ---- bshuf_default_block_size ---- * * The default block size as function of element size. * * This is the block size used by the blocked routines (any routine * taking a *block_size* argument) when the block_size is not provided * (zero is passed). * * The results of this routine are guaranteed to be stable such that * shuffled/compressed data can always be decompressed. * * Parameters * ---------- * elem_size : element size of data to be shuffled/compressed. * */ size_t bshuf_default_block_size(const size_t elem_size); /* ---- bshuf_bitshuffle ---- * * Bitshuffle the data. 
* * Transpose the bits within elements, in blocks of *block_size* * elements. * * Parameters * ---------- * in : input buffer, must be of size * elem_size bytes * out : output buffer, must be of size * elem_size bytes * size : number of elements in input * elem_size : element size of typed data * block_size : Do transpose in blocks of this many elements. Pass 0 to * select automatically (recommended). * * Returns * ------- * number of bytes processed, negative error-code if failed. * */ int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size, const size_t elem_size, size_t block_size); /* ---- bshuf_bitunshuffle ---- * * Unshuffle bitshuffled data. * * Untranspose the bits within elements, in blocks of *block_size* * elements. * * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size* * must match the parameters used to shuffle the data. * * Parameters * ---------- * in : input buffer, must be of size * elem_size bytes * out : output buffer, must be of size * elem_size bytes * size : number of elements in input * elem_size : element size of typed data * block_size : Do transpose in blocks of this many elements. Pass 0 to * select automatically (recommended). * * Returns * ------- * number of bytes processed, negative error-code if failed. * */ int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size, const size_t elem_size, size_t block_size); #ifdef __cplusplus } // extern "C" #endif #endif // BITSHUFFLE_CORE_H bitshuffle-0.5.1/src/bitshuffle_internals.h000066400000000000000000000042711434025530100210230ustar00rootroot00000000000000/* * Bitshuffle - Filter for improving compression of typed binary data. * * This file is part of Bitshuffle * Author: Kiyoshi Masui * Website: http://www.github.com/kiyo-masui/bitshuffle * Created: 2014 * * See LICENSE file for details about copyright and rights to use. 
*/ #ifndef BITSHUFFLE_INTERNALS_H #define BITSHUFFLE_INTERNALS_H // We assume GNU g++ defining `__cplusplus` has stdint.h #if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus) #include #else typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; typedef signed int int32_t; typedef unsigned long long uint64_t; typedef long long int64_t; #endif #include #include "iochain.h" // Constants. #ifndef BSHUF_MIN_RECOMMEND_BLOCK #define BSHUF_MIN_RECOMMEND_BLOCK 128 #define BSHUF_BLOCKED_MULT 8 // Block sizes must be multiple of this. #define BSHUF_TARGET_BLOCK_SIZE_B 8192 #endif // Macros. #define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; } #ifdef __cplusplus extern "C" { #endif /* ---- Utility functions for internal use only ---- */ int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, const size_t elem_size); /* Read a 32 bit unsigned integer from a buffer big endian order. */ uint32_t bshuf_read_uint32_BE(const void* buf); /* Write a 32 bit unsigned integer to a buffer in big endian order. */ void bshuf_write_uint32_BE(void* buf, uint32_t num); int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size, const size_t elem_size); /* Function definition for worker functions that process a single block. */ typedef int64_t (*bshufBlockFunDef)(ioc_chain* C_ptr, const size_t size, const size_t elem_size, const int option); /* Wrap a function for processing a single block to process an entire buffer in * parallel. 
*/ int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, const size_t size, const size_t elem_size, size_t block_size, const int option); #ifdef __cplusplus } // extern "C" #endif #endif // BITSHUFFLE_INTERNALS_H bitshuffle-0.5.1/src/bshuf_h5filter.c000066400000000000000000000200711434025530100175110ustar00rootroot00000000000000/* * Bitshuffle HDF5 filter * * This file is part of Bitshuffle * Author: Kiyoshi Masui * Website: http://www.github.com/kiyo-masui/bitshuffle * Created: 2014 * * See LICENSE file for details about copyright and rights to use. * */ #include "bitshuffle.h" #include "bshuf_h5filter.h" #define PUSH_ERR(func, minor, str) \ H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str) // Prototypes from bitshuffle.c void bshuf_write_uint64_BE(void* buf, uint64_t num); uint64_t bshuf_read_uint64_BE(void* buf); void bshuf_write_uint32_BE(void* buf, uint32_t num); uint32_t bshuf_read_uint32_BE(const void* buf); // Only called on compression, not on reverse. herr_t bshuf_h5_set_local(hid_t dcpl, hid_t type, hid_t space){ herr_t r; size_t ii; unsigned int elem_size; unsigned int flags; size_t nelements = 8; size_t nelem_max = 11; unsigned values[] = {0,0,0,0,0,0,0,0,0,0,0}; unsigned tmp_values[] = {0,0,0,0,0,0,0,0}; char msg[80]; r = H5Pget_filter_by_id2(dcpl, BSHUF_H5FILTER, &flags, &nelements, tmp_values, 0, NULL, NULL); if(r<0) return -1; // First 3 slots reserved. Move any passed options to higher addresses. for (ii=0; ii < nelements && ii + 3 < nelem_max; ii++) { values[ii + 3] = tmp_values[ii]; } nelements = 3 + nelements; values[0] = BSHUF_VERSION_MAJOR; values[1] = BSHUF_VERSION_MINOR; elem_size = H5Tget_size(type); if(elem_size <= 0) { PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, "Invalid element size."); return -1; } values[2] = elem_size; // Validate user supplied arguments. if (nelements > 3) { if (values[3] % 8 || values[3] < 0) { sprintf(msg, "Error in bitshuffle. 
Invalid block size: %d.", values[3]); PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, msg); return -1; } } if (nelements > 4) { switch (values[4]) { case 0: break; case BSHUF_H5_COMPRESS_LZ4: break; #ifdef ZSTD_SUPPORT case BSHUF_H5_COMPRESS_ZSTD: break; #endif default: PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, "Invalid bitshuffle compression."); } } r = H5Pmodify_filter(dcpl, BSHUF_H5FILTER, flags, nelements, values); if(r<0) return -1; return 1; } size_t bshuf_h5_filter(unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[], size_t nbytes, size_t *buf_size, void **buf) { size_t size, elem_size; int err = -1; char msg[80]; size_t block_size = 0; size_t buf_size_out, nbytes_uncomp, nbytes_out; char* in_buf = *buf; void *out_buf; if (cd_nelmts < 3) { PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, "Not enough parameters."); return 0; } elem_size = cd_values[2]; #ifdef ZSTD_SUPPORT const int comp_lvl = cd_values[5]; #endif // User specified block size. if (cd_nelmts > 3) block_size = cd_values[3]; if (block_size == 0) block_size = bshuf_default_block_size(elem_size); #ifndef ZSTD_SUPPORT if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) { PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, "ZSTD compression filter chosen but ZSTD support not installed."); return 0; } #endif // Compression in addition to bitshuffle. if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_LZ4 || cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) { if (flags & H5Z_FLAG_REVERSE) { // First eight bytes is the number of bytes in the output buffer, // little endian. nbytes_uncomp = bshuf_read_uint64_BE(in_buf); // Override the block size with the one read from the header. block_size = bshuf_read_uint32_BE((const char*) in_buf + 8) / elem_size; // Skip over the header. 
in_buf += 12; buf_size_out = nbytes_uncomp; } else { nbytes_uncomp = nbytes; // Pick which compressions library to use if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) { buf_size_out = bshuf_compress_lz4_bound(nbytes_uncomp / elem_size, elem_size, block_size) + 12; } #ifdef ZSTD_SUPPORT else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) { buf_size_out = bshuf_compress_zstd_bound(nbytes_uncomp / elem_size, elem_size, block_size) + 12; } #endif } } else { nbytes_uncomp = nbytes; buf_size_out = nbytes; } // TODO, remove this restriction by memcopying the extra. if (nbytes_uncomp % elem_size) { PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, "Non integer number of elements."); return 0; } size = nbytes_uncomp / elem_size; out_buf = malloc(buf_size_out); if (out_buf == NULL) { PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, "Could not allocate output buffer."); return 0; } if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_LZ4 || cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) { if (flags & H5Z_FLAG_REVERSE) { // Bit unshuffle/decompress. // Pick which compressions library to use if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) { err = bshuf_decompress_lz4(in_buf, out_buf, size, elem_size, block_size); } #ifdef ZSTD_SUPPORT else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) { err = bshuf_decompress_zstd(in_buf, out_buf, size, elem_size, block_size); } #endif nbytes_out = nbytes_uncomp; } else { // Bit shuffle/compress. // Write the header, described in // http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf. // Technically we should be using signed integers instead of // unsigned ones, however for valid inputs (positive numbers) these // have the same representation. 
bshuf_write_uint64_BE(out_buf, nbytes_uncomp); bshuf_write_uint32_BE((char*) out_buf + 8, block_size * elem_size); if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) { err = bshuf_compress_lz4(in_buf, (char*) out_buf + 12, size, elem_size, block_size); } #ifdef ZSTD_SUPPORT else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) { err = bshuf_compress_zstd(in_buf, (char*) out_buf + 12, size, elem_size, block_size, comp_lvl); } #endif nbytes_out = err + 12; } } else { if (flags & H5Z_FLAG_REVERSE) { // Bit unshuffle. err = bshuf_bitunshuffle(in_buf, out_buf, size, elem_size, block_size); } else { // Bit shuffle. err = bshuf_bitshuffle(in_buf, out_buf, size, elem_size, block_size); } nbytes_out = nbytes; } //printf("nb_in %d, nb_uncomp %d, nb_out %d, buf_out %d, block %d\n", //nbytes, nbytes_uncomp, nbytes_out, buf_size_out, block_size); if (err < 0) { sprintf(msg, "Error in bitshuffle with error code %d.", err); PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, msg); free(out_buf); return 0; } else { free(*buf); *buf = out_buf; *buf_size = buf_size_out; return nbytes_out; } } H5Z_class_t bshuf_H5Filter[1] = {{ H5Z_CLASS_T_VERS, (H5Z_filter_t)(BSHUF_H5FILTER), 1, 1, "bitshuffle; see https://github.com/kiyo-masui/bitshuffle", NULL, (H5Z_set_local_func_t)(bshuf_h5_set_local), (H5Z_func_t)(bshuf_h5_filter) }}; int bshuf_register_h5filter(void){ int retval; retval = H5Zregister(bshuf_H5Filter); if(retval<0){ PUSH_ERR("bshuf_register_h5filter", H5E_CANTREGISTER, "Can't register bitshuffle filter"); } return retval; } bitshuffle-0.5.1/src/bshuf_h5filter.h000066400000000000000000000031141434025530100175150ustar00rootroot00000000000000/* * Bitshuffle HDF5 filter * * This file is part of Bitshuffle * Author: Kiyoshi Masui * Website: http://www.github.com/kiyo-masui/bitshuffle * Created: 2014 * * See LICENSE file for details about copyright and rights to use. 
* * * Header File * * Filter Options * -------------- * block_size (option slot 0) : integer (optional) * What block size to use (in elements not bytes). Default is 0, * for which bitshuffle will pick a block size with a target of 8kb. * Compression (option slot 1) : 0 or BSHUF_H5_COMPRESS_LZ4 * Whether to apply LZ4 compression to the data after bitshuffling. * This is much faster than applying compression as a second filter * because it is done when the small block of data is already in the * L1 cache. * * For LZ4 compression, the compressed format of the data is the same as * for the normal LZ4 filter described in * http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf. * */ #ifndef BSHUF_H5FILTER_H #define BSHUF_H5FILTER_H #ifdef __cplusplus extern "C" { #endif #define H5Z_class_t_vers 2 #include "hdf5.h" #define BSHUF_H5FILTER 32008 #define BSHUF_H5_COMPRESS_LZ4 2 #define BSHUF_H5_COMPRESS_ZSTD 3 extern H5Z_class_t bshuf_H5Filter[1]; /* ---- bshuf_register_h5filter ---- * * Register the bitshuffle HDF5 filter within the HDF5 library. * * Call this before using the bitshuffle HDF5 filter from C unless * using dynamically loaded filters. * */ int bshuf_register_h5filter(void); #ifdef __cplusplus } // extern "C" #endif #endif // BSHUF_H5FILTER_H bitshuffle-0.5.1/src/bshuf_h5plugin.c000066400000000000000000000007461434025530100175310ustar00rootroot00000000000000/* * Dynamically loaded filter plugin for HDF5 Bitshuffle filter. * * This file is part of Bitshuffle * Author: Kiyoshi Masui * Website: http://www.github.com/kiyo-masui/bitshuffle * Created: 2014 * * See LICENSE file for details about copyright and rights to use. 
* */ #include "bshuf_h5filter.h" #include "H5PLextern.h" H5PL_type_t H5PLget_plugin_type(void) {return H5PL_TYPE_FILTER;} const void* H5PLget_plugin_info(void) {return bshuf_H5Filter;} bitshuffle-0.5.1/src/hdf5_dl.c000066400000000000000000000246741434025530100161220ustar00rootroot00000000000000# /*########################################################################## # # Copyright (c) 2019 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # ###########################################################################*/ /* This provides replacement for HDF5 functions/variables used by filters. * * Those replacement provides no-op functions by default and if init_filter * is called it provides access to HDF5 functions/variables through dynamic * loading. * This is useful on Linux/macOS to avoid linking the plugin with a dedicated * HDF5 library. 
*/ #include #include #include #include "hdf5.h" /*Function types*/ /*H5*/ typedef herr_t (*DL_func_H5open)(void); /*H5E*/ typedef herr_t (* DL_func_H5Epush1)( const char *file, const char *func, unsigned line, H5E_major_t maj, H5E_minor_t min, const char *str); typedef herr_t (* DL_func_H5Epush2)( hid_t err_stack, const char *file, const char *func, unsigned line, hid_t cls_id, hid_t maj_id, hid_t min_id, const char *msg, ...); /*H5P*/ typedef herr_t (* DL_func_H5Pget_filter_by_id2)(hid_t plist_id, H5Z_filter_t id, unsigned int *flags/*out*/, size_t *cd_nelmts/*out*/, unsigned cd_values[]/*out*/, size_t namelen, char name[]/*out*/, unsigned *filter_config/*out*/); typedef int (* DL_func_H5Pget_chunk)( hid_t plist_id, int max_ndims, hsize_t dim[]/*out*/); typedef herr_t (* DL_func_H5Pmodify_filter)( hid_t plist_id, H5Z_filter_t filter, unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[/*cd_nelmts*/]); /*H5T*/ typedef size_t (* DL_func_H5Tget_size)( hid_t type_id); typedef H5T_class_t (* DL_func_H5Tget_class)(hid_t type_id); typedef hid_t (* DL_func_H5Tget_super)(hid_t type); typedef herr_t (* DL_func_H5Tclose)(hid_t type_id); /*H5Z*/ typedef herr_t (* DL_func_H5Zregister)( const void *cls); static struct { /*H5*/ DL_func_H5open H5open; /*H5E*/ DL_func_H5Epush1 H5Epush1; DL_func_H5Epush2 H5Epush2; /*H5P*/ DL_func_H5Pget_filter_by_id2 H5Pget_filter_by_id2; DL_func_H5Pget_chunk H5Pget_chunk; DL_func_H5Pmodify_filter H5Pmodify_filter; /*H5T*/ DL_func_H5Tget_size H5Tget_size; DL_func_H5Tget_class H5Tget_class; DL_func_H5Tget_super H5Tget_super; DL_func_H5Tclose H5Tclose; /*H5T*/ DL_func_H5Zregister H5Zregister; } DL_H5Functions = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}; static struct { /*HDF5 variables*/ void *h5e_cantregister_ptr; void *h5e_callback_ptr; void *h5e_pline_ptr; void *h5e_err_cls_ptr; } H5Variables_ptr = { NULL, NULL, NULL, NULL}; /*HDF5 variables*/ hid_t H5E_CANTREGISTER_g = -1; hid_t H5E_CALLBACK_g = -1; hid_t 
H5E_PLINE_g = -1; hid_t H5E_ERR_CLS_g = -1; static bool is_init = false; /* * Try to find a symbol within a library * * handle: Handle to the library * symbol: Symbol to look for * Returns: a pointer to the symbol or NULL * if the symbol can't be found */ void *find_sym(void *handle, const char *symbol) { void *ret = NULL, *err = NULL; dlerror(); /* clear error code */ ret = dlsym(handle, symbol); if(ret != NULL && (err = dlerror()) == NULL) return ret; else return NULL; } /* * Check that all symbols have been loaded * * Returns: -1 if an error occured, 0 for success */ int check_symbols() { if(DL_H5Functions.H5open == NULL) return -1; /*H5E*/ if(DL_H5Functions.H5Epush1 == NULL) return -1; if(DL_H5Functions.H5Epush2 == NULL) return -1; /*H5P*/ if(DL_H5Functions.H5Pget_filter_by_id2 == NULL) return -1; if(DL_H5Functions.H5Pget_chunk == NULL) return -1; if(DL_H5Functions.H5Pmodify_filter == NULL) return -1; /*H5T*/ if(DL_H5Functions.H5Tget_size == NULL) return -1; if(DL_H5Functions.H5Tget_class == NULL) return -1; if(DL_H5Functions.H5Tget_super == NULL) return -1; if(DL_H5Functions.H5Tclose == NULL) return -1; /*H5Z*/ if(DL_H5Functions.H5Zregister == NULL) return -1; /*Variables*/ if(H5Variables_ptr.h5e_cantregister_ptr == NULL) return -1; if(H5Variables_ptr.h5e_callback_ptr == NULL) return -1; if(H5Variables_ptr.h5e_pline_ptr == NULL) return -1; if(H5Variables_ptr.h5e_err_cls_ptr == NULL) return -1; return 0; } /* Initialize the dynamic loading of symbols and register the plugin * * libname: Name of the DLL from which to load libHDF5 symbols * Returns: -1 if an error occured, 0 for success */ int init_filter(const char *libname) { int retval = -1; void *handle = NULL; handle = dlopen(libname, RTLD_LAZY | RTLD_LOCAL); if (handle != NULL) { /*H5*/ if(DL_H5Functions.H5open == NULL) // find_sym will return NULL if it fails so no need to check return ptr DL_H5Functions.H5open = (DL_func_H5open)find_sym(handle, "H5open"); /*H5E*/ if(DL_H5Functions.H5Epush1 == NULL) 
DL_H5Functions.H5Epush1 = (DL_func_H5Epush1)find_sym(handle, "H5Epush1"); if(DL_H5Functions.H5Epush2 == NULL) DL_H5Functions.H5Epush2 = (DL_func_H5Epush2)find_sym(handle, "H5Epush2"); /*H5P*/ if(DL_H5Functions.H5Pget_filter_by_id2 == NULL) DL_H5Functions.H5Pget_filter_by_id2 = (DL_func_H5Pget_filter_by_id2)find_sym(handle, "H5Pget_filter_by_id2"); if(DL_H5Functions.H5Pget_chunk == NULL) DL_H5Functions.H5Pget_chunk = (DL_func_H5Pget_chunk)find_sym(handle, "H5Pget_chunk"); if(DL_H5Functions.H5Pmodify_filter == NULL) DL_H5Functions.H5Pmodify_filter = (DL_func_H5Pmodify_filter)find_sym(handle, "H5Pmodify_filter"); /*H5T*/ if(DL_H5Functions.H5Tget_size == NULL) DL_H5Functions.H5Tget_size = (DL_func_H5Tget_size)find_sym(handle, "H5Tget_size"); if(DL_H5Functions.H5Tget_class == NULL) DL_H5Functions.H5Tget_class = (DL_func_H5Tget_class)find_sym(handle, "H5Tget_class"); if(DL_H5Functions.H5Tget_super == NULL) DL_H5Functions.H5Tget_super = (DL_func_H5Tget_super)find_sym(handle, "H5Tget_super"); if(DL_H5Functions.H5Tclose == NULL) DL_H5Functions.H5Tclose = (DL_func_H5Tclose)find_sym(handle, "H5Tclose"); /*H5Z*/ if(DL_H5Functions.H5Zregister == NULL) DL_H5Functions.H5Zregister = (DL_func_H5Zregister)find_sym(handle, "H5Zregister"); /*Variables*/ if(H5Variables_ptr.h5e_cantregister_ptr == NULL) H5Variables_ptr.h5e_cantregister_ptr = find_sym(handle, "H5E_CANTREGISTER_g"); if(H5Variables_ptr.h5e_callback_ptr == NULL) H5Variables_ptr.h5e_callback_ptr = find_sym(handle, "H5E_CALLBACK_g"); if(H5Variables_ptr.h5e_pline_ptr == NULL) H5Variables_ptr.h5e_pline_ptr = find_sym(handle, "H5E_PLINE_g"); if(H5Variables_ptr.h5e_err_cls_ptr == NULL) H5Variables_ptr.h5e_err_cls_ptr = find_sym(handle, "H5E_ERR_CLS_g"); retval = check_symbols(); if(!retval) { H5E_CANTREGISTER_g = *((hid_t *)H5Variables_ptr.h5e_cantregister_ptr); H5E_CALLBACK_g = *((hid_t *)H5Variables_ptr.h5e_callback_ptr); H5E_PLINE_g = *((hid_t *)H5Variables_ptr.h5e_pline_ptr); H5E_ERR_CLS_g = *((hid_t 
*)H5Variables_ptr.h5e_err_cls_ptr); is_init = true; } } return retval; }; #define CALL(fallback, func, ...)\ if(DL_H5Functions.func != NULL) {\ return DL_H5Functions.func(__VA_ARGS__);\ } else {\ return fallback;\ } /*Function wrappers*/ /*H5*/ herr_t H5open(void) { CALL(0, H5open) }; /*H5E*/ herr_t H5Epush1(const char *file, const char *func, unsigned line, H5E_major_t maj, H5E_minor_t min, const char *str) { CALL(0, H5Epush1, file, func, line, maj, min, str) } herr_t H5Epush2(hid_t err_stack, const char *file, const char *func, unsigned line, hid_t cls_id, hid_t maj_id, hid_t min_id, const char *fmt, ...) { if(DL_H5Functions.H5Epush2 != NULL) { /* Avoid using variadic: convert fmt+ ... to a message sting */ va_list ap; char msg_string[256]; /*Buffer hopefully wide enough*/ va_start(ap, fmt); vsnprintf(msg_string, sizeof(msg_string), fmt, ap); msg_string[sizeof(msg_string) - 1] = '\0'; va_end(ap); return DL_H5Functions.H5Epush2(err_stack, file, func, line, cls_id, maj_id, min_id, msg_string); } else { return 0; } } /*H5P*/ herr_t H5Pget_filter_by_id2(hid_t plist_id, H5Z_filter_t id, unsigned int *flags/*out*/, size_t *cd_nelmts/*out*/, unsigned cd_values[]/*out*/, size_t namelen, char name[]/*out*/, unsigned *filter_config/*out*/) { CALL(0, H5Pget_filter_by_id2, plist_id, id, flags, cd_nelmts, cd_values, namelen, name, filter_config) } int H5Pget_chunk(hid_t plist_id, int max_ndims, hsize_t dim[]/*out*/) { CALL(0, H5Pget_chunk, plist_id, max_ndims, dim) } herr_t H5Pmodify_filter(hid_t plist_id, H5Z_filter_t filter, unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[/*cd_nelmts*/]) { CALL(0, H5Pmodify_filter, plist_id, filter, flags, cd_nelmts, cd_values) } /*H5T*/ size_t H5Tget_size(hid_t type_id) { CALL(0, H5Tget_size, type_id) } H5T_class_t H5Tget_class(hid_t type_id) { CALL(H5T_NO_CLASS, H5Tget_class, type_id) } hid_t H5Tget_super(hid_t type) { CALL(0, H5Tget_super, type) } herr_t H5Tclose(hid_t type_id) { CALL(0, H5Tclose, type_id) } /*H5Z*/ 
herr_t H5Zregister(const void *cls) { CALL(-1, H5Zregister, cls) } bitshuffle-0.5.1/src/iochain.c000066400000000000000000000046631434025530100162230ustar00rootroot00000000000000/* * IOchain - Distribute a chain of dependent IO events among threads. * * This file is part of Bitshuffle * Author: Kiyoshi Masui * Website: http://www.github.com/kiyo-masui/bitshuffle * Created: 2014 * * See LICENSE file for details about copyright and rights to use. * */ #include #include "iochain.h" void ioc_init(ioc_chain *C, const void *in_ptr_0, void *out_ptr_0) { #ifdef _OPENMP omp_init_lock(&C->next_lock); for (size_t ii = 0; ii < IOC_SIZE; ii ++) { omp_init_lock(&(C->in_pl[ii].lock)); omp_init_lock(&(C->out_pl[ii].lock)); } #endif C->next = 0; C->in_pl[0].ptr = in_ptr_0; C->out_pl[0].ptr = out_ptr_0; } void ioc_destroy(ioc_chain *C) { #ifdef _OPENMP omp_destroy_lock(&C->next_lock); for (size_t ii = 0; ii < IOC_SIZE; ii ++) { omp_destroy_lock(&(C->in_pl[ii].lock)); omp_destroy_lock(&(C->out_pl[ii].lock)); } #endif } const void * ioc_get_in(ioc_chain *C, size_t *this_iter) { #ifdef _OPENMP omp_set_lock(&C->next_lock); #pragma omp flush #endif *this_iter = C->next; C->next ++; #ifdef _OPENMP omp_set_lock(&(C->in_pl[*this_iter % IOC_SIZE].lock)); omp_set_lock(&(C->in_pl[(*this_iter + 1) % IOC_SIZE].lock)); omp_set_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock)); omp_unset_lock(&C->next_lock); #endif return C->in_pl[*this_iter % IOC_SIZE].ptr; } void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr) { C->in_pl[(*this_iter + 1) % IOC_SIZE].ptr = in_ptr; #ifdef _OPENMP omp_unset_lock(&(C->in_pl[(*this_iter + 1) % IOC_SIZE].lock)); #endif } void * ioc_get_out(ioc_chain *C, size_t *this_iter) { #ifdef _OPENMP omp_set_lock(&(C->out_pl[(*this_iter) % IOC_SIZE].lock)); #pragma omp flush #endif void *out_ptr = C->out_pl[*this_iter % IOC_SIZE].ptr; #ifdef _OPENMP omp_unset_lock(&(C->out_pl[(*this_iter) % IOC_SIZE].lock)); #endif return out_ptr; } void 
ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr) { C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr; #ifdef _OPENMP omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock)); // *in_pl[this_iter]* lock released at the end of the iteration to avoid being // overtaken by previous threads and having *out_pl[this_iter]* corrupted. // Especially worried about thread 0, iteration 0. omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock)); #endif } bitshuffle-0.5.1/src/iochain.h000066400000000000000000000050701434025530100162210ustar00rootroot00000000000000/* * IOchain - Distribute a chain of dependent IO events among threads. * * This file is part of Bitshuffle * Author: Kiyoshi Masui * Website: http://www.github.com/kiyo-masui/bitshuffle * Created: 2014 * * See LICENSE file for details about copyright and rights to use. * * * Header File * * Similar in concept to a queue. Each task includes reading an input * and writing output, but the location of the input/output (the pointers) * depend on the previous item in the chain. * * This is designed for parallelizing blocked compression/decompression IO, * where the destination of a compressed block depends on the compressed size * of all previous blocks. * * Implemented with OpenMP locks. * * * Usage * ----- * - Call `ioc_init` in serial block. * - Each thread should create a local variable *size_t this_iter* and * pass its address to all function calls. Its value will be set * inside the functions and is used to identify the thread. * - Each thread must call each of the `ioc_get*` and `ioc_set*` methods * exactly once per iteration, starting with `ioc_get_in` and ending * with `ioc_set_next_out`. * - The order (`ioc_get_in`, `ioc_set_next_in`, *work*, `ioc_get_out`, * `ioc_set_next_out`, *work*) is most efficient. * - Have each thread call `ioc_end_pop`. * - `ioc_get_in` is blocked until the previous entry's * `ioc_set_next_in` is called. 
* - `ioc_get_out` is blocked until the previous entry's * `ioc_set_next_out` is called. * - There are no blocks on the very first iteration. * - Call `ioc_destroy` in serial block. * - Safe for num_threads >= IOC_SIZE (but less efficient). * */ #ifndef IOCHAIN_H #define IOCHAIN_H #include #ifdef _OPENMP #include #endif #define IOC_SIZE 33 typedef struct ioc_ptr_and_lock { #ifdef _OPENMP omp_lock_t lock; #endif void *ptr; } ptr_and_lock; typedef struct ioc_const_ptr_and_lock { #ifdef _OPENMP omp_lock_t lock; #endif const void *ptr; } const_ptr_and_lock; typedef struct ioc_chain { #ifdef _OPENMP omp_lock_t next_lock; #endif size_t next; const_ptr_and_lock in_pl[IOC_SIZE]; ptr_and_lock out_pl[IOC_SIZE]; } ioc_chain; void ioc_init(ioc_chain *C, const void *in_ptr_0, void *out_ptr_0); void ioc_destroy(ioc_chain *C); const void * ioc_get_in(ioc_chain *C, size_t *this_iter); void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr); void * ioc_get_out(ioc_chain *C, size_t *this_iter); void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr); #endif // IOCHAIN_H bitshuffle-0.5.1/src/lzf_h5plugin.c000066400000000000000000000016641434025530100172150ustar00rootroot00000000000000/* * Dynamically loaded filter plugin for HDF5 LZF filter. * * This file is part of Bitshuffle * Author: Kiyoshi Masui * Website: http://www.github.com/kiyo-masui/bitshuffle * Created: 2014 * * See LICENSE file for details about copyright and rights to use. 
* */ #define H5Z_class_t_vers 2 #include "lzf_filter.h" #include "H5PLextern.h" #include size_t lzf_filter(unsigned flags, size_t cd_nelmts, const unsigned cd_values[], size_t nbytes, size_t *buf_size, void **buf); herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space); H5Z_class_t lzf_H5Filter[1] = {{ H5Z_CLASS_T_VERS, (H5Z_filter_t)(H5PY_FILTER_LZF), 1, 1, "lzf", NULL, (H5Z_set_local_func_t)(lzf_set_local), (H5Z_func_t)(lzf_filter) }}; H5PL_type_t H5PLget_plugin_type(void) {return H5PL_TYPE_FILTER;} const void* H5PLget_plugin_info(void) {return lzf_H5Filter;} bitshuffle-0.5.1/tests/000077500000000000000000000000001434025530100150075ustar00rootroot00000000000000bitshuffle-0.5.1/tests/data/000077500000000000000000000000001434025530100157205ustar00rootroot00000000000000bitshuffle-0.5.1/tests/data/regression_0.1.3.h5000066400000000000000000003374171434025530100210740ustar00rootroot00000000000000HDF  `TREE(HEAPXcompressed8original HhTREEP8-@HEAP`~SNOD Hh((Pp Pp TREE8u|@xHEAP`{ rT bPLii2HzmV\yTa VjgU.]0Uo .+ n{.DB($sk jpW͞OD{EmttP_D&1eMGFόN?-Eעϰg{؄JV"6/:kMZHOXYS@B1.ga<5)cx uQ$_QdOi G8Fr:R +T'#(B1:?Kywc Gu7]0FN\8rEWq@ֆ^+=:>B14%KHѽtȒׯ,Zx^xI^ |pj3 ţ!^%5lFP\뫢uc,b˶!,*!Ү6'V>N5Sf`gPˢ5^zJOqև۔f¦ 3Yw2݊( L/cb.SӻI vn!Ǹ\+°޶s%sd vb-fvyd   "{`S.pXQ>w!#w;%cq6@B1kF1D=v2BQx`z.Gx"BL7T5.k#^ `c ˦3tr%@pzt4`q}.0S2vdc /݄){y^sT.Ax|zn=6 S5GJQKm)1j&BW#@B1JPG$`7Mb.MJS\߆Yʠ~+ [Nsc["f>bB1a`Wzך'uDhș_Ftnq=T 3ј#NB1- v=p)L;[B֏~f;KgV' ?I\WjjE*p5vͰ#FE;ATхؔ/ܥv3wfZ^wvў!X=*E}=6Sg5mK񎊥@Ed/J1d4J\gEt]q a2!X<[RiԞ 97zg1OTi `(٘)KS~ Ybi%xl8 Ĕh]j{L,q=MA 5|*er25Y{N^4 lӵBES<OyyQvy V p. zA@=t$3PPcԷsC`ta_c? 
0Pg]"fxH9H_E\▎#ް ʼhtZ M=.Ӊ0-^;\JOR3Sgڧ2yJlt-xpX<$@;ϳ`UwEcme5Xy#8+he!S[r YG( Wo)4 ~F\ ?P\?k\6K$4X9>Y3 H-1ϐѴ|q&2=0(gpH)wF_>Sz͚BYL\Q~ގS"Dir)_,c+r|8^lQm:*lf#FnfJiaiԣQqk=S8@]&s I©IwMAAe>^^:ʐ_e┉QLԋ]ww>4$NxbupTEj,bJzsy,_{p:*[uqИ6$(Rfwrp6] @oZC8 !ú3)E8 3P_ K]Ы 9ٞ$^'SBL"4$ `-1ϐѴ|ŜUBW#C`b)ӳvuxy" 9Lizf+',l)¥w a,I#-#9RXikI9{͑r~t.ñ՘BQ嘡&.N¾Mrԝ]AbkѿȃQrѕاc{0; *Z`׻鄺g D% DЖSÔgc.~6 `~E@׮w'H'P&=dž'bۋO#܀TUxKQ~ly4ҹC αSP ϹNJXLV8{`s23L7 kCJ1І]rlW"i*0_ɉR4ү]Uɖn1 :q:%d8[񛖙Fȩgh ZcYٰ]y?rT h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@GrTTREEZAJCrT h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@(RrTTREEXZq3_᱊B>"\eMfjFHJ%v$koX[*iӵ2>Fl36yg}䑷N4K$!eޤp?(ރT0i[?ү=q{7qR 8G e5oIF7 @`'AH@*:pQyT/u!Z^J `@["v5ܘ[ɎB`B4tY&<9rw{*!sv?q2s,}0KXf3 '1Iɧ_`ԜM_b5mܹ̂uj(00Ibp ;rC!}Ep,6P3C"C4~k,|kDR`pcP$0Ŗ% m#t1!* (raQ UW k7瓫J"ȏXh:2n9X=8L=Nv)QHALx>+2O^W,ӠQH sSΫ] F"O}s9ͤ[CP7buHōz\+"9ưANJzm6XocDJG\iĎ,7٣IFzi? QRVx#l2ix :lE;9[I W VYUJp<׏vm'x)[nޫEzhybOWtd0!Bb (crQPxW=2Q P\rT h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@xdrTTREE _mIrT\cl8N\32/>@ m4Ү&s]^Xod[{m~ ~-`zV_e>wcXV~+Z-2H, IAuS}LwBW<+*N R1JJ˚JD\ȴvVI,cblYE8YJq4w,<ǰ-x_^㜾n̿z,䯺yJ/=[~Qu%,./A(m_ n)vkKsGzCEk=*k^}F%,QTvQW5܃n[qfc^֜,QʩdR3x#d |3_5UR="}znhЬx 2?sT3+]~3Ϝ/`wc-7ofy0V.q=OIAS<pjo_-^fũe!əecC \@Ss1 lřkN\_:z_9;u56L -]Cnr8ڡQg):qwg^bE`t:℄on}o*``HYJ`-{H <1\qdM}8zO5a-"ٻĝSGBZ6* ۻ~u=H+޳cP' qA1v%# mi-A ~iI6ud%޼?gD?ɢ/0;;'AU0nȅ]1+GY@nT< U _}(=4ujugcCj(AWᚲW6X7w!?{o& *LH{X#h6|nrH'mt:դYӐi"{VҵNp|yeW8:{vژ]Fp&Lvw1CwJn_;heߩfdJ@*,&x֝n@M6䘅dgRFUjP|UP/ܺ@Mѿr{@qds\B|"?-37}9FS A6-}(O,f"Ÿ00h%~G<&-]JETh4Ob=aF8HIT530ZcZ a}~?ZZse}]"{ōnb %%D|y/%sG Ƙc$(+Wg^v,ge?v#̯XʟU Ϗ(|~} (5 PqaPc2_@-¡JIW& Wy q As .&Jt婧OVxOkCga$bV`ɝ7.'[ɚ$}t>]z-NƂE22S *%( )vj^45Du@1 .lA_EB"mpZep1/ xpQ{tr: Z8鰇U&ҝ>rb~(5Wl~.lvysRy9|33|ܭTjRVY+ 'lAҩ{0 ’ayǴV'윉؂[z``)werT h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@XrTTREE:ʼnT䩢=!)N1WYQ]> {iH߭v (+7_Cr0W=߳kB{߇:r>i܏ 0'axCXtY~Sa0xTn6̲]e=̖[Ym)-w>7t2=,vQbr[OlತAZ'JM"Zzju.˂d;q@QӬj @'FT}}w|{v+֏/XwِB38o@&u1A ! 
ʪ1Rr%-XۃE77] G\܈6qF+wScsPӢ̆V}(,̙KMFw?(KvR " 8'nT<~puɭn&j&Dƌ{\>4-\?ȼnܳ 2 up1Qq]χvP3i4QH_յK783UqK8\*[p14.Jr/1?r>Jn/ЮT4aKMih_ X 4K^&A^Txx2a}^$ε\32ګHZ Q;%hTO .d#r}l Y;ypS~JDԮZL7^%!nIqSBV"aj,:+u"[UaOs4p`b8J=5%Lߚ 1x =j\٣Eruuorȕ9BSs$U->3>6-?V%ޕ! ;.BÄ!X0SΈ c(c5AQ%1LaL= F5 |x!@!1VXeާ%LJV2N*\45nlW,Ko(sv?-3X R:WdOFkc`̪98PEv>SҐö.W 0p6ݡDCW/hNWM@!ćb?P;[J+  jT-z,s%=|9lKr~2 %w/  ګĔXOSn>ga,mhD [eMN ['y?B/Km֕AT|ւ[#x}%9wgwHq=/xᔆXF6vCɫj=fz'Idcn`V^emD ;mԢx XG' jՒW I91{IA~5  Zt-!3~O~4AU@;BS꿛g}FA!Tv:^h2EmuѷBM^SRǰʥRVN+Ϯ2{@+ɝ,X[ȅC&n7%..–$P,Ԝ`p?:hRє:d[$Hz L%?>qb>MSeG[.}> R͚TB/f@lJ\ z{]?+i^I'a9 ,C?9gJrwD/;M$%X6^]TT -X_*}PP淬+}_YFz3y#+WiEnQ90@ G^XDgid>p/{?ØJ桌5kJb#s_0U/OdP zYjN$?pj%d('qO[8Whyod#g@ږwR'c @o.+?Vѡ{HxE3sNLK2Yi6jWJkIcz /[x !"yq090>M(!Bi\\='Y3" V͂YSYk`3T\ⶥTgg5^ >kk}bpX iΗGKQ;{0BLa"%|6Q8F?&3@?d CPN5 a AN+}<~ "}/0"}⒆ f~GZ(ser.(9%TVN8c z81ɪq*UyuHc@r9jm,6ihLR֬P~4єk*8eEz,h V-?3,GSKL3sgj\t,A=켇mזHAc&')];`b|(z > Q8Poe >or\c]e x6Xv՚fZ'v{?VQJ9 ׏z5,}ޕL3'ilbʴ2WNy8RP\f~6x>F9MŇTwDc4E[:9)9t8n h IM}aX* ▻oé^r8K0ӓa2`{\qPUԤEPs&6*wZLC(DrT h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@rTTREEh(D5 +j KdQY>P @X3W^D|Sgngsځ".-`{BTO0`0p,b,s'UQ".j)&"̹%k6jKō]H/JQHfiPjQ/v>{Cq0`( ]ϊ;\DB 'K,};eL /)i uI Qdeai*(f^7sG6E}؈e/ ŌMEx'OUIAye\_P\OV ڍ2~.f1M):(C$[(v 8`>`'Z PN!ǓE73#qҞO}(%OdN+/j_2MȑK#- 坘@C1¶1UX4!+TD QQ7tsgB[ZwP-ݟfr'o[;ߗa8Q!r>0:xg h݅ԡ՟wzZ/}OkLpGw#11|xg}$3 &U1b&Ÿv#Q*P-eN[ٕZ{rm|ݣo;,NG+z 1t.ƥ#X {r ,ARWbqe 9}N0l636& ;%U(l(O+Ags#H:Cߵcʨuлr}b/}pL6Eg ͆j3i51s[a1_Zn9 {vy$zS%UʱVW۠x&ׄ4DҭAa ACr3:]x@:EFF^G3՝t-c;,US-lu ;ϲ6<.!&"f3}SF[f^ۥ{M][t)hw!#^+Ү3CTREEXrm1JP$iP'YG>ҟhae:5LfT̮O$Պ:?]ԗѷmvPUͮΧ#.i"QSivi8\El"}Xw@ŌKUŒV`ܮ@4\Rx}m$ pbت?P(KZ@ {n1`%iBk5{_D)ǍZO=C2^{;NO*f O3 \]6qdqs (:QaEei i\j'8*,*a~}]͎D I&"*PSmti J!F_lx]UK2ri\õ 9s(gpN.PxXrԌVuT 6dbLQndw\]o).8Bgx )c(|(Ra ta_qlsf#0EULDIчMAi@-]4a M$>>΃hw~6aϒtԂ1c'6"ȅ-<L"lʎgpPv<\#o;c6dOE#U ?-J6uY D߷oV+M]>9Ĝ ] c4tVz9w(u$籈p rP8DnRh NIra=f(pfiv12AVUr y !gx)o ytqORMV8p|cajJpgLAw,ڿ8彂w V F^waӫ˕+h#|/(ǩa6nuHKy;Ei9W e|H7|PnGHctA41h)M1Jݼoe~C Ie=rZE/^y9@؊t<Qg;1P"M WCڶX,j_@)_J p&*P/00ֈ 
p%MgZ)ӈ\B<(Dg{  ;a`"GJɘzKC9Pݷ2!0?aRquLl,Po^Or1h$^+Ү3C(UN#܆ηɕ A%JJL Uڙ*'lV$Fr[Kbqù(d{I. 7Xؾweu0]E?"Jc!%o퇋fښM"s1u $FHs[op?&|rK6Z{dʮv t5"UVSsuJi!Ĝ^$+]3˘$:nXs3v$*_^1; 9@ 0o4:L"7mMi\0HC;\+@DH/K6KY)w>Ϧ+ h+ePɲ:f* &<#KĮNHЯqt|A-y,fR;QK鹇%b0EhkD4XۊRt(ޯHɫʠa:7aUAg>ms;r`4['Xw4>3o]MKo_*kr#77*@ò9ٯ,{F'(W"@R5r<]1+N5Pm1`_Tڻ@=M+DX!kLYt${)ܯNFm}-Oi5M?Vxa4,%i[anݹZLKv$IUۛqH ;RO:Ś/9Y bZbϲw +.2g_9KQOMKECl⨟&e YMXR B>jT-%aÀlShsgW HS1!_TU -eQJo܎.~ic=.)^邗Q|oe_%i]%WT XrC_-YM>s!XUF-`9hL"~ih,|cGH;{o6 SO ՗csIw5_Gk]g {L~ଘS-OYvHD?I+2hhOZ"\Q8,BrRA k18-okN֙~: 39nmq_eS9awOJgR1BfPҰ]:~dԀKg!1 XVy(@i]݄r/~\Ce@8xge]>d(ɿP_$=v`N挚3|t}RM&y48T˿ɓVBwDqGP:7i*V7 &g}3$,X^ h[nvUaKs<|4ϔz7T*e{"a[GP1mĜUl09pC2vS-bQlQcMU.v>h"[Iܣv-4 YU:2t'0XۆPzR݀'!}GGQ4jyºFPlqAЊqi`eĔYs02bK&NxGqDhe a<"R,d.x fJ\GPD同ȃ2 X"ԣt?)&G. /I'ҞؠD|&\iMq'HP%u'M$ʰi>YjȬ @4'v@ҍʋBlz T3Cd7/^ ,IViٵN8 WMjd=&*rw <}_v&mHp6 ɽ#?;Wp@zF[L^WP|zq^ZjBt( ^G_ wiTF\| &d(=DB4Dck _!<c ]Fϖf28JF?lXJrQcuV`ݼѪ-ПDJv<G0j<:Y)[c qjb+7'ty}_='psMP:#TޘNւEج+lFJ)x3T`Oi#*#_ Qw- >+bz,˃ʙIW(vjGlOp>;qtC|QGzH!b:\k1/ tqgUWW#\CPp0=(zPj;\2׹`qu4꩘? HϾ<jZMƏ0 "m5 ?\kxkƗTؐI܀Ox#_}6b}&YaR/][8=cr?v{2(C?߯I<<0PtmN1Aky ^0O{9MoӆI5U_WCx]U⠳-ean\t~L3Uwu< uF)'|bAIγaHUCodbg]M=Cn5 FKL Vnrr]o,$kȶ,otBOR8a?7\pKQh+71V>_}\FnJ8 uhȂ |>҃ňzxPp_ >K;K{ ki>}pl2ԝ^Ah́U3Kf R;a-`i M1N\By?rz=c#*7VQg<`|8=N݆[,4a?Ewh 6cU$8F*$=T^^AR2$&=ͺ KGKT(\3@b<ѵ,o̙EmLW `2f/mIۏ ıV Cw%sQAF'oþyZ. c[ ~?}e.c贅r"i|k7겒m ʥc eF@f̴$hR^[; Zq߾#P2K;-vYAXW'Ca"wy6 +gY%aX4-)<)~.y8fh,QH [c TV.~A}eRT\5:rjDHE^{{D[jv0K+֏P.;ˌWXC F͋B&@B]4a&v굷jAvӍ["ms,RDnͶP"EԱ/M}r6J)Y }|s/ Ǭ~E==)' *ʥr-$R%hRbx&J:(,֦TJCr?Qt,}\KY:DތnE1}62&&wLn@A:@uv G&XpügF} +/AU`Ez-9cė/h`4iiwAZV{/m5bsՉ,>qTqx-6mxԗSy )Bawv .k&TKBDU/^3! lG<>V-jZI1u'r$C!Z*aZA{Tumpؿ3C.W./t{g,tKv mu:h,=HO yYM}֙1G x X:YE-pb0V=?Jշtgm@v󰲯̟d!fZvX٣'/J3 ~tWM[OXTk7lE`BD GiR`1TF*$ ,/X V`w$ZLeۨ9# H<=xWP<=CdrlSg0 S4QZhlbW&g^q<+0 `ʎ^Kޟ]^ؔH.҈|UGZI-X fId~L/nDaB@3{#5t!mh usǥ޳*M. 
y +J53˼YLTS+c{v6#@Kj䫨q}F(YϤvF y;?q:m;kiŢbB!惂<޶\"ɬW7r2z5Fj բIZ*a~ң؊X% *ԛe[51F2׋yH'{M.FYhQX B0)BGQCDZtg۝xuIL>Yuhʏԯ`c噐QAcFnu҉arPog_F01fX7p/+ӿ; xl0G-0F|:ޡwi~^Q6?A:[\2%e}zZ ơJ}65&rQȿN.r O4G.kt3uWrƙ`{5;Yaoۖ.^.aNf,Н j<vЦx)z 2O`ɸl jZ\/f4]Jw2xoG?L%,8j;.YRcØ֧x3뭃UumD4:MMkgP' EtmG,:vh1_Q;sIwTBK2-_,8|L\6\ ŚI*3"τG]*V_7#L7 ge7ug $FvTO$ynW{}׼+0|y7\$!.Թijdev߼`FHfj-8g"uZI3ADDJ(ݼ/`L~t.fa !py@Y["%}&Nz՝ D={Iiq0QaK(  m^r﭂n66iCN%]{p Bi%ꂖ͚73ʹ%+d:WMĭ1e >25}q`V寧ysn yf"֌Va3oSmfM"K)]\9a&sIo nwуIHm^)s^ 7{cb*h߆p`u|_r`h PQPU}Y+{02qq+d ։x!eZK8BxkG:0'|Zs#ɕ;ax?l:i[ C1aa<?GO=M= @ٸn$aӹͻ&i^s~kɿSokűlޠ@!ƞ?;dnJ XF)cu \K :@;2fƧ}S㱡 ~yYCڹ+Aqq{FdĕD xl)٦E ~Oю\%i>NdQx6&ZFNe5 !?f)a`YY᎙6"u؍<ZGr ѳəB%]С\? w ",L!eW&=3$: 3I)l&hO̢yV[4^U)?r- 4b~zP+0ĉo K^u{o'8RMn1 xltyC5[ AcX_(~Λ"<4hM"?T|I0+^k7`TDR~ b?y&g_RT'd|CZ zF\f5r/:iA[hhAa%]wV 4b+Wo͍+C ˯)QYx/!k N73*N1@;N>4োc܇>J`E?oS ?=ߔc$qNB~ɵG D} yjQyXeiUM\#tT MaV&g* n,G5Zx_RP47[b&(i &^iNڷ+aR,moQ%0~v[A|EdG{ķV9~/h}F#-TjioWE~S/z*&{U41%yNI1Ժ#b?,F)V#5d0yRF'H<D Q?vnܷgTdKRQq&|PAhoBl Uw#PvN6@yQG c Y~;]d'OaSJO ~ T,i.eD?Py`m`2_k%y )ce겤 )+qA>" qٸ̖u;lo [PK953Ǻg(Dx [5ۼK} _hjFcڤ&_ϢgjzWIZ Y 0bn#2}ؤbtr:<Ս ݅!%Z>?qi]^KH"C{7n"21%. T}HJGM@pҺҐhDM?7wU8t)1 of7Ӕ:gKzPJ< iKP" va (w`qglc-q X;{'=;VWCmFX2F8k;cI$dT"3nCF3lX 1_}!@s0Rԋ)0`h=1K=`A=2V6gQkI=dߪz ( o-diP;C`r=#r(~#,ۣFp^T5R szV ToMn;gffs|)޿Z}\N9^nXԩ+@p0hG"M|d>/6 ` +zCԣmK5;%"i0Pݦ'L_E.{J'Oܐ4yOl?w7*3l(vyB+: ov<,b _ˡH&ys`tBc)?d%̇Ipc͟ji[s鵸 yql `%gO{^Kvb=yK$a $RWbDs Nnmte(hnմԙ 7 sd<,P(j`zWq>x  !oskvh dO ޷.%0|6-95"]}>GGFyS`?z.B$d+zcIcBrA\IniD7Q#op^Q5 Z6 f˲j, TP@a8ⳟan({rHט%x*(HO)E$2#ce*;NRꔐ ~oA|fAjjɿՠ .'\DU'L;YĜLS)bD:rmlcBۥ\D`Jݽ׹svZse(SO3kid>y{}RĬG[ /8Ň a4}d9>oh^׾*3 JuBU#ڀjU .d;|ͣ>A;L3lI> pabRZaVn] KQiu ՆP;9ⳈL<}1-'dm4>ɿu =5!2{x_a #d66woRЬҸ@UsCz] |}":*-6)aTL Dn ʦ +-ՇY3yࡻz(:;*޹YlG{}M'S3!C?cIGKIYb'͓ZC^0@%"Nn@;$$ %BatNvuǶb7/B2NPJS9.zeģ&=~Yv[Ӎq%oy'й֪/~5~sǮ 7 4h},uhMuPbr:v ;hFIx|p_r"G Y 5u*[kf3>z &k Rq-Pnݰ WKU洱0TUaţ%.IMM_|a7[27^Av1M&n(܋ac`isT! 
ؕAQ{%|<'bOrAٛN_\Gw~uS5mCJDJ!PN-IF <n-KEg6<;JHHa !mtZ.ky28F7` ^!e`H'KTw|_3]:OR59t }GNL Ur ?cf:$U[2g.W L>sR"z<&S/(""$YX+/OᖣY(-gC&rFPkJ|UbLDY '+]y-͚2:Kq }\(7Su=#9Pia#aX>1hy6E2'㍶`Yı4n[Y˴&/o.]hi(;rb?p%pa֫/#\6sH|rϢ͎ S >Sm8^&JGC7[$Cؕr7g\ψFM%Ӡ\d,"&30u;M y.0IYB̍>E/M $\xCB7#_TREEWmM*RrDNL~S\8lG9*~ t0-ǻtmMkKk!g"*-տd,pktYzѠIlg^je441SoG0İzrQ,{`-C靶y uPĉLoaGZnp~^DzrU0I[6U  ^=۠F\{Yc@px4`Y1:8 Dq]rlL"(f\vCN Yzi ahө5S!18b?ۋXP%5WwAPnqa171t2sk2uqWgnҤd-\ tђG_LCx BF\u.^?% 7};!VK,]ڨKV]+.s2M q YQ/ +mSطi=K׮J2i_ V{PQbκ 3w dʤN\6ip_lMѧh`]p?pdy%㬤ZL«4xKGҩ1w,;Htq#1zO3{mnt#bI7[wUlzB h?IC5p8ɇC>,E)۰,u?ULÃj`z^y:B C)0G"(]goC|pǁj{y[< 'ʸۉ K+o )w.MٿB N~+TѮ&j(߉۹W,$io[Uu[Yc"5vF}`j3uxo&Q&!CD` BM;{tY/ov,ʀZxϬWDPzИ>7˔DYArqz>i#YPwONgJN09RuUNCev!4 ޘ#qN\ .276&WS_1=ZHJBԠ^%1T 9 Su6)8|u%sԨ’&Dlf:fȩ.m}srT.ET^Qʧg uϓf@xp&ȃSǛa(٥xMmNC}odŮU}!,PNҭCRyO7fW/sha`|>j)ធQL O10鋋3 JMܵut"l'R<&5_nGJse͖' B6\^Hx#Ӑg3'uO?W2Xb``sؠ~n6:Hy@٭|/ *L/Brt+yJ3ʥft&1EGX~|u=hLֽ}xtM;њ~ L&al8驠ϺHr;*TLI 3N!wP ܮGUBΟ1$dٮ@ϋ*EHrkc<c+yA mfsgIX- QF؟:Lw&}l﮿=k2ZBPU R^MṅJ5[C`h͊:0/&[+?.Y )xgQ ӊx(3SEReU;_'_Cef󾵿S䅥ZepՆqN{ [MR2V4`3^(ͿaW93M<Zk{obBfuZ䶑n ]<Ye='%-Mo+\#  |2ó舢A`Jq]WBӶb>Nbho{R{͝/{9 MYoI8C&GZ=`+4.pLD9ˌ1ʉY25 }Vo~cN6fﺞ\JXkZ;Vhۇ쓷CWat`J龯*+oK53J~Ĝ&HGP?8צQ[}yȿflD-"5&T]C<&XVJ n@/ 7.|:&o:s/8B?r+ȕfz* P0ǧuL˚nO])B;A.~Wxw1] 4e~ `(;(&IbXyIԓ0YQ&dGsaQEϫxg/lepFѺ^f/ ,_Ϝ 19fI3|p [3;*rʙFR(%!  $nc`5xa]_;c,.LK-{'MA1y*cVVO3]&GpUuՒU'm@ ~/d,N}-S$Zak ڴ_cc5qQW ,)}Z5gO$‰q "_ ӽw΋*^yƐ~jCVlJζ+t\ g"#XD9)}b)$#nn=]=?. 
+G}'vkeDTa{ ilZ/DcqZ!"\{~A4#;A0 3W>eFrހ#Q`.si?5HٙVXr^![q@MՈ<%Gf2ߌz©qAvc<~:HΣt-Hb>AxD0e$7\Q7cJt`R n(^~\FJb@:0~UCŻer5*e\X A8C<}A5u T6)|0S:f7^h{Vf||%Uœt \xmQ'Qkg >3֦J[e (P0n߾_o=]UL9n=[0'\&ɢ$`qno #bc+^TI %  ]7zX0W8o 7}򕥚:ߌ8< .koel cCMnd;8Pٔ8L 3p}_!RzߣJ#WEU$0I z̒ 2 x>>um ukwO fÜ-,[w׋*^m>> Ưw|[7jXr#b4 UvS?j[`K$ccp]!9V!ԡW3Zdɱ3[̖|^~1MƑ?!tk|֧rzvx]o3ݾsXhUSN޴ǬqM?ئo*XBb}lN0Yi)4jjfPCԳ/'?^#^G9Lj.WR@z`Ws1΀ Gj-+oy͙<d$ 2->a,>z֦Y?#%IwW Sn7a6#x6\^K"d1iGb],orP3V`NK~BݒIRy>tYK?iEZx8cڡ?3-=jeWTؑ*wUFI%Y"Q" ĞZ: rTa1_a|S1_b|S1_ca2_a|S2_b|S2_ca3_a|S3_b|S3_ca4_a|S4_b|S4_ca6_a|S6_b|S6_ca8_a|S8_b|S8_ca10_a|S10_b|S10_cSNODP8h(zۚ  h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle @C rTa1_a|S1_b|S1_ca2_a|S2_b|S2_ca3_a|S3_b|S3_ca4_a|S4_b|S4_ca6_a|S6_b|S6_ca8_a|S8_b|S8_ca10_a|S10_b|S10_cSNODPHh8,} RwS|=OH SWH7^wF2+o׆МjA)=Jyx;;@*Dv#޵q9\"P8]p1X*Y$"P7`@KEN(1L}ȕ[ ŧ亪*P$e#PmDb~Bd`PEbCޢM,'ڇas38JsWj ̹$2"'w.5 mO-ZmB͢ӷnN3k^~1{g;<đSؓ};|ұrt!1 _2[7ԃz#d`TREEhs >sH`}&%aȷgU>E[ =vU (kϳ6=σ>'D ߽ܓ:S" Xv]oý@(uVoƫ4WdfO ҃`+,aiKivmV9Pr%>нsۏR;&" ,/DGFTC BZw%@nM r3t=T4OdeW ? ֎a=>=hDNEV,wbym9W4HՍs JT\KY@ ǁp"B;$tr>)b k|J` m_g$ǼiVAxn5:A`7kWZO6*5GdkOK_R;aSiv~}WC:HZ ΥryЇz/h8qUԑ9#rMv(D3k,Yjlyݸtwqΐt*W) ̔O#r1*i^o1 `ho9`oLH~߼z@ Qn+=suY@ȿ 'sJ'a+fo)&{NK?`IeK7VTͻaP bg9>6 s^%`5(\w*̚PV3F{C|Agф .u'HLzEq7[!V%wHs FojR+E:,$U ;c q1x^R]vD؀*7Rʪ _j -HoR`%c3w c:MQKY`JT5y2m^) u}ze?΋7g}gΥytJda#-k8Fs]I?2?D}`h,n񷮑c%(ݣ=$K;U]'tw7RsR#SÒ#h!{TB~5lφnt:vbHcCjkI( psr*A뒍8+oY0U뿸,j !͓xUMHc\4˜ ]=ϵc 1ɤ4hieJ"^v-dWg"Q$S2)5Iuwz{C J hБ!qf׊E" \)mK8兛jD:j%^~cĉUG&Sl5ʜ ɛ欀Jѯ.'XZW5zDCp}鱉kIuR 1W͝$ .&]PyIAZں .Z8{Ƈzއa6>.YQ&oYт9,-}SX9Xc,;vM|``!Ŭ43"MQiǘĢqCD3gc%ꂒƎ~!Jyead[UͺGqH-#m;EOĮ}}?u82k-x]֋OcaE5nnc:Ӥ qCM zgW*JBSdxt|OC33pU8aM*Fr.%}fy\(46Pĕ-AK+4P[,Wq}c_DSl P$a%LZQ(U.= $YWZAz̙ T)n>E6:7U[P7me[HcV#_"i%WQdbyjF$0 h@V:x? &sL1O^9 ns4{N <l(h]1; \]CJB`3 ^LyGKT*ׯRnf>fGS4_$eG"1?`X"K7c<%poh3I>}E#-<~^a3! 
e 𽶷+-ux>gaE5T?JkVxP"֪*iN'W?En^Ip87*%lY7s;R[)wϤ F3bp]FQf#;/ŢzZ2]sdY싋#ٿP \r$q8X5Ӯ" k jy {Ca֭~H-]^Hٕ:~|3oluGU/VxQ&}!z=;:DWQ\w03ϓ -~ a 5(3HQ7`*]1r߀IE H xkC%/(qA+p~[:H/fHiťqFdrfug@gmUx()1keU?*'W((#VƜ@\r">~T$=Seoޟ-%ɇ^)q*aTe/]ݗ=;\{I>>YUв,l pɱIO(aV2TZ7JYmC@[{ rS=;ZUfNّz}ZY~o^kg Bv ;43*I-VGvI/w۷ ETk 5VuewiP%}sROOzr$m"g+n!״2|o7Y%W>fqnKߪx|zB!+Ip[zBTnZi}H/|aSzQ{YvԽ\$,tCwI!zĞ2qXB~a}4y]] d }W>ޢ  ٙQtBPD5r!hstRPq, uܖHu#7e.]?24@OJ *aeGQO\/Lwc5@wA+#= MW3!'lEmzjQ+-e~$F# 5 lbֻJv궺=]'.glSX 4|Tlmƍ\eh_#mɎ*s>״{ASd vMv @ɈK^R^;1H(4+XUR [suD%dp_I2-܊̈́s7;K?&  G خێ&\O$V-M5rxY!ɪccj4lOh!ZeAVS?֞M o ]HIƗeGgw۲ _g]ؑG Auޚѱ8d9@N3d`L+/4ЕpJ yef22i,49C?ޭhSzs&j_ ⌜xPwf9+tN^>xbf>w8;L23jwpm6G×"gQ: ͪZuFTݼ,8Co؂p^L$}Swr]]*]/Hyr7(^=2(dA$7RvȏC" C}+ImJT+CXWWadyWfeh~qv'v|P:+G&ټ*̰: xz.=tzJA{|u2c"-ӝF GViEk_x1 BRj=ҁ,ƵH$1/~7K2Q30*Q٬: n}feȟ1Og<cˏP~PcΩA32'4#̗^*/^;9V >9hyJp,k)fyE~XVR$_=2pInW.UMjs"GI\kctذ3pGf;&*u'<0ÎDeUTfyk;Np#]osbl#\XhPrv=X֙zY\YȈ_;0 ,KFBtxAQ.zu d;]g!]aј_,OW+9Qh$g_|Xo*x6t3 NA< \h%e58ԛElƝؚG`yV [tM@"u. Mce&UyΓ  \LH}cԜJdY,LaYM IB33tNw/\,*,Pu'FCmѕ1% GzYi9"OL bNSKLQ͚ͮ!ak%ݛ2|o7Y%W>fqnKߪx|zBa;i$o )2NӁB5뉴 u5Hۭ8Sl *-^+՘@9.@ X^43I5IJ*]Mak v:>BS%Վ5(nm;43on~nKM{?* fk53ju@B1jJ\V@>QG 7u9 IK(FlhʓV]*B1[;r{Q'S#$Fx'C1wcmϰOQ$5#o5<ݰr!<>xDv? B1m(0,᰸[!p@kK%j`3#%*#<WW4Cp(Ǥ y~<":)LlDr̶9}:&NJ_C9e_xGo>W`^7n!E$%6@I(/ @jJ\V@>QG 7u9 IK(FlhʓV]*I(/ @[;r{Q'S#$Fx'C1wcmϰOQ$5#o5<ݰr!<>xDv? I(/ @m(0,᰸[!p@kK%j`3#%*#<WW4Cp(Ǥ y~<"A(/ 8LlDr̶9}:&NJ_C9e_xGo>W`^7n!E$%6b4\ŞCqpkINwQca\b;3[yq#"7 !QjGߧm !bٴ$I,`! b|4tY"UTdw>#J+%t ޻?c5*Aeg\b7vj PZRҭr՗= f|򹰀ko!lD\ gۈSep_Nk!@B1pcXq? G+ƉÆ36tK!xjs=%;_>5 DB1ڒ @ @qsvi*Ns0)[V JXip 5B1Y+[H 6zk):Ը'8]Y+._yDQkh).:)"-쓢Oct)jls tx[uPvpd fm.:b@I(/ @pcXq? 
G+ƉÆ36tK!xjs=%;_>5 DI(/ @ڒ @ @qsvi*Ns0)[V JXip 5I(/ @Y+[H 6zk):Ը'8]Y+._yDQkh).A(/ 8"-쓢Oct)jls tx[uPvpd fm.:bl7}b [I|v«̇#ۚo `$0)L+;LC\0~@a~U h`rrQԞqSd2T{@Oޭ݄nVP^r*Ƽ׭QMdSZIpKZ+~Ȏϳj +BhL=b\A2& 4;.zMPE -,9$XwKMtYETC5\Ou2nD qdI>]>E7ю|H3gh-ZSHiQ&;ռXcB=,RZJ,Edz$C* pwh_&Re8?{yijT:I qh)4`eɺOa6cGffذxd(}r'|BfeҹDIf쥩䒐Uà;z}z}r>W G~;D좉빝q܅]O93mߦJՄSէ}gUX Ri0'qrw6PЁƎK;%&<k.ǎBk"ܬUU=L)' RfۤH"1@?6> ƫP3˶SĢzL-inD (/ dI>]>E7ю|H3gh-ZSHiQ&;ռXcB=,RZJ,Edz$C* pwh_&Re8?{yijT:I (/ h)4`eɺOa6cGffذxd(}r'|BfeҹDIf쥩䒐Uà;z}z}r>W G~;D좉빝(/ ܅]O93mߦJՄSէ}gUX Ri0'qrw6PЁƎK;%&<k.ǎBk"ܬUU=L)' RfۤH)(/ 1@?6> ƫP3˶SĢzL-inD  h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@ Sl`TREEJKl` h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@P]l`TREEM h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@ fl`TREEOOol`9z6!eZ 42Zbe޼w ƍ# w8iY'Q-b[<ͼ}~GB_SChQ(57PJpɹƐK}ͲT3&>5R}P'N7p_8|4~L2h&>:m3O ;+ɼP?\7l 6ܭc}}[I_zqSy-+=^Hk(/0:{CQe?0pazﵿ\ :x~,\4I`e[-a `{ !DneϊrӾSqw{PIFN yq̈ۖϧ8(^(Y`C4Ky_)LU+O jM`0@V7Rq!'\r)I ދ^ ʣ#wlNHgpT,) s1kҝP]h+RBC̈́RD@͓+i1n"(8 C>䚽?q \OwB¨絊JJ72ٿZW.v?E+X۔x fH7!G+)͎aKno5OqGI?Ual՜FH.Am_ uf"VBa]@Z󝒲}^"N(OUPXISpMt`a9(/ !'\r)I ދ^ ʣ#wlNHgpT,) s1kҝP]h+RBC̈́RD@͓+i1n"(8 C>䚽?(/  \OwB¨絊JJ72ٿZW.v?E+X۔x fH7!G+)͎aKno5OqGI?Ual՜FH.Am_ uf"VBa]@Z󝒲}^)(/ N(OUPXISpMt`a9 $-yO~%$t-1V9V3/|x3vxau@T.A0CH%)Zi& T(q_IMn,*_R|?#~Kȋ7 FI^mP [{LCj5f7aR[kѤ3 ŁzDbky~syu=:ցqIKf&Ha Ljk <"W:o(T; ˄V[?g`D(/ vo~+^֓l jB use禿"FAZSg2e yH ^P uX<#mŽnY/.8r4oK\;&(^\pQ#T(/ vaxǎ.T$96[9jt^ m Krmآ۳Y%x pϊrkz1jy69)5R0N@'+twm@ lj\Kځ^nRn:cG*(/ Vn;i) ܉kܰnd6a"Ym:#K#l8@r>R[kѤ3 ŁzDbky~syu=:ցqIKf&Ha Ljk <)(/ W:o(T; ˄V[?g`DxW]Ƞ%A9cs<_g:TQ8b`r_Aъl~.vbVV&cR$(Tl.^]4S.xŰ!pK9>@F pv'NzQ[g@7CNoosYW*nBZO=~GP s:\~ЙŨ.g2gLEFWoťů߾-< rJiBhQVyhk[z_}[#aӲ\dfDsrʻ{/k^W[> +%%"qBD87rd[퍲\Wֽ)4 \"C*?YlBɜEp0iǘMLE9h""(/JFdwOwpdfFnlsVfRm\8+1wun/ h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@ l`TREEdDl` h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@l`TREE0-Ewm0z{|>[68ٴkS!ڮW7VKC}U_|%硄\XYD&\g o1`7w`g磌qסCj~[P 
&:FxY٨<"jgq.-:>V`X1]Ȟҹ,:WLa0?,5޿v5Bf`-fnCh0GN6bQZ.2ͧm6xl5l<*}E 7TFK-YToݺC$y}zR{٧唥C\8+1wun/(/ -Ewm0z{|>[68ٴkS!ڮW7VKC}U_|%硄\XYD&\g o1`7w`g磌qסCj~[P &:FxY٨<"jgq.-:>V`X1]Ȟҹ,:WLa0?,5޿v5Bf`-fn(/ Ch0GN6i(/ `Z.2ͧm6xl5l<*}E 7TFK-YToݺC$y}zR{٧唥C\8+1wun/ h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@ @l`TREEml`'I3?d~/ UŸ},$[V-ۘ{G݈Mj`:o;e{Ao-2;gޥ3bx}ڻQdMbO Zypo42#ũ-!}{4,L`ձtYptEI13C3mpPAvm|{2 *C?>$ R;3; !G#ʘFZٌ׺%^>D\46`}b}R Db9ͼeU7 1a1yd݌khL^կdu}4ym?aB%xV5Jt!T6[$ ,e1w88(5a(L)ʩh){tFb L% yDx—MpersEZThsI.ckBv˥smcț纳fMY2,-w&bb?fˊ"T%F;3M@ |+dmb07;'=栁 T&cqkEٓ87j&#PH]v#7ӦS)1+P) :sZl!,0.IkUg^-tuK '7Ój[clŢ|h|9*)"JfDk~3>X[3 g?*c2wi~y  H'r)LDjb92n^ݬpY믘;rD':^,Pɏ5׼ Ai75:^zmqa7j`eIG-$yWCNGk{HTlJ#™Q~KFHHH5nj4əs;Jj L¥;ω%tT6\khL^կdu}4ym(/ ?aB%xV5Jt!T6[$ ,e1w88(5a(L)ʩh){tFb L% yDx—MpersEZThsI.ckBv˥smcț纳fMY2,-w&bb?fˊ"T%F;3M@ |+d(/ mb07;'=栁 T&cqkEٓ87j&#PH]v#7ӦS)1+P) :sZl!,0.IkUg^-tuK '7Ój[clŢ|h|9*)"JfDk~3>X[3 g?(/ *c2wi~y  H'r)LDjb92n^ݬpY믘;rD':^,Pɏ5׼ Ai75:^zmqa7j`eIG-$yWCNGk{HTlJ#™Q~KFHHH5nj4əs;Jj!(/ L¥;ω%tT6\khL^կdu}4y h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@l`TREE h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@ l`TREE~l`SNOD?(n0P@p.09s޿ʰ-.>feGJ|ؔ^۲%`I N3MC{6g)B&Rh~h6D -Sr_d JNlDs<(2 CBDJjky9)qJ5Hu+&$RcHG/߀9+;,3QZEK>2ᡭk vHѼeF+#ΗES%a1cWh܁}c67N`58{N`xwٵÙ^jm_$L>dԠdaciminJWs«WR<" 9o{f$֢chX6EpZu_x `ǚy=܈I}VXY882 ` $ ƛd-$tVF)Mcˆ9.F^5?t"%~.1R[RfEd G.EK6&p #%-TXA iLF+8$xΌ̷ݒ&@? 
a䘦;Dզs[i|H;gN+uZu=?GvN'ljr:/cyVkPX˱ì}%-2y;C }&ܰǒD3:+W?GT^a`U(,3DV=}FP EgX&#XZ"֛^e'~,(-_z?z̔DJ<ՔE;H=@g0r @Lj3Cg`Cخ),,0@\ z2_vU_j814Bs ?=} 7\.h\ :o/qn3d}ס/\JX9)]5j]mL?rwѶKOf#£!hh0]kH6 =.mMY pGe"o!VRk| 38;L83RB@:`+Ʊ[rR=yY.nL΀!{+ bn)hTX_tUH~5-={T{['R2=q84ipU_2j;4weGAC"g;Nþ $-OȤ#f;x>f08mt/}^Q]vǠ,q=f記Fo~M0kJI,qoQ又$z(/ 8;L83RB@:`+Ʊ[rR=yY.nL΀!{+ bn)hTX_tUH~5-={T{['R2=q84ipU_2j;4weGAC"g;Nþ $-OȤ#f;x>f08(/ mt/}^Q]vǠ,q=f記Fo~M0kJI,qoQ又$z K^fMj9:|M o5mHW.#;- ?[#TÉ%MYUc@h sjR:jP60ӜPʨrp"M5)e=D#nmYB_CrA=& F̗ لUg S9#7Xˀ9 xXPby6eԿv~P(񍢝T3VZÅ;l` h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@l`TREE2y0q97_V3+Dvgۂ*hCWI8-AsܛEWpU?D>Y?6i`] ?ɱlCuelt;LMQk^ YTњP.|eUtyhT'^R)q6vw{K|nvb CMYpQLe9ݩ/P GpB(1F]W4!zN2egHgv4=aKYFmƿR1G ZD5m"8`&EaPYo|.+61|a$WuşxPi{`UwoޔRS Sf^t~Q\|>Å; (/`y0q97_V3+Dvgۂ*hCWI8-AsܛEWpU?D>Y?6i`] ?ɱlCuelt;LMQk^ YTњP.|eUtyhT'^R)q6v (/`w{K|nvb CMYpQLe9ݩ/P GpB(1F]W4!zN2egHgv4=aKYFmƿR1G ZD5m"8`&EaPYo|.+61|a$WuşxPi{`UwoޔRS Sf^t~Q\|>Å; h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@ l`TREEJ#`l`2|]%I !h։ UKcԥ͘c9k𛣰wrڐPGf}yQ{+LDTnqƣfVH-5P8_iiPD&/ײH@6^"jy{bdlI5{nsA+Jãx(L~ywR%Wմ(-,18a_49;gGOF.ZGéԳ\l^ž#%r6O˖C&/ 4m(B?eRUj~$b `o.^k=ߗ/k*9KA"w[Xցxq/N`~Fၿ -c(vy@?"Y]>SR:ΗGt>Jg0O ,N0eE*X3sZ(+F[HR#!QRb+>RIMnbo'J*t䄾 #x>tl~o:=m Rh:QTy lNkkdӪ;XeBhȲ%ސ˘@QaxMǯQUŞ-5aՆ+CjѠqxTjV]˛}4h[c``G}蚶?={`WΤRa6c)lymvAZ ٜt~ē STg5V[W)>1ni"dkxFZ|rD`1xC2P]!3c:He 2+x󶔈ľ}nްN+8Ga& lRөՔk0D]x>W`ThU(̜beu/P4w^\ҴOxL2WA&W3! V@ 3[_2 G<Y* hϞ$@ƬvH`moU\jUV5pV4eh7AQ} tt+?NH ."M@Z~qǫw0ԉ>^e׮dI'-7g[SJ q1՘R̠{Ix= *$ S HtEH ќSn!. KlbQxbˢJ]OT4 '^=V;zK`ipP$Z*"f-x-\]Ohu.YF ou8qbA=0 h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@(,l`TREEx& h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@ h5l`TREE=` (/`X4nNٱ2݉_9j ]LU&܈j@RvVU^/+u>>RIMnbo'J*t䄾 #x>tl~o:=m Rh:QTy lNkkdӪ;XeBhȲ%ސ˘@QaxMǯQUŞ-5aՆ+CjѠqxTjV]˛}4h[c``G}蚶? (/`={`WΤRa6c)lymvAZ ٜt~ē STg5V[W)>1ni"dkxFZ|rD`1xC2P]!3c:He 2+x󶔈ľ}nްN+8Ga& lRөՔk0D]x>W`ThU(̜beu/P4w^\ҴOxL2 (/`WA&W3! 
V@ 3[_2 G<Y* hϞ$@ƬvH`moU\jUV5pV4eh7AQ} tt+?NH ."M@Z~qǫw0ԉ>^e׮dI'-7g[SJ q1՘R̠{Ix= *$ S HtEH ќSn!. Kli(/ `xbˢJ]OT4 '^=V;zK`ipP$Z*"f-x-\]Ohu.YF ou8qbA=0qҞc>烕"Μ fR܆WJ(jU 16-g^˗ʮGRd% JBhW/g?&곊b7&s4lh\q1l܄dGs6ct敂d@N1aH9yS,o.jQf'w_YIEpfDi"EԺj'hX@6tr \MUn9j5MY7;Al` h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@Gl`TREEO[SmׇK'K]@lSJ⻍hEAt@vBO&G_sZnĥiA ۚĢ (=ٙ**?W1kX4-Σt^.wzC f&+zvB3Q[7Q}[j9Z5p ү.<ǏJ,ZNiex-C,A7,\Hc V(nj =`+D#F5=>nV(lcFQ9o:YC4wSu~+fFHg -337e=!noS[JdcN΀N~?ޭWS29(^b"(b ULɠ*վMЋq#C|MiPxt,iw< -<#ƺC(mv;%U7 TuI\R/hloJiBս32چCFi$[K̀'X`ɼ~ϊ%'iAm]5uS7?$o#Eֵ&!H|\ 0mOLUcqsƇpTE|#V<^z0G:Q3}k`bΠco&4M!_Nj/e0LXƙ7is?5*y*rJQ:@pCt)NJqFa*&JBhHzdaq]9R1lUA)*SG=Se (/`[SmׇK'K]@lSJ⻍hEAt@vBO&G_sZnĥiA ۚĢ (=ٙ**?W1kX4-Σt^.wzC f&+zvB3Q[7Q}[j9Z5p ү.<ǏJ,ZNiex-C,A7,\Hc V(nj =`+D#F (/`5=>nV(lcFQ9o:YC4wSu~+fFHg -337e=!noS (/`[JdcN΀N~?ޭWS29(^b"(b ULɠ*վMЋq#C|MiPxt,iw< -<#ƺC(mv;%U7 TuI\R/hloJiBս32چCFi$[K̀'X`ɼ~ϊ%'iAm]5uS7?$o#Eֵ&!H|\ 0mOLUcqsƇpTE|#V<^z(/ 0G:Q3}k`bΠco&4M!_Nj/e0LXƙ7is?5*y*rJQ:@pCt)NJqFa*&JBhHzdaq]9R1lUA)*SG=Se h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@ Xl`TREES8bl`w8`jq8v'gz#mlJx-a[sRuĶӌRղ(4L7Smd.AQ7&NV7ha)߰k$3 6򃼆j hP( ~!oN\ޕ1 -x'g+śxbmor( X\X n Ģ<"?b/ sI]PRO;7E֓=›|ǾsKHR#/|'ܲnlZq=IWy YUq}qڭ}HnG;:ۺ@501j*_AH_i6}Y*+.];E uMy~~Ho28 q3`c#=ǟ}3&in)zE ,ϔ;39ˢo/aXf1ӯN.;!4<n='qZy|A_ǥ-lS5Вz*I$]zH+VG.Ru^,grf40aylA82a.PIa'p С6o_j9kxF1PTI. `o! 
0T%0Mٞ`[:(̟ Ȍnn/3eR&!˾oŽȘUv͂ᤍ) ?!9Ӿi-5A~1JxӵjSڲ^$zd)5glRIBe"s.s' dc8!?W!@~K'w %,=L,/A96h1Z:!'*i;1j*N'Ǐ h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@Hkl`TREExsrѓK0"3ͫ[VR%tWQ ]4=k= }7k,ܩ)Hz=?e: D;Sůu:[/4tf&AfxB.1n[W)Jm?8{:$3yXS"O6mz%S%zϣ}1qdIòkIfG9-46GR;4?ttW3 k4ܐg(uߗ`sGmJe]P: /icDPίI^mK !2NHDBD+n"||@$/.Rݧ: iGlU]?(7`A`Oϧ` "gvgr^s0cAcqK!n.?I- Z!bK͟NjʔM'Dyx{õ9\Yxxfuf(*ǔX ZnrQ[4[u,M᠚5("휉֑iyc41s\{Ltz2c bvPX|") e[s{ϛaNTpKrsw2F7MEǨ9ɒK>mUFl3^Ѫry50脯Hc8m I]&<@)Y׍k@ YDOfob?;֙w+e5[woY&t̑QMz9xUr:!xK>8"far]XCa&&|"hpDcğ~mSlD*e*ԭ).>F $adV$mⓔ[ nvP@?( 7o鸮WÝdQ>>wEY' A2 9x謘bbvSg>Ԏq" CR+"7AZGaN`.YUIi#W雴)KC aƛF>ftS5:25a^MkJicFr{ %/?RL&̚/@S#`P1ux]%©Ome[>[$߃d#8J7 cf4FgmUFl3^Ѫry50脯Hc8m I]&<@)Y׍k@ YDOfob?;֙w+e5[woY&t̑QMz9xU(/` :!xK>8"far]XCa&&|"hpDcğ~mSlD*e*ԭ).>F $adV$mⓔ[ nvP@?( 7o鸮WÝdQ>>wEY' A2 9x謘bbvSg>Ԏq" CR+"7AZGaN`.YUIi#W雴)KC aƛF>ftS5:25a^MkJicFr{ %/?RL&̚/@S#`P1ux]%©Ome[>[$߃d#8J7 cf4Fgbh\ 6/G{kE]{d`ޫ wzsW%W >8]}3P"جm߱О,ߗ 74)H5ۚ|E7 sBG~:"&Kzᤴk& zF^͈%W7Ns4Ԝ 3EeG۩oAqeBĚ 7}ൃTITvi_S4#ݵ@^Dhb) fS 3X,$`A%CHČEؾ)@QTʒd^*E dnڑON ٺtx[b)wxIQ}2/s35X "Y|WN@JXA~̏0gny*'"eW{eLێ~[# w߶ W!= l+ztF!D<& KOTLt!4@OަJ $wN.D>.#Q䂟RCd4/2xט<d-NJdsT=|A$ $3":kAE$9h7Unu]+ũmG̉TR*|96bWTB4)OBvA-͏opTls0mH#}?xJJ:$7(ax`y!:us)rB(]04)  %wjo9{bϲ i{M !+w38xY(ۥ8BBc9P=49Q16y ֟_a}K8*Ѭ?K[a _ܞ/ ҏB'CN 4?BDBhl@yĀzB pO3(Aǖ\xOg02A4{!{N'&M!H Nΐ(k&` LV )Ct$##+2Sâk|[צ]u 6@5$).S'~#Z?T0̔i[l/9:z`'q&Dϯb YL'/̸1M*l)$ "nӾi=`:'rXU}Cip{3p^3D3LPu7RrS 5؀YT{H{*nIh( '"-bΝejE?~R%w.TREEX(/` EA|TVtM Fg2}1ãSyK_ |($(u -nSiXau\#℅n2/K3(3\AՑk^BmU+&5Y0\ 푻uB'$+x)Menmȁ mTS0W[BgGOI:&(< 1=ӭiW 3 n%[6ἑ`YI{|ƙ}M}C0PMDPPj\g}DGO2~Ƞf,z{MK,%e7w8iI Iд#Kƚ ɭcP+B%'Nj?EPSy(/` ? IٗFDw%H F Gi0<$Ȯ@%t!Ў$JVݸ5s)L%QWB2A4{!{N'&M!H Nΐ(k&` LV )Ct$##+2Sâk|[צ]u 6@5$)(/ .S'~#Z?T0̔i[l/9:z`'q&Dϯb YL'/̸1M*l)$ "nӾi=`:'rXU}Cip{3p^3D3LPu7RrS 5؀YT{H{*nIh( '"-bΝejE?~R%w.{jmG{@Ct"P}ٛQ1K4W.s[b}$\ӄw߿hwtd .V6񌦞3-Y AAd1m p$ϸbET({bQ?Y&7褻;*`iycyBB"kBƯl*14+Ŀ%97ENuv \:xkϏVJM/K9WM ckfoOca8c\#/8aF,_wve{CQg b|gG'4ç&PF6 2Tb$%Z̞BdwA۾RTl?-q v beo=FR`L=VF%Ki*oVyCwBvݪQ. 
YAo  V?̂}6)& ;Bp& :Ed7=Vuݗ] 70),)堤w5z rG4chvJL3 AUB )RyF͹ceG_gsE"hI?t̅exZ[\HWn~p3W1||9oԹK|>$B ﴕS 7Wma2t{Ĩ '$Ee– L}ib\1rnѭ8h%=*琏R4δHh x]!g(!%.%ltAd xL0f,g6K"Og|~S[4EB#R7oΖ恁v&DpiY:47-%YcڲyM(UqlH,̜ܠ"pjHdBIΟqWNvp^맓;2%"9*4|i(^9TREECr LѼȲɊN:q)g*BC2.F͑#b(=E|b[Q_8d7xtXTm .dmS}8#v:o%ofV]|2{;tr۳upr t52z]`iaیc%LqvDdg:}wpCW-)}[mդ/"YuQda3IZ%,#n*3.#I̖ ziB5۟BwS*f0yh,2QgCE FES:쭝(%-i %ǽ97 [W @2@n3?w&FAByӱx\YziK|$ Gގt֬-^_!UW`o&Cͻ5ዺU{'X*M 2߯9$MbcQBU ]Pq_8!խ2ls "?ucm/{i+v_HDA[V-EHײ9 tUm9DNٚQSg΃xLpXO6<ËVKcL$ C;^DV.$7n8J'u0Gdji(^9 h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@ Wl`SNODH@XX4`Wp؎xQTREE_(/` LѼȲɊN:q)g*BC2.F͑#b(=E|b[Q_8d7xtXTm .dmS}8#v:o%ofV]|2{;t(/` ۳upr t52z]`iaیc%LqvDdg:}wpCW-)}[mդ/"YuQda3IZ%,#n*3.#I̖ ziB5۟BwS*f0yh,2QgCE FES:쭝(%-i %ǽ97 [W @2@n3?w&FAByӱx\YziK|$ Gގt֬-^_!UW`o&Cͻ5ዺU{'X*M 2߯9$MbcQBU ]Pq_8!խ2ls "?ucm/{i(/ +v_HDA[V-EHײ9 tUm9DNٚQSg΃xLpXO6<ËVKcL$ C;^DV.$7n8J'u0Gdji(^9l` h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@l` h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@ l`hl` h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@+l` h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@ [)l`68l`/[%3 XClU=/"㷾#wt͗Id* PZm=ߨKbCWy#ĊԤ2w<,7˧"d8Cbw5i1Z7Pb#0Dx)m\ (5@no=*m&|X'ÍJ qBũb+Li"8@V\_D5GDT7biJCyA'X=*F~Oe$^|4Dt$AK/t4iU]5/g[,8SUIZ-C/xKvK/ϫW=K!;Lro_{7"И1Ï=aֺlx<X/k? xN俆ذSu拜9{kj:} !a67x@ loi!(cP-Ac6+Fx`&Z6˼.Fk? 
>>":Ȼ}h^gC'|7{dcɣ5kAs:$B6}7cЦ  ]Q3d˼??eJF ,WL"YfV}YI:0s+~݁~?kbD6~0 ֭4ܛi[b^ Q50yaMQL(^iT?&8Tdžc^`ے4$L3tz=;Bp(P$~jvtGNϔbE!B5׼BDSIfA](WʘZNji] ̤<;Ӣxl?)ݕttT#K=0[ņױlU_2k؛ sa S%_xnP4sOq27?ytel@kج({fCO wʋF]m$>X(@9Re *Y:XK>]f)@xC\Vz[8ʦA?"l\WP%KYng5[\eH(:68{nLsW8Q̴Z1~첑VWC&@wT'pyt*L< >/^hߕqb-Y&PJ~Cj0K3%km^MFxKh=ݿOr.|XSHcK2@0gQk弤*'5mˢ'aCij펫II0R DW}"V=ݛ h3D/vpwC.4)p:EL71wI6!P2]vWס (IyY"듬sS?sl3!N 򷐌38]ǕS`";85$Gkޕ ,B]<  }2awN\iႳgk:%"Ӿv)I6?*1|,vO awdD@%w*Y6X 96ݒ2*vku?cL@ {^ÏgMBflQT3׵l)`9FY_*/=A5BI`x*,~NQ3;CDg \̨oA|-z6c'b_sH"BN@Sy;G:<=pL)K6\>XA48;M7XLfH&kt|A]S4 r<6LAr:U/m0QJ S[ `&&\QEwI\PBZb9#B\VJVFv6 ‡{{²2QE,ZYբ;0"ݕ=>Q րFv$c0>/2,1lܸ- 8Პ7IŐk\}H d*!#O<,1f:\e{}<U{hYXi8ڣ)fB`78.@aPPN_VFm?S4P]I0E8*8S:@^9 z9튑sVnzƬb}@~Gbsh 2D\]sv3#zo(E/LLN<5zTosgcV1'Xg!Ce G#N:'25y( JN]*߂2 fn:9%;Ag/EEpJ9E 쉞~(vR}ɈNQf[$uLI 2c@8d}@qcTvբ.ޙ8-$1k M6\08WɸYwwQFvɰ#h0&z!!qc*8<&k5naXE8)+&YEa$o012+נdTU[i8 *'h84w=A j7.-ݥHoQ n0?zq龚E6Î+i(q<-wps7,Er -4O8)_N5Q0 sy%JOǧbg8%*=eT[}^/t/fu)!֡*;agױUtS?uL HVit_:JD\5C h9ը6Ŀ"[j l'l8Uy:iùpMf XNUzdZQ_?eä'<=۱ކd.%52TREE  (/`%6+tlAd\ܻh ޷q ֐n|G%s= z:L;3ݥ U0H ɫfSun U$ o_!1+Yȗٛ5wwDR~zC#Z8};,]/ ̬,Duh=b.Vpɪ`n{9kD,PC_D>"Ӿv)I6?*1|,vO awdD@%w*Y6X 96ݒ2*vku?cL@ {^ÏgMBflQT3׵l)`9FY_*/=A5BI`x*,~NQ3;CDg \̨oA|-z6c'b_sH"BN@Sy;G:<=pL)K6\>XA48;M7XLfH&kt|A]S4 r<6LAr:U (/`/m0QJ S[ `&&\QEwI\PBZb9#B\VJVFv6 ‡{{²2QE,ZYբ;0"ݕ=>Q րFv$c0>/2,1lܸ- 8Პ7IŐk\}H d*!#O<,1f:\e{}<U{hYXi8ڣ)fB`78.@aPPN_VFm?S4P]I0E8*8S:@^9 z9튑sVnzƬb}@~Gbsh 2D\]sv3#zo(E/LLN<5zTosgcV1'Xg!Ce G#N:'25y( JN]*߂2 fn:9%;Ag/EEpJ9E 쉞~(vR}ɈNQf[$uLI 2c@8d}@qcTvբ.ޙ8-$1k M6\08WɸYwwQFvɰ#h0&z!!qc*8<&k5naXE8)+&YEa$o012+נdTU[i8 *'h84w=A j7.-ݥHoQ n0?zq龚E6Î+i(q<-wps7,E(/` -4O8)_N5Q0 sy%JOǧbg8%*=eT[}^/t/fu)!֡*;agױUtS?uL HVit_:JD\5C h9ը6Ŀ"[j l'l8Uy:iùpMf XNUzdZQ_?eä'<=۱ކd.%52e,It%PSfx&;iel\t݊YXC*ճ4zډKoM) 7B @cM+1"(.TȾHE_8ˆ._?IJe_-j!ԅ_K.?\wV-Q "4 ;d\&x_TmGQ- 1 0@9:^S"c3̠9 K% ~]ȳIJš 9EoG2q_ _K<{ekǡc(k_amuE WON#y5XS%{$ -MeJUds+7 3Y%<) s 1&EuͷL|jo697I:ˆ5 D+f ^1??nbgTvx)}8eWT;fBZr04F/QiW>Bgswk~&_eF?TGu+)y쬃@?,d/P$Qnfd1sXɒm~f 1q,7vz|2#;pʄ7iȎc H{md 
}*ր2_/lKa`i\JE0PZ~V%n~1A3\>a 4~z(ƱFXm! b)a hh\rSTբim`K;Kr=n*SƻXI /aз9b8܂o<K㜎IE!9:&=>wR6Λ81 ^EMJ6K5fƗЗ\扼ainUp]H<&WL - tE9=gBb*;.gOJ,?7|D*ErW+}\ykƕ2 L]J\^ڤe1K'П7l(byt#~mbO~TQJ\PJɐhr%׬-i۳]NNԓOEozOa!ƒWį}p4Lwq;fx> g>LOp.@{؟KY̰>PR 80¤"0tdQ ;@#n| Ajse RwsbFӒ#€{I`JH@ %,%+F;&_s1 C*e8Ǻ2E iʛnjViX{J&$AW޺dNG qz$Ol'3툨xdfT ;cWj!tpn-ŶنjbS\$32/B3EHLqv _^2QA"ft.$XM}1Ln`6TNI/W!c)#~h>j,-*afa cPݶ@TeifQY,bJ4:@5kЃ N:vŏl2CMI0JщQQ?PÌSp OŒ*)0rѯ>ɛ,YNT_7PүPxd]@] 㽜B!NͻFnԳXʀw}eb=NX ]!=DK(X7*yN`5<}wDjF;)d$p3?+{<͙Zdb&* : rpO[7~eyw^pue0|$Ȍ%<6$X5?%:VB1~ * Ltn΁8 ;!J(#7̈́yu6N{VF_"I@iQJ]@Yj7%lF]iO7ވTREE1h (/`tKwG$ RGA͞f=s"ȤO삣+*H^|z?I`UH{ȡ +s{}.x\3`gD0H;SP5uti/{ D&Լkդ,xzYR~;o7:뒼8?r>QiW>Bgswk~&_eF?TGu+)y쬃@?,d/P$Qnfd1sXɒm~f 1q,7vz|2#;pʄ7iȎc H{md }*ր2_/lKa`i\JE0PZ~V%n~1A3\>a 4~z(ƱFXm! b)a hh\rSTբim`K;Kr=n*SƻXI /aз9b8܂o<K㜎IE!9:&=>wR6Λ81 ^EMJ (/`6K5fƗЗ\扼ainUp]H<&WL - tE9=gBb*;.gOJ,?7|D*ErW+}\ykƕ2 L]J\^ڤe1K'П7l(byt#~mbO~TQJ\PJɐhr%׬-i۳]NNԓOEozOa!ƒWį}p4Lwq;fx> g>LOp.@{؟KY̰>PR 80¤"0tdQ ;@#n| Ajse RwsbFӒ#€{I`JH@ %,%+F;&_s1 C*e8Ǻ2E iʛnjViX{J&$AW޺dNG qz$Ol'3툨xdfT ;cWj!tpn- (/`ŶنjbS\$32/B3EHLqv _^2QA"ft.$XM}1Ln`6TNI/W!c)#~h>j,-*afa cPݶ@TeifQY,bJ4:@5kЃ N:vŏl2CMI0JщQQ?PÌSp OŒ*)0rѯ>ɛ,YNT_7PүPxd]@] 㽜B!NͻFnԳXʀw}eb=NX ]!=DK(X7*yN`5<}wDjF;)d$p3?+{<͙Zdb&* : rpO[7~eyw^pue0|$Ȍ%<6$X5?%:VI(/ @~ * Ltn΁8 ;!J(#7̈́yu6N{VF_"I@iQJ]@Yj7%lF]iO7ވ 5u PȧI 2RbBdӱ Ma\F|EHD ^=`/ֆ"rL_`^BfM=tEǪ)K tc,}~C'̨b皉L?vチCEKB[}ҰzUIx_^ϛz|rO<.PF]^n@Џr/d'tfM@YJs: 5էU:SYU!^Uf4wB5*^xf 4[qcX 93NVyW̥upEA dJUV ApSeK]$S/#jmyn}퀿>݋&pߎ%ևȼxScߧx3Yv}sbKKՂԥ8V+LֹA!F6A]4)@ssU, 8)bH$_0O`B\G_{>؟@ll a(nUCui8l}/O{kUdh}J.WTُ@)xdj|0#W67o4x•sr 'YZ0龜_OƸ_ 3 bcq`Bi\8*sGQqꉻvǤ䴺Bx&ŀ{h,/4=IPORM~NWYQpbqCmso-@X]a@<#ŗX%mJ-5S~twu9m̘]V#?Oez9.6.Bq0c;P4&b0ٴ/-pR4 h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@@l`TREEĬIh~0[S*oxȈY >猵]Yy>Optgq,_B'ҹb{hOBD}+i` *]A(GzLa$,|.fYJ5kUv i]'uI+>A\ r.M}ěq$^ q ZfnGYihcfzi7]7̰ԩL-2:H0%|U +9G:?`e04XA'ed0 Grxձy2NJ:*7tzVO?ǦP<YH!]bigKT1h*SlNi_IgB5_cUܤRY{c}oWkU> G+ä֤"5ېCpNx 6 !38l0 p1ڝ3|X[] vkRW' BJ)\aY/ g&kBl@.RMgH#z^ô5~!@imM)dp=ѽ A9E*z͔9ߪ`8@}N/ 
^B,ػr]>e#tu<{sAR]swbFe([;xRw};׫RCm!1a_Kh*=\d K=>B!&Hv,{8 xQM< LR,j|@ʂT7p2ߞ6WQPNnM0gNkM_q4zw!N6Ыn&UX8 B!A{A~c:e6<_@_DsGA),5nb\/oRo2S? 8Tt[[ӝ`!}d2iigU=q Qʉٸ(tj?n9*H./.=xTpk )KNk5~ר[E['TIO<stF|nPμQPPԨ5}~~Iۋ@pWt x]ʇ?Zu:TUAbB-QzqQQ?IdB!Lt3Ĝfw }cnr F01KǟWߗ[ҭDHTb w ͋!#ev/ƈc7֕%QumUρO">t$qDrh4-PXZGƾLIg0^OEvBA2"|/;fN뛿>o2a \Dq'fÛ>NP . ^iT!6 ~\ikt_xe-8F/!T,ZGܿ0hnU{?Y\2!7|/%k\|-fR0>0X6ivanԝ99ؓs 5s&Į(` ̭D%GTB oAӂyDgܪek<-%{f MCKՅ9yY6M% 9.cn80Z01/{-zɥ. K_sߝx^@5N?. i뺈6}&U&Q;t[4&b0ٴ/-pR4 h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle@ Rl`TREENZ (/`̆Ih~0[S*oxȈY >猵]Yy>Optgq,_B'ҹb{hOBD}+i` *]A(GzLa$,|.fYJ5kUv i]'uI+>A\ r.M}ěq$^ q ZfnGYihcfzi7]7̰ԩL-2:H0%|U +9G:?`e04XA'ed0 Grxձy2NJ:*7tzVO?ǦP<YH!]bigKT1h*SlNi_IgB5_cUܤRY{c}oWkU> G+ä֤"5ېCpNx 6 !38l0 p1ڝ3|X[] vkRW' BJ)\aY/ g&kBl@.RMgH#z^ô5~!@imM)dp=ѽ A9E*z͔9ߪ`8@}N/ ^B,ػr]>e#tu<{sAR]swbFe([;xRw};׫RCm!1a_Kh*=\d K=>B!&Hv,{8 xQM< LR,j|@ʂT7p2ߞ6WQPNnM0gNkM_q4 (/`zw!N6Ыn&UX8 B!A{A~c:e6<_@_DsGA),5nb\/oRo2S? 8Tt[[ӝ`!}d2iigU=q Qʉٸ(tj?n9*H./.=xTpk )KNk5~ר[E['TIO<stF|nPμQPPԨ5}~~Iۋ@pWt x]ʇ?Zu:TUAbB-QzqQQ?IdB!Lt3Ĝfw }cnr F01KǟWߗ[ҭDHTb w ͋!#ev/ƈc7֕%QumUρO">t$qD(/` h4-PXZGƾLIg0^OEvBA2"|/;fN뛿>o2a \Dq'fÛ>NP . ^iT!6 ~\ikt_xe-8F/!T,ZGܿ0hnU{?Y\2!7|/%k\|-fR0>0X6ivanԝ99ؓs 5s&Į(` ̭D%GTB oAӂyDgܪek<-%{f MCKՅ9yY6M% 9.cn80Z01/{-zɥ. K_sߝx^@5N?. i뺈6}&U&Q;t[4&b0ٴ/-pR4 Njl`  h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle @Nr l`  h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle @ ~ l` Hl`a1_a|S1_b|S1_ca2_a|S2_b|S2_ca3_a|S3_b|S3_ca4_a|S4_b|S4_ca6_a|S6_b|S6_ca8_a|S8_b|S8_ca10_a|S10_b|S10_cSNODPxh(a~e,  h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle @ l`FztÆأ VͅU U zwv@yCZ?8Hc?ؘ_B*#,)a@ַ(dL.q n[2F; гlLʒ qhFǒŇaGCs{DxYL )r ԅs]}ry:!@|R#UrI=`8q/ݩ7m4ܺdWZν&P+mYbJ)+K ̃V$`0@#\`{dJ5 dFykE L3-$1f ^jG48 ocbZ\{G*\}sJ+h9(*Da*>8Lh{Z7bƒgzu8C|{dFz{˗LYHk6<RZpɍ遊M$Хny5[3Z$aB)Y1\`OsHFg}]<7Ĉk57 k.ژ7%&~O 41. ǐynȔQU[>@p8+ B^Jtb~) +׬T/RQ4c󰤗uH)KsNaN#oiqIjxqJwQOm!!:! 
ue?$PC+A-T/U/dʋJ6dveQ"JݹEZzb/:PڀЩ!{CB`'L!lTVhΥ]WsG%U̸T}6uKk}o2Y*A-[4vRVh8ޅM&kIK6JXNg1Ld0t(+o'*Hosc0V%"O˚qkĶ[_+dTJuS\ۜeo#ܕyz_ލJ&zR 1 SzwƔ*>W39#OQGm(/֠m⒞uzy+]DEd՝+:g1a*ރJpQo,;Ғs`ORvvFrt<(q*Bכo}!֗G0Af|E5Spb[Ppm%  h%N;a ?*B RD7*EBTO.UxFZEsy\jư ?@#[MAMYVN߂tV|ԭ1:t=JV4R̺1ɐ2کK 89젰 *JUW֎6sogo%٘Q0srPv_f-rOíZF|W+6]# ?$ǬխmoE0{IkgWDpc9i/CW.8 UR` TW0yz:, r t"Fe_Hrm؛˭$:tփ=*b_z\LHpO5Ғ, ޢ;3*~t\z~$-n0:t..T(sk}(aϩ7aly/ Եh-YbusҩGr dHcÑ86ʍ&G7-~ՇMnKeXFnU'Wz߾kSNph9MɵiLZW1S">4)o|y(>0VzܼuIFJ ]M#:6FDJԆJ6=qX"Nc TREE~z sJPQNCo-yq)n Y"U cҐj8̋En6 > qH5Xp=MJWhKESlKZε2|G8D'д,5n=Pc!$F8ˎ-Tڅ+dm+~Q}Ye&򭢷!z􆬿65x%`FZg$XfSR #sDйŌ((vtS/Jz.2I"MOőA |2mTKY#@2xтUj]t0xUx/hԅd}G>-kre_;եXg Ocq$[*8q|[c@4Vԟ{s}- Zsu~DžT)zIM?n_֒V4+-P5:6vFgr;KW ~~+o $-zTU#-C2<B9am Vy-Ȧ`3 f/~ib?w gu9^DߨNlZȖԸf|lX( 6NfXRl 98Y36!p}#L9 Yp/+7g9Su )qqĶ,KdW w#1j 2;+˟bZQ;^~kzE^~zٓ)m4iʔPL9XHpF 8ӯ>5>3q!U-#mSŧ 4&-ܕ4ZS`Š&I1h@Ay>z齤@bXu4ʊF59XVslF5YRj#F:xkLMb/꜇Ϸ#N!Nq>r:J]D۴ROn7]VPVA6m_8U$# 鉴4b3:2*y#3+,QkD˛G޼gp`/ҸံrUwzh >xwy4#] h g*TC) AKK}M}w|,,~R C767JzZrn~$gE弿mWJWژ7(-Jԏ2'h<}+^['i=A}\aC^`ZD|U:e{rld^\ps>@c7f_٫AlIfs5m?Zh[(~Ŕh:KZjuewa37A ^ 4QH(Yα _h9BNJKիhb.-OB=-(qg1W< 4.L0$~Ɓܹjv5]/\;iNտ.|m?=#'{'`8J7|Ћ.ro+:]ԼQrr%c @~as{ͭ1,H+F. NVzܼuIFJ ]M#:6FDJԆJ6=qX"Nc TREE (/`JPQNCo-yq)n Y"U cҐj8̋En6 > qH5Xp=MJWhKESlKZε2|G8D'д,5n=Pc!$F8ˎ-Tڅ+dm+~Q}Ye&򭢷!z􆬿65x%`FZg$XfSR #(/`DйŌ((vtS/Jz.2I"MOőA |2mTKY#@2xтUj]t0xUx/hԅd}G>-kre_;եXg Ocq$[*8q|[c@4Vԟ{s}- Zsu~DžT)zIM?n_֒V4+-P5:6vFgr;KW ~~+o $-zTU#-C2<B9am Vy-Ȧ`3 f/~ib?w gu9^DߨNlZȖԸf|lX( 6NfXRl 98Y36!p}#L9 Yp/+7g9Su )qqĶ,KdW w#1j 2;+˟bZQ;^~kzE^~zٓ)m4iʔPL9XHpF 8ӯ>5>3q!U-#mSŧ 4&-ܕ4ZS`Š&I1h@Ay>z齤@bXu4ʊF59XV(/`lF5YRj#F:xkLMb/꜇Ϸ#N!Nq>r:J]D۴ROn7]VPVA6m_8U$# 鉴4b3:2*y#3+,QkD˛G޼gp`/ҸံrUwzh >xwy4#] h g*TC) AKK}M}w|,,~R C767JzZrn~$gE弿mWJWژ7(-Jԏ2'h<}+^['i=A}\aC^`ZD|U:e{rld^\ps>@c7f_٫AlIfs5m?Zh[(~Ŕh:KZjuewa37A ^ 4QH(Yα _h9BNJKիhb.-OB=-(qg1W< 4.L0$~Ɓܹjv5]/\;iNտ.|m?=#'{'`8J7|Ћ.ro+:]ԼQrr%c @~as{ͭ1,H+F. NVzܼuIFJ ]M#:6FDJԆJ6=qX"Nc Kn_qqxq X1rڞka͛9)jTo)ͨiSM5.ȂͰ|! 
ⲇijn2g,[>갿<\ZǶZGleI5gIB%eB4+K*fow_LkqdVtvdC xY/F yӺC+m|pQ< 4{%|{VsB6X_T4G o vd'E} ް5S2cWDSz- 1vsۏ0C#xa=ŷ }8u1J_Fb`sNGt{'p{ sxpI`Xp =wOD$uSY'#FJU?luQ{xeltc'KJp936tׯ.;yZoSt.j^٩[|7Cb4yfB0vλIˇwET*͈Lttu;ҕUIҝLq_Wx5j8i6g*X!(`s18?%X@T1nXf 3cHֹ*kh\Er00 t^huY]^lЏhj|619lt{wVKCo@OX<[EEfX=+Wc6N<0МAlq bi(?GVcAisU(&38 18AK+j"0^ԭz0ez%rd4^<21(l?[_}㵪D#0]rkaa2,`ŊRZ;RI.2 {UrH^ 7 sQIUow%Id7IK"*@vEHlex1Kz^HSer'+Yg!cT}{nZmzM9ȔW+#5ֵȒ6LVZA޺ZN@i+\ &ّOݸvs۵EHGE>|t"%҅I|T9~[ O,h*g}~$XfLnȨM췜u"*D3,L&JL'R]ѫ`8>+t\0O?[< x05F&L7{'xhuVfĩ_!359F0iڙlBV {}ΰ'xZqz1{HbKTRفL|X)sܝ_ꎲD0-p?VJqmжvHsYF,;4 EQfDB{CWa1_a|S1_b|S1_ca2_a|S2_b|S2_ca3_a|S3_b|S3_ca4_a|S4_b|S4_ca6_a|S6_b|S6_ca8_a|S8_b|S8_ca10_a|S10_b|S10_cSNODPh8j6i -TREEr Hsu[kOlҧ~'l5o;ERQ@`RP*uJߋ wVxY׀碙J'z;Fu4+Cf7s&fa͐"'YIR-',#}KPxY]&P֞}/*j>*~cFઁ6BCNȖO@~$[ yͿ^UzzPYY#dS@l zeN^w<@ƍ2OPvp$IПǺ tyW^ԁENb[`^ofq f 5~'g 8&gG"N\2d" am9_*Si:jFooe|1Pgݛno*JQ1dE𭛌O]ɦH u ̌ yFl[L+V lrW$kH}MYTR>)i(!V$ c7p ٠{̂@[ufc}z-Rv͕vC|wNsAw;kBCgIG4-Ep&o榤Fs]Ƨ`8oa,1ȃ%4#6VЌxΗGN|#dNE+*@خ^GˊbB&~A+7Ҵ?RK+z+ | nIy Bgaǂ&u &ͱvRvڪ|1?ov5)b+ʶH9\Ԁ G Z}DC}V!Ym95vaH={|p4gM$/$8*]FG3|Ղ?5ietvfZ"vwzd8-4u}DlhTxBzZhz>Z(؍"ky9\r/,v|JUԥjLWr/N!A0C ZxK"]2;zĴDHNIYgbE :cȢƚXc[M[׳ e6WׅY 1NLѥn?$%2 nt8Lsmx' b}8Ge7qSH[T6H;gPsžVL@'ay[= y0$La*#R)<#ÀG!@YЯGD6)FsN2 OugQQ[lŮ+1HױT/ m0wR@Eӽ޵䱮G%^/mSmbl^8 bv'KYY< }~r(hoXVE2k~DDRch4?&ji?Ϳ+IyK AgGZqOK=8LOA7ңV&{&F'c/l]C9=>  F' Ssө5FvRM)^cG_ To2T 4g| }&ñv݆鋴0-┈r$>6Ԟgo ̎Oeߣ؀tG' wS":i%.ܬ_g醽i4qi(b-_Ng{$4gIȢ6,O0Lٚh[K{E*Im)]+§c<#Mjɣn5*؟|f.&a)PZ@1Q% HӦnnvZ? 
r9pblf!n`˩[堆Iչ(&yLBGcÝ60A{~'7:%Xq1kRs4#+TǕ!HJƏj(z~D:JBwp 7P<{M)|ݧȗ=Zm\/} e &!S8&"/מTX'CqM 6.o*n'sYF,;4 EQfDB{CW  h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle @  l`a1_a|S1_b|S1_ca2_a|S2_b|S2_ca3_a|S3_b|S3_ca4_a|S4_b|S4_ca6_a|S6_b|S6_ca8_a|S8_b|S8_ca10_a|S10_b|S10_cSNODPhx{2`6TREE H(/`u[kOlҧ~'l5o;ERQ@`RP*uJߋ wVxY׀碙J'z;Fu4+Cf7s&fa͐"'YIR-',#}KPxY]&P֞}/*j>*~cFઁ6BCNȖO@~$[ yͿ^UzzPYY#dS@l zeN^w<@ƍ2OPvp$IПǺ tyW^ԁENb[`^ofq f 5~'g 8&gG"N\2d" am9_*Si:jFooe|1Pgݛno*JQ1dE𭛌O]ɦH u ̌ yFl[L+V lrW$kH}MYTR>)i(!V$ c7p ٠{̂@[ufc}z-Rv͕vC|wNsAw;kBCgIG4-Ep&o榤F(/`]Ƨ`8oa,1ȃ%4#6VЌxΗGN|#dNE+*@خ^GˊbB&~A+7Ҵ?RK+z+ | nIy Bgaǂ&u &ͱvRvڪ|1?ov5)b+ʶH9\Ԁ G Z}DC}V!Ym95vaH={|p4gM$/$8*]FG3|Ղ?5ietvfZ"vwzd8-4u}DlhTxBzZhz>Z(؍"ky9\r/,v|JUԥjLWr/N!A0C ZxK"]2;zĴDHNIYgbE :cȢƚXc[M[׳ e6WׅY 1NLѥn?$%2 nt8Lsmx' b}8Ge7qSH[T6H;gPsžVL@'ay[= y0$La*#R)<#ÀG!@YЯGD6)F(/`N2 OugQQ[lŮ+1HױT/ m0wR@Eӽ޵䱮G%^/mSmbl^8 bv'KYY< }~r(hoXVE2k~DDRch4?&ji?Ϳ+IyK AgGZqOK=8LOA7ңV&{&F'c/l]C9=>  F' Ssө5FvRM)^cG_ To2T 4g| }&ñv݆鋴0-┈r$>6Ԟgo ̎Oeߣ؀tG' wS":i%.ܬ_g醽i4qi(b-_Ng{$4gIȢ6,O0Lٚh[K{E*Im)]+§c<#Mjɣn5*؟|f.&a)PZ@1Q% HӦnnvZ? r9pblf!n`˩[堆Iչ(&yLBGcÝ60A{~(/ '7:%Xq1kRs4#+TǕ!HJƏj(z~D:JBwp 7P<{M)|ݧȗ=Zm\/} e &!S8&"/מTX'CqM 6.o*n'sYF,;4 EQfDB{CW j l`  h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle @ l`  h}@bitshuffle; see https://github.com/kiyo-masui/bitshuffle @  l`BS< < 7'}zCTHj=*n~&0jkQ ShXX(1Ja#KlNЎII~H* TFMu2i>#e 2CMV2⫯0%=,m%('9C,NZUҨ2|NHr mE kXBPM{Nhn[! br8>B 'ОK [ E0G^|y. r~:0D4L3Kt,Mco)cT_qf'Ԫtxyt2,YA- Z[˗zaU!x[mϤMAܟƥ̯,a׮X`Y?}/8B=tu*x/$bZnx;pfeg]_Փ9U3hyX҂@;و;DIhߦGGhWS~N7UiOv7j p7OͬoP}!Yjqc =r?\ی=TBKIX}@s/~"nE2dCkIۉ jmYTʓ, n~x~39qV[U.Uۺv0>~f w0;,e`Jy ח>&m/"Y̸JC "%H [*Ő}jk}/ո- |@G툑vN<"h[|ENi[jʐJl*M2 2tbd{ : ?edb8`np/9\I\>8"އ%ci=z  b)MK[xe/~v:{n,|N7|es aW»Qb udS|\Nf5A%OG:WatiYxo ]K nR6BI,,rėe7z΁rnkꇷɤshGLa[Y V{s_*]pQ<,G7Kͨl7Y}:d<%Bo{p]AS>5ꏶ(d晉}\8@zr`Fxt~ ½nޚX@dMA6Jn Χ_X#LXO^m%P|-)~jYLty6ZhTREE ? 
jsDAR*uBhE 973t4=/0NA+}D0>LxY c*<}"G@]>NbMD xqdI~D#[Ceƹ+RT|[$̱7u_y,J:QZ3#Bh^}w,$M2d 9i'@'=I bO ֭ 7)AYM`17Ghp?FqLiWQh4,jeA1G`1gvf;I-{!a'@M6 -,:TdC $F~/6'GpTy,ȑ٫x7UzQ(|ϊvPmI!1XM7zt}krW"9> Pׄ0؎s9o)9!jlf2d~3md˯_@}="%AMSbύ[I*+)L.oβS8($^~Zw-G¶k#jJcXKocfZXSG^-I Ԅms*+xOw2`|U(n0|q|[zڹ9*i^Ѻ]-;Ps!Q0+e#Vmd|E)Ka./f1A@e'xgpxμV@?i"T!eGoiN'\:ZAA1,l|7LÝ/gT(j$\xޥUz huǍ.l͕2YT&GX5*YȔ]rC2 U~:yÓ@ -[+O /tfJom1F%x H'NlESҕ57RHx%*~V>q\6Oz8Flc9^]]u 2rBŒ[9_#Z]jPC5qr$ac{ȍX0v)nZhSey]z%ϤKP?/͹ZR,m)X扗 ֛PDp^Uus^58];W fw"\dA,>Lq?6yOrk fH 3Y5b2Peg;b]cE-؂ZPKּL{\*+IJ:m~]:S/Pws|0qt^> = 0 UQoc vo`rS\;}8GѽBڜ v:89fΪ`*(}G%f Fe%ș$e\ nS<֋ޞՔC[#A-AK]X'c|mD+yKR 6 UvtU9¿~a,>0;`gr3UqFB&"xā1ih XR#p0J Q$DM۽9HR/Z ?3XW^O'gž<i&!ᨔx@Tt,XkI5iF v3xQ$l3hO$C9ٚ{9Ċ-Uɛf1'-HIzlq")yrV, ƗgӮ(sIS";-;w 7"cC84k0\!Ϗo.IF@ 6G_8]@@7L_g{N}8| hs αSֽ؍aua4e=Ve Swj\NEģx-!t&jgH#ɷb?=^jW=C/-D}z,L#r_0*B%Ekjz| Yb/>qF4ip/h?d2n'9Ԩ[X$]ʔ h`g|#6foju[bR5[\}{[1mǯB_xk3} @R ?\BxβaЗ s=gkxѮb:30 U(: Mx1=$Ng S kLxY c*<}"G@]>NbMD xqdI~D#[Ceƹ+RT|[$̱7u_y,J:QZ3#Bh^}w,$M2d 9i'@'=I bO ֭ 7)AYM`17Ghp?FqLiWQh4,jeA1G`1gvf;I-{!a'@M6 -,:TdC $F~/6'GpTy,ȑ٫x7UzQ(|ϊvPmI!1XM7zt}krW"9> Pׄ0؎s9o)9!jlf2d~3md˯_@}="%AMSbύ[I*+)L.oβS8($^~Zw-G¶k#jJcXKocfZXSG^-I Ԅms*+xOw2`|U(n0|q|[zڹ9*i^Ѻ]-;Ps!Q0+e#Vmd|E)Ka./f1A@e'xgpxμV@?i"T!eGoiN'\:ZAA1,l|7LÝ/gT(j$\xޥUz huǍ.l͕2YT&GX5*YȔ]rC2 U~:yÓ@ -[+O /tfJom1F%x H'NlESҕ57RHx%*~V>q\6Oz8Flc9^]]u 2rBŒ[9_#Z]jPC5qr$ac{ȍX0v)nZhSey]z%ϤKP?/͹ZR,m)X扗 ֛PDp^Uu(/`^58];W fw"\dA,>Lq?6yOrk fH 3Y5b2Peg;b]cE-؂ZPKּL{\*+IJ:m~]:S/Pws|0qt^> = 0 UQoc vo`rS\;}8GѽBڜ v:89fΪ`*(}G%f Fe%ș$e\ nS<֋ޞՔC[#A-AK]X'c|mD+yKR 6 UvtU9¿~a,>0;`gr3UqFB&"xā1ih XR#p0J Q$DM۽9HR/Z ?3XW^O'gž<i&!ᨔx@Tt,XkI5iF v3xQ$l3hO$C9ٚ{9Ċ-Uɛf1'-HIzlq")yrV, ƗgӮ(sIS";-;w 7"cC84k0\!Ϗo.IF@ 6G_8]@@7L(/`_g{N}8| hs αSֽ؍aua4e=Ve Swj\NEģx-!t&jgH#ɷb?=^jW=C/-D}z,L#r_0*B%Ekjz| Yb/>qF4ip/h?d2n'9Ԩ[X$]ʔ h`g|#6foju[bR5[\}{[1mǯB_xk3} @R ?\BxβaЗ s=gkxѮb:30 U(: Mx1=$Ng S k 1 and (err.args[1] == -11) and not ext.using_SSE2(): return if len(err.args) > 1 and (err.args[1] == -12) and not ext.using_AVX2(): return if len(err.args) > 1 
and (err.args[1] == -14) and not ext.using_AVX512(): return else: raise delta_t = min(delta_ts) size_i = self.data.size * self.data.dtype.itemsize size_o = out.size * out.dtype.itemsize size = max([size_i, size_o]) speed = ext.REPEAT * size / delta_t / 1024**3 # GB/s if TIME: print("%-20s: %5.2f s/GB, %5.2f GB/s" % (self.case, 1.0 / speed, speed)) if self.check is not None: ans = self.check(self.data).view(np.uint8) self.assertTrue(np.all(ans == out.view(np.uint8))) if self.check_data is not None: ans = self.check_data.view(np.uint8) self.assertTrue(np.all(ans == out.view(np.uint8))) def test_00_copy(self): self.case = "copy" self.fun = ext.copy self.check = lambda x: x def test_01a_trans_byte_elem_scal_16(self): self.case = "byte T elem scal 16" self.data = self.data.view(np.int16) self.fun = ext.trans_byte_elem_scal self.check = trans_byte_elem def test_01b_trans_byte_elem_scal_32(self): self.case = "byte T elem scal 32" self.data = self.data.view(np.int32) self.fun = ext.trans_byte_elem_scal self.check = trans_byte_elem def test_01c_trans_byte_elem_scal_64(self): self.case = "byte T elem scal 64" self.data = self.data.view(np.int64) self.fun = ext.trans_byte_elem_scal self.check = trans_byte_elem def test_01d_trans_byte_elem_16(self): self.case = "byte T elem SSE 16" self.data = self.data.view(np.int16) self.fun = ext.trans_byte_elem_SSE self.check = trans_byte_elem def test_01e_trans_byte_elem_32(self): self.case = "byte T elem SSE 32" self.data = self.data.view(np.float32) self.fun = ext.trans_byte_elem_SSE self.check = trans_byte_elem def test_01f_trans_byte_elem_64(self): self.case = "byte T elem SSE 64" self.data = self.data.view(np.float64) self.fun = ext.trans_byte_elem_SSE self.check = trans_byte_elem def test_01g_trans_byte_elem_128(self): self.case = "byte T elem SSE 128" self.data = self.data.view(np.complex128) self.fun = ext.trans_byte_elem_SSE self.check = trans_byte_elem def test_01h_trans_byte_elem_96(self): self.case = "byte T elem SSE 96" n = 
self.data.size // 128 * 96 dt = np.dtype( [(str("a"), np.int32), (str("b"), np.int32), (str("c"), np.int32)] ) self.data = self.data[:n].view(dt) self.fun = ext.trans_byte_elem_SSE self.check = trans_byte_elem def test_01i_trans_byte_elem_80(self): self.case = "byte T elem SSE 80" n = self.data.size // 128 * 80 dt = np.dtype( [ (str("a"), np.int16), (str("b"), np.int16), (str("c"), np.int16), (str("d"), np.int16), (str("e"), np.int16), ] ) self.data = self.data[:n].view(dt) self.fun = ext.trans_byte_elem_SSE self.check = trans_byte_elem def test_03a_trans_bit_byte(self): self.case = "bit T byte scal 64" self.data = self.data.view(np.float64) self.fun = ext.trans_bit_byte_scal self.check = trans_bit_byte def test_03d_trans_bit_byte_SSE(self): self.case = "bit T byte SSE 64" self.data = self.data.view(np.float64) self.fun = ext.trans_bit_byte_SSE self.check = trans_bit_byte def test_03f_trans_bit_byte_AVX(self): self.case = "bit T byte AVX 64" self.data = self.data.view(np.float64) self.fun = ext.trans_bit_byte_AVX self.check = trans_bit_byte def test_03g_trans_bit_byte_AVX_32(self): self.case = "bit T byte AVX 32" self.data = self.data.view(np.float32) self.fun = ext.trans_bit_byte_AVX self.check = trans_bit_byte def test_03h_trans_bit_byte_AVX512(self): self.case = "bit T byte AVX512 64" self.data = self.data.view(np.float64) self.fun = ext.trans_bit_byte_AVX512 self.check = trans_bit_byte def test_03g_trans_bit_byte_AVX512_32(self): self.case = "bit T byte AVX512 32" self.data = self.data.view(np.float32) self.fun = ext.trans_bit_byte_AVX512 self.check = trans_bit_byte def test_04a_trans_bit_elem_AVX(self): self.case = "bit T elem AVX 64" self.data = self.data.view(np.float64) self.fun = ext.trans_bit_elem_AVX self.check = trans_bit_elem def test_04b_trans_bit_elem_AVX_128(self): self.case = "bit T elem AVX 128" self.data = self.data.view(np.complex128) self.fun = ext.trans_bit_elem_AVX self.check = trans_bit_elem def test_04c_trans_bit_elem_AVX_32(self): 
self.case = "bit T elem AVX 32" self.data = self.data.view(np.float32) self.fun = ext.trans_bit_elem_AVX self.check = trans_bit_elem def test_04d_trans_bit_elem_AVX_16(self): self.case = "bit T elem AVX 16" self.data = self.data.view(np.int16) self.fun = ext.trans_bit_elem_AVX self.check = trans_bit_elem def test_04e_trans_bit_elem_64(self): self.case = "bit T elem scal 64" self.data = self.data.view(np.float64) self.fun = ext.trans_bit_elem_scal self.check = trans_bit_elem def test_04f_trans_bit_elem_SSE_32(self): self.case = "bit T elem SSE 32" self.data = self.data.view(np.float32) self.fun = ext.trans_bit_elem_SSE self.check = trans_bit_elem def test_04g_trans_bit_elem_SSE_64(self): self.case = "bit T elem SSE 64" self.data = self.data.view(np.float64) self.fun = ext.trans_bit_elem_SSE self.check = trans_bit_elem def test_04h_trans_bit_elem_AVX512(self): self.case = "bit T elem AVX512 64" self.data = self.data.view(np.float64) self.fun = ext.trans_bit_elem_AVX512 self.check = trans_bit_elem def test_04i_trans_bit_elem_AVX512(self): self.case = "bit T elem AVX 128" self.data = self.data.view(np.complex128) self.fun = ext.trans_bit_elem_AVX512 self.check = trans_bit_elem def test_04j_trans_bit_elem_AVX512_32(self): self.case = "bit T elem AVX512 32" self.data = self.data.view(np.float32) self.fun = ext.trans_bit_elem_AVX512 self.check = trans_bit_elem def test_04k_trans_bit_elem_AVX512_16(self): self.case = "bit T elem AVX512 16" self.data = self.data.view(np.int16) self.fun = ext.trans_bit_elem_AVX512 self.check = trans_bit_elem def test_06a_untrans_bit_elem_16(self): self.case = "bit U elem SSE 16" pre_trans = self.data.view(np.int16) self.data = trans_bit_elem(pre_trans) self.fun = ext.untrans_bit_elem_SSE self.check_data = pre_trans def test_06b_untrans_bit_elem_128(self): self.case = "bit U elem SSE 128" pre_trans = self.data.view(np.complex128) self.data = trans_bit_elem(pre_trans) self.fun = ext.untrans_bit_elem_SSE self.check_data = pre_trans def 
test_06c_untrans_bit_elem_32(self): self.case = "bit U elem SSE 32" pre_trans = self.data.view(np.float32) self.data = trans_bit_elem(pre_trans) self.fun = ext.untrans_bit_elem_SSE self.check_data = pre_trans def test_06d_untrans_bit_elem_32(self): self.case = "bit U elem AVX 32" pre_trans = self.data.view(np.float32) self.data = trans_bit_elem(pre_trans) self.fun = ext.untrans_bit_elem_AVX self.check_data = pre_trans def test_06e_untrans_bit_elem_64(self): self.case = "bit U elem SSE 64" pre_trans = self.data.view(np.float64) self.data = trans_bit_elem(pre_trans) self.fun = ext.untrans_bit_elem_SSE self.check_data = pre_trans def test_06f_untrans_bit_elem_64(self): self.case = "bit U elem AVX 64" pre_trans = self.data.view(np.float64) self.data = trans_bit_elem(pre_trans) self.fun = ext.untrans_bit_elem_AVX self.check_data = pre_trans def test_06g_untrans_bit_elem_64(self): self.case = "bit U elem scal 64" pre_trans = self.data.view(np.float64) self.data = trans_bit_elem(pre_trans) self.fun = ext.untrans_bit_elem_scal self.check_data = pre_trans def test_06h_untrans_bit_elem_32(self): self.case = "bit U elem AVX512 32" pre_trans = self.data.view(np.float32) self.data = trans_bit_elem(pre_trans) self.fun = ext.untrans_bit_elem_AVX512 self.check_data = pre_trans def test_06i_untrans_bit_elem_64(self): self.case = "bit U elem AVX512 64" pre_trans = self.data.view(np.float64) self.data = trans_bit_elem(pre_trans) self.fun = ext.untrans_bit_elem_AVX512 self.check_data = pre_trans def test_07a_trans_byte_bitrow_64(self): self.case = "byte T row scal 64" self.data = self.data.view(np.float64) self.fun = ext.trans_byte_bitrow_scal def test_07b_trans_byte_bitrow_SSE_64(self): self.case = "byte T row SSE 64" self.data = self.data.view(np.float64) self.fun = ext.trans_byte_bitrow_SSE self.check = ext.trans_byte_bitrow_scal def test_07c_trans_byte_bitrow_AVX_64(self): self.case = "byte T row AVX 64" self.data = self.data.view(np.float64) self.fun = ext.trans_byte_bitrow_AVX 
self.check = ext.trans_byte_bitrow_scal def test_08a_shuffle_bit_eight_scal_64(self): self.case = "bit S eight scal 64" self.data = self.data.view(np.float64) self.fun = ext.shuffle_bit_eightelem_scal def test_08b_shuffle_bit_eight_SSE_64(self): self.case = "bit S eight SSE 64" self.data = self.data.view(np.float64) self.fun = ext.shuffle_bit_eightelem_SSE self.check = ext.shuffle_bit_eightelem_scal def test_08c_shuffle_bit_eight_AVX_32(self): self.case = "bit S eight AVX 32" self.data = self.data.view(np.float32) self.fun = ext.shuffle_bit_eightelem_AVX self.check = ext.shuffle_bit_eightelem_scal def test_08d_shuffle_bit_eight_AVX_64(self): self.case = "bit S eight AVX 64" self.data = self.data.view(np.float64) self.fun = ext.shuffle_bit_eightelem_AVX self.check = ext.shuffle_bit_eightelem_scal def test_08e_shuffle_bit_eight_AVX_16(self): self.case = "bit S eight AVX 16" self.data = self.data.view(np.int16) self.fun = ext.shuffle_bit_eightelem_AVX self.check = ext.shuffle_bit_eightelem_scal def test_08f_shuffle_bit_eight_AVX_128(self): self.case = "bit S eight AVX 128" self.data = self.data.view(np.complex128) self.fun = ext.shuffle_bit_eightelem_AVX self.check = ext.shuffle_bit_eightelem_scal def test_08g_shuffle_bit_eight_AVX512_32(self): self.case = "bit S eight AVX 32" self.data = self.data.view(np.float32) self.fun = ext.shuffle_bit_eightelem_AVX512 self.check = ext.shuffle_bit_eightelem_scal def test_08h_shuffle_bit_eight_AVX512_64(self): self.case = "bit S eight AVX512 64" self.data = self.data.view(np.float64) self.fun = ext.shuffle_bit_eightelem_AVX512 self.check = ext.shuffle_bit_eightelem_scal def test_08i_shuffle_bit_eight_AVX512_16(self): self.case = "bit S eight AVX512 16" self.data = self.data.view(np.int16) self.fun = ext.shuffle_bit_eightelem_AVX512 self.check = ext.shuffle_bit_eightelem_scal def test_08i_shuffle_bit_eight_AVX512_128(self): self.case = "bit S eight AVX512 128" self.data = self.data.view(np.complex128) self.fun = 
ext.shuffle_bit_eightelem_AVX512 self.check = ext.shuffle_bit_eightelem_scal def test_09a_trans_bit_elem_scal_64(self): self.case = "bit T elem scal 64" self.data = self.data.view(np.float64) self.fun = ext.trans_bit_elem_scal self.check = trans_bit_elem def test_09b_trans_bit_elem_SSE_64(self): self.case = "bit T elem SSE 64" self.data = self.data.view(np.float64) self.fun = ext.trans_bit_elem_SSE self.check = trans_bit_elem def test_09c_trans_bit_elem_AVX_64(self): self.case = "bit T elem AVX 64" self.data = self.data.view(np.float64) self.fun = ext.trans_bit_elem_AVX self.check = trans_bit_elem def test_09d_untrans_bit_elem_scal_64(self): self.case = "bit U elem scal 64" pre_trans = self.data.view(np.float64) self.data = trans_bit_elem(pre_trans) self.fun = ext.untrans_bit_elem_scal self.check_data = pre_trans def test_09e_untrans_bit_elem_SSE_64(self): self.case = "bit U elem SSE 64" pre_trans = self.data.view(np.float64) self.data = trans_bit_elem(pre_trans) self.fun = ext.untrans_bit_elem_SSE self.check_data = pre_trans def test_09f_untrans_bit_elem_AVX_64(self): self.case = "bit U elem AVX 64" pre_trans = self.data.view(np.float64) self.data = trans_bit_elem(pre_trans) self.fun = ext.untrans_bit_elem_AVX self.check_data = pre_trans def test_09g_untrans_bit_elem_AVX_64(self): self.case = "bit U elem AVX512 64" pre_trans = self.data.view(np.float64) self.data = trans_bit_elem(pre_trans) self.fun = ext.untrans_bit_elem_AVX512 self.check_data = pre_trans def test_10a_bitshuffle_64(self): self.case = "bitshuffle 64" self.data = self.data.view(np.float64) self.fun = lambda x: ext.bitshuffle(x, BLOCK) def test_10b_bitunshuffle_64(self): self.case = "bitunshuffle 64" pre_trans = self.data.view(np.float64) self.data = ext.bitshuffle(pre_trans, BLOCK) self.fun = lambda x: ext.bitunshuffle(x, BLOCK) self.check_data = pre_trans def test_10c_compress_64(self): self.case = "compress 64" self.data = self.data.view(np.float64) self.fun = lambda x: ext.compress_lz4(x, BLOCK) 
def test_10d_decompress_64(self): self.case = "decompress 64" pre_trans = self.data.view(np.float64) self.data = ext.compress_lz4(pre_trans, BLOCK) self.fun = lambda x: ext.decompress_lz4( x, pre_trans.shape, pre_trans.dtype, BLOCK ) self.check_data = pre_trans @unittest.skipUnless(__zstd__, "ZSTD support not included") def test_10c_compress_z64(self): self.case = "compress zstd 64" self.data = self.data.view(np.float64) self.fun = lambda x: ext.compress_zstd(x, BLOCK) @unittest.skipUnless(__zstd__, "ZSTD support not included") def test_10d_decompress_z64(self): self.case = "decompress zstd 64" pre_trans = self.data.view(np.float64) self.data = ext.compress_zstd(pre_trans, BLOCK) self.fun = lambda x: ext.decompress_zstd( x, pre_trans.shape, pre_trans.dtype, BLOCK ) self.check_data = pre_trans """ Commented out to prevent nose from finding them. class TestDevCases(unittest.TestCase): def deactivated_test_trans_byte_bitrow_AVX(self): d = np.arange(256, dtype=np.uint32) #d = ext.trans_bit_elem(d) t = ext.trans_byte_bitrow_AVX(d).view(np.uint8) t1 = ext.trans_byte_bitrow_SSE(d).view(np.uint8) t.shape = (32, 32) t1.shape = (32, 32) #print t[:20,:18] self.assertTrue(np.all(t == t1)) def deactivated_test_untrans_bit_elem(self): d = np.arange(32, dtype=np.uint16) #d = random.randint(0, 2**7, 256).astype(np.uint16) d1 = ext.trans_bit_elem(d) #print d t = ext.untrans_bit_elem_AVX(d1) #t1 = ext.untrans_bit_byte_scal(d1) #print np.reshape(d1.view(np.uint8), (16, 4)) #print np.reshape(t1.view(np.uint8), (2, 32)) #print np.reshape(t2.view(np.uint8), (32, 2)) #print np.reshape(t.view(np.uint8), (32, 2)) def deactivated_test_trans_bit_byte(self): d = np.arange(16, dtype=np.uint16) t = ext.trans_bit_byte_scal(d) #print t t1 = trans_bit_byte(d) #print t1 self.assertTrue(np.all(t == t1)) def deactivated_test_trans_byte_bitrow_SSE(self): d = np.arange(256, dtype = np.uint8) t = ext.trans_byte_bitrow_scal(d) #print np.reshape(t, (32, 8)) t1 = ext.trans_byte_bitrow_SSE(d) #print 
np.reshape(t1, (32, 8)) self.assertTrue(np.all(t == t1)) def deactivated_test_trans_byte_elem_SSE(self): d = np.empty(16, dtype=([('a', 'u4'), ('b', 'u4'), ('c', 'u4')])) d['a'] = np.arange(16) * 1 d['b'] = np.arange(16) * 2 d['c'] = np.arange(16) * 3 #print d.dtype.itemsize #print np.reshape(d.view(np.uint8), (16, 12)) t1 = ext.trans_byte_elem_SSE(d) #print np.reshape(t1.view(np.uint8), (12, 16)) t0 = trans_byte_elem(d) #print np.reshape(t0.view(np.uint8), (12, 16)) self.assertTrue(np.all(t0.view(np.uint8) == t1.view(np.uint8))) def deactivated_test_bitshuffle(self): d = np.arange(128, dtype=np.uint16) t1 = ext.bitshuffle(d) #print t1 t2 = ext.bitunshuffle(t1) #print t2 self.assertTrue(np.all(t2.view(np.uint8) == d.view(np.uint8))) """ class TestOddLengths(unittest.TestCase): def setUp(self): self.reps = 10 self.nmax = 128 * 8 # self.nmax = 4 * 8 # XXX self.fun = ext.copy self.check = lambda x: x def test_trans_bit_elem_SSE(self): self.fun = ext.trans_bit_elem_SSE self.check = trans_bit_elem def test_untrans_bit_elem_SSE(self): self.fun = lambda x: ext.untrans_bit_elem_SSE(ext.trans_bit_elem(x)) self.check = lambda x: x def test_trans_bit_elem_AVX(self): self.fun = ext.trans_bit_elem_AVX self.check = trans_bit_elem def test_trans_bit_elem_AVX512(self): self.fun = ext.trans_bit_elem_AVX512 self.check = trans_bit_elem def test_untrans_bit_elem_AVX(self): self.fun = lambda x: ext.untrans_bit_elem_SSE(ext.trans_bit_elem(x)) self.check = lambda x: x def test_untrans_bit_elem_AVX512(self): self.fun = lambda x: ext.untrans_bit_elem_SSE(ext.trans_bit_elem(x)) self.check = lambda x: x def test_trans_bit_elem_scal(self): self.fun = ext.trans_bit_elem_scal self.check = trans_bit_elem def test_untrans_bit_elem_scal(self): self.fun = lambda x: ext.untrans_bit_elem_scal(ext.trans_bit_elem(x)) self.check = lambda x: x def test_trans_byte_elem_SSE(self): self.fun = ext.trans_byte_elem_SSE self.check = trans_byte_elem def tearDown(self): try: for dtype in TEST_DTYPES: itemsize = 
np.dtype(dtype).itemsize nbyte_max = self.nmax * itemsize dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) dbuf = dbuf.view(dtype) for ii in range(self.reps): n = random.randint(0, self.nmax // 8, 1)[0] * 8 data = dbuf[:n] out = self.fun(data).view(np.uint8) ans = self.check(data).view(np.uint8) self.assertTrue(np.all(out == ans)) except RuntimeError as err: if len(err.args) > 1 and (err.args[1] == -11) and not ext.using_SSE2(): return if len(err.args) > 1 and (err.args[1] == -12) and not ext.using_AVX2(): return if len(err.args) > 1 and (err.args[1] == -14) and not ext.using_AVX512(): return else: raise class TestBitShuffleCircle(unittest.TestCase): """Ensure that final filter is circularly consistent for any data type and any length buffer.""" def test_circle(self): nmax = 100000 reps = 20 for dtype in TEST_DTYPES: itemsize = np.dtype(dtype).itemsize nbyte_max = nmax * itemsize dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) dbuf = dbuf.view(dtype) for ii in range(reps): n = random.randint(0, nmax, 1)[0] data = dbuf[:n] shuff = ext.bitshuffle(data) out = ext.bitunshuffle(shuff) self.assertTrue(out.dtype is data.dtype) self.assertTrue(np.all(data.view(np.uint8) == out.view(np.uint8))) def test_circle_with_compression(self): nmax = 100000 reps = 20 for dtype in TEST_DTYPES: itemsize = np.dtype(dtype).itemsize nbyte_max = nmax * itemsize dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) dbuf = dbuf.view(dtype) for ii in range(reps): n = random.randint(0, nmax, 1)[0] data = dbuf[:n] shuff = ext.compress_lz4(data) out = ext.decompress_lz4(shuff, data.shape, data.dtype) self.assertTrue(out.dtype is data.dtype) self.assertTrue(np.all(data.view(np.uint8) == out.view(np.uint8))) @unittest.skipUnless(__zstd__, "ZSTD support not included") def test_circle_with_zstd_compression(self): nmax = 100000 reps = 20 for dtype in TEST_DTYPES: itemsize = np.dtype(dtype).itemsize nbyte_max = nmax * itemsize dbuf = random.randint(0, 255, 
# Python implementations for checking results.


def trans_byte_elem(arr):
    """Transpose the bytes of ``arr`` across elements.

    The input is viewed as an ``(nelem, itemsize)`` byte matrix; the result
    holds the transposed ``(itemsize, nelem)`` matrix, flattened and viewed
    back as the input dtype.
    """
    dtype = arr.dtype
    itemsize = dtype.itemsize
    in_buf = arr.flat[:].view(np.uint8)
    nelem = in_buf.size // itemsize
    # A single vectorised transpose replaces the per-byte Python loop.
    out_buf = np.ascontiguousarray(in_buf.reshape(nelem, itemsize).T)
    return out_buf.ravel().view(dtype)


def trans_bit_byte(arr):
    """Transpose the bits of ``arr`` across bytes.

    The input is unpacked into an ``(nbyte, 8)`` bit matrix, transposed, and
    packed again; the result is viewed back as the input dtype.
    """
    dtype = arr.dtype
    nbyte = arr.size * dtype.itemsize
    bits = np.unpackbits(arr.view(np.uint8)).reshape(nbyte, 8)
    # We have to reverse the order of the bits both for unpacking and packing,
    # since we want to call the least significant bit the first bit.
    bits = bits[:, ::-1]
    bits_shuff = bits.T.copy().reshape(nbyte, 8)[:, ::-1]
    return np.packbits(bits_shuff.ravel()).view(dtype)


def trans_bit_elem(arr):
    """Transpose the bits of ``arr`` across elements.

    The input is unpacked into an ``(n, itemsize * 8)`` bit matrix (one row
    per element), transposed, and packed again; the result is viewed back as
    the input dtype.
    """
    n = arr.size
    dtype = arr.dtype
    itemsize = dtype.itemsize
    bits = np.unpackbits(arr.view(np.uint8)).reshape(n * itemsize, 8)
    # We have to reverse the order of the bits both for unpacking and packing,
    # since we want to call the least significant bit the first bit.
    # The copy materialises the reversed bytes before regrouping per element.
    bits = bits[:, ::-1].copy().reshape(n, itemsize * 8)
    bits_shuff = bits.T.copy().reshape(n * itemsize, 8)[:, ::-1]
    return np.packbits(bits_shuff.ravel()).view(dtype)
class TestFilterPlugins(unittest.TestCase):
    """Check the bitshuffle filter (id 32008) when used through plain h5py
    and through the external ``h5dump`` tool."""

    @pytest.mark.skipif(
        "CIBUILDWHEEL" in os.environ,
        reason="Can't build dynamic HDF5 plugin into bitshuffle wheel.",
    )
    def test_plugins(self):
        if not H51811P:
            # Report the case as skipped instead of letting it pass silently.
            self.skipTest("HDF5 library is older than 1.8.11")

        shape = (32 * 1024,)
        chunks = (4 * 1024,)
        dtype = np.int64
        data = np.arange(shape[0])
        fname = "tmp_test_filters.h5"

        # Context managers close the file even if a write or assertion fails,
        # so tearDown can always remove it.
        with h5py.File(fname, "w") as f:
            dset = f.create_dataset(
                "range", shape=shape, dtype=dtype, chunks=chunks, compression=32008
            )
            dset[:] = data

        # Make sure the filters are working outside of h5py by calling h5dump
        h5dump = Popen(["h5dump", fname], stdout=PIPE, stderr=STDOUT)
        output, _ = h5dump.communicate()
        # Surface the tool's output when it fails, instead of discarding it.
        self.assertEqual(h5dump.returncode, 0, msg=output.decode("utf-8", "replace"))

        with h5py.File(fname, "r") as f:
            d = f["range"][:]
        self.assertTrue(np.all(d == data))

    def tearDown(self):
        # Remove every temporary file the test may have left behind.
        for path in glob.glob("tmp_test_*"):
            os.remove(path)