bitarray-3.7.1/.github/workflows/build_wheels.yml

name: Build and upload to PyPI

on: [push, pull_request]

jobs:
  build_wheels:
    name: Build wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # macos-13 is an intel runner, macos-14 is apple silicon
        os: [ubuntu-latest, ubuntu-24.04-arm, windows-latest, macos-13, macos-14]

    steps:
      - uses: actions/checkout@v4

      - name: Set up QEMU
        if: runner.os == 'Linux' && runner.arch == 'X64'
        uses: docker/setup-qemu-action@v3.2.0
        with:
          platforms: all

      - name: Build wheels
        uses: pypa/cibuildwheel@v2.22.0
        # to supply options, put them in 'env', like:
        env:
          # configure cibuildwheel to build native archs ('auto'), and some
          # emulated ones
          CIBW_ARCHS_LINUX: ${{ runner.arch == 'X64' && 'auto ppc64le s390x' || 'auto' }}
          CIBW_TEST_COMMAND: python -c "import bitarray; assert bitarray.test().wasSuccessful()"

      - uses: actions/upload-artifact@v4
        with:
          name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
          path: ./wheelhouse/*.whl

  build_sdist:
    name: Build source distribution
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Build sdist
        run: pipx run build --sdist

      - uses: actions/upload-artifact@v4
        with:
          name: cibw-sdist
          path: dist/*.tar.gz

  upload_pypi:
    needs: [build_wheels, build_sdist]
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      id-token: write
    # upload to PyPI on every tag
    if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')
    steps:
      - uses: actions/download-artifact@v4
        with:
          pattern: cibw-*
          path: dist
          merge-multiple: true

      - uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}

bitarray-3.7.1/.gitignore

*~
*.pyc
*.egg-info
*.so
*.o
MANIFEST
README.html
build/
dist/

bitarray-3.7.1/CHANGE_LOG

2025-08-28 3.7.1:
-------------------
* fix type hinting for memoryviews, see #241
* add [bit-endianness](endianness.rst) documentation
* improve testing, including debug mode test for `digit_to_int()`

2025-08-24 3.7.0:
-------------------
* add `util.gen_primes()`, generate bitarrays in which active indices are prime numbers
* improve `.buffer_info()` to return named tuple
* add optional `mode` argument to `util.sum_indices()` to sum square of active indices
* improve internal `_sysinfo()` to include `Py_DEBUG`
* add [Dubner's conjecture](../examples/dubner.rst) (in memory of Harvey Dubner)
* add [dynamically growing sieve](../examples/dyn_sieve.py)

2025-08-12 3.6.1:
-------------------
* add development files for statistical tests in `devel/random/`
* optimize `util.sum_indices()`
* fix RecursionError in `util.random_k()`, see #239
* add `devel/test_sum_indices.py`

2025-07-29 3.6.0:
-------------------
* add `util.random_k()`, see #237
* add `util.sum_indices()`
* optimize `util.xor_indices()`
* move development files from `examples/` to `devel/`

2025-07-21 3.5.2:
------------------- * optimize `util.random_p()` by also using bitwise AND in final step * fix DeprecationWarning regarding `u` type code * add [verification tests](../devel/test_random.py) for internals of `util.random_p()` 2025-07-14 3.5.1: ------------------- * optimize `util.random_p()` for `n < 100` * add [Random Bitarrays](random_p.rst) documentation * add [statistical tests for random functions](../devel/test_random.py) 2025-07-06 3.5.0: ------------------- * add `util.random_p()` * improve sparse compression testing 2025-06-23 3.4.3: ------------------- * minor updates to documentation * C-level: - simplify and speedup `extend_unicode01()` - customize `resize_lite()` - avoid unused code - use `PyTypeObject` for bitarray type object in `_util.c` to be consistent with `_bitarray.c` - add and improve comments to implementation of sparse bitarray compression - simplify `sc_count()` 2025-05-21 3.4.2: ------------------- * extend documentation of [compression of sparse bitarrays](sparse_compression.rst) * `util.sc_decode()` and `util.vl_decode()` now raise `StopIteration` instead of `ValueError` when unexpected end of stream is encountered * add debug mode tests for `read_n()`, `write_n()` and `count_from_word()` 2025-05-15 3.4.1: ------------------- * add `pyproject.toml`, see #233 * implement `bits2bytes()` in C * optimize `delslice()` when `step` is larger than about 5 * consistently name `*_span()` and `*_range()` in C for invert, set and count * organize and add tests (including debug mode tests for `zlw()`) 2025-05-06 3.4.0: ------------------- * remove `.endian()` method in favor of data descriptor `.endian` * allow bitarray initializer `bytes` or `bytearray` to set buffer directly * allow calling `.extend()` with `bytes` object (although the only valid bytes are 0x00 and 0x01) * add `util.byteswap()` * add `util.correspond_all()` * fix `.reverse()` for imported buffer * drop Python 3.5 support * add tests 2025-05-02 3.3.2: ------------------- * fix off-by-one-error in check for length of count argument in `util.canonical_decode()` * simplify `util.int2ba()` * add tests * add [masked indexing example](../examples/masked.py) * add [tricks example](../devel/tricks.py) 2025-04-04 3.3.1: ------------------- * remove `License` classifier in favor of a SPDX license expression, #231 * reorganize and cleanup many tests 2025-03-30 3.3.0: ------------------- * add optional `group` and `sep` arguments' to `.to01()`, #230 - as well as `util.ba2hex()` and `util.ba2base()` * ignore whitespace in `util.base2ba()` and `util.hex2ba()` * check for embedded nul characters when extending (and initializing) bitarray from string * improve testing * add [double precision floating point number example](../examples/double.py) 2025-03-19 3.2.0: ------------------- * add `util.xor_indices()`, #229 * add [Hamming code example](../examples/hamming.py) 2025-03-06 3.1.1: ------------------- * updated `pythoncapi_compat.h` for pypy3.11 support, see #227 * use `__builtin_parityll()` when available in `util.parity()` * add `parity_64()` to header * simplify some tests * add [LFSR example](../examples/lfsr.py) 2025-02-19 3.1.0: ------------------- * allow mask assignment to bitarrays, see #225 * add missing masked operations to pyi-file * refactor `resize()` and avoid overallocation when downsizing buffer * update `build_wheels.yml` * fix some typos * minor simplifications * rename `growth/` example to `resize/` and add tests for `resize()` * update gene example * add comments 2024-10-15 3.0.0: ------------------- 
* see [Bitarray 3 transition](bitarray3.rst) * remove Python 2.7 support * `.decode()` now returns iterator (equivalent to past `.iterdecode()`) * `.search()` now returns iterator (equivalent to past `.itersearch()`) * remove `.iterdecode()` and `.itersearch()` * remove `util.rindex()`, use `.index(..., right=1)` instead, deprecated since 2.9 * remove `util.make_endian()`, use `bitarray(..., endian=...)` instead, deprecated since 2.9 * remove hackish support for `bitarray()` handling unpickling, see detailed explaination in #207 - closes #206 2024-10-10 2.9.3: ------------------- * add official Python 3.13 support * update cibuildwheel to 2.21.3 * minor simplifications * fix some typos 2024-01-01 2.9.2: ------------------- * optimize initialization from strings by not constantly resizing buffer * optimize `util.hex2ba()` and `util.base2ba()` by avoiding unnecessary copies * optimize `util.base2ba()` and `util.ba2base()` for `n=16` (hexadecimal) 2023-12-23 2.9.1: ------------------- * avoid buffer being unnecessarily initialized with 0s in several functions of the `bitarray.util` module * fix `.count()` type hint in pyi-file * improve testing 2023-12-17 2.9.0: ------------------- * deprecate support for Python 2 - Python 2.7 support will be removed in bitarray version 3.0, see [roadmap](https://github.com/ilanschnell/bitarray#roadmap) * `bitarray(n)` for integer initializer `n` will always return a bitarray of length `n` with all items initialized to `0`, see #212 * allow sub-bitarray in `.count()`, #212 * add `util.ones()` * `.find()` and `.index()`: add keyword argument `right` for rightmost index * `.itersearch()`: add start and stop argument, and keyword argument `right` (for descending order - starting with rightmost match) * deprecate `util.rindex()` (will be removed in 3.0 release), use `.index(..., right=True)` instead * deprecate `util.make_endian()` (will be removed in 3.0 release), use `bitarray(..., endian=...)` instead 2023-12-09 2.8.5: ------------------- * speedup unaligned copies by always using word shifts (in combination with builtin byte swap 64 when available) when bit-endianness and machine byte-order are opposite * add `HAVE_BUILTIN_BSWAP64` to header * avoid misaligned pointers when casting to `(uint64_t *)` * add tests 2023-12-04 2.8.4: ------------------- * simplify `copy_n()` (remove special cases), see #d2d6fd53 * add [word shift example C program](../devel/shift_r8.c), and simplify `shift_r8()` * improve documentation and testing * add [roadmap](https://github.com/ilanschnell/bitarray#roadmap) 2023-11-11 2.8.3: ------------------- * ensure readonly attribute is set correctly when creating new objects, see #211 * optimize sparse bitarray compression for raw block type * improve hash functions in Bloom filter example 2023-10-03 2.8.2: ------------------- * update cibuildwheel to 2.16.1 in order to provide cp312 wheels on PyPI * improve error messages for masked assignment * simplify test collection * added `pytest.ini` to allow running pytest with no additional arguments, see #208 * `util.sc_encode()`: avoid writing empty blocks at end of compressed stream, i.e. 
skip encoding when total population count is reached 2023-08-06 2.8.1: ------------------- * use reconstructor function for pickling, see #207 * simplify implementation of `.reverse()` 2023-07-22 2.8.0: ------------------- * allow [integer sequence indexing](indexing.rst) by list of indices, see #204 (addresses #156 and #190) * add [masked indexing](indexing.rst) by bitarray masks, see #205 (addresses #190) * improve `.bytereverse()` docstring, see issue #202 2023-06-24 2.7.6: ------------------- * remove caching hash value, fixes issue #201 2023-06-10 2.7.5: ------------------- * fix for pypy3.9-v7.3.11, #198 (fixes #188) * enable building wheels for PyPy 2023-05-29 2.7.4: ------------------- * register `bitarray` as `abc.MutableSequence`, see #196 * cibuildwheel: update macOS version to 11 from unsupported 10.15, see https://github.com/actions/runner-images/issues/5583 * improve documentation regarding type when indexing single bitarray items, #192 2023-02-20 2.7.3: ------------------- * fix popcount64 name conflict on NetBSD, #189 * even though PyPy is not actively supported, allow running tests for PyPy 3.7 and 3.8, see also #188 * allow running `python setup.py test` * add official Python 3.12 support * simplify count functionality in util module * retire `bitcount_lookup[256]` table * improve `util.count_n()` error messages * avoid `util` module tests from being run more than once in each call to `bitarray.test()` when called multiple times in the same Python process, see #6e52e49a * improve testing 2023-02-12 2.7.2: ------------------- * speedup all count functionality by using `__builtin_popcountll` when available, see #187 * add `popcount64()` to `bitarray.h` - we assume now that `uint64_t` is always available * improve testing 2023-02-10 2.7.1: ------------------- * optimize `util.sc_encode()` 2023-02-05 2.7.0: ------------------- * add `util.sc_encode()` and `util.sc_decode()` for [compression of sparse bitarrays](sparse_compression.rst) * add `util.any_and()` * add `util.intervals()` * move functionality of the following utility functions entirely to C: `hex2ba()`, `base2ba()`, `deserialize()`, `vl_decode()`, `zeros()` * improve error checking for unpickling * add [distance metrics](../examples/distance.py) example: dice, hamming, jaccard, kulczynski1, rogerstanimoto, russellrao, sokalmichener, sokalsneath, yule * add example [sparse bitarray](../examples/sparse) implementations 2023-01-01 2.6.2: ------------------- * optimize `richcompare()` for opposite endianness * improve some docstrings add tests 2022-12-18 2.6.1: ------------------- * add documentation on shift operators, #181 * fix typo in iterable initializer description, #179 * optimize `richcompare()` 2022-07-19 2.6.0: ------------------- * add data descriptors: `.nbytes`, `.padbits`, `.readonly` * allow optional `endian` argument to be `None` when creating bitarrays * fix type annotation for `canonical_decode()`, #178 * frozenbitarray's pad bits are now guaranteed to be zero * add tests 2022-05-10 2.5.1: ------------------- * optimize `.reverse()`, see #177 * allow negative (byte) indices in `.bytereverse()` 2022-05-04 2.5.0: ------------------- * add calculating of canonical Huffman codes `util.canonical_huffman()` and decoding thereof `util.canonical_decode()`, see #173 * allow creating "Huffman codes" from frequency maps with a single symbol in `util.huffman_code()` and `util.canonical_huffman()`, see #172 * allow bytes-like argument in `.frombytes()` and `.pack()` - previously, the arguments were limited to the 
`bytes` object, see #174 * allow bytes-like arguments in `util.deserialize()` * add official [pyodide](https://pyodide.org/) support * add [DEFLATE decompression](../examples/puff/) example * optimize `.bytereverse()` * optimize `delslice()` for cases like `del a[1:17:2]` when `a` is large * fix `examples/huffman/compress.py` to handle files with 0 or 1 characters, see also #172 * add `skipIf` decorator for skipping tests * add tests 2022-03-29 2.4.1: ------------------- * improve `resize()`, see #167 * optimize `copy_n()` by avoiding loops, #171 * `.bytereverse()` no longer sets unused pad bits to zero 2022-03-01 2.4.0: ------------------- * enable building wheels for multiple platforms and Python versions using pypa/cibuildwheel, see #165 and #170 (thanks Brian Thorne, @hardbyte) * use setuptools instead of distutils in `setup.py`, #168 * add missing type hinting for `.count()` step argument 2022-02-21 2.3.7: ------------------- * add optional step argument to `.count()` method, see #162 and #163 * add tests 2022-02-07 2.3.6: ------------------- * add optional value parameter to `util.count_n()`, see #154 and #161 * determine machine endianness at build time when possible, by using the `PY_LITTLE_ENDIAN` macro, in order to optimize `shift_r8()` * add official Python 3.11 support 2022-01-07 2.3.5: ------------------- * Fixed bug for big-endian systems (such as IBM s390), see #159 and #160 * Pass `zip_safe=False` to `setup()`, see #151 2021-09-12 2.3.4: ------------------- * Fix `util.ba2int()` for frozenbitarrays. A bug was introduced in 2.3.0 as `.tobytes()` no longer treats pad bits for read-only buffers as zero. * add tests 2021-09-05 2.3.3: ------------------- * improve some error messages * add tests 2021-08-23 2.3.2: ------------------- * fix slice assignment for shared buffer with offset case, see #3ba05687 and #73081e98 * add tests (including internal debug mode tests for `buffers_overlap()`) 2021-08-20 2.3.1: ------------------- * fix special shared buffer copy case, see #815c2a11 * add and improve tests 2021-08-15 2.3.0: ------------------- * add optional `buffer` argument to `bitarray()` to import the buffer of another object, #141, #146, see also: [buffer protocol](buffer.rst) * update `.buffer_info()` to include: a read-only flag, an imported buffer flag, and the number of buffer exports * add optional start and stop arguments to `util.rindex()` * add [memory-mapped file](../examples/mmapped-file.py) example * ignore underscore (`_`) in string input, e.g. 
`bitarray('1100_0111')` * add missing type hinting for new `.bytereverse()` arguments * fix `.extend()` type annotations, #145 * avoid `.reverse()` using temporary memory * make `.unpack()`, `util.serialize()`, `util.vl_encode()` and `.__reduce__()` more memory efficient * add and improve tests 2021-08-07 2.2.5: ------------------- * speedup `find_bit()` and `find_last()` using uint64 checking, this means a speedup for `.find()`, `.index()`, `.search()` and `util.rindex()` * add optional start and stop arguments to `.bytereverse()` * add example to illustrate how [unaligned copying](../devel/copy_n.py) works internally * add documentation * add tests 2021-07-29 2.2.4: ------------------- * use shift operations to speedup all unaligned copy operations, #142 * expose functionality to Python level only in debug mode for testing * add and improve tests 2021-07-22 2.2.3: ------------------- * speedup `repeat()`, #136 * speedup shift operations, #139 * optimize slice assignment with negative step, e.g.: `a[::-1] = 1` * add tests 2021-07-16 2.2.2: ------------------- * speedup slice assignment, see #132 and #135 * speedup bitwise operations, #133 * optimize `getbit()` and `setbit()` in `bitarray.h` * fix TypeError messages when bitarray or int (0, 1) are expected (bool is a subclass of int) * add and improve tests 2021-07-06 2.2.1: ------------------- * improve documentation * speedup `vl_encode()` * `bitarray.h`: make `getbit()` always an (inline) function * add assertions in C code 2021-07-03 2.2.0: ------------------- * add `bitarray.util.vl_encode()` and `bitarray.util.vl_decode()` which uses a [variable length bitarray format](variable_length.rst), #131 2021-06-15 2.1.3: ------------------- * Fix building with MSVC / Bullseye, #129 2021-06-13 2.1.2: ------------------- * support type hinting for all Python 3 versions (that bitarray supports, 3.5 and higher currently), fixed #128 * add explicit endianness to two tests, fixes #127 2021-06-11 2.1.1: ------------------- * add type hinting (see PEP 484, 561) using stub (`.pyi`) files * add tests 2021-05-05 2.1.0: ------------------- * add `.find()` method, see #122 * `.find()`, `.index()`, `.search()` and `.itersearch()` now all except both (sub-) bitarray as well as bool items to be searched for * improve encode/decode error messages * add [lexicographical permutations example](../examples/lexico.py) * add tests 2021-04-19 2.0.1: ------------------- * update documentation * improve some error messages 2021-04-14 2.0.0: ------------------- * require more specific objects, int (0 or 1) or bool, see #119 * items are always returned as int 0 or 1, #119 * remove `.length()` method (deprecated since 1.5.1 - use `len()`) * in `.unpack()` the `one` argument now defaults to 0x01 (was 0xff) * `.tolist()` now always returns a list of integers (0 or 1) * fix frozenbitarray hash function, see #121 * fix frozenbitarray being mutable by `<<=` and `>>=` * support sequence protocol in `.extend()` (and bitarray creation) * improve OverflowError messages from `util.int2ba()` * add [hexadecimal example](../examples/hexadecimal.py) 2021-04-10 1.9.2: ------------------- * update pythoncapi_compat: Fix support with PyPy 3.7, #120 * update readme 2021-04-05 1.9.1: ------------------- * switch documentation from markdown to reStructuredText * add tests 2021-04-03 1.9.0: ------------------- * add shift operations (`<<`, `>>`, `<<=`, `>>=`), see #117 * add `bitarray.util.ba2base()` and `bitarray.util.base2ba()`, see last paragraph in [Bitarray 
representations](represent.rst) * documentation and tests 2021-03-31 1.8.2: ------------------- * fix crash caused by unsupported types in binary operations, #116 * speedup initializing or extending a bitarray from another with different bit-endianness * add formatting options to `bitarray.util.pprint()` * add documentation on [bitarray representations](represent.rst) * add and improve tests (all 291 tests run in less than half a second on a modern machine) 2021-03-25 1.8.1: ------------------- * moved implementation of and `hex2ba()` and `ba2hex()` to C-level * add `bitarray.util.parity()` 2021-03-21 1.8.0: ------------------- * add `bitarray.util.serialize()` and `bitarray.util.deserialize()` * allow whitespace (ignore space and `\n\r\t\v`) in input strings, e.g. `bitarray('01 11')` or `a += '10 00'` * add `bitarray.util.pprint()` * When initializing a bitarray from another with different bit-endianness, e.g. `a = bitarray('110', 'little')` and `b = bitarray(a, 'big')`, the buffer used to be simply copied, with consequence that `a == b` would result in `False`. This is fixed now, that is `a == b` will always evaluate to `True`. * add test for loading existing pickle file (created using bitarray 1.5.0) * add example showing how to [jsonize bitarrays](../examples/extend_json.py) * add tests 2021-03-12 1.7.1: ------------------- * fix issue #114, raise TypeError when incorrect index is used during assignment, e.g. `a[1.5] = 1` * raise TypeError (not IndexError) when assigning slice to incorrect type, e.g. `a[1:4] = 1.2` * improve some docstrings and tests 2021-02-27 1.7.0: ------------------- * add `bitarray.util.urandom()` * raise TypeError when trying to extend bitarrays from bytes on Python 3, i.e. `bitarray(b'011')` and `.extend(b'110')`. (Deprecated since 1.4.1) 2021-01-20 1.6.3: ------------------- * add missing .h files to sdist tarball, #113 2021-01-20 1.6.2: ------------------- * use `Py_SET_TYPE()` and `Py_SET_SIZE()` for Python 3.10, #109 * add official Python 3.10 support * fix slice assignment to same object, e.g. `a[2::] = a` or `a[::-1] = a`, #112 * add bitarray.h, #110 2020-11-05 1.6.1: ------------------- * use PyType_Ready for all types: bitarray, bitarrayiterator, decodeiterator, decodetree, searchiterator 2020-10-17 1.6.0: ------------------- * add `decodetree` object, for speeding up consecutive calls to `.decode()` and `.iterdecode()`, in particular when dealing with large prefix codes, see #103 * add optional parameter to `.tolist()` which changes the items in the returned list to integers (0 or 1), as opposed to Booleans * remove deprecated `bitdiff()`, which has been deprecated since version 1.2.0, use `bitarray.util.count_xor()` instead * drop Python 2.6 support * update license file, #104 2020-08-24 1.5.3: ------------------- * add optional index parameter to `.index()` to invert single bit * fix `sys.getsizeof(bitarray)` by adding `.__sizeof__()`, see issue #100 2020-08-16 1.5.2: ------------------- * add PyType_Ready usage, issue #66 * speedup search() for bitarrays with length 1 in sparse bitarrays, see issue #67 * add tests 2020-08-10 1.5.1: ------------------- * support signed integers in `util.ba2int()` and `util.int2ba()`, see issue #85 * deprecate `.length()` in favor of `len()` 2020-08-05 1.5.0: ------------------- * Use `Py_ssize_t` for bitarray index. This means that on 32bit systems, the maximum number of elements in a bitarray is 2 GBits. 
We used to have a special 64bit index type for all architectures, but this prevented us from using Python's sequence, mapping and number methods, and made those method lookups slow. * speedup slice operations when step size = 1 (if alignment allows copying whole bytes) * Require equal endianness for operations: `&`, `|`, `^`, `&=`, `|=`, `^=`. This should have always been the case but was overlooked in the past. * raise TypeError when trying to create bitarray from boolean * This will be last release to still support Python 2.6 (which was retired in 2013). We do NOT plan to stop support for Python 2.7 anytime soon. 2020-07-15 1.4.2: ------------------- * add more tests * C-level: - simplify pack/unpack code - fix memory leak in `~` operation (bitarray_cpinvert) 2020-07-14 1.4.1: ------------------- * add official Python 3.9 support * improve many docstrings * add DeprecationWarning for `bitdiff()` * add DeprecationWarning when trying to extend bitarrays from bytes on Python 3 (`bitarray(b'011')` and `.extend(b'110')`) * C-level: - Rewrote `.fromfile()` and `.tofile()` implementation, such that now the same code is used for Python 2 and 3. The new implementation is more memory efficient on Python 3. - use `memcmp()` in `richcompare()` to shortcut EQ/NE, when comparing two very large bitarrays for equality the speedup can easily be 100x - simplify how unpacking is handled * add more tests 2020-07-11 1.4.0: ------------------- * add `.clear()` method (Python 3.3 added this method to lists) * avoid over-allocation when bitarray objects are initially created * raise BufferError when resizing bitarrays which is exporting buffers * add example to study the resize() function * improve some error messages * add more tests * raise `NotImplementedError` with (useful message) when trying to call the `.fromstring()` or `.tostring()` methods, which have been removed in the last release 2020-07-06 1.3.0: ------------------- * add `bitarray.util.make_endian()` * `util.ba2hex()` and `util.hex2ba()` now also support little-endian * add `bitarray.get_default_endian()` * made first argument of initializer a positional-only parameter * remove `.fromstring()` and `.tostring()` methods, these have been deprecated 8 years ago, since version 0.4.0 * add `__all__` in `bitarray/__init__.py` * drop Python 3.3 and 3.4 support 2020-05-18 1.2.2: ------------------- * `util.ba2hex()` now always return a string object (instead of bytes object for Python 3), see issue #94 * `util.hex2ba` allows a unicode object as input on Python 2 * Determine 64-bitness of interpreter in a cross-platform fashion #91, in order to better support PyPy 2020-01-06 1.2.1: ------------------- * simplify markdown of readme so PyPI renders better * make tests for bitarray.util required (instead of warning when they cannot be imported) 2019-12-06 1.2.0: ------------------- * add bitarray.util module which provides useful utility functions * deprecate `bitarray.bitdiff()` in favor of `bitarray.util.count_xor()` * use markdown for documentation * fix bug in `.count()` on 32bit systems in special cases when array size is 2^29 bits or larger * simplified tests by using bytes syntax * update smallints and sieve example to use new utility module * simplified mandel example to use numba * use file context managers in tests 2019-11-07 1.1.0: ------------------- * add frozenbitarray object * add optional start and stop arguments to `.count()` method * add official Python 3.8 support * optimize `setrange()` (C-function) by using `memset()` * fix issue #74, 
bitarray is hashable on Python 2 * fix issue #68, `unittest.TestCase.assert_` deprecated * improved test suite - tests should run in about 1 second * update documentation to use positional-only syntax in docstrings * update readme to pass Python 3 doctest * add utils module to examples 2019-07-19 1.0.1: ------------------- * fix readme to pass `twine check` 2019-07-15 1.0.0: ------------------- * fix bitarrays beings created from unicode in Python 2 * use `PyBytes_*` in C code, treating the Py3k function names as default, which also removes all redefinitions of `PyString_*` * handle negative arguments of .index() method consistently with how they are treated for lists * add a few more comments to the C code * move imports outside tests: pickle, io, etc. * drop Python 2.5 support 2019-05-20 0.9.3: ------------------- * refactor resize() - only shrink allocated memory if new size falls lower than half the allocated size * improve error message when trying to initialize from float or complex 2019-04-29 0.9.2: ------------------- * fix to compile on Windows with VS 2015, issue #72 2019-04-28 0.9.1: ------------------- * fix types to actually be types, #29 * check for ambiguous prefix codes when building binary tree for decoding * remove Python level methods: encode, decode, iterdecode (in favor of having these implemented on the C-level along with check_codedict) * fix self tests for Python 2.5 and 2.6 * move all Huffman code related example code into examples/huffman * add code to generate graphviz .dot file of Huffman tree to examples 2019-04-22 0.9.0: ------------------- * more efficient decode and iterdecode by using C-level binary tree instead of a python one, #54 * added buffer protocol support for Python 3, #55 * fixed invalid pointer exceptions in pypy, #47 * made all examples Py3k compatible * add gene sequence example * add official Python 3.7 support * drop Python 2.4, 3.1 and 3.2 support 2018-07-06 0.8.3: ------------------- * add exception to setup.py when README.rst cannot be opened 2018-05-30 0.8.2: ------------------- * add official Python 3.6 support (although it was already working) * fix description of `fill()`, #52 * handle extending self correctly, #28 * `copy_n()`: fast copy with `memmove()` fixed, #43 * minor clarity/wording changes to README, #23 2013-03-30 0.8.1: ------------------- * fix issue #10, i.e. `int(bitarray())` segfault * added tests for using a bitarray object as an argument to functions like int, long (on Python 2), float, list, tuple, dict 2012-04-04 0.8.0: ------------------- * add Python 2.4 support * add (module level) function bitdiff for calculating the difference between two bitarrays 2012-02-15 0.7.0: ------------------- * add iterdecode method (C level), which returns an iterator but is otherwise like the decode method * improve memory efficiency and speed of pickling large bitarray objects 2012-02-06 0.6.0: ------------------- * add buffer protocol to bitarray objects (Python 2.7 only) * allow slice assignment to 0 or 1, e.g. 
`a[::3] = 0` (in addition to booleans) * moved implementation of itersearch method to C level (Lluis Pamies) * search, itersearch now only except bitarray objects, whereas `__contains__` excepts either booleans or bitarrays * use a priority queue for Huffman tree example (thanks to Ushma Bhatt) * improve documentation 2012-02-02 0.5.2: ------------------- * fixed MSVC compile error on Python 3 (thanks to Chris Gohlke) * add missing start and stop optional parameters to index() method * add examples/compress.py 2012-01-31 0.5.1: ------------------- * update documentation to use tobytes and frombytes, rather than tostring and fromstring (which are now deprecated) * simplified how tests are run 2012-01-23 0.5.0: ------------------- * added itersearch method * added Bloom filter example * minor fixes in docstrings, added more tests 2011-12-29 0.4.0: ------------------- * porting to Python 3.x (Roland Puntaier) * introduced `.tobytes()` and `.frombytes()` (`.tostring()` and `.fromstring()` are now deprecated) * updated development status * added sieve prime number example * moved project to github: https://github.com/ilanschnell/bitarray 2009-04-06 0.3.5: ------------------- * fixed reference counts bugs * added possibility to slice assign to `True` or `False`, e.g. `a[::3] = True` will set every third element to `True` 2009-01-15 0.3.4: ------------------- * Made C code less ambiguous, such that the package compiles on Visual Studio, with all tests passing. 2008-12-14 0.3.3: ------------------- * Made changes to the C code to allow compilation with more compilers. Compiles on Visual Studio, although there are still a few tests failing. 2008-10-19 0.3.2: ------------------- * Added sequential search method. * The special method `__contains__` now also takes advantage of the sequential search. 2008-10-12 0.3.1: ------------------- * Simplified state information for pickling. Argument for count is now optional, defaults to True. Fixed typos. 2008-09-30 0.3.0: ------------------- * Fixed a severe bug for 64-bit machines. Implemented all methods in C, improved tests. * Removed deprecated methods `.from01()` and `.fromlist()`. 2008-09-23 0.2.5: ------------------- * Added section in README about prefix codes. Implemented _multiply method for faster `__mul__` and `__imul__`. Fixed some typos. 2008-09-22 0.2.4: ------------------- * Implemented encode and decode method (in C) for variable-length prefix codes. * Added more examples, wrote README for the examples. * Added more tests, fixed some typos. 2008-09-16 0.2.3: ------------------- * Fixed a memory leak, implemented a number of methods in C. These include __getitem__, __setitem__, __delitem__, pop, remove, insert. The methods implemented on the Python level is very limit now. * Implemented bitwise operations. 2008-09-09 0.2.2: ------------------- * Rewrote parts of the README * Implemented memory efficient algorithm for the reverse method * Fixed typos, added a few tests, more C refactoring. 2008-09-07 0.2.1: ------------------- * Improved tests, in particular added checking for memory leaks. * Refactored many things on the C level. * Implemented a few more methods. 2008-09-02 0.2.0: ------------------- * Added bit-endianness property to the bitarray object * Added the examples to the release package. 
2008-08-17 0.1.0: ------------------- * First official release; put project to http://pypi.python.org/pypi/bitarray/ May 2008: --------- Wrote the initial code, and put it on my personal web-site: http://ilan.schnell-web.net/prog/ bitarray-3.7.1/LICENSE000066400000000000000000000045541505414144000143520ustar00rootroot00000000000000PYTHON SOFTWARE FOUNDATION LICENSE ---------------------------------- 1. This LICENSE AGREEMENT is between Ilan Schnell, and the Individual or Organization ("Licensee") accessing and otherwise using this software ("bitarray") in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, Ilan Schnell hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use bitarray alone or in any derivative version, provided, however, that Ilan Schnell's License Agreement and Ilan Schnell's notice of copyright, i.e., "Copyright (c) 2008 - 2025 Ilan Schnell; All Rights Reserved" are retained in bitarray alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates bitarray or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to bitarray. 4. Ilan Schnell is making bitarray available to Licensee on an "AS IS" basis. ILAN SCHNELL MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ILAN SCHNELL MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF BITARRAY WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. ILAN SCHNELL SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF BITARRAY FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING BITARRAY, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between Ilan Schnell and Licensee. This License Agreement does not grant permission to use Ilan Schnell trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using bitarray, Licensee agrees to be bound by the terms and conditions of this License Agreement. bitarray-3.7.1/Makefile000066400000000000000000000013161505414144000147760ustar00rootroot00000000000000PYTHON=python bitarray/_bitarray.so: bitarray/_bitarray.c $(PYTHON) setup.py build_ext --inplace test: bitarray/_bitarray.so $(PYTHON) setup.py test install: $(PYTHON) -m pip install -vv . 
doc: bitarray/_bitarray.so $(PYTHON) update_doc.py $(PYTHON) setup.py sdist twine check dist/* mypy: mypy bitarray/*.pyi mypy bitarray/test_*.py mypy examples/*.py mypy examples/huffman/*.py mypy examples/sparse/*.py clean: rm -rf build dist rm -f bitarray/*.o bitarray/*.so rm -f bitarray/*.pyc rm -f examples/*.pyc rm -rf bitarray/__pycache__ *.egg-info rm -rf examples/__pycache__ examples/*/__pycache__ rm -rf .mypy_cache bitarray/.mypy_cache rm -rf examples/.mypy_cache examples/*/.mypy_cache bitarray-3.7.1/README.rst000066400000000000000000001004271505414144000150300ustar00rootroot00000000000000bitarray: efficient arrays of booleans ====================================== This library provides an object type which efficiently represents an array of booleans. Bitarrays are sequence types and behave very much like usual lists. Eight bits are represented by one byte in a contiguous block of memory. The user can select between two representations: little-endian and big-endian. All functionality is implemented in C. Methods for accessing the machine representation are provided, including the ability to import and export buffers. This allows creating bitarrays that are mapped to other objects, including memory-mapped files. Key features ------------ * The bit-endianness can be specified for each bitarray object, see below. * Sequence methods: slicing (including slice assignment and deletion), operations ``+``, ``*``, ``+=``, ``*=``, the ``in`` operator, ``len()`` * Bitwise operations: ``~``, ``&``, ``|``, ``^``, ``<<``, ``>>`` (as well as their in-place versions ``&=``, ``|=``, ``^=``, ``<<=``, ``>>=``). * Fast methods for encoding and decoding variable bit length prefix codes. * Bitarray objects support the buffer protocol (both importing and exporting buffers). * Packing and unpacking to other binary data formats, e.g. ``numpy.ndarray``. * Pickling and unpickling of bitarray objects. * Immutable ``frozenbitarray`` objects which are hashable * Sequential search * Type hinting * Extensive test suite with about 600 unittests * Utility module ``bitarray.util``: * conversion to and from hexadecimal strings * generating random bitarrays * pretty printing * conversion to and from integers * creating Huffman codes * compression of sparse bitarrays * (de-) serialization * various count functions * other helpful functions Installation ------------ Python wheels are are available on PyPI for all mayor platforms and Python versions. Which means you can simply: .. code-block:: shell-session $ pip install bitarray Once you have installed the package, you may want to test it: .. code-block:: shell-session $ python -c 'import bitarray; bitarray.test()' bitarray is installed in: /Users/ilan/bitarray/bitarray bitarray version: 3.7.1 sys.version: 3.13.5 (main, Jun 16 2025) [Clang 18.1.8] sys.prefix: /Users/ilan/miniforge pointer size: 64 bit sizeof(size_t): 8 sizeof(bitarrayobject): 80 HAVE_BUILTIN_BSWAP64: 1 default bit-endianness: big machine byte-order: little Py_DEBUG: 0 DEBUG: 0 ......................................................................... ......................................................................... ................................................................ ---------------------------------------------------------------------- Ran 597 tests in 0.165s OK The ``test()`` function is part of the API. It will return a ``unittest.runner.TextTestResult`` object, such that one can verify that all tests ran successfully by: .. 
code-block:: python import bitarray assert bitarray.test().wasSuccessful() Usage ----- As mentioned above, bitarray objects behave very much like lists, so there is not too much to learn. The biggest difference from list objects (except that bitarray are obviously homogeneous) is the ability to access the machine representation of the object. When doing so, the bit-endianness is of importance; this issue is explained in detail in the section below. Here, we demonstrate the basic usage of bitarray objects: .. code-block:: python >>> from bitarray import bitarray >>> a = bitarray() # create empty bitarray >>> a.append(1) >>> a.extend([1, 0]) >>> a bitarray('110') >>> x = bitarray(2 ** 20) # bitarray of length 1048576 (initialized to 0) >>> len(x) 1048576 >>> bitarray('1001 011') # initialize from string (whitespace is ignored) bitarray('1001011') >>> lst = [1, 0, False, True, True] >>> a = bitarray(lst) # initialize from iterable >>> a bitarray('10011') >>> a[2] # indexing a single item will always return an integer 0 >>> a[2:4] # whereas indexing a slice will always return a bitarray bitarray('01') >>> a[2:3] # even when the slice length is just one bitarray('0') >>> a.count(1) 3 >>> a.remove(0) # removes first occurrence of 0 >>> a bitarray('1011') Like lists, bitarray objects support slice assignment and deletion: .. code-block:: python >>> a = bitarray(50) >>> a.setall(0) # set all elements in a to 0 >>> a[11:37:3] = 9 * bitarray('1') >>> a bitarray('00000000000100100100100100100100100100000000000000') >>> del a[12::3] >>> a bitarray('0000000000010101010101010101000000000') >>> a[-6:] = bitarray('10011') >>> a bitarray('000000000001010101010101010100010011') >>> a += bitarray('000111') >>> a[9:] bitarray('001010101010101010100010011000111') In addition, slices can be assigned to booleans, which is easier (and faster) than assigning to a bitarray in which all values are the same: .. code-block:: python >>> a = 20 * bitarray('0') >>> a[1:15:3] = True >>> a bitarray('01001001001001000000') This is easier and faster than: .. code-block:: python >>> a = 20 * bitarray('0') >>> a[1:15:3] = 5 * bitarray('1') >>> a bitarray('01001001001001000000') Note that in the latter we have to create a temporary bitarray whose length must be known or calculated. Another example of assigning slices to Booleans, is setting ranges: .. code-block:: python >>> a = bitarray(30) >>> a[:] = 0 # set all elements to 0 - equivalent to a.setall(0) >>> a[10:25] = 1 # set elements in range(10, 25) to 1 >>> a bitarray('000000000011111111111111100000') As of bitarray version 2.8, indices may also be lists of arbitrary indices (like in NumPy), or bitarrays that are treated as masks, see `Bitarray indexing `__. Bitwise operators ----------------- Bitarray objects support the bitwise operators ``~``, ``&``, ``|``, ``^``, ``<<``, ``>>`` (as well as their in-place versions ``&=``, ``|=``, ``^=``, ``<<=``, ``>>=``). The behavior is very much what one would expect: .. code-block:: python >>> a = bitarray('101110001') >>> ~a # invert bitarray('010001110') >>> b = bitarray('111001011') >>> a ^ b # bitwise XOR bitarray('010111010') >>> a &= b # inplace AND >>> a bitarray('101000001') >>> a <<= 2 # in-place left-shift by 2 >>> a bitarray('100000100') >>> b >> 1 # return b right-shifted by 1 bitarray('011100101') The C language does not specify the behavior of negative shifts and of left shifts larger or equal than the width of the promoted left operand. The exact behavior is compiler/machine specific. 
This Python bitarray library specifies the behavior as follows: * the length of the bitarray is never changed by any shift operation * blanks are filled by 0 * negative shifts raise ``ValueError`` * shifts larger or equal to the length of the bitarray result in bitarrays with all values 0 It is worth noting that (regardless of bit-endianness) the bitarray left shift (``<<``) always shifts towards lower indices, and the right shift (``>>``) always shifts towards higher indices. Bit-endianness -------------- For many purposes the bit-endianness is not of any relevance to the end user and can be regarded as an implementation detail of bitarray objects. However, there are use cases when the bit-endianness becomes important. These use cases involve explicitly reading and writing the bitarray buffer using ``.tobytes()``, ``.frombytes()``, ``.tofile()`` or ``.fromfile()``, importing and exporting buffers. Also, a number of utility functions in ``bitarray.util`` will return different results depending on bit-endianness, such as ``ba2hex()`` or ``ba2int``. To better understand this topic, please read `bit-endianness `__. Buffer protocol --------------- Bitarray objects support the buffer protocol. They can both export their own buffer, as well as import another object's buffer. To learn more about this topic, please read `buffer protocol `__. There is also an example that shows how to memory-map a file to a bitarray: `mmapped-file.py `__ Variable bit length prefix codes -------------------------------- The ``.encode()`` method takes a dictionary mapping symbols to bitarrays and an iterable, and extends the bitarray object with the encoded symbols found while iterating. For example: .. code-block:: python >>> d = {'H':bitarray('111'), 'e':bitarray('0'), ... 'l':bitarray('110'), 'o':bitarray('10')} ... >>> a = bitarray() >>> a.encode(d, 'Hello') >>> a bitarray('111011011010') Note that the string ``'Hello'`` is an iterable, but the symbols are not limited to characters, in fact any immutable Python object can be a symbol. Taking the same dictionary, we can apply the ``.decode()`` method which will return an iterable of the symbols: .. code-block:: python >>> list(a.decode(d)) ['H', 'e', 'l', 'l', 'o'] >>> ''.join(a.decode(d)) 'Hello' Symbols are not limited to being characters. The above dictionary ``d`` can be efficiently constructed using the function ``bitarray.util.huffman_code()``. I also wrote `Huffman coding in Python using bitarray `__ for more background information. When the codes are large, and you have many decode calls, most time will be spent creating the (same) internal decode tree objects. In this case, it will be much faster to create a ``decodetree`` object, which can be passed to bitarray's ``.decode()`` method, instead of passing the prefix code dictionary to those methods itself: .. code-block:: python >>> from bitarray import bitarray, decodetree >>> t = decodetree({'a': bitarray('0'), 'b': bitarray('1')}) >>> a = bitarray('0110') >>> list(a.decode(t)) ['a', 'b', 'b', 'a'] The sole purpose of the immutable ``decodetree`` object is to be passed to bitarray's ``.decode()`` method. Frozenbitarrays --------------- A ``frozenbitarray`` object is very similar to the bitarray object. The difference is that this a ``frozenbitarray`` is immutable, and hashable, and can therefore be used as a dictionary key: .. 
code-block:: python >>> from bitarray import frozenbitarray >>> key = frozenbitarray('1100011') >>> {key: 'some value'} {frozenbitarray('1100011'): 'some value'} >>> key[3] = 1 Traceback (most recent call last): ... TypeError: frozenbitarray is immutable Reference ========= bitarray version: 3.7.1 -- `change log `__ In the following, ``item`` and ``value`` are usually a single bit - an integer 0 or 1. Also, ``sub_bitarray`` refers to either a bitarray, or an ``item``. The bitarray object: -------------------- ``bitarray(initializer=0, /, endian='big', buffer=None)`` -> bitarray Return a new bitarray object whose items are bits initialized from the optional initializer, and bit-endianness. The initializer may be one of the following types: a.) ``int`` bitarray, initialized to zeros, of given length b.) ``bytes`` or ``bytearray`` to initialize buffer directly c.) ``str`` of 0s and 1s, ignoring whitespace and "_" d.) iterable of integers 0 or 1. Optional keyword arguments: ``endian``: Specifies the bit-endianness of the created bitarray object. Allowed values are ``big`` and ``little`` (the default is ``big``). The bit-endianness effects the buffer representation of the bitarray. ``buffer``: Any object which exposes a buffer. When provided, ``initializer`` cannot be present (or has to be ``None``). The imported buffer may be read-only or writable, depending on the object type. New in version 2.3: optional ``buffer`` argument New in version 3.4: allow initializer ``bytes`` or ``bytearray`` to set buffer directly bitarray methods: ----------------- ``all()`` -> bool Return ``True`` when all bits in bitarray are 1. ``a.all()`` is a faster version of ``all(a)``. ``any()`` -> bool Return ``True`` when any bit in bitarray is 1. ``a.any()`` is a faster version of ``any(a)``. ``append(item, /)`` Append ``item`` to the end of the bitarray. ``buffer_info()`` -> BufferInfo Return named tuple with following fields: 0. ``address``: memory address of buffer 1. ``nbytes``: buffer size (in bytes) 2. ``endian``: bit-endianness as a string 3. ``padbits``: number of pad bits 4. ``alloc``: allocated memory for buffer (in bytes) 5. ``readonly``: memory is read-only (bool) 6. ``imported``: buffer is imported (bool) 7. ``exports``: number of buffer exports New in version 3.7: return named tuple ``bytereverse(start=0, stop=, /)`` For each byte in byte-range(``start``, ``stop``) reverse bits in-place. The start and stop indices are given in terms of bytes (not bits). Also note that this method only changes the buffer; it does not change the bit-endianness of the bitarray object. Pad bits are left unchanged such that two consecutive calls will always leave the bitarray unchanged. New in version 2.2.5: optional start and stop arguments ``clear()`` Remove all items from bitarray. New in version 1.4 ``copy()`` -> bitarray Return copy of bitarray (with same bit-endianness). ``count(value=1, start=0, stop=, step=1, /)`` -> int Number of occurrences of ``value`` bitarray within ``[start:stop:step]``. Optional arguments ``start``, ``stop`` and ``step`` are interpreted in slice notation, meaning ``a.count(value, start, stop, step)`` equals ``a[start:stop:step].count(value)``. The ``value`` may also be a sub-bitarray. In this case non-overlapping occurrences are counted within ``[start:stop]`` (``step`` must be 1). 
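For illustration, a minimal sketch of ``.count()`` with slice arguments and a sub-bitarray (the bit pattern below is example data only):

.. code-block:: python

    >>> from bitarray import bitarray
    >>> a = bitarray('10110110')
    >>> a.count()                     # number of 1 bits in the whole bitarray
    5
    >>> a.count(0)                    # number of 0 bits
    3
    >>> a.count(1, 2, 6)              # 1 bits within a[2:6]
    3
    >>> a.count(bitarray('11'))       # non-overlapping occurrences of sub-bitarray
    2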
New in version 1.1.0: optional start and stop arguments New in version 2.3.7: optional step argument New in version 2.9: add non-overlapping sub-bitarray count ``decode(code, /)`` -> iterator Given a prefix code (a dict mapping symbols to bitarrays, or ``decodetree`` object), decode content of bitarray and return an iterator over corresponding symbols. See also: `Bitarray 3 transition `__ New in version 3.0: returns iterator (equivalent to past ``.iterdecode()``) ``encode(code, iterable, /)`` Given a prefix code (a dict mapping symbols to bitarrays), iterate over the iterable object with symbols, and extend bitarray with corresponding bitarray for each symbol. ``extend(iterable, /)`` Append items from to the end of the bitarray. If ``iterable`` is a (Unicode) string, each ``0`` and ``1`` are appended as bits (ignoring whitespace and underscore). New in version 3.4: allow ``bytes`` object ``fill()`` -> int Add zeros to the end of the bitarray, such that the length will be a multiple of 8, and return the number of bits added [0..7]. ``find(sub_bitarray, start=0, stop=, /, right=False)`` -> int Return lowest (or rightmost when ``right=True``) index where sub_bitarray is found, such that sub_bitarray is contained within ``[start:stop]``. Return -1 when sub_bitarray is not found. New in version 2.1 New in version 2.9: add optional keyword argument ``right`` ``frombytes(bytes, /)`` Extend bitarray with raw bytes from a bytes-like object. Each added byte will add eight bits to the bitarray. New in version 2.5.0: allow bytes-like argument ``fromfile(f, n=-1, /)`` Extend bitarray with up to ``n`` bytes read from file object ``f`` (or any other binary stream what supports a ``.read()`` method, e.g. ``io.BytesIO``). Each read byte will add eight bits to the bitarray. When ``n`` is omitted or negative, reads and extends all data until EOF. When ``n`` is non-negative but exceeds the available data, ``EOFError`` is raised. However, the available data is still read and extended. ``index(sub_bitarray, start=0, stop=, /, right=False)`` -> int Return lowest (or rightmost when ``right=True``) index where sub_bitarray is found, such that sub_bitarray is contained within ``[start:stop]``. Raises ``ValueError`` when sub_bitarray is not present. New in version 2.9: add optional keyword argument ``right`` ``insert(index, value, /)`` Insert ``value`` into bitarray before ``index``. ``invert(index=, /)`` Invert all bits in bitarray (in-place). When the optional ``index`` is given, only invert the single bit at ``index``. New in version 1.5.3: optional index argument ``pack(bytes, /)`` Extend bitarray from a bytes-like object, where each byte corresponds to a single bit. The byte ``b'\x00'`` maps to bit 0 and all other bytes map to bit 1. This method, as well as the ``.unpack()`` method, are meant for efficient transfer of data between bitarray objects to other Python objects (for example NumPy's ndarray object) which have a different memory view. New in version 2.5.0: allow bytes-like argument ``pop(index=-1, /)`` -> item Remove and return item at ``index`` (default last). Raises ``IndexError`` if index is out of range. ``remove(value, /)`` Remove the first occurrence of ``value``. Raises ``ValueError`` if value is not present. ``reverse()`` Reverse all bits in bitarray (in-place). ``search(sub_bitarray, start=0, stop=, /, right=False)`` -> iterator Return iterator over indices where sub_bitarray is found, such that sub_bitarray is contained within ``[start:stop]``. 
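A brief sketch of ``.search()`` (the bit pattern is chosen only for illustration):

.. code-block:: python

    >>> from bitarray import bitarray
    >>> a = bitarray('10011011')
    >>> list(a.search(bitarray('11')))              # indices of all matches
    [3, 6]
    >>> list(a.search(bitarray('11'), right=True))  # descending order
    [6, 3]
    >>> list(a.search(1))                           # a single bit may be searched as well
    [0, 3, 4, 6, 7]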
The indices are iterated in ascending order (from lowest to highest), unless ``right=True``, which will iterate in descending order (starting with rightmost match). See also: `Bitarray 3 transition `__ New in version 2.9: optional start and stop arguments - add optional keyword argument ``right`` New in version 3.0: returns iterator (equivalent to past ``.itersearch()``) ``setall(value, /)`` Set all elements in bitarray to ``value``. Note that ``a.setall(value)`` is equivalent to ``a[:] = value``. ``sort(reverse=False)`` Sort all bits in bitarray (in-place). ``to01(group=0, sep=' ')`` -> str Return bitarray as (Unicode) string of ``0``s and ``1``s. The bits are grouped into ``group`` bits (default is no grouping). When grouped, the string ``sep`` is inserted between groups of ``group`` characters, default is a space. New in version 3.3: optional ``group`` and ``sep`` arguments ``tobytes()`` -> bytes Return the bitarray buffer (pad bits are set to zero). ``tofile(f, /)`` Write bitarray buffer to file object ``f``. ``tolist()`` -> list Return bitarray as list of integers. ``a.tolist()`` equals ``list(a)``. Note that the list object being created will require 32 or 64 times more memory (depending on the machine architecture) than the bitarray object, which may cause a memory error if the bitarray is very large. ``unpack(zero=b'\x00', one=b'\x01')`` -> bytes Return bytes that contain one byte for each bit in the bitarray, using specified mapping. bitarray data descriptors: -------------------------- Data descriptors were added in version 2.6. ``endian`` -> str bit-endianness as Unicode string New in version 3.4: replaces former ``.endian()`` method ``nbytes`` -> int buffer size in bytes ``padbits`` -> int number of pad bits ``readonly`` -> bool bool indicating whether buffer is read-only Other objects: -------------- ``frozenbitarray(initializer=0, /, endian='big', buffer=None)`` -> frozenbitarray Return a ``frozenbitarray`` object. Initialized the same way a ``bitarray`` object is initialized. A ``frozenbitarray`` is immutable and hashable, and may therefore be used as a dictionary key. New in version 1.1 ``decodetree(code, /)`` -> decodetree Given a prefix code (a dict mapping symbols to bitarrays), create a binary tree object to be passed to ``.decode()``. New in version 1.6 Functions defined in the `bitarray` module: ------------------------------------------- ``bits2bytes(n, /)`` -> int Return the number of bytes necessary to store n bits. ``get_default_endian()`` -> str Return the default bit-endianness for new bitarray objects being created. Unless ``_set_default_endian('little')`` was called, the default bit-endianness is ``big``. New in version 1.3 ``test(verbosity=1)`` -> TextTestResult Run self-test, and return ``unittest.runner.TextTestResult`` object. Functions defined in `bitarray.util` module: -------------------------------------------- This sub-module was added in version 1.2. ``any_and(a, b, /)`` -> bool Efficient implementation of ``any(a & b)``. New in version 2.7 ``ba2base(n, bitarray, /, group=0, sep=' ')`` -> str Return a string containing the base ``n`` ASCII representation of the bitarray. Allowed values for ``n`` are 2, 4, 8, 16, 32 and 64. The bitarray has to be multiple of length 1, 2, 3, 4, 5 or 6 respectively. For ``n=32`` the RFC 4648 Base32 alphabet is used, and for ``n=64`` the standard base 64 alphabet is used. When grouped, the string ``sep`` is inserted between groups of ``group`` characters, default is a space. 
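A minimal illustrative session for ``ba2base()`` (assuming the default big-endian bit-endianness; for ``n=16`` the digits are the lowercase hexadecimal digits also produced by ``ba2hex()``):

.. code-block:: python

    >>> from bitarray import bitarray
    >>> from bitarray.util import ba2base
    >>> a = bitarray('0110 1111 1011')
    >>> ba2base(2, a)                 # same as a.to01()
    '011011111011'
    >>> ba2base(16, a)                # length 12 is a multiple of 4
    '6fb'
    >>> ba2base(16, a, group=2)       # insert sep between groups of 2 characters
    '6f b'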
See also: `Bitarray representations `__ New in version 1.9 New in version 3.3: optional ``group`` and ``sep`` arguments ``ba2hex(bitarray, /, group=0, sep=' ')`` -> hexstr Return a string containing the hexadecimal representation of the bitarray (which has to be multiple of 4 in length). When grouped, the string ``sep`` is inserted between groups of ``group`` characters, default is a space. New in version 3.3: optional ``group`` and ``sep`` arguments ``ba2int(bitarray, /, signed=False)`` -> int Convert the given bitarray to an integer. The bit-endianness of the bitarray is respected. ``signed`` indicates whether two's complement is used to represent the integer. ``base2ba(n, asciistr, /, endian=None)`` -> bitarray Bitarray of base ``n`` ASCII representation. Allowed values for ``n`` are 2, 4, 8, 16, 32 and 64. For ``n=32`` the RFC 4648 Base32 alphabet is used, and for ``n=64`` the standard base 64 alphabet is used. Whitespace is ignored. See also: `Bitarray representations `__ New in version 1.9 New in version 3.3: ignore whitespace ``byteswap(a, n=, /)`` Reverse every ``n`` consecutive bytes of ``a`` in-place. By default, all bytes are reversed. Note that ``n`` is not limited to 2, 4 or 8, but can be any positive integer. Also, ``a`` may be any object that exposes a writable buffer. Nothing about this function is specific to bitarray objects. New in version 3.4 ``canonical_decode(bitarray, count, symbol, /)`` -> iterator Decode bitarray using canonical Huffman decoding tables where ``count`` is a sequence containing the number of symbols of each length and ``symbol`` is a sequence of symbols in canonical order. See also: `Canonical Huffman Coding `__ New in version 2.5 ``canonical_huffman(dict, /)`` -> tuple Given a frequency map, a dictionary mapping symbols to their frequency, calculate the canonical Huffman code. Returns a tuple containing: 0. the canonical Huffman code as a dict mapping symbols to bitarrays 1. a list containing the number of symbols of each code length 2. a list of symbols in canonical order Note: the two lists may be used as input for ``canonical_decode()``. See also: `Canonical Huffman Coding `__ New in version 2.5 ``correspond_all(a, b, /)`` -> tuple Return tuple with counts of: ~a & ~b, ~a & b, a & ~b, a & b New in version 3.4 ``count_and(a, b, /)`` -> int Return ``(a & b).count()`` in a memory efficient manner, as no intermediate bitarray object gets created. ``count_n(a, n, value=1, /)`` -> int Return lowest index ``i`` for which ``a[:i].count(value) == n``. Raises ``ValueError`` when ``n`` exceeds total count (``a.count(value)``). New in version 2.3.6: optional value argument ``count_or(a, b, /)`` -> int Return ``(a | b).count()`` in a memory efficient manner, as no intermediate bitarray object gets created. ``count_xor(a, b, /)`` -> int Return ``(a ^ b).count()`` in a memory efficient manner, as no intermediate bitarray object gets created. This is also known as the Hamming distance. ``deserialize(bytes, /)`` -> bitarray Return a bitarray given a bytes-like representation such as returned by ``serialize()``. See also: `Bitarray representations `__ New in version 1.8 New in version 2.5.0: allow bytes-like argument ``gen_primes(n, /, endian=None, odd=False)`` -> bitarray Generate a bitarray of length ``n`` in which active indices are prime numbers. By default (``odd=False``), active indices correspond to prime numbers directly. 
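For example (a small sketch; bits shown for the default ``odd=False`` case)::

    >>> from bitarray.util import gen_primes
    >>> p = gen_primes(16)
    >>> p.to01(group=4)
    '0011 0101 0001 0100'
    >>> [i for i in range(16) if p[i]]
    [2, 3, 5, 7, 11, 13]
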
When ``odd=True``, only odd prime numbers are represented in the resulting bitarray ``a``, and ``a[i]`` corresponds to ``2*i+1`` being prime or not. Apart from working with prime numbers, this function is useful for testing, as it provides a simple way to create a well-defined bitarray of any length. New in version 3.7 ``hex2ba(hexstr, /, endian=None)`` -> bitarray Bitarray of hexadecimal representation. hexstr may contain any number (including odd numbers) of hex digits (upper or lower case). Whitespace is ignored. New in version 3.3: ignore whitespace ``huffman_code(dict, /, endian=None)`` -> dict Given a frequency map, a dictionary mapping symbols to their frequency, calculate the Huffman code, i.e. a dict mapping those symbols to bitarrays (with given bit-endianness). Note that the symbols are not limited to being strings. Symbols may be any hashable object. ``int2ba(int, /, length=None, endian=None, signed=False)`` -> bitarray Convert the given integer to a bitarray (with given bit-endianness, and no leading (big-endian) / trailing (little-endian) zeros), unless the ``length`` of the bitarray is provided. An ``OverflowError`` is raised if the integer is not representable with the given number of bits. ``signed`` determines whether two's complement is used to represent the integer, and requires ``length`` to be provided. ``intervals(bitarray, /)`` -> iterator Compute all uninterrupted intervals of 1s and 0s, and return an iterator over tuples ``(value, start, stop)``. The intervals are guaranteed to be in order, and their size is always non-zero (``stop - start > 0``). New in version 2.7 ``ones(n, /, endian=None)`` -> bitarray Create a bitarray of length ``n``, with all values ``1``, and optional bit-endianness (``little`` or ``big``). New in version 2.9 ``parity(a, /)`` -> int Return parity of bitarray ``a``. ``parity(a)`` is equivalent to ``a.count() % 2`` but more efficient. New in version 1.9 ``pprint(bitarray, /, stream=None, group=8, indent=4, width=80)`` Pretty-print bitarray object to ``stream``, defaults is ``sys.stdout``. By default, bits are grouped in bytes (8 bits), and 64 bits per line. Non-bitarray objects are printed using ``pprint.pprint()``. New in version 1.8 ``random_k(n, /, k, endian=None)`` -> bitarray Return (pseudo-) random bitarray of length ``n`` with ``k`` elements set to one. Mathematically equivalent to setting (in a bitarray of length ``n``) all bits at indices ``random.sample(range(n), k)`` to one. The random bitarrays are reproducible when giving Python's ``random.seed()`` with a specific seed value. This function requires Python 3.9 or higher, as it depends on the standard library function ``random.randbytes()``. Raises ``NotImplementedError`` when Python version is too low. New in version 3.6 ``random_p(n, /, p=0.5, endian=None)`` -> bitarray Return (pseudo-) random bitarray of length ``n``, where each bit has probability ``p`` of being one (independent of any other bits). Mathematically equivalent to ``bitarray((random() < p for _ in range(n)), endian)``, but much faster for large ``n``. The random bitarrays are reproducible when giving Python's ``random.seed()`` with a specific seed value. This function requires Python 3.12 or higher, as it depends on the standard library function ``random.binomialvariate()``. Raises ``NotImplementedError`` when Python version is too low. 
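A minimal sketch of how these random utilities might be used (assuming a sufficiently recent Python; the exact bits depend on the seed, so only the guaranteed count is shown)::

    >>> import random
    >>> from bitarray.util import random_k, random_p
    >>> random.seed(1234)
    >>> a = random_p(1000, p=0.25)   # each bit is 1 with probability 0.25
    >>> b = random_k(1000, k=300)    # exactly 300 bits are set to 1
    >>> b.count()
    300
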
See also: `Random Bitarrays `__ New in version 3.5 ``sc_decode(stream, /)`` -> bitarray Decompress binary stream (an integer iterator, or bytes-like object) of a sparse compressed (``sc``) bitarray, and return the decoded bitarray. This function consumes only one bitarray and leaves the remaining stream untouched. Use ``sc_encode()`` for compressing (encoding). See also: `Compression of sparse bitarrays `__ New in version 2.7 ``sc_encode(bitarray, /)`` -> bytes Compress a sparse bitarray and return its binary representation. This representation is useful for efficiently storing sparse bitarrays. Use ``sc_decode()`` for decompressing (decoding). See also: `Compression of sparse bitarrays `__ New in version 2.7 ``serialize(bitarray, /)`` -> bytes Return a serialized representation of the bitarray, which may be passed to ``deserialize()``. It efficiently represents the bitarray object (including its bit-endianness) and is guaranteed not to change in future releases. See also: `Bitarray representations `__ New in version 1.8 ``strip(bitarray, /, mode='right')`` -> bitarray Return a new bitarray with zeros stripped from left, right or both ends. Allowed values for mode are the strings: ``left``, ``right``, ``both`` ``subset(a, b, /)`` -> bool Return ``True`` if bitarray ``a`` is a subset of bitarray ``b``. ``subset(a, b)`` is equivalent to ``a | b == b`` (and equally ``a & b == a``) but more efficient as no intermediate bitarray object is created and the buffer iteration is stopped as soon as one mismatch is found. ``sum_indices(a, /, mode=1)`` -> int Return sum of indices of all active bits in bitarray ``a``. Equivalent to ``sum(i for i, v in enumerate(a) if v)``. ``mode=2`` sums square of indices. New in version 3.6 New in version 3.7: add optional mode argument ``urandom(n, /, endian=None)`` -> bitarray Return random bitarray of length ``n`` (uses ``os.urandom()``). New in version 1.7 ``vl_decode(stream, /, endian=None)`` -> bitarray Decode binary stream (an integer iterator, or bytes-like object), and return the decoded bitarray. This function consumes only one bitarray and leaves the remaining stream untouched. Use ``vl_encode()`` for encoding. See also: `Variable length bitarray format `__ New in version 2.2 ``vl_encode(bitarray, /)`` -> bytes Return variable length binary representation of bitarray. This representation is useful for efficiently storing small bitarray in a binary stream. Use ``vl_decode()`` for decoding. See also: `Variable length bitarray format `__ New in version 2.2 ``xor_indices(a, /)`` -> int Return xor reduced indices of all active bits in bitarray ``a``. This is essentially equivalent to ``reduce(operator.xor, (i for i, v in enumerate(a) if v))``. New in version 3.2 ``zeros(n, /, endian=None)`` -> bitarray Create a bitarray of length ``n``, with all values ``0``, and optional bit-endianness (``little`` or ``big``). bitarray-3.7.1/bitarray/000077500000000000000000000000001505414144000151525ustar00rootroot00000000000000bitarray-3.7.1/bitarray/__init__.py000066400000000000000000000045211505414144000172650ustar00rootroot00000000000000# Copyright (c) 2008 - 2025, Ilan Schnell; All Rights Reserved """ This package defines an object type which can efficiently represent a bitarray. Bitarrays are sequence types and behave very much like lists. 
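A minimal, illustrative example:

    >>> from bitarray import bitarray
    >>> a = bitarray('1001')
    >>> a.count()
    2
    >>> a[0] = 0
    >>> a.to01()
    '0001'
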
Please find a description of this package at: https://github.com/ilanschnell/bitarray Author: Ilan Schnell """ from __future__ import absolute_import from collections import namedtuple from bitarray._bitarray import (bitarray, decodetree, _sysinfo, bits2bytes, _bitarray_reconstructor, get_default_endian, _set_default_endian, __version__) __all__ = ['bitarray', 'frozenbitarray', 'decodetree', 'bits2bytes'] BufferInfo = namedtuple('BufferInfo', ['address', 'nbytes', 'endian', 'padbits', 'alloc', 'readonly', 'imported', 'exports']) class frozenbitarray(bitarray): """frozenbitarray(initializer=0, /, endian='big', buffer=None) -> \ frozenbitarray Return a `frozenbitarray` object. Initialized the same way a `bitarray` object is initialized. A `frozenbitarray` is immutable and hashable, and may therefore be used as a dictionary key. """ def __init__(self, *args, **kwargs): self._freeze() def __repr__(self): return 'frozen' + bitarray.__repr__(self) def __hash__(self): "Return hash(self)." # ensure hash is independent of endianness a = bitarray(self, 'big') return hash((len(a), a.tobytes())) # Technically the code below is not necessary, as all these methods will # raise a TypeError on read-only memory. However, with a different error # message. def __delitem__(self, *args, **kwargs): "" # no docstring raise TypeError("frozenbitarray is immutable") append = bytereverse = clear = extend = encode = fill = __delitem__ frombytes = fromfile = insert = invert = pack = pop = __delitem__ remove = reverse = setall = sort = __setitem__ = __delitem__ __iadd__ = __iand__ = __imul__ = __ior__ = __ixor__ = __delitem__ __ilshift__ = __irshift__ = __delitem__ def test(verbosity=1): """test(verbosity=1) -> TextTestResult Run self-test, and return `unittest.runner.TextTestResult` object. """ from bitarray import test_bitarray return test_bitarray.run(verbosity=verbosity) bitarray-3.7.1/bitarray/__init__.pyi000066400000000000000000000130231505414144000174330ustar00rootroot00000000000000# Copyright (c) 2021 - 2025, Ilan Schnell; All Rights Reserved # # This stub, as well as util.pyi, are tested with Python 3.10 and mypy 1.11.2 from collections.abc import Iterable, Iterator, Sequence from unittest.runner import TextTestResult from typing import Any, BinaryIO, Dict, Union, overload, NamedTuple CodeDict = Dict[Any, bitarray] # Python 3.12 has abc.Buffer which should be used instead BytesLike = Union[bytes, bytearray] class BufferInfo(NamedTuple): address: int nbytes: int endian: str padbits: int alloc: int readonly: bool imported: bool exports: int class decodetree: def __init__(self, code: CodeDict) -> None: ... def complete(self) -> bool: ... def nodes(self) -> int: ... def todict(self) -> CodeDict: ... class bitarray: def __init__(self, initializer: Union[int, str, Iterable[int], None] = ..., endian: Union[str, None] = ..., buffer: Any = ...) -> None: ... def all(self) -> bool: ... def any(self) -> bool: ... def append(self, value: int) -> None: ... def buffer_info(self) -> BufferInfo: ... def bytereverse(self, start: int = ..., stop: int = ...) -> None: ... def clear(self) -> None: ... def copy(self) -> bitarray: ... def count(self, sub_bitarray: Union[bitarray, int] = ..., start: int = ..., stop: int = ..., step: int = ...) -> int: ... def encode(self, code: CodeDict, x: Iterable) -> None: ... def decode(self, code: Union[CodeDict, decodetree]) -> Iterator: ... def extend(self, x: Union[str, Iterable[int]]) -> None: ... def fill(self) -> int: ... 
def find(self, sub_bitarray: Union[bitarray, int], start: int = ..., stop: int = ..., right: int = ...) -> int: ... def frombytes(self, a: BytesLike) -> None: ... def fromfile(self, f: BinaryIO, n: int = ...) -> None: ... def index(self, sub_bitarray: Union[bitarray, int], start: int = ..., stop: int = ..., right: int = ...) -> int: ... def insert(self, i: int, value: int) -> None: ... def invert(self, i: int = ...) -> None: ... def search(self, sub_bitarray: Union[bitarray, int], start: int = ..., stop: int = ..., right: int = ...) -> Iterator[int]: ... def pack(self, b: BytesLike) -> None: ... def pop(self, i: int = ...) -> int: ... def remove(self, value: int) -> None: ... def reverse(self) -> None: ... def setall(self, value: int) -> None: ... def sort(self, reverse: int) -> None: ... def to01(self, group: int = ..., sep: str = ...) -> str: ... def tobytes(self) -> bytes: ... def tofile(self, f: BinaryIO) -> None: ... def tolist(self) -> list[int]: ... def unpack(self, zero: bytes = ..., one: bytes = ...) -> bytes: ... def __len__(self) -> int: ... def __iter__(self) -> Iterator[int]: ... @overload def __getitem__(self, i: int) -> int: ... @overload def __getitem__(self, s: Union[slice, bitarray, Sequence]) -> bitarray: ... @overload def __setitem__(self, i: Union[int, slice, Sequence], o: int) -> None: ... @overload def __setitem__(self, s: Union[slice, bitarray, Sequence], o: bitarray) -> None: ... def __delitem__(self, i: Union[int, slice, bitarray, Sequence]) -> None: ... def __buffer__(self, flags: int, /) -> memoryview: ... def __release_buffer__(self, buffer: memoryview, /) -> None: ... def __add__(self, other: bitarray) -> bitarray: ... def __iadd__(self, other: bitarray) -> bitarray: ... def __mul__(self, n: int) -> bitarray: ... def __imul__(self, n: int) -> bitarray: ... def __rmul__(self, n: int) -> bitarray: ... def __ge__(self, other: bitarray) -> bool: ... def __gt__(self, other: bitarray) -> bool: ... def __le__(self, other: bitarray) -> bool: ... def __lt__(self, other: bitarray) -> bool: ... def __and__(self, other: bitarray) -> bitarray: ... def __or__(self, other: bitarray) -> bitarray: ... def __xor__(self, other: bitarray) -> bitarray: ... def __iand__(self, other: bitarray) -> bitarray: ... def __ior__(self, other: bitarray) -> bitarray: ... def __ixor__(self, other: bitarray) -> bitarray: ... def __invert__(self) -> bitarray: ... def __lshift__(self, n: int) -> bitarray: ... def __rshift__(self, n: int) -> bitarray: ... def __ilshift__(self, n: int) -> bitarray: ... def __irshift__(self, n: int) -> bitarray: ... # data descriptors @property def endian(self) -> str: ... @property def nbytes(self) -> int: ... @property def padbits(self) -> int: ... @property def readonly(self) -> bool: ... class frozenbitarray(bitarray): def __hash__(self) -> int: ... __version__: str def bits2bytes(n: int) -> int: ... def get_default_endian() -> str: ... def test(verbosity: int = ...) -> TextTestResult: ... def _set_default_endian(endian: str) -> None: ... def _sysinfo(key: str) -> int: ... def _bitarray_reconstructor(cls: type, buffer: bytes, endian: str, padbits: int, readonly: int) -> bitarray: ... bitarray-3.7.1/bitarray/_bitarray.c000066400000000000000000003736731505414144000173150ustar00rootroot00000000000000/* Copyright (c) 2008 - 2025, Ilan Schnell; All Rights Reserved bitarray is published under the PSF license. This file is the C part of the bitarray package. All functionality of the bitarray object is implemented here. 
Author: Ilan Schnell */ #define PY_SSIZE_T_CLEAN #include "Python.h" #include "pythoncapi_compat.h" #include "bitarray.h" /* size used when reading / writing blocks from files (in bytes) */ #define BLOCKSIZE 65536 /* default bit-endianness */ static int default_endian = ENDIAN_BIG; /* translation table - setup during module initialization */ static char reverse_trans[256]; static PyTypeObject Bitarray_Type; #define bitarray_Check(obj) PyObject_TypeCheck((obj), &Bitarray_Type) static int resize(bitarrayobject *self, Py_ssize_t nbits) { const size_t size = Py_SIZE(self); const size_t allocated = self->allocated; const size_t newsize = BYTES((size_t) nbits); size_t new_allocated; if (self->ob_exports > 0) { PyErr_SetString(PyExc_BufferError, "cannot resize bitarray that is exporting buffers"); return -1; } if (self->buffer) { PyErr_SetString(PyExc_BufferError, "cannot resize imported buffer"); return -1; } if (nbits < 0) { PyErr_Format(PyExc_OverflowError, "bitarray resize %zd", nbits); return -1; } assert(allocated >= size && size == BYTES((size_t) self->nbits)); /* ob_item == NULL implies ob_size == allocated == 0 */ assert(self->ob_item != NULL || (size == 0 && allocated == 0)); /* resize() is never called on read-only memory */ assert(self->readonly == 0); /* bypass everything when buffer size hasn't changed */ if (newsize == size) { self->nbits = nbits; return 0; } if (newsize == 0) { PyMem_Free(self->ob_item); self->ob_item = NULL; Py_SET_SIZE(self, 0); self->allocated = 0; self->nbits = 0; return 0; } if (allocated >= newsize) { /* current buffer is large enough to host the requested size */ if (newsize >= allocated / 2) { /* minor downsize, bypass reallocation */ Py_SET_SIZE(self, newsize); self->nbits = nbits; return 0; } /* major downsize, resize down to exact size */ new_allocated = newsize; } else { /* need to grow buffer */ new_allocated = newsize; /* overallocate when previous size isn't zero and when growth is moderate */ if (size != 0 && newsize / 2 <= allocated) { /* overallocate proportional to the bitarray size and add padding to make the allocated size multiple of 4 */ new_allocated += (newsize >> 4) + (newsize < 8 ? 
3 : 7); new_allocated &= ~(size_t) 3; } } assert(new_allocated >= newsize); self->ob_item = PyMem_Realloc(self->ob_item, new_allocated); if (self->ob_item == NULL) { PyErr_NoMemory(); return -1; } Py_SET_SIZE(self, newsize); self->allocated = new_allocated; self->nbits = nbits; return 0; } /* create new bitarray object without initialization of buffer */ static bitarrayobject * newbitarrayobject(PyTypeObject *type, Py_ssize_t nbits, int endian) { const size_t nbytes = BYTES((size_t) nbits); bitarrayobject *obj; assert(nbits >= 0); obj = (bitarrayobject *) type->tp_alloc(type, 0); if (obj == NULL) return NULL; if (nbytes == 0) { obj->ob_item = NULL; } else { /* allocate exact size */ obj->ob_item = (char *) PyMem_Malloc(nbytes); if (obj->ob_item == NULL) { PyObject_Del(obj); PyErr_NoMemory(); return NULL; } } Py_SET_SIZE(obj, nbytes); obj->allocated = nbytes; /* no overallocation */ obj->nbits = nbits; obj->endian = endian; obj->ob_exports = 0; obj->weakreflist = NULL; obj->buffer = NULL; obj->readonly = 0; return obj; } /* return new copy of bitarray object self */ static bitarrayobject * bitarray_cp(bitarrayobject *self) { bitarrayobject *res; res = newbitarrayobject(Py_TYPE(self), self->nbits, self->endian); if (res == NULL) return NULL; memcpy(res->ob_item, self->ob_item, (size_t) Py_SIZE(self)); return res; } static void bitarray_dealloc(bitarrayobject *self) { if (self->weakreflist) PyObject_ClearWeakRefs((PyObject *) self); if (self->buffer) { PyBuffer_Release(self->buffer); PyMem_Free(self->buffer); } else if (self->ob_item) { /* only free object's buffer - imported buffers cannot be freed */ assert(self->buffer == NULL); PyMem_Free((void *) self->ob_item); } Py_TYPE(self)->tp_free((PyObject *) self); } /* return 1 when buffers overlap, 0 otherwise */ static int buffers_overlap(bitarrayobject *self, bitarrayobject *other) { if (Py_SIZE(self) == 0 || Py_SIZE(other) == 0) return 0; /* is pointer ptr in buffer of bitarray a ? */ #define PIB(a, ptr) (a->ob_item <= ptr && ptr < a->ob_item + Py_SIZE(a)) return PIB(self, other->ob_item) || PIB(other, self->ob_item); #undef PIB } /* reverse bits in first n characters of p */ static void bytereverse(char *p, Py_ssize_t n) { assert(n >= 0); while (n--) { *p = reverse_trans[(unsigned char) *p]; p++; } } /* The following two functions operate on first n bytes in buffer. Within this region, they shift all bits by k positions to right, i.e. towards higher addresses. They operate on little-endian and bit-endian bitarrays respectively. As we shift right, we need to start with the highest address and loop downwards such that lower bytes are still unaltered. 
See also devel/shift_r8.c */ static void shift_r8le(unsigned char *buff, Py_ssize_t n, int k) { Py_ssize_t w = 0; #if HAVE_BUILTIN_BSWAP64 || PY_LITTLE_ENDIAN /* use shift word */ w = n / 8; /* number of words used for shifting */ n %= 8; /* number of remaining bytes */ #endif while (n--) { /* shift in byte-range(8 * w, n) */ Py_ssize_t i = n + 8 * w; buff[i] <<= k; /* shift byte */ if (n || w) /* add shifted next lower byte */ buff[i] |= buff[i - 1] >> (8 - k); } assert(w == 0 || ((uintptr_t) buff) % 4 == 0); while (w--) { /* shift in word-range(0, w) */ uint64_t *p = ((uint64_t *) buff) + w; #if HAVE_BUILTIN_BSWAP64 && PY_BIG_ENDIAN *p = builtin_bswap64(*p); *p <<= k; *p = builtin_bswap64(*p); #else *p <<= k; #endif if (w) /* add shifted byte from next lower word */ buff[8 * w] |= buff[8 * w - 1] >> (8 - k); } } static void shift_r8be(unsigned char *buff, Py_ssize_t n, int k) { Py_ssize_t w = 0; #if HAVE_BUILTIN_BSWAP64 || PY_BIG_ENDIAN /* use shift word */ w = n / 8; /* number of words used for shifting */ n %= 8; /* number of remaining bytes */ #endif while (n--) { /* shift in byte-range(8 * w, n) */ Py_ssize_t i = n + 8 * w; buff[i] >>= k; /* shift byte */ if (n || w) /* add shifted next lower byte */ buff[i] |= buff[i - 1] << (8 - k); } assert(w == 0 || ((uintptr_t) buff) % 4 == 0); while (w--) { /* shift in word-range(0, w) */ uint64_t *p = ((uint64_t *) buff) + w; #if HAVE_BUILTIN_BSWAP64 && PY_LITTLE_ENDIAN *p = builtin_bswap64(*p); *p >>= k; *p = builtin_bswap64(*p); #else *p >>= k; #endif if (w) /* add shifted byte from next lower word */ buff[8 * w] |= buff[8 * w - 1] << (8 - k); } } /* shift bits in byte-range(a, b) by k bits to right */ static void shift_r8(bitarrayobject *self, Py_ssize_t a, Py_ssize_t b, int k) { unsigned char *buff = (unsigned char *) self->ob_item + a; Py_ssize_t n = b - a; /* number of bytes to be shifted */ Py_ssize_t s = 0; /* distance to next aligned pointer */ assert(0 <= k && k < 8); assert(0 <= a && a <= Py_SIZE(self)); assert(0 <= b && b <= Py_SIZE(self)); assert(self->readonly == 0); if (k == 0 || n <= 0) return; if (n >= 8) { s = to_aligned((void *) buff); buff += s; /* align pointer for casting to (uint64_t *) */ n -= s; } if (IS_LE(self)) { /* little endian */ shift_r8le(buff, n, k); if (s) { buff[0] |= buff[-1] >> (8 - k); shift_r8le(buff - s, s, k); } } else { /* big endian */ shift_r8be(buff, n, k); if (s) { buff[0] |= buff[-1] << (8 - k); shift_r8be(buff - s, s, k); } } } /* Copy n bits from other (starting at b) onto self (starting at a). Please see devel/copy_n.py for more details. 
Notes: - self and other may have opposite bit-endianness - other may equal self - copy a section of self onto itself - when other and self are distinct objects, their buffers may not overlap */ static void copy_n(bitarrayobject *self, Py_ssize_t a, bitarrayobject *other, Py_ssize_t b, Py_ssize_t n) { Py_ssize_t p3 = b / 8, i; int sa = a % 8, sb = -(b % 8); char t3 = 0; /* silence uninitialized warning on some compilers */ assert(0 <= n && n <= self->nbits && n <= other->nbits); assert(0 <= a && a <= self->nbits - n); assert(0 <= b && b <= other->nbits - n); assert(self == other || !buffers_overlap(self, other)); assert(self->readonly == 0); if (n == 0 || (self == other && a == b)) return; if (sa + sb < 0) { t3 = other->ob_item[p3++]; sb += 8; } if (n > sb) { const Py_ssize_t p1 = a / 8, p2 = (a + n - 1) / 8, m = BYTES(n - sb); const char *table = ones_table[IS_BE(self)]; char *cp1 = self->ob_item + p1, m1 = table[sa]; char *cp2 = self->ob_item + p2, m2 = table[(a + n) % 8]; char t1 = *cp1, t2 = *cp2; assert(p1 + m <= Py_SIZE(self) && p3 + m <= Py_SIZE(other)); memmove(cp1, other->ob_item + p3, (size_t) m); if (self->endian != other->endian) bytereverse(cp1, m); shift_r8(self, p1, p2 + 1, sa + sb); if (m1) *cp1 = (*cp1 & ~m1) | (t1 & m1); /* restore bits at p1 */ if (m2) *cp2 = (*cp2 & m2) | (t2 & ~m2); /* restore bits at p2 */ } for (i = 0; i < sb && i < n; i++) /* copy first sb bits */ setbit(self, i + a, t3 & BITMASK(other, i + b)); } /* starting at start, delete n bits from self */ static int delete_n(bitarrayobject *self, Py_ssize_t start, Py_ssize_t n) { const Py_ssize_t nbits = self->nbits; assert(0 <= start && start <= nbits); assert(0 <= n && n <= nbits - start); /* start == nbits implies n == 0 */ assert(start != nbits || n == 0); copy_n(self, start, self, start + n, nbits - start - n); return resize(self, nbits - n); } /* starting at start, insert n (uninitialized) bits into self */ static int insert_n(bitarrayobject *self, Py_ssize_t start, Py_ssize_t n) { const Py_ssize_t nbits = self->nbits; assert(0 <= start && start <= nbits); assert(n >= 0); if (resize(self, nbits + n) < 0) return -1; copy_n(self, start + n, self, start, nbits - start); return 0; } /* repeat self m times (negative m is treated as 0) */ static int repeat(bitarrayobject *self, Py_ssize_t m) { Py_ssize_t q, k = self->nbits; assert(self->readonly == 0); if (k == 0 || m == 1) /* nothing to do */ return 0; if (m <= 0) /* clear */ return resize(self, 0); assert(m > 1 && k > 0); if (k >= PY_SSIZE_T_MAX / m) { PyErr_Format(PyExc_OverflowError, "cannot repeat bitarray (of size %zd) %zd times", k, m); return -1; } q = k * m; /* number of resulting bits */ if (resize(self, q) < 0) return -1; /* k: number of bits which have been copied so far */ while (k <= q / 2) { /* double copies */ copy_n(self, k, self, 0, k); k *= 2; } assert(q / 2 < k && k <= q); copy_n(self, k, self, 0, q - k); /* copy remaining bits */ return 0; } /* the following functions xyz_span, xyz_range operate on bitarray items: - xyz_span: contiguous bits - self[a:b] (step=1) - xyz_range: self[start:stop:step] (step > 0 is required) */ /* invert bits self[a:b] in-place */ static void invert_span(bitarrayobject *self, Py_ssize_t a, Py_ssize_t b) { const Py_ssize_t n = b - a; /* number of bits to invert */ Py_ssize_t i; assert(0 <= a && a <= self->nbits); assert(0 <= b && b <= self->nbits); assert(self->readonly == 0); if (n >= 64) { const Py_ssize_t wa = (a + 63) / 64; /* word-range(wa, wb) */ const Py_ssize_t wb = b / 64; uint64_t *wbuff = WBUFF(self); 
invert_span(self, a, 64 * wa); for (i = wa; i < wb; i++) wbuff[i] = ~wbuff[i]; invert_span(self, 64 * wb, b); } else if (n >= 8) { const Py_ssize_t ca = BYTES(a); /* char-range(ca, cb) */ const Py_ssize_t cb = b / 8; char *buff = self->ob_item; invert_span(self, a, 8 * ca); for (i = ca; i < cb; i++) buff[i] = ~buff[i]; invert_span(self, 8 * cb, b); } else { /* (bit-) range(a, b) */ for (i = a; i < b; i++) self->ob_item[i / 8] ^= BITMASK(self, i); } } /* invert bits self[start:stop:step] in-place */ static void invert_range(bitarrayobject *self, Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step) { assert(step > 0); if (step == 1) { invert_span(self, start, stop); } else { const char *table = bitmask_table[IS_BE(self)]; char *buff = self->ob_item; Py_ssize_t i; for (i = start; i < stop; i += step) buff[i >> 3] ^= table[i & 7]; } } /* set bits self[a:b] to vi */ static void set_span(bitarrayobject *self, Py_ssize_t a, Py_ssize_t b, int vi) { assert(0 <= a && a <= self->nbits); assert(0 <= b && b <= self->nbits); assert(self->readonly == 0); if (b >= a + 8) { const Py_ssize_t ca = BYTES(a); /* char-range(ca, cb) */ const Py_ssize_t cb = b / 8; assert(a + 8 > 8 * ca && 8 * cb + 8 > b); set_span(self, a, 8 * ca, vi); memset(self->ob_item + ca, vi ? 0xff : 0x00, (size_t) (cb - ca)); set_span(self, 8 * cb, b, vi); } else { /* (bit-) range(a, b) */ while (a < b) setbit(self, a++, vi); } } /* set bits self[start:stop:step] to vi */ static void set_range(bitarrayobject *self, Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step, int vi) { assert(step > 0); if (step == 1) { set_span(self, start, stop, vi); } else { const char *table = bitmask_table[IS_BE(self)]; char *buff = self->ob_item; Py_ssize_t i; if (vi) { for (i = start; i < stop; i += step) buff[i >> 3] |= table[i & 7]; } else { for (i = start; i < stop; i += step) buff[i >> 3] &= ~table[i & 7]; } } } /* return number of 1 bits in self[a:b] */ static Py_ssize_t count_span(bitarrayobject *self, Py_ssize_t a, Py_ssize_t b) { const Py_ssize_t n = b - a; Py_ssize_t cnt = 0; assert(0 <= a && a <= self->nbits); assert(0 <= b && b <= self->nbits); if (n >= 64) { Py_ssize_t p = BYTES(a), w; /* first full byte */ p += to_aligned((void *) (self->ob_item + p)); /* align pointer */ w = (b / 8 - p) / 8; /* number of (full) words to count */ assert(8 * p - a < 64 && b - (8 * (p + 8 * w)) < 64 && w >= 0); cnt += count_span(self, a, 8 * p); cnt += popcnt_words((uint64_t *) (self->ob_item + p), w); cnt += count_span(self, 8 * (p + 8 * w), b); } else if (n >= 8) { const Py_ssize_t ca = BYTES(a); /* char-range(ca, cb) */ const Py_ssize_t cb = b / 8, m = cb - ca; assert(8 * ca - a < 8 && b - 8 * cb < 8 && 0 <= m && m < 8); cnt += count_span(self, a, 8 * ca); if (m) { /* starting at ca count in m bytes */ uint64_t tmp = 0; /* copy bytes we want to count into tmp word */ memcpy((char *) &tmp, self->ob_item + ca, (size_t) m); cnt += popcnt_64(tmp); } cnt += count_span(self, 8 * cb, b); } else { /* (bit-) range(a, b) */ while (a < b) cnt += getbit(self, a++); } return cnt; } /* return number of 1 bits in self[start:stop:step] */ static Py_ssize_t count_range(bitarrayobject *self, Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step) { assert(step > 0); if (step == 1) { return count_span(self, start, stop); } else { Py_ssize_t cnt = 0, i; for (i = start; i < stop; i += step) cnt += getbit(self, i); return cnt; } } /* return first (or rightmost in case right=1) occurrence of vi in self[a:b], -1 when not found */ static Py_ssize_t find_bit(bitarrayobject *self, int vi, 
Py_ssize_t a, Py_ssize_t b, int right) { const Py_ssize_t n = b - a; Py_ssize_t res, i; assert(0 <= a && a <= self->nbits); assert(0 <= b && b <= self->nbits); assert(0 <= vi && vi <= 1); if (n <= 0) return -1; /* When the search range is greater than 64 bits, we skip uint64 words. Note that we cannot check for n >= 64 here as the function could then go into an infinite recursive loop when a word is found. */ if (n > 64) { const Py_ssize_t wa = (a + 63) / 64; /* word-range(wa, wb) */ const Py_ssize_t wb = b / 64; const uint64_t *wbuff = WBUFF(self); const uint64_t w = vi ? 0 : ~0; if (right) { if ((res = find_bit(self, vi, 64 * wb, b, 1)) >= 0) return res; for (i = wb - 1; i >= wa; i--) { /* skip uint64 words */ if (w ^ wbuff[i]) return find_bit(self, vi, 64 * i, 64 * i + 64, 1); } return find_bit(self, vi, a, 64 * wa, 1); } else { if ((res = find_bit(self, vi, a, 64 * wa, 0)) >= 0) return res; for (i = wa; i < wb; i++) { /* skip uint64 words */ if (w ^ wbuff[i]) return find_bit(self, vi, 64 * i, 64 * i + 64, 0); } return find_bit(self, vi, 64 * wb, b, 0); } } /* For the same reason as above, we cannot check for n >= 8 here. */ if (n > 8) { const Py_ssize_t ca = BYTES(a); /* char-range(ca, cb) */ const Py_ssize_t cb = b / 8; const char *buff = self->ob_item; const char c = vi ? 0 : ~0; if (right) { if ((res = find_bit(self, vi, 8 * cb, b, 1)) >= 0) return res; for (i = cb - 1; i >= ca; i--) { /* skip bytes */ assert_byte_in_range(self, i); if (c ^ buff[i]) return find_bit(self, vi, 8 * i, 8 * i + 8, 1); } return find_bit(self, vi, a, 8 * ca, 1); } else { if ((res = find_bit(self, vi, a, 8 * ca, 0)) >= 0) return res; for (i = ca; i < cb; i++) { /* skip bytes */ assert_byte_in_range(self, i); if (c ^ buff[i]) return find_bit(self, vi, 8 * i, 8 * i + 8, 0); } return find_bit(self, vi, 8 * cb, b, 0); } } /* finally, search for the desired bit by stepping one-by-one */ for (i = right ? b - 1 : a; a <= i && i < b; i += right ? -1 : 1) if (getbit(self, i) == vi) return i; return -1; } /* Given sub-bitarray, return: -1: on error (after setting exception) 0, 1: value of integer sub or sub[0] if sub-bitarray has length 1 2: when sub is bitarray of length 0, 2, 3, ... */ static int value_sub(PyObject *sub) { if (PyIndex_Check(sub)) { int vi; return conv_pybit(sub, &vi) ? vi : -1; } if (bitarray_Check(sub)) { bitarrayobject *s = (bitarrayobject *) sub; return (s->nbits == 1) ? getbit(s, 0) : 2; } PyErr_Format(PyExc_TypeError, "sub_bitarray must be bitarray or int, " "not '%s'", Py_TYPE(sub)->tp_name); return -1; } /* Return first/rightmost occurrence of sub-bitarray (in self), such that sub is contained within self[start:stop], or -1 when sub is not found. */ static Py_ssize_t find_sub(bitarrayobject *self, bitarrayobject *sub, Py_ssize_t start, Py_ssize_t stop, int right) { const Py_ssize_t sbits = sub->nbits; const Py_ssize_t step = right ? -1 : 1; Py_ssize_t i, k; stop -= sbits - 1; i = right ? stop - 1 : start; while (start <= i && i < stop) { for (k = 0; k < sbits; k++) if (getbit(self, i + k) != getbit(sub, k)) goto next; return i; next: i += step; } return -1; } /* Return first/rightmost occurrence of bit or sub-bitarray (depending on type of sub) contained within self[start:stop], or -1 when not found. On Error, set exception and return -2. 
*/ static Py_ssize_t find_obj(bitarrayobject *self, PyObject *sub, Py_ssize_t start, Py_ssize_t stop, int right) { int vi; assert(0 <= start && start <= self->nbits); assert(0 <= stop && stop <= self->nbits); if ((vi = value_sub(sub)) < 0) return -2; if (vi < 2) return find_bit(self, vi, start, stop, right); assert(bitarray_Check(sub) && vi == 2); return find_sub(self, (bitarrayobject *) sub, start, stop, right); } /* return the number of non-overlapping occurrences of sub-bitarray within self[start:stop] */ static Py_ssize_t count_sub(bitarrayobject *self, bitarrayobject *sub, Py_ssize_t start, Py_ssize_t stop) { const Py_ssize_t sbits = sub->nbits; Py_ssize_t pos, cnt = 0; assert(0 <= start && start <= self->nbits); assert(0 <= stop && stop <= self->nbits); if (sbits == 0) return start <= stop ? stop - start + 1 : 0; while ((pos = find_sub(self, sub, start, stop, 0)) >= 0) { start = pos + sbits; cnt++; } return cnt; } /* set item i in self to given value */ static int set_item(bitarrayobject *self, Py_ssize_t i, PyObject *value) { int vi; if (!conv_pybit(value, &vi)) return -1; setbit(self, i, vi); return 0; } static int extend_bitarray(bitarrayobject *self, bitarrayobject *other) { /* We have to store the sizes before we resize, and since other may be self, we also need to store other->nbits. */ const Py_ssize_t self_nbits = self->nbits; const Py_ssize_t other_nbits = other->nbits; if (resize(self, self_nbits + other_nbits) < 0) return -1; copy_n(self, self_nbits, other, 0, other_nbits); return 0; } static int extend_iter(bitarrayobject *self, PyObject *iter) { const Py_ssize_t nbits = self->nbits; PyObject *item; assert(PyIter_Check(iter)); while ((item = PyIter_Next(iter))) { if (resize(self, self->nbits + 1) < 0 || set_item(self, self->nbits - 1, item) < 0) { Py_DECREF(item); /* ignore resize() return value as we fail anyhow */ resize(self, nbits); return -1; } Py_DECREF(item); } if (PyErr_Occurred()) return -1; return 0; } static int extend_sequence(bitarrayobject *self, PyObject *sequence) { const Py_ssize_t nbits = self->nbits; Py_ssize_t n, i; if ((n = PySequence_Size(sequence)) < 0) return -1; if (resize(self, nbits + n) < 0) return -1; for (i = 0; i < n; i++) { PyObject *item = PySequence_GetItem(sequence, i); if (item == NULL || set_item(self, nbits + i, item) < 0) { Py_XDECREF(item); resize(self, nbits); return -1; } Py_DECREF(item); } return 0; } static int extend_unicode01(bitarrayobject *self, PyObject *unicode) { const Py_ssize_t nbits = self->nbits; const Py_ssize_t length = PyUnicode_GET_LENGTH(unicode); Py_ssize_t i = nbits, j; /* i is the current index in self */ if (resize(self, nbits + length) < 0) return -1; for (j = 0; j < length; j++) { Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, j); switch (ch) { case '0': case '1': setbit(self, i++, ch - '0'); continue; case '_': continue; } if (Py_UNICODE_ISSPACE(ch)) continue; PyErr_Format(PyExc_ValueError, "expected '0' or '1' (or whitespace " "or underscore), got '%c' (0x%02x)", ch, ch); resize(self, nbits); /* no bits added on error */ return -1; } return resize(self, i); /* in case we ignored characters */ } static int extend_dispatch(bitarrayobject *self, PyObject *obj) { PyObject *iter; /* dispatch on type */ if (bitarray_Check(obj)) /* bitarray */ return extend_bitarray(self, (bitarrayobject *) obj); if (PyUnicode_Check(obj)) /* Unicode string */ return extend_unicode01(self, obj); if (PySequence_Check(obj)) /* sequence */ return extend_sequence(self, obj); if (PyIter_Check(obj)) /* iter */ return extend_iter(self, obj); 
/* finally, try to get the iterator of the object */ if ((iter = PyObject_GetIter(obj))) { int res = extend_iter(self, iter); Py_DECREF(iter); return res; } PyErr_Format(PyExc_TypeError, "'%s' object is not iterable", Py_TYPE(obj)->tp_name); return -1; } /************************************************************************** Implementation of bitarray methods **************************************************************************/ /* All methods which modify the buffer need to raise an exception when the buffer is read-only. This is necessary because the buffer may be imported from another object which has a read-only buffer. We decided to do this check at the top level here, by adding the RAISE_IF_READONLY macro to all methods which modify the buffer. We could have done it at the low level (in setbit(), etc.), however as many of these functions have no return value we decided to do it here. The situation is different from how resize() raises an exception when called on an imported buffer. There, it is easy to raise the exception in resize() itself, as there only one function which resizes the buffer, and this function (resize()) needs to report failures anyway. */ /* raise when buffer is readonly */ #define RAISE_IF_READONLY(self, ret_value) \ if (((bitarrayobject *) self)->readonly) { \ PyErr_SetString(PyExc_TypeError, "cannot modify read-only memory"); \ return ret_value; \ } static PyObject * bitarray_all(bitarrayobject *self) { return PyBool_FromLong(find_bit(self, 0, 0, self->nbits, 0) == -1); } PyDoc_STRVAR(all_doc, "all() -> bool\n\ \n\ Return `True` when all bits in bitarray are 1.\n\ `a.all()` is a faster version of `all(a)`."); static PyObject * bitarray_any(bitarrayobject *self) { return PyBool_FromLong(find_bit(self, 1, 0, self->nbits, 0) >= 0); } PyDoc_STRVAR(any_doc, "any() -> bool\n\ \n\ Return `True` when any bit in bitarray is 1.\n\ `a.any()` is a faster version of `any(a)`."); static PyObject * bitarray_append(bitarrayobject *self, PyObject *value) { int vi; RAISE_IF_READONLY(self, NULL); if (!conv_pybit(value, &vi)) return NULL; if (resize(self, self->nbits + 1) < 0) return NULL; setbit(self, self->nbits - 1, vi); Py_RETURN_NONE; } PyDoc_STRVAR(append_doc, "append(item, /)\n\ \n\ Append `item` to the end of the bitarray."); static PyObject * bitarray_bytereverse(bitarrayobject *self, PyObject *args) { const Py_ssize_t nbytes = Py_SIZE(self); Py_ssize_t start = 0, stop = nbytes; RAISE_IF_READONLY(self, NULL); if (!PyArg_ParseTuple(args, "|nn:bytereverse", &start, &stop)) return NULL; if (start < 0) start += nbytes; if (stop < 0) stop += nbytes; if (start < 0 || start > nbytes || stop < 0 || stop > nbytes) { PyErr_SetString(PyExc_IndexError, "byte index out of range"); return NULL; } if (stop > start) bytereverse(self->ob_item + start, stop - start); Py_RETURN_NONE; } PyDoc_STRVAR(bytereverse_doc, "bytereverse(start=0, stop=, /)\n\ \n\ For each byte in byte-range(`start`, `stop`) reverse bits in-place.\n\ The start and stop indices are given in terms of bytes (not bits).\n\ Also note that this method only changes the buffer; it does not change the\n\ bit-endianness of the bitarray object. 
Pad bits are left unchanged such\n\ that two consecutive calls will always leave the bitarray unchanged."); static PyObject * bitarray_buffer_info(bitarrayobject *self) { static PyObject *info = NULL; /* BufferInfo object */ PyObject *res, *args, *address, *readonly, *imported; if (info == NULL) { info = bitarray_module_attr("BufferInfo"); if (info == NULL) return NULL; } address = PyLong_FromVoidPtr((void *) self->ob_item); readonly = PyBool_FromLong(self->readonly); imported = PyBool_FromLong(self->buffer ? 1 : 0); if (address == NULL || readonly == NULL || imported == NULL) return NULL; args = Py_BuildValue("OnsnnOOi", address, Py_SIZE(self), ENDIAN_STR(self->endian), PADBITS(self), self->allocated, readonly, imported, self->ob_exports); Py_DECREF(address); Py_DECREF(readonly); Py_DECREF(imported); res = PyObject_CallObject(info, args); Py_DECREF(args); return res; } PyDoc_STRVAR(buffer_info_doc, "buffer_info() -> BufferInfo\n\ \n\ Return named tuple with following fields:\n\ \n\ 0. `address`: memory address of buffer\n\ 1. `nbytes`: buffer size (in bytes)\n\ 2. `endian`: bit-endianness as a string\n\ 3. `padbits`: number of pad bits\n\ 4. `alloc`: allocated memory for buffer (in bytes)\n\ 5. `readonly`: memory is read-only (bool)\n\ 6. `imported`: buffer is imported (bool)\n\ 7. `exports`: number of buffer exports"); static PyObject * bitarray_clear(bitarrayobject *self) { RAISE_IF_READONLY(self, NULL); if (resize(self, 0) < 0) return NULL; Py_RETURN_NONE; } PyDoc_STRVAR(clear_doc, "clear()\n\ \n\ Remove all items from bitarray."); /* Set readonly member to 1 if self is an instance of frozenbitarray. Return PyObject of self. On error, set exception and return NULL. */ static PyObject * freeze_if_frozen(bitarrayobject *self) { static PyObject *frozen = NULL; /* frozenbitarray class object */ int is_frozen; assert(self->ob_exports == 0 && self->buffer == NULL); if (frozen == NULL) { frozen = bitarray_module_attr("frozenbitarray"); if (frozen == NULL) return NULL; } if ((is_frozen = PyObject_IsInstance((PyObject *) self, frozen)) < 0) return NULL; if (is_frozen) { set_padbits(self); self->readonly = 1; } return (PyObject *) self; } static PyObject * bitarray_copy(bitarrayobject *self) { bitarrayobject *res; if ((res = bitarray_cp(self)) == NULL) return NULL; return freeze_if_frozen(res); } PyDoc_STRVAR(copy_doc, "copy() -> bitarray\n\ \n\ Return copy of bitarray (with same bit-endianness)."); static PyObject * bitarray_count(bitarrayobject *self, PyObject *args) { PyObject *sub = Py_None; Py_ssize_t start = 0, stop = PY_SSIZE_T_MAX, step = 1, slicelength, cnt; int vi; if (!PyArg_ParseTuple(args, "|Onnn:count", &sub , &start, &stop, &step)) return NULL; vi = (sub == Py_None) ? 1 : value_sub(sub); if (vi < 0) return NULL; if (step == 0) { PyErr_SetString(PyExc_ValueError, "step cannot be zero"); return NULL; } if (step > 0 && start > self->nbits) return PyLong_FromSsize_t(0); slicelength = PySlice_AdjustIndices(self->nbits, &start, &stop, step); if (vi < 2) { /* value count */ adjust_step_positive(slicelength, &start, &stop, &step); cnt = count_range(self, start, stop, step); return PyLong_FromSsize_t(vi ? 
cnt : slicelength - cnt); } assert(bitarray_Check(sub) && vi == 2); /* sub-bitarray count */ if (step != 1) { PyErr_SetString(PyExc_ValueError, "step must be 1 for sub-bitarray count"); return NULL; } cnt = count_sub(self, (bitarrayobject *) sub, start, stop); return PyLong_FromSsize_t(cnt); } PyDoc_STRVAR(count_doc, "count(value=1, start=0, stop=, step=1, /) -> int\n\ \n\ Number of occurrences of `value` bitarray within `[start:stop:step]`.\n\ Optional arguments `start`, `stop` and `step` are interpreted in\n\ slice notation, meaning `a.count(value, start, stop, step)` equals\n\ `a[start:stop:step].count(value)`.\n\ The `value` may also be a sub-bitarray. In this case non-overlapping\n\ occurrences are counted within `[start:stop]` (`step` must be 1)."); static PyObject * bitarray_extend(bitarrayobject *self, PyObject *obj) { RAISE_IF_READONLY(self, NULL); if (extend_dispatch(self, obj) < 0) return NULL; Py_RETURN_NONE; } PyDoc_STRVAR(extend_doc, "extend(iterable, /)\n\ \n\ Append items from to the end of the bitarray.\n\ If `iterable` is a (Unicode) string, each `0` and `1` are appended as\n\ bits (ignoring whitespace and underscore)."); static PyObject * bitarray_fill(bitarrayobject *self) { const Py_ssize_t p = PADBITS(self); /* number of pad bits */ RAISE_IF_READONLY(self, NULL); set_padbits(self); /* there is no reason to call resize() - .fill() will not raise BufferError when buffer is imported or exported */ self->nbits += p; return PyLong_FromSsize_t(p); } PyDoc_STRVAR(fill_doc, "fill() -> int\n\ \n\ Add zeros to the end of the bitarray, such that the length will be\n\ a multiple of 8, and return the number of bits added [0..7]."); static PyObject * bitarray_find(bitarrayobject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"", "", "", "right", NULL}; Py_ssize_t start = 0, stop = PY_SSIZE_T_MAX, pos; int right = 0; PyObject *sub; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|nni", kwlist, &sub, &start, &stop, &right)) return NULL; if (start > self->nbits) /* cannot find anything (including empty sub-bitarray) */ return PyLong_FromSsize_t(-1); PySlice_AdjustIndices(self->nbits, &start, &stop, 1); pos = find_obj(self, sub, start, stop, right); if (pos == -2) return NULL; return PyLong_FromSsize_t(pos); } PyDoc_STRVAR(find_doc, "find(sub_bitarray, start=0, stop=, /, right=False) -> int\n\ \n\ Return lowest (or rightmost when `right=True`) index where sub_bitarray\n\ is found, such that sub_bitarray is contained within `[start:stop]`.\n\ Return -1 when sub_bitarray is not found."); static PyObject * bitarray_index(bitarrayobject *self, PyObject *args, PyObject *kwds) { PyObject *result; result = bitarray_find(self, args, kwds); if (result == NULL) return NULL; assert(PyLong_Check(result)); if (PyLong_AsSsize_t(result) < 0) { Py_DECREF(result); return PyErr_Format(PyExc_ValueError, "%A not in bitarray", PyTuple_GET_ITEM(args, 0)); } return result; } PyDoc_STRVAR(index_doc, "index(sub_bitarray, start=0, stop=, /, right=False) -> int\n\ \n\ Return lowest (or rightmost when `right=True`) index where sub_bitarray\n\ is found, such that sub_bitarray is contained within `[start:stop]`.\n\ Raises `ValueError` when sub_bitarray is not present."); static PyObject * bitarray_insert(bitarrayobject *self, PyObject *args) { Py_ssize_t n = self->nbits, i; int vi; RAISE_IF_READONLY(self, NULL); if (!PyArg_ParseTuple(args, "nO&:insert", &i, conv_pybit, &vi)) return NULL; if (i < 0) { i += n; if (i < 0) i = 0; } if (i > n) i = n; if (insert_n(self, i, 1) < 0) return NULL; 
setbit(self, i, vi); Py_RETURN_NONE; } PyDoc_STRVAR(insert_doc, "insert(index, value, /)\n\ \n\ Insert `value` into bitarray before `index`."); static PyObject * bitarray_invert(bitarrayobject *self, PyObject *args) { PyObject *arg = Py_None; RAISE_IF_READONLY(self, NULL); if (!PyArg_ParseTuple(args, "|O:invert", &arg)) return NULL; if (PyIndex_Check(arg)) { Py_ssize_t i; i = PyNumber_AsSsize_t(arg, NULL); if (i == -1 && PyErr_Occurred()) return NULL; if (i < 0) i += self->nbits; if (i < 0 || i >= self->nbits) { PyErr_SetString(PyExc_IndexError, "index out of range"); return NULL; } self->ob_item[i / 8] ^= BITMASK(self, i); Py_RETURN_NONE; } if (PySlice_Check(arg)) { Py_ssize_t start, stop, step, slicelength; if (PySlice_GetIndicesEx(arg, self->nbits, &start, &stop, &step, &slicelength) < 0) return NULL; adjust_step_positive(slicelength, &start, &stop, &step); invert_range(self, start, stop, step); Py_RETURN_NONE; } if (arg == Py_None) { invert_span(self, 0, self->nbits); Py_RETURN_NONE; } return PyErr_Format(PyExc_TypeError, "index expect, not '%s' object", Py_TYPE(arg)->tp_name); } PyDoc_STRVAR(invert_doc, "invert(index=, /)\n\ \n\ Invert all bits in bitarray (in-place).\n\ When the optional `index` is given, only invert the single bit at `index`."); static PyObject * bitarray_reduce(bitarrayobject *self) { static PyObject *reconstructor = NULL; PyObject *dict, *bytes, *result; if (reconstructor == NULL) { reconstructor = bitarray_module_attr("_bitarray_reconstructor"); if (reconstructor == NULL) return NULL; } dict = PyObject_GetAttrString((PyObject *) self, "__dict__"); if (dict == NULL) { PyErr_Clear(); dict = Py_None; Py_INCREF(dict); } set_padbits(self); bytes = PyBytes_FromStringAndSize(self->ob_item, Py_SIZE(self)); if (bytes == NULL) { Py_DECREF(dict); return NULL; } result = Py_BuildValue("O(OOsii)O", reconstructor, Py_TYPE(self), bytes, ENDIAN_STR(self->endian), (int) PADBITS(self), self->readonly, dict); Py_DECREF(dict); Py_DECREF(bytes); return result; } PyDoc_STRVAR(reduce_doc, "Internal. Used for pickling support."); static PyObject * bitarray_repr(bitarrayobject *self) { PyObject *result; size_t nbits = self->nbits, strsize, i; char *str; if (nbits == 0) return PyUnicode_FromString("bitarray()"); strsize = nbits + 12; /* 12 is length of "bitarray('')" */ str = PyMem_New(char, strsize); if (str == NULL) return PyErr_NoMemory(); strcpy(str, "bitarray('"); /* has length 10 */ for (i = 0; i < nbits; i++) str[i + 10] = getbit(self, i) + '0'; str[strsize - 2] = '\''; str[strsize - 1] = ')'; /* we know the string length beforehand - not null-terminated */ result = PyUnicode_FromStringAndSize(str, strsize); PyMem_Free((void *) str); return result; } static PyObject * bitarray_reverse(bitarrayobject *self) { const Py_ssize_t p = PADBITS(self); /* number of pad bits */ char *buff = self->ob_item; RAISE_IF_READONLY(self, NULL); /* Increase self->nbits to full buffer size. The p pad bits will later be the leading p bits. To remove those p leading bits, we must have p extra bits at the end of the bitarray. */ self->nbits += p; /* reverse order of bytes */ swap_bytes(buff, Py_SIZE(self)); /* reverse order of bits within each byte */ bytereverse(self->ob_item, Py_SIZE(self)); /* Remove the p pad bits at the end of the original bitarray that are now the leading p bits. The reason why we don't just call delete_n(self, 0, p) here is that it calls resize(), and we want to allow reversing an imported writable buffer. 
*/ copy_n(self, 0, self, p, self->nbits - p); self->nbits -= p; Py_RETURN_NONE; } PyDoc_STRVAR(reverse_doc, "reverse()\n\ \n\ Reverse all bits in bitarray (in-place)."); static PyObject * bitarray_setall(bitarrayobject *self, PyObject *value) { int vi; RAISE_IF_READONLY(self, NULL); if (!conv_pybit(value, &vi)) return NULL; if (self->ob_item) memset(self->ob_item, vi ? 0xff : 0x00, (size_t) Py_SIZE(self)); Py_RETURN_NONE; } PyDoc_STRVAR(setall_doc, "setall(value, /)\n\ \n\ Set all elements in bitarray to `value`.\n\ Note that `a.setall(value)` is equivalent to `a[:] = value`."); static PyObject * bitarray_sort(bitarrayobject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"reverse", NULL}; Py_ssize_t nbits = self->nbits, cnt1; int reverse = 0; RAISE_IF_READONLY(self, NULL); if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:sort", kwlist, &reverse)) return NULL; cnt1 = count_span(self, 0, nbits); if (reverse) { set_span(self, 0, cnt1, 1); set_span(self, cnt1, nbits, 0); } else { Py_ssize_t cnt0 = nbits - cnt1; set_span(self, 0, cnt0, 0); set_span(self, cnt0, nbits, 1); } Py_RETURN_NONE; } PyDoc_STRVAR(sort_doc, "sort(reverse=False)\n\ \n\ Sort all bits in bitarray (in-place)."); static PyObject * bitarray_tolist(bitarrayobject *self) { PyObject *list; Py_ssize_t i; list = PyList_New(self->nbits); if (list == NULL) return NULL; for (i = 0; i < self->nbits; i++) { PyObject *item = PyLong_FromLong(getbit(self, i)); if (item == NULL) { Py_DECREF(list); return NULL; } PyList_SET_ITEM(list, i, item); } return list; } PyDoc_STRVAR(tolist_doc, "tolist() -> list\n\ \n\ Return bitarray as list of integers.\n\ `a.tolist()` equals `list(a)`."); static PyObject * bitarray_frombytes(bitarrayobject *self, PyObject *buffer) { const Py_ssize_t n = Py_SIZE(self); /* nbytes before extending */ const Py_ssize_t p = PADBITS(self); /* number of pad bits */ Py_buffer view; RAISE_IF_READONLY(self, NULL); if (PyObject_GetBuffer(buffer, &view, PyBUF_SIMPLE) < 0) return NULL; /* resize to accommodate new bytes */ if (resize(self, 8 * (n + view.len)) < 0) goto error; assert(Py_SIZE(self) == n + view.len); memcpy(self->ob_item + n, (char *) view.buf, (size_t) view.len); /* remove pad bits staring at previous bit length (8 * n - p) */ if (delete_n(self, 8 * n - p, p) < 0) goto error; PyBuffer_Release(&view); Py_RETURN_NONE; error: PyBuffer_Release(&view); return NULL; } PyDoc_STRVAR(frombytes_doc, "frombytes(bytes, /)\n\ \n\ Extend bitarray with raw bytes from a bytes-like object.\n\ Each added byte will add eight bits to the bitarray."); static PyObject * bitarray_tobytes(bitarrayobject *self) { set_padbits(self); return PyBytes_FromStringAndSize(self->ob_item, Py_SIZE(self)); } PyDoc_STRVAR(tobytes_doc, "tobytes() -> bytes\n\ \n\ Return the bitarray buffer (pad bits are set to zero)."); /* Extend self with bytes from f.read(n). Return number of bytes actually read and extended, or -1 on failure (after setting exception). 
*/ static Py_ssize_t extend_fread(bitarrayobject *self, PyObject *f, Py_ssize_t n) { PyObject *bytes, *ret; Py_ssize_t res; /* result (size or -1) */ bytes = PyObject_CallMethod(f, "read", "n", n); if (bytes == NULL) return -1; if (!PyBytes_Check(bytes)) { Py_DECREF(bytes); PyErr_Format(PyExc_TypeError, ".read() did not return 'bytes', " "got '%s'", Py_TYPE(bytes)->tp_name); return -1; } res = PyBytes_GET_SIZE(bytes); assert(0 <= res && res <= n); ret = bitarray_frombytes(self, bytes); Py_DECREF(bytes); if (ret == NULL) res = -1; Py_DECREF(ret); return res; } static PyObject * bitarray_fromfile(bitarrayobject *self, PyObject *args) { PyObject *f; Py_ssize_t nread = 0, n = -1; RAISE_IF_READONLY(self, NULL); if (!PyArg_ParseTuple(args, "O|n:fromfile", &f, &n)) return NULL; if (n < 0) /* read till EOF */ n = PY_SSIZE_T_MAX; while (nread < n) { Py_ssize_t nblock = Py_MIN(n - nread, BLOCKSIZE), size; size = extend_fread(self, f, nblock); if (size < 0) return NULL; nread += size; assert(size <= nblock && nread <= n); if (size < nblock) { if (n == PY_SSIZE_T_MAX) /* read till EOF */ break; PyErr_SetString(PyExc_EOFError, "not enough bytes to read"); return NULL; } } Py_RETURN_NONE; } PyDoc_STRVAR(fromfile_doc, "fromfile(f, n=-1, /)\n\ \n\ Extend bitarray with up to `n` bytes read from file object `f` (or any\n\ other binary stream what supports a `.read()` method, e.g. `io.BytesIO`).\n\ Each read byte will add eight bits to the bitarray. When `n` is omitted\n\ or negative, reads and extends all data until EOF.\n\ When `n` is non-negative but exceeds the available data, `EOFError` is\n\ raised. However, the available data is still read and extended."); static PyObject * bitarray_tofile(bitarrayobject *self, PyObject *f) { const Py_ssize_t nbytes = Py_SIZE(self); Py_ssize_t offset; set_padbits(self); for (offset = 0; offset < nbytes; offset += BLOCKSIZE) { PyObject *ret; /* return object from write call */ Py_ssize_t size = Py_MIN(nbytes - offset, BLOCKSIZE); assert(size >= 0 && offset + size <= nbytes); /* basically: f.write(memoryview(self)[offset:offset + size] */ ret = PyObject_CallMethod(f, "write", "y#", self->ob_item + offset, size); if (ret == NULL) return NULL; Py_DECREF(ret); /* drop write result */ } Py_RETURN_NONE; } PyDoc_STRVAR(tofile_doc, "tofile(f, /)\n\ \n\ Write bitarray buffer to file object `f`."); static PyObject * bitarray_to01(bitarrayobject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"group", "sep", NULL}; size_t strsize = self->nbits, j, nsep; Py_ssize_t group = 0, i; PyObject *result; char *sep = " ", *str; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|ns:to01", kwlist, &group, &sep)) return NULL; if (group < 0) return PyErr_Format(PyExc_ValueError, "non-negative integer " "expected, got: %zd", group); nsep = (group && strsize) ? 
strlen(sep) : 0; /* 0 means no grouping */ if (nsep) strsize += nsep * ((strsize - 1) / group); str = PyMem_New(char, strsize); if (str == NULL) return PyErr_NoMemory(); for (i = j = 0; i < self->nbits; i++) { if (nsep && i && i % group == 0) { memcpy(str + j, sep, nsep); j += nsep; } str[j++] = getbit(self, i) + '0'; } assert(j == strsize); result = PyUnicode_FromStringAndSize(str, strsize); PyMem_Free((void *) str); return result; } PyDoc_STRVAR(to01_doc, "to01(group=0, sep=' ') -> str\n\ \n\ Return bitarray as (Unicode) string of `0`s and `1`s.\n\ The bits are grouped into `group` bits (default is no grouping).\n\ When grouped, the string `sep` is inserted between groups\n\ of `group` characters, default is a space."); static PyObject * bitarray_unpack(bitarrayobject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"zero", "one", NULL}; PyObject *res; char zero = 0x00, one = 0x01, *str; Py_ssize_t i; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|cc:unpack", kwlist, &zero, &one)) return NULL; res = PyBytes_FromStringAndSize(NULL, self->nbits); if (res == NULL) return NULL; str = PyBytes_AsString(res); for (i = 0; i < self->nbits; i++) str[i] = getbit(self, i) ? one : zero; return res; } PyDoc_STRVAR(unpack_doc, "unpack(zero=b'\\x00', one=b'\\x01') -> bytes\n\ \n\ Return bytes that contain one byte for each bit in the bitarray,\n\ using specified mapping."); static PyObject * bitarray_pack(bitarrayobject *self, PyObject *buffer) { const Py_ssize_t nbits = self->nbits; Py_buffer view; Py_ssize_t i; RAISE_IF_READONLY(self, NULL); if (PyObject_GetBuffer(buffer, &view, PyBUF_SIMPLE) < 0) return NULL; if (resize(self, nbits + view.len) < 0) { PyBuffer_Release(&view); return NULL; } for (i = 0; i < view.len; i++) setbit(self, nbits + i, ((char *) view.buf)[i]); PyBuffer_Release(&view); Py_RETURN_NONE; } PyDoc_STRVAR(pack_doc, "pack(bytes, /)\n\ \n\ Extend bitarray from a bytes-like object, where each byte corresponds\n\ to a single bit. 
The byte `b'\\x00'` maps to bit 0 and all other bytes\n\ map to bit 1."); static PyObject * bitarray_pop(bitarrayobject *self, PyObject *args) { Py_ssize_t n = self->nbits, i = -1; long vi; RAISE_IF_READONLY(self, NULL); if (!PyArg_ParseTuple(args, "|n:pop", &i)) return NULL; if (n == 0) { /* special case -- most common failure cause */ PyErr_SetString(PyExc_IndexError, "pop from empty bitarray"); return NULL; } if (i < 0) i += n; if (i < 0 || i >= n) { PyErr_SetString(PyExc_IndexError, "pop index out of range"); return NULL; } vi = getbit(self, i); if (delete_n(self, i, 1) < 0) return NULL; return PyLong_FromLong(vi); } PyDoc_STRVAR(pop_doc, "pop(index=-1, /) -> item\n\ \n\ Remove and return item at `index` (default last).\n\ Raises `IndexError` if index is out of range."); static PyObject * bitarray_remove(bitarrayobject *self, PyObject *value) { Py_ssize_t i; int vi; RAISE_IF_READONLY(self, NULL); if (!conv_pybit(value, &vi)) return NULL; i = find_bit(self, vi, 0, self->nbits, 0); if (i < 0) return PyErr_Format(PyExc_ValueError, "%d not in bitarray", vi); if (delete_n(self, i, 1) < 0) return NULL; Py_RETURN_NONE; } PyDoc_STRVAR(remove_doc, "remove(value, /)\n\ \n\ Remove the first occurrence of `value`.\n\ Raises `ValueError` if value is not present."); static PyObject * bitarray_sizeof(bitarrayobject *self) { Py_ssize_t res; res = sizeof(bitarrayobject) + self->allocated; if (self->buffer) res += sizeof(Py_buffer); return PyLong_FromSsize_t(res); } PyDoc_STRVAR(sizeof_doc, "Return size of bitarray object in bytes."); /* private method - called only when frozenbitarray is initialized to disallow memoryviews to change the buffer */ static PyObject * bitarray_freeze(bitarrayobject *self) { if (self->buffer) { assert(self->buffer->readonly == self->readonly); if (self->readonly == 0) { PyErr_SetString(PyExc_TypeError, "cannot import writable " "buffer into frozenbitarray"); return NULL; } } set_padbits(self); self->readonly = 1; Py_RETURN_NONE; } /* -------- bitarray methods exposed in debug mode for testing ---------- */ #ifndef NDEBUG static PyObject * bitarray_shift_r8(bitarrayobject *self, PyObject *args) { Py_ssize_t a, b; int n; if (!PyArg_ParseTuple(args, "nni", &a, &b, &n)) return NULL; shift_r8(self, a, b, n); Py_RETURN_NONE; } static PyObject * bitarray_copy_n(bitarrayobject *self, PyObject *args) { PyObject *other; Py_ssize_t a, b, n; if (!PyArg_ParseTuple(args, "nO!nn", &a, &Bitarray_Type, &other, &b, &n)) return NULL; copy_n(self, a, (bitarrayobject *) other, b, n); Py_RETURN_NONE; } static PyObject * bitarray_overlap(bitarrayobject *self, PyObject *other) { assert(bitarray_Check(other)); return PyBool_FromLong(buffers_overlap(self, (bitarrayobject *) other)); } #endif /* NDEBUG */ /* ---------------------- bitarray getset members ---------------------- */ static PyObject * bitarray_get_endian(bitarrayobject *self, void *Py_UNUSED(ignored)) { return PyUnicode_FromString(ENDIAN_STR(self->endian)); } static PyObject * bitarray_get_nbytes(bitarrayobject *self, void *Py_UNUSED(ignored)) { return PyLong_FromSsize_t(Py_SIZE(self)); } static PyObject * bitarray_get_padbits(bitarrayobject *self, void *Py_UNUSED(ignored)) { return PyLong_FromSsize_t(PADBITS(self)); } static PyObject * bitarray_get_readonly(bitarrayobject *self, void *Py_UNUSED(ignored)) { return PyBool_FromLong(self->readonly); } static PyGetSetDef bitarray_getsets[] = { {"endian", (getter) bitarray_get_endian, NULL, PyDoc_STR("bit-endianness as Unicode string")}, {"nbytes", (getter) bitarray_get_nbytes, NULL, 
PyDoc_STR("buffer size in bytes")}, {"padbits", (getter) bitarray_get_padbits, NULL, PyDoc_STR("number of pad bits")}, {"readonly", (getter) bitarray_get_readonly, NULL, PyDoc_STR("bool indicating whether buffer is read-only")}, {NULL, NULL, NULL, NULL} }; /* ----------------------- bitarray_as_sequence ------------------------ */ static Py_ssize_t bitarray_len(bitarrayobject *self) { return self->nbits; } static PyObject * bitarray_concat(bitarrayobject *self, PyObject *other) { bitarrayobject *res; if ((res = bitarray_cp(self)) == NULL) return NULL; if (extend_dispatch(res, other) < 0) { Py_DECREF(res); return NULL; } return freeze_if_frozen(res); } static PyObject * bitarray_repeat(bitarrayobject *self, Py_ssize_t n) { bitarrayobject *res; if ((res = bitarray_cp(self)) == NULL) return NULL; if (repeat(res, n) < 0) { Py_DECREF(res); return NULL; } return freeze_if_frozen(res); } static PyObject * bitarray_item(bitarrayobject *self, Py_ssize_t i) { if (i < 0 || i >= self->nbits) { PyErr_SetString(PyExc_IndexError, "bitarray index out of range"); return NULL; } return PyLong_FromLong(getbit(self, i)); } static int bitarray_ass_item(bitarrayobject *self, Py_ssize_t i, PyObject *value) { RAISE_IF_READONLY(self, -1); if (i < 0 || i >= self->nbits) { PyErr_SetString(PyExc_IndexError, "bitarray assignment index out of range"); return -1; } if (value == NULL) return delete_n(self, i, 1); else return set_item(self, i, value); } /* return 1 if value (which can be an int or bitarray) is in self, 0 otherwise, and -1 on error */ static int bitarray_contains(bitarrayobject *self, PyObject *value) { Py_ssize_t pos; pos = find_obj(self, value, 0, self->nbits, 0); if (pos == -2) return -1; return pos >= 0; } static PyObject * bitarray_inplace_concat(bitarrayobject *self, PyObject *other) { RAISE_IF_READONLY(self, NULL); if (extend_dispatch(self, other) < 0) return NULL; Py_INCREF(self); return (PyObject *) self; } static PyObject * bitarray_inplace_repeat(bitarrayobject *self, Py_ssize_t n) { RAISE_IF_READONLY(self, NULL); if (repeat(self, n) < 0) return NULL; Py_INCREF(self); return (PyObject *) self; } static PySequenceMethods bitarray_as_sequence = { (lenfunc) bitarray_len, /* sq_length */ (binaryfunc) bitarray_concat, /* sq_concat */ (ssizeargfunc) bitarray_repeat, /* sq_repeat */ (ssizeargfunc) bitarray_item, /* sq_item */ 0, /* sq_slice */ (ssizeobjargproc) bitarray_ass_item, /* sq_ass_item */ 0, /* sq_ass_slice */ (objobjproc) bitarray_contains, /* sq_contains */ (binaryfunc) bitarray_inplace_concat, /* sq_inplace_concat */ (ssizeargfunc) bitarray_inplace_repeat, /* sq_inplace_repeat */ }; /* ----------------------- bitarray_as_mapping ------------------------- */ /* return new bitarray with item in self, specified by slice */ static PyObject * getslice(bitarrayobject *self, PyObject *slice) { Py_ssize_t start, stop, step, slicelength; bitarrayobject *res; assert(PySlice_Check(slice)); if (PySlice_GetIndicesEx(slice, self->nbits, &start, &stop, &step, &slicelength) < 0) return NULL; res = newbitarrayobject(Py_TYPE(self), slicelength, self->endian); if (res == NULL) return NULL; if (step == 1) { copy_n(res, 0, self, start, slicelength); } else { Py_ssize_t i, j; for (i = 0, j = start; i < slicelength; i++, j += step) setbit(res, i, getbit(self, j)); } return freeze_if_frozen(res); } static int ensure_mask_size(bitarrayobject *self, bitarrayobject *mask) { if (self->nbits != mask->nbits) { PyErr_Format(PyExc_IndexError, "bitarray length is %zd, but " "mask has length %zd", self->nbits, mask->nbits); 
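        /* Illustration (sketch, not exhaustive): indexing a length-4 bitarray with a length-3 mask, e.g. bitarray("1011")[bitarray("001")], reaches this branch and raises: IndexError: bitarray length is 4, but mask has length 3 */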
return -1; } return 0; } /* return a new bitarray with items from 'self' masked by bitarray 'mask' */ static PyObject * getmask(bitarrayobject *self, bitarrayobject *mask) { bitarrayobject *res; Py_ssize_t i, j, n; if (ensure_mask_size(self, mask) < 0) return NULL; n = count_span(mask, 0, mask->nbits); res = newbitarrayobject(Py_TYPE(self), n, self->endian); if (res == NULL) return NULL; for (i = j = 0; i < mask->nbits; i++) { if (getbit(mask, i)) setbit(res, j++, getbit(self, i)); } assert(j == n); return freeze_if_frozen(res); } /* Return j-th item from sequence. The item is considered an index into an array with given length, and is normalized pythonically. On failure, an exception is set and -1 is returned. */ static Py_ssize_t index_from_seq(PyObject *sequence, Py_ssize_t j, Py_ssize_t length) { PyObject *item; Py_ssize_t i; if ((item = PySequence_GetItem(sequence, j)) == NULL) return -1; i = PyNumber_AsSsize_t(item, PyExc_IndexError); Py_DECREF(item); if (i == -1 && PyErr_Occurred()) return -1; if (i < 0) i += length; if (i < 0 || i >= length) { PyErr_SetString(PyExc_IndexError, "bitarray index out of range"); return -1; } return i; } /* return a new bitarray with items from 'self' listed by sequence (of indices) 'seq' */ static PyObject * getsequence(bitarrayobject *self, PyObject *seq) { bitarrayobject *res; Py_ssize_t i, j, n; n = PySequence_Size(seq); res = newbitarrayobject(Py_TYPE(self), n, self->endian); if (res == NULL) return NULL; for (j = 0; j < n; j++) { if ((i = index_from_seq(seq, j, self->nbits)) < 0) { Py_DECREF(res); return NULL; } setbit(res, j, getbit(self, i)); } return freeze_if_frozen(res); } static int subscr_seq_check(PyObject *item) { if (PyTuple_Check(item)) { PyErr_SetString(PyExc_TypeError, "multiple dimensions not supported"); return -1; } if (PySequence_Check(item)) return 0; PyErr_Format(PyExc_TypeError, "bitarray indices must be integers, " "slices or sequences, not '%s'", Py_TYPE(item)->tp_name); return -1; } static PyObject * bitarray_subscr(bitarrayobject *self, PyObject *item) { if (PyIndex_Check(item)) { Py_ssize_t i; i = PyNumber_AsSsize_t(item, PyExc_IndexError); if (i == -1 && PyErr_Occurred()) return NULL; if (i < 0) i += self->nbits; return bitarray_item(self, i); } if (PySlice_Check(item)) return getslice(self, item); if (bitarray_Check(item)) return getmask(self, (bitarrayobject *) item); if (subscr_seq_check(item) < 0) return NULL; return getsequence(self, item); } /* The following functions are called from assign_slice(). */ /* set items in self, specified by slice, to other bitarray */ static int setslice_bitarray(bitarrayobject *self, PyObject *slice, bitarrayobject *other) { Py_ssize_t start, stop, step, slicelength, increase; int other_copied = 0, res = -1; assert(PySlice_Check(slice)); if (PySlice_GetIndicesEx(slice, self->nbits, &start, &stop, &step, &slicelength) < 0) return -1; /* number of bits by which self has to be increased (decreased) */ increase = other->nbits - slicelength; /* Make a copy of other, in case the buffers overlap. This is obviously the case when self and other are the same object, but can also happen when the two bitarrays share memory. 
*/ if (buffers_overlap(self, other)) { if ((other = bitarray_cp(other)) == NULL) return -1; other_copied = 1; } if (step == 1) { if (increase > 0) { /* increase self */ if (insert_n(self, start + slicelength, increase) < 0) goto finish; } if (increase < 0) { /* decrease self */ if (delete_n(self, start + other->nbits, -increase) < 0) goto finish; } /* copy new values into self */ copy_n(self, start, other, 0, other->nbits); } else { Py_ssize_t i, j; if (increase != 0) { PyErr_Format(PyExc_ValueError, "attempt to assign sequence of " "size %zd to extended slice of size %zd", other->nbits, slicelength); goto finish; } for (i = 0, j = start; i < slicelength; i++, j += step) setbit(self, j, getbit(other, i)); } res = 0; finish: if (other_copied) Py_DECREF(other); return res; } /* set items in self, specified by slice, to value */ static int setslice_bool(bitarrayobject *self, PyObject *slice, PyObject *value) { Py_ssize_t start, stop, step, slicelength; int vi; assert(PySlice_Check(slice) && PyIndex_Check(value)); if (!conv_pybit(value, &vi)) return -1; if (PySlice_GetIndicesEx(slice, self->nbits, &start, &stop, &step, &slicelength) < 0) return -1; adjust_step_positive(slicelength, &start, &stop, &step); set_range(self, start, stop, step, vi); return 0; } /* delete items in self, specified by slice */ static int delslice(bitarrayobject *self, PyObject *slice) { Py_ssize_t start, stop, step, slicelength; assert(PySlice_Check(slice)); if (PySlice_GetIndicesEx(slice, self->nbits, &start, &stop, &step, &slicelength) < 0) return -1; adjust_step_positive(slicelength, &start, &stop, &step); if (step > 1) { /* set items not to be removed (up to stop) */ Py_ssize_t i = start + 1, j = start; if (step >= 4) { for (; i < stop; i += step) { Py_ssize_t length = Py_MIN(step - 1, stop - i); copy_n(self, j, self, i, length); j += length; } } else { for (; i < stop; i++) { if ((i - start) % step != 0) setbit(self, j++, getbit(self, i)); } } assert(slicelength == 0 || j == stop - slicelength); } return delete_n(self, stop - slicelength, slicelength); } /* assign slice of bitarray self to value */ static int assign_slice(bitarrayobject *self, PyObject *slice, PyObject *value) { if (value == NULL) return delslice(self, slice); if (bitarray_Check(value)) return setslice_bitarray(self, slice, (bitarrayobject *) value); if (PyIndex_Check(value)) return setslice_bool(self, slice, value); PyErr_Format(PyExc_TypeError, "bitarray or int expected for slice " "assignment, not '%s'", Py_TYPE(value)->tp_name); return -1; } /* The following functions are called from assign_mask(). 
*/ /* assign mask of bitarray self to bitarray other */ static int setmask_bitarray(bitarrayobject *self, bitarrayobject *mask, bitarrayobject *other) { Py_ssize_t n, i, j; assert(self->nbits == mask->nbits); n = count_span(mask, 0, mask->nbits); /* mask size */ if (n != other->nbits) { PyErr_Format(PyExc_IndexError, "attempt to assign mask of size %zd " "to bitarray of size %zd", n, other->nbits); return -1; } for (i = j = 0; i < mask->nbits; i++) { if (getbit(mask, i)) setbit(self, i, getbit(other, j++)); } assert(j == n); return 0; } /* assign mask of bitarray self to boolean value */ static int setmask_bool(bitarrayobject *self, bitarrayobject *mask, PyObject *value) { static char *expr[] = {"a &= ~mask", /* a[mask] = 0 */ "a |= mask"}; /* a[mask] = 1 */ int vi; if (!conv_pybit(value, &vi)) return -1; PyErr_Format(PyExc_NotImplementedError, "mask assignment to bool not " "implemented;\n`a[mask] = %d` equivalent to `%s`", vi, expr[vi]); return -1; } /* delete items in self, specified by mask */ static int delmask(bitarrayobject *self, bitarrayobject *mask) { Py_ssize_t n = 0, i; assert(self->nbits == mask->nbits); for (i = 0; i < mask->nbits; i++) { if (getbit(mask, i) == 0) /* set items we want to keep */ setbit(self, n++, getbit(self, i)); } assert(self == mask || n == mask->nbits - count_span(mask, 0, mask->nbits)); return resize(self, n); } /* assign mask of bitarray self to value */ static int assign_mask(bitarrayobject *self, bitarrayobject *mask, PyObject *value) { if (ensure_mask_size(self, mask) < 0) return -1; if (value == NULL) return delmask(self, mask); if (bitarray_Check(value)) return setmask_bitarray(self, mask, (bitarrayobject *) value); if (PyIndex_Check(value)) return setmask_bool(self, mask, value); PyErr_Format(PyExc_TypeError, "bitarray or int expected for mask " "assignment, not '%s'", Py_TYPE(value)->tp_name); return -1; } /* The following functions are called from assign_sequence(). */ /* assign sequence (of indices) of bitarray self to bitarray */ static int setseq_bitarray(bitarrayobject *self, PyObject *seq, bitarrayobject *other) { Py_ssize_t n, i, j; int other_copied = 0, res = -1; n = PySequence_Size(seq); if (n != other->nbits) { PyErr_Format(PyExc_ValueError, "attempt to assign sequence of " "size %zd to bitarray of size %zd", n, other->nbits); return -1; } /* Make a copy of other, see comment in setslice_bitarray(). 
*/ if (buffers_overlap(self, other)) { if ((other = bitarray_cp(other)) == NULL) return -1; other_copied = 1; } for (j = 0; j < n; j++) { if ((i = index_from_seq(seq, j, self->nbits)) < 0) goto finish; setbit(self, i, getbit(other, j)); } res = 0; finish: if (other_copied) Py_DECREF(other); return res; } /* assign sequence (of indices) of bitarray self to Boolean value */ static int setseq_bool(bitarrayobject *self, PyObject *seq, PyObject *value) { Py_ssize_t n, i, j; int vi; if (!conv_pybit(value, &vi)) return -1; n = PySequence_Size(seq); for (j = 0; j < n; j++) { if ((i = index_from_seq(seq, j, self->nbits)) < 0) return -1; setbit(self, i, vi); } return 0; } /* delete items in self, specified by sequence of indices */ static int delsequence(bitarrayobject *self, PyObject *seq) { const Py_ssize_t nbits = self->nbits; const Py_ssize_t nseq = PySequence_Size(seq); bitarrayobject *mask; /* temporary bitarray masking items to remove */ Py_ssize_t i, j; int res = -1; /* shortcuts for removing 0 or 1 items to avoid creating mask */ if (nseq < 2) { if (nseq == 0) /* use resize to check for BufferError */ return resize(self, nbits); assert(nseq == 1); if ((i = index_from_seq(seq, 0, nbits)) < 0) return -1; return delete_n(self, i, 1); } /* create mask bitarray - note that its bit-endianness is irrelevant */ mask = newbitarrayobject(&Bitarray_Type, nbits, ENDIAN_LITTLE); if (mask == NULL) return -1; if (self->ob_item) memset(mask->ob_item, 0x00, (size_t) Py_SIZE(mask)); /* set indices from sequence in mask */ for (j = 0; j < nseq; j++) { if ((i = index_from_seq(seq, j, nbits)) < 0) goto finish; setbit(mask, i, 1); } res = delmask(self, mask); /* do actual work here */ finish: Py_DECREF(mask); return res; } /* assign sequence (of indices) of bitarray self to value */ static int assign_sequence(bitarrayobject *self, PyObject *seq, PyObject *value) { assert(PySequence_Check(seq)); if (value == NULL) return delsequence(self, seq); if (bitarray_Check(value)) return setseq_bitarray(self, seq, (bitarrayobject *) value); if (PyIndex_Check(value)) return setseq_bool(self, seq, value); PyErr_Format(PyExc_TypeError, "bitarray or int expected for sequence " "assignment, not '%s'", Py_TYPE(value)->tp_name); return -1; } static int bitarray_ass_subscr(bitarrayobject *self, PyObject *item, PyObject *value) { RAISE_IF_READONLY(self, -1); if (PyIndex_Check(item)) { Py_ssize_t i; i = PyNumber_AsSsize_t(item, PyExc_IndexError); if (i == -1 && PyErr_Occurred()) return -1; if (i < 0) i += self->nbits; return bitarray_ass_item(self, i, value); } if (PySlice_Check(item)) return assign_slice(self, item, value); if (bitarray_Check(item)) return assign_mask(self, (bitarrayobject *) item, value); if (subscr_seq_check(item) < 0) return -1; return assign_sequence(self, item, value); } static PyMappingMethods bitarray_as_mapping = { (lenfunc) bitarray_len, (binaryfunc) bitarray_subscr, (objobjargproc) bitarray_ass_subscr, }; /* --------------------------- bitarray_as_number ---------------------- */ static PyObject * bitarray_cpinvert(bitarrayobject *self) { bitarrayobject *res; if ((res = bitarray_cp(self)) == NULL) return NULL; invert_span(res, 0, res->nbits); return freeze_if_frozen(res); } /* perform bitwise in-place operation */ static void bitwise(bitarrayobject *self, bitarrayobject *other, const char oper) { const Py_ssize_t nbytes = Py_SIZE(self); const Py_ssize_t cwords = nbytes / 8; /* complete 64-bit words */ Py_ssize_t i; char *buff_s = self->ob_item; char *buff_o = other->ob_item; uint64_t *wbuff_s = 
WBUFF(self); uint64_t *wbuff_o = WBUFF(other); assert(self->nbits == other->nbits); assert(self->endian == other->endian); assert(self->readonly == 0); switch (oper) { case '&': for (i = 0; i < cwords; i++) wbuff_s[i] &= wbuff_o[i]; for (i = 8 * cwords; i < nbytes; i++) buff_s[i] &= buff_o[i]; break; case '|': for (i = 0; i < cwords; i++) wbuff_s[i] |= wbuff_o[i]; for (i = 8 * cwords; i < nbytes; i++) buff_s[i] |= buff_o[i]; break; case '^': for (i = 0; i < cwords; i++) wbuff_s[i] ^= wbuff_o[i]; for (i = 8 * cwords; i < nbytes; i++) buff_s[i] ^= buff_o[i]; break; default: Py_UNREACHABLE(); } } /* Return 0 if both a and b are bitarray objects with same length and bit-endianness. Otherwise, set exception and return -1. */ static int bitwise_check(PyObject *a, PyObject *b, const char *ostr) { if (!bitarray_Check(a) || !bitarray_Check(b)) { PyErr_Format(PyExc_TypeError, "unsupported operand type(s) for %s: '%s' and '%s'", ostr, Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name); return -1; } return ensure_eq_size_endian((bitarrayobject *) a, (bitarrayobject *) b); } #define BITWISE_FUNC(name, inplace, ostr) \ static PyObject * \ bitarray_ ## name (PyObject *self, PyObject *other) \ { \ bitarrayobject *res; \ \ if (bitwise_check(self, other, ostr) < 0) \ return NULL; \ if (inplace) { \ RAISE_IF_READONLY(self, NULL); \ res = (bitarrayobject *) self; \ Py_INCREF(res); \ } \ else { \ res = bitarray_cp((bitarrayobject *) self); \ if (res == NULL) \ return NULL; \ } \ bitwise(res, (bitarrayobject *) other, *ostr); \ if (!inplace) \ return freeze_if_frozen(res); \ return (PyObject *) res; \ } BITWISE_FUNC(and, 0, "&") /* bitarray_and */ BITWISE_FUNC(or, 0, "|") /* bitarray_or */ BITWISE_FUNC(xor, 0, "^") /* bitarray_xor */ BITWISE_FUNC(iand, 1, "&=") /* bitarray_iand */ BITWISE_FUNC(ior, 1, "|=") /* bitarray_ior */ BITWISE_FUNC(ixor, 1, "^=") /* bitarray_ixor */ /* shift bitarray n positions to left (right=0) or right (right=1) */ static void shift(bitarrayobject *self, Py_ssize_t n, int right) { const Py_ssize_t nbits = self->nbits; assert(n >= 0 && self->readonly == 0); if (n > nbits) n = nbits; assert(n <= nbits); if (right) { /* rshift */ copy_n(self, n, self, 0, nbits - n); set_span(self, 0, n, 0); } else { /* lshift */ copy_n(self, 0, self, n, nbits - n); set_span(self, nbits - n, nbits, 0); } } /* check shift arguments and return shift count, -1 on error */ static Py_ssize_t shift_check(PyObject *self, PyObject *other, const char *ostr) { Py_ssize_t n; if (!bitarray_Check(self) || !PyIndex_Check(other)) { PyErr_Format(PyExc_TypeError, "unsupported operand type(s) for %s: '%s' and '%s'", ostr, Py_TYPE(self)->tp_name, Py_TYPE(other)->tp_name); return -1; } n = PyNumber_AsSsize_t(other, PyExc_OverflowError); if (n == -1 && PyErr_Occurred()) return -1; if (n < 0) { PyErr_SetString(PyExc_ValueError, "negative shift count"); return -1; } return n; } #define SHIFT_FUNC(name, inplace, ostr) \ static PyObject * \ bitarray_ ## name (PyObject *self, PyObject *other) \ { \ bitarrayobject *res; \ Py_ssize_t n; \ \ if ((n = shift_check(self, other, ostr)) < 0) \ return NULL; \ if (inplace) { \ RAISE_IF_READONLY(self, NULL); \ res = (bitarrayobject *) self; \ Py_INCREF(res); \ } \ else { \ res = bitarray_cp((bitarrayobject *) self); \ if (res == NULL) \ return NULL; \ } \ shift((bitarrayobject *) res, n, *ostr == '>'); \ if (!inplace) \ return freeze_if_frozen(res); \ return (PyObject *) res; \ } SHIFT_FUNC(lshift, 0, "<<") /* bitarray_lshift */ SHIFT_FUNC(rshift, 0, ">>") /* bitarray_rshift */ SHIFT_FUNC(ilshift, 1, 
"<<=") /* bitarray_ilshift */ SHIFT_FUNC(irshift, 1, ">>=") /* bitarray_irshift */ static PyNumberMethods bitarray_as_number = { 0, /* nb_add */ 0, /* nb_subtract */ 0, /* nb_multiply */ 0, /* nb_remainder */ 0, /* nb_divmod */ 0, /* nb_power */ 0, /* nb_negative */ 0, /* nb_positive */ 0, /* nb_absolute */ 0, /* nb_bool (was nb_nonzero) */ (unaryfunc) bitarray_cpinvert, /* nb_invert */ (binaryfunc) bitarray_lshift, /* nb_lshift */ (binaryfunc) bitarray_rshift, /* nb_rshift */ (binaryfunc) bitarray_and, /* nb_and */ (binaryfunc) bitarray_xor, /* nb_xor */ (binaryfunc) bitarray_or, /* nb_or */ 0, /* nb_int */ 0, /* nb_reserved (was nb_long) */ 0, /* nb_float */ 0, /* nb_inplace_add */ 0, /* nb_inplace_subtract */ 0, /* nb_inplace_multiply */ 0, /* nb_inplace_remainder */ 0, /* nb_inplace_power */ (binaryfunc) bitarray_ilshift, /* nb_inplace_lshift */ (binaryfunc) bitarray_irshift, /* nb_inplace_rshift */ (binaryfunc) bitarray_iand, /* nb_inplace_and */ (binaryfunc) bitarray_ixor, /* nb_inplace_xor */ (binaryfunc) bitarray_ior, /* nb_inplace_or */ 0, /* nb_floor_divide */ 0, /* nb_true_divide */ 0, /* nb_inplace_floor_divide */ 0, /* nb_inplace_true_divide */ 0, /* nb_index */ }; /************************************************************************** variable length encoding and decoding **************************************************************************/ static int check_codedict(PyObject *codedict) { if (!PyDict_Check(codedict)) { PyErr_Format(PyExc_TypeError, "dict expected, got '%s'", Py_TYPE(codedict)->tp_name); return -1; } if (PyDict_Size(codedict) == 0) { PyErr_SetString(PyExc_ValueError, "non-empty dict expected"); return -1; } return 0; } static int check_value(PyObject *value) { if (!bitarray_Check(value)) { PyErr_SetString(PyExc_TypeError, "bitarray expected for dict value"); return -1; } if (((bitarrayobject *) value)->nbits == 0) { PyErr_SetString(PyExc_ValueError, "non-empty bitarray expected"); return -1; } return 0; } static PyObject * bitarray_encode(bitarrayobject *self, PyObject *args) { PyObject *codedict, *iterable, *iter, *symbol, *value; RAISE_IF_READONLY(self, NULL); if (!PyArg_ParseTuple(args, "OO:encode", &codedict, &iterable)) return NULL; if (check_codedict(codedict) < 0) return NULL; if ((iter = PyObject_GetIter(iterable)) == NULL) return PyErr_Format(PyExc_TypeError, "'%s' object is not iterable", Py_TYPE(iterable)->tp_name); /* extend self with the bitarrays from codedict */ while ((symbol = PyIter_Next(iter))) { value = PyDict_GetItem(codedict, symbol); Py_DECREF(symbol); if (value == NULL) { PyErr_Format(PyExc_ValueError, "symbol not defined in prefix code: %A", symbol); goto error; } if (check_value(value) < 0 || extend_bitarray(self, (bitarrayobject *) value) < 0) goto error; } Py_DECREF(iter); if (PyErr_Occurred()) /* from PyIter_Next() */ return NULL; Py_RETURN_NONE; error: Py_DECREF(iter); return NULL; } PyDoc_STRVAR(encode_doc, "encode(code, iterable, /)\n\ \n\ Given a prefix code (a dict mapping symbols to bitarrays),\n\ iterate over the iterable object with symbols, and extend bitarray\n\ with corresponding bitarray for each symbol."); /* ----------------------- binary tree (C-level) ----------------------- */ /* a node has either children or a symbol, NEVER both */ typedef struct _bin_node { struct _bin_node *child[2]; PyObject *symbol; } binode; static binode * binode_new(void) { binode *nd; nd = PyMem_New(binode, 1); if (nd == NULL) { PyErr_NoMemory(); return NULL; } nd->child[0] = NULL; nd->child[1] = NULL; nd->symbol = NULL; return 
nd; } static void binode_delete(binode *nd) { if (nd == NULL) return; binode_delete(nd->child[0]); binode_delete(nd->child[1]); Py_XDECREF(nd->symbol); PyMem_Free((void *) nd); } /* insert symbol (mapping to bitarray a) into tree */ static int binode_insert_symbol(binode *tree, bitarrayobject *a, PyObject *symbol) { binode *nd = tree, *prev; Py_ssize_t i; for (i = 0; i < a->nbits; i++) { int k = getbit(a, i); prev = nd; nd = nd->child[k]; if (nd) { if (nd->symbol) /* we cannot have already a symbol */ goto ambiguity; } else { /* if node does not exist, create new one */ nd = binode_new(); if (nd == NULL) return -1; prev->child[k] = nd; } } /* the new leaf node cannot already have a symbol or children */ if (nd->symbol || nd->child[0] || nd->child[1]) goto ambiguity; nd->symbol = symbol; Py_INCREF(symbol); return 0; ambiguity: PyErr_Format(PyExc_ValueError, "prefix code ambiguous: %A", symbol); return -1; } /* return a binary tree from a codedict, which is created by inserting all symbols mapping to bitarrays */ static binode * binode_make_tree(PyObject *codedict) { binode *tree; PyObject *symbol, *value; Py_ssize_t pos = 0; tree = binode_new(); if (tree == NULL) return NULL; while (PyDict_Next(codedict, &pos, &symbol, &value)) { if (check_value(value) < 0 || binode_insert_symbol(tree, (bitarrayobject *) value, symbol) < 0) { binode_delete(tree); return NULL; } } /* as we require the codedict to be non-empty the tree cannot be empty */ assert(tree); return tree; } /* Traverse using the branches corresponding to bits in ba, starting at *indexp. Return the symbol at the leaf node, or NULL when the end of the bitarray has been reached. On error, set the appropriate exception and also return NULL. */ static PyObject * binode_traverse(binode *tree, bitarrayobject *ba, Py_ssize_t *indexp) { binode *nd = tree; Py_ssize_t start = *indexp; while (*indexp < ba->nbits) { assert(nd); nd = nd->child[getbit(ba, *indexp)]; if (nd == NULL) return PyErr_Format(PyExc_ValueError, "prefix code unrecognized in bitarray " "at position %zd .. 
%zd", start, *indexp); (*indexp)++; if (nd->symbol) { /* leaf */ assert(nd->child[0] == NULL && nd->child[1] == NULL); return nd->symbol; } } if (nd != tree) PyErr_Format(PyExc_ValueError, "incomplete prefix code at position %zd", start); return NULL; } /* add the node's symbol to given dict */ static int binode_to_dict(binode *nd, PyObject *dict, bitarrayobject *prefix) { int k; if (nd == NULL) return 0; if (nd->symbol) { assert(nd->child[0] == NULL && nd->child[1] == NULL); return PyDict_SetItem(dict, nd->symbol, (PyObject *) prefix); } for (k = 0; k < 2; k++) { bitarrayobject *t; /* prefix of the two child nodes */ int ret; if ((t = bitarray_cp(prefix)) == NULL) return -1; if (resize(t, t->nbits + 1) < 0) return -1; setbit(t, t->nbits - 1, k); ret = binode_to_dict(nd->child[k], dict, t); Py_DECREF(t); if (ret < 0) return -1; } return 0; } /* return whether node is complete (has both children or is a symbol node) */ static int binode_complete(binode *nd) { if (nd == NULL) return 0; if (nd->symbol) { /* symbol node cannot have children */ assert(nd->child[0] == NULL && nd->child[1] == NULL); return 1; } return (binode_complete(nd->child[0]) && binode_complete(nd->child[1])); } /* return number of nodes */ static Py_ssize_t binode_nodes(binode *nd) { Py_ssize_t res; if (nd == NULL) return 0; /* a node cannot have a symbol and children */ assert(!(nd->symbol && (nd->child[0] || nd->child[1]))); /* a node must have a symbol or children */ assert(nd->symbol || nd->child[0] || nd->child[1]); res = 1; res += binode_nodes(nd->child[0]); res += binode_nodes(nd->child[1]); return res; } /******************************** decodetree ******************************/ typedef struct { PyObject_HEAD binode *tree; } decodetreeobject; static PyObject * decodetree_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { binode *tree; PyObject *codedict, *obj; if (!PyArg_ParseTuple(args, "O:decodetree", &codedict)) return NULL; if (check_codedict(codedict) < 0) return NULL; tree = binode_make_tree(codedict); if (tree == NULL) return NULL; obj = type->tp_alloc(type, 0); if (obj == NULL) { binode_delete(tree); return NULL; } ((decodetreeobject *) obj)->tree = tree; return obj; } static PyObject * decodetree_todict(decodetreeobject *self) { PyObject *dict; bitarrayobject *prefix; if ((dict = PyDict_New()) == NULL) return NULL; prefix = newbitarrayobject(&Bitarray_Type, 0, default_endian); if (prefix == NULL) goto error; if (binode_to_dict(self->tree, dict, prefix) < 0) goto error; Py_DECREF(prefix); return dict; error: Py_DECREF(dict); Py_XDECREF(prefix); return NULL; } PyDoc_STRVAR(todict_doc, "todict() -> dict\n\ \n\ Return a dict mapping the symbols to bitarrays. This dict is a\n\ reconstruction of the code dict which the object was created with."); static PyObject * decodetree_complete(decodetreeobject *self) { return PyBool_FromLong(binode_complete(self->tree)); } PyDoc_STRVAR(complete_doc, "complete() -> bool\n\ \n\ Return whether tree is complete. 
That is, whether or not all\n\ nodes have both children (unless they are symbol nodes)."); static PyObject * decodetree_nodes(decodetreeobject *self) { return PyLong_FromSsize_t(binode_nodes(self->tree)); } PyDoc_STRVAR(nodes_doc, "nodes() -> int\n\ \n\ Return number of nodes in tree (internal and symbol nodes)."); static PyObject * decodetree_sizeof(decodetreeobject *self) { Py_ssize_t res; res = sizeof(decodetreeobject); res += sizeof(binode) * binode_nodes(self->tree); return PyLong_FromSsize_t(res); } static void decodetree_dealloc(decodetreeobject *self) { binode_delete(self->tree); Py_TYPE(self)->tp_free((PyObject *) self); } /* These methods are mostly useful for debugging and testing. We provide docstrings, but they are not mentioned in the documentation, and are not part of the API */ static PyMethodDef decodetree_methods[] = { {"complete", (PyCFunction) decodetree_complete, METH_NOARGS, complete_doc}, {"nodes", (PyCFunction) decodetree_nodes, METH_NOARGS, nodes_doc}, {"todict", (PyCFunction) decodetree_todict, METH_NOARGS, todict_doc}, {"__sizeof__", (PyCFunction) decodetree_sizeof, METH_NOARGS, 0}, {NULL, NULL} /* sentinel */ }; PyDoc_STRVAR(decodetree_doc, "decodetree(code, /) -> decodetree\n\ \n\ Given a prefix code (a dict mapping symbols to bitarrays),\n\ create a binary tree object to be passed to `.decode()`."); static PyTypeObject DecodeTree_Type = { PyVarObject_HEAD_INIT(NULL, 0) "bitarray.decodetree", /* tp_name */ sizeof(decodetreeobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ (destructor) decodetree_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ PyObject_HashNotImplemented, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT, /* tp_flags */ decodetree_doc, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ decodetree_methods, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ 0, /* tp_init */ PyType_GenericAlloc, /* tp_alloc */ decodetree_new, /* tp_new */ PyObject_Del, /* tp_free */ }; #define DecodeTree_Check(op) PyObject_TypeCheck(op, &DecodeTree_Type) /* -------------------------- END decodetree --------------------------- */ /* return a binary tree from a decodetree or codedict */ static binode * get_tree(PyObject *obj) { if (DecodeTree_Check(obj)) return ((decodetreeobject *) obj)->tree; if (check_codedict(obj) < 0) return NULL; return binode_make_tree(obj); } /*********************** (bitarray) Decode Iterator ***********************/ typedef struct { PyObject_HEAD bitarrayobject *self; /* bitarray we're decoding */ binode *tree; /* prefix tree containing symbols */ Py_ssize_t index; /* current index in bitarray */ PyObject *decodetree; /* decodetree or NULL */ } decodeiterobject; static PyTypeObject DecodeIter_Type; /* create a new initialized bitarray decode iterator object */ static PyObject * bitarray_decode(bitarrayobject *self, PyObject *obj) { decodeiterobject *it; /* iterator to be returned */ binode *tree; if ((tree = get_tree(obj)) == NULL) return NULL; it = PyObject_GC_New(decodeiterobject, &DecodeIter_Type); if (it == NULL) { if (!DecodeTree_Check(obj)) binode_delete(tree); return NULL; }
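    /* Illustrative Python-level usage (sketch; the code dict and symbols are made-up examples): with code = {"a": bitarray("0"), "s": bitarray("10"), "t": bitarray("11")}, list(bitarray("011010").decode(code)) walks the prefix tree built above and yields ["a", "t", "a", "s"]. */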
Py_INCREF(self); it->self = self; it->tree = tree; it->index = 0; it->decodetree = DecodeTree_Check(obj) ? obj : NULL; Py_XINCREF(it->decodetree); PyObject_GC_Track(it); return (PyObject *) it; } PyDoc_STRVAR(decode_doc, "decode(code, /) -> iterator\n\ \n\ Given a prefix code (a dict mapping symbols to bitarrays, or `decodetree`\n\ object), decode content of bitarray and return an iterator over\n\ corresponding symbols."); static PyObject * decodeiter_next(decodeiterobject *it) { PyObject *symbol; symbol = binode_traverse(it->tree, it->self, &(it->index)); if (symbol == NULL) /* stop iteration OR error occurred */ return NULL; Py_INCREF(symbol); return symbol; } static void decodeiter_dealloc(decodeiterobject *it) { if (it->decodetree) Py_DECREF(it->decodetree); else /* when decodeiter was created from dict - free tree */ binode_delete(it->tree); PyObject_GC_UnTrack(it); Py_DECREF(it->self); PyObject_GC_Del(it); } static int decodeiter_traverse(decodeiterobject *it, visitproc visit, void *arg) { Py_VISIT(it->self); Py_VISIT(it->decodetree); return 0; } static PyTypeObject DecodeIter_Type = { PyVarObject_HEAD_INIT(NULL, 0) "bitarray.decodeiterator", /* tp_name */ sizeof(decodeiterobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ (destructor) decodeiter_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 0, /* tp_doc */ (traverseproc) decodeiter_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc) decodeiter_next, /* tp_iternext */ 0, /* tp_methods */ }; /*********************** (Bitarray) Search Iterator ***********************/ typedef struct { PyObject_HEAD bitarrayobject *self; /* bitarray we're searching in */ PyObject *sub; /* object (bitarray or int) being searched for */ Py_ssize_t start; Py_ssize_t stop; int right; } searchiterobject; static PyTypeObject SearchIter_Type; /* create a new initialized bitarray search iterator object */ static PyObject * bitarray_search(bitarrayobject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"", "", "", "right", NULL}; Py_ssize_t start = 0, stop = PY_SSIZE_T_MAX; int right = 0; PyObject *sub; searchiterobject *it; /* iterator to be returned */ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|nni", kwlist, &sub, &start, &stop, &right)) return NULL; if (value_sub(sub) < 0) return NULL; PySlice_AdjustIndices(self->nbits, &start, &stop, 1); it = PyObject_GC_New(searchiterobject, &SearchIter_Type); if (it == NULL) return NULL; Py_INCREF(self); it->self = self; Py_INCREF(sub); it->sub = sub; it->start = start; it->stop = stop; it->right = right; PyObject_GC_Track(it); return (PyObject *) it; } PyDoc_STRVAR(search_doc, "search(sub_bitarray, start=0, stop=<end of array>, /, right=False) -> iterator\n\ \n\ Return iterator over indices where sub_bitarray is found, such that\n\ sub_bitarray is contained within `[start:stop]`.\n\ The indices are iterated in ascending order (from lowest to highest),\n\ unless `right=True`, which will iterate in descending order (starting with\n\ rightmost match)."); static PyObject * searchiter_next(searchiterobject *it) { Py_ssize_t nbits = it->self->nbits, pos; /* range checks necessary in case
self changed during iteration */ assert(it->start >= 0); if (it->start > nbits || it->stop < 0 || it->stop > nbits) return NULL; /* stop iteration */ pos = find_obj(it->self, it->sub, it->start, it->stop, it->right); assert(pos > -2); /* pos cannot be -2 as we called value_sub() before */ if (pos < 0) /* no more positions -- stop iteration */ return NULL; /* update start / stop for next iteration */ if (it->right) it->stop = pos + (bitarray_Check(it->sub) ? ((bitarrayobject *) it->sub)->nbits : 1) - 1; else it->start = pos + 1; return PyLong_FromSsize_t(pos); } static void searchiter_dealloc(searchiterobject *it) { PyObject_GC_UnTrack(it); Py_DECREF(it->self); Py_DECREF(it->sub); PyObject_GC_Del(it); } static int searchiter_traverse(searchiterobject *it, visitproc visit, void *arg) { Py_VISIT(it->self); return 0; } static PyTypeObject SearchIter_Type = { PyVarObject_HEAD_INIT(NULL, 0) "bitarray.searchiterator", /* tp_name */ sizeof(searchiterobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ (destructor) searchiter_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 0, /* tp_doc */ (traverseproc) searchiter_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc) searchiter_next, /* tp_iternext */ 0, /* tp_methods */ }; /*********************** bitarray method definitions **********************/ static PyMethodDef bitarray_methods[] = { {"all", (PyCFunction) bitarray_all, METH_NOARGS, all_doc}, {"any", (PyCFunction) bitarray_any, METH_NOARGS, any_doc}, {"append", (PyCFunction) bitarray_append, METH_O, append_doc}, {"buffer_info", (PyCFunction) bitarray_buffer_info, METH_NOARGS, buffer_info_doc}, {"bytereverse", (PyCFunction) bitarray_bytereverse, METH_VARARGS, bytereverse_doc}, {"clear", (PyCFunction) bitarray_clear, METH_NOARGS, clear_doc}, {"copy", (PyCFunction) bitarray_copy, METH_NOARGS, copy_doc}, {"count", (PyCFunction) bitarray_count, METH_VARARGS, count_doc}, {"decode", (PyCFunction) bitarray_decode, METH_O, decode_doc}, {"encode", (PyCFunction) bitarray_encode, METH_VARARGS, encode_doc}, {"extend", (PyCFunction) bitarray_extend, METH_O, extend_doc}, {"fill", (PyCFunction) bitarray_fill, METH_NOARGS, fill_doc}, {"find", (PyCFunction) bitarray_find, METH_VARARGS | METH_KEYWORDS, find_doc}, {"frombytes", (PyCFunction) bitarray_frombytes, METH_O, frombytes_doc}, {"fromfile", (PyCFunction) bitarray_fromfile, METH_VARARGS, fromfile_doc}, {"index", (PyCFunction) bitarray_index, METH_VARARGS | METH_KEYWORDS, index_doc}, {"insert", (PyCFunction) bitarray_insert, METH_VARARGS, insert_doc}, {"invert", (PyCFunction) bitarray_invert, METH_VARARGS, invert_doc}, {"pack", (PyCFunction) bitarray_pack, METH_O, pack_doc}, {"pop", (PyCFunction) bitarray_pop, METH_VARARGS, pop_doc}, {"remove", (PyCFunction) bitarray_remove, METH_O, remove_doc}, {"reverse", (PyCFunction) bitarray_reverse, METH_NOARGS, reverse_doc}, {"search", (PyCFunction) bitarray_search, METH_VARARGS | METH_KEYWORDS, search_doc}, {"setall", (PyCFunction) bitarray_setall, METH_O, setall_doc}, {"sort", (PyCFunction) bitarray_sort, METH_VARARGS | METH_KEYWORDS, sort_doc}, {"to01", (PyCFunction) bitarray_to01, 
METH_VARARGS | METH_KEYWORDS, to01_doc}, {"tobytes", (PyCFunction) bitarray_tobytes, METH_NOARGS, tobytes_doc}, {"tofile", (PyCFunction) bitarray_tofile, METH_O, tofile_doc}, {"tolist", (PyCFunction) bitarray_tolist, METH_NOARGS, tolist_doc}, {"unpack", (PyCFunction) bitarray_unpack, METH_VARARGS | METH_KEYWORDS, unpack_doc}, {"__copy__", (PyCFunction) bitarray_copy, METH_NOARGS, copy_doc}, {"__deepcopy__", (PyCFunction) bitarray_copy, METH_O, copy_doc}, {"__reduce__", (PyCFunction) bitarray_reduce, METH_NOARGS, reduce_doc}, {"__sizeof__", (PyCFunction) bitarray_sizeof, METH_NOARGS, sizeof_doc}, {"_freeze", (PyCFunction) bitarray_freeze, METH_NOARGS, 0}, #ifndef NDEBUG /* functionality exposed in debug mode for testing */ {"_shift_r8", (PyCFunction) bitarray_shift_r8, METH_VARARGS, 0}, {"_copy_n", (PyCFunction) bitarray_copy_n, METH_VARARGS, 0}, {"_overlap", (PyCFunction) bitarray_overlap, METH_O, 0}, #endif {NULL, NULL} /* sentinel */ }; /* ------------------------ bitarray initialization -------------------- */ /* Given string 'str', return an integer representing the bit-endianness. If the string is invalid, set exception and return -1. */ static int endian_from_string(const char *str) { assert(default_endian == ENDIAN_LITTLE || default_endian == ENDIAN_BIG); if (str == NULL) return default_endian; if (strcmp(str, "little") == 0) return ENDIAN_LITTLE; if (strcmp(str, "big") == 0) return ENDIAN_BIG; PyErr_Format(PyExc_ValueError, "bit-endianness must be either " "'little' or 'big', not '%s'", str); return -1; } /* create a new bitarray object whose buffer is imported from another object which exposes the buffer protocol */ static PyObject * newbitarray_from_buffer(PyTypeObject *type, PyObject *buffer, int endian) { Py_buffer view; bitarrayobject *obj; if (PyObject_GetBuffer(buffer, &view, PyBUF_SIMPLE) < 0) return NULL; obj = (bitarrayobject *) type->tp_alloc(type, 0); if (obj == NULL) { PyBuffer_Release(&view); return NULL; } Py_SET_SIZE(obj, view.len); obj->ob_item = (char *) view.buf; obj->allocated = 0; /* no buffer allocated (in this object) */ obj->nbits = 8 * view.len; obj->endian = endian; obj->ob_exports = 0; obj->weakreflist = NULL; obj->readonly = view.readonly; obj->buffer = PyMem_New(Py_buffer, 1); if (obj->buffer == NULL) { PyObject_Del(obj); PyBuffer_Release(&view); return PyErr_NoMemory(); } memcpy(obj->buffer, &view, sizeof(Py_buffer)); return (PyObject *) obj; } /* return new bitarray of length 'index', 'endian', and 'init_zero' (initialize buffer with zeros) */ static PyObject * newbitarray_from_index(PyTypeObject *type, PyObject *index, int endian, int init_zero) { bitarrayobject *res; Py_ssize_t nbits; assert(PyIndex_Check(index)); nbits = PyNumber_AsSsize_t(index, PyExc_OverflowError); if (nbits == -1 && PyErr_Occurred()) return NULL; if (nbits < 0) { PyErr_SetString(PyExc_ValueError, "bitarray length must be >= 0"); return NULL; } if ((res = newbitarrayobject(type, nbits, endian)) == NULL) return NULL; if (init_zero && nbits) memset(res->ob_item, 0x00, (size_t) Py_SIZE(res)); return (PyObject *) res; } /* return new bitarray from bytes-like object */ static PyObject * newbitarray_from_bytes(PyTypeObject *type, PyObject *buffer, int endian) { bitarrayobject *res; Py_buffer view; if (PyObject_GetBuffer(buffer, &view, PyBUF_SIMPLE) < 0) return NULL; res = newbitarrayobject(type, 8 * view.len, endian); if (res == NULL) { PyBuffer_Release(&view); return NULL; } assert(Py_SIZE(res) == view.len); memcpy(res->ob_item, (char *) view.buf, (size_t) view.len); 
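    /* Illustration (sketch): bitarray(b"\x0f") takes this path; the single byte 0x0f is copied into the buffer, giving an 8-bit bitarray that, with the default big-endian bit order, equals bitarray("00001111"). */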
PyBuffer_Release(&view); return (PyObject *) res; } /* As of bitarray version 2.9.0, "bitarray(nbits)" will initialize all items to 0 (previously, the buffer was uninitialized). However, for speed, one might want to create an uninitialized bitarray. In 2.9.1, we added the ability to create uninitialized bitarrays again, using "bitarray(nbits, endian, Ellipsis)". */ static PyObject * bitarray_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"", "endian", "buffer", NULL}; PyObject *initializer = Py_None, *buffer = Py_None; bitarrayobject *res; char *endian_str = NULL; int endian; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OzO:bitarray", kwlist, &initializer, &endian_str, &buffer)) return NULL; if ((endian = endian_from_string(endian_str)) < 0) return NULL; /* import buffer */ if (buffer != Py_None && buffer != Py_Ellipsis) { if (initializer != Py_None) { PyErr_SetString(PyExc_TypeError, "buffer requires no initializer argument"); return NULL; } return newbitarray_from_buffer(type, buffer, endian); } /* no arg / None */ if (initializer == Py_None) return (PyObject *) newbitarrayobject(type, 0, endian); /* bool */ if (PyBool_Check(initializer)) { PyErr_SetString(PyExc_TypeError, "cannot create bitarray from 'bool' object"); return NULL; } /* index (a number) */ if (PyIndex_Check(initializer)) return newbitarray_from_index(type, initializer, endian, buffer == Py_None); /* bytes or bytearray */ if (PyBytes_Check(initializer) || PyByteArray_Check(initializer)) return newbitarray_from_bytes(type, initializer, endian); /* bitarray: use its bit-endianness when endian argument is None */ if (bitarray_Check(initializer) && endian_str == NULL) endian = ((bitarrayobject *) initializer)->endian; /* leave remaining type dispatch to extend method */ if ((res = newbitarrayobject(type, 0, endian)) == NULL) return NULL; if (extend_dispatch(res, initializer) < 0) { Py_DECREF(res); return NULL; } return (PyObject *) res; } static int ssize_richcompare(Py_ssize_t v, Py_ssize_t w, int op) { switch (op) { case Py_LT: return v < w; case Py_LE: return v <= w; case Py_EQ: return v == w; case Py_NE: return v != w; case Py_GT: return v > w; case Py_GE: return v >= w; default: Py_UNREACHABLE(); } } static PyObject * richcompare(PyObject *v, PyObject *w, int op) { Py_ssize_t i, vs, ws, c; bitarrayobject *va, *wa; char *vb, *wb; if (!bitarray_Check(v) || !bitarray_Check(w)) { Py_INCREF(Py_NotImplemented); return Py_NotImplemented; } va = (bitarrayobject *) v; wa = (bitarrayobject *) w; vs = va->nbits; ws = wa->nbits; vb = va->ob_item; wb = wa->ob_item; if (op == Py_EQ || op == Py_NE) { /* shortcuts for EQ/NE */ if (vs != ws) { /* if sizes differ, the bitarrays differ */ return PyBool_FromLong(op == Py_NE); } else if (va->endian == wa->endian) { /* sizes and endianness are the same - use memcmp() */ int cmp = memcmp(vb, wb, (size_t) vs / 8); if (cmp == 0 && vs % 8) /* if equal, compare remaining bits */ cmp = zlc(va) != zlc(wa); return PyBool_FromLong((cmp == 0) ^ (op == Py_NE)); } } /* search for the first index where items are different */ c = Py_MIN(vs, ws) / 8; /* common buffer size */ i = 0; /* byte index */ if (va->endian == wa->endian) { /* equal endianness - skip ahead by comparing bytes directly */ while (i < c && vb[i] == wb[i]) i++; } else { /* opposite endianness - compare with reversed byte */ while (i < c && vb[i] == reverse_trans[(unsigned char) wb[i]]) i++; } i *= 8; /* i is now the bit index up to which we compared bytes */ for (; i < vs && i < ws; i++) { int vi =
getbit(va, i); int wi = getbit(wa, i); if (vi != wi) /* we have an item that differs */ return PyBool_FromLong(ssize_richcompare(vi, wi, op)); } /* no more items to compare -- compare sizes */ return PyBool_FromLong(ssize_richcompare(vs, ws, op)); } /***************************** bitarray iterator **************************/ typedef struct { PyObject_HEAD bitarrayobject *self; /* bitarray we're iterating over */ Py_ssize_t index; /* current index in bitarray */ } bitarrayiterobject; static PyTypeObject BitarrayIter_Type; /* create a new initialized bitarray iterator object, this object is returned when calling iter(a) */ static PyObject * bitarray_iter(bitarrayobject *self) { bitarrayiterobject *it; it = PyObject_GC_New(bitarrayiterobject, &BitarrayIter_Type); if (it == NULL) return NULL; Py_INCREF(self); it->self = self; it->index = 0; PyObject_GC_Track(it); return (PyObject *) it; } static PyObject * bitarrayiter_next(bitarrayiterobject *it) { if (it->index < it->self->nbits) return PyLong_FromLong(getbit(it->self, it->index++)); return NULL; /* stop iteration */ } static void bitarrayiter_dealloc(bitarrayiterobject *it) { PyObject_GC_UnTrack(it); Py_DECREF(it->self); PyObject_GC_Del(it); } static int bitarrayiter_traverse(bitarrayiterobject *it, visitproc visit, void *arg) { Py_VISIT(it->self); return 0; } static PyTypeObject BitarrayIter_Type = { PyVarObject_HEAD_INIT(NULL, 0) "bitarray.bitarrayiterator", /* tp_name */ sizeof(bitarrayiterobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ (destructor) bitarrayiter_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 0, /* tp_doc */ (traverseproc) bitarrayiter_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc) bitarrayiter_next, /* tp_iternext */ 0, /* tp_methods */ }; /******************** bitarray buffer export interface ********************/ /* Here we create bitarray_as_buffer for exporting bitarray buffers. Buffer imports are handled in newbitarray_from_buffer(). */ static int bitarray_getbuffer(bitarrayobject *self, Py_buffer *view, int flags) { int ret; if (view == NULL) { self->ob_exports++; return 0; } ret = PyBuffer_FillInfo(view, (PyObject *) self, /* exporter */ (void *) self->ob_item, Py_SIZE(self), self->readonly, flags); if (ret >= 0) self->ob_exports++; return ret; } static void bitarray_releasebuffer(bitarrayobject *self, Py_buffer *view) { self->ob_exports--; } static PyBufferProcs bitarray_as_buffer = { (getbufferproc) bitarray_getbuffer, (releasebufferproc) bitarray_releasebuffer, }; /***************************** Bitarray Type ******************************/ PyDoc_STRVAR(bitarraytype_doc, "bitarray(initializer=0, /, endian='big', buffer=None) -> bitarray\n\ \n\ Return a new bitarray object whose items are bits initialized from\n\ the optional initializer, and bit-endianness.\n\ The initializer may be one of the following types:\n\ a.) `int` bitarray, initialized to zeros, of given length\n\ b.) `bytes` or `bytearray` to initialize buffer directly\n\ c.) `str` of 0s and 1s, ignoring whitespace and \"_\"\n\ d.) 
iterable of integers 0 or 1.\n\ \n\ Optional keyword arguments:\n\ \n\ `endian`: Specifies the bit-endianness of the created bitarray object.\n\ Allowed values are `big` and `little` (the default is `big`).\n\ The bit-endianness affects the buffer representation of the bitarray.\n\ \n\ `buffer`: Any object which exposes a buffer. When provided, `initializer`\n\ cannot be present (or has to be `None`). The imported buffer may be\n\ read-only or writable, depending on the object type."); static PyTypeObject Bitarray_Type = { PyVarObject_HEAD_INIT(NULL, 0) "bitarray.bitarray", /* tp_name */ sizeof(bitarrayobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ (destructor) bitarray_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ (reprfunc) bitarray_repr, /* tp_repr */ &bitarray_as_number, /* tp_as_number */ &bitarray_as_sequence, /* tp_as_sequence */ &bitarray_as_mapping, /* tp_as_mapping */ PyObject_HashNotImplemented, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ &bitarray_as_buffer, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ bitarraytype_doc, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ richcompare, /* tp_richcompare */ offsetof(bitarrayobject, weakreflist), /* tp_weaklistoffset */ (getiterfunc) bitarray_iter, /* tp_iter */ 0, /* tp_iternext */ bitarray_methods, /* tp_methods */ 0, /* tp_members */ bitarray_getsets, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ 0, /* tp_init */ PyType_GenericAlloc, /* tp_alloc */ bitarray_new, /* tp_new */ PyObject_Del, /* tp_free */ }; /***************************** Module functions ***************************/ static PyObject * bits2bytes(PyObject *module, PyObject *n) { PyObject *zero, *seven, *eight, *a, *b; int cmp_res; if (!PyLong_Check(n)) return PyErr_Format(PyExc_TypeError, "'int' object expected, " "got '%s'", Py_TYPE(n)->tp_name); zero = PyLong_FromLong(0); cmp_res = PyObject_RichCompareBool(n, zero, Py_LT); Py_DECREF(zero); if (cmp_res < 0) return NULL; if (cmp_res) { PyErr_SetString(PyExc_ValueError, "non-negative int expected"); return NULL; } seven = PyLong_FromLong(7); a = PyNumber_Add(n, seven); /* a = n + 7 */ Py_DECREF(seven); if (a == NULL) return NULL; eight = PyLong_FromLong(8); b = PyNumber_FloorDivide(a, eight); /* b = a // 8 */ Py_DECREF(eight); Py_DECREF(a); return b; } PyDoc_STRVAR(bits2bytes_doc, "bits2bytes(n, /) -> int\n\ \n\ Return the number of bytes necessary to store n bits."); static PyObject * reconstructor(PyObject *module, PyObject *args) { PyTypeObject *type; Py_ssize_t nbytes; PyObject *bytes; bitarrayobject *res; char *endian_str; int endian, padbits, readonly; if (!PyArg_ParseTuple(args, "OOsii:_bitarray_reconstructor", &type, &bytes, &endian_str, &padbits, &readonly)) return NULL; if (!PyType_Check(type)) return PyErr_Format(PyExc_TypeError, "first argument must be a type " "object, got '%s'", Py_TYPE(type)->tp_name); if (!PyType_IsSubtype(type, &Bitarray_Type)) return PyErr_Format(PyExc_TypeError, "'%s' is not a subtype of " "bitarray", type->tp_name); if (!PyBytes_Check(bytes)) return PyErr_Format(PyExc_TypeError, "second argument must be bytes, " "got '%s'", Py_TYPE(bytes)->tp_name); if ((endian = endian_from_string(endian_str)) < 0) return NULL; nbytes = PyBytes_GET_SIZE(bytes); if (padbits >> 3 || (nbytes == 0 && padbits)) return PyErr_Format(PyExc_ValueError, "invalid
number of pad bits: %d", padbits); res = newbitarrayobject(type, 8 * nbytes - padbits, endian); if (res == NULL) return NULL; assert(Py_SIZE(res) == nbytes); memcpy(res->ob_item, PyBytes_AS_STRING(bytes), (size_t) nbytes); if (readonly) { set_padbits(res); res->readonly = 1; } return (PyObject *) res; } static PyObject * get_default_endian(PyObject *module) { return PyUnicode_FromString(ENDIAN_STR(default_endian)); } PyDoc_STRVAR(get_default_endian_doc, "get_default_endian() -> str\n\ \n\ Return the default bit-endianness for new bitarray objects being created.\n\ Unless `_set_default_endian('little')` was called, the default\n\ bit-endianness is `big`."); static PyObject * set_default_endian(PyObject *module, PyObject *args) { char *endian_str; int t; if (!PyArg_ParseTuple(args, "s:_set_default_endian", &endian_str)) return NULL; /* As endian_from_string() might return -1, we have to store its value in a temporary variable before setting default_endian. */ if ((t = endian_from_string(endian_str)) < 0) return NULL; default_endian = t; Py_RETURN_NONE; } PyDoc_STRVAR(set_default_endian_doc, "_set_default_endian(endian, /)\n\ \n\ Set the default bit-endianness for new bitarray objects being created."); static PyObject * sysinfo(PyObject *module, PyObject *args) { char *key; if (!PyArg_ParseTuple(args, "s:_sysinfo", &key)) return NULL; #define R(k, v) \ if (strcmp(key, k) == 0) \ return PyLong_FromLong((long) (v)) R("void*", sizeof(void *)); R("size_t", sizeof(size_t)); R("bitarrayobject", sizeof(bitarrayobject)); R("decodetreeobject", sizeof(decodetreeobject)); R("binode", sizeof(binode)); R("PY_LITTLE_ENDIAN", PY_LITTLE_ENDIAN); R("PY_BIG_ENDIAN", PY_BIG_ENDIAN); R("HAVE_BUILTIN_BSWAP64", HAVE_BUILTIN_BSWAP64); #ifdef Py_DEBUG /* Python configured using --with-pydebug */ R("Py_DEBUG", 1); #else R("Py_DEBUG", 0); #endif #ifndef NDEBUG /* bitarray compiled without -DNDEBUG */ R("DEBUG", 1); #else R("DEBUG", 0); #endif return PyErr_Format(PyExc_KeyError, "%s", key); #undef R } PyDoc_STRVAR(sysinfo_doc, "_sysinfo(key) -> int\n\ \n\ Return system and compile specific information given a key."); static PyMethodDef module_functions[] = { {"bits2bytes", (PyCFunction) bits2bytes, METH_O, bits2bytes_doc}, {"_bitarray_reconstructor", (PyCFunction) reconstructor, METH_VARARGS, reduce_doc}, {"get_default_endian", (PyCFunction) get_default_endian, METH_NOARGS, get_default_endian_doc}, {"_set_default_endian", (PyCFunction) set_default_endian, METH_VARARGS, set_default_endian_doc}, {"_sysinfo", (PyCFunction) sysinfo, METH_VARARGS, sysinfo_doc}, {NULL, NULL} /* sentinel */ }; /******************************* Install Module ***************************/ /* register bitarray as collections.abc.MutableSequence */ static int register_abc(void) { PyObject *abc_module, *mutablesequence, *res; if ((abc_module = PyImport_ImportModule("collections.abc")) == NULL) return -1; mutablesequence = PyObject_GetAttrString(abc_module, "MutableSequence"); Py_DECREF(abc_module); if (mutablesequence == NULL) return -1; res = PyObject_CallMethod(mutablesequence, "register", "O", (PyObject *) &Bitarray_Type); Py_DECREF(mutablesequence); if (res == NULL) return -1; Py_DECREF(res); return 0; } static PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "_bitarray", 0, -1, module_functions, }; PyMODINIT_FUNC PyInit__bitarray(void) { PyObject *m; /* setup translation table, which maps each byte to its reversed: reverse_trans = {0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, ..., 0xff} */ setup_table(reverse_trans, 'r'); if ((m = 
PyModule_Create(&moduledef)) == NULL) return NULL; if (PyType_Ready(&Bitarray_Type) < 0) return NULL; Py_SET_TYPE(&Bitarray_Type, &PyType_Type); Py_INCREF((PyObject *) &Bitarray_Type); PyModule_AddObject(m, "bitarray", (PyObject *) &Bitarray_Type); if (register_abc() < 0) return NULL; if (PyType_Ready(&DecodeTree_Type) < 0) return NULL; Py_SET_TYPE(&DecodeTree_Type, &PyType_Type); Py_INCREF((PyObject *) &DecodeTree_Type); PyModule_AddObject(m, "decodetree", (PyObject *) &DecodeTree_Type); if (PyType_Ready(&DecodeIter_Type) < 0) return NULL; Py_SET_TYPE(&DecodeIter_Type, &PyType_Type); if (PyType_Ready(&BitarrayIter_Type) < 0) return NULL; Py_SET_TYPE(&BitarrayIter_Type, &PyType_Type); if (PyType_Ready(&SearchIter_Type) < 0) return NULL; Py_SET_TYPE(&SearchIter_Type, &PyType_Type); PyModule_AddObject(m, "__version__", PyUnicode_FromString(BITARRAY_VERSION)); return m; } bitarray-3.7.1/bitarray/_util.c000066400000000000000000002101731505414144000164360ustar00rootroot00000000000000/* Copyright (c) 2019 - 2025, Ilan Schnell; All Rights Reserved bitarray is published under the PSF license. This file contains the C implementation of some useful utility functions. Author: Ilan Schnell */ #define PY_SSIZE_T_CLEAN #include "Python.h" #include "pythoncapi_compat.h" #include "bitarray.h" /* set during module initialization */ static PyTypeObject *bitarray_type; #define bitarray_Check(obj) PyObject_TypeCheck((obj), bitarray_type) /* Return 0 if obj is bitarray. If not, set exception and return -1. */ static int ensure_bitarray(PyObject *obj) { if (bitarray_Check(obj)) return 0; PyErr_Format(PyExc_TypeError, "bitarray expected, not '%s'", Py_TYPE(obj)->tp_name); return -1; } /* Return new bitarray of length 'nbits', endianness given by the PyObject 'endian' (which may be Py_None). Unless -1, 'c' is placed into all characters of buffer. */ static bitarrayobject * new_bitarray(Py_ssize_t nbits, PyObject *endian, int c) { PyObject *args; /* args for bitarray() */ bitarrayobject *res; args = Py_BuildValue("nOO", nbits, endian, Py_Ellipsis); if (args == NULL) return NULL; /* equivalent to: res = bitarray(nbits, endian, Ellipsis) */ res = (bitarrayobject *) PyObject_CallObject((PyObject *) bitarray_type, args); Py_DECREF(args); if (res == NULL) return NULL; assert(res->nbits == nbits && res->readonly == 0 && res->buffer == NULL); assert(-1 <= c && c < 256); if (c >= 0 && nbits) memset(res->ob_item, c, (size_t) Py_SIZE(res)); return res; } /* Starting from 64-bit word index i, count remaining population in bitarray a. 
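For example, for a bitarray of 150 bits, count_from_word(a, 1) counts the complete word a[64:128] plus the 22 remaining bits a[128:150].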
Basically equivalent to: a[64 * i:].count() */ static Py_ssize_t count_from_word(bitarrayobject *a, Py_ssize_t i) { const Py_ssize_t nbits = a->nbits; Py_ssize_t cnt; assert(i >= 0); if (64 * i >= nbits) return 0; cnt = popcnt_words(WBUFF(a) + i, nbits / 64 - i); /* complete words */ if (nbits % 64) cnt += popcnt_64(zlw(a)); /* remaining bits */ return cnt; } /* like resize() but without over-allocation or buffer import/export checks */ static int resize_lite(bitarrayobject *self, Py_ssize_t nbits) { const Py_ssize_t newsize = BYTES(nbits); assert(self->allocated >= Py_SIZE(self)); assert(self->readonly == 0); assert(self->ob_exports == 0); assert(self->buffer == NULL); /* bypass everything when buffer size hasn't changed */ if (newsize == Py_SIZE(self)) { self->nbits = nbits; return 0; } if (newsize == 0) { PyMem_Free(self->ob_item); self->ob_item = NULL; Py_SET_SIZE(self, 0); self->allocated = 0; self->nbits = 0; return 0; } self->ob_item = PyMem_Realloc(self->ob_item, newsize); if (self->ob_item == NULL) { PyErr_NoMemory(); return -1; } Py_SET_SIZE(self, newsize); self->allocated = newsize; self->nbits = nbits; return 0; } /* ---------------------------- zeros / ones --------------------------- */ static PyObject * zeros(PyObject *module, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"", "endian", NULL}; PyObject *endian = Py_None; Py_ssize_t n; if (!PyArg_ParseTupleAndKeywords(args, kwds, "n|O:zeros", kwlist, &n, &endian)) return NULL; return (PyObject *) new_bitarray(n, endian, 0); } PyDoc_STRVAR(zeros_doc, "zeros(n, /, endian=None) -> bitarray\n\ \n\ Create a bitarray of length `n`, with all values `0`, and optional\n\ bit-endianness (`little` or `big`)."); static PyObject * ones(PyObject *module, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"", "endian", NULL}; PyObject *endian = Py_None; Py_ssize_t n; if (!PyArg_ParseTupleAndKeywords(args, kwds, "n|O:ones", kwlist, &n, &endian)) return NULL; return (PyObject *) new_bitarray(n, endian, 0xff); } PyDoc_STRVAR(ones_doc, "ones(n, /, endian=None) -> bitarray\n\ \n\ Create a bitarray of length `n`, with all values `1`, and optional\n\ bit-endianness (`little` or `big`)."); /* ------------------------------- count_n ----------------------------- */ /* Return smallest index i for which a.count(vi, 0, i) == n. When n exceeds the total count, the result is a negative number; the negative of the total count + 1, which is useful for displaying error messages. */ static Py_ssize_t count_n_core(bitarrayobject *a, Py_ssize_t n, int vi) { const Py_ssize_t nbits = a->nbits; uint64_t *wbuff = WBUFF(a); Py_ssize_t i = 0; /* index (result) */ Py_ssize_t t = 0; /* total count up to index */ Py_ssize_t m; /* popcount in each block */ assert(0 <= n && n <= nbits); /* by counting big blocks we save comparisons and updates */ #define BLOCK_BITS 4096 /* block size: 4096 bits = 64 words */ while (i + BLOCK_BITS < nbits) { m = popcnt_words(wbuff + i / 64, BLOCK_BITS / 64); if (!vi) m = BLOCK_BITS - m; if (t + m >= n) break; t += m; i += BLOCK_BITS; } #undef BLOCK_BITS while (i + 64 < nbits) { /* count blocks of single (64-bit) words */ m = popcnt_64(wbuff[i / 64]); if (!vi) m = 64 - m; if (t + m >= n) break; t += m; i += 64; } while (i < nbits && t < n) { t += getbit(a, i) == vi; i++; } if (t < n) { /* n exceeds total count */ assert((vi ? 
t : nbits - t) == count_from_word(a, 0)); return -(t + 1); } return i; } static PyObject * count_n(PyObject *module, PyObject *args) { bitarrayobject *a; Py_ssize_t n, i; int vi = 1; if (!PyArg_ParseTuple(args, "O!n|O&:count_n", bitarray_type, (PyObject *) &a, &n, conv_pybit, &vi)) return NULL; if (n < 0) { PyErr_SetString(PyExc_ValueError, "non-negative integer expected"); return NULL; } if (n > a->nbits) return PyErr_Format(PyExc_ValueError, "n = %zd larger than bitarray " "length %zd", n, a->nbits); i = count_n_core(a, n, vi); /* do actual work here */ if (i < 0) return PyErr_Format(PyExc_ValueError, "n = %zd exceeds total count " "(a.count(%d) = %zd)", n, vi, -(i + 1)); return PyLong_FromSsize_t(i); } PyDoc_STRVAR(count_n_doc, "count_n(a, n, value=1, /) -> int\n\ \n\ Return lowest index `i` for which `a[:i].count(value) == n`.\n\ Raises `ValueError` when `n` exceeds total count (`a.count(value)`)."); /* --------------------------- unary functions ------------------------- */ static PyObject * parity(PyObject *module, PyObject *obj) { bitarrayobject *a; uint64_t x, *wbuff; Py_ssize_t i; if (ensure_bitarray(obj) < 0) return NULL; a = (bitarrayobject *) obj; wbuff = WBUFF(a); x = zlw(a); i = a->nbits / 64; while (i--) x ^= *wbuff++; return PyLong_FromLong(parity_64(x)); } PyDoc_STRVAR(parity_doc, "parity(a, /) -> int\n\ \n\ Return parity of bitarray `a`.\n\ `parity(a)` is equivalent to `a.count() % 2` but more efficient."); /* Internal functions, like sum_indices(), but bitarrays are limited in size. For details see: devel/test_sum_indices.py */ static PyObject * ssqi(PyObject *module, PyObject *args) { static char count_table[256], sum_table[256], sum_sqr_table[256]; static int setup = -1; /* endianness of tables */ bitarrayobject *a; uint64_t nbytes, i; uint64_t sm = 0; /* accumulated sum */ int mode = 1; if (!PyArg_ParseTuple(args, "O!|i", bitarray_type, (PyObject *) &a, &mode)) return NULL; if (mode < 1 || mode > 2) return PyErr_Format(PyExc_ValueError, "unexpected mode %d", mode); if (((uint64_t) a->nbits) > (mode == 1 ? 6074001000LLU : 3810778LLU)) return PyErr_Format(PyExc_OverflowError, "ssqi %zd", a->nbits); if (setup != a->endian) { setup_table(count_table, 'c'); setup_table(sum_table, IS_LE(a) ? 'a' : 'A'); setup_table(sum_sqr_table, IS_LE(a) ? 's' : 'S'); setup = a->endian; } nbytes = Py_SIZE(a); set_padbits(a); for (i = 0; i < nbytes; i++) { unsigned char c = a->ob_item[i]; if (c) { uint64_t k = count_table[c], z1 = sum_table[c]; if (mode == 1) { sm += k * 8LLU * i + z1; } else { uint64_t z2 = (unsigned char) sum_sqr_table[c]; sm += (k * 64LLU * i + 16LLU * z1) * i + z2; } } } return PyLong_FromUnsignedLongLong(sm); } static PyObject * xor_indices(PyObject *module, PyObject *obj) { static char parity_table[256], xor_table[256]; static int setup = -1; /* endianness of xor_table */ bitarrayobject *a; Py_ssize_t res = 0, nbytes, i; if (ensure_bitarray(obj) < 0) return NULL; a = (bitarrayobject *) obj; nbytes = Py_SIZE(a); set_padbits(a); if (setup != a->endian) { setup_table(xor_table, IS_LE(a) ? 
'x' : 'X'); setup_table(parity_table, 'p'); setup = a->endian; } for (i = 0; i < nbytes; i++) { unsigned char c = a->ob_item[i]; if (parity_table[c]) res ^= i << 3; res ^= xor_table[c]; } return PyLong_FromSsize_t(res); } PyDoc_STRVAR(xor_indices_doc, "xor_indices(a, /) -> int\n\ \n\ Return xor reduced indices of all active bits in bitarray `a`.\n\ This is essentially equivalent to\n\ `reduce(operator.xor, (i for i, v in enumerate(a) if v))`."); /* --------------------------- binary functions ------------------------ */ static PyObject * binary_function(PyObject *args, const char *format, const char oper) { Py_ssize_t cnt = 0, cwords, i; bitarrayobject *a, *b; uint64_t *wbuff_a, *wbuff_b; int rbits; if (!PyArg_ParseTuple(args, format, bitarray_type, (PyObject *) &a, bitarray_type, (PyObject *) &b)) return NULL; if (ensure_eq_size_endian(a, b) < 0) return NULL; wbuff_a = WBUFF(a); wbuff_b = WBUFF(b); cwords = a->nbits / 64; /* number of complete 64-bit words */ rbits = a->nbits % 64; /* remaining bits */ switch (oper) { case '&': /* count and */ for (i = 0; i < cwords; i++) cnt += popcnt_64(wbuff_a[i] & wbuff_b[i]); if (rbits) cnt += popcnt_64(zlw(a) & zlw(b)); break; case '|': /* count or */ for (i = 0; i < cwords; i++) cnt += popcnt_64(wbuff_a[i] | wbuff_b[i]); if (rbits) cnt += popcnt_64(zlw(a) | zlw(b)); break; case '^': /* count xor */ for (i = 0; i < cwords; i++) cnt += popcnt_64(wbuff_a[i] ^ wbuff_b[i]); if (rbits) cnt += popcnt_64(zlw(a) ^ zlw(b)); break; case 'a': /* any and */ for (i = 0; i < cwords; i++) { if (wbuff_a[i] & wbuff_b[i]) Py_RETURN_TRUE; } return PyBool_FromLong(rbits && (zlw(a) & zlw(b))); case 's': /* is subset */ for (i = 0; i < cwords; i++) { if ((wbuff_a[i] & wbuff_b[i]) != wbuff_a[i]) Py_RETURN_FALSE; } return PyBool_FromLong(rbits == 0 || (zlw(a) & zlw(b)) == zlw(a)); default: Py_UNREACHABLE(); } return PyLong_FromSsize_t(cnt); } #define COUNT_FUNC(oper, ostr) \ static PyObject * \ count_ ## oper (PyObject *module, PyObject *args) \ { \ return binary_function(args, "O!O!:count_" #oper, *ostr); \ } \ PyDoc_STRVAR(count_ ## oper ## _doc, \ "count_" #oper "(a, b, /) -> int\n\ \n\ Return `(a " ostr " b).count()` in a memory efficient manner,\n\ as no intermediate bitarray object gets created.") COUNT_FUNC(and, "&"); /* count_and */ COUNT_FUNC(or, "|"); /* count_or */ COUNT_FUNC(xor, "^"); /* count_xor */ static PyObject * any_and(PyObject *module, PyObject *args) { return binary_function(args, "O!O!:any_and", 'a'); } PyDoc_STRVAR(any_and_doc, "any_and(a, b, /) -> bool\n\ \n\ Efficient implementation of `any(a & b)`."); static PyObject * subset(PyObject *module, PyObject *args) { return binary_function(args, "O!O!:subset", 's'); } PyDoc_STRVAR(subset_doc, "subset(a, b, /) -> bool\n\ \n\ Return `True` if bitarray `a` is a subset of bitarray `b`.\n\ `subset(a, b)` is equivalent to `a | b == b` (and equally `a & b == a`) but\n\ more efficient as no intermediate bitarray object is created and the buffer\n\ iteration is stopped as soon as one mismatch is found."); static PyObject * correspond_all(PyObject *module, PyObject *args) { Py_ssize_t nff = 0, nft = 0, ntf = 0, ntt = 0, cwords, i; bitarrayobject *a, *b; uint64_t u, v, not_u, not_v; int rbits; if (!PyArg_ParseTuple(args, "O!O!:correspond_all", bitarray_type, (PyObject *) &a, bitarray_type, (PyObject *) &b)) return NULL; if (ensure_eq_size_endian(a, b) < 0) return NULL; cwords = a->nbits / 64; /* complete 64-bit words */ rbits = a->nbits % 64; /* remaining bits */ for (i = 0; i < cwords; i++) { u = WBUFF(a)[i]; v 
= WBUFF(b)[i]; not_u = ~u; not_v = ~v; nff += popcnt_64(not_u & not_v); nft += popcnt_64(not_u & v); ntf += popcnt_64(u & not_v); ntt += popcnt_64(u & v); } if (rbits) { u = zlw(a); v = zlw(b); not_u = ~u; not_v = ~v; /* for nff we need to subtract the number of unused 1 bits */ nff += popcnt_64(not_u & not_v) - (64 - rbits); nft += popcnt_64(not_u & v); ntf += popcnt_64(u & not_v); ntt += popcnt_64(u & v); } return Py_BuildValue("nnnn", nff, nft, ntf, ntt); } PyDoc_STRVAR(correspond_all_doc, "correspond_all(a, b, /) -> tuple\n\ \n\ Return tuple with counts of: ~a & ~b, ~a & b, a & ~b, a & b"); static void byteswap_core(Py_buffer view, Py_ssize_t n) { char *buff = view.buf; Py_ssize_t m = view.len / n, k; assert(n >= 1 && n * m == view.len); if (n == 8 && HAVE_BUILTIN_BSWAP64) { uint64_t *w = (uint64_t *) buff; for (k = 0; k < m; k++) w[k] = builtin_bswap64(w[k]); } #if (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5))) else if (n == 4) { uint32_t *w = (uint32_t *) buff; for (k = 0; k < m; k++) w[k] = __builtin_bswap32(w[k]); } else if (n == 2) { uint16_t *w = (uint16_t *) buff; for (k = 0; k < m; k++) w[k] = __builtin_bswap16(w[k]); } #endif else if (n >= 2) { for (k = 0; k < view.len; k += n) swap_bytes(buff + k, n); } } static PyObject * byteswap(PyObject *module, PyObject *args) { PyObject *buffer; Py_buffer view; Py_ssize_t n = 0; if (!PyArg_ParseTuple(args, "O|n:byteswap", &buffer, &n)) return NULL; if (n < 0) return PyErr_Format(PyExc_ValueError, "positive int expected, got %zd", n); if (PyObject_GetBuffer(buffer, &view, PyBUF_SIMPLE | PyBUF_WRITABLE) < 0) return NULL; if (n == 0) /* avoid division by zero below when view.len = 0 */ n = Py_MAX(1, view.len); if (view.len % n) { PyErr_Format(PyExc_ValueError, "buffer size %zd not multiple of %zd", view.len, n); PyBuffer_Release(&view); return NULL; } byteswap_core(view, n); PyBuffer_Release(&view); Py_RETURN_NONE; } PyDoc_STRVAR(byteswap_doc, "byteswap(a, n=, /)\n\ \n\ Reverse every `n` consecutive bytes of `a` in-place.\n\ By default, all bytes are reversed. Note that `n` is not limited to 2, 4\n\ or 8, but can be any positive integer.\n\ Also, `a` may be any object that exposes a writable buffer.\n\ Nothing about this function is specific to bitarray objects."); /* ---------------------------- serialization -------------------------- */ /* The binary format used here is similar to the one used for pickling bitarray objects. However, this format has a head byte which encodes both the bit-endianness and the number of pad bits, whereas the binary pickle blob does not. */ static PyObject * serialize(PyObject *module, PyObject *obj) { bitarrayobject *a; PyObject *result; Py_ssize_t nbytes; char *str; if (ensure_bitarray(obj) < 0) return NULL; a = (bitarrayobject *) obj; nbytes = Py_SIZE(a); result = PyBytes_FromStringAndSize(NULL, nbytes + 1); if (result == NULL) return NULL; str = PyBytes_AsString(result); set_padbits(a); *str = (IS_BE(a) ? 0x10 : 0x00) | ((char) PADBITS(a)); memcpy(str + 1, a->ob_item, (size_t) nbytes); return result; } PyDoc_STRVAR(serialize_doc, "serialize(bitarray, /) -> bytes\n\ \n\ Return a serialized representation of the bitarray, which may be passed to\n\ `deserialize()`. 
It efficiently represents the bitarray object (including\n\ its bit-endianness) and is guaranteed not to change in future releases."); static PyObject * deserialize(PyObject *module, PyObject *buffer) { Py_buffer view; bitarrayobject *a; unsigned char head; Py_ssize_t nbits; if (PyObject_GetBuffer(buffer, &view, PyBUF_SIMPLE) < 0) return NULL; if (view.len == 0) { PyErr_SetString(PyExc_ValueError, "non-empty bytes-like object expected"); goto error; } head = *((unsigned char *) view.buf); if (head & 0xe8 || (view.len == 1 && head & 0xef)) { PyErr_Format(PyExc_ValueError, "invalid header byte: 0x%02x", head); goto error; } /* create bitarray of desired length */ nbits = 8 * (view.len - 1) - ((Py_ssize_t) (head & 0x07)); if ((a = new_bitarray(nbits, Py_None, -1)) == NULL) goto error; /* set bit-endianness and buffer */ a->endian = head & 0x10 ? ENDIAN_BIG : ENDIAN_LITTLE; assert(Py_SIZE(a) == view.len - 1); memcpy(a->ob_item, ((char *) view.buf) + 1, (size_t) view.len - 1); PyBuffer_Release(&view); return (PyObject *) a; error: PyBuffer_Release(&view); return NULL; } PyDoc_STRVAR(deserialize_doc, "deserialize(bytes, /) -> bitarray\n\ \n\ Return a bitarray given a bytes-like representation such as returned\n\ by `serialize()`."); /* ----------------------------- hexadecimal --------------------------- */ static const char hexdigits[] = "0123456789abcdef"; static int hex_to_int(char c) { if ('0' <= c && c <= '9') return c - '0'; if ('a' <= c && c <= 'f') return c - 'a' + 10; if ('A' <= c && c <= 'F') return c - 'A' + 10; return -1; } /* return hexadecimal string from bitarray, on failure set exception and return NULL */ static char * ba2hex_core(bitarrayobject *a, Py_ssize_t group, char *sep) { const int be = IS_BE(a); size_t strsize = a->nbits / 4, j, nsep; Py_ssize_t i; char *buff = a->ob_item, *str; nsep = (group && strsize) ? strlen(sep) : 0; /* 0 means no grouping */ if (nsep) strsize += nsep * ((strsize - 1) / group); str = PyMem_New(char, strsize + 1); if (str == NULL) { PyErr_NoMemory(); return NULL; } for (i = j = 0; i < a->nbits / 4; i++) { unsigned char c = buff[i / 2]; if (nsep && i && i % group == 0) { memcpy(str + j, sep, nsep); j += nsep; } str[j++] = hexdigits[(i + be) % 2 ? c >> 4 : 0x0f & c]; } assert(j == strsize); str[strsize] = 0; /* terminate string */ return str; } static PyObject * ba2hex(PyObject *module, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"", "group", "sep", NULL}; PyObject *result; bitarrayobject *a; Py_ssize_t group = 0; char *sep = " ", *str; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|ns:ba2hex", kwlist, bitarray_type, (PyObject *) &a, &group, &sep)) return NULL; if (a->nbits % 4) { PyErr_Format(PyExc_ValueError, "bitarray length %zd not " "multiple of 4", a->nbits); return NULL; } if (group < 0) { PyErr_Format(PyExc_ValueError, "non-negative integer " "expected for group, got: %zd", group); return NULL; } str = ba2hex_core(a, group, sep); if (str == NULL) return NULL; result = PyUnicode_FromString(str); PyMem_Free((void *) str); return result; } PyDoc_STRVAR(ba2hex_doc, "ba2hex(bitarray, /, group=0, sep=' ') -> hexstr\n\ \n\ Return a string containing the hexadecimal representation of\n\ the bitarray (which has to be multiple of 4 in length).\n\ When grouped, the string `sep` is inserted between groups\n\ of `group` characters, default is a space."); /* Translate hexadecimal digits from 'hexstr' into the bitarray 'a' buffer, which must be initialized to zeros. Each digit corresponds to 4 bits in the bitarray. 
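For example, with big bit-endianness (the default), the two digits 'a7' fill the first buffer byte with 0xa7, so hex2ba('a7') gives bitarray('10100111'): 'a' supplies the bits 1010 and '7' the bits 0111.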
Note that the number of hexadecimal digits may be odd. */ static int hex2ba_core(bitarrayobject *a, Py_buffer hexstr) { const int be = IS_BE(a); const char *str = hexstr.buf; Py_ssize_t i = 0, j; assert(a->nbits == 4 * hexstr.len); for (j = 0; j < hexstr.len; j++) { unsigned char c = str[j]; int x = hex_to_int(c); if (x < 0) { if (Py_UNICODE_ISSPACE(c)) continue; PyErr_Format(PyExc_ValueError, "invalid digit found for " "base16, got '%c' (0x%02x)", c, c); return -1; } assert(x >> 4 == 0); a->ob_item[i / 2] |= x << 4 * ((i + be) % 2); i++; } assert(i <= a->nbits); return resize_lite(a, 4 * i); /* in case we ignored whitespace */ } static PyObject * hex2ba(PyObject *module, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"", "endian", NULL}; PyObject *endian = Py_None; Py_buffer hexstr; bitarrayobject *a; if (!PyArg_ParseTupleAndKeywords(args, kwds, "s*|O:hex2ba", kwlist, &hexstr, &endian)) return NULL; a = new_bitarray(4 * hexstr.len, endian, 0); if (a == NULL) goto error; if (hex2ba_core(a, hexstr) < 0) goto error; PyBuffer_Release(&hexstr); return (PyObject *) a; error: PyBuffer_Release(&hexstr); Py_XDECREF((PyObject *) a); return NULL; } PyDoc_STRVAR(hex2ba_doc, "hex2ba(hexstr, /, endian=None) -> bitarray\n\ \n\ Bitarray of hexadecimal representation. hexstr may contain any number\n\ (including odd numbers) of hex digits (upper or lower case).\n\ Whitespace is ignored."); /* ----------------------- base 2, 4, 8, 16, 32, 64 -------------------- */ /* RFC 4648 Base32 alphabet */ static const char base32_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"; /* standard base 64 alphabet - also described in RFC 4648 */ static const char base64_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; /* Given the length of the base m in [1..6] and a character c, return its index in the base 2**m alphabet, or -1 when c is not included. Note: i >> m is true when i is not in range(0, 2**m) */ static int digit_to_int(int m, char c) { static signed char table[2][128]; static int setup = 0; int i; assert(1 <= m && m <= 6); if (m < 5) { /* base 2, 4, 8, 16 */ i = hex_to_int(c); return i >> m ? -1 : i; } if (0x80 & c) /* non-ASCII */ return -1; if (!setup) { memset(table, 0xff, sizeof table); /* (signed char) 0xff -> -1 */ for (i = 0; i < 32; i++) table[0][(unsigned char) base32_alphabet[i]] = i; for (i = 0; i < 64; i++) table[1][(unsigned char) base64_alphabet[i]] = i; setup = 1; } return table[m - 5][(unsigned char) c]; /* base 32, 64 */ } /* return m = log2(n) for m in [1..6] */ static int base_to_length(int n) { int m = 0; if (!n || n & (n - 1)) { PyErr_SetString(PyExc_ValueError, "base must be a power of 2"); return -1; } while (n >>= 1) m++; if (1 <= m && m <= 6) return m; PyErr_SetString(PyExc_ValueError, "base must be 2, 4, 8, 16, 32 or 64"); return -1; } /* return ASCII string from bitarray and base length m, on failure set exception and return NULL */ static char * ba2base_core(bitarrayobject *a, int m, Py_ssize_t group, char *sep) { const int le = IS_LE(a); const char *alphabet; size_t strsize = a->nbits / m, j, nsep; Py_ssize_t i; char *str; assert(1 <= m && m <= 6 && a->nbits % m == 0); switch (m) { case 5: alphabet = base32_alphabet; break; case 6: alphabet = base64_alphabet; break; default: alphabet = hexdigits; } nsep = (group && strsize) ? 
strlen(sep) : 0; /* 0 means no grouping */ if (nsep) strsize += nsep * ((strsize - 1) / group); str = PyMem_New(char, strsize + 1); if (str == NULL) { PyErr_NoMemory(); return NULL; } for (i = j = 0; i < a->nbits / m; i++) { int k, x = 0; if (nsep && i && i % group == 0) { memcpy(str + j, sep, nsep); j += nsep; } for (k = 0; k < m; k++) { int q = le ? k : (m - k - 1); x |= getbit(a, i * m + k) << q; } assert(x >> m == 0); str[j++] = alphabet[x]; } assert(j == strsize); str[strsize] = 0; /* terminate string */ return str; } static PyObject * ba2base(PyObject *module, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"", "", "group", "sep", NULL}; bitarrayobject *a; PyObject *result; Py_ssize_t group = 0; char *sep = " ", *str; int n, m; if (!PyArg_ParseTupleAndKeywords(args, kwds, "iO!|ns:ba2base", kwlist, &n, bitarray_type, (PyObject *) &a, &group, &sep)) return NULL; if ((m = base_to_length(n)) < 0) return NULL; if (a->nbits % m) { PyErr_Format(PyExc_ValueError, "bitarray length %zd not " "multiple of %d", a->nbits, m); return NULL; } if (group < 0) { PyErr_Format(PyExc_ValueError, "non-negative integer " "expected for group, got: %zd", group); return NULL; } if (m == 4) str = ba2hex_core(a, group, sep); else str = ba2base_core(a, m, group, sep); if (str == NULL) return NULL; result = PyUnicode_FromString(str); PyMem_Free((void *) str); return result; } PyDoc_STRVAR(ba2base_doc, "ba2base(n, bitarray, /, group=0, sep=' ') -> str\n\ \n\ Return a string containing the base `n` ASCII representation of\n\ the bitarray. Allowed values for `n` are 2, 4, 8, 16, 32 and 64.\n\ The bitarray has to be multiple of length 1, 2, 3, 4, 5 or 6 respectively.\n\ For `n=32` the RFC 4648 Base32 alphabet is used, and for `n=64` the\n\ standard base 64 alphabet is used.\n\ When grouped, the string `sep` is inserted between groups\n\ of `group` characters, default is a space."); /* translate ASCII digits (with base length m) into bitarray buffer */ static int base2ba_core(bitarrayobject *a, Py_buffer asciistr, int m) { const char *str = asciistr.buf; const int le = IS_LE(a); Py_ssize_t i = 0, j; assert(a->nbits == asciistr.len * m && 1 <= m && m <= 6); for (j = 0; j < asciistr.len; j++) { unsigned char c = str[j]; int k, x = digit_to_int(m, c); if (x < 0) { if (Py_UNICODE_ISSPACE(c)) continue; PyErr_Format(PyExc_ValueError, "invalid digit found for " "base%d, got '%c' (0x%02x)", 1 << m, c, c); return -1; } assert(x >> m == 0); for (k = 0; k < m; k++) { int q = le ? k : (m - k - 1); setbit(a, i++, x & (1 << q)); } } assert(i <= a->nbits); return resize_lite(a, i); /* in case we ignored whitespace */ } static PyObject * base2ba(PyObject *module, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"", "", "endian", NULL}; PyObject *endian = Py_None; Py_buffer asciistr; bitarrayobject *a = NULL; int m, n, t; /* n = 2**m */ if (!PyArg_ParseTupleAndKeywords(args, kwds, "is*|O:base2ba", kwlist, &n, &asciistr, &endian)) return NULL; if ((m = base_to_length(n)) < 0) goto error; a = new_bitarray(m * asciistr.len, endian, m == 4 ? 0 : -1); if (a == NULL) goto error; t = (m == 4) ? 
hex2ba_core(a, asciistr) : base2ba_core(a, asciistr, m); if (t < 0) goto error; PyBuffer_Release(&asciistr); return (PyObject *) a; error: PyBuffer_Release(&asciistr); Py_XDECREF((PyObject *) a); return NULL; } PyDoc_STRVAR(base2ba_doc, "base2ba(n, asciistr, /, endian=None) -> bitarray\n\ \n\ Bitarray of base `n` ASCII representation.\n\ Allowed values for `n` are 2, 4, 8, 16, 32 and 64.\n\ For `n=32` the RFC 4648 Base32 alphabet is used, and for `n=64` the\n\ standard base 64 alphabet is used. Whitespace is ignored."); /* ------------------------ utility C functions ------------------------ */ /* Consume one item from iterator and return its value as an integer in range(256). On failure, set an exception and return -1. */ static int next_char(PyObject *iter) { PyObject *item; Py_ssize_t v; if ((item = PyIter_Next(iter)) == NULL) { if (PyErr_Occurred()) /* from PyIter_Next() */ return -1; PyErr_SetString(PyExc_StopIteration, "unexpected end of stream"); return -1; } v = PyNumber_AsSsize_t(item, NULL); Py_DECREF(item); if (v == -1 && PyErr_Occurred()) return -1; if (v >> 8) { PyErr_Format(PyExc_ValueError, "byte must be in range(0, 256), got: %zd", v); return -1; } return (int) v; } /* write n bytes (into buffer str) representing non-negative integer i, using little endian byte-order */ static void write_n(char *str, int n, Py_ssize_t i) { int len = 0; assert(n <= 8 && i >= 0); while (len < n) { str[len++] = (char) i & 0xff; i >>= 8; } assert(i == 0); } /* read n bytes from iter and return corresponding non-negative integer, using little endian byte-order */ static Py_ssize_t read_n(PyObject *iter, int n) { Py_ssize_t i = 0; int j, c; assert(PyIter_Check(iter)); assert(n <= 8); for (j = 0; j < n; j++) { if ((c = next_char(iter)) < 0) return -1; i |= ((Py_ssize_t) c) << (8 * j); } if (i < 0) { PyErr_Format(PyExc_ValueError, "read %d bytes got negative value: %zd", n, i); return -1; } return i; } /* return number of bytes necessary to represent non-negative integer i */ static int byte_length(Py_ssize_t i) { int n = 0; assert(i >= 0); while (i) { i >>= 8; n++; } return n; } /*********************** sparse bitarray compression ***************** * * see also: doc/sparse_compression.rst */ /* Bitarray buffer size (in bytes) that can be indexed by n bytes. A sparse block of type n uses n bytes to index each bit. The decoded block size, that is the bitarray buffer size corresponding to a sparse block of type n, is given by BSI(n). Using 1 byte we can index 256 bits which have a decoded block size of 32 bytes: BSI(1) = 32 (BSI = Buffer Size Indexable) Moving from block type n to n + 1 multiplies the decoded block size by a factor of 256 (as the extra byte can index 256 times as much): BSI(n + 1) = 256 * BSI(n) */ #define BSI(n) (((Py_ssize_t) 1) << (8 * (n) - 3)) /* segment size in bytes (not to be confused with block size, see below) Although of little practical value, the code will work for values of SEGSIZE of: 8, 16, 32 BSI(n) must be divisible by SEGSIZE. So, 32 must be divisible by SEGSIZE. SEGSIZE must also be divisible by the word size sizeof(uint64_t) = 8. */ #define SEGSIZE 32 /* number of segments for given bitarray */ #define NSEG(self) ((Py_SIZE(self) + SEGSIZE - 1) / SEGSIZE) /* Calculate an array with the running totals (rts) for 256 bit segments. Note that we call these "segments", as opposed to "blocks", in order to avoid confusion with encode blocks. 
0 1 2 3 4 index in rts array, i +-----------+-----------+-----------+-----------+ | 5 | 0 | 3 | 4 | segment population | | | | | | [0:256] | [256:512] | [512:768] | [768:987] | bitarray slice +-----------+-----------+-----------+-----------+ 0 5 5 8 12 running totals, rts[i] In this example we have a bitarray of length nbits = 987. Note that: * The number of segments is given by NSEG(self). Here we have 4 segments: NSEG(self) = 4 * The rts array always has NSEG(self) + 1 elements, such that the last element is always indexed by NSEG(self). * The element rts[0] is always zero. * The last element rts[NSEG(self)] is always the total count. Here: rts[NSEG(self)] = rts[4] = 12 * The last segment may be partial. In that case, its size is given by nbits % 256. Here: nbits % 256 = 987 % 256 = 219 As each segment (at large) covers 256 bits (32 bytes), and each element in the running totals array takes up 8 bytes (on a 64-bit machine), the additional memory to accommodate the rts array is therefore 1/4 of the bitarray's memory. However, calculating this array upfront allows sc_count() to simply look up two entries from the array and take their difference. Thus, the speedup is significant. The function sc_write_indices() also takes advantage of the running totals array. It loops over segments and skips to the next segment as soon as the index count (population) of the current segment is reached. */ static Py_ssize_t * sc_rts(bitarrayobject *a) { const Py_ssize_t n_seg = NSEG(a); /* total number of segments */ const Py_ssize_t c_seg = a->nbits / (8 * SEGSIZE); /* complete segments */ char zeros[SEGSIZE]; /* segment with only zeros */ char *buff = a->ob_item; /* buffer in current segment */ Py_ssize_t cnt = 0; /* current count */ Py_ssize_t *res, m; memset(zeros, 0x00, SEGSIZE); res = PyMem_New(Py_ssize_t, n_seg + 1); if (res == NULL) { PyErr_NoMemory(); return NULL; } for (m = 0; m < c_seg; m++, buff += SEGSIZE) { /* complete segments */ res[m] = cnt; assert(buff + SEGSIZE <= a->ob_item + Py_SIZE(a)); if (memcmp(buff, zeros, SEGSIZE)) /* segment has not only zeros */ cnt += popcnt_words((uint64_t *) buff, SEGSIZE / 8); } res[c_seg] = cnt; if (n_seg > c_seg) { /* we have a final partial segment */ cnt += count_from_word(a, c_seg * SEGSIZE / 8); res[n_seg] = cnt; } return res; } /* expose sc_rts() to Python during debug mode for testing */ #ifndef NDEBUG static PyObject * module_sc_rts(PyObject *module, PyObject *obj) { PyObject *list; bitarrayobject *a; Py_ssize_t *rts, i; assert(bitarray_Check(obj)); a = (bitarrayobject *) obj; if ((rts = sc_rts(a)) == NULL) return NULL; if ((list = PyList_New(NSEG(a) + 1)) == NULL) goto error; for (i = 0; i <= NSEG(a); i++) { PyObject *item = PyLong_FromSsize_t(rts[i]); if (item == NULL) goto error; PyList_SET_ITEM(list, i, item); } PyMem_Free(rts); return list; error: Py_XDECREF(list); PyMem_Free(rts); return NULL; } #endif /* NDEBUG */ /* Return population count for the decoded block size of type n. Roughly equivalent to the Python expression: a.count(1, 8 * offset, 8 * offset + (1 << (8 * n))) The offset must be divisible by SEGSIZE, as this function makes use of running totals, stored in rts[]. 
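For instance, using the example rts array above (nbits = 987, SEGSIZE = 32): sc_count(a, rts, 64, 1) gives i = 2 and j = min(2 + BSI(1) / SEGSIZE, NSEG(a)) = 3, and therefore returns rts[3] - rts[2] = 8 - 5 = 3, the population of bits a[512:768].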
*/ static Py_ssize_t sc_count(bitarrayobject *a, Py_ssize_t *rts, Py_ssize_t offset, int n) { const Py_ssize_t i = offset / SEGSIZE; /* indices into rts[] */ const Py_ssize_t j = Py_MIN(i + BSI(n) / SEGSIZE, NSEG(a)); assert(offset % SEGSIZE == 0 && 1 <= n && n <= 4); assert(0 <= i && i <= j && j <= NSEG(a)); return rts[j] - rts[i]; } /* Write a raw block, and return number of bytes copied. Note that the encoded block size is the return value + 1 (the head byte). The header byte is in range(0x01, 0xa0). * range(0x01, 0x20) number of raw bytes * range(0x20, 0xa0) number of 32-byte segments */ static int sc_write_raw(char *str, bitarrayobject *a, Py_ssize_t *rts, Py_ssize_t offset) { const Py_ssize_t nbytes = Py_SIZE(a) - offset; /* remaining bytes */ Py_ssize_t k = Py_MIN(32, nbytes); assert(nbytes > 0); if (k == 32) { /* The first 32 bytes are better represented using raw bytes. Now check up to the next 127 (32-byte) segments. */ const Py_ssize_t kmax = Py_MIN(32 * 128, nbytes); while (k + 32 <= kmax && sc_count(a, rts, offset + k, 1) >= 32) k += 32; } assert(0 < k && k <= 32 * 128 && k <= nbytes); assert(k >= 32 || k == nbytes); assert(k <= 32 || k % 32 == 0); /* block header */ *str = (char) (k <= 32 ? k : k / 32 + 31); /* block data */ assert(offset + k <= Py_SIZE(a)); memcpy(str + 1, a->ob_item + offset, (size_t) k); return (int) k; } /* Write 'k' indices (of 'n' bytes each) into buffer 'str'. Note that 'n' (which is also the block type) has been selected (in sc_encode_block()) such that: k = sc_count(a, rts, offset, n) < 256 */ static void sc_write_indices(char *str, bitarrayobject *a, Py_ssize_t *rts, Py_ssize_t offset, int n, int k) { const char *str_stop = str + n * k; /* stop position in buffer 'str' */ const char *buff = a->ob_item + offset; Py_ssize_t m; assert(0 < k && k < 256); /* note that k cannot be 0 in this function */ assert(k == sc_count(a, rts, offset, n)); /* see above */ rts += offset / SEGSIZE; /* rts index relative to offset now */ for (m = 0;;) { /* loop segments */ Py_ssize_t i, ni; assert(m + offset / SEGSIZE < NSEG(a)); /* number of indices in this segment, i.e. the segment population */ if ((ni = rts[m + 1] - rts[m]) == 0) goto next_segment; for (i = m * SEGSIZE;; i++) { /* loop bytes in segment */ int j; assert(i < (m + 1) * SEGSIZE && i + offset < Py_SIZE(a)); if (buff[i] == 0x00) continue; for (j = 0; j < 8; j++) { /* loop bits */ assert(8 * (offset + i) + j < a->nbits); if (buff[i] & BITMASK(a, j)) { write_n(str, n, 8 * i + j); str += n; if (--ni == 0) { /* we have encountered all indices in this segment */ if (str == str_stop) return; goto next_segment; } } } } Py_UNREACHABLE(); next_segment: m++; } Py_UNREACHABLE(); } /* Write a sparse block of type 'n' with 'k' indices. Return number of bytes written to buffer 'str' (encoded block size). Note that the decoded block size is always BSI(n). */ static Py_ssize_t sc_write_sparse(char *str, bitarrayobject *a, Py_ssize_t *rts, Py_ssize_t offset, int n, int k) { int len = 0; assert(1 <= n && n <= 4); assert(0 <= k && k < 256); /* write block header */ if (n == 1) { /* type 1 - one header byte */ assert(k < 32); str[len++] = (char) (0xa0 + k); /* index count in 0xa0 .. 
0xbf */ } else { /* type 2, 3, 4 - two header bytes */ str[len++] = (char) (0xc0 + n); /* block type */ str[len++] = (char) k; /* index count */ } if (k == 0) /* no index bytes - sc_write_indices() does not allow k = 0 */ return len; /* write block data - k indices, n bytes per index */ sc_write_indices(str + len, a, rts, offset, n, k); return len + n * k; } /* Encode one block (starting at offset) and return offset increment, i.e. the decoded block size. The output is written into buffer 'str' and 'len' is increased. Notes: - 32 index bytes take up as much space as a raw buffer of 32 bytes. Hence, if the bit count of the first 32 bytes of the bitarray buffer is greater or equal to 32, we choose a raw block (type 0). - Arguably, n index bytes always take up as much space as n raw bytes. So what makes 32 special here? A bitarray with a 32 byte buffer has 256 items (bits), and these 256 bits can be addressed using one index byte. That is, BSI(1) = 32, see above. This is also the reason why the index count of type 1 blocks is limited to below 32. - If a raw block is used, we check if up to the next 127 32-byte segments are also suitable for raw encoding, see sc_write_raw(). Therefore, we have type 0 blocks with up to 128 * 32 = 4096 raw bytes. - If no raw block was used, we move on to deciding which type of sparse representation to use. Starting at type n = 1, we do this by first calculating the population count for the decoded block size of the *next* block type n+1. If this population is larger than 255 (too large for the count byte) we have to stick with type n. Otherwise we compare the encoded sizes of (a) sticking with many (up to 256) blocks of type n, and (b) moving to a single block of type n+1. These sizes are calculated as follows: (a) The encoded size of many blocks of type n is given by: header_size * number_of_blocks + n * population Regardless of the exact index count for each block, the total size of the index bytes is (n * population), as all blocks are of type n. The number_of_blocks is 256 (unless limited by the bitarray size). The header_size is only 1 byte for type 1 and 2 bytes otherwise. (b) The encoded size of a single block of type n+1 is: header_size + (n + 1) * population As n >= 1, the header_size is always 2 bytes here. - As we only need to know which of these sizes is bigger, we can subtract (n * population) from both sizes. Hence, the costs are: (a) header_size * number_of_blocks (b) header_size + population The question of whether to choose type n or type n+1 ultimately comes down to whether the additional byte for each index is less expensive than having additional headers. */ static Py_ssize_t sc_encode_block(char *str, Py_ssize_t *len, bitarrayobject *a, Py_ssize_t *rts, Py_ssize_t offset) { const Py_ssize_t nbytes = Py_SIZE(a) - offset; /* remaining bytes */ int count, n; assert(nbytes > 0); count = (int) sc_count(a, rts, offset, 1); /* the number of index bytes exceeds the number of raw bytes */ if (count >= Py_MIN(32, nbytes)) { /* type 0 - raw bytes */ int k = sc_write_raw(str + *len, a, rts, offset); *len += 1 + k; return k; } for (n = 1; n < 4; n++) { Py_ssize_t next_count, nblocks, cost_a, cost_b; /* population for next block type n+1 */ next_count = sc_count(a, rts, offset, n + 1); if (next_count > 255) /* too many index bytes for next block type n+1 - use type n */ break; /* number of blocks of type n */ nblocks = Py_MIN(256, (nbytes - 1) / BSI(n) + 1); /* cost of nblocks blocks of type n */ cost_a = (n == 1 ? 
1 : 2) * nblocks; /* cost of a single block of type n+1 */ cost_b = 2 + next_count; if (cost_b >= cost_a) /* block type n+1 is equally or more expensive - use type n */ break; /* we proceed with type n+1 - we already calculated its population */ count = (int) next_count; } *len += sc_write_sparse(str + *len, a, rts, offset, n, count); return BSI(n); } /* write header and return number of bytes written to buffer 'str' */ static int sc_encode_header(char *str, bitarrayobject *a) { int len; len = byte_length(a->nbits); *str = (IS_BE(a) ? 0x10 : 0x00) | ((char) len); write_n(str + 1, len, a->nbits); return 1 + len; } /* initial size of output buffer, and amount by which we increase our allocation if we run out */ #define ALLOC_SIZE 32768 static PyObject * sc_encode(PyObject *module, PyObject *obj) { PyObject *out; char *str; /* output buffer */ Py_ssize_t len = 0; /* bytes written into output buffer */ bitarrayobject *a; Py_ssize_t offset = 0; /* block offset into bitarray a in bytes */ Py_ssize_t *rts; /* running totals of segments */ Py_ssize_t total; /* total population count of bitarray */ if (ensure_bitarray(obj) < 0) return NULL; a = (bitarrayobject *) obj; set_padbits(a); if ((rts = sc_rts(a)) == NULL) return NULL; if ((out = PyBytes_FromStringAndSize(NULL, ALLOC_SIZE)) == NULL) goto error; str = PyBytes_AS_STRING(out); len += sc_encode_header(str, a); total = rts[NSEG(a)]; /* encode blocks as long as we haven't reached the end of the bitarray and haven't reached the total population count yet */ while (offset < Py_SIZE(a) && rts[offset / SEGSIZE] != total) { Py_ssize_t allocated = PyBytes_GET_SIZE(out); /* Make sure we have enough memory in output buffer for next block. The largest block possible is a type 0 block with 128 segments. Its size is: 1 head byte + 128 * 32 raw bytes. Plus, we also may have the stop byte. */ if (allocated < len + 1 + 128 * 32 + 1) { if (_PyBytes_Resize(&out, allocated + ALLOC_SIZE) < 0) goto error; str = PyBytes_AS_STRING(out); } offset += sc_encode_block(str, &len, a, rts, offset); } PyMem_Free(rts); str[len++] = 0x00; /* add stop byte */ if (_PyBytes_Resize(&out, len) < 0) return NULL; return out; error: PyMem_Free(rts); return NULL; } #undef ALLOC_SIZE PyDoc_STRVAR(sc_encode_doc, "sc_encode(bitarray, /) -> bytes\n\ \n\ Compress a sparse bitarray and return its binary representation.\n\ This representation is useful for efficiently storing sparse bitarrays.\n\ Use `sc_decode()` for decompressing (decoding)."); /* read header from 'iter' and set 'endian' and 'nbits', return 0 on success and -1 on failure (after setting exception) */ static int sc_decode_header(PyObject *iter, int *endian, Py_ssize_t *nbits) { int head, len; if ((head = next_char(iter)) < 0) return -1; if (head & 0xe0) { PyErr_Format(PyExc_ValueError, "invalid header: 0x%02x", head); return -1; } *endian = head & 0x10 ? ENDIAN_BIG : ENDIAN_LITTLE; len = head & 0x0f; if (len > (int) sizeof(Py_ssize_t)) { PyErr_Format(PyExc_OverflowError, "sizeof(Py_ssize_t) = %d: cannot " "read %d bytes", (int) sizeof(Py_ssize_t), len); return -1; } if ((*nbits = read_n(iter, len)) < 0) return -1; return 0; } /* Read k bytes from iter and set elements in bitarray. Return the size of offset increment in bytes, or -1 on failure. 
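For example, when the caller (sc_decode_block()) sees a type 0 head byte of 0x21, it passes k = 32 * (0x21 - 31) = 64, and this function copies the next 64 bytes from the stream into the buffer at offset.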
*/ static Py_ssize_t sc_read_raw(bitarrayobject *a, Py_ssize_t offset, PyObject *iter, int k) { char *buff = a->ob_item + offset; int i, c; assert(1 <= k && k <= 32 * 128); if (offset + k > Py_SIZE(a)) { PyErr_Format(PyExc_ValueError, "decode error (raw): %zd + %d > %zd", offset, k, Py_SIZE(a)); return -1; } for (i = 0; i < k; i++) { if ((c = next_char(iter)) < 0) return -1; buff[i] = (char) c; } return k; } /* Read n * k bytes from iter and set elements in bitarray. Return size of offset increment in bytes, or -1 on failure. */ static Py_ssize_t sc_read_sparse(bitarrayobject *a, Py_ssize_t offset, PyObject *iter, int n, int k) { assert(1 <= n && n <= 4 && k >= 0); while (k--) { Py_ssize_t i; if ((i = read_n(iter, n)) < 0) return -1; i += 8 * offset; /* also check for negative value as offset might cause overflow */ if (i < 0 || i >= a->nbits) { PyErr_Format(PyExc_ValueError, "decode error (n=%d): %zd >= %zd", n, i, a->nbits); return -1; } setbit(a, i, 1); } return BSI(n); } /* Decode one block: consume iter and set bitarray buffer starting at offset. Return decoded block size, or -1 on failure. */ static Py_ssize_t sc_decode_block(bitarrayobject *a, Py_ssize_t offset, PyObject *iter) { int head, k; if ((head = next_char(iter)) < 0) return -1; if (head < 0xa0) { /* type 0 - 0x00 .. 0x9f */ if (head == 0) /* stop byte */ return 0; k = head <= 0x20 ? head : 32 * (head - 31); return sc_read_raw(a, offset, iter, k); } if (head < 0xc0) /* type 1 - 0xa0 .. 0xbf */ return sc_read_sparse(a, offset, iter, 1, head - 0xa0); if (0xc2 <= head && head <= 0xc4) { /* type 2 .. 4 - 0xc2 .. 0xc4 */ if ((k = next_char(iter)) < 0) /* index count byte */ return -1; return sc_read_sparse(a, offset, iter, head - 0xc0, k); } PyErr_Format(PyExc_ValueError, "invalid block head: 0x%02x", head); return -1; } static PyObject * sc_decode(PyObject *module, PyObject *obj) { PyObject *iter; bitarrayobject *a = NULL; Py_ssize_t offset = 0, increase, nbits; int endian; if ((iter = PyObject_GetIter(obj)) == NULL) return PyErr_Format(PyExc_TypeError, "'%s' object is not iterable", Py_TYPE(obj)->tp_name); if (sc_decode_header(iter, &endian, &nbits) < 0) goto error; /* create bitarray of length nbits */ if ((a = new_bitarray(nbits, Py_None, 0)) == NULL) goto error; a->endian = endian; /* consume blocks until stop byte is encountered */ while ((increase = sc_decode_block(a, offset, iter))) { if (increase < 0) goto error; offset += increase; } Py_DECREF(iter); return (PyObject *) a; error: Py_DECREF(iter); Py_XDECREF((PyObject *) a); return NULL; } PyDoc_STRVAR(sc_decode_doc, "sc_decode(stream, /) -> bitarray\n\ \n\ Decompress binary stream (an integer iterator, or bytes-like object) of a\n\ sparse compressed (`sc`) bitarray, and return the decoded bitarray.\n\ This function consumes only one bitarray and leaves the remaining stream\n\ untouched. Use `sc_encode()` for compressing (encoding)."); #undef BSI #undef NSEG /* ------------------- variable length bitarray format ----------------- */ /* LEN_PAD_BITS is always 3 - the number of bits (length) that is necessary to represent the number of pad bits. The number of padding bits itself is called 'padding' below. 'padding' refers to the pad bits within the variable length format. This is not the same as the pad bits of the actual bitarray. For example, b'\x10' has padding = 1, and decodes to bitarray('000'), which has 5 pad bits. 'padding' can take values up to 6. 
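Going the other way, vl_encode(bitarray('000')) returns b'\x10': the three bits fit into the head byte, so the lead bit (0x80, set when more bytes follow) stays clear, padding = 1 is stored in bits 6..4, and the three data bits (all zero) occupy the low nibble.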
*/ #define LEN_PAD_BITS 3 /* initial number of bits we allocate in vl_decode(), and amount by which we increase our allocation by in vl_decode_core() if we run out */ #define ALLOC_BITS 1024 /* Consume 'iter' while extending bitarray 'a'. Return 0 on success. On failure, set exception and return -1. */ static int vl_decode_core(bitarrayobject *a, PyObject *iter) { Py_ssize_t i = 0; /* bit counter */ int padding, k, c; if ((c = next_char(iter)) < 0) /* head byte */ return -1; padding = (c & 0x70) >> 4; if (padding == 7 || ((c & 0x80) == 0 && padding > 4)) { PyErr_Format(PyExc_ValueError, "invalid head byte: 0x%02x", c); return -1; } for (k = 0; k < 4; k++) setbit(a, i++, (0x08 >> k) & c); while (c & 0x80) { if ((c = next_char(iter)) < 0) return -1; /* ensure bitarray is large enough to accommodate seven more bits */ if (a->nbits < i + 7 && resize_lite(a, a->nbits + ALLOC_BITS) < 0) return -1; assert(i + 6 < a->nbits); for (k = 0; k < 7; k++) setbit(a, i++, (0x40 >> k) & c); } /* set final length of bitarray */ return resize_lite(a, i - padding); } static PyObject * vl_decode(PyObject *module, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"", "endian", NULL}; PyObject *obj, *iter, *endian = Py_None; bitarrayobject *a; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:vl_decode", kwlist, &obj, &endian)) return NULL; iter = PyObject_GetIter(obj); if (iter == NULL) return PyErr_Format(PyExc_TypeError, "'%s' object is not iterable", Py_TYPE(obj)->tp_name); a = new_bitarray(ALLOC_BITS, endian, -1); if (a == NULL) goto error; if (vl_decode_core(a, iter) < 0) /* do actual decoding work */ goto error; Py_DECREF(iter); return (PyObject *) a; error: Py_DECREF(iter); Py_XDECREF((PyObject *) a); return NULL; } #undef ALLOC_BITS PyDoc_STRVAR(vl_decode_doc, "vl_decode(stream, /, endian=None) -> bitarray\n\ \n\ Decode binary stream (an integer iterator, or bytes-like object), and\n\ return the decoded bitarray. This function consumes only one bitarray and\n\ leaves the remaining stream untouched. Use `vl_encode()` for encoding."); static PyObject * vl_encode(PyObject *module, PyObject *obj) { PyObject *result; bitarrayobject *a; Py_ssize_t nbits, n, i, j = 0; /* j: byte counter */ int padding; char *str; if (ensure_bitarray(obj) < 0) return NULL; a = (bitarrayobject *) obj; nbits = a->nbits; n = (nbits + LEN_PAD_BITS + 6) / 7; /* number of resulting bytes */ padding = (int) (7 * n - LEN_PAD_BITS - nbits); result = PyBytes_FromStringAndSize(NULL, n); if (result == NULL) return NULL; str = PyBytes_AsString(result); str[0] = nbits > 4 ? 0x80 : 0x00; /* lead bit */ str[0] |= padding << 4; /* encode padding */ for (i = 0; i < 4 && i < nbits; i++) str[0] |= (0x08 >> i) * getbit(a, i); for (i = 4; i < nbits; i++) { int k = (i - 4) % 7; if (k == 0) { j++; str[j] = j < n - 1 ? 0x80 : 0x00; /* lead bit */ } str[j] |= (0x40 >> k) * getbit(a, i); } assert(j == n - 1); return result; } PyDoc_STRVAR(vl_encode_doc, "vl_encode(bitarray, /) -> bytes\n\ \n\ Return variable length binary representation of bitarray.\n\ This representation is useful for efficiently storing small bitarray\n\ in a binary stream. Use `vl_decode()` for decoding."); #undef LEN_PAD_BITS /* ----------------------- canonical Huffman decoder ------------------- */ /* The decode iterator object includes the Huffman code decoding tables: - count[1..MAXBITS] is the number of symbols of each length, which for a canonical code are stepped through in order. count[0] is not used. 
- symbol is a Python sequence of the symbols in canonical order where the number of entries is the sum of the counts in count[]. */ #define MAXBITS 31 /* maximum bit length in a code */ typedef struct { PyObject_HEAD bitarrayobject *array; /* bitarray we're decoding */ Py_ssize_t index; /* current index in bitarray */ int count[MAXBITS + 1]; /* number of symbols of each length */ PyObject *symbol; /* canonical ordered symbols */ } chdi_obj; /* canonical Huffman decode iterator */ static PyTypeObject CHDI_Type; /* set elements in count (from sequence) and return their sum, or -1 on error after setting exception */ static Py_ssize_t set_count(int *count, PyObject *sequence) { Py_ssize_t n, res = 0; int i; if ((n = PySequence_Size(sequence)) < 0) return -1; if (n > MAXBITS + 1) { PyErr_Format(PyExc_ValueError, "len(count) cannot be larger than %d", MAXBITS + 1); return -1; } memset(count, 0, sizeof(int) * (MAXBITS + 1)); for (i = 1; i < n; i++) { PyObject *item; Py_ssize_t c; if ((item = PySequence_GetItem(sequence, i)) == NULL) return -1; c = PyNumber_AsSsize_t(item, PyExc_OverflowError); Py_DECREF(item); if (c == -1 && PyErr_Occurred()) return -1; if (c >> i && (c - 1) >> i) { PyErr_Format(PyExc_ValueError, "count[%d] not in [0..%zu], " "got %zd", i, ((size_t) 1) << i, c); return -1; } count[i] = (int) c; res += c; } return res; } /* create a new initialized canonical Huffman decode iterator object */ static PyObject * chdi_new(PyObject *module, PyObject *args) { PyObject *a, *count, *symbol; Py_ssize_t count_sum; chdi_obj *it; /* iterator object to be returned */ if (!PyArg_ParseTuple(args, "O!OO:canonical_decode", bitarray_type, &a, &count, &symbol)) return NULL; if (!PySequence_Check(count)) return PyErr_Format(PyExc_TypeError, "count expected to be sequence, " "got '%s'", Py_TYPE(count)->tp_name); symbol = PySequence_Fast(symbol, "symbol not iterable"); if (symbol == NULL) return NULL; it = PyObject_GC_New(chdi_obj, &CHDI_Type); if (it == NULL) goto error; if ((count_sum = set_count(it->count, count)) < 0) goto error; if (count_sum != PySequence_Size(symbol)) { PyErr_Format(PyExc_ValueError, "sum(count) = %zd, but len(symbol) " "= %zd", count_sum, PySequence_Size(symbol)); goto error; } Py_INCREF(a); it->array = (bitarrayobject *) a; it->index = 0; /* PySequence_Fast() returns a new reference, so no Py_INCREF here */ it->symbol = symbol; PyObject_GC_Track(it); return (PyObject *) it; error: it->array = NULL; Py_XDECREF(symbol); it->symbol = NULL; Py_DECREF(it); return NULL; } PyDoc_STRVAR(chdi_doc, "canonical_decode(bitarray, count, symbol, /) -> iterator\n\ \n\ Decode bitarray using canonical Huffman decoding tables\n\ where `count` is a sequence containing the number of symbols of each length\n\ and `symbol` is a sequence of symbols in canonical order."); /* This function is based on the function decode() in: https://github.com/madler/zlib/blob/master/contrib/puff/puff.c */ static PyObject * chdi_next(chdi_obj *it) { Py_ssize_t nbits = it->array->nbits; int len; /* current number of bits in code */ int code; /* current code (of len bits) */ int first; /* first code of length len */ int count; /* number of codes of length len */ int index; /* index of first code of length len in symbol list */ if (it->index >= nbits) /* no bits - stop iteration */ return NULL; code = first = index = 0; for (len = 1; len <= MAXBITS; len++) { code |= getbit(it->array, it->index++); count = it->count[len]; assert(code - first >= 0); if (code - first < count) { /* if length len, return symbol */ return 
PySequence_ITEM(it->symbol, index + (code - first)); } index += count; /* else update for next length */ first += count; first <<= 1; code <<= 1; if (it->index >= nbits && len != MAXBITS) { PyErr_SetString(PyExc_ValueError, "reached end of bitarray"); return NULL; } } PyErr_SetString(PyExc_ValueError, "ran out of codes"); return NULL; } static void chdi_dealloc(chdi_obj *it) { PyObject_GC_UnTrack(it); Py_XDECREF(it->array); Py_XDECREF(it->symbol); PyObject_GC_Del(it); } static int chdi_traverse(chdi_obj *it, visitproc visit, void *arg) { Py_VISIT(it->array); Py_VISIT(it->symbol); return 0; } #undef MAXBITS static PyTypeObject CHDI_Type = { PyVarObject_HEAD_INIT(NULL, 0) "bitarray.util.canonical_decodeiter", /* tp_name */ sizeof(chdi_obj), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ (destructor) chdi_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 0, /* tp_doc */ (traverseproc) chdi_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc) chdi_next, /* tp_iternext */ 0, /* tp_methods */ }; /* ---------- module functions exposed in debug mode for testing ------- */ #ifndef NDEBUG static PyObject * module_setup_table(PyObject *module, PyObject *obj) { char table[256]; assert(PyUnicode_Check(obj)); assert(PyUnicode_GET_LENGTH(obj) == 1); setup_table(table, PyUnicode_READ_CHAR(obj, 0)); return PyBytes_FromStringAndSize(table, 256); } /* Return zlw(a) as a new bitarray, rather than an int object. This makes testing easier, because the int result would depend on the machine byteorder. 
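The returned bitarray always has length 64 and carries the same bit-endianness as the input bitarray.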
*/ static PyObject * module_zlw(PyObject *module, PyObject *obj) { bitarrayobject *a, *res; uint64_t w; assert(bitarray_Check(obj)); a = (bitarrayobject *) obj; w = zlw(a); if ((res = new_bitarray(64, Py_None, -1)) == NULL) return NULL; res->endian = a->endian; memcpy(res->ob_item, &w, 8); return (PyObject *) res; } static PyObject * module_cfw(PyObject *module, PyObject *args) /* count_from_word() */ { bitarrayobject *a; Py_ssize_t i; if (!PyArg_ParseTuple(args, "O!n", bitarray_type, (PyObject *) &a, &i)) return NULL; return PyLong_FromSsize_t(count_from_word(a, i)); } static PyObject * module_d2i(PyObject *module, PyObject *args) { int m; char c; if (!PyArg_ParseTuple(args, "ic", &m, &c)) return NULL; return PyLong_FromLong(digit_to_int(m, c)); } static PyObject * module_read_n(PyObject *module, PyObject *args) { PyObject *iter; Py_ssize_t i; int n; if (!PyArg_ParseTuple(args, "Oi", &iter, &n)) return NULL; if ((i = read_n(iter, n)) < 0) return NULL; return PyLong_FromSsize_t(i); } static PyObject * module_write_n(PyObject *module, PyObject *args) { PyObject *result; char *str; Py_ssize_t i; int n; if (!PyArg_ParseTuple(args, "in", &n, &i)) return NULL; if ((result = PyBytes_FromStringAndSize(NULL, n)) == NULL) return NULL; str = PyBytes_AsString(result); write_n(str, n, i); return result; } #endif /* NDEBUG */ static PyMethodDef module_functions[] = { {"zeros", (PyCFunction) zeros, METH_KEYWORDS | METH_VARARGS, zeros_doc}, {"ones", (PyCFunction) ones, METH_KEYWORDS | METH_VARARGS, ones_doc}, {"count_n", (PyCFunction) count_n, METH_VARARGS, count_n_doc}, {"parity", (PyCFunction) parity, METH_O, parity_doc}, {"_ssqi", (PyCFunction) ssqi, METH_VARARGS, 0}, {"xor_indices", (PyCFunction) xor_indices, METH_O, xor_indices_doc}, {"count_and", (PyCFunction) count_and, METH_VARARGS, count_and_doc}, {"count_or", (PyCFunction) count_or, METH_VARARGS, count_or_doc}, {"count_xor", (PyCFunction) count_xor, METH_VARARGS, count_xor_doc}, {"any_and", (PyCFunction) any_and, METH_VARARGS, any_and_doc}, {"subset", (PyCFunction) subset, METH_VARARGS, subset_doc}, {"correspond_all", (PyCFunction) correspond_all, METH_VARARGS, correspond_all_doc}, {"byteswap", (PyCFunction) byteswap, METH_VARARGS, byteswap_doc}, {"serialize", (PyCFunction) serialize, METH_O, serialize_doc}, {"deserialize", (PyCFunction) deserialize, METH_O, deserialize_doc}, {"ba2hex", (PyCFunction) ba2hex, METH_KEYWORDS | METH_VARARGS, ba2hex_doc}, {"hex2ba", (PyCFunction) hex2ba, METH_KEYWORDS | METH_VARARGS, hex2ba_doc}, {"ba2base", (PyCFunction) ba2base, METH_KEYWORDS | METH_VARARGS, ba2base_doc}, {"base2ba", (PyCFunction) base2ba, METH_KEYWORDS | METH_VARARGS, base2ba_doc}, {"sc_encode", (PyCFunction) sc_encode, METH_O, sc_encode_doc}, {"sc_decode", (PyCFunction) sc_decode, METH_O, sc_decode_doc}, {"vl_encode", (PyCFunction) vl_encode, METH_O, vl_encode_doc}, {"vl_decode", (PyCFunction) vl_decode, METH_KEYWORDS | METH_VARARGS, vl_decode_doc}, {"canonical_decode", (PyCFunction) chdi_new, METH_VARARGS, chdi_doc}, #ifndef NDEBUG /* functions exposed in debug mode for testing */ {"_setup_table", (PyCFunction) module_setup_table, METH_O, 0}, {"_zlw", (PyCFunction) module_zlw, METH_O, 0}, {"_cfw", (PyCFunction) module_cfw, METH_VARARGS, 0}, {"_d2i", (PyCFunction) module_d2i, METH_VARARGS, 0}, {"_read_n", (PyCFunction) module_read_n, METH_VARARGS, 0}, {"_write_n", (PyCFunction) module_write_n, METH_VARARGS, 0}, {"_sc_rts", (PyCFunction) module_sc_rts, METH_O, 0}, #endif {NULL, NULL} /* sentinel */ }; /******************************* Install 
Module ***************************/ static PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "_util", 0, -1, module_functions, }; PyMODINIT_FUNC PyInit__util(void) { PyObject *m; bitarray_type = (PyTypeObject *) bitarray_module_attr("bitarray"); if (bitarray_type == NULL) return NULL; if ((m = PyModule_Create(&moduledef)) == NULL) return NULL; if (PyType_Ready(&CHDI_Type) < 0) return NULL; Py_SET_TYPE(&CHDI_Type, &PyType_Type); #ifndef NDEBUG /* expose segment size in debug mode for testing */ PyModule_AddObject(m, "_SEGSIZE", PyLong_FromSsize_t(SEGSIZE)); #endif return m; } bitarray-3.7.1/bitarray/bitarray.h000066400000000000000000000250101505414144000171360ustar00rootroot00000000000000/* Copyright (c) 2008 - 2025, Ilan Schnell; All Rights Reserved bitarray is published under the PSF license. Author: Ilan Schnell */ #define BITARRAY_VERSION "3.7.1" #ifdef STDC_HEADERS # include #else # ifdef HAVE_SYS_TYPES_H # include /* For size_t */ # endif #endif /* Compatibility with Visual Studio 2013 and older which don't support the inline keyword in C (only in C++): use __inline instead. (copied from pythoncapi_compat.h) */ #if (defined(_MSC_VER) && _MSC_VER < 1900 \ && !defined(__cplusplus) && !defined(inline)) #define inline __inline #endif #ifdef _MSC_VER #include /* For _byteswap_uint64() */ #endif /* --- definitions specific to Python --- */ /* Py_UNREACHABLE was introduced in Python 3.7 */ #ifndef Py_UNREACHABLE #define Py_UNREACHABLE() assert(0) #endif /* --- bitarrayobject --- */ /* .ob_size is the buffer size (in bytes), not the number of elements. The number of elements (bits) is .nbits. */ typedef struct { PyObject_VAR_HEAD char *ob_item; /* buffer */ Py_ssize_t allocated; /* allocated buffer size (in bytes) */ Py_ssize_t nbits; /* length of bitarray, i.e. elements */ int endian; /* bit-endianness of bitarray */ int ob_exports; /* how many buffer exports */ PyObject *weakreflist; /* list of weak references */ Py_buffer *buffer; /* used when importing a buffer */ int readonly; /* buffer is readonly */ } bitarrayobject; /* --- bit-endianness --- */ #define ENDIAN_LITTLE 0 #define ENDIAN_BIG 1 #define IS_LE(self) ((self)->endian == ENDIAN_LITTLE) #define IS_BE(self) ((self)->endian == ENDIAN_BIG) /* endianness as string */ #define ENDIAN_STR(endian) ((endian) == ENDIAN_LITTLE ? "little" : "big") /* number of pad bits */ #define PADBITS(self) ((8 - (self)->nbits % 8) % 8) /* number of bytes necessary to store given nunmber of bits */ #define BYTES(bits) (((bits) + 7) >> 3) /* we're not using bitmask_table here, as it is actually slower */ #define BITMASK(self, i) (((char) 1) << ((self)->endian == ENDIAN_LITTLE ? \ ((i) % 8) : (7 - (i) % 8))) /* buffer as uint64 array */ #define WBUFF(self) ((uint64_t *) (self)->ob_item) /* assert that .nbits is in agreement with .ob_size */ #define assert_nbits(self) assert(BYTES((self)->nbits) == Py_SIZE(self)) /* assert byte index is in range */ #define assert_byte_in_range(self, j) \ assert(self->ob_item && 0 <= (j) && (j) < Py_SIZE(self)) /* ------------ low level access to bits in bitarrayobject ------------- */ static inline int getbit(bitarrayobject *self, Py_ssize_t i) { assert_nbits(self); assert(0 <= i && i < self->nbits); return self->ob_item[i >> 3] & BITMASK(self, i) ? 
1 : 0; } static inline void setbit(bitarrayobject *self, Py_ssize_t i, int vi) { char *cp, mask; assert_nbits(self); assert(0 <= i && i < self->nbits); assert(self->readonly == 0); mask = BITMASK(self, i); cp = self->ob_item + (i >> 3); if (vi) *cp |= mask; else *cp &= ~mask; } static const char bitmask_table[2][8] = { {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80}, /* little endian */ {0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01}, /* big endian */ }; /* character with n leading ones is: ones_table[endian][n] */ static const char ones_table[2][8] = { {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f}, /* little endian */ {0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe}, /* big endian */ }; /* Return last byte in buffer with pad bits zeroed out. If the length of the bitarray is a multiple of 8 (which includes an empty bitarray), 0 is returned. */ static inline char zlc(bitarrayobject *self) /* zlc = zeroed last char */ { const int r = self->nbits % 8; /* index into mask table */ if (r == 0) return 0; return self->ob_item[Py_SIZE(self) - 1] & ones_table[IS_BE(self)][r]; } /* Return a uint64_t word representing the last (up to 63) remaining bits of the buffer. All missing bytes (to complete the word) and padbits are treated as zeros. If the length of the bitarray is a multiple of 64 (which also includes an empty bitarray), 0 is returned. */ static inline uint64_t zlw(bitarrayobject *self) /* zlw = zeroed last word */ { const size_t nbits = self->nbits; const size_t nw = (nbits / 64) * 8; /* bytes in complete words */ const size_t nr = (nbits % 64) / 8; /* complete remaining bytes */ uint64_t res = 0; assert(nw + nr == nbits / 8 && 8 * (nw + nr) + nbits % 8 == nbits); memcpy((char *) &res, self->ob_item + nw, nr); if (nbits % 8) *(((char *) &res) + nr) = zlc(self); return res; } /* unless buffer is readonly, zero out pad bits - self->nbits is unchanged */ static inline void set_padbits(bitarrayobject *self) { if (self->readonly == 0) { int r = self->nbits % 8; /* index into mask table */ if (r) self->ob_item[Py_SIZE(self) - 1] &= ones_table[IS_BE(self)][r]; } } /* population count - number of 1's in uint64 */ static inline int popcnt_64(uint64_t x) { #if (defined(__clang__) || defined(__GNUC__)) return __builtin_popcountll(x); #else /* https://en.wikipedia.org/wiki/Hamming_weight popcount64c */ const uint64_t m1 = 0x5555555555555555; const uint64_t m2 = 0x3333333333333333; const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; const uint64_t h01 = 0x0101010101010101; x -= (x >> 1) & m1; x = (x & m2) + ((x >> 2) & m2); x = (x + (x >> 4)) & m4; return (x * h01) >> 56; #endif } static inline int parity_64(uint64_t x) { #if (defined(__clang__) || defined(__GNUC__)) return __builtin_parityll(x); #else int i; for (i = 32; i > 0; i /= 2) x ^= x >> i; return x & 1; #endif } static inline uint64_t builtin_bswap64(uint64_t word) { #if (defined(__clang__) || \ (defined(__GNUC__) \ && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 3)))) /* __builtin_bswap64() is available since GCC 4.3 */ # define HAVE_BUILTIN_BSWAP64 1 return __builtin_bswap64(word); #elif defined(_MSC_VER) # define HAVE_BUILTIN_BSWAP64 1 return _byteswap_uint64(word); #else # define HAVE_BUILTIN_BSWAP64 0 Py_UNREACHABLE(); #endif } /* reverse order of first n bytes of p */ static inline void swap_bytes(char *p, Py_ssize_t n) { Py_ssize_t i, j; for (i = 0, j = n - 1; i < j; i++, j--) { char t = p[i]; p[i] = p[j]; p[j] = t; } } /* write 256 characters into table for given kernel operation */ static inline void setup_table(char *table, char 
kop) { int k; for (k = 0; k < 256; k++) { char t = 0, j; for (j = 0; j < 8; j++) { if (k & 1 << j) /* j are the indices of active bits in k (little endian) */ switch (kop) { case 'a': t += j; break; /* add active indices */ case 'A': t += 7 - j; break; /* 'a' for big endian */ case 's': t += j * j; /* add squares of active indices */ break; case 'S': t += (7-j) * (7-j); /* 's' for big endian */ break; case 'x': t ^= j; break; /* xor active indices */ case 'X': t ^= 7 - j; break; /* 'x' for big endian */ case 'c': t++; break; /* bit count */ case 'p': t ^= 1; break; /* parity */ case 'r': t |= 128 >> j; break; /* reverse bits */ default: Py_UNREACHABLE(); } } table[k] = t; } } /* Return distance [0..3] to next aligned pointer. While on modern compilers uint64_t pointers may be misaligned, it may cause problems on older ones. Moreover, it may lead to slowdown (even on modern compilers). */ static inline int to_aligned(void *p) { int r = ((uintptr_t) p) % 4; return (4 - r) % 4; } /* population count of n words starting from at uint64_t pointer w */ static inline Py_ssize_t popcnt_words(uint64_t *w, Py_ssize_t n) { Py_ssize_t cnt = 0; assert(n >= 0 && ((uintptr_t) w) % 4 == 0); while (n--) cnt += popcnt_64(*w++); return cnt; } /* Adjust slice parameters such that step is always positive. This produces simpler loops over elements when their order is irrelevant. Moreover, for step = -1, we can now use set_span() in set_range() and count_span() in count_range(). */ static inline void adjust_step_positive(Py_ssize_t slicelength, Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step) { if (*step < 0) { *stop = *start + 1; *start = *stop + *step * (slicelength - 1) - 1; *step = -(*step); } assert(*start >= 0 && *stop >= 0 && *step > 0 && slicelength >= 0); /* slicelength == 0 implies stop <= start */ assert(slicelength != 0 || *stop <= *start); /* step == 1 and slicelength != 0 implies stop - start == slicelength */ assert(*step != 1 || slicelength == 0 || *stop - *start == slicelength); } /* convert Python object to C int and set value at address - return 1 on success, 0 on failure (and set exception) */ static inline int conv_pybit(PyObject *value, int *vi) { Py_ssize_t n; n = PyNumber_AsSsize_t(value, NULL); if (n == -1 && PyErr_Occurred()) return 0; if (n >> 1) { PyErr_Format(PyExc_ValueError, "bit must be 0 or 1, got %zd", n); return 0; } *vi = (int) n; return 1; } /* Return 0 if bitarrays have equal length and bit-endianness. Otherwise, set exception and return -1. */ static inline int ensure_eq_size_endian(bitarrayobject *a, bitarrayobject *b) { if (a->nbits != b->nbits) { PyErr_SetString(PyExc_ValueError, "bitarrays of equal length expected"); return -1; } if (a->endian != b->endian) { PyErr_SetString(PyExc_ValueError, "bitarrays of equal bit-endianness expected"); return -1; } return 0; } /* Equivalent to: import bitarray; return getattr(bitarray, name) */ static inline PyObject * bitarray_module_attr(char *name) { PyObject *bitarray_module, *result; bitarray_module = PyImport_ImportModule("bitarray"); if (bitarray_module == NULL) return NULL; result = PyObject_GetAttrString(bitarray_module, name); Py_DECREF(bitarray_module); return result; } bitarray-3.7.1/bitarray/py.typed000066400000000000000000000000001505414144000166370ustar00rootroot00000000000000bitarray-3.7.1/bitarray/pythoncapi_compat.h000066400000000000000000001673311505414144000210570ustar00rootroot00000000000000// Header file providing new C API functions to old Python versions. 
// // File distributed under the Zero Clause BSD (0BSD) license. // Copyright Contributors to the pythoncapi_compat project. // // Homepage: // https://github.com/python/pythoncapi_compat // // Latest version: // https://raw.githubusercontent.com/python/pythoncapi-compat/main/pythoncapi_compat.h // // SPDX-License-Identifier: 0BSD #ifndef PYTHONCAPI_COMPAT #define PYTHONCAPI_COMPAT #ifdef __cplusplus extern "C" { #endif #include #include // offsetof() // Python 3.11.0b4 added PyFrame_Back() to Python.h #if PY_VERSION_HEX < 0x030b00B4 && !defined(PYPY_VERSION) # include "frameobject.h" // PyFrameObject, PyFrame_GetBack() #endif #if PY_VERSION_HEX < 0x030C00A3 # include // T_SHORT, READONLY #endif #ifndef _Py_CAST # define _Py_CAST(type, expr) ((type)(expr)) #endif // Static inline functions should use _Py_NULL rather than using directly NULL // to prevent C++ compiler warnings. On C23 and newer and on C++11 and newer, // _Py_NULL is defined as nullptr. #ifndef _Py_NULL # if (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ || (defined(__cplusplus) && __cplusplus >= 201103) # define _Py_NULL nullptr # else # define _Py_NULL NULL # endif #endif // Cast argument to PyObject* type. #ifndef _PyObject_CAST # define _PyObject_CAST(op) _Py_CAST(PyObject*, op) #endif #ifndef Py_BUILD_ASSERT # define Py_BUILD_ASSERT(cond) \ do { \ (void)sizeof(char [1 - 2 * !(cond)]); \ } while(0) #endif // bpo-42262 added Py_NewRef() to Python 3.10.0a3 #if PY_VERSION_HEX < 0x030A00A3 && !defined(Py_NewRef) static inline PyObject* _Py_NewRef(PyObject *obj) { Py_INCREF(obj); return obj; } #define Py_NewRef(obj) _Py_NewRef(_PyObject_CAST(obj)) #endif // bpo-42262 added Py_XNewRef() to Python 3.10.0a3 #if PY_VERSION_HEX < 0x030A00A3 && !defined(Py_XNewRef) static inline PyObject* _Py_XNewRef(PyObject *obj) { Py_XINCREF(obj); return obj; } #define Py_XNewRef(obj) _Py_XNewRef(_PyObject_CAST(obj)) #endif // bpo-39573 added Py_SET_REFCNT() to Python 3.9.0a4 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_REFCNT) static inline void _Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) { ob->ob_refcnt = refcnt; } #define Py_SET_REFCNT(ob, refcnt) _Py_SET_REFCNT(_PyObject_CAST(ob), refcnt) #endif // Py_SETREF() and Py_XSETREF() were added to Python 3.5.2. // It is excluded from the limited C API. #if (PY_VERSION_HEX < 0x03050200 && !defined(Py_SETREF)) && !defined(Py_LIMITED_API) #define Py_SETREF(dst, src) \ do { \ PyObject **_tmp_dst_ptr = _Py_CAST(PyObject**, &(dst)); \ PyObject *_tmp_dst = (*_tmp_dst_ptr); \ *_tmp_dst_ptr = _PyObject_CAST(src); \ Py_DECREF(_tmp_dst); \ } while (0) #define Py_XSETREF(dst, src) \ do { \ PyObject **_tmp_dst_ptr = _Py_CAST(PyObject**, &(dst)); \ PyObject *_tmp_dst = (*_tmp_dst_ptr); \ *_tmp_dst_ptr = _PyObject_CAST(src); \ Py_XDECREF(_tmp_dst); \ } while (0) #endif // bpo-43753 added Py_Is(), Py_IsNone(), Py_IsTrue() and Py_IsFalse() // to Python 3.10.0b1. 
#if PY_VERSION_HEX < 0x030A00B1 && !defined(Py_Is) # define Py_Is(x, y) ((x) == (y)) #endif #if PY_VERSION_HEX < 0x030A00B1 && !defined(Py_IsNone) # define Py_IsNone(x) Py_Is(x, Py_None) #endif #if (PY_VERSION_HEX < 0x030A00B1 || defined(PYPY_VERSION)) && !defined(Py_IsTrue) # define Py_IsTrue(x) Py_Is(x, Py_True) #endif #if (PY_VERSION_HEX < 0x030A00B1 || defined(PYPY_VERSION)) && !defined(Py_IsFalse) # define Py_IsFalse(x) Py_Is(x, Py_False) #endif // bpo-39573 added Py_SET_TYPE() to Python 3.9.0a4 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_TYPE) static inline void _Py_SET_TYPE(PyObject *ob, PyTypeObject *type) { ob->ob_type = type; } #define Py_SET_TYPE(ob, type) _Py_SET_TYPE(_PyObject_CAST(ob), type) #endif // bpo-39573 added Py_SET_SIZE() to Python 3.9.0a4 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_SIZE) static inline void _Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { ob->ob_size = size; } #define Py_SET_SIZE(ob, size) _Py_SET_SIZE((PyVarObject*)(ob), size) #endif // bpo-40421 added PyFrame_GetCode() to Python 3.9.0b1 #if PY_VERSION_HEX < 0x030900B1 || defined(PYPY_VERSION) static inline PyCodeObject* PyFrame_GetCode(PyFrameObject *frame) { assert(frame != _Py_NULL); assert(frame->f_code != _Py_NULL); return _Py_CAST(PyCodeObject*, Py_NewRef(frame->f_code)); } #endif static inline PyCodeObject* _PyFrame_GetCodeBorrow(PyFrameObject *frame) { PyCodeObject *code = PyFrame_GetCode(frame); Py_DECREF(code); return code; } // bpo-40421 added PyFrame_GetBack() to Python 3.9.0b1 #if PY_VERSION_HEX < 0x030900B1 && !defined(PYPY_VERSION) static inline PyFrameObject* PyFrame_GetBack(PyFrameObject *frame) { assert(frame != _Py_NULL); return _Py_CAST(PyFrameObject*, Py_XNewRef(frame->f_back)); } #endif #if !defined(PYPY_VERSION) static inline PyFrameObject* _PyFrame_GetBackBorrow(PyFrameObject *frame) { PyFrameObject *back = PyFrame_GetBack(frame); Py_XDECREF(back); return back; } #endif // bpo-40421 added PyFrame_GetLocals() to Python 3.11.0a7 #if PY_VERSION_HEX < 0x030B00A7 && !defined(PYPY_VERSION) static inline PyObject* PyFrame_GetLocals(PyFrameObject *frame) { #if PY_VERSION_HEX >= 0x030400B1 if (PyFrame_FastToLocalsWithError(frame) < 0) { return NULL; } #else PyFrame_FastToLocals(frame); #endif return Py_NewRef(frame->f_locals); } #endif // bpo-40421 added PyFrame_GetGlobals() to Python 3.11.0a7 #if PY_VERSION_HEX < 0x030B00A7 && !defined(PYPY_VERSION) static inline PyObject* PyFrame_GetGlobals(PyFrameObject *frame) { return Py_NewRef(frame->f_globals); } #endif // bpo-40421 added PyFrame_GetBuiltins() to Python 3.11.0a7 #if PY_VERSION_HEX < 0x030B00A7 && !defined(PYPY_VERSION) static inline PyObject* PyFrame_GetBuiltins(PyFrameObject *frame) { return Py_NewRef(frame->f_builtins); } #endif // bpo-40421 added PyFrame_GetLasti() to Python 3.11.0b1 #if PY_VERSION_HEX < 0x030B00B1 && !defined(PYPY_VERSION) static inline int PyFrame_GetLasti(PyFrameObject *frame) { #if PY_VERSION_HEX >= 0x030A00A7 // bpo-27129: Since Python 3.10.0a7, f_lasti is an instruction offset, // not a bytes offset anymore. Python uses 16-bit "wordcode" (2 bytes) // instructions. 
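    // A negative f_lasti means the frame has not started executing yet;
    // report -1 rather than scaling it.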
if (frame->f_lasti < 0) { return -1; } return frame->f_lasti * 2; #else return frame->f_lasti; #endif } #endif // gh-91248 added PyFrame_GetVar() to Python 3.12.0a2 #if PY_VERSION_HEX < 0x030C00A2 && !defined(PYPY_VERSION) static inline PyObject* PyFrame_GetVar(PyFrameObject *frame, PyObject *name) { PyObject *locals, *value; locals = PyFrame_GetLocals(frame); if (locals == NULL) { return NULL; } #if PY_VERSION_HEX >= 0x03000000 value = PyDict_GetItemWithError(locals, name); #else value = _PyDict_GetItemWithError(locals, name); #endif Py_DECREF(locals); if (value == NULL) { if (PyErr_Occurred()) { return NULL; } #if PY_VERSION_HEX >= 0x03000000 PyErr_Format(PyExc_NameError, "variable %R does not exist", name); #else PyErr_SetString(PyExc_NameError, "variable does not exist"); #endif return NULL; } return Py_NewRef(value); } #endif // gh-91248 added PyFrame_GetVarString() to Python 3.12.0a2 #if PY_VERSION_HEX < 0x030C00A2 && !defined(PYPY_VERSION) static inline PyObject* PyFrame_GetVarString(PyFrameObject *frame, const char *name) { PyObject *name_obj, *value; #if PY_VERSION_HEX >= 0x03000000 name_obj = PyUnicode_FromString(name); #else name_obj = PyString_FromString(name); #endif if (name_obj == NULL) { return NULL; } value = PyFrame_GetVar(frame, name_obj); Py_DECREF(name_obj); return value; } #endif // bpo-39947 added PyThreadState_GetInterpreter() to Python 3.9.0a5 #if PY_VERSION_HEX < 0x030900A5 || (defined(PYPY_VERSION) && PY_VERSION_HEX < 0x030B0000) static inline PyInterpreterState * PyThreadState_GetInterpreter(PyThreadState *tstate) { assert(tstate != _Py_NULL); return tstate->interp; } #endif // bpo-40429 added PyThreadState_GetFrame() to Python 3.9.0b1 #if PY_VERSION_HEX < 0x030900B1 && !defined(PYPY_VERSION) static inline PyFrameObject* PyThreadState_GetFrame(PyThreadState *tstate) { assert(tstate != _Py_NULL); return _Py_CAST(PyFrameObject *, Py_XNewRef(tstate->frame)); } #endif #if !defined(PYPY_VERSION) static inline PyFrameObject* _PyThreadState_GetFrameBorrow(PyThreadState *tstate) { PyFrameObject *frame = PyThreadState_GetFrame(tstate); Py_XDECREF(frame); return frame; } #endif // bpo-39947 added PyInterpreterState_Get() to Python 3.9.0a5 #if PY_VERSION_HEX < 0x030900A5 || defined(PYPY_VERSION) static inline PyInterpreterState* PyInterpreterState_Get(void) { PyThreadState *tstate; PyInterpreterState *interp; tstate = PyThreadState_GET(); if (tstate == _Py_NULL) { Py_FatalError("GIL released (tstate is NULL)"); } interp = tstate->interp; if (interp == _Py_NULL) { Py_FatalError("no current interpreter"); } return interp; } #endif // bpo-39947 added PyInterpreterState_Get() to Python 3.9.0a6 #if 0x030700A1 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x030900A6 && !defined(PYPY_VERSION) static inline uint64_t PyThreadState_GetID(PyThreadState *tstate) { assert(tstate != _Py_NULL); return tstate->id; } #endif // bpo-43760 added PyThreadState_EnterTracing() to Python 3.11.0a2 #if PY_VERSION_HEX < 0x030B00A2 && !defined(PYPY_VERSION) static inline void PyThreadState_EnterTracing(PyThreadState *tstate) { tstate->tracing++; #if PY_VERSION_HEX >= 0x030A00A1 tstate->cframe->use_tracing = 0; #else tstate->use_tracing = 0; #endif } #endif // bpo-43760 added PyThreadState_LeaveTracing() to Python 3.11.0a2 #if PY_VERSION_HEX < 0x030B00A2 && !defined(PYPY_VERSION) static inline void PyThreadState_LeaveTracing(PyThreadState *tstate) { int use_tracing = (tstate->c_tracefunc != _Py_NULL || tstate->c_profilefunc != _Py_NULL); tstate->tracing--; #if PY_VERSION_HEX >= 0x030A00A1 
tstate->cframe->use_tracing = use_tracing; #else tstate->use_tracing = use_tracing; #endif } #endif // bpo-37194 added PyObject_CallNoArgs() to Python 3.9.0a1 // PyObject_CallNoArgs() added to PyPy 3.9.16-v7.3.11 #if !defined(PyObject_CallNoArgs) && PY_VERSION_HEX < 0x030900A1 static inline PyObject* PyObject_CallNoArgs(PyObject *func) { return PyObject_CallFunctionObjArgs(func, NULL); } #endif // bpo-39245 made PyObject_CallOneArg() public (previously called // _PyObject_CallOneArg) in Python 3.9.0a4 // PyObject_CallOneArg() added to PyPy 3.9.16-v7.3.11 #if !defined(PyObject_CallOneArg) && PY_VERSION_HEX < 0x030900A4 static inline PyObject* PyObject_CallOneArg(PyObject *func, PyObject *arg) { return PyObject_CallFunctionObjArgs(func, arg, NULL); } #endif // bpo-1635741 added PyModule_AddObjectRef() to Python 3.10.0a3 #if PY_VERSION_HEX < 0x030A00A3 static inline int PyModule_AddObjectRef(PyObject *module, const char *name, PyObject *value) { int res; if (!value && !PyErr_Occurred()) { // PyModule_AddObject() raises TypeError in this case PyErr_SetString(PyExc_SystemError, "PyModule_AddObjectRef() must be called " "with an exception raised if value is NULL"); return -1; } Py_XINCREF(value); res = PyModule_AddObject(module, name, value); if (res < 0) { Py_XDECREF(value); } return res; } #endif // bpo-40024 added PyModule_AddType() to Python 3.9.0a5 #if PY_VERSION_HEX < 0x030900A5 static inline int PyModule_AddType(PyObject *module, PyTypeObject *type) { const char *name, *dot; if (PyType_Ready(type) < 0) { return -1; } // inline _PyType_Name() name = type->tp_name; assert(name != _Py_NULL); dot = strrchr(name, '.'); if (dot != _Py_NULL) { name = dot + 1; } return PyModule_AddObjectRef(module, name, _PyObject_CAST(type)); } #endif // bpo-40241 added PyObject_GC_IsTracked() to Python 3.9.0a6. // bpo-4688 added _PyObject_GC_IS_TRACKED() to Python 2.7.0a2. #if PY_VERSION_HEX < 0x030900A6 && !defined(PYPY_VERSION) static inline int PyObject_GC_IsTracked(PyObject* obj) { return (PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)); } #endif // bpo-40241 added PyObject_GC_IsFinalized() to Python 3.9.0a6. // bpo-18112 added _PyGCHead_FINALIZED() to Python 3.4.0 final. #if PY_VERSION_HEX < 0x030900A6 && PY_VERSION_HEX >= 0x030400F0 && !defined(PYPY_VERSION) static inline int PyObject_GC_IsFinalized(PyObject *obj) { PyGC_Head *gc = _Py_CAST(PyGC_Head*, obj) - 1; return (PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(gc)); } #endif // bpo-39573 added Py_IS_TYPE() to Python 3.9.0a4 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_IS_TYPE) static inline int _Py_IS_TYPE(PyObject *ob, PyTypeObject *type) { return Py_TYPE(ob) == type; } #define Py_IS_TYPE(ob, type) _Py_IS_TYPE(_PyObject_CAST(ob), type) #endif // bpo-46906 added PyFloat_Pack2() and PyFloat_Unpack2() to Python 3.11a7. // bpo-11734 added _PyFloat_Pack2() and _PyFloat_Unpack2() to Python 3.6.0b1. // Python 3.11a2 moved _PyFloat_Pack2() and _PyFloat_Unpack2() to the internal // C API: Python 3.11a2-3.11a6 versions are not supported. #if 0x030600B1 <= PY_VERSION_HEX && PY_VERSION_HEX <= 0x030B00A1 && !defined(PYPY_VERSION) static inline int PyFloat_Pack2(double x, char *p, int le) { return _PyFloat_Pack2(x, (unsigned char*)p, le); } static inline double PyFloat_Unpack2(const char *p, int le) { return _PyFloat_Unpack2((const unsigned char *)p, le); } #endif // bpo-46906 added PyFloat_Pack4(), PyFloat_Pack8(), PyFloat_Unpack4() and // PyFloat_Unpack8() to Python 3.11a7. 
// Python 3.11a2 moved _PyFloat_Pack4(), _PyFloat_Pack8(), _PyFloat_Unpack4() // and _PyFloat_Unpack8() to the internal C API: Python 3.11a2-3.11a6 versions // are not supported. #if PY_VERSION_HEX <= 0x030B00A1 && !defined(PYPY_VERSION) static inline int PyFloat_Pack4(double x, char *p, int le) { return _PyFloat_Pack4(x, (unsigned char*)p, le); } static inline int PyFloat_Pack8(double x, char *p, int le) { return _PyFloat_Pack8(x, (unsigned char*)p, le); } static inline double PyFloat_Unpack4(const char *p, int le) { return _PyFloat_Unpack4((const unsigned char *)p, le); } static inline double PyFloat_Unpack8(const char *p, int le) { return _PyFloat_Unpack8((const unsigned char *)p, le); } #endif // gh-92154 added PyCode_GetCode() to Python 3.11.0b1 #if PY_VERSION_HEX < 0x030B00B1 && !defined(PYPY_VERSION) static inline PyObject* PyCode_GetCode(PyCodeObject *code) { return Py_NewRef(code->co_code); } #endif // gh-95008 added PyCode_GetVarnames() to Python 3.11.0rc1 #if PY_VERSION_HEX < 0x030B00C1 && !defined(PYPY_VERSION) static inline PyObject* PyCode_GetVarnames(PyCodeObject *code) { return Py_NewRef(code->co_varnames); } #endif // gh-95008 added PyCode_GetFreevars() to Python 3.11.0rc1 #if PY_VERSION_HEX < 0x030B00C1 && !defined(PYPY_VERSION) static inline PyObject* PyCode_GetFreevars(PyCodeObject *code) { return Py_NewRef(code->co_freevars); } #endif // gh-95008 added PyCode_GetCellvars() to Python 3.11.0rc1 #if PY_VERSION_HEX < 0x030B00C1 && !defined(PYPY_VERSION) static inline PyObject* PyCode_GetCellvars(PyCodeObject *code) { return Py_NewRef(code->co_cellvars); } #endif // Py_UNUSED() was added to Python 3.4.0b2. #if PY_VERSION_HEX < 0x030400B2 && !defined(Py_UNUSED) # if defined(__GNUC__) || defined(__clang__) # define Py_UNUSED(name) _unused_ ## name __attribute__((unused)) # else # define Py_UNUSED(name) _unused_ ## name # endif #endif // gh-105922 added PyImport_AddModuleRef() to Python 3.13.0a1 #if PY_VERSION_HEX < 0x030D00A0 static inline PyObject* PyImport_AddModuleRef(const char *name) { return Py_XNewRef(PyImport_AddModule(name)); } #endif // gh-105927 added PyWeakref_GetRef() to Python 3.13.0a1 #if PY_VERSION_HEX < 0x030D0000 static inline int PyWeakref_GetRef(PyObject *ref, PyObject **pobj) { PyObject *obj; if (ref != NULL && !PyWeakref_Check(ref)) { *pobj = NULL; PyErr_SetString(PyExc_TypeError, "expected a weakref"); return -1; } obj = PyWeakref_GetObject(ref); if (obj == NULL) { // SystemError if ref is NULL *pobj = NULL; return -1; } if (obj == Py_None) { *pobj = NULL; return 0; } *pobj = Py_NewRef(obj); return 1; } #endif // bpo-36974 added PY_VECTORCALL_ARGUMENTS_OFFSET to Python 3.8b1 #ifndef PY_VECTORCALL_ARGUMENTS_OFFSET # define PY_VECTORCALL_ARGUMENTS_OFFSET (_Py_CAST(size_t, 1) << (8 * sizeof(size_t) - 1)) #endif // bpo-36974 added PyVectorcall_NARGS() to Python 3.8b1 #if PY_VERSION_HEX < 0x030800B1 static inline Py_ssize_t PyVectorcall_NARGS(size_t n) { return n & ~PY_VECTORCALL_ARGUMENTS_OFFSET; } #endif // gh-105922 added PyObject_Vectorcall() to Python 3.9.0a4 #if PY_VERSION_HEX < 0x030900A4 static inline PyObject* PyObject_Vectorcall(PyObject *callable, PyObject *const *args, size_t nargsf, PyObject *kwnames) { #if PY_VERSION_HEX >= 0x030800B1 && !defined(PYPY_VERSION) // bpo-36974 added _PyObject_Vectorcall() to Python 3.8.0b1 return _PyObject_Vectorcall(callable, args, nargsf, kwnames); #else PyObject *posargs = NULL, *kwargs = NULL; PyObject *res; Py_ssize_t nposargs, nkwargs, i; if (nargsf != 0 && args == NULL) { PyErr_BadInternalCall(); goto error; 
} if (kwnames != NULL && !PyTuple_Check(kwnames)) { PyErr_BadInternalCall(); goto error; } nposargs = (Py_ssize_t)PyVectorcall_NARGS(nargsf); if (kwnames) { nkwargs = PyTuple_GET_SIZE(kwnames); } else { nkwargs = 0; } posargs = PyTuple_New(nposargs); if (posargs == NULL) { goto error; } if (nposargs) { for (i=0; i < nposargs; i++) { PyTuple_SET_ITEM(posargs, i, Py_NewRef(*args)); args++; } } if (nkwargs) { kwargs = PyDict_New(); if (kwargs == NULL) { goto error; } for (i = 0; i < nkwargs; i++) { PyObject *key = PyTuple_GET_ITEM(kwnames, i); PyObject *value = *args; args++; if (PyDict_SetItem(kwargs, key, value) < 0) { goto error; } } } else { kwargs = NULL; } res = PyObject_Call(callable, posargs, kwargs); Py_DECREF(posargs); Py_XDECREF(kwargs); return res; error: Py_DECREF(posargs); Py_XDECREF(kwargs); return NULL; #endif } #endif // gh-106521 added PyObject_GetOptionalAttr() and // PyObject_GetOptionalAttrString() to Python 3.13.0a1 #if PY_VERSION_HEX < 0x030D00A1 static inline int PyObject_GetOptionalAttr(PyObject *obj, PyObject *attr_name, PyObject **result) { // bpo-32571 added _PyObject_LookupAttr() to Python 3.7.0b1 #if PY_VERSION_HEX >= 0x030700B1 && !defined(PYPY_VERSION) return _PyObject_LookupAttr(obj, attr_name, result); #else *result = PyObject_GetAttr(obj, attr_name); if (*result != NULL) { return 1; } if (!PyErr_Occurred()) { return 0; } if (PyErr_ExceptionMatches(PyExc_AttributeError)) { PyErr_Clear(); return 0; } return -1; #endif } static inline int PyObject_GetOptionalAttrString(PyObject *obj, const char *attr_name, PyObject **result) { PyObject *name_obj; int rc; #if PY_VERSION_HEX >= 0x03000000 name_obj = PyUnicode_FromString(attr_name); #else name_obj = PyString_FromString(attr_name); #endif if (name_obj == NULL) { *result = NULL; return -1; } rc = PyObject_GetOptionalAttr(obj, name_obj, result); Py_DECREF(name_obj); return rc; } #endif // gh-106307 added PyObject_GetOptionalAttr() and // PyMapping_GetOptionalItemString() to Python 3.13.0a1 #if PY_VERSION_HEX < 0x030D00A1 static inline int PyMapping_GetOptionalItem(PyObject *obj, PyObject *key, PyObject **result) { *result = PyObject_GetItem(obj, key); if (*result) { return 1; } if (!PyErr_ExceptionMatches(PyExc_KeyError)) { return -1; } PyErr_Clear(); return 0; } static inline int PyMapping_GetOptionalItemString(PyObject *obj, const char *key, PyObject **result) { PyObject *key_obj; int rc; #if PY_VERSION_HEX >= 0x03000000 key_obj = PyUnicode_FromString(key); #else key_obj = PyString_FromString(key); #endif if (key_obj == NULL) { *result = NULL; return -1; } rc = PyMapping_GetOptionalItem(obj, key_obj, result); Py_DECREF(key_obj); return rc; } #endif // gh-108511 added PyMapping_HasKeyWithError() and // PyMapping_HasKeyStringWithError() to Python 3.13.0a1 #if PY_VERSION_HEX < 0x030D00A1 static inline int PyMapping_HasKeyWithError(PyObject *obj, PyObject *key) { PyObject *res; int rc = PyMapping_GetOptionalItem(obj, key, &res); Py_XDECREF(res); return rc; } static inline int PyMapping_HasKeyStringWithError(PyObject *obj, const char *key) { PyObject *res; int rc = PyMapping_GetOptionalItemString(obj, key, &res); Py_XDECREF(res); return rc; } #endif // gh-108511 added PyObject_HasAttrWithError() and // PyObject_HasAttrStringWithError() to Python 3.13.0a1 #if PY_VERSION_HEX < 0x030D00A1 static inline int PyObject_HasAttrWithError(PyObject *obj, PyObject *attr) { PyObject *res; int rc = PyObject_GetOptionalAttr(obj, attr, &res); Py_XDECREF(res); return rc; } static inline int PyObject_HasAttrStringWithError(PyObject *obj, 
const char *attr) { PyObject *res; int rc = PyObject_GetOptionalAttrString(obj, attr, &res); Py_XDECREF(res); return rc; } #endif // gh-106004 added PyDict_GetItemRef() and PyDict_GetItemStringRef() // to Python 3.13.0a1 #if PY_VERSION_HEX < 0x030D00A1 static inline int PyDict_GetItemRef(PyObject *mp, PyObject *key, PyObject **result) { #if PY_VERSION_HEX >= 0x03000000 PyObject *item = PyDict_GetItemWithError(mp, key); #else PyObject *item = _PyDict_GetItemWithError(mp, key); #endif if (item != NULL) { *result = Py_NewRef(item); return 1; // found } if (!PyErr_Occurred()) { *result = NULL; return 0; // not found } *result = NULL; return -1; } static inline int PyDict_GetItemStringRef(PyObject *mp, const char *key, PyObject **result) { int res; #if PY_VERSION_HEX >= 0x03000000 PyObject *key_obj = PyUnicode_FromString(key); #else PyObject *key_obj = PyString_FromString(key); #endif if (key_obj == NULL) { *result = NULL; return -1; } res = PyDict_GetItemRef(mp, key_obj, result); Py_DECREF(key_obj); return res; } #endif // gh-106307 added PyModule_Add() to Python 3.13.0a1 #if PY_VERSION_HEX < 0x030D00A1 static inline int PyModule_Add(PyObject *mod, const char *name, PyObject *value) { int res = PyModule_AddObjectRef(mod, name, value); Py_XDECREF(value); return res; } #endif // gh-108014 added Py_IsFinalizing() to Python 3.13.0a1 // bpo-1856 added _Py_Finalizing to Python 3.2.1b1. // _Py_IsFinalizing() was added to PyPy 7.3.0. #if (0x030201B1 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x030D00A1) \ && (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM >= 0x7030000) static inline int Py_IsFinalizing(void) { #if PY_VERSION_HEX >= 0x030700A1 // _Py_IsFinalizing() was added to Python 3.7.0a1. return _Py_IsFinalizing(); #else return (_Py_Finalizing != NULL); #endif } #endif // gh-108323 added PyDict_ContainsString() to Python 3.13.0a1 #if PY_VERSION_HEX < 0x030D00A1 static inline int PyDict_ContainsString(PyObject *op, const char *key) { PyObject *key_obj = PyUnicode_FromString(key); if (key_obj == NULL) { return -1; } int res = PyDict_Contains(op, key_obj); Py_DECREF(key_obj); return res; } #endif // gh-108445 added PyLong_AsInt() to Python 3.13.0a1 #if PY_VERSION_HEX < 0x030D00A1 static inline int PyLong_AsInt(PyObject *obj) { #ifdef PYPY_VERSION long value = PyLong_AsLong(obj); if (value == -1 && PyErr_Occurred()) { return -1; } if (value < (long)INT_MIN || (long)INT_MAX < value) { PyErr_SetString(PyExc_OverflowError, "Python int too large to convert to C int"); return -1; } return (int)value; #else return _PyLong_AsInt(obj); #endif } #endif // gh-107073 added PyObject_VisitManagedDict() to Python 3.13.0a1 #if PY_VERSION_HEX < 0x030D00A1 static inline int PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg) { PyObject **dict = _PyObject_GetDictPtr(obj); if (dict == NULL || *dict == NULL) { return -1; } Py_VISIT(*dict); return 0; } static inline void PyObject_ClearManagedDict(PyObject *obj) { PyObject **dict = _PyObject_GetDictPtr(obj); if (dict == NULL || *dict == NULL) { return; } Py_CLEAR(*dict); } #endif // gh-108867 added PyThreadState_GetUnchecked() to Python 3.13.0a1 // Python 3.5.2 added _PyThreadState_UncheckedGet(). 
#if PY_VERSION_HEX >= 0x03050200 && PY_VERSION_HEX < 0x030D00A1 static inline PyThreadState* PyThreadState_GetUnchecked(void) { return _PyThreadState_UncheckedGet(); } #endif // gh-110289 added PyUnicode_EqualToUTF8() and PyUnicode_EqualToUTF8AndSize() // to Python 3.13.0a1 #if PY_VERSION_HEX < 0x030D00A1 static inline int PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *str, Py_ssize_t str_len) { Py_ssize_t len; const void *utf8; PyObject *exc_type, *exc_value, *exc_tb; int res; // API cannot report errors so save/restore the exception PyErr_Fetch(&exc_type, &exc_value, &exc_tb); // Python 3.3.0a1 added PyUnicode_AsUTF8AndSize() #if PY_VERSION_HEX >= 0x030300A1 if (PyUnicode_IS_ASCII(unicode)) { utf8 = PyUnicode_DATA(unicode); len = PyUnicode_GET_LENGTH(unicode); } else { utf8 = PyUnicode_AsUTF8AndSize(unicode, &len); if (utf8 == NULL) { // Memory allocation failure. The API cannot report error, // so ignore the exception and return 0. res = 0; goto done; } } if (len != str_len) { res = 0; goto done; } res = (memcmp(utf8, str, (size_t)len) == 0); #else PyObject *bytes = PyUnicode_AsUTF8String(unicode); if (bytes == NULL) { // Memory allocation failure. The API cannot report error, // so ignore the exception and return 0. res = 0; goto done; } #if PY_VERSION_HEX >= 0x03000000 len = PyBytes_GET_SIZE(bytes); utf8 = PyBytes_AS_STRING(bytes); #else len = PyString_GET_SIZE(bytes); utf8 = PyString_AS_STRING(bytes); #endif if (len != str_len) { Py_DECREF(bytes); res = 0; goto done; } res = (memcmp(utf8, str, (size_t)len) == 0); Py_DECREF(bytes); #endif done: PyErr_Restore(exc_type, exc_value, exc_tb); return res; } static inline int PyUnicode_EqualToUTF8(PyObject *unicode, const char *str) { return PyUnicode_EqualToUTF8AndSize(unicode, str, (Py_ssize_t)strlen(str)); } #endif // gh-111138 added PyList_Extend() and PyList_Clear() to Python 3.13.0a2 #if PY_VERSION_HEX < 0x030D00A2 static inline int PyList_Extend(PyObject *list, PyObject *iterable) { return PyList_SetSlice(list, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, iterable); } static inline int PyList_Clear(PyObject *list) { return PyList_SetSlice(list, 0, PY_SSIZE_T_MAX, NULL); } #endif // gh-111262 added PyDict_Pop() and PyDict_PopString() to Python 3.13.0a2 #if PY_VERSION_HEX < 0x030D00A2 static inline int PyDict_Pop(PyObject *dict, PyObject *key, PyObject **result) { PyObject *value; if (!PyDict_Check(dict)) { PyErr_BadInternalCall(); if (result) { *result = NULL; } return -1; } // bpo-16991 added _PyDict_Pop() to Python 3.5.0b2. // Python 3.6.0b3 changed _PyDict_Pop() first argument type to PyObject*. // Python 3.13.0a1 removed _PyDict_Pop(). 
#if defined(PYPY_VERSION) || PY_VERSION_HEX < 0x030500b2 || PY_VERSION_HEX >= 0x030D0000 value = PyObject_CallMethod(dict, "pop", "O", key); #elif PY_VERSION_HEX < 0x030600b3 value = _PyDict_Pop(_Py_CAST(PyDictObject*, dict), key, NULL); #else value = _PyDict_Pop(dict, key, NULL); #endif if (value == NULL) { if (result) { *result = NULL; } if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_KeyError)) { return -1; } PyErr_Clear(); return 0; } if (result) { *result = value; } else { Py_DECREF(value); } return 1; } static inline int PyDict_PopString(PyObject *dict, const char *key, PyObject **result) { PyObject *key_obj = PyUnicode_FromString(key); if (key_obj == NULL) { if (result != NULL) { *result = NULL; } return -1; } int res = PyDict_Pop(dict, key_obj, result); Py_DECREF(key_obj); return res; } #endif #if PY_VERSION_HEX < 0x030200A4 // Python 3.2.0a4 added Py_hash_t type typedef Py_ssize_t Py_hash_t; #endif // gh-111545 added Py_HashPointer() to Python 3.13.0a3 #if PY_VERSION_HEX < 0x030D00A3 static inline Py_hash_t Py_HashPointer(const void *ptr) { #if PY_VERSION_HEX >= 0x030900A4 && !defined(PYPY_VERSION) return _Py_HashPointer(ptr); #else return _Py_HashPointer(_Py_CAST(void*, ptr)); #endif } #endif // Python 3.13a4 added a PyTime API. // Use the private API added to Python 3.5. #if PY_VERSION_HEX < 0x030D00A4 && PY_VERSION_HEX >= 0x03050000 typedef _PyTime_t PyTime_t; #define PyTime_MIN _PyTime_MIN #define PyTime_MAX _PyTime_MAX static inline double PyTime_AsSecondsDouble(PyTime_t t) { return _PyTime_AsSecondsDouble(t); } static inline int PyTime_Monotonic(PyTime_t *result) { return _PyTime_GetMonotonicClockWithInfo(result, NULL); } static inline int PyTime_Time(PyTime_t *result) { return _PyTime_GetSystemClockWithInfo(result, NULL); } static inline int PyTime_PerfCounter(PyTime_t *result) { #if PY_VERSION_HEX >= 0x03070000 && !defined(PYPY_VERSION) return _PyTime_GetPerfCounterWithInfo(result, NULL); #elif PY_VERSION_HEX >= 0x03070000 // Call time.perf_counter_ns() and convert Python int object to PyTime_t. // Cache time.perf_counter_ns() function for best performance. static PyObject *func = NULL; if (func == NULL) { PyObject *mod = PyImport_ImportModule("time"); if (mod == NULL) { return -1; } func = PyObject_GetAttrString(mod, "perf_counter_ns"); Py_DECREF(mod); if (func == NULL) { return -1; } } PyObject *res = PyObject_CallNoArgs(func); if (res == NULL) { return -1; } long long value = PyLong_AsLongLong(res); Py_DECREF(res); if (value == -1 && PyErr_Occurred()) { return -1; } Py_BUILD_ASSERT(sizeof(value) >= sizeof(PyTime_t)); *result = (PyTime_t)value; return 0; #else // Call time.perf_counter() and convert C double to PyTime_t. // Cache time.perf_counter() function for best performance. static PyObject *func = NULL; if (func == NULL) { PyObject *mod = PyImport_ImportModule("time"); if (mod == NULL) { return -1; } func = PyObject_GetAttrString(mod, "perf_counter"); Py_DECREF(mod); if (func == NULL) { return -1; } } PyObject *res = PyObject_CallNoArgs(func); if (res == NULL) { return -1; } double d = PyFloat_AsDouble(res); Py_DECREF(res); if (d == -1.0 && PyErr_Occurred()) { return -1; } // Avoid floor() to avoid having to link to libm *result = (PyTime_t)(d * 1e9); return 0; #endif } #endif // gh-111389 added hash constants to Python 3.13.0a5. These constants were // added first as private macros to Python 3.4.0b1 and PyPy 7.3.8. 
#if (!defined(PyHASH_BITS) \ && ((!defined(PYPY_VERSION) && PY_VERSION_HEX >= 0x030400B1) \ || (defined(PYPY_VERSION) && PY_VERSION_HEX >= 0x03070000 \ && PYPY_VERSION_NUM >= 0x07030800))) # define PyHASH_BITS _PyHASH_BITS # define PyHASH_MODULUS _PyHASH_MODULUS # define PyHASH_INF _PyHASH_INF # define PyHASH_IMAG _PyHASH_IMAG #endif // gh-111545 added Py_GetConstant() and Py_GetConstantBorrowed() // to Python 3.13.0a6 #if PY_VERSION_HEX < 0x030D00A6 && !defined(Py_CONSTANT_NONE) #define Py_CONSTANT_NONE 0 #define Py_CONSTANT_FALSE 1 #define Py_CONSTANT_TRUE 2 #define Py_CONSTANT_ELLIPSIS 3 #define Py_CONSTANT_NOT_IMPLEMENTED 4 #define Py_CONSTANT_ZERO 5 #define Py_CONSTANT_ONE 6 #define Py_CONSTANT_EMPTY_STR 7 #define Py_CONSTANT_EMPTY_BYTES 8 #define Py_CONSTANT_EMPTY_TUPLE 9 static inline PyObject* Py_GetConstant(unsigned int constant_id) { static PyObject* constants[Py_CONSTANT_EMPTY_TUPLE + 1] = {NULL}; if (constants[Py_CONSTANT_NONE] == NULL) { constants[Py_CONSTANT_NONE] = Py_None; constants[Py_CONSTANT_FALSE] = Py_False; constants[Py_CONSTANT_TRUE] = Py_True; constants[Py_CONSTANT_ELLIPSIS] = Py_Ellipsis; constants[Py_CONSTANT_NOT_IMPLEMENTED] = Py_NotImplemented; constants[Py_CONSTANT_ZERO] = PyLong_FromLong(0); if (constants[Py_CONSTANT_ZERO] == NULL) { goto fatal_error; } constants[Py_CONSTANT_ONE] = PyLong_FromLong(1); if (constants[Py_CONSTANT_ONE] == NULL) { goto fatal_error; } constants[Py_CONSTANT_EMPTY_STR] = PyUnicode_FromStringAndSize("", 0); if (constants[Py_CONSTANT_EMPTY_STR] == NULL) { goto fatal_error; } constants[Py_CONSTANT_EMPTY_BYTES] = PyBytes_FromStringAndSize("", 0); if (constants[Py_CONSTANT_EMPTY_BYTES] == NULL) { goto fatal_error; } constants[Py_CONSTANT_EMPTY_TUPLE] = PyTuple_New(0); if (constants[Py_CONSTANT_EMPTY_TUPLE] == NULL) { goto fatal_error; } // goto dance to avoid compiler warnings about Py_FatalError() goto init_done; fatal_error: // This case should never happen Py_FatalError("Py_GetConstant() failed to get constants"); } init_done: if (constant_id <= Py_CONSTANT_EMPTY_TUPLE) { return Py_NewRef(constants[constant_id]); } else { PyErr_BadInternalCall(); return NULL; } } static inline PyObject* Py_GetConstantBorrowed(unsigned int constant_id) { PyObject *obj = Py_GetConstant(constant_id); Py_XDECREF(obj); return obj; } #endif // gh-114329 added PyList_GetItemRef() to Python 3.13.0a4 #if PY_VERSION_HEX < 0x030D00A4 static inline PyObject * PyList_GetItemRef(PyObject *op, Py_ssize_t index) { PyObject *item = PyList_GetItem(op, index); Py_XINCREF(item); return item; } #endif // gh-114329 added PyList_GetItemRef() to Python 3.13.0a4 #if PY_VERSION_HEX < 0x030D00A4 static inline int PyDict_SetDefaultRef(PyObject *d, PyObject *key, PyObject *default_value, PyObject **result) { PyObject *value; if (PyDict_GetItemRef(d, key, &value) < 0) { // get error if (result) { *result = NULL; } return -1; } if (value != NULL) { // present if (result) { *result = value; } else { Py_DECREF(value); } return 1; } // missing: set the item if (PyDict_SetItem(d, key, default_value) < 0) { // set error if (result) { *result = NULL; } return -1; } if (result) { *result = Py_NewRef(default_value); } return 0; } #endif #if PY_VERSION_HEX < 0x030D00B3 # define Py_BEGIN_CRITICAL_SECTION(op) { # define Py_END_CRITICAL_SECTION() } # define Py_BEGIN_CRITICAL_SECTION2(a, b) { # define Py_END_CRITICAL_SECTION2() } #endif #if PY_VERSION_HEX < 0x030E0000 && PY_VERSION_HEX >= 0x03060000 && !defined(PYPY_VERSION) typedef struct PyUnicodeWriter PyUnicodeWriter; static inline void 
PyUnicodeWriter_Discard(PyUnicodeWriter *writer) { _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer); PyMem_Free(writer); } static inline PyUnicodeWriter* PyUnicodeWriter_Create(Py_ssize_t length) { if (length < 0) { PyErr_SetString(PyExc_ValueError, "length must be positive"); return NULL; } const size_t size = sizeof(_PyUnicodeWriter); PyUnicodeWriter *pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size); if (pub_writer == _Py_NULL) { PyErr_NoMemory(); return _Py_NULL; } _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer; _PyUnicodeWriter_Init(writer); if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) { PyUnicodeWriter_Discard(pub_writer); return NULL; } writer->overallocate = 1; return pub_writer; } static inline PyObject* PyUnicodeWriter_Finish(PyUnicodeWriter *writer) { PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer); assert(((_PyUnicodeWriter*)writer)->buffer == NULL); PyMem_Free(writer); return str; } static inline int PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch) { if (ch > 0x10ffff) { PyErr_SetString(PyExc_ValueError, "character must be in range(0x110000)"); return -1; } return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch); } static inline int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) { PyObject *str = PyObject_Str(obj); if (str == NULL) { return -1; } int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str); Py_DECREF(str); return res; } static inline int PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj) { PyObject *str = PyObject_Repr(obj); if (str == NULL) { return -1; } int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str); Py_DECREF(str); return res; } static inline int PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer, const char *str, Py_ssize_t size) { if (size < 0) { size = (Py_ssize_t)strlen(str); } PyObject *str_obj = PyUnicode_FromStringAndSize(str, size); if (str_obj == _Py_NULL) { return -1; } int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str_obj); Py_DECREF(str_obj); return res; } static inline int PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer, const wchar_t *str, Py_ssize_t size) { if (size < 0) { size = (Py_ssize_t)wcslen(str); } PyObject *str_obj = PyUnicode_FromWideChar(str, size); if (str_obj == _Py_NULL) { return -1; } int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str_obj); Py_DECREF(str_obj); return res; } static inline int PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str, Py_ssize_t start, Py_ssize_t end) { if (!PyUnicode_Check(str)) { PyErr_Format(PyExc_TypeError, "expect str, not %T", str); return -1; } if (start < 0 || start > end) { PyErr_Format(PyExc_ValueError, "invalid start argument"); return -1; } if (end > PyUnicode_GET_LENGTH(str)) { PyErr_Format(PyExc_ValueError, "invalid end argument"); return -1; } return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str, start, end); } static inline int PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...) 
{ va_list vargs; va_start(vargs, format); PyObject *str = PyUnicode_FromFormatV(format, vargs); va_end(vargs); if (str == _Py_NULL) { return -1; } int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str); Py_DECREF(str); return res; } #endif // PY_VERSION_HEX < 0x030E0000 // gh-116560 added PyLong_GetSign() to Python 3.14.0a0 #if PY_VERSION_HEX < 0x030E00A0 static inline int PyLong_GetSign(PyObject *obj, int *sign) { if (!PyLong_Check(obj)) { PyErr_Format(PyExc_TypeError, "expect int, got %s", Py_TYPE(obj)->tp_name); return -1; } *sign = _PyLong_Sign(obj); return 0; } #endif // gh-126061 added PyLong_IsPositive/Negative/Zero() to Python in 3.14.0a2 #if PY_VERSION_HEX < 0x030E00A2 static inline int PyLong_IsPositive(PyObject *obj) { if (!PyLong_Check(obj)) { PyErr_Format(PyExc_TypeError, "expected int, got %s", Py_TYPE(obj)->tp_name); return -1; } return _PyLong_Sign(obj) == 1; } static inline int PyLong_IsNegative(PyObject *obj) { if (!PyLong_Check(obj)) { PyErr_Format(PyExc_TypeError, "expected int, got %s", Py_TYPE(obj)->tp_name); return -1; } return _PyLong_Sign(obj) == -1; } static inline int PyLong_IsZero(PyObject *obj) { if (!PyLong_Check(obj)) { PyErr_Format(PyExc_TypeError, "expected int, got %s", Py_TYPE(obj)->tp_name); return -1; } return _PyLong_Sign(obj) == 0; } #endif // gh-124502 added PyUnicode_Equal() to Python 3.14.0a0 #if PY_VERSION_HEX < 0x030E00A0 static inline int PyUnicode_Equal(PyObject *str1, PyObject *str2) { if (!PyUnicode_Check(str1)) { PyErr_Format(PyExc_TypeError, "first argument must be str, not %s", Py_TYPE(str1)->tp_name); return -1; } if (!PyUnicode_Check(str2)) { PyErr_Format(PyExc_TypeError, "second argument must be str, not %s", Py_TYPE(str2)->tp_name); return -1; } #if PY_VERSION_HEX >= 0x030d0000 && !defined(PYPY_VERSION) PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *str1, PyObject *str2); return _PyUnicode_Equal(str1, str2); #elif PY_VERSION_HEX >= 0x03060000 && !defined(PYPY_VERSION) return _PyUnicode_EQ(str1, str2); #elif PY_VERSION_HEX >= 0x03090000 && defined(PYPY_VERSION) return _PyUnicode_EQ(str1, str2); #else return (PyUnicode_Compare(str1, str2) == 0); #endif } #endif // gh-121645 added PyBytes_Join() to Python 3.14.0a0 #if PY_VERSION_HEX < 0x030E00A0 static inline PyObject* PyBytes_Join(PyObject *sep, PyObject *iterable) { return _PyBytes_Join(sep, iterable); } #endif #if PY_VERSION_HEX < 0x030E00A0 static inline Py_hash_t Py_HashBuffer(const void *ptr, Py_ssize_t len) { #if PY_VERSION_HEX >= 0x03000000 && !defined(PYPY_VERSION) PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void *src, Py_ssize_t len); return _Py_HashBytes(ptr, len); #else Py_hash_t hash; PyObject *bytes = PyBytes_FromStringAndSize((const char*)ptr, len); if (bytes == NULL) { return -1; } hash = PyObject_Hash(bytes); Py_DECREF(bytes); return hash; #endif } #endif #if PY_VERSION_HEX < 0x030E00A0 static inline int PyIter_NextItem(PyObject *iter, PyObject **item) { iternextfunc tp_iternext; assert(iter != NULL); assert(item != NULL); tp_iternext = Py_TYPE(iter)->tp_iternext; if (tp_iternext == NULL) { *item = NULL; PyErr_Format(PyExc_TypeError, "expected an iterator, got '%s'", Py_TYPE(iter)->tp_name); return -1; } if ((*item = tp_iternext(iter))) { return 1; } if (!PyErr_Occurred()) { return 0; } if (PyErr_ExceptionMatches(PyExc_StopIteration)) { PyErr_Clear(); return 0; } return -1; } #endif #if PY_VERSION_HEX < 0x030E00A0 static inline PyObject* PyLong_FromInt32(int32_t value) { Py_BUILD_ASSERT(sizeof(long) >= 4); return PyLong_FromLong(value); } static inline PyObject* 
PyLong_FromInt64(int64_t value) { Py_BUILD_ASSERT(sizeof(long long) >= 8); return PyLong_FromLongLong(value); } static inline PyObject* PyLong_FromUInt32(uint32_t value) { Py_BUILD_ASSERT(sizeof(unsigned long) >= 4); return PyLong_FromUnsignedLong(value); } static inline PyObject* PyLong_FromUInt64(uint64_t value) { Py_BUILD_ASSERT(sizeof(unsigned long long) >= 8); return PyLong_FromUnsignedLongLong(value); } static inline int PyLong_AsInt32(PyObject *obj, int32_t *pvalue) { Py_BUILD_ASSERT(sizeof(int) == 4); int value = PyLong_AsInt(obj); if (value == -1 && PyErr_Occurred()) { return -1; } *pvalue = (int32_t)value; return 0; } static inline int PyLong_AsInt64(PyObject *obj, int64_t *pvalue) { Py_BUILD_ASSERT(sizeof(long long) == 8); long long value = PyLong_AsLongLong(obj); if (value == -1 && PyErr_Occurred()) { return -1; } *pvalue = (int64_t)value; return 0; } static inline int PyLong_AsUInt32(PyObject *obj, uint32_t *pvalue) { Py_BUILD_ASSERT(sizeof(long) >= 4); unsigned long value = PyLong_AsUnsignedLong(obj); if (value == (unsigned long)-1 && PyErr_Occurred()) { return -1; } #if SIZEOF_LONG > 4 if ((unsigned long)UINT32_MAX < value) { PyErr_SetString(PyExc_OverflowError, "Python int too large to convert to C uint32_t"); return -1; } #endif *pvalue = (uint32_t)value; return 0; } static inline int PyLong_AsUInt64(PyObject *obj, uint64_t *pvalue) { Py_BUILD_ASSERT(sizeof(long long) == 8); unsigned long long value = PyLong_AsUnsignedLongLong(obj); if (value == (unsigned long long)-1 && PyErr_Occurred()) { return -1; } *pvalue = (uint64_t)value; return 0; } #endif // gh-102471 added import and export API for integers to 3.14.0a2. #if PY_VERSION_HEX < 0x030E00A2 && PY_VERSION_HEX >= 0x03000000 && !defined(PYPY_VERSION) // Helpers to access PyLongObject internals. static inline void _PyLong_SetSignAndDigitCount(PyLongObject *op, int sign, Py_ssize_t size) { #if PY_VERSION_HEX >= 0x030C0000 op->long_value.lv_tag = (uintptr_t)(1 - sign) | ((uintptr_t)(size) << 3); #elif PY_VERSION_HEX >= 0x030900A4 Py_SET_SIZE(op, sign * size); #else Py_SIZE(op) = sign * size; #endif } static inline Py_ssize_t _PyLong_DigitCount(const PyLongObject *op) { #if PY_VERSION_HEX >= 0x030C0000 return (Py_ssize_t)(op->long_value.lv_tag >> 3); #else return _PyLong_Sign((PyObject*)op) < 0 ? -Py_SIZE(op) : Py_SIZE(op); #endif } static inline digit* _PyLong_GetDigits(const PyLongObject *op) { #if PY_VERSION_HEX >= 0x030C0000 return (digit*)(op->long_value.ob_digit); #else return (digit*)(op->ob_digit); #endif } typedef struct PyLongLayout { uint8_t bits_per_digit; uint8_t digit_size; int8_t digits_order; int8_t digit_endianness; } PyLongLayout; typedef struct PyLongExport { int64_t value; uint8_t negative; Py_ssize_t ndigits; const void *digits; Py_uintptr_t _reserved; } PyLongExport; typedef struct PyLongWriter PyLongWriter; static inline const PyLongLayout* PyLong_GetNativeLayout(void) { static const PyLongLayout PyLong_LAYOUT = { PyLong_SHIFT, sizeof(digit), -1, // least significant first PY_LITTLE_ENDIAN ? 
-1 : 1, }; return &PyLong_LAYOUT; } static inline int PyLong_Export(PyObject *obj, PyLongExport *export_long) { if (!PyLong_Check(obj)) { memset(export_long, 0, sizeof(*export_long)); PyErr_Format(PyExc_TypeError, "expected int, got %s", Py_TYPE(obj)->tp_name); return -1; } // Fast-path: try to convert to a int64_t PyLongObject *self = (PyLongObject*)obj; int overflow; #if SIZEOF_LONG == 8 long value = PyLong_AsLongAndOverflow(obj, &overflow); #else // Windows has 32-bit long, so use 64-bit long long instead long long value = PyLong_AsLongLongAndOverflow(obj, &overflow); #endif Py_BUILD_ASSERT(sizeof(value) == sizeof(int64_t)); // the function cannot fail since obj is a PyLongObject assert(!(value == -1 && PyErr_Occurred())); if (!overflow) { export_long->value = value; export_long->negative = 0; export_long->ndigits = 0; export_long->digits = 0; export_long->_reserved = 0; } else { export_long->value = 0; export_long->negative = _PyLong_Sign(obj) < 0; export_long->ndigits = _PyLong_DigitCount(self); if (export_long->ndigits == 0) { export_long->ndigits = 1; } export_long->digits = _PyLong_GetDigits(self); export_long->_reserved = (Py_uintptr_t)Py_NewRef(obj); } return 0; } static inline void PyLong_FreeExport(PyLongExport *export_long) { PyObject *obj = (PyObject*)export_long->_reserved; if (obj) { export_long->_reserved = 0; Py_DECREF(obj); } } static inline PyLongWriter* PyLongWriter_Create(int negative, Py_ssize_t ndigits, void **digits) { if (ndigits <= 0) { PyErr_SetString(PyExc_ValueError, "ndigits must be positive"); return NULL; } assert(digits != NULL); PyLongObject *obj = _PyLong_New(ndigits); if (obj == NULL) { return NULL; } _PyLong_SetSignAndDigitCount(obj, negative?-1:1, ndigits); *digits = _PyLong_GetDigits(obj); return (PyLongWriter*)obj; } static inline void PyLongWriter_Discard(PyLongWriter *writer) { PyLongObject *obj = (PyLongObject *)writer; assert(Py_REFCNT(obj) == 1); Py_DECREF(obj); } static inline PyObject* PyLongWriter_Finish(PyLongWriter *writer) { PyObject *obj = (PyObject *)writer; PyLongObject *self = (PyLongObject*)obj; Py_ssize_t j = _PyLong_DigitCount(self); Py_ssize_t i = j; int sign = _PyLong_Sign(obj); assert(Py_REFCNT(obj) == 1); // Normalize and get singleton if possible while (i > 0 && _PyLong_GetDigits(self)[i-1] == 0) { --i; } if (i != j) { if (i == 0) { sign = 0; } _PyLong_SetSignAndDigitCount(self, sign, i); } if (i <= 1) { long val = sign * (long)(_PyLong_GetDigits(self)[0]); Py_DECREF(obj); return PyLong_FromLong(val); } return obj; } #endif #if PY_VERSION_HEX < 0x030C00A3 # define Py_T_SHORT T_SHORT # define Py_T_INT T_INT # define Py_T_LONG T_LONG # define Py_T_FLOAT T_FLOAT # define Py_T_DOUBLE T_DOUBLE # define Py_T_STRING T_STRING # define _Py_T_OBJECT T_OBJECT # define Py_T_CHAR T_CHAR # define Py_T_BYTE T_BYTE # define Py_T_UBYTE T_UBYTE # define Py_T_USHORT T_USHORT # define Py_T_UINT T_UINT # define Py_T_ULONG T_ULONG # define Py_T_STRING_INPLACE T_STRING_INPLACE # define Py_T_BOOL T_BOOL # define Py_T_OBJECT_EX T_OBJECT_EX # define Py_T_LONGLONG T_LONGLONG # define Py_T_ULONGLONG T_ULONGLONG # define Py_T_PYSSIZET T_PYSSIZET # if PY_VERSION_HEX >= 0x03000000 && !defined(PYPY_VERSION) # define _Py_T_NONE T_NONE # endif # define Py_READONLY READONLY # define Py_AUDIT_READ READ_RESTRICTED # define _Py_WRITE_RESTRICTED PY_WRITE_RESTRICTED #endif // gh-127350 added Py_fopen() and Py_fclose() to Python 3.14a4 #if PY_VERSION_HEX < 0x030E00A4 static inline FILE* Py_fopen(PyObject *path, const char *mode) { #if 0x030400A2 <= PY_VERSION_HEX && 
!defined(PYPY_VERSION) PyAPI_FUNC(FILE*) _Py_fopen_obj(PyObject *path, const char *mode); return _Py_fopen_obj(path, mode); #else FILE *f; PyObject *bytes; #if PY_VERSION_HEX >= 0x03000000 if (!PyUnicode_FSConverter(path, &bytes)) { return NULL; } #else if (!PyString_Check(path)) { PyErr_SetString(PyExc_TypeError, "except str"); return NULL; } bytes = Py_NewRef(path); #endif const char *path_bytes = PyBytes_AS_STRING(bytes); f = fopen(path_bytes, mode); Py_DECREF(bytes); if (f == NULL) { PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path); return NULL; } return f; #endif } static inline int Py_fclose(FILE *file) { return fclose(file); } #endif #if 0x03090000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x030E0000 && !defined(PYPY_VERSION) static inline PyObject* PyConfig_Get(const char *name) { typedef enum { _PyConfig_MEMBER_INT, _PyConfig_MEMBER_UINT, _PyConfig_MEMBER_ULONG, _PyConfig_MEMBER_BOOL, _PyConfig_MEMBER_WSTR, _PyConfig_MEMBER_WSTR_OPT, _PyConfig_MEMBER_WSTR_LIST, } PyConfigMemberType; typedef struct { const char *name; size_t offset; PyConfigMemberType type; const char *sys_attr; } PyConfigSpec; #define PYTHONCAPI_COMPAT_SPEC(MEMBER, TYPE, sys_attr) \ {#MEMBER, offsetof(PyConfig, MEMBER), \ _PyConfig_MEMBER_##TYPE, sys_attr} static const PyConfigSpec config_spec[] = { PYTHONCAPI_COMPAT_SPEC(argv, WSTR_LIST, "argv"), PYTHONCAPI_COMPAT_SPEC(base_exec_prefix, WSTR_OPT, "base_exec_prefix"), PYTHONCAPI_COMPAT_SPEC(base_executable, WSTR_OPT, "_base_executable"), PYTHONCAPI_COMPAT_SPEC(base_prefix, WSTR_OPT, "base_prefix"), PYTHONCAPI_COMPAT_SPEC(bytes_warning, UINT, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(exec_prefix, WSTR_OPT, "exec_prefix"), PYTHONCAPI_COMPAT_SPEC(executable, WSTR_OPT, "executable"), PYTHONCAPI_COMPAT_SPEC(inspect, BOOL, _Py_NULL), #if 0x030C0000 <= PY_VERSION_HEX PYTHONCAPI_COMPAT_SPEC(int_max_str_digits, UINT, _Py_NULL), #endif PYTHONCAPI_COMPAT_SPEC(interactive, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(module_search_paths, WSTR_LIST, "path"), PYTHONCAPI_COMPAT_SPEC(optimization_level, UINT, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(parser_debug, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(platlibdir, WSTR, "platlibdir"), PYTHONCAPI_COMPAT_SPEC(prefix, WSTR_OPT, "prefix"), PYTHONCAPI_COMPAT_SPEC(pycache_prefix, WSTR_OPT, "pycache_prefix"), PYTHONCAPI_COMPAT_SPEC(quiet, BOOL, _Py_NULL), #if 0x030B0000 <= PY_VERSION_HEX PYTHONCAPI_COMPAT_SPEC(stdlib_dir, WSTR_OPT, "_stdlib_dir"), #endif PYTHONCAPI_COMPAT_SPEC(use_environment, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(verbose, UINT, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(warnoptions, WSTR_LIST, "warnoptions"), PYTHONCAPI_COMPAT_SPEC(write_bytecode, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(xoptions, WSTR_LIST, "_xoptions"), PYTHONCAPI_COMPAT_SPEC(buffered_stdio, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(check_hash_pycs_mode, WSTR, _Py_NULL), #if 0x030B0000 <= PY_VERSION_HEX PYTHONCAPI_COMPAT_SPEC(code_debug_ranges, BOOL, _Py_NULL), #endif PYTHONCAPI_COMPAT_SPEC(configure_c_stdio, BOOL, _Py_NULL), #if 0x030D0000 <= PY_VERSION_HEX PYTHONCAPI_COMPAT_SPEC(cpu_count, INT, _Py_NULL), #endif PYTHONCAPI_COMPAT_SPEC(dev_mode, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(dump_refs, BOOL, _Py_NULL), #if 0x030B0000 <= PY_VERSION_HEX PYTHONCAPI_COMPAT_SPEC(dump_refs_file, WSTR_OPT, _Py_NULL), #endif #ifdef Py_GIL_DISABLED PYTHONCAPI_COMPAT_SPEC(enable_gil, INT, _Py_NULL), #endif PYTHONCAPI_COMPAT_SPEC(faulthandler, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(filesystem_encoding, WSTR, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(filesystem_errors, WSTR, _Py_NULL), 
PYTHONCAPI_COMPAT_SPEC(hash_seed, ULONG, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(home, WSTR_OPT, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(import_time, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(install_signal_handlers, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(isolated, BOOL, _Py_NULL), #ifdef MS_WINDOWS PYTHONCAPI_COMPAT_SPEC(legacy_windows_stdio, BOOL, _Py_NULL), #endif PYTHONCAPI_COMPAT_SPEC(malloc_stats, BOOL, _Py_NULL), #if 0x030A0000 <= PY_VERSION_HEX PYTHONCAPI_COMPAT_SPEC(orig_argv, WSTR_LIST, "orig_argv"), #endif PYTHONCAPI_COMPAT_SPEC(parse_argv, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(pathconfig_warnings, BOOL, _Py_NULL), #if 0x030C0000 <= PY_VERSION_HEX PYTHONCAPI_COMPAT_SPEC(perf_profiling, UINT, _Py_NULL), #endif PYTHONCAPI_COMPAT_SPEC(program_name, WSTR, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(run_command, WSTR_OPT, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(run_filename, WSTR_OPT, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(run_module, WSTR_OPT, _Py_NULL), #if 0x030B0000 <= PY_VERSION_HEX PYTHONCAPI_COMPAT_SPEC(safe_path, BOOL, _Py_NULL), #endif PYTHONCAPI_COMPAT_SPEC(show_ref_count, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(site_import, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(skip_source_first_line, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(stdio_encoding, WSTR, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(stdio_errors, WSTR, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(tracemalloc, UINT, _Py_NULL), #if 0x030B0000 <= PY_VERSION_HEX PYTHONCAPI_COMPAT_SPEC(use_frozen_modules, BOOL, _Py_NULL), #endif PYTHONCAPI_COMPAT_SPEC(use_hash_seed, BOOL, _Py_NULL), PYTHONCAPI_COMPAT_SPEC(user_site_directory, BOOL, _Py_NULL), #if 0x030A0000 <= PY_VERSION_HEX PYTHONCAPI_COMPAT_SPEC(warn_default_encoding, BOOL, _Py_NULL), #endif }; #undef PYTHONCAPI_COMPAT_SPEC const PyConfigSpec *spec; int found = 0; for (size_t i=0; i < sizeof(config_spec) / sizeof(config_spec[0]); i++) { spec = &config_spec[i]; if (strcmp(spec->name, name) == 0) { found = 1; break; } } if (found) { if (spec->sys_attr != NULL) { PyObject *value = PySys_GetObject(spec->sys_attr); if (value == NULL) { PyErr_Format(PyExc_RuntimeError, "lost sys.%s", spec->sys_attr); return NULL; } return Py_NewRef(value); } PyAPI_FUNC(const PyConfig*) _Py_GetConfig(void); const PyConfig *config = _Py_GetConfig(); void *member = (char *)config + spec->offset; switch (spec->type) { case _PyConfig_MEMBER_INT: case _PyConfig_MEMBER_UINT: { int value = *(int *)member; return PyLong_FromLong(value); } case _PyConfig_MEMBER_BOOL: { int value = *(int *)member; return PyBool_FromLong(value != 0); } case _PyConfig_MEMBER_ULONG: { unsigned long value = *(unsigned long *)member; return PyLong_FromUnsignedLong(value); } case _PyConfig_MEMBER_WSTR: case _PyConfig_MEMBER_WSTR_OPT: { wchar_t *wstr = *(wchar_t **)member; if (wstr != NULL) { return PyUnicode_FromWideChar(wstr, -1); } else { return Py_NewRef(Py_None); } } case _PyConfig_MEMBER_WSTR_LIST: { const PyWideStringList *list = (const PyWideStringList *)member; PyObject *tuple = PyTuple_New(list->length); if (tuple == NULL) { return NULL; } for (Py_ssize_t i = 0; i < list->length; i++) { PyObject *item = PyUnicode_FromWideChar(list->items[i], -1); if (item == NULL) { Py_DECREF(tuple); return NULL; } PyTuple_SET_ITEM(tuple, i, item); } return tuple; } default: Py_UNREACHABLE(); } } PyErr_Format(PyExc_ValueError, "unknown config option name: %s", name); return NULL; } static inline int PyConfig_GetInt(const char *name, int *value) { PyObject *obj = PyConfig_Get(name); if (obj == NULL) { return -1; } if (!PyLong_Check(obj)) { Py_DECREF(obj); PyErr_Format(PyExc_TypeError, 
"config option %s is not an int", name); return -1; } int as_int = PyLong_AsInt(obj); Py_DECREF(obj); if (as_int == -1 && PyErr_Occurred()) { PyErr_Format(PyExc_OverflowError, "config option %s value does not fit into a C int", name); return -1; } *value = as_int; return 0; } #endif // PY_VERSION_HEX > 0x03090000 && !defined(PYPY_VERSION) #ifdef __cplusplus } #endif #endif // PYTHONCAPI_COMPAT bitarray-3.7.1/bitarray/test_281.pickle000066400000000000000000000006721505414144000177210ustar00rootroot00000000000000€}q(Xf2qcbitarray._bitarray _bitarray_reconstructor q(cbitarray frozenbitarray qC@qXlittleqKKtqRq}qbXb3q h(cbitarray bitarray q C'€q Xbigq KKtq RqXf0qh(hCqXlittleqKKtqRq}qbXf1qh(hC`qXbigqKKtqRq}qbXf3qh(hC'€qXbigqKKtqRq}q bXb1q!h(h hXbigq"KKtq#Rq$Xb2q%h(h C@q&Xlittleq'KKtq(Rq)Xb0q*h(h hXlittleq+KKtq,Rq-u.bitarray-3.7.1/bitarray/test_bitarray.py000066400000000000000000005435551505414144000204210ustar00rootroot00000000000000# Copyright (c) 2008 - 2025, Ilan Schnell; All Rights Reserved # bitarray is published under the PSF license. # # Author: Ilan Schnell """ Tests for bitarray Author: Ilan Schnell """ from __future__ import absolute_import import re import os import sys import platform import unittest import shutil import tempfile from io import BytesIO, UnsupportedOperation from random import choice, getrandbits, randrange, randint, shuffle from string import whitespace # imports needed inside tests import array import copy import itertools import mmap import pickle import shelve import weakref pyodide = bool(platform.machine() == 'wasm32') is_pypy = bool(platform.python_implementation() == 'PyPy') from bitarray import (bitarray, frozenbitarray, bits2bytes, decodetree, get_default_endian, _set_default_endian, _bitarray_reconstructor, _sysinfo as sysinfo, BufferInfo, __version__) def skipIf(condition): "Skip a test if the condition is true." 
if condition: return lambda f: None return lambda f: f PTRSIZE = sysinfo("void*") # pointer size in bytes # avoid importing from bitarray.util zeros = bitarray def ones(n, endian=None): a = bitarray(n, endian) a.setall(1) return a def urandom_2(n, endian=""): if endian == "": endian = choice(['little', 'big']) a = bitarray(os.urandom(bits2bytes(n)), endian) del a[n:] return a class Util(object): @staticmethod def random_endian(): return choice(['little', 'big']) @staticmethod def randombitarrays(start=0): for n in range(start, 10): yield urandom_2(n) for _ in range(3): yield urandom_2(randrange(start, 1000)) def randomlists(self): for a in self.randombitarrays(): yield a.tolist() @staticmethod def opposite_endian(endian): t = {'little': 'big', 'big': 'little'} return t[endian] @staticmethod def random_slice(n, step=None): return slice(randint(-n - 2, n + 2), randint(-n - 2, n + 2), step or randint(-5, 5) or 1) def check_obj(self, a): self.assertIsInstance(a, bitarray) self.assertEqual(a.nbytes, bits2bytes(len(a))) self.assertTrue(0 <= a.padbits < 8) self.assertEqual(len(a) + a.padbits, 8 * a.nbytes) info = a.buffer_info() if info.imported: # imported buffer implies that no extra memory is allocated self.assertEqual(info.alloc, 0) # an imported buffer will always have a multiple of 8 bits self.assertEqual(len(a), 8 * a.nbytes) self.assertEqual(a.padbits, 0) else: # the allocated memory is always larger than the buffer size self.assertTrue(info.alloc >= a.nbytes) if info.address == 0: # the buffer being a NULL pointer implies that the buffer size # and the allocated memory size are 0 self.assertEqual(a.nbytes, 0) self.assertEqual(info.alloc, 0) if type(a) == frozenbitarray: # frozenbitarray have read-only memory self.assertEqual(a.readonly, 1) if a.padbits: # ensure padbits are zero b = bitarray(bytes(a)[-1:], endian=a.endian)[-a.padbits:] self.assertEqual(len(b), a.padbits) self.assertEqual(b.count(), 0) elif not info.imported: # otherwise, unless the buffer is imported, it is writable self.assertFalse(a.readonly) def assertEQUAL(self, a, b): self.assertEqual(a, b) self.assertEqual(a.endian, b.endian) def assertIsType(self, a, b): self.assertEqual(type(a).__name__, b) self.assertEqual(repr(type(a)), "" % b) @staticmethod def assertRaisesMessage(excClass, msg, callable, *args, **kwargs): try: callable(*args, **kwargs) raise AssertionError("%s not raised" % excClass.__name__) except excClass as e: if msg != str(e): raise AssertionError("message: %s\n got: %s" % (msg, e)) # -------------------------- Module Functions ----------------------------- class ModuleFunctionsTests(unittest.TestCase): def test_version_string(self): # the version string is not a function, but test it here anyway self.assertEqual(type(__version__), str) def test_sysinfo(self): for key in ["void*", "size_t", "bitarrayobject", "decodetreeobject", "binode", "HAVE_BUILTIN_BSWAP64", "PY_LITTLE_ENDIAN", "PY_BIG_ENDIAN", "Py_DEBUG", "DEBUG"]: res = sysinfo(key) self.assertEqual(type(res), int) def test_sysinfo_errors(self): self.assertRaises(TypeError, sysinfo) self.assertRaises(TypeError, sysinfo, b"void*") self.assertRaises(KeyError, sysinfo, "foo") def test_sysinfo_pointer_size(self): self.assertEqual(sysinfo("void*"), PTRSIZE) self.assertEqual(sysinfo("size_t"), PTRSIZE) self.assertEqual(sys.maxsize, 2 ** (8 * PTRSIZE - 1) - 1) if not is_pypy: # PyPy doesn't have tuple.__itemsize__ self.assertEqual(PTRSIZE, tuple.__itemsize__) def test_sysinfo_byteorder(self): self.assertEqual(sys.byteorder == "little", 
sysinfo("PY_LITTLE_ENDIAN")) self.assertEqual(sys.byteorder == "big", sysinfo("PY_BIG_ENDIAN")) def test_set_default_endian(self): for default_endian in 'big', 'little': _set_default_endian(default_endian) a = bitarray() self.assertEqual(a.endian, default_endian) for x in None, 0, 64, '10111', [1, 0]: a = bitarray(x) self.assertEqual(a.endian, default_endian) for endian in 'big', 'little', None: a = bitarray(endian=endian) self.assertEqual(a.endian, default_endian if endian is None else endian) # make sure that wrong calling _set_default_endian() does not # change the default endianness self.assertRaises(ValueError, _set_default_endian, 'foobar') self.assertEqual(bitarray().endian, default_endian) def test_set_default_endian_errors(self): self.assertRaises(TypeError, _set_default_endian, 0) self.assertRaises(TypeError, _set_default_endian, 'little', 0) self.assertRaises(ValueError, _set_default_endian, 'foo') def test_get_default_endian(self): for default_endian in 'big', 'little': _set_default_endian(default_endian) endian = get_default_endian() self.assertEqual(endian, default_endian) self.assertEqual(type(endian), str) def test_get_default_endian_errors(self): # takes no arguments self.assertRaises(TypeError, get_default_endian, 'big') def test_bits2bytes(self): for n, res in (0, 0), (1, 1), (7, 1), (8, 1), (9, 2): self.assertEqual(bits2bytes(n), res) for n in range(1, 200): m = bits2bytes(n) self.assertEqual(m, (n - 1) // 8 + 1) self.assertEqual(type(m), int) k = (1 << n) + randrange(1000) self.assertEqual(bits2bytes(k), (k - 1) // 8 + 1) def test_bits2bytes_errors(self): for arg in 'foo', [], None, {}, 187.0, -4.0: self.assertRaises(TypeError, bits2bytes, arg) self.assertRaises(TypeError, bits2bytes) self.assertRaises(TypeError, bits2bytes, 1, 2) self.assertRaises(ValueError, bits2bytes, -1) self.assertRaises(ValueError, bits2bytes, -924) # --------------------------------------------------------------------------- class CreateObjectTests(unittest.TestCase, Util): def test_noInitializer(self): a = bitarray() self.assertEqual(len(a), 0) self.assertEqual(a.tolist(), []) self.assertEqual(type(a), bitarray) self.check_obj(a) def test_endian(self): a = bitarray(b"ABC", endian='little') self.assertEqual(a.endian, 'little') self.assertEqual(type(a.endian), str) self.check_obj(a) b = bitarray(b"ABC", endian='big') self.assertEqual(b.endian, 'big') self.assertEqual(type(a.endian), str) self.check_obj(b) self.assertNotEqual(a, b) self.assertEqual(a.tobytes(), b.tobytes()) def test_endian_default(self): _set_default_endian('big') a_big = bitarray() _set_default_endian('little') a_little = bitarray() _set_default_endian('big') self.assertEqual(a_big.endian, 'big') self.assertEqual(a_little.endian, 'little') def test_endian_wrong(self): self.assertRaises(TypeError, bitarray, endian=0) self.assertRaises(ValueError, bitarray, endian='') self.assertRaisesMessage( ValueError, "bit-endianness must be either 'little' or 'big', not 'foo'", bitarray, endian='foo') self.assertRaisesMessage(TypeError, "'ellipsis' object is not iterable", bitarray, Ellipsis) def test_buffer_endian(self): for endian in 'big', 'little': a = bitarray(buffer=b'', endian=endian) self.assertEQUAL(a, bitarray(0, endian)) _set_default_endian(endian) a = bitarray(buffer=b'A') self.assertEqual(a.endian, endian) self.assertEqual(len(a), 8) def test_buffer_readonly(self): a = bitarray(buffer=b'\xf0', endian='little') self.assertTrue(a.readonly) self.assertRaises(TypeError, a.clear) self.assertRaises(TypeError, a.__setitem__, 3, 1) 
self.assertEQUAL(a, bitarray('00001111', 'little')) self.check_obj(a) @skipIf(is_pypy) def test_buffer_writable(self): a = bitarray(buffer=bytearray([65])) self.assertFalse(a.readonly) a[6] = 1 def test_buffer_args(self): # buffer requires no initial argument self.assertRaises(TypeError, bitarray, 5, buffer=b'DATA\0') # positinal arguments a = bitarray(None, 'big', bytearray([15])) self.assertEQUAL(a, bitarray('00001111', 'big')) a = bitarray(None, 'little', None) self.assertEQUAL(a, bitarray(0, 'little')) def test_none(self): for a in [bitarray(None), bitarray(None, buffer=None), bitarray(None, buffer=Ellipsis), bitarray(None, None, None), bitarray(None, None, Ellipsis)]: self.assertEqual(len(a), 0) def test_int(self): for n in range(50): a = bitarray(n) self.assertEqual(len(a), n) self.assertFalse(a.any()) self.assertEqual(a.to01(), n * '0') self.check_obj(a) # uninitialized buffer a = bitarray(n, buffer=Ellipsis) self.assertEqual(len(a), n) self.check_obj(a) self.assertRaises(ValueError, bitarray, -1) self.assertRaises(ValueError, bitarray, -924) def test_list(self): a = bitarray([0, 1, False, True]) self.assertEqual(a, bitarray('0101')) self.check_obj(a) self.assertRaises(ValueError, bitarray, [0, 1, 2]) self.assertRaises(TypeError, bitarray, [0, 1, None]) for n in range(50): lst = [bool(getrandbits(1)) for d in range(n)] a = bitarray(lst) self.assertEqual(a.tolist(), lst) self.check_obj(a) def test_sequence(self): lst = [0, 1, 1, 1, 0] for x in lst, tuple(lst), array.array('i', lst): self.assertEqual(len(x), 5) # sequences have len, iterables not a = bitarray(x) self.assertEqual(a, bitarray('01110')) self.check_obj(a) def test_iter1(self): for n in range(50): lst = [bool(getrandbits(1)) for d in range(n)] a = bitarray(iter(lst)) self.assertEqual(a.tolist(), lst) self.check_obj(a) def test_iter2(self): for lst in self.randomlists(): def foo(): for x in lst: yield x a = bitarray(foo()) self.assertEqual(a, bitarray(lst)) self.check_obj(a) def test_iter3(self): a = bitarray(itertools.repeat(False, 10)) self.assertEqual(a, zeros(10)) a = bitarray(itertools.repeat(1, 10)) self.assertEqual(a, bitarray(10 * '1')) def test_range(self): self.assertEqual(bitarray(range(2)), bitarray('01')) self.assertRaises(ValueError, bitarray, range(0, 3)) def test_string01(self): for s in '0010111', '0010 111', '0010_111': a = bitarray(s) self.assertEqual(a.tolist(), [0, 0, 1, 0, 1, 1, 1]) self.check_obj(a) for lst in self.randomlists(): s = ''.join([['0', '1'][x] for x in lst]) a = bitarray(s) self.assertEqual(a.tolist(), lst) self.check_obj(a) self.assertRaises(ValueError, bitarray, '01021') # UCS1 self.assertRaises(ValueError, bitarray, '1\u26050') # UCS2 self.assertRaises(ValueError, bitarray, '0\U00010348') # UCS4 def test_string01_whitespace(self): a = bitarray(whitespace) self.assertEqual(a, bitarray()) for c in whitespace: a = bitarray(c + '1101110001') self.assertEqual(a, bitarray('1101110001')) a = bitarray(' 0\n1\r0\t1\v0 ') self.assertEqual(a, bitarray('01010')) def test_bytes_bytearray(self): for x in b'\x80', bytearray(b'\x80'): a = bitarray(x, 'little') self.assertEqual(len(a), 8 * len(x)) self.assertEqual(a.tobytes(), x) self.assertEqual(a.to01(), '00000001') self.check_obj(a) for n in range(100): x = os.urandom(n) a = bitarray(x) self.assertEqual(len(a), 8 * n) self.assertEqual(memoryview(a), x) def test_byte(self): for byte, endian, res in [ (b'\x01', 'little', '10000000'), (b'\x80', 'little', '00000001'), (b'\x80', 'big', '10000000'), (b'\x01', 'big', '00000001')]: a = bitarray(byte, 
endian) self.assertEqual(a.to01(), res) def test_bitarray_simple(self): for n in range(10): a = bitarray(n) b = bitarray(a, endian=None) self.assertFalse(a is b) self.assertEQUAL(a, b) def test_bitarray_endian(self): # Test creating a new bitarray with different endianness from an # existing bitarray. for endian in 'little', 'big': a = bitarray(endian=endian) b = bitarray(a) self.assertFalse(a is b) self.assertEQUAL(a, b) endian2 = self.opposite_endian(endian) b = bitarray(a, endian2) self.assertEqual(b.endian, endian2) self.assertEqual(a, b) for a in self.randombitarrays(): endian2 = self.opposite_endian(a.endian) b = bitarray(a, endian2) self.assertEqual(a, b) self.assertEqual(b.endian, endian2) self.assertNotEqual(a.endian, b.endian) def test_bitarray_endianness(self): a = bitarray('11100001', endian='little') b = bitarray(a, endian='big') self.assertEqual(a, b) self.assertNotEqual(a.tobytes(), b.tobytes()) b.bytereverse() self.assertNotEqual(a, b) self.assertEqual(a.tobytes(), b.tobytes()) c = bitarray('11100001', endian='big') self.assertEqual(a, c) def test_frozenbitarray(self): a = bitarray(frozenbitarray()) self.assertEQUAL(a, bitarray()) self.assertEqual(type(a), bitarray) for endian in 'little', 'big': a = bitarray(frozenbitarray('011', endian=endian)) self.assertEQUAL(a, bitarray('011', endian)) self.assertEqual(type(a), bitarray) def test_create_empty(self): for x in (None, 0, '', list(), tuple(), set(), dict(), bytes(), bytearray(), bitarray(), frozenbitarray()): a = bitarray(x) self.assertEqual(len(a), 0) self.assertEQUAL(a, bitarray()) def test_wrong_args(self): # wrong types for x in False, True, Ellipsis, slice(0), 0.0, 0 + 0j: self.assertRaises(TypeError, bitarray, x) # wrong values for x in -1, 'A', '\0', '010\0 11': self.assertRaises(ValueError, bitarray, x) # test second (endian) argument self.assertRaises(TypeError, bitarray, 0, 0) self.assertRaises(ValueError, bitarray, 0, 'foo') # too many args self.assertRaises(TypeError, bitarray, 0, 'big', 0) @skipIf(is_pypy) def test_weakref(self): a = bitarray('0100') b = weakref.proxy(a) self.assertEqual(b.to01(), a.to01()) a = None self.assertRaises(ReferenceError, len, b) # --------------------------------------------------------------------------- class ToObjectsTests(unittest.TestCase, Util): def test_numeric(self): a = bitarray() self.assertRaises(Exception, int, a) self.assertRaises(Exception, float, a) self.assertRaises(Exception, complex, a) def test_list(self): for a in self.randombitarrays(): self.assertEqual(list(a), a.tolist()) def test_tuple(self): for a in self.randombitarrays(): self.assertEqual(tuple(a), tuple(a.tolist())) def test_bytes_bytearray(self): for n in range(20): a = urandom_2(8 * n) self.assertEqual(a.padbits, 0) self.assertEqual(bytes(a), a.tobytes()) self.assertEqual(bytearray(a), a.tobytes()) def test_set(self): for a in self.randombitarrays(): self.assertEqual(set(a), set(a.tolist())) # -------------------------- (Number) index tests --------------------------- class GetItemTests(unittest.TestCase, Util): def test_explicit(self): a = bitarray() self.assertRaises(IndexError, a.__getitem__, 0) a.append(True) self.assertEqual(a[0], 1) self.assertEqual(a[-1], 1) self.assertRaises(IndexError, a.__getitem__, 1) self.assertRaises(IndexError, a.__getitem__, -2) a.append(False) self.assertEqual(a[1], 0) self.assertEqual(a[-1], 0) self.assertRaises(IndexError, a.__getitem__, 2) self.assertRaises(IndexError, a.__getitem__, -3) self.assertRaises(TypeError, a.__getitem__, 1.5) self.assertRaises(TypeError, 
a.__getitem__, None) self.assertRaises(TypeError, a.__getitem__, 'A') def test_basic(self): a = bitarray('1100010') for i, v in enumerate(a): self.assertEqual(a[i], v) self.assertEqual(type(a[i]), int) self.assertEqual(a[-7 + i], v) self.assertRaises(IndexError, a.__getitem__, 7) self.assertRaises(IndexError, a.__getitem__, -8) def test_range(self): for a in self.randombitarrays(): aa = a.tolist() for i in range(len(a)): self.assertEqual(a[i], aa[i]) class SetItemTests(unittest.TestCase, Util): def test_explicit(self): a = bitarray('0') a[0] = 1 self.assertEqual(a, bitarray('1')) a = bitarray(2) a[0] = 0 a[1] = 1 self.assertEqual(a, bitarray('01')) a[-1] = 0 a[-2] = 1 self.assertEqual(a, bitarray('10')) self.assertRaises(ValueError, a.__setitem__, 0, -1) self.assertRaises(TypeError, a.__setitem__, 1, None) self.assertRaises(IndexError, a.__setitem__, 2, True) self.assertRaises(IndexError, a.__setitem__, -3, False) self.assertRaises(TypeError, a.__setitem__, 1.5, 1) # see issue 114 self.assertRaises(TypeError, a.__setitem__, None, 0) self.assertRaises(TypeError, a.__setitem__, 'a', True) self.assertEqual(a, bitarray('10')) def test_random(self): for a in self.randombitarrays(start=1): i = randrange(len(a)) aa = a.tolist() v = getrandbits(1) a[i] = v aa[i] = v self.assertEqual(a.tolist(), aa) self.check_obj(a) @skipIf(is_pypy) def test_imported(self): a = bytearray([5, 1, 2, 3]) b = bitarray(endian="little", buffer=a) self.assertFalse(b.readonly) # operate on imported (writable) buffer b[7] = 1 self.assertEqual(a, bytearray([0x85, 1, 2, 3])) b[-2] = 1 self.assertEqual(a, bytearray([0x85, 1, 2, 0x43])) class DelItemTests(unittest.TestCase, Util): def test_simple(self): a = bitarray('100110') del a[1] self.assertEqual(len(a), 5) del a[3], a[-2] self.assertEqual(a, bitarray('100')) self.assertRaises(IndexError, a.__delitem__, 3) self.assertRaises(IndexError, a.__delitem__, -4) def test_random(self): for a in self.randombitarrays(start=1): n = len(a) b = a.copy() i = randrange(n) del b[i] self.assertEQUAL(b, a[:i] + a[i + 1:]) self.assertEqual(len(b), n - 1) self.check_obj(b) @skipIf(is_pypy) def test_imported(self): a = bytearray([5, 11, 7]) b = bitarray(buffer=a) self.assertFalse(b.readonly) self.assertRaises(BufferError, b.__delitem__, 13) # -------------------------- Slice index tests ------------------------------ class GetSliceTests(unittest.TestCase, Util): def test_slice(self): a = bitarray('01001111 00001') self.assertEQUAL(a[:], a) self.assertFalse(a[:] is a) self.assertEQUAL(a[13:2:-3], bitarray('1010')) self.assertEQUAL(a[2:-1:4], bitarray('010')) self.assertEQUAL(a[::2], bitarray('0011001')) self.assertEQUAL(a[8:], bitarray('00001')) self.assertEQUAL(a[7:], bitarray('100001')) self.assertEQUAL(a[:8], bitarray('01001111')) self.assertEQUAL(a[::-1], bitarray('10000111 10010')) self.assertEQUAL(a[:8:-1], bitarray('1000')) self.assertRaises(ValueError, a.__getitem__, slice(None, None, 0)) def test_reverse(self): for _ in range(20): n = randrange(200) a = urandom_2(n) b = a.copy() b.reverse() self.assertEQUAL(a[::-1], b) def test_random_step1(self): for n in range(200): a = urandom_2(n) i = randint(0, n) j = randint(0, n) b = a[i:j] self.assertEqual(b.to01(), a.to01()[i:j]) self.assertEqual(len(b), max(j - i, 0)) self.assertEqual(b.endian, a.endian) def test_random(self): for n in range(200): a = urandom_2(n) s = self.random_slice(n) b = a[s] self.assertEqual(len(b), len(range(n)[s])) # slicelength self.assertEqual(list(b), a.tolist()[s]) self.assertEqual(b.endian, a.endian) class 
SetSliceTests(unittest.TestCase, Util): def test_simple(self): for a in self.randombitarrays(start=1): n = len(a) b = bitarray(n) b[0:n] = bitarray(a) self.assertEqual(a, b) self.assertFalse(a is b) b = bitarray(n) b[:] = bitarray(a) self.assertEqual(a, b) self.assertFalse(a is b) b = bitarray(n) b[::-1] = a self.assertEqual(b.tolist(), a.tolist()[::-1]) def test_random(self): for a in self.randombitarrays(start=1): len_a = len(a) for _ in range(10): s = self.random_slice(len_a) len_b = randrange(10) if s.step == 1 else len(range(len_a)[s]) b = bitarray(len_b) c = bitarray(a) c[s] = b self.check_obj(c) cc = a.tolist() cc[s] = b.tolist() self.assertEqual(c, bitarray(cc)) def test_self_random(self): for a in self.randombitarrays(): n = len(a) for step in -1, 1: s = slice(None, None, step) # ensure slicelength equals len(a) self.assertEqual(len(range(n)[s]), n) aa = a.tolist() a[s] = a aa[s] = aa self.assertEqual(a, bitarray(aa)) def test_special(self): for n in 0, 1, 10, 87: a = urandom_2(n) for m in 0, 1, 10, 99: x = urandom_2(m) b = a.copy() b[n:n] = x # insert at end - extend self.assertEqual(b, a + x) self.assertEqual(len(b), len(a) + len(x)) b[0:0] = x # insert at 0 - prepend self.assertEqual(b, x + a + x) self.check_obj(b) self.assertEqual(len(b), len(a) + 2 * len(x)) def test_range(self): # tests C function insert_n() for _ in range(100): n = randrange(200) a = urandom_2(n) p = randint(0, n) m = randint(0, 500) x = urandom_2(m) b = a.copy() b[p:p] = x self.assertEQUAL(b, a[:p] + x + a[p:]) self.assertEqual(len(b), len(a) + m) self.check_obj(b) def test_resize(self): for _ in range(100): n = randint(0, 200) a = urandom_2(n) p1 = randint(0, n) p2 = randint(0, n) m = randint(0, 300) x = urandom_2(m) b = a.copy() b[p1:p2] = x b_lst = a.tolist() b_lst[p1:p2] = x.tolist() self.assertEqual(b.tolist(), b_lst) if p1 <= p2: self.assertEQUAL(b, a[:p1] + x + a[p2:]) self.assertEqual(len(b), n + p1 - p2 + len(x)) else: self.assertEqual(b, a[:p1] + x + a[p1:]) self.assertEqual(len(b), n + len(x)) self.check_obj(b) def test_self(self): a = bitarray('1100111') a[::-1] = a self.assertEqual(a, bitarray('1110011')) a[4:] = a self.assertEqual(a, bitarray('11101110011')) a[:-5] = a self.assertEqual(a, bitarray('1110111001110011')) a = bitarray('01001') a[:-1] = a self.assertEqual(a, bitarray('010011')) a[2::] = a self.assertEqual(a, bitarray('01010011')) a[2:-2:1] = a self.assertEqual(a, bitarray('010101001111')) a = bitarray('011') a[2:2] = a self.assertEqual(a, bitarray('010111')) a[:] = a self.assertEqual(a, bitarray('010111')) def test_self_shared_buffer(self): # This is a special case. We have two bitarrays which share the # same buffer, and then do a slice assignment. The bitarray is # copied onto itself in reverse order. So we need to make a copy # in setslice_bitarray(). However, since a and b are two distinct # objects, it is not enough to check for self == other, but rather # check whether their buffers overlap. a = bitarray('11100000') b = bitarray(buffer=a) b[::-1] = a self.assertEqual(a, b) self.assertEqual(a, bitarray('00000111')) def test_self_shared_buffer_2(self): # This is an even more special case. We have a bitarrays which # shares part of anothers bitarray buffer. 
So in setslice_bitarray(), # we need to make a copy of other if: # # self->ob_item <= other->ob_item <= self->ob_item + Py_SIZE(self) # # In words: Is the other buffer inside the self buffer (which inclues # the previous case) a = bitarray('11111111 11000000 00000000') b = bitarray(buffer=memoryview(a)[1:2]) self.assertEqual(b, bitarray('11000000')) a[15:7:-1] = b self.assertEqual(a, bitarray('11111111 00000011 00000000')) @skipIf(is_pypy) def test_self_shared_buffer_3(self): # Requires to check for (in setslice_bitarray()): # # other->ob_item <= self->ob_item <= other->ob_item + Py_SIZE(other) # a = bitarray('11111111 11000000 00000000') b = bitarray(buffer=memoryview(a)[:2]) c = bitarray(buffer=memoryview(a)[1:]) self.assertEqual(b, bitarray('11111111 11000000')) self.assertEqual(c, bitarray('11000000 00000000')) c[::-1] = b self.assertEqual(c, bitarray('00000011 11111111')) self.assertEqual(a, bitarray('11111111 00000011 11111111')) def test_setslice_bitarray(self): a = ones(12) a[2:6] = bitarray('0010') self.assertEqual(a, bitarray('11001011 1111')) a.setall(0) a[::2] = bitarray('111001') self.assertEqual(a, bitarray('10101000 0010')) a.setall(0) a[3:] = bitarray('111') self.assertEqual(a, bitarray('000111')) a = zeros(12) a[1:11:2] = bitarray('11101') self.assertEqual(a, bitarray('01010100 0100')) a.setall(0) a[5:2] = bitarray('111') # make sure inserts before 5 (not 2) self.assertEqual(a, bitarray('00000111 0000000')) a = zeros(12) a[:-6:-1] = bitarray('10111') self.assertEqual(a, bitarray('00000001 1101')) def test_bitarray_2(self): a = bitarray('1111') a[3:3] = bitarray('000') # insert self.assertEqual(a, bitarray('1110001')) a[2:5] = bitarray() # remove self.assertEqual(a, bitarray('1101')) a = bitarray('1111') a[1:3] = bitarray('0000') self.assertEqual(a, bitarray('100001')) a[:] = bitarray('010') # replace all values self.assertEqual(a, bitarray('010')) # assign slice to bitarray with different length a = bitarray('111111') a[3:4] = bitarray('00') self.assertEqual(a, bitarray('1110011')) a[2:5] = bitarray('0') # remove self.assertEqual(a, bitarray('11011')) def test_frozenbitarray(self): a = bitarray('11111111 1111') b = frozenbitarray('0000') a[2:6] = b self.assertEqual(a, bitarray('11000011 1111')) self.assertEqual(type(b), frozenbitarray) self.assertEqual(b, bitarray('0000')) b = frozenbitarray('011100') a[::2] = b self.assertEqual(a, bitarray('01101011 0101')) self.check_obj(a) self.assertEqual(type(b), frozenbitarray) self.assertEqual(b, bitarray('011100')) def test_setslice_bitarray_random_same_length(self): for _ in range(100): n = randrange(200) a = urandom_2(n) lst_a = a.tolist() b = urandom_2(randint(0, n)) lst_b = b.tolist() i = randint(0, n - len(b)) j = i + len(b) self.assertEqual(j - i, len(b)) a[i:j] = b lst_a[i:j] = lst_b self.assertEqual(a.tolist(), lst_a) # a didn't change length self.assertEqual(len(a), n) self.check_obj(a) def test_bitarray_random_step1(self): for _ in range(50): n = randrange(300) a = urandom_2(n) lst_a = a.tolist() b = urandom_2(randrange(100)) lst_b = b.tolist() s = self.random_slice(n, step=1) a[s] = b lst_a[s] = lst_b self.assertEqual(a.tolist(), lst_a) self.check_obj(a) def test_bitarray_random(self): for _ in range(100): n = randrange(50) a = urandom_2(n) lst_a = a.tolist() b = urandom_2(randrange(50)) lst_b = b.tolist() s = self.random_slice(n) try: a[s] = b except ValueError: a = None try: lst_a[s] = lst_b except ValueError: lst_a = None if a is None: self.assertTrue(lst_a is None) else: self.assertEqual(a.tolist(), lst_a) 
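# ---------------------------------------------------------------------------
# Illustrative aside -- a standalone sketch, not one of the original test
# cases.  Slice assignment accepts another bitarray, a single bool (which
# fills every selected position), and assigning to a zero-length slice
# inserts; the values below are taken from the explicit cases in this
# test class.

from bitarray import bitarray

sk = bitarray(12 * '1')
sk[2:6] = bitarray('0010')          # replace four bits
assert sk == bitarray('110010111111')
sk = bitarray('11111111')
sk[::2] = False                     # clear every second bit
assert sk == bitarray('01010101')
sk = bitarray('1111')
sk[3:3] = bitarray('000')           # zero-length slice -> insert
assert sk == bitarray('1110001')
# ---------------------------------------------------------------------------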
self.check_obj(a) def test_bool_explicit(self): a = bitarray('11111111') a[::2] = False self.assertEqual(a, bitarray('01010101')) a[4::] = True # ^^^^ self.assertEqual(a, bitarray('01011111')) a[-2:] = False # ^^ self.assertEqual(a, bitarray('01011100')) a[:2:] = True # ^^ self.assertEqual(a, bitarray('11011100')) a[:] = True # ^^^^^^^^ self.assertEqual(a, bitarray('11111111')) a[2:5] = False # ^^^ self.assertEqual(a, bitarray('11000111')) a[1::3] = False # ^ ^ ^ self.assertEqual(a, bitarray('10000110')) a[1:6:2] = True # ^ ^ ^ self.assertEqual(a, bitarray('11010110')) a[3:3] = False # zero slicelength self.assertEqual(a, bitarray('11010110')) a[:] = False # ^^^^^^^^ self.assertEqual(a, bitarray('00000000')) a[-2:2:-1] = 1 # ^^^^ self.assertEqual(a, bitarray('00011110')) def test_bool_step1(self): for _ in range(100): n = randrange(1000) start = randint(0, n) stop = randint(start, n) a = bitarray(n) a[start:stop] = 1 self.assertEqual(a.count(1), stop - start) b = bitarray(n) b[range(start, stop)] = 1 self.assertEqual(a, b) def test_setslice_bool_random_slice(self): for _ in range(200): n = randrange(100) a = urandom_2(n) aa = a.tolist() s = self.random_slice(n) slicelength = len(range(n)[s]) v = getrandbits(1) a[s] = v aa[s] = slicelength * [v] self.assertEqual(a.tolist(), aa) a.setall(0) a[s] = 1 self.assertEqual(a.count(1), slicelength) def test_setslice_bool_step(self): # this test exercises set_range() when stop is much larger than start cnt = 0 for _ in range(500): n = randrange(3000, 4000) a = urandom_2(n) aa = a.tolist() s = slice(randrange(1000), randrange(1000, n), randint(1, 100)) self.assertTrue(s.stop - s.start >= 0) cnt += s.stop - s.start >= 1024 slicelength = len(range(n)[s]) self.assertTrue(slicelength > 0) v = getrandbits(1) a[s] = v aa[s] = slicelength * [v] self.assertEqual(a.tolist(), aa) self.assertTrue(cnt > 300) def test_to_int(self): a = bitarray('11111111') a[::2] = 0 # ^ ^ ^ ^ self.assertEqual(a, bitarray('01010101')) a[4::] = 1 # ^^^^ self.assertEqual(a, bitarray('01011111')) a.__setitem__(slice(-2, None, None), 0) self.assertEqual(a, bitarray('01011100')) self.assertRaises(ValueError, a.__setitem__, slice(None, None, 2), 3) self.assertRaises(ValueError, a.__setitem__, slice(None, 2, None), -1) # a[:2:] = '0' self.assertRaises(TypeError, a.__setitem__, slice(None, 2, None), '0') def test_invalid(self): a = bitarray('11111111') s = slice(2, 6, None) self.assertRaises(TypeError, a.__setitem__, s, 1.2) self.assertRaises(TypeError, a.__setitem__, s, None) self.assertRaises(TypeError, a.__setitem__, s, "0110") a[s] = False self.assertEqual(a, bitarray('11000011')) # step != 1 and slicelen != length of assigned bitarray self.assertRaisesMessage( ValueError, "attempt to assign sequence of size 3 to extended slice of size 4", a.__setitem__, slice(None, None, 2), bitarray('000')) self.assertRaisesMessage( ValueError, "attempt to assign sequence of size 3 to extended slice of size 2", a.__setitem__, slice(None, None, 4), bitarray('000')) self.assertRaisesMessage( ValueError, "attempt to assign sequence of size 7 to extended slice of size 8", a.__setitem__, slice(None, None, -1), bitarray('0001000')) self.assertEqual(a, bitarray('11000011')) @skipIf(is_pypy) def test_imported(self): a = bytearray([5, 1, 2, 3]) b = bitarray(endian="little", buffer=a) self.assertFalse(b.readonly) # operate on imported (writable) buffer b[8:-8] = 1 self.assertEqual(a, bytearray([5, 0xff, 0xff, 3])) b[8:-8:2] = 0 self.assertEqual(a, bytearray([5, 0xaa, 0xaa, 3])) b[8:20] = bitarray('11110000 
0011') self.assertEqual(a, bytearray([5, 0x0f, 0xac, 3])) class DelSliceTests(unittest.TestCase, Util): def test_explicit(self): a = bitarray('10101100 10110') del a[3:9] # ^^^^^ ^ self.assertEqual(a, bitarray('1010110')) del a[::3] # ^ ^ ^ self.assertEqual(a, bitarray('0111')) a = bitarray('10101100 101101111') del a[5:-3:3] # ^ ^ ^ self.assertEqual(a, bitarray('1010100 0101111')) a = bitarray('10101100 1011011') del a[:-9:-2] # ^ ^ ^ ^ self.assertEqual(a, bitarray('10101100 011')) del a[3:3] # zero slicelength self.assertEqual(a, bitarray('10101100 011')) self.assertRaises(ValueError, a.__delitem__, slice(None, None, 0)) self.assertEqual(len(a), 11) del a[:] self.assertEqual(a, bitarray()) def test_special(self): for n in 0, 1, 10, 73: a = urandom_2(n) b = a.copy() del b[:0] del b[n:] self.assertEqual(b, a) del b[10:] # delete everything after 10th item self.assertEqual(b, a[:10]) del b[:] # clear self.assertEqual(len(b), 0) self.check_obj(b) def test_random(self): for _ in range(100): n = randrange(500) a = urandom_2(n) s = self.random_slice(n) slicelength = len(range(n)[s]) c = a.copy() del c[s] self.assertEqual(len(c), n - slicelength) self.check_obj(c) c_lst = a.tolist() del c_lst[s] self.assertEQUAL(c, bitarray(c_lst, endian=c.endian)) def test_range(self): # tests C function delete_n() for n in range(200): a = urandom_2(n) p = randint(0, n) m = randint(0, 200) b = a.copy() del b[p:p + m] self.assertEQUAL(b, a[:p] + a[p + m:]) self.check_obj(b) def test_range_step(self): n = 200 for step in range(-n - 1, n): if step == 0: continue a = urandom_2(n) lst = a.tolist() del a[::step] del lst[::step] self.assertEqual(a.tolist(), lst) @skipIf(is_pypy) def test_imported(self): a = bytearray([5, 1, 2, 3]) b = bitarray(buffer=a) self.assertFalse(b.readonly) self.assertRaises(BufferError, b.__delitem__, slice(3, 21)) # even though we don't delete anything, raise error self.assertRaises(BufferError, b.__delitem__, slice(3, 3)) # ----------------------------- Masked index tests -------------------------- class GetMaskedIndexTests(unittest.TestCase, Util): def test_basic(self): a = bitarray('1001001') mask = bitarray('1010111') self.assertEqual(a[mask], bitarray('10001')) self.assertRaises(IndexError, a.__getitem__, bitarray('1011')) def test_random(self): for a in self.randombitarrays(): n = len(a) # select items from a wehre a is 1 -> all 1 items self.assertEqual(a[a], a.count() * bitarray('1')) mask = zeros(n) self.assertEqual(a[mask], bitarray()) mask = ones(n) self.assertEqual(a[mask], a) mask = urandom_2(n) self.assertEqual(list(a[mask]), [a[i] for i in range(n) if mask[i]]) def test_random_slice_mask(self): for n in range(100): s = self.random_slice(n, step=randint(1, 5)) a = urandom_2(n) mask = zeros(n) mask[s] = 1 self.assertEQUAL(a[mask], a[s]) class SetMaskedIndexTests(unittest.TestCase, Util): def test_basic(self): a = bitarray('1001001') mask = bitarray('1010111') val = bitarray("0 1 110") res = bitarray("0011110") self.assertRaises(NotImplementedError, a.__setitem__, mask, 1) self.assertRaises(ValueError, a.__setitem__, mask, 2) a[mask] = val self.assertEqual(a, res) b = bitarray('0111') self.assertRaisesMessage( IndexError, "attempt to assign mask of size 5 to bitarray of size 4", a.__setitem__, mask, b) def test_issue225(self): # example from issue #225 a = bitarray('0000000') b = bitarray('1100110') c = bitarray('10 10 ') a[b] = c self.assertEqual(a, bitarray('1000100')) def test_zeros_mask(self): for a in self.randombitarrays(): b = a.copy() mask = zeros(len(a)) a[mask] = 
bitarray() self.assertEqual(a, b) def test_ones_mask(self): for a in self.randombitarrays(): n = len(a) mask = ones(n) c = urandom_2(n) a[mask] = c self.assertEqual(a, c) def test_random_mask_set_random(self): for a in self.randombitarrays(): b = a.copy() mask = urandom_2(len(a)) other = urandom_2(mask.count()) a[mask] = other b[list(mask.search(1))] = other self.assertEqual(a, b) def test_random_slice_mask(self): for n in range(100): s = self.random_slice(n, randint(1, 5)) slicelength = len(range(n)[s]) a = urandom_2(n) b = a.copy() mask = zeros(n) mask[s] = 1 other = urandom_2(slicelength) a[mask] = b[s] = other self.assertEQUAL(a, b) def test_random_mask_set_zeros(self): for a in self.randombitarrays(): mask = urandom_2(len(a), endian=a.endian) b = a.copy() self.assertRaisesMessage( NotImplementedError, "mask assignment to bool not implemented;\n" "`a[mask] = 0` equivalent to `a &= ~mask`", a.__setitem__, mask, 0) a[mask] = zeros(mask.count()) b &= ~mask self.assertEqual(a, b) def test_random_mask_set_ones(self): for a in self.randombitarrays(): mask = urandom_2(len(a), endian=a.endian) b = a.copy() self.assertRaisesMessage( NotImplementedError, "mask assignment to bool not implemented;\n" "`a[mask] = 1` equivalent to `a |= mask`", a.__setitem__, mask, 1) a[mask] = ones(mask.count()) b |= mask self.assertEqual(a, b) @skipIf(is_pypy) def test_imported(self): a = bytearray([0, 0xff]) # 00000000 11111111 b = bitarray(endian="big", buffer=a) c = bitarray('00001111 00110011') b[c] = bitarray(' 1001 01 10') self.assertEqual(a, bytearray([0b00001001, 0b11011110])) class DelMaskedIndexTests(unittest.TestCase, Util): def test_basic(self): a = bitarray('1001001') mask = bitarray('1010111') del a[mask] self.assertEqual(a, bitarray('01')) self.assertRaises(IndexError, a.__delitem__, bitarray('101')) def test_zeros_mask(self): for a in self.randombitarrays(): b = a.copy() # mask has only zeros - nothing will be removed mask = zeros(len(a)) del b[mask] self.assertEqual(b, a) def test_ones_mask(self): for a in self.randombitarrays(): # mask has only ones - everything will be removed mask = ones(len(a)) del a[mask] self.assertEqual(a, bitarray()) def test_self_mask(self): for a in self.randombitarrays(): cnt0 = a.count(0) # mask is bitarray itself - all 1 items are removed - # only all the 0's remain del a[a] self.assertEqual(a, zeros(cnt0)) def test_random_mask(self): for a in self.randombitarrays(): n = len(a) b = a.copy() mask = urandom_2(n) del b[mask] self.assertEqual(b, bitarray(a[i] for i in range(n) if not mask[i])) # `del a[mask]` is equivalent to the in-place version of # selecting the inverted mask `a = a[~mask]` self.assertEqual(b, a[~mask]) def test_random_slice_mask(self): for n in range(100): s = self.random_slice(n) a = urandom_2(n) b = a.copy() mask = zeros(n) mask[s] = 1 del a[mask], b[s] self.assertEQUAL(a, b) @skipIf(is_pypy) def test_imported(self): a = bytearray([5, 3]) b = bitarray(buffer=a) self.assertFalse(b.readonly) self.assertRaises(BufferError, b.__delitem__, bitarray('00001111 00110011')) # even though we don't delete anything, raise error self.assertRaises(BufferError, b.__delitem__, bitarray(16)) # ------------------------- Sequence index tests ---------------------------- class CommonSequenceIndexTests(unittest.TestCase, Util): def test_type_messages(self): for item, msg in [ (tuple([1, 2]), "multiple dimensions not supported"), (None, "bitarray indices must be integers, slices or " "sequences, not 'NoneType'"), (0.12, "bitarray indices must be integers, slices or " 
"sequences, not 'float'"), ]: a = bitarray('10111') self.assertRaisesMessage(TypeError, msg, a.__getitem__, item) self.assertRaisesMessage(TypeError, msg, a.__setitem__, item, 1) self.assertRaisesMessage(TypeError, msg, a.__delitem__, item) class GetSequenceIndexTests(unittest.TestCase, Util): def test_basic(self): a = bitarray('00110101 00') self.assertEqual(a[[2, 4, -3, 9]], bitarray('1010')) self.assertEqual(a[71 * [2, 4, 7]], 71 * bitarray('101')) self.assertEqual(a[[-1]], bitarray('0')) self.assertEqual(a[[]], bitarray()) self.assertRaises(IndexError, a.__getitem__, [1, 10]) self.assertRaises(IndexError, a.__getitem__, [-11]) def test_types(self): a = bitarray('11001101 01') lst = [1, 3, -2] for b in lst, array.array('i', lst): self.assertEqual(a[b], bitarray('100')) lst[2] += len(a) self.assertEqual(a[bytearray(lst)], bitarray('100')) self.assertEqual(a[bytes(lst)], bitarray('100')) self.assertRaises(TypeError, a.__getitem__, [2, "B"]) self.assertRaises(TypeError, a.__getitem__, [2, 1.2]) self.assertRaises(TypeError, a.__getitem__, tuple(lst)) def test_random(self): for a in self.randombitarrays(): n = len(a) lst = [randrange(n) for _ in range(n // 2)] b = a[lst] self.assertEqual(b, bitarray(a[i] for i in lst)) self.assertEqual(b.endian, a.endian) def test_range(self): for n in range(100): s = self.random_slice(n) r = range(n)[s] a = urandom_2(n) self.assertEQUAL(a[r], a[s]) class SetSequenceIndexTests(unittest.TestCase, Util): def test_bool_basic(self): a = zeros(10) a[[2, 3, 5, 7]] = 1 self.assertEqual(a, bitarray('00110101 00')) a[[]] = 1 self.assertEqual(a, bitarray('00110101 00')) a[[-1]] = True self.assertEqual(a, bitarray('00110101 01')) a[[3, -1]] = 0 self.assertEqual(a, bitarray('00100101 00')) self.assertRaises(IndexError, a.__setitem__, [1, 10], 0) self.assertRaises(ValueError, a.__setitem__, [1], 2) self.assertRaises(TypeError, a.__setitem__, [1], "A") self.assertRaises(TypeError, a.__setitem__, (3, -1)) self.assertRaises(TypeError, a.__setitem__, a) def test_bool_random(self): for a in self.randombitarrays(): n = len(a) lst = [randrange(n) for _ in range(n // 2)] b = a.copy() v = getrandbits(1) a[lst] = v for i in lst: b[i] = v self.assertEqual(a, b) def test_bool_range(self): for n in range(100): s = self.random_slice(n) r = range(n)[s] a = urandom_2(n) b = a.copy() a[s] = b[r] = getrandbits(1) self.assertEQUAL(a, b) def test_bitarray_basic(self): a = zeros(10) a[[2, 3, 5, 7]] = bitarray('1101') self.assertEqual(a, bitarray('00110001 00')) a[[]] = bitarray() self.assertEqual(a, bitarray('00110001 00')) a[[5, -1]] = bitarray('11') self.assertEqual(a, bitarray('00110101 01')) self.assertRaises(IndexError, a.__setitem__, [1, 10], bitarray('11')) self.assertRaises(ValueError, a.__setitem__, [1], bitarray()) msg = "attempt to assign sequence of size 2 to bitarray of size 3" self.assertRaisesMessage(ValueError, msg, a.__setitem__, [1, 2], bitarray('001')) def test_bitarray_random(self): for a in self.randombitarrays(): n = len(a) lst = [randrange(n) for _ in range(n // 2)] c = urandom_2(len(lst)) b = a.copy() a[lst] = c for i, j in enumerate(lst): b[j] = c[i] self.assertEqual(a, b) def test_bitarray_range(self): for n in range(100): s = self.random_slice(n) r = range(n)[s] a = urandom_2(n) b = a.copy() # note that len(r) is slicelength a[s] = b[r] = urandom_2(len(r)) self.assertEQUAL(a, b) def test_bitarray_random_self(self): for a in self.randombitarrays(): lst = list(range(len(a))) shuffle(lst) b = a.copy() c = a.copy() a[lst] = a for i, j in enumerate(lst): b[j] = c[i] 
self.assertEqual(a, b) @skipIf(is_pypy) def test_imported(self): a = bytearray([0, 1, 2, 3]) b = bitarray(endian="big", buffer=a) self.assertFalse(b.readonly) # operate on imported (writable) buffer b[range(0, 32, 8)] = 1 self.assertEqual(a, bytearray([0x80, 0x81, 0x82, 0x83])) b[range(0, 10)] = bitarray("00001111 01", "little") self.assertEqual(a, bytearray([0x0f, 0x41, 0x82, 0x83])) class DelSequenceIndexTests(unittest.TestCase, Util): def test_basic(self): a = bitarray('00110101 00') # ^ ^ ^ ^ del a[[2, 4, 7, 9]] self.assertEqual(a, bitarray('001100')) del a[[]] # delete nothing self.assertEqual(a, bitarray('001100')) del a[[2]] self.assertEqual(a, bitarray('00100')) a = bitarray('00110101 00') # same as earlier, but list is not ordered and has repeated indices del a[[7, 9, 2, 2, 4, 7]] self.assertEqual(a, bitarray('001100')) self.assertRaises(IndexError, a.__delitem__, [1, 10]) self.assertRaises(IndexError, a.__delitem__, [10]) self.assertRaises(TypeError, a.__delitem__, (1, 3)) def test_delete_one(self): for a in self.randombitarrays(start=1): b = a.copy() i = randrange(len(a)) del a[i], b[[i]] self.assertEqual(a, b) def test_random(self): for n in range(100): a = urandom_2(n) lst = [randrange(n) for _ in range(randint(0, n))] b = a.copy() del a[lst] self.assertEqual(len(a), n - len(set(lst))) for i in sorted(set(lst), reverse=True): del b[i] self.assertEqual(a, b) def test_shuffle(self): for a in self.randombitarrays(): lst = list(range(len(a))) shuffle(lst) del a[lst] self.assertEqual(len(a), 0) def test_range(self): for n in range(100): s = self.random_slice(n) r = range(n)[s] a = urandom_2(n) b = a.copy() del a[s], b[r] self.assertEQUAL(a, b) @skipIf(is_pypy) def test_imported(self): a = bytearray([0, 1, 2, 3]) b = bitarray(buffer=a) self.assertFalse(b.readonly) # operate on imported (writable) buffer self.assertRaises(BufferError, b.__delitem__, range(0, 32, 8)) # even though we don't delete anything, raise error self.assertRaises(BufferError, b.__delitem__, range(0)) # --------------------------------------------------------------------------- class MiscTests(unittest.TestCase, Util): def test_instancecheck(self): a = bitarray('011') self.assertIsInstance(a, bitarray) self.assertFalse(isinstance(a, str)) def test_booleanness(self): self.assertEqual(bool(bitarray('')), False) self.assertEqual(bool(bitarray('0')), True) self.assertEqual(bool(bitarray('1')), True) def test_iterate(self): for lst in self.randomlists(): acc = [] for b in bitarray(lst): acc.append(b) self.assertEqual(acc, lst) def test_iter1(self): it = iter(bitarray('011')) self.assertIsType(it, 'bitarrayiterator') for res in 0, 1, 1: item = next(it) self.assertEqual(type(item), int) self.assertEqual(item, res) self.assertRaises(StopIteration, next, it) def test_iter2(self): for a in self.randombitarrays(): aa = a.tolist() self.assertEqual(list(a), aa) self.assertEqual(list(iter(a)), aa) def test_assignment(self): a = bitarray('00110111001') a[1:3] = a[7:9] a[-1:] = a[:1] b = bitarray('01010111000') self.assertEqual(a, b) def test_subclassing(self): class ExaggeratingBitarray(bitarray): def __new__(cls, data, offset): return bitarray.__new__(cls, data) def __init__(self, data, offset): self.offset = offset def __getitem__(self, i): return bitarray.__getitem__(self, i - self.offset) for a in self.randombitarrays(): b = ExaggeratingBitarray(a, 1234) for i in range(len(a)): self.assertEqual(a[i], b[i + 1234]) @skipIf(is_pypy) def test_overflow(self): a = bitarray(1) for i in 0, 1: n = (1 << 63) + i 
self.assertRaises(OverflowError, a.__imul__, n) self.assertRaises(OverflowError, bitarray, n) a = bitarray(1 << 10) self.assertRaises(OverflowError, a.__imul__, 1 << 53) @skipIf(PTRSIZE != 4 or is_pypy) def test_overflow_32bit(self): a = bitarray(1000_000) self.assertRaises(OverflowError, a.__imul__, 17180) for i in 0, 1: self.assertRaises(OverflowError, bitarray, (1 << 31) + i) try: a = bitarray((1 << 31) - 1); except MemoryError: return self.assertRaises(OverflowError, bitarray.append, a, True) def test_unhashable(self): a = bitarray() self.assertRaises(TypeError, hash, a) self.assertRaises(TypeError, dict, [(a, 'foo')]) def test_abc(self): from collections import abc a = bitarray('001') self.assertIsInstance(a, abc.Iterable) self.assertIsInstance(a, abc.Sized) self.assertIsInstance(a, abc.Sequence) self.assertIsInstance(a, abc.MutableSequence) if sys.version_info[:2] >= (3, 12): self.assertIsInstance(a, abc.Buffer) if sys.platform != "win32": self.assertFalse(isinstance(a, abc.Hashable)) # --------------------------------------------------------------------------- class PickleTests(unittest.TestCase, Util): def test_attributes(self): a = frozenbitarray("00110") # as a is a subclass of bitarray, we can have attributes a.x = "bar" a.y = "baz" b = pickle.loads(pickle.dumps(a)) self.assertEqual(b, a) self.assertEqual(b.x, "bar") self.assertEqual(b.y, "baz") def test_readonly(self): a = bitarray(buffer=b'A') # readonly (because buffer is readonly), but not frozenbitarray self.assertTrue(a.readonly) self.assertEqual(type(a), bitarray) b = pickle.loads(pickle.dumps(a)) self.assertTrue(b.readonly) self.assertEqual(type(b), bitarray) def test_endian(self): for endian in 'little', 'big': a = bitarray(endian=endian) b = pickle.loads(pickle.dumps(a)) self.assertEqual(b.endian, endian) def test_reduce_explicit(self): a = frozenbitarray('11001111 01001', 'little') a.quux = 12 res = (_bitarray_reconstructor, (frozenbitarray, b'\xf3\x12', 'little', 3, 1), {'quux': 12}) self.assertEqual(a.__reduce__(), res) def check_reduce(self, a): try: attrs = a.__dict__ except AttributeError: attrs = None res = ( _bitarray_reconstructor, ( type(a), # type object a.tobytes(), # buffer a.endian, # endianness a.padbits, # number of pad bits int(a.readonly) # readonly ), attrs) # __dict__ or None self.assertEqual(a.__reduce__(), res) b = _bitarray_reconstructor(*res[1]) self.assertEqual(a, b) self.assertEqual(type(a), type(b)) self.assertEqual(a.endian, b.endian) self.assertEqual(a.readonly, b.readonly) self.check_obj(b) @skipIf(is_pypy) def test_reduce_random(self): for a in self.randombitarrays(): self.check_reduce(a) b = frozenbitarray(a) self.check_reduce(b) b.foo = 42 self.check_reduce(b) def test_reconstructor_explicit(self): a = _bitarray_reconstructor(bitarray, b'', 'little', 0, 0) self.assertEqual(len(a), 0) self.assertEqual(a.endian, 'little') self.check_obj(a) a = _bitarray_reconstructor(bitarray, b'\x0f', 'big', 1, 0) self.assertEqual(a, bitarray("0000111")) self.assertEqual(a.endian, 'big') self.check_obj(a) def test_reconstructor_invalid_args(self): # argument 1 - type object self.assertRaisesMessage( TypeError, "first argument must be a type object, got 'str'", _bitarray_reconstructor, "foo", b'', 'big', 0, 0) self.assertRaisesMessage( TypeError, "'list' is not a subtype of bitarray", _bitarray_reconstructor, list, b'', 'big', 0, 0) # argument 2 - buffer self.assertRaisesMessage( TypeError, "second argument must be bytes, got 'int'", _bitarray_reconstructor, bitarray, 123, 'big', 0, 0) # argument 3 - 
bit-endianness self.assertRaises(TypeError, _bitarray_reconstructor, bitarray, b'\x0f', 123, 1, 0) self.assertRaisesMessage( ValueError, "bit-endianness must be either 'little' or 'big', not 'small'", _bitarray_reconstructor, bitarray, b"", "small", 0, 0) # argument 4 - number of pad bits self.assertRaises(TypeError, _bitarray_reconstructor, bitarray, b'\x0f', 'big', 0.0, 0) self.assertRaisesMessage( ValueError, "invalid number of pad bits: 8", _bitarray_reconstructor, bitarray, b"A", "big", 8, 0) self.assertRaisesMessage( # the number of bytes is 0 zero, so pad bits cannot be 1 ValueError, "invalid number of pad bits: 1", _bitarray_reconstructor, bitarray, b"", "big", 1, 0) # argument 5 - readonly self.assertRaises(TypeError, _bitarray_reconstructor, bitarray, b'\x0f', 'big', 1, 'foo') def check_file(self, fn): path = os.path.join(os.path.dirname(__file__), fn) with open(path, 'rb') as fi: d = pickle.load(fi) for i, (s, end) in enumerate([ # 0x03 ('110', 'little'), # 0x60 ('011', 'big'), # 0x07 0x12 0x00 0x40 ('1110000001001000000000000000001', 'little'), # 0x27 0x80 0x00 0x02 ('0010011110000000000000000000001', 'big'), ]): b = d['b%d' % i] self.assertEqual(b.to01(), s) self.assertEqual(b.endian, end) self.assertEqual(type(b), bitarray) self.assertFalse(b.readonly) self.check_obj(b) f = d['f%d' % i] self.assertEqual(f.to01(), s) self.assertEqual(f.endian, end) self.assertEqual(type(f), frozenbitarray) self.assertTrue(f.readonly) self.check_obj(f) def test_load(self): # using bitarray 2.8.1 / Python 3.5.5 (_bitarray_reconstructor) self.check_file('test_281.pickle') def test_random(self): for a in self.randombitarrays(): b = pickle.loads(pickle.dumps(a)) self.assertFalse(b.readonly) self.assertFalse(b is a) self.assertEQUAL(a, b) self.check_obj(b) # ---------------------------- Richcompare tests ---------------------------- class RichCompareTests(unittest.TestCase, Util): def test_wrong_types(self): a = bitarray() for x in None, 7, 'A': self.assertEqual(a.__eq__(x), NotImplemented) self.assertEqual(a.__ne__(x), NotImplemented) self.assertEqual(a.__ge__(x), NotImplemented) self.assertEqual(a.__gt__(x), NotImplemented) self.assertEqual(a.__le__(x), NotImplemented) self.assertEqual(a.__lt__(x), NotImplemented) def test_explicit(self): for sa, sb, res in [ ('', '', '101010'), ('0', '0', '101010'), ('1', '1', '101010'), ('0', '', '011100'), ('1', '', '011100'), ('1', '0', '011100'), ('11', '10', '011100'), ('01', '00', '011100'), ('0', '1', '010011'), ('', '0', '010011'), ('', '1', '010011'), ]: a = bitarray(sa, self.random_endian()) b = bitarray(sb, self.random_endian()) r = bitarray(res) self.assertEqual(a == b, r[0]) self.assertEqual(a != b, r[1]) self.assertEqual(a >= b, r[2]) self.assertEqual(a > b, r[3]) self.assertEqual(a <= b, r[4]) self.assertEqual(a < b, r[5]) def test_eq_ne(self): for _ in range(5): self.assertTrue(urandom_2(0) == urandom_2(0)) self.assertFalse(urandom_2(0) != urandom_2(0)) for n in range(1, 20): a = ones(n, self.random_endian()) b = bitarray(a, self.random_endian()) self.assertTrue(a == b) self.assertFalse(a != b) b[-1] = 0 self.assertTrue(a != b) self.assertFalse(a == b) def test_eq_ne_random(self): for a in self.randombitarrays(start=1): b = bitarray(a, self.random_endian()) self.assertTrue(a == b) self.assertFalse(a != b) b.invert(randrange(len(a))) self.assertTrue(a != b) self.assertFalse(a == b) def check(self, a, b, c, d): self.assertEqual(a == b, c == d) self.assertEqual(a != b, c != d) self.assertEqual(a <= b, c <= d) self.assertEqual(a < b, c < d) 
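# ---------------------------------------------------------------------------
# Illustrative aside -- a standalone sketch, not one of the original test
# cases.  Rich comparisons are lexicographic, like comparing the
# corresponding lists of bools, and equality is independent of
# bit-endianness; the values follow the explicit table above.

from bitarray import bitarray

assert bitarray('0') < bitarray('1')
assert bitarray('11') > bitarray('10')
assert bitarray('0') > bitarray()               # empty bitarray compares smallest
assert bitarray('1', 'big') == bitarray('1', 'little')
# ---------------------------------------------------------------------------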
self.assertEqual(a >= b, c >= d) self.assertEqual(a > b, c > d) def test_invert_random_element(self): for a in self.randombitarrays(start=1): n = len(a) b = bitarray(a, self.random_endian()) i = randrange(n) b.invert(i) self.check(a, b, a[i], b[i]) def test_size(self): for _ in range(10): a = bitarray(randrange(20), self.random_endian()) b = bitarray(randrange(20), self.random_endian()) self.check(a, b, len(a), len(b)) def test_random(self): for a in self.randombitarrays(): aa = a.tolist() if getrandbits(1): a = frozenbitarray(a) for b in self.randombitarrays(): bb = b.tolist() if getrandbits(1): b = frozenbitarray(b) self.check(a, b, aa, bb) self.check(a, b, aa, bb) # --------------------------------------------------------------------------- class SpecialMethodTests(unittest.TestCase, Util): def test_repr(self): r = repr(bitarray()) self.assertEqual(r, "bitarray()") self.assertEqual(type(r), str) r = repr(bitarray('10111')) self.assertEqual(r, "bitarray('10111')") self.assertEqual(type(r), str) for a in self.randombitarrays(): self.assertEqual(repr(a), str(a)) b = eval(repr(a)) self.assertFalse(b is a) self.assertEqual(a, b) self.check_obj(b) def test_copy(self): for a in self.randombitarrays(): b = a.copy() self.assertFalse(b is a) self.assertEQUAL(a, b) b = copy.copy(a) self.assertFalse(b is a) self.assertEQUAL(a, b) b = copy.deepcopy(a) self.assertFalse(b is a) self.assertEQUAL(a, b) @skipIf(is_pypy) def test_sizeof(self): a = bitarray() size = sys.getsizeof(a) self.assertEqual(size, a.__sizeof__()) self.assertEqual(type(size), int) self.assertTrue(size < 200) a = bitarray(8000) self.assertTrue(sys.getsizeof(a) > 1000) # -------------------------- Sequence methods tests ------------------------- class SequenceTests(unittest.TestCase, Util): def test_len(self): for n in range(100): a = bitarray(n) self.assertEqual(len(a), n) def test_concat(self): a = bitarray('001') b = a + bitarray('110') self.assertEQUAL(b, bitarray('001110')) b = a + [0, 1, True] self.assertEQUAL(b, bitarray('001011')) b = a + '100' self.assertEQUAL(b, bitarray('001100')) b = a + (1, 0, True) self.assertEQUAL(b, bitarray('001101')) self.assertRaises(ValueError, a.__add__, (0, 1, 2)) self.assertEQUAL(a, bitarray('001')) self.assertRaises(TypeError, a.__add__, 42) self.assertRaises(ValueError, a.__add__, b'1101') for a in self.randombitarrays(): aa = a.copy() for b in self.randombitarrays(): bb = b.copy() c = a + b self.assertEqual(c, bitarray(a.tolist() + b.tolist())) self.assertEqual(c.endian, a.endian) self.check_obj(c) self.assertEQUAL(a, aa) self.assertEQUAL(b, bb) def test_inplace_concat(self): a = bitarray('001') a += bitarray('110') self.assertEqual(a, bitarray('001110')) a += [0, 1, True] self.assertEqual(a, bitarray('001110011')) a += '100' self.assertEqual(a, bitarray('001110011100')) a += (1, 0, True) self.assertEqual(a, bitarray('001110011100101')) a = bitarray('110') self.assertRaises(ValueError, a.__iadd__, [0, 1, 2]) self.assertEqual(a, bitarray('110')) self.assertRaises(TypeError, a.__iadd__, 42) self.assertRaises(ValueError, a.__iadd__, b'101') for a in self.randombitarrays(): for b in self.randombitarrays(): c = bitarray(a) d = c d += b self.assertEqual(d, a + b) self.assertTrue(c is d) self.assertEQUAL(c, d) self.assertEqual(d.endian, a.endian) self.check_obj(d) def test_repeat_explicit(self): for m, s, r in [ ( 0, '', ''), ( 0, '1001111', ''), (-1, '100110', ''), (11, '', ''), ( 1, '110', '110'), ( 2, '01', '0101'), ( 5, '1', '11111'), ]: a = bitarray(s) self.assertEqual(a * m, bitarray(r)) 
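# ---------------------------------------------------------------------
# Editor's sketch (illustrative only, not part of the original test
# suite): repetition follows ordinary Python sequence semantics --
# ``a * m`` concatenates m copies of a, and m <= 0 yields an empty
# bitarray.  ``_ba_sketch`` is just a local alias introduced for this
# sketch to avoid clashing with names used by the tests.
from bitarray import bitarray as _ba_sketch

assert _ba_sketch('01') * 3 == _ba_sketch('010101')
assert 3 * _ba_sketch('01') == _ba_sketch('010101')
assert 0 * _ba_sketch('111') == _ba_sketch()
# ---------------------------------------------------------------------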
self.assertEqual(m * a, bitarray(r)) c = a.copy() c *= m self.assertEqual(c, bitarray(r)) def test_repeat_wrong_args(self): a = bitarray() self.assertRaises(TypeError, a.__mul__, None) self.assertRaises(TypeError, a.__mul__, 2.0) self.assertRaises(TypeError, a.__imul__, None) self.assertRaises(TypeError, a.__imul__, 3.0) def test_repeat_random(self): for a in self.randombitarrays(): b = a.copy() for m in list(range(-3, 5)) + [randint(5, 200)]: res = bitarray(m * a.to01(), endian=a.endian) self.assertEqual(len(res), len(a) * max(0, m)) self.assertEQUAL(a * m, res) self.assertEQUAL(m * a, res) c = a.copy() c *= m self.assertEQUAL(c, res) self.check_obj(c) self.assertEQUAL(a, b) def test_contains_simple(self): a = bitarray() self.assertFalse(0 in a) self.assertFalse(1 in a) self.assertTrue(bitarray() in a) a.append(1) self.assertTrue(1 in a) self.assertFalse(0 in a) a = bitarray([0]) self.assertTrue(0 in a) self.assertFalse(1 in a) a.append(1) self.assertTrue(0 in a) self.assertTrue(1 in a) def test_contains_errors(self): a = bitarray() self.assertEqual(a.__contains__(1), False) a.append(1) self.assertEqual(a.__contains__(1), True) a = bitarray('0011') self.assertEqual(a.__contains__(bitarray('01')), True) self.assertEqual(a.__contains__(bitarray('10')), False) self.assertRaises(TypeError, a.__contains__, 'asdf') self.assertRaises(ValueError, a.__contains__, 2) self.assertRaises(ValueError, a.__contains__, -1) def test_contains_range(self): for n in range(2, 50): a = zeros(n) self.assertTrue(0 in a) self.assertFalse(1 in a) a[randrange(n)] = 1 self.assertTrue(1 in a) self.assertTrue(0 in a) a.setall(1) self.assertTrue(True in a) self.assertFalse(False in a) a[randrange(n)] = 0 self.assertTrue(True in a) self.assertTrue(False in a) def test_contains_explicit(self): a = bitarray('011010000001') for s, r in [('', True), # every bitarray contains an empty one ('1', True), ('11', True), ('111', False), ('011', True), ('0001', True), ('00011', False)]: c = bitarray(s) in a self.assertTrue(c is r) # -------------------------- Number methods tests --------------------------- class NumberTests(unittest.TestCase, Util): def test_misc(self): for a in self.randombitarrays(): b = ~a c = a & b self.assertEqual(c.any(), False) self.assertEqual(a, a ^ c) d = a ^ b self.assertEqual(d.all(), True) b &= d self.assertEqual(~b, a) def test_bool(self): a = bitarray() self.assertTrue(bool(a) is False) a.append(0) self.assertTrue(bool(a) is True) a.append(1) self.assertTrue(bool(a) is True) def test_size_error(self): a = bitarray('11001') b = bitarray('100111') self.assertRaises(ValueError, lambda: a & b) self.assertRaises(ValueError, lambda: a | b) self.assertRaises(ValueError, lambda: a ^ b) for x in (a.__and__, a.__or__, a.__xor__, a.__iand__, a.__ior__, a.__ixor__): self.assertRaises(ValueError, x, b) def test_endianness_error(self): a = bitarray('11001', 'big') b = bitarray('10011', 'little') self.assertRaises(ValueError, lambda: a & b) self.assertRaises(ValueError, lambda: a | b) self.assertRaises(ValueError, lambda: a ^ b) for x in (a.__and__, a.__or__, a.__xor__, a.__iand__, a.__ior__, a.__ixor__): self.assertRaises(ValueError, x, b) def test_and(self): a = bitarray('11001') b = bitarray('10011') c = a & b self.assertEqual(c, bitarray('10001')) self.check_obj(c) self.assertRaises(TypeError, lambda: a & 1) self.assertRaises(TypeError, lambda: 1 & a) self.assertEqual(a, bitarray('11001')) self.assertEqual(b, bitarray('10011')) def test_or(self): a = bitarray('11001') b = bitarray('10011') c = a | b 
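# ---------------------------------------------------------------------
# Editor's sketch (illustrative only, not part of the original test
# suite): the bitwise operators &, | and ^ work element-wise and, as the
# size/endianness tests above check, require operands of equal length
# and equal bit-endianness (otherwise ValueError is raised).
# ``_ba_sketch`` is a local alias introduced only for this sketch.
from bitarray import bitarray as _ba_sketch

_x = _ba_sketch('11001')
_y = _ba_sketch('10011')
assert (_x & _y) == _ba_sketch('10001')
assert (_x | _y) == _ba_sketch('11011')
assert (_x ^ _y) == _ba_sketch('01010')
# ---------------------------------------------------------------------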
self.assertEqual(c, bitarray('11011')) self.check_obj(c) self.assertRaises(TypeError, lambda: a | 1) self.assertRaises(TypeError, lambda: 1 | a) self.assertEqual(a, bitarray('11001')) self.assertEqual(b, bitarray('10011')) def test_xor(self): a = bitarray('11001') b = bitarray('10011') c = a ^ b self.assertEQUAL(c, bitarray('01010')) self.check_obj(c) self.assertRaises(TypeError, lambda: a ^ 1) self.assertRaises(TypeError, lambda: 1 ^ a) self.assertEqual(a, bitarray('11001')) self.assertEqual(b, bitarray('10011')) def test_iand(self): a = bitarray('110010110') b = bitarray('100110011') a &= b self.assertEqual(a, bitarray('100010010')) self.assertEqual(b, bitarray('100110011')) self.check_obj(a) self.check_obj(b) try: a &= 1 except TypeError: error = 1 self.assertEqual(error, 1) def test_ior(self): a = bitarray('110010110') b = bitarray('100110011') a |= b self.assertEQUAL(a, bitarray('110110111')) self.assertEQUAL(b, bitarray('100110011')) try: a |= 1 except TypeError: error = 1 self.assertEqual(error, 1) def test_ixor(self): a = bitarray('110010110') b = bitarray('100110011') a ^= b self.assertEQUAL(a, bitarray('010100101')) self.assertEQUAL(b, bitarray('100110011')) try: a ^= 1 except TypeError: error = 1 self.assertEqual(error, 1) def test_bitwise_self(self): for a in self.randombitarrays(): aa = a.copy() self.assertEQUAL(a & a, aa) self.assertEQUAL(a | a, aa) self.assertEQUAL(a ^ a, zeros(len(aa), aa.endian)) self.assertEQUAL(a, aa) def test_bitwise_inplace_self(self): for a in self.randombitarrays(): aa = a.copy() a &= a self.assertEQUAL(a, aa) a |= a self.assertEQUAL(a, aa) a ^= a self.assertEqual(a, zeros(len(aa), aa.endian)) def test_invert(self): a = bitarray('11011') b = ~a self.assertEQUAL(b, bitarray('00100')) self.assertEQUAL(a, bitarray('11011')) self.assertFalse(a is b) self.check_obj(b) for a in self.randombitarrays(): b = bitarray(a) b.invert() for i in range(len(a)): self.assertEqual(b[i], not a[i]) self.check_obj(b) self.assertEQUAL(~a, b) @staticmethod def shift(a, n, direction): if n >= len(a): return zeros(len(a), a.endian) if direction == 'right': return zeros(n, a.endian) + a[:len(a)-n] elif direction == 'left': return a[n:] + zeros(n, a.endian) else: raise ValueError("invalid direction: %s" % direction) def test_lshift(self): a = bitarray('11011') b = a << 2 self.assertEQUAL(b, bitarray('01100')) self.assertRaises(TypeError, lambda: a << 1.2) self.assertRaises(TypeError, a.__lshift__, 1.2) self.assertRaises(ValueError, lambda: a << -1) self.assertRaises(OverflowError, a.__lshift__, 1 << 63) for a in self.randombitarrays(): c = a.copy() n = randrange(len(a) + 4) b = a << n self.assertEqual(len(b), len(a)) self.assertEQUAL(b, self.shift(a, n, 'left')) self.assertEQUAL(a, c) def test_rshift(self): a = bitarray('1101101') b = a >> 1 self.assertEQUAL(b, bitarray('0110110')) self.assertRaises(TypeError, lambda: a >> 1.2) self.assertRaises(TypeError, a.__rshift__, 1.2) self.assertRaises(ValueError, lambda: a >> -1) for a in self.randombitarrays(): c = a.copy() n = randrange(len(a) + 4) b = a >> n self.assertEqual(len(b), len(a)) self.assertEQUAL(b, self.shift(a, n, 'right')) self.assertEQUAL(a, c) def test_ilshift(self): a = bitarray('110110101') a <<= 7 self.assertEQUAL(a, bitarray('010000000')) self.assertRaises(TypeError, a.__ilshift__, 1.2) self.assertRaises(ValueError, a.__ilshift__, -3) for a in self.randombitarrays(): b = a.copy() n = randrange(len(a) + 4) b <<= n self.assertEqual(len(b), len(a)) self.assertEQUAL(b, self.shift(a, n, 'left')) def test_irshift(self): 
a = bitarray('110110111') a >>= 3 self.assertEQUAL(a, bitarray('000110110')) self.assertRaises(TypeError, a.__irshift__, 1.2) self.assertRaises(ValueError, a.__irshift__, -4) for a in self.randombitarrays(): b = a.copy() n = randrange(len(a) + 4) b >>= n self.assertEqual(len(b), len(a)) self.assertEQUAL(b, self.shift(a, n, 'right')) def check_random(self, n, endian, n_shift, direction): a = urandom_2(n, endian) self.assertEqual(len(a), n) b = a.copy() if direction == 'left': b <<= n_shift else: b >>= n_shift self.assertEQUAL(b, self.shift(a, n_shift, direction)) def test_shift_range(self): for endian in 'little', 'big': for direction in 'left', 'right': for n in range(0, 200): self.check_random(n, endian, 1, direction) self.check_random(n, endian, randint(0, n), direction) for n_shift in range(0, 100): self.check_random(100, endian, n_shift, direction) def test_zero_shift(self): for a in self.randombitarrays(): aa = a.copy() self.assertEQUAL(a << 0, aa) self.assertEQUAL(a >> 0, aa) a <<= 0 self.assertEQUAL(a, aa) a >>= 0 self.assertEQUAL(a, aa) def test_len_or_larger_shift(self): # ensure shifts with len(a) (or larger) result in all zero bitarrays for a in self.randombitarrays(): c = a.copy() z = zeros(len(a), a.endian) n = randint(len(a), len(a) + 10) self.assertEQUAL(a << n, z) self.assertEQUAL(a >> n, z) self.assertEQUAL(a, c) a <<= n self.assertEQUAL(a, z) a = bitarray(c) a >>= n self.assertEQUAL(a, z) def test_shift_example(self): a = bitarray('0010011') self.assertEqual(a << 3, bitarray('0011000')) a >>= 4 self.assertEqual(a, bitarray('0000001')) def test_frozenbitarray(self): a = frozenbitarray('0010011') b = a << 3 self.assertEqual(b, bitarray('0011000')) self.assertEqual(type(b), frozenbitarray) self.assertRaises(TypeError, a.__ilshift__, 4) @skipIf(is_pypy) def test_imported(self): _set_default_endian("big") a = bytearray([0xf0, 0x01, 0x02, 0x0f]) b = bitarray(buffer=a) self.assertFalse(b.readonly) # operate on imported (writable) buffer b[8:24] <<= 3 self.assertEqual(a, bytearray([0xf0, 0x08, 0x10, 0x0f])) b[0:9] |= bitarray("0000 1100 1") self.assertEqual(a, bytearray([0xfc, 0x88, 0x10, 0x0f])) b[23:] ^= bitarray("1 1110 1110") self.assertEqual(a, bytearray([0xfc, 0x88, 0x11, 0xe1])) b[16:] &= bitarray("1111 0000 1111 0000") self.assertEqual(a, bytearray([0xfc, 0x88, 0x10, 0xe0])) b >>= 8 self.assertEqual(a, bytearray([0x00, 0xfc, 0x88, 0x10])) # -------------------------------- .extend() ---------------------------- class ExtendTests(unittest.TestCase, Util): def test_wrong_args(self): a = bitarray() self.assertRaises(TypeError, a.extend) for x in None, 1, True, 24, 1.0: self.assertRaises(TypeError, a.extend, x) self.assertEqual(len(a), 0) self.check_obj(a) def test_bitarray(self): a = bitarray() a.extend(bitarray()) self.assertEqual(a, bitarray()) a.extend(bitarray('110')) self.assertEqual(a, bitarray('110')) a.extend(bitarray('1110')) self.assertEqual(a, bitarray('110 1110')) a = bitarray('00001111', endian='little') a.extend(bitarray('00100111', endian='big')) self.assertEqual(a, bitarray('00001111 00100111')) def test_bitarray_random(self): for a in self.randombitarrays(): sa = a.to01() for b in self.randombitarrays(): bb = b.copy() c = bitarray(a) c.extend(b) self.assertEqual(c.to01(), sa + bb.to01()) self.assertEqual(c.endian, a.endian) self.assertEqual(len(c), len(a) + len(b)) self.check_obj(c) # ensure b hasn't changed self.assertEQUAL(b, bb) def test_list(self): a = bitarray() a.extend([]) self.assertEqual(a, bitarray()) a.extend([0, 1, True, False]) 
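# ---------------------------------------------------------------------
# Editor's sketch (illustrative only, not part of the original test
# suite): .extend() accepts other bitarrays, 0/1 strings (whitespace is
# ignored), lists/tuples of 0/1 values, iterators and generators; any
# item other than 0 or 1 raises ValueError.  ``_ba_sketch`` is a local
# alias introduced only for this sketch.
from bitarray import bitarray as _ba_sketch

_e = _ba_sketch()
_e.extend('10')            # 0/1 string
_e.extend([1, 0, True])    # list of ints / booleans
_e.extend(_ba_sketch('01'))
assert _e == _ba_sketch('10 10101')
# ---------------------------------------------------------------------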
self.assertEqual(a, bitarray('0110')) self.assertRaises(ValueError, a.extend, [0, 1, 2]) self.assertRaises(TypeError, a.extend, [0, 1, 'a']) self.assertEqual(a, bitarray('0110')) for a in self.randomlists(): for b in self.randomlists(): c = bitarray(a) c.extend(b) self.assertEqual(c.tolist(), a + b) self.check_obj(c) def test_range(self): a = bitarray() a.extend(range(2)) self.assertEqual(a, bitarray('01')) self.check_obj(a) def test_sequence(self): lst = [0, 1, 0, 1, 1] for x in [lst, tuple(lst), bytes(lst), bytearray(lst), array.array('b', lst)]: self.assertEqual(len(x), 5) # sequences have len, iterables not a = bitarray() a.extend(x) self.assertEqual(a, bitarray("01011")) self.check_obj(a) lst.append(2) # will raise ValueError for x in [lst, tuple(lst), bytes(lst), bytearray(lst), array.array('b', lst)]: self.assertEqual(len(x), 6) a = bitarray() self.assertRaises(ValueError, a.extend, x) self.assertEqual(len(a), 0) self.check_obj(a) def test_generator_1(self): def gen(lst): for x in lst: yield x a = bitarray('0011') a.extend(gen([0, 1, False, True, 0])) self.assertEqual(a, bitarray('0011 01010')) self.assertRaises(ValueError, a.extend, gen([0, 1, 2])) self.assertRaises(TypeError, a.extend, gen([1, 0, None])) self.assertEqual(a, bitarray('0011 01010')) a = bytearray() a.extend(gen([0, 1, 255])) self.assertEqual(a, b'\x00\x01\xff') self.assertRaises(ValueError, a.extend, gen([0, 1, 256])) self.assertRaises(TypeError, a.extend, gen([1, 0, None])) self.assertEqual(a, b'\x00\x01\xff') for a in self.randomlists(): def foo(): for e in a: yield e b = bitarray() b.extend(foo()) self.assertEqual(b.tolist(), a) self.check_obj(b) def test_generator_2(self): def gen(): for i in range(10): if i == 4: raise KeyError yield i % 2 for a in bitarray(), []: self.assertRaises(KeyError, a.extend, gen()) self.assertEqual(list(a), [0, 1, 0, 1]) def test_iterator_1(self): a = bitarray() a.extend(iter([])) self.assertEqual(a, bitarray()) a.extend(iter([1, 1, 0, True, False])) self.assertEqual(a, bitarray('11010')) self.assertRaises(ValueError, a.extend, iter([1, 1, 0, 0, 2])) self.assertEqual(a, bitarray('11010')) for a in self.randomlists(): for b in self.randomlists(): c = bitarray(a) c.extend(iter(b)) self.assertEqual(c.tolist(), a + b) self.check_obj(c) def test_iterator_2(self): a = bitarray() a.extend(itertools.repeat(True, 23)) self.assertEqual(a, bitarray(23 * '1')) self.check_obj(a) def test_iterator_change(self): a = bitarray(1000) c = 0 for i, x in enumerate(a): if i == 10: a.clear() c += 1 self.assertEqual(c, 11) self.check_obj(a) def test_string01(self): a = bitarray() a.extend(str()) a.extend('') self.assertEqual(a, bitarray()) a.extend('0110111') self.assertEqual(a, bitarray('0110111')) self.assertRaises(ValueError, a.extend, '0011201') # ensure no bits got added after error was raised self.assertEqual(a, bitarray('0110111')) a = bitarray() self.assertRaises(ValueError, a.extend, 100 * '01' + '.') self.assertRaises(ValueError, a.extend, 100 * '01' + '\0') self.assertEqual(a, bitarray()) for a in self.randomlists(): for b in self.randomlists(): c = bitarray(a) c.extend(''.join(str(x) for x in b)) self.assertEqual(c, bitarray(a + b)) self.check_obj(c) def test_string01_whitespace(self): a = bitarray() a.extend(whitespace) self.assertEqual(len(a), 0) a.extend('0 1\n0\r1\t0\v1_') self.assertEqual(a, bitarray('010101')) a += '_ 1\n0\r1\t0\v' self.assertEqual(a, bitarray('010101 1010')) self.check_obj(a) def test_self(self): for s in '', '1', '110', '00110111': a = bitarray(s) a.extend(a) 
self.assertEqual(a, bitarray(2 * s)) for a in self.randombitarrays(): endian = a.endian s = a.to01() a.extend(a) self.assertEqual(a.to01(), 2 * s) self.assertEqual(a.endian, endian) self.assertEqual(len(a), 2 * len(s)) self.check_obj(a) # ------------------------ Tests for bitarray methods ----------------------- class AllAnyTests(unittest.TestCase, Util): def test_all(self): a = bitarray() self.assertTrue(a.all()) for s, r in ('0', False), ('1', True), ('01', False): self.assertTrue(bitarray(s).all() is r) for a in self.randombitarrays(): self.assertTrue(a.all() is all(a)) N = randint(1000, 2000) a = ones(N) self.assertTrue(a.all()) a[N - 1] = 0 self.assertFalse(a.all()) def test_any(self): a = bitarray() self.assertFalse(a.any()) for s, r in ('0', False), ('1', True), ('01', True): self.assertTrue(bitarray(s).any() is r) for a in self.randombitarrays(): self.assertTrue(a.any() is any(a)) N = randint(1000, 2000) a = zeros(N) self.assertFalse(a.any()) a[N - 1] = 1 self.assertTrue(a.any()) class AppendTests(unittest.TestCase, Util): def test_simple(self): a = bitarray() a.append(True) a.append(False) a.append(False) self.assertEQUAL(a, bitarray('100')) a.append(0) a.append(1) self.assertEQUAL(a, bitarray('10001')) self.check_obj(a) def test_wrong_args(self): a = bitarray("10001") self.assertRaises(ValueError, a.append, 2) self.assertRaises(TypeError, a.append, None) self.assertRaises(TypeError, a.append, '') self.assertEQUAL(a, bitarray('10001')) self.check_obj(a) def test_random(self): a = urandom_2(1000) b = bitarray(endian=a.endian) for i in range(len(a)): b.append(a[i]) self.assertEQUAL(b, a[:i+1]) self.check_obj(b) class BufferInfoTests(unittest.TestCase): def test_values(self): for a, views, res in [ (bitarray(11, endian='little'), 0, (2, 'little', 5, 2, False, False, 0)), (bitarray(endian='big', buffer=b"ABC"), 2, (3, 'big', 0, 0, True, True, 2)), (frozenbitarray("00100", 'big'), 5, (1, 'big', 3, 1, True, False, 5)), ]: d = {} for i in range(views): d[i] = memoryview(a) self.assertEqual(len(d), views) info = a.buffer_info() self.assertEqual(info[1:8], res) self.assertEqual(info.nbytes, a.nbytes) self.assertEqual(info.endian, a.endian) self.assertEqual(info.padbits, a.padbits) self.assertEqual(info.readonly, a.readonly) self.assertEqual(info.exports, views) def test_types(self): a = urandom_2(57) info = a.buffer_info() self.assertTrue(isinstance(info, tuple)) self.assertEqual(type(info), BufferInfo) self.assertEqual(len(info), 8) for i, (item, tp) in enumerate([ (info.address, int), (info.nbytes, int), (info.endian, str), (info.padbits, int), (info.alloc, int), (info.readonly, bool), (info.imported, bool), (info.exports, int), ]): self.assertEqual(type(item), tp) self.assertTrue(info[i] is item) class InsertTests(unittest.TestCase, Util): def test_basic(self): a = bitarray('00111') a.insert(0, 1) self.assertEqual(a, bitarray('1 00111')) a.insert(0, 0) self.assertEqual(a, bitarray('01 00111')) a.insert(2, 1) self.assertEqual(a, bitarray('011 00111')) def test_errors(self): a = bitarray('111100') self.assertRaises(ValueError, a.insert, 0, 2) self.assertRaises(TypeError, a.insert, 0, None) self.assertRaises(TypeError, a.insert) self.assertRaises(TypeError, a.insert, None) self.assertEqual(a, bitarray('111100')) self.check_obj(a) def test_random(self): for a in self.randombitarrays(): aa = a.tolist() for _ in range(20): item = getrandbits(1) pos = randint(-len(a) - 5, len(a) + 5) a.insert(pos, item) aa.insert(pos, item) self.assertEqual(a.tolist(), aa) self.check_obj(a) class 
FillTests(unittest.TestCase, Util): def test_simple(self): a = bitarray(endian=self.random_endian()) self.assertEqual(a.fill(), 0) self.assertEqual(len(a), 0) a = bitarray('101', self.random_endian()) self.assertEqual(a.fill(), 5) self.assertEqual(a, bitarray('10100000')) self.assertEqual(a.fill(), 0) self.assertEqual(a, bitarray('10100000')) self.check_obj(a) def test_exported(self): a = bitarray('11101') b = bitarray(buffer=a) v = memoryview(a) self.assertEqual(a.fill(), 3) self.assertEqual(a, b) self.assertEqual(v.nbytes, 1) def test_random(self): for a in self.randombitarrays(): b = a.copy() res = b.fill() self.assertTrue(0 <= res < 8) self.assertTrue(b.padbits == 0) self.assertEqual(len(b) % 8, 0) self.assertEqual(b, a + zeros(res)) self.assertEqual(b.endian, a.endian) self.check_obj(b) class InvertTests(unittest.TestCase, Util): def test_simple(self): a = bitarray() a.invert() self.assertEQUAL(a, bitarray()) self.check_obj(a) a = bitarray('11011') a.invert() self.assertEQUAL(a, bitarray('00100')) for i, res in [( 0, '10100'), ( 4, '10101'), ( 2, '10001'), (-1, '10000'), (-5, '00000')]: a.invert(i) self.assertEqual(a.to01(), res) def test_errors(self): a = bitarray(5) self.assertRaises(IndexError, a.invert, 5) self.assertRaises(IndexError, a.invert, -6) self.assertRaises(TypeError, a.invert, "A") self.assertRaises(TypeError, a.invert, 0, 1) self.assertFalse(a.any()) self.check_obj(a) def test_random(self): for a in self.randombitarrays(start=1): n = len(a) b = a.copy() i = randint(-n, n - 1) a[i] = not a[i] b.invert(i) self.assertEQUAL(b, a) self.check_obj(b) def test_all(self): for a in self.randombitarrays(): b = a.copy() a.invert() self.assertEqual(a, bitarray([not v for v in b])) self.assertEqual(a.endian, b.endian) self.check_obj(a) self.assertEQUAL(b, ~a) def test_span(self): for a in self.randombitarrays(): n = len(a) b = a.copy() for _ in range(10): i = randint(0, n) j = randint(i, n) a.invert(slice(i, j)) b[i:j] = ~b[i:j] self.assertEqual(a, b) def test_random_slice(self): for a in self.randombitarrays(): n = len(a) b = a.copy() for _ in range(10): s = self.random_slice(n) a.invert(s) b[s] = ~b[s] self.assertEQUAL(a, b) @skipIf(is_pypy) def test_imported(self): a = bytearray([0, 1, 2, 3, 32, 255]) b = bitarray(buffer=a) # operate on imported (writable) buffer self.assertFalse(b.readonly) b.invert() self.assertEqual(a, bytearray([255, 254, 253, 252, 223, 0])) class SortTests(unittest.TestCase, Util): def test_simple(self): a = bitarray('1101000') a.sort() self.assertEqual(a, bitarray('0000111')) self.check_obj(a) a = bitarray('1101000') a.sort(reverse=True) self.assertEqual(a, bitarray('1110000')) a.sort(reverse=False) self.assertEqual(a, bitarray('0000111')) a.sort(True) self.assertEqual(a, bitarray('1110000')) a.sort(False) self.assertEqual(a, bitarray('0000111')) self.assertRaises(TypeError, a.sort, 'A') def test_random(self): for rev in False, True, 0, 1, 7, -1, -7, None: for a in self.randombitarrays(): lst = a.tolist() if rev is None: lst.sort() a.sort() else: lst.sort(reverse=rev) a.sort(reverse=rev) self.assertEqual(a, bitarray(lst)) self.check_obj(a) @skipIf(is_pypy) def test_imported(self): a = bytearray([0x6f, 0xa5]) b = bitarray(endian="big", buffer=a) self.assertFalse(b.readonly) # operate on imported (writable) buffer b.sort() self.assertEqual(b.count(), 10) self.assertEqual(a, bytearray([0x03, 0xff])) # ----------------------- .pack() .unpack() --------------------------- class PackTests(unittest.TestCase, Util): def test_pack_simple(self): for endian in 
'little', 'big': _set_default_endian(endian) a = bitarray() a.pack(bytes()) self.assertEQUAL(a, bitarray()) a.pack(b'\x00') self.assertEQUAL(a, bitarray('0')) a.pack(b'\xff') self.assertEQUAL(a, bitarray('01')) a.pack(b'\x01\x00\x7a') self.assertEQUAL(a, bitarray('01101')) a.pack(bytearray([0x01, 0x00, 0xff, 0xa7])) self.assertEQUAL(a, bitarray('01101 1011')) self.check_obj(a) def test_pack_types(self): a = bitarray() a.pack(b'\0\x01') # bytes self.assertEqual(a, bitarray('01')) a.pack(bytearray([0, 2])) # bytearray self.assertEqual(a, bitarray('01 01')) a.pack(memoryview(b'\x02\0')) # memoryview self.assertEqual(a, bitarray('01 01 10')) a.pack(array.array('B', [0, 255, 192])) self.assertEqual(a, bitarray('01 01 10 011')) self.check_obj(a) def test_pack_bitarray(self): b = bitarray("00000000 00000001 10000000 11111111 00000000") a = bitarray() a.pack(bitarray(b)) self.assertEqual(a, bitarray('01110')) self.check_obj(a) def test_pack_self(self): a = bitarray() self.assertRaisesMessage( BufferError, "cannot resize bitarray that is exporting buffers", a.pack, a) def test_pack_allbytes(self): a = bitarray() a.pack(bytearray(range(256))) self.assertEqual(a.to01(), '0' + 255 * '1') self.check_obj(a) def test_pack_errors(self): a = bitarray() self.assertRaises(TypeError, a.pack, 0) self.assertRaises(TypeError, a.pack, '1') self.assertRaises(TypeError, a.pack, [1, 3]) def test_unpack_simple(self): a = bitarray('01') self.assertEqual(type(a.unpack()), bytes) self.assertEqual(a.unpack(), b'\x00\x01') self.assertEqual(a.unpack(b'A'), b'A\x01') self.assertEqual(a.unpack(b'0', b'1'), b'01') self.assertEqual(a.unpack(one=b'\xff'), b'\x00\xff') self.assertEqual(a.unpack(zero=b'A'), b'A\x01') self.assertEqual(a.unpack(one=b't', zero=b'f'), b'ft') def test_unpack_random(self): for a in self.randombitarrays(): self.assertEqual(a.unpack(b'0', b'1'), a.to01().encode()) # round trip b = bitarray() b.pack(a.unpack()) self.assertEqual(b, a) # round trip with invert b = bitarray() b.pack(a.unpack(b'\x01', b'\x00')) b.invert() self.assertEqual(b, a) # use .extend() to pack b = bitarray() b.extend(a.unpack()) self.assertEqual(b, a) def test_unpack_errors(self): a = bitarray('01') self.assertRaises(TypeError, a.unpack, b'') self.assertRaises(TypeError, a.unpack, b'0', b'') self.assertRaises(TypeError, a.unpack, b'a', zero=b'b') self.assertRaises(TypeError, a.unpack, foo=b'b') self.assertRaises(TypeError, a.unpack, one=b'aa', zero=b'b') self.assertRaises(TypeError, a.unpack, '0') self.assertRaises(TypeError, a.unpack, one='a') self.assertRaises(TypeError, a.unpack, b'0', '1') class PopTests(unittest.TestCase, Util): def test_basic(self): a = bitarray('01') self.assertRaisesMessage(IndexError, "pop index out of range", a.pop, 2) self.assertEqual(a.pop(), True) self.assertEqual(a.pop(), False) self.assertEqual(a, bitarray()) # pop from empty bitarray self.assertRaisesMessage(IndexError, "pop from empty bitarray", a.pop) def test_simple(self): for x, n, r, y in [('1', 0, 1, ''), ('0', -1, 0, ''), ('0011100', 3, 1, '001100')]: a = bitarray(x) self.assertTrue(a.pop(n) is r) self.assertEqual(a, bitarray(y)) self.check_obj(a) def test_reverse(self): for a in self.randombitarrays(): c = a.copy() b = bitarray() while a: b.append(a.pop()) self.assertEqual(a, bitarray()) b.reverse() self.assertEqual(b, c) def test_random_1(self): for a in self.randombitarrays(): self.assertRaises(IndexError, a.pop, len(a)) self.assertRaises(IndexError, a.pop, -len(a) - 1) if len(a) == 0: continue aa = a.tolist() enda = a.endian 
self.assertEqual(a.pop(), aa[-1]) self.check_obj(a) self.assertEqual(a.endian, enda) def test_random_2(self): for a in self.randombitarrays(start=1): n = randrange(-len(a), len(a)) aa = a.tolist() x = a.pop(n) self.assertEqual(x, aa[n]) self.assertEqual(type(x), int) y = aa.pop(n) self.assertEqual(a, bitarray(aa)) self.assertEqual(x, y) self.check_obj(a) class ReverseTests(unittest.TestCase, Util): def test_explicit(self): for x, y in [('', ''), ('1', '1'), ('10', '01'), ('001', '100'), ('1110', '0111'), ('11100', '00111'), ('011000', '000110'), ('1101100', '0011011'), ('11110000', '00001111'), ('11111000011', '11000011111')]: a = bitarray(x) a.reverse() self.assertEQUAL(a, bitarray(y)) self.check_obj(a) def test_argument(self): a = bitarray(3) self.assertRaises(TypeError, a.reverse, 42) def test_random(self): for a in self.randombitarrays(): b = a.copy() a.reverse() self.assertEqual(a.to01(), b.to01()[::-1]) self.assertEQUAL(a, bitarray(reversed(b), endian=a.endian)) self.assertEQUAL(a, b[::-1]) self.check_obj(a) @skipIf(is_pypy) def test_imported(self): a = bytearray([0, 1, 2, 3, 255]) b = bitarray(buffer=a) # reversing an imported (writable) buffer self.assertFalse(b.readonly) b.reverse() self.assertEqual(a, bytearray([255, 192, 64, 128, 0])) class RemoveTests(unittest.TestCase, Util): def test_explicit(self): a = bitarray('1010110') for val, res in [(False, '110110'), (True, '10110'), (1, '0110'), (1, '010'), (0, '10'), (0, '1'), (1, '')]: a.remove(val) self.assertEQUAL(a, bitarray(res)) self.check_obj(a) def test_errors(self): a = bitarray('0010011') a.remove(1) self.assertEQUAL(a, bitarray('000011')) self.assertRaises(TypeError, a.remove, 'A') self.assertRaises(ValueError, a.remove, 21) self.assertEQUAL(a, bitarray('000011')) a = bitarray() for i in (True, False, 1, 0): self.assertRaises(ValueError, a.remove, i) a = zeros(21) self.assertRaises(ValueError, a.remove, 1) a.setall(1) self.assertRaises(ValueError, a.remove, 0) def test_random(self): for a in self.randombitarrays(): b = a.tolist() v = getrandbits(1) if v not in a: continue a.remove(v) b.remove(v) self.assertEqual(a.tolist(), b) self.check_obj(a) class SetAllTests(unittest.TestCase, Util): def test_explicit(self): a = urandom_2(5) a.setall(True) self.assertRaises(ValueError, a.setall, -1) self.assertRaises(TypeError, a.setall, None) self.assertEqual(a.to01(), '11111') a.setall(0) self.assertEqual(a.to01(), '00000') self.check_obj(a) def test_empty(self): a = bitarray() for v in 0, 1: a.setall(v) self.assertEqual(len(a), 0) self.check_obj(a) def test_random(self): for a in self.randombitarrays(): endian = a.endian val = getrandbits(1) a.setall(val) self.assertEqual(a.to01(), len(a) * str(val)) self.assertEqual(a.endian, endian) self.check_obj(a) @skipIf(is_pypy) def test_imported(self): a = bytearray([0, 1, 2, 3]) b = bitarray(buffer=a) self.assertFalse(b.readonly) # operate on imported (writable) buffer b.setall(1) self.assertEqual(a, bytearray([0xff, 0xff, 0xff, 0xff])) class To01Tests(unittest.TestCase, Util): def test_no_grouping(self): a = bitarray() self.assertEqual(a.to01(1), "") a = bitarray("100011110") for s in [a.to01(), a.to01(0), a.to01(0, "X"), a.to01(1, ""), a.to01(group=0), a.to01(sep="X"), a.to01(group=2, sep="")]: self.assertEqual(type(s), str) self.assertEqual(len(s), len(a)) self.assertEqual(s, "100011110") def test_examples(self): a = bitarray("0000 1111 0011 0101") self.assertEqual(a.to01(1, "-"), "0-0-0-0-1-1-1-1-0-0-1-1-0-1-0-1") self.assertEqual(a.to01(2, sep='+'), "00+00+11+11+00+11+01+01") 
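# ---------------------------------------------------------------------
# Editor's sketch (illustrative only, not part of the original test
# suite): as the examples below exercise, to01() groups from the left,
# inserting `sep` after every `group` bits; group=0 (the default)
# disables grouping.  ``_ba_sketch`` is a local alias for this sketch.
from bitarray import bitarray as _ba_sketch

assert _ba_sketch('10110100').to01(group=4, sep=' ') == '1011 0100'
assert _ba_sketch('10110100').to01() == '10110100'
# ---------------------------------------------------------------------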
self.assertEqual(a.to01(3), "000 011 110 011 010 1") self.assertEqual(a.to01(group=4, sep="_"), "0000_1111_0011_0101") self.assertEqual(a.to01(group=5, sep='.'), "00001.11100.11010.1") self.assertEqual(a.to01(group=6), "000011 110011 0101") self.assertEqual(a.to01(7), "0000111 1001101 01") self.assertEqual(a.to01(8, ", "), "00001111, 00110101") self.assertEqual(a.to01(9, "ABC"), "000011110ABC0110101") def test_wrong_args(self): a = bitarray("1101100") self.assertRaises(TypeError, a.to01, None) self.assertRaises(ValueError, a.to01, -1) self.assertRaises(TypeError, a.to01, foo=4) self.assertRaises(TypeError, a.to01, 2, None) self.assertRaises(TypeError, a.to01, 4, b"_") def test_sep(self): for a in self.randombitarrays(): sep = "".join(chr(randint(32, 126)) for _ in range(randrange(10))) self.assertEqual(a.to01(1, sep), sep.join(str(v) for v in a)) a = bitarray("11100111") # use unicode character black star as separator s = a.to01(3, "\u2605") self.assertEqual(s, "111\u2605001\u260511") def test_random(self): for a in self.randombitarrays(): n = len(a) group = randrange(10) nsep = randrange(6) s = a.to01(group, nsep * " ") self.assertEqual(a, bitarray(s)) nspace = s.count(" ") self.assertEqual(len(s), n + nspace) self.assertEqual(nspace, nsep * ((n - 1) // group) if group and n else 0) class ByteReverseTests(unittest.TestCase, Util): def test_explicit_all(self): for x, y in [('', ''), ('11101101', '10110111'), ('00000001', '10000000'), ('11011111 00100000 00011111', '11111011 00000100 11111000')]: a = bitarray(x) a.bytereverse() self.assertEqual(a, bitarray(y)) def test_explicit_range(self): a = bitarray('11100000 00000011 00111111 11111000') a.bytereverse(0, 1) # reverse byte 0 self.assertEqual(a, bitarray('00000111 00000011 00111111 11111000')) a.bytereverse(1, -1) # reverse bytes 1 and 2 self.assertEqual(a, bitarray('00000111 11000000 11111100 11111000')) a.bytereverse(2) # reverse bytes 2 till end of buffer self.assertEqual(a, bitarray('00000111 11000000 00111111 00011111')) a.bytereverse(-1) # reverse last byte self.assertEqual(a, bitarray('00000111 11000000 00111111 11111000')) a.bytereverse(3, 1) # start > stop (nothing to reverse) self.assertEqual(a, bitarray('00000111 11000000 00111111 11111000')) a.bytereverse(0, 4) # reverse all bytes self.assertEqual(a, bitarray('11100000 00000011 11111100 00011111')) a.bytereverse(-2) # last two bytes self.assertEqual(a, bitarray('11100000 00000011 00111111 11111000')) self.assertRaises(IndexError, a.bytereverse, -5) self.assertRaises(IndexError, a.bytereverse, 0, -5) self.assertRaises(IndexError, a.bytereverse, 5) self.assertRaises(IndexError, a.bytereverse, 0, 5) def test_byte(self): for i in range(256): a = bitarray(bytearray([i])) self.assertEqual(len(a), 8) b = a.copy() b.bytereverse() self.assertEqual(b, a[::-1]) a.reverse() self.assertEqual(b, a) self.check_obj(b) def test_consecutive(self): for a in self.randombitarrays(): b = a.copy() # two consecutive calls to .bytereverse() leave the bitarray # unchanged (even when the length is not a multiple of 8). 
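# ---------------------------------------------------------------------
# Editor's sketch (illustrative only, not part of the original test
# suite): .bytereverse() reverses the bit order within each byte of the
# buffer, so applying it twice restores the original bitarray, as the
# surrounding test verifies.  ``_ba_sketch`` is a local alias for this
# sketch.
from bitarray import bitarray as _ba_sketch

_b = _ba_sketch('11101101')
_b.bytereverse()
assert _b == _ba_sketch('10110111')
_b.bytereverse()
assert _b == _ba_sketch('11101101')
# ---------------------------------------------------------------------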
a.bytereverse() a.bytereverse() self.assertEQUAL(a, b) def test_random(self): t = bitarray(bytearray(range(256)), self.random_endian()) t.bytereverse() table = t.tobytes() # translation table self.assertEqual(table[:9], b'\x00\x80\x40\xc0\x20\xa0\x60\xe0\x10') for n in range(100): a = urandom_2(8 * n) i = randint(0, n) # start j = randint(0, n) # stop b = a.copy() memoryview(b)[i:j] = b.tobytes()[i:j].translate(table) a.bytereverse(i, j) self.assertEQUAL(a, b) self.check_obj(a) def test_endian(self): for n in range(20): a = urandom_2(8 * n) b = a.copy() a.bytereverse() a = bitarray(a, self.opposite_endian(a.endian)) self.assertEqual(a.tobytes(), b.tobytes()) @skipIf(is_pypy) def test_imported(self): a = bytearray([0, 1, 2, 3, 255]) b = bitarray(buffer=a) # operate on imported (writable) buffer self.assertFalse(b.readonly) b.bytereverse() self.assertEqual(a, bytearray([0, 128, 64, 192, 255])) class ToListTests(unittest.TestCase, Util): def test_empty(self): a = bitarray() self.assertEqual(a.tolist(), []) def test_simple(self): a = bitarray('110') lst = a.tolist() self.assertEqual(type(lst), list) self.assertEqual(lst, [1, 1, 0]) for item in lst: self.assertEqual(type(item), int) def test_random(self): for a in self.randombitarrays(): res = a.tolist() self.assertEqual(res, list(a)) self.assertEqual(res, [int(v) for v in a.to01()]) class ClearTests(unittest.TestCase, Util): def test_simple(self): a = bitarray("1110000001001000011111") a.clear() self.assertEqual(len(a), 0) def test_random(self): for a in self.randombitarrays(): endian = a.endian a.clear() self.assertFalse(a) self.assertEqual(len(a), 0) self.assertEqual(a.endian, endian) self.check_obj(a) # -------------------------------- .count() --------------------------------- class CountTests(unittest.TestCase, Util): def test_basic(self): a = bitarray('10011') self.assertEqual(a.count(), 3) self.assertEqual(a.count(True), 3) self.assertEqual(a.count(False), 2) self.assertEqual(a.count(1), 3) self.assertEqual(a.count(0), 2) self.assertEqual(a.count(0, 5, 0, -1), 2) self.assertEqual(a.count(bitarray('0')), 2) self.assertEqual(a.count(bitarray('00')), 1) self.assertRaises(ValueError, a.count, 2) self.assertRaises(ValueError, a.count, 1, 0, 5, 0) self.assertRaises(TypeError, a.count, '') self.assertRaises(TypeError, a.count, 'A') self.assertRaises(TypeError, a.count, 1, 2.0) self.assertRaises(TypeError, a.count, 1, 2, 4.0) self.assertRaises(TypeError, a.count, 0, 'A') self.assertRaises(TypeError, a.count, 0, 0, 'A') def test_sub(self): a = bitarray('10011000 1110000') self.assertEqual(len(a), 15) self.assertEqual(a.count(bitarray('')), 16) self.assertEqual(a.count(bitarray('00')), 4) self.assertEqual(a.count(bitarray('11')), 2) self.assertEqual(a.count(bitarray('000')), 2) self.assertEqual(a.count(bitarray('000'), 8), 1) self.assertEqual(a.count(bitarray('000'), -3), 1) self.assertEqual(a.count(bitarray('000'), -4), 1) self.assertEqual(a.count(bitarray('000'), 4, -1), 2) self.assertEqual(a.count(bitarray('00'), -3), 1) self.assertEqual(a.count(bitarray('00'), -4), 2) self.assertRaises(ValueError, a.count, bitarray(''), 0, 15, 2) self.assertRaises(ValueError, a.count, bitarray('11'), 0, 15, 2) self.assertRaises(ValueError, a.count, bitarray('11'), 15, 0, -1) def test_random_sub(self): for _ in range(1000): n = randrange(100) a = urandom_2(n) s = a.to01() b = urandom_2(randrange(8)) t = b.to01() i = randint(-n - 10, n + 10) j = randint(-n - 10, n + 10) self.assertEqual(a.count(b, i, j), s.count(t, i, j)) def test_byte(self): for i in 
range(256): a = bitarray(bytearray([i])) self.assertEqual(len(a), 8) self.assertEqual(a.count(), bin(i)[2:].count('1')) def test_whole_range(self): for n in range(500): a = urandom_2(n) s = a.to01() for v in 0, 1: ref = s.count(str(v)) self.assertEqual(a.count(v), ref) self.assertEqual(a.count(v, n, -n - 1, -1), ref) def test_sparse(self): n = 65536 a = bitarray(n) indices = set(randrange(n) for _ in range(256)) a[list(indices)] = 1 self.assertEqual(a.count(1), len(indices)) self.assertEqual(a.count(0), n - len(indices)) for _ in range(100): i = randrange(n) j = randrange(i, n) cnt = sum(1 for k in indices if i <= k < j) self.assertEqual(a.count(1, i, j), cnt) self.assertEqual(a.count(0, i, j), j - i - cnt) def test_step1(self): n = 300 a = urandom_2(n) s = a.to01() for _ in range(1000): i = randrange(n) j = randrange(i, n) t = s[i:j] c0 = t.count('0') c1 = t.count('1') self.assertEqual(c0 + c1, j - i) self.assertEqual(a.count(0, i, j), c0) self.assertEqual(a.count(1, i, j), c1) b = a[i:j] self.assertEqual(b.count(0), c0) self.assertEqual(b.count(1), c1) def test_explicit(self): a = bitarray('01001100 01110011 01') self.assertEqual(a.count(), 9) self.assertEqual(a.count(0, 12), 3) self.assertEqual(a.count(1, 1, 18, 2), 6) self.assertEqual(a.count(1, 0, 18, 3), 2) self.assertEqual(a.count(1, 15, 4, -3), 2) self.assertEqual(a.count(1, -5), 3) self.assertEqual(a.count(1, 2, 17), 7) self.assertEqual(a.count(1, 6, 11), 2) self.assertEqual(a.count(0, 7, -3), 4) self.assertEqual(a.count(1, 1, -1), 8) self.assertEqual(a.count(1, 17, 14), 0) def test_random_slice(self): for n in range(500): a = urandom_2(n) v = randrange(2) s = self.random_slice(n) self.assertEqual(a.count(v, s.start, s.stop, s.step), a[s].count(v)) def test_offest_buffer(self): # this tests if words are aligned in popcnt_words() N = 1 << 16 for i in range(20): a = urandom_2(N, 'little') b = bitarray(buffer=memoryview(a)[i:], endian='little') self.assertEqual(b.count(), a.count(1, 8 * i)) # -------------------------- .find() and .index() --------------------------- class IndexTests(unittest.TestCase, Util): def test_errors(self): a = bitarray() for i in True, False, 1, 0: self.assertEqual(a.find(i), -1) self.assertRaises(ValueError, a.index, i) a = zeros(100) self.assertRaises(TypeError, a.find) self.assertRaises(TypeError, a.find, 1, 'a') self.assertRaises(TypeError, a.find, 1, 0, 'a') self.assertRaises(TypeError, a.find, 1, 0, 100, 'a') self.assertEqual(a.find(1, right=True), -1) self.assertRaises(ValueError, a.index, True) self.assertRaises(TypeError, a.index) self.assertRaises(TypeError, a.index, 1, 'a') self.assertRaises(TypeError, a.index, 1, 0, 'a') self.assertRaises(TypeError, a.index, 1, 0, 100, 'a') def test_explicit(self): a = bitarray('10011000 101000') for sub, start, stop, right, res in [ ('', 7, 13, 0, 7), ('', 15, 99, 0, -1), ('0', 0, 99, 0, 1), ('1', 8, 12, 1, 10), ('1', -99, -4, 1, 8), ('11', 0, 99, 0, 3), ('11', 4, 99, 0, -1), ('111', 0, 99, 1, -1), ('101', 0, 99, 1, 8), (a.to01(), 0, 99, 0, 0), ]: b = bitarray(sub, self.random_endian()) self.assertEqual(a.find(b, start, stop, right), res) if res >= 0: self.assertEqual(a.index(b, start, stop, right), res) else: self.assertRaises(ValueError, a.index, start, stop, right) if len(b) == 1: self.assertEqual(a.find(b[0], start, stop, right), res) @staticmethod def find_empty(n, start=0, stop=sys.maxsize, right=0): """ Return first (or rightmost (right=1)) index of an empty sequence inside a sequence S of length n with S[start:stop], or -1 when no empty sequence is 
found. """ if start > n: return -1 s = slice(start, stop, 1) start, stop, stride = s.indices(n) if start > stop: return -1 return stop if right else start def test_find_empty(self): # test staticmethod .find_empty() against Python builtins for x in bytearray([0]), b"\0", "A": empty = 0 * x # empty sequence self.assertEqual(len(empty), 0) for _ in range(50): n = randint(0, 5) z = n * x # sequence of length n self.assertEqual(len(z), n) self.assertTrue(type(x) == type(empty) == type(z)) self.assertEqual(z.find(empty), self.find_empty(n)) self.assertEqual(z.rfind(empty), self.find_empty(n, right=1)) start = randint(-5, 5) self.assertEqual(z.find(empty, start), self.find_empty(n, start)) self.assertEqual(z.rfind(empty, start), self.find_empty(n, start, right=1)) stop = randint(-5, 5) self.assertEqual(z.find(empty, start, stop), self.find_empty(n, start, stop)) self.assertEqual(z.rfind(empty, start, stop), self.find_empty(n, start, stop, 1)) def test_empty(self): # now that we have the established .find_empty(), we use it to # test .find() with an empty bitarray empty = bitarray() for _ in range(50): n = randint(0, 5) z = bitarray(n) right = getrandbits(1) self.assertEqual(z.find(empty, right=right), self.find_empty(n, right=right)) start = randint(-5, 5) self.assertEqual(z.find(empty, start, right=right), self.find_empty(n, start, right=right)) stop = randint(-5, 5) self.assertEqual(z.find(empty, start, stop, right), self.find_empty(n, start, stop, right)) def test_range_explicit(self): n = 150 a = bitarray(n) for m in range(n): a.setall(0) self.assertRaises(ValueError, a.index, 1) self.assertEqual(a.find(1), -1) a[m] = 1 self.assertEqual(a.index(1), m) self.assertEqual(a.find(1), m) a.setall(1) self.assertRaises(ValueError, a.index, 0) self.assertEqual(a.find(0), -1) a[m] = 0 self.assertEqual(a.index(0), m) self.assertEqual(a.find(0), m) def test_random_start_stop(self): for _ in range(500): n = randrange(1, 200) a = zeros(n) plst = sorted(randrange(n) for _ in range(1, 10)) a[plst] = 1 # test without start and stop self.assertEqual(a.find(1, right=0), plst[0]) self.assertEqual(a.find(1, right=1), plst[-1]) start = randint(0, n) stop = randint(0, n) plst2 = [i for i in plst if start <= i < stop] if plst2: self.assertEqual(a.find(1, start, stop, 0), plst2[0]) self.assertEqual(a.find(1, start, stop, 1), plst2[-1]) else: right = getrandbits(1) self.assertEqual(a.find(1, start, stop, right), -1) def test_random_sub(self): # test finding sub_bitarray for _ in range(200): n = randrange(1, 100) a = urandom_2(n) s = a.to01() self.assertEqual(a.find(a), 0) n = len(a) b = bitarray(randrange(10), self.random_endian()) t = b.to01() self.assertEqual(a.find(b), s.find(t)) i = randint(-n - 5, n + 5) j = randint(-n - 5, n + 5) ref_l = s.find(t, i, j) ref_r = s.rfind(t, i, j) self.assertEqual(ref_l == -1, ref_r == -1) self.assertEqual(a.find(b, i, j, 0), ref_l) self.assertEqual(a.find(b, i, j, 1), ref_r) if len(b) == 1: # test finding int v = b[0] self.assertTrue(v in range(2)) self.assertEqual(a.find(v, i, j, 0), ref_l) self.assertEqual(a.find(v, i, j, 1), ref_r) # ----------------------------- .search() ----------------------------------- class SearchTests(unittest.TestCase, Util): def test_no_itersearch(self): a = bitarray() # removed in bitarray 3.0 self.assertRaises(AttributeError, a.__getattribute__, 'itersearch') def test_simple(self): a = bitarray() for s in 0, 1, False, True, bitarray('0'), bitarray('1'): self.assertEqual(list(a.search(s)), []) a = bitarray('00100') for s in 1, True, bitarray('1'), 
bitarray('10'): self.assertEqual(list(a.search(s)), [2]) a = 100 * bitarray('1') self.assertEqual(list(a.search(0)), []) self.assertEqual(list(a.search(1)), list(range(100))) self.assertRaises(TypeError, a.search, '010') def test_search_next(self): a = bitarray('10011') self.assertRaises(TypeError, a.search, '') it = a.search(1) self.assertIsType(it, 'searchiterator') self.assertEqual(next(it), 0) self.assertEqual(next(it), 3) self.assertEqual(next(it), 4) self.assertRaises(StopIteration, next, it) x = bitarray('11') it = a.search(x) del a, x self.assertEqual(next(it), 3) def test_search_empty(self): a = bitarray('10011') empty = bitarray() self.assertEqual(list(a.search(empty)), [0, 1, 2, 3, 4, 5]) for start, stop, right, res in [ (-9, 9, 0, [0, 1, 2, 3, 4, 5]), ( 1, 4, 0, [1, 2, 3, 4]), (-3, -2, 0, [2, 3]), (-1, 0, 1, []), ( 3, 3, 0, [3]), ( 4, 3, 0, []), ( 2, 2, 1, [2]), ( 2, 1, 1, []), ]: self.assertEqual(list(a.search(empty, start, stop, right)), res) def test_explicit_1(self): a = bitarray('10011', self.random_endian()) for s, res in [('0', [1, 2]), ('1', [0, 3, 4]), ('01', [2]), ('11', [3]), ('000', []), ('1001', [0]), ('011', [2]), ('0011', [1]), ('10011', [0]), ('100111', [])]: b = bitarray(s, self.random_endian()) self.assertEqual(list(a.search(b)), res) def test_explicit_2(self): a = bitarray('10010101 11001111 1001011') for s, res in [('011', [6, 11, 20]), ('111', [7, 12, 13, 14]), # note the overlap ('1011', [5, 19]), ('100', [0, 9, 16])]: b = bitarray(s) self.assertEqual(list(a.search(b)), res) def test_bool_random(self): for a in self.randombitarrays(): b = a.copy() b.setall(0) b[list(a.search(1))] = 1 self.assertEQUAL(b, a) b.setall(1) b[list(a.search(0))] = 0 self.assertEQUAL(b, a) s = set(a.search(0)) | set(a.search(1)) self.assertEqual(len(s), len(a)) def test_random(self): for a in self.randombitarrays(): if a: # search for a in itself self.assertEqual(list(a.search(a)), [0]) self.assertEqual(list(a.search(a, right=1)), [0]) for sub in '0', '1', '01', '01', '11', '101', '1101', '01100': b = bitarray(sub, self.random_endian()) plst = [i for i in range(len(a)) if a[i:i + len(b)] == b] self.assertEqual(list(a.search(b)), plst) for p in a.search(b): self.assertEqual(a[p:p + len(b)], b) self.assertEqual(list(a.search(b)), plst) for p in a.search(b, right=1): self.assertEqual(a[p:p + len(b)], b) self.assertEqual(list(a.search(b, right=1)), plst[::-1]) def test_search_random(self): for _ in range(500): n = randrange(1, 50) a = urandom_2(n) b = urandom_2(randrange(10)) i = randrange(n) j = randrange(n) aa = a[i:j] # list of positions if b: plst = [i + k for k in range(len(aa)) if aa[k:k + len(b)] == b] else: # empty sub-bitarray plst = list(range(i, j + 1)) self.assertEqual(sorted(plst), plst) self.assertEqual(list(a.search(b, i, j)), plst) if len(b) == 1: # test sub-bitarray being int self.assertEqual(list(a.search(b[0], i, j)), plst) if plst: # test first and last using .find() self.assertEqual(a.find(b, i, j, 0), plst[0]) self.assertEqual(a.find(b, i, j, 1), plst[-1]) plst.reverse() self.assertEqual(list(a.search(b, i, j, 1)), plst) if len(b) == 1: # test sub-bitarray being int self.assertEqual(list(a.search(b[0], i, j, 1)), plst) # test contains self.assertEqual(b in aa, bool(plst) if b else True) if not plst: # test .find() not found right = getrandbits(1) self.assertEqual(a.find(b, i, j, right), -1) def test_iterator_change(self): for right in 0, 1: a = zeros(100) b = zeros(10) c = 0 for i, x in enumerate(a.search(b, right=right)): if i == 40: a.clear() c += 1 
self.assertEqual(c, 41) def test_iterator_change_sub(self): for right in 0, 1: a = zeros(100) b = zeros(0) c = 0 for i, x in enumerate(a.search(b, right=right)): if i == 20: b.append(1) c += 1 self.assertEqual(c, 21) # ------------------------ .frombytes() and .tobytes() ---------------------- class BytesTests(unittest.TestCase, Util): def test_frombytes_simple(self): a = bitarray("110", "big") a.frombytes(b'A') self.assertEqual(a, bitarray('110 01000001')) a.frombytes(b'BC') self.assertEQUAL(a, bitarray('110 01000001 01000010 01000011', endian='big')) def test_frombytes_types(self): a = bitarray(endian='big') a.frombytes(b'A') # bytes self.assertEqual(a, bitarray('01000001')) a.frombytes(bytearray([254])) # bytearray self.assertEqual(a, bitarray('01000001 11111110')) a.frombytes(memoryview(b'C')) # memoryview self.assertEqual(a, bitarray('01000001 11111110 01000011')) a.clear() arr = array.array('H', [0x010f, 0xff]) # array self.assertEqual(arr.itemsize, 2) if sys.byteorder == "big": arr.byteswap() a.frombytes(arr) self.assertEqual(a, bitarray("00001111 00000001 11111111 00000000")) self.check_obj(a) for x in '', 0, 1, False, True, None, []: self.assertRaises(TypeError, a.frombytes, x) def test_frombytes_bitarray(self): # endianness doesn't matter here as we're writting the buffer # from bytes, and then extend from buffer bytes again b = bitarray(0, self.random_endian()) b.frombytes(b'ABC') a = bitarray(0, 'big') a.frombytes(bitarray(b)) # get bytes from bitarray buffer self.assertEqual(a.endian, 'big') self.assertEqual(a.tobytes(), b'ABC') self.check_obj(a) def test_frombytes_self(self): a = bitarray() self.assertRaisesMessage( BufferError, "cannot resize bitarray that is exporting buffers", a.frombytes, a) def test_frombytes_empty(self): for a in self.randombitarrays(): b = a.copy() a.frombytes(b'') a.frombytes(bytearray()) self.assertEQUAL(a, b) self.assertFalse(a is b) self.check_obj(a) def test_frombytes_errors(self): a = bitarray() self.assertRaises(TypeError, a.frombytes) self.assertRaises(TypeError, a.frombytes, b'', b'') self.assertRaises(TypeError, a.frombytes, 1) self.check_obj(a) def test_frombytes_random(self): for n in range(20): s = os.urandom(n) b = bitarray(0, self.random_endian()) b.frombytes(s) self.assertEqual(len(b), 8 * n) for a in self.randombitarrays(): c = bitarray(a, b.endian) c.frombytes(s) self.assertEqual(len(c), len(a) + 8 * n) self.assertEqual(c, a + b) self.check_obj(c) def test_tobytes_empty(self): a = bitarray() self.assertEqual(a.tobytes(), b'') def test_tobytes_endian(self): a = bitarray(endian=self.random_endian()) a.frombytes(b'foo') self.assertEqual(a.tobytes(), b'foo') for n in range(20): s = os.urandom(n) a = bitarray(s, endian=self.random_endian()) self.assertEqual(len(a), 8 * n) self.assertEqual(a.tobytes(), s) self.check_obj(a) def test_tobytes_explicit_ones(self): for n, s in [(1, b'\x01'), (2, b'\x03'), (3, b'\x07'), (4, b'\x0f'), (5, b'\x1f'), (6, b'\x3f'), (7, b'\x7f'), (8, b'\xff'), (12, b'\xff\x0f'), (15, b'\xff\x7f'), (16, b'\xff\xff'), (17, b'\xff\xff\x01'), (24, b'\xff\xff\xff')]: a = ones(n, endian='little') self.assertEqual(a.tobytes(), s) # -------------------------- test attributes -------------------------------- class DescriptorTests(unittest.TestCase, Util): def test_endian(self): for endian in "little", "big": a = bitarray('1101100', endian) self.assertEqual(a.endian, endian) self.assertEqual(type(a.endian), str) def test_nbytes_padbits(self): for n in range(50): a = bitarray(n) # .nbytes self.assertEqual(a.nbytes, 
bits2bytes(n)) self.assertEqual(type(a.nbytes), int) # .padbits self.assertEqual(a.padbits, 8 * a.nbytes - n) self.assertTrue(0 <= a.padbits < 8) self.assertEqual(type(a.padbits), int) def test_readonly(self): a = bitarray('110') self.assertFalse(a.readonly) self.assertEqual(type(a.readonly), bool) b = frozenbitarray(a) self.assertTrue(b.readonly) self.assertEqual(type(b.readonly), bool) # --------------------------------------------------------------------------- class FileTests(unittest.TestCase, Util): def setUp(self): self.tmpdir = tempfile.mkdtemp() self.tmpfname = os.path.join(self.tmpdir, 'testfile') def tearDown(self): shutil.rmtree(self.tmpdir) def read_file(self): with open(self.tmpfname, 'rb') as fi: return fi.read() def assertFileSize(self, size): self.assertEqual(os.path.getsize(self.tmpfname), size) def test_pickle(self): d1 = {i: a for i, a in enumerate(self.randombitarrays())} with open(self.tmpfname, 'wb') as fo: pickle.dump(d1, fo) with open(self.tmpfname, 'rb') as fi: d2 = pickle.load(fi) for key in d1.keys(): self.assertEQUAL(d1[key], d2[key]) # pyodide has no dbm module @skipIf(pyodide) def test_shelve(self): d1 = shelve.open(self.tmpfname) stored = [] for i, a in enumerate(self.randombitarrays()): key = str(i) d1[key] = a stored.append((key, a)) d1.close() d2 = shelve.open(self.tmpfname) for k, v in stored: self.assertEQUAL(d2[k], v) d2.close() def test_fromfile_empty(self): with open(self.tmpfname, 'wb') as fo: pass self.assertFileSize(0) a = bitarray() with open(self.tmpfname, 'rb') as fi: a.fromfile(fi) self.assertEqual(a, bitarray()) self.check_obj(a) def test_fromfile_Foo(self): with open(self.tmpfname, 'wb') as fo: fo.write(b'Foo') self.assertFileSize(3) a = bitarray(endian='big') with open(self.tmpfname, 'rb') as fi: a.fromfile(fi) self.assertEqual(a, bitarray('01000110 01101111 01101111')) a = bitarray(endian='little') with open(self.tmpfname, 'rb') as fi: a.fromfile(fi) self.assertEqual(a, bitarray('01100010 11110110 11110110')) def test_fromfile_wrong_args(self): a = bitarray() self.assertRaises(TypeError, a.fromfile) self.assertRaises(AttributeError, a.fromfile, 42) self.assertRaises(AttributeError, a.fromfile, 'bar') with open(self.tmpfname, 'wb') as fo: fo.write(b"ABC") with open(self.tmpfname, 'rb') as fi: self.assertRaises(TypeError, a.fromfile, fi, None) def test_fromfile_erros(self): with open(self.tmpfname, 'wb') as fo: fo.write(b'0123456789') self.assertFileSize(10) a = bitarray() with open(self.tmpfname, 'wb') as fi: self.assertRaisesMessage(UnsupportedOperation, "read", a.fromfile, fi) with open(self.tmpfname, 'r') as fi: self.assertRaisesMessage(TypeError, ".read() did not return " "'bytes', got 'str'", a.fromfile, fi) def test_frombytes_invalid_reader(self): class Reader: def read(self, n): return 12 a = bitarray() f = Reader() self.assertRaisesMessage(TypeError, ".read() did not return " "'bytes', got 'int'", a.fromfile, f) def test_from_large_files(self): for N in range(65534, 65538): data = os.urandom(N) with open(self.tmpfname, 'wb') as fo: fo.write(data) a = bitarray() with open(self.tmpfname, 'rb') as fi: a.fromfile(fi) self.assertEqual(len(a), 8 * N) self.assertEqual(a.nbytes, N) self.assertEqual(a.tobytes(), data) self.check_obj(a) def test_fromfile_extend_existing(self): with open(self.tmpfname, 'wb') as fo: fo.write(b'Foo') foo_le = '01100010 11110110 11110110' for n in range(20): a = bitarray(n * '1', endian='little') with open(self.tmpfname, 'rb') as fi: a.fromfile(fi) self.assertEqual(a, bitarray(n * '1' + foo_le)) self.check_obj(a) 
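# ---------------------------------------------------------------------
# Editor's sketch (illustrative only, not part of the original test
# suite): .tofile() writes the raw buffer and .fromfile() appends whole
# bytes read from a file-like object, so a round trip through an
# in-memory stream restores the original bits when the length is a
# multiple of 8 (otherwise the pad bits are written out and read back
# as extra bits).  ``_ba_sketch`` is a local alias for this sketch.
from io import BytesIO
from bitarray import bitarray as _ba_sketch

_f = BytesIO()
_orig = _ba_sketch('01000110 01101111 01101111', endian='big')  # b'Foo'
_orig.tofile(_f)
assert _f.getvalue() == b'Foo'
_f.seek(0)
_back = _ba_sketch(endian='big')
_back.fromfile(_f)
assert _back == _orig
# ---------------------------------------------------------------------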
def test_fromfile_n(self): a = bitarray(b'ABCDEFGHIJ') with open(self.tmpfname, 'wb') as fo: a.tofile(fo) self.assertFileSize(10) with open(self.tmpfname, 'rb') as f: a = bitarray() a.fromfile(f, 0); self.assertEqual(a.tobytes(), b'') a.fromfile(f, 1); self.assertEqual(a.tobytes(), b'A') f.read(1) # skip B a.fromfile(f, 1); self.assertEqual(a.tobytes(), b'AC') a = bitarray() a.fromfile(f, 2); self.assertEqual(a.tobytes(), b'DE') a.fromfile(f, 1); self.assertEqual(a.tobytes(), b'DEF') a.fromfile(f, 0); self.assertEqual(a.tobytes(), b'DEF') a.fromfile(f); self.assertEqual(a.tobytes(), b'DEFGHIJ') a.fromfile(f); self.assertEqual(a.tobytes(), b'DEFGHIJ') self.check_obj(a) a = bitarray() with open(self.tmpfname, 'rb') as f: f.read(1) self.assertRaises(EOFError, a.fromfile, f, 10) # check that although we received an EOFError, the bytes were read self.assertEqual(a.tobytes(), b'BCDEFGHIJ') a = bitarray() with open(self.tmpfname, 'rb') as f: # negative values - like ommiting the argument a.fromfile(f, -1) self.assertEqual(a.tobytes(), b'ABCDEFGHIJ') self.assertRaises(EOFError, a.fromfile, f, 1) def test_fromfile_BytesIO(self): f = BytesIO(b'somedata') a = bitarray() a.fromfile(f, 4) self.assertEqual(len(a), 32) self.assertEqual(a.tobytes(), b'some') a.fromfile(f) self.assertEqual(len(a), 64) self.assertEqual(a.tobytes(), b'somedata') self.check_obj(a) def test_tofile_empty(self): a = bitarray() with open(self.tmpfname, 'wb') as f: a.tofile(f) self.assertFileSize(0) def test_tofile_Foo(self): a = bitarray('0100011 001101111 01101111', endian='big') b = a.copy() with open(self.tmpfname, 'wb') as f: a.tofile(f) self.assertEQUAL(a, b) self.assertFileSize(3) self.assertEqual(self.read_file(), b'Foo') def test_tofile_random(self): for a in self.randombitarrays(): with open(self.tmpfname, 'wb') as fo: a.tofile(fo) n = a.nbytes self.assertFileSize(n) raw = self.read_file() self.assertEqual(len(raw), n) self.assertEqual(raw, a.tobytes()) def test_tofile_errors(self): n = 100 a = bitarray(8 * n) self.assertRaises(TypeError, a.tofile) with open(self.tmpfname, 'wb') as f: a.tofile(f) self.assertFileSize(n) # write to closed file self.assertRaises(ValueError, a.tofile, f) with open(self.tmpfname, 'w') as f: self.assertRaises(TypeError, a.tofile, f) with open(self.tmpfname, 'rb') as f: self.assertRaises(Exception, a.tofile, f) def test_tofile_large(self): n = 100_000 a = zeros(8 * n) a[2::37] = 1 with open(self.tmpfname, 'wb') as f: a.tofile(f) self.assertFileSize(n) raw = self.read_file() self.assertEqual(len(raw), n) self.assertEqual(raw, a.tobytes()) def test_tofile_ones(self): for n in range(20): a = n * bitarray('1', endian='little') with open(self.tmpfname, 'wb') as fo: a.tofile(fo) raw = self.read_file() self.assertEqual(len(raw), a.nbytes) # when we fill the pad bits in a, we can compare a.fill() b = bitarray(raw, endian='little') self.assertEqual(a, b) def test_tofile_BytesIO(self): for n in list(range(10)) + list(range(65534, 65538)): data = os.urandom(n) a = bitarray(data, 'big') self.assertEqual(a.nbytes, n) f = BytesIO() a.tofile(f) self.assertEqual(f.getvalue(), data) @skipIf(is_pypy) def test_mmap(self): with open(self.tmpfname, 'wb') as fo: fo.write(1000 * b'\0') with open(self.tmpfname, 'r+b') as f: # see issue #141 with mmap.mmap(f.fileno(), 0) as mapping: a = bitarray(buffer=mapping, endian='little') info = a.buffer_info() self.assertFalse(info.readonly) self.assertTrue(info.imported) self.assertEqual(a, zeros(8000)) a[::2] = True # not sure this is necessary, without 'del a', I get: # 
BufferError: cannot close exported pointers exist del a self.assertEqual(self.read_file(), 1000 * b'\x55') # pyodide hits emscripten mmap bug @skipIf(pyodide or is_pypy) def test_mmap_2(self): with open(self.tmpfname, 'wb') as fo: fo.write(1000 * b'\x22') with open(self.tmpfname, 'r+b') as f: a = bitarray(buffer=mmap.mmap(f.fileno(), 0), endian='little') info = a.buffer_info() self.assertFalse(info.readonly) self.assertTrue(info.imported) self.assertEqual(a, 1000 * bitarray('0100 0100')) a[::4] = 1 self.assertEqual(self.read_file(), 1000 * b'\x33') @skipIf(is_pypy) def test_mmap_readonly(self): with open(self.tmpfname, 'wb') as fo: fo.write(994 * b'\x89' + b'Veedon') with open(self.tmpfname, 'rb') as fi: # readonly m = mmap.mmap(fi.fileno(), 0, access=mmap.ACCESS_READ) a = bitarray(buffer=m, endian='big') info = a.buffer_info() self.assertTrue(info.readonly) self.assertTrue(info.imported) self.assertRaisesMessage(TypeError, "cannot modify read-only memory", a.__setitem__, 0, 1) self.assertEqual(a[:8 * 994], 994 * bitarray('1000 1001')) self.assertEqual(a[8 * 994:].tobytes(), b'Veedon') # ----------------------------- Decode Tree --------------------------------- alphabet_code = { ' ': bitarray('001'), '.': bitarray('0101010'), 'a': bitarray('0110'), 'b': bitarray('0001100'), 'c': bitarray('000011'), 'd': bitarray('01011'), 'e': bitarray('111'), 'f': bitarray('010100'), 'g': bitarray('101000'), 'h': bitarray('00000'), 'i': bitarray('1011'), 'j': bitarray('0111101111'), 'k': bitarray('00011010'), 'l': bitarray('01110'), 'm': bitarray('000111'), 'n': bitarray('1001'), 'o': bitarray('1000'), 'p': bitarray('101001'), 'q': bitarray('00001001101'), 'r': bitarray('1101'), 's': bitarray('1100'), 't': bitarray('0100'), 'u': bitarray('000100'), 'v': bitarray('0111100'), 'w': bitarray('011111'), 'x': bitarray('0000100011'), 'y': bitarray('101010'), 'z': bitarray('00011011110') } class DecodeTreeTests(unittest.TestCase, Util): def test_create(self): dt = decodetree(alphabet_code) self.assertEqual(type(dt), decodetree) self.assertRaises(TypeError, decodetree, None) self.assertRaises(TypeError, decodetree, 'foo') d = dict(alphabet_code) d['-'] = bitarray() self.assertRaises(ValueError, decodetree, d) def test_ambiguous_code(self): for d in [ {'a': bitarray('0'), 'b': bitarray('0'), 'c': bitarray('1')}, {'a': bitarray('01'), 'b': bitarray('01'), 'c': bitarray('1')}, {'a': bitarray('0'), 'b': bitarray('01')}, {'a': bitarray('0'), 'b': bitarray('11'), 'c': bitarray('111')}, ]: self.assertRaises(ValueError, decodetree, d) @skipIf(is_pypy) def test_sizeof(self): dt = decodetree({'.': bitarray('1')}) self.assertTrue(0 < sys.getsizeof(dt) < 100) dt = decodetree({'a': zeros(20)}) self.assertTrue(sys.getsizeof(dt) > 200) def test_nodes(self): for n in range(1, 20): dt = decodetree({'a': zeros(n)}) self.assertEqual(dt.nodes(), n + 1) self.assertFalse(dt.complete()) dt = decodetree({'I': bitarray('1'), 'l': bitarray('01'), 'a': bitarray('001'), 'n': bitarray('000')}) self.assertEqual(dt.nodes(), 7) dt = decodetree(alphabet_code) self.assertEqual(dt.nodes(), 70) def test_complete(self): dt = decodetree({'.': bitarray('1')}) self.assertEqual(type(dt.complete()), bool) self.assertFalse(dt.complete()) dt = decodetree({'a': bitarray('0'), 'b': bitarray('1')}) self.assertTrue(dt.complete()) dt = decodetree({'a': bitarray('0'), 'b': bitarray('11')}) self.assertFalse(dt.complete()) dt = decodetree({'a': bitarray('0'), 'b': bitarray('11'), 'c': bitarray('10')}) self.assertTrue(dt.complete()) def test_todict(self): t = 
decodetree(alphabet_code) d = t.todict() self.assertEqual(type(d), dict) self.assertEqual(d, alphabet_code) def test_decode(self): t = decodetree(alphabet_code) a = bitarray('1011 01110 0110 1001') self.assertEqual(list(a.decode(t)), ['i', 'l', 'a', 'n']) self.assertEqual(''.join(a.decode(t)), 'ilan') a = bitarray() self.assertEqual(list(a.decode(t)), []) self.check_obj(a) @skipIf(is_pypy) def test_large(self): d = {i: bitarray(bool((1 << j) & i) for j in range(10)) for i in range(1024)} t = decodetree(d) self.assertEqual(t.todict(), d) self.assertEqual(t.nodes(), 2047) self.assertTrue(t.complete()) self.assertTrue(sys.getsizeof(t) > 10000) # ------------------ variable length encoding and decoding ------------------ class PrefixCodeTests(unittest.TestCase, Util): def test_encode_string(self): a = bitarray() a.encode(alphabet_code, '') self.assertEqual(a, bitarray()) a.encode(alphabet_code, 'a') self.assertEqual(a, bitarray('0110')) def test_encode_list(self): a = bitarray() a.encode(alphabet_code, []) self.assertEqual(a, bitarray()) a.encode(alphabet_code, ['e']) self.assertEqual(a, bitarray('111')) def test_encode_iter(self): a = bitarray() d = {0: bitarray('0'), 1: bitarray('1')} a.encode(d, iter([0, 1, 1, 0])) self.assertEqual(a, bitarray('0110')) def foo(): for c in 1, 1, 0, 0, 1, 1: yield c a.clear() a.encode(d, foo()) a.encode(d, range(2)) self.assertEqual(a, bitarray('11001101')) self.assertEqual(d, {0: bitarray('0'), 1: bitarray('1')}) def test_encode_symbol_not_in_code(self): d = dict(alphabet_code) a = bitarray() a.encode(d, 'is') self.assertEqual(a, bitarray('1011 1100')) self.assertRaises(ValueError, a.encode, d, 'ilAn') msg = "symbol not defined in prefix code: None" self.assertRaisesMessage(ValueError, msg, a.encode, d, [None, 2]) def test_encode_not_iterable(self): d = {'a': bitarray('0'), 'b': bitarray('1')} a = bitarray() a.encode(d, 'abba') self.assertRaises(TypeError, a.encode, d, 42) self.assertRaises(TypeError, a.encode, d, 1.3) self.assertRaises(TypeError, a.encode, d, None) self.assertEqual(a, bitarray('0110')) def test_check_codedict_encode(self): a = bitarray() self.assertRaises(TypeError, a.encode, None, '') self.assertRaises(ValueError, a.encode, {}, '') self.assertRaises(TypeError, a.encode, {'a': 'b'}, 'a') self.assertRaises(ValueError, a.encode, {'a': bitarray()}, 'a') self.assertEqual(len(a), 0) def test_check_codedict_decode(self): a = bitarray('1100101') self.assertRaises(TypeError, a.decode, 0) self.assertRaises(ValueError, a.decode, {}) self.assertRaises(TypeError, a.decode, {'a': 42}) self.assertRaises(TypeError, a.decode, {'a': []}) self.assertRaises(ValueError, a.decode, {'a': bitarray()}) self.assertEqual(a, bitarray('1100101')) def test_no_iterdecode(self): a = bitarray() # removed in bitarray 3.0 self.assertRaises(AttributeError, a.__getattribute__, 'iterdecode') def test_decode_simple(self): d = {'I': bitarray('1'), 'l': bitarray('01'), 'a': bitarray('001'), 'n': bitarray('000')} dcopy = dict(d) a = bitarray('101001000') res = list("Ilan") self.assertEqual(list(a.decode(d)), res) self.assertEqual(d, dcopy) self.assertEqual(a, bitarray('101001000')) def test_decode_type(self): a = bitarray('0110') it = a.decode(alphabet_code) self.assertIsType(it, 'decodeiterator') self.assertEqual(list(it), ['a']) def test_decode_remove(self): d = {'I': bitarray('1'), 'l': bitarray('01'), 'a': bitarray('001'), 'n': bitarray('000')} t = decodetree(d) a = bitarray('101001000') it = a.decode(t) del t # remove tree self.assertEqual(''.join(it), "Ilan") it = 
a.decode(d) del a self.assertEqual(''.join(it), "Ilan") def test_decode_empty(self): d = {'a': bitarray('1')} a = bitarray() self.assertEqual(list(a.decode(d)), []) self.assertEqual(d, {'a': bitarray('1')}) self.assertEqual(len(a), 0) def test_decode_incomplete(self): d = {'a': bitarray('0'), 'b': bitarray('111')} a = bitarray('00011') msg = "incomplete prefix code at position 3" self.assertRaisesMessage(ValueError, msg, list, a.decode(d)) it = a.decode(d) self.assertIsType(it, 'decodeiterator') self.assertRaisesMessage(ValueError, msg, list, it) t = decodetree(d) self.assertRaisesMessage(ValueError, msg, list, a.decode(t)) self.assertEqual(a, bitarray('00011')) self.assertEqual(d, {'a': bitarray('0'), 'b': bitarray('111')}) self.assertEqual(t.todict(), d) def test_decode_incomplete_2(self): a = bitarray() a.encode(alphabet_code, "now we rise") x = len(a) a.extend('00') msg = "incomplete prefix code at position %d" % x self.assertRaisesMessage(ValueError, msg, list, a.decode(alphabet_code)) def test_decode_no_term(self): d = {'a': bitarray('0'), 'b': bitarray('111')} a = bitarray('011') it = a.decode(d) self.assertEqual(next(it), 'a') self.assertRaisesMessage(ValueError, "incomplete prefix code at position 1", next, it) self.assertEqual(a, bitarray('011')) def test_decode_buggybitarray(self): d = dict(alphabet_code) # i s t a = bitarray('1011 1100 0100 011110111001101001') msg = "prefix code unrecognized in bitarray at position 12 .. 21" self.assertRaisesMessage(ValueError, msg, list, a.decode(d)) t = decodetree(d) self.assertRaisesMessage(ValueError, msg, list, a.decode(t)) self.check_obj(a) self.assertEqual(t.todict(), d) def test_decode_buggybitarray2(self): d = {'a': bitarray('0')} a = bitarray('1') it = a.decode(d) self.assertRaises(ValueError, next, it) self.assertEqual(a, bitarray('1')) self.assertEqual(d, {'a': bitarray('0')}) def test_decode_buggybitarray3(self): d = {'a': bitarray('00'), 'b': bitarray('01')} a = bitarray('1') self.assertRaises(ValueError, next, a.decode(d)) t = decodetree(d) self.assertRaises(ValueError, next, a.decode(t)) self.assertEqual(a, bitarray('1')) self.assertEqual(d, {'a': bitarray('00'), 'b': bitarray('01')}) self.assertEqual(t.todict(), d) def test_decode_random(self): pat1 = re.compile(r'incomplete prefix code.+\s(\d+)') pat2 = re.compile(r'prefix code unrecognized.+\s(\d+)\s*\.\.\s*(\d+)') t = decodetree(alphabet_code) for a in self.randombitarrays(): try: a.decode(t) except ValueError as e: msg = str(e) m1 = pat1.match(msg) m2 = pat2.match(msg) self.assertFalse(m1 and m2) if m1: i = int(m1.group(1)) if m2: i, j = int(m2.group(1)), int(m2.group(2)) self.assertFalse(a[i:j] in alphabet_code.values()) a[:i].decode(t) def test_decode_ambiguous_code(self): for d in [ {'a': bitarray('0'), 'b': bitarray('0'), 'c': bitarray('1')}, {'a': bitarray('01'), 'b': bitarray('01'), 'c': bitarray('1')}, {'a': bitarray('0'), 'b': bitarray('01')}, {'a': bitarray('0'), 'b': bitarray('11'), 'c': bitarray('111')}, ]: a = bitarray() self.assertRaises(ValueError, a.decode, d) self.check_obj(a) def test_miscitems(self): d = {None : bitarray('00'), 0 : bitarray('110'), 1 : bitarray('111'), '' : bitarray('010'), 2 : bitarray('011')} a = bitarray() a.encode(d, [None, 0, 1, '', 2]) self.assertEqual(a, bitarray('00110111010011')) self.assertEqual(list(a.decode(d)), [None, 0, 1, '', 2]) # iterator it = a.decode(d) self.assertTrue(next(it) is None) self.assertEqual(next(it), 0) self.assertEqual(next(it), 1) self.assertEqual(next(it), '') self.assertEqual(next(it), 2) 
self.assertRaises(StopIteration, next, it) def test_quick_example(self): a = bitarray() message = 'the quick brown fox jumps over the lazy dog.' a.encode(alphabet_code, message) self.assertEqual(a, bitarray( # t h e q u i c k '0100 00000 111 001 00001001101 000100 1011 000011 00011010 001' # b r o w n f o x '0001100 1101 1000 011111 1001 001 010100 1000 0000100011 001' # j u m p s o v e r '0111101111 000100 000111 101001 1100 001 1000 0111100 111 1101' # t h e l a z y '001 0100 00000 111 001 01110 0110 00011011110 101010 001' # d o g . '01011 1000 101000 0101010')) self.assertEqual(''.join(a.decode(alphabet_code)), message) t = decodetree(alphabet_code) self.assertEqual(''.join(a.decode(t)), message) self.check_obj(a) # --------------------------- Buffer Import --------------------------------- class BufferImportTests(unittest.TestCase, Util): def test_bytes(self): b = 100 * b'\0' a = bitarray(buffer=b) info = a.buffer_info() self.assertEqual(info.alloc, 0) self.assertTrue(info.readonly) self.assertTrue(info.imported) self.assertRaises(TypeError, a.setall, 1) self.assertRaises(TypeError, a.clear) self.assertEqual(a, zeros(800)) self.check_obj(a) @skipIf(is_pypy) def test_bytearray(self): b = bytearray(100 * [0]) a = bitarray(buffer=b, endian='little') info = a.buffer_info() self.assertEqual(info.alloc, 0) self.assertFalse(info.readonly) self.assertTrue(info.imported) a[0] = 1 self.assertEqual(b[0], 1) a[7] = 1 self.assertEqual(b[0], 129) a[:] = 1 self.assertEqual(b, bytearray(100 * [255])) self.assertRaises(BufferError, a.pop) a[8:16] = bitarray('10000010', endian='big') self.assertEqual(b, bytearray([255, 65] + 98 * [255])) self.assertEqual(a.tobytes(), b) for n in 7, 9: self.assertRaises(BufferError, a.__setitem__, slice(8, 16), bitarray(n)) b[1] = b[2] = 255 self.assertEqual(b, bytearray(100 * [255])) self.assertEqual(a, 800 * bitarray('1')) self.check_obj(a) @skipIf(is_pypy) def test_array(self): a = array.array('B', [0, 255, 64]) b = bitarray(None, 'little', a) self.assertEqual(b, bitarray('00000000 11111111 00000010')) a[1] = 32 self.assertEqual(b, bitarray('00000000 00000100 00000010')) b[3] = 1 self.assertEqual(a.tolist(), [8, 32, 64]) self.check_obj(b) def test_bitarray(self): a = urandom_2(10000, 'little') b = bitarray(endian='little', buffer=a) # a and b are two distinct bitarrays that share the same buffer now self.assertFalse(a is b) a_info = a.buffer_info() self.assertFalse(a_info.imported) self.assertEqual(a_info.exports, 1) b_info = b.buffer_info() self.assertTrue(b_info.imported) self.assertEqual(b_info.exports, 0) # buffer address is the same self.assertEqual(a_info.address, b_info.address) self.assertFalse(a is b) self.assertEqual(a, b) b[437:461] = 0 self.assertEqual(a, b) a[327:350] = 1 self.assertEqual(a, b) b[101:1187] <<= 79 self.assertEqual(a, b) a[100:9800:5] = 1 self.assertEqual(a, b) self.assertRaisesMessage( BufferError, "cannot resize bitarray that is exporting buffers", a.pop) self.assertRaisesMessage( BufferError, "cannot resize imported buffer", b.pop) self.check_obj(a) self.check_obj(b) def test_copy(self): a = bitarray(buffer=b'XA') self.assertTrue(a.readonly) for b in [a.copy(), 3 * a, 5 * a, a & bitarray(16), a >> 2, ~a, a + bitarray(8*'1'), a[:], a[::2], a[[0, 1]], a[bitarray(16)]]: self.assertFalse(b.readonly) self.check_obj(b) @skipIf(is_pypy) def test_bitarray_shared_sections(self): a = urandom_2(0x2000, 'big') b = bitarray(buffer=memoryview(a)[0x100:0x300]) self.assertEqual(b.buffer_info().address, a.buffer_info().address + 0x100) c = 
bitarray(buffer=memoryview(a)[0x200:0x800]) self.assertEqual(c.buffer_info().address, a.buffer_info().address + 0x200) self.assertEqual(a[8 * 0x100 : 8 * 0x300], b) self.assertEqual(a[8 * 0x200 : 8 * 0x800], c) a.setall(0) b.setall(1) c.setall(0) d = bitarray(0x2000) d.setall(0) d[8 * 0x100 : 8 * 0x200] = 1 self.assertEqual(a, d) def test_bitarray_range(self): for n in range(100): a = urandom_2(n) b = bitarray(buffer=a, endian=a.endian) # an imported buffer will never have any pad bits self.assertEqual(b.padbits, 0) self.assertEqual(len(b) % 8, 0) self.assertEQUAL(b[:n], a) self.check_obj(a) self.check_obj(b) def test_bitarray_chain(self): d = [urandom_2(64, 'big')] for n in range(1, 100): d.append(bitarray(endian='big', buffer=d[n - 1])) self.assertEqual(d[99], d[0]) d[0].setall(1) for a in d: self.assertEqual(len(a), 64) self.assertTrue(a.all()) self.check_obj(a) def test_frozenbitarray(self): a = frozenbitarray('10011011 011') self.assertTrue(a.readonly) self.check_obj(a) b = bitarray(buffer=a) self.assertTrue(b.readonly) # also readonly self.assertRaises(TypeError, b.__setitem__, 1, 0) self.check_obj(b) def test_invalid_buffer(self): # these objects do not expose a buffer for arg in (123, 1.23, [1, 2, 3], (1, 2, 3), {1: 2}, set([1, 2, 3]),): self.assertRaises(TypeError, bitarray, buffer=arg) @skipIf(is_pypy) def test_del_import_object(self): b = bytearray(100 * [0]) a = bitarray(buffer=b) del b self.assertEqual(a, zeros(800)) a.setall(1) self.assertTrue(a.all()) self.check_obj(a) @skipIf(is_pypy) def test_readonly_errors(self): a = bitarray(buffer=b'A') info = a.buffer_info() self.assertTrue(info.readonly) self.assertTrue(info.imported) self.assertRaises(TypeError, a.append, True) self.assertRaises(TypeError, a.bytereverse) self.assertRaises(TypeError, a.clear) self.assertRaises(TypeError, a.encode, {'a': bitarray('0')}, 'aa') self.assertRaises(TypeError, a.extend, [0, 1, 0]) self.assertRaises(TypeError, a.fill) self.assertRaises(TypeError, a.frombytes, b'') self.assertRaises(TypeError, a.insert, 0, 1) self.assertRaises(TypeError, a.invert) self.assertRaises(TypeError, a.pack, b'\0\0\xff') self.assertRaises(TypeError, a.pop) self.assertRaises(TypeError, a.remove, 1) self.assertRaises(TypeError, a.reverse) self.assertRaises(TypeError, a.setall, 0) self.assertRaises(TypeError, a.sort) self.assertRaises(TypeError, a.__delitem__, 0) self.assertRaises(TypeError, a.__delitem__, slice(None, None, 2)) self.assertRaises(TypeError, a.__setitem__, 0, 0) self.assertRaises(TypeError, a.__iadd__, bitarray(8)) self.assertRaises(TypeError, a.__ior__, bitarray(8)) self.assertRaises(TypeError, a.__ixor__, bitarray(8)) self.assertRaises(TypeError, a.__irshift__, 1) self.assertRaises(TypeError, a.__ilshift__, 1) self.check_obj(a) @skipIf(is_pypy) def test_resize_errors(self): a = bitarray(buffer=bytearray([123])) info = a.buffer_info() self.assertFalse(info.readonly) self.assertTrue(info.imported) self.assertRaises(BufferError, a.append, True) self.assertRaises(BufferError, a.clear) self.assertRaises(BufferError, a.encode, {'a': bitarray('0')}, 'aa') self.assertRaises(BufferError, a.extend, [0, 1, 0]) self.assertRaises(BufferError, a.frombytes, b'a') self.assertRaises(BufferError, a.insert, 0, 1) self.assertRaises(BufferError, a.pack, b'\0\0\xff') self.assertRaises(BufferError, a.pop) self.assertRaises(BufferError, a.remove, 1) self.assertRaises(BufferError, a.__delitem__, 0) self.check_obj(a) # --------------------------- Buffer Export --------------------------------- class 
BufferExportTests(unittest.TestCase, Util): def test_read_simple(self): a = bitarray('01000001 01000010 01000011', endian='big') v = memoryview(a) self.assertFalse(v.readonly) self.assertEqual(a.buffer_info().exports, 1) self.assertEqual(len(v), 3) self.assertEqual(v[0], 65) self.assertEqual(v.tobytes(), b'ABC') a[13] = 1 self.assertEqual(v.tobytes(), b'AFC') w = memoryview(a) # a second buffer export self.assertFalse(w.readonly) self.assertEqual(a.buffer_info().exports, 2) self.check_obj(a) def test_many_exports(self): a = bitarray('01000111 01011111') d = {} # put bitarrays in dict to key object around for n in range(1, 20): d[n] = bitarray(buffer=a) self.assertEqual(a.buffer_info().exports, n) self.assertEqual(len(d[n]), 16) self.check_obj(a) def test_range(self): for n in range(100): a = bitarray(n) v = memoryview(a) self.assertEqual(len(v), a.nbytes) info = a.buffer_info() self.assertFalse(info.readonly) self.assertFalse(info.imported) self.assertEqual(info.exports, 1) self.check_obj(a) def test_read_random(self): a = bitarray(os.urandom(100)) v = memoryview(a) self.assertEqual(len(v), 100) b = a[34 * 8 : 67 * 8] self.assertEqual(v[34:67].tobytes(), b.tobytes()) self.assertEqual(v.tobytes(), a.tobytes()) self.check_obj(a) def test_resize(self): a = bitarray('011', endian='big') v = memoryview(a) self.assertFalse(v.readonly) self.assertRaises(BufferError, a.append, 1) self.assertRaises(BufferError, a.clear) self.assertRaises(BufferError, a.encode, {'a': bitarray('0')}, 'aa') self.assertRaises(BufferError, a.extend, '0') self.assertRaises(BufferError, a.frombytes, b'\0') self.assertRaises(BufferError, a.insert, 0, 1) self.assertRaises(BufferError, a.pack, b'\0') self.assertRaises(BufferError, a.pop) self.assertRaises(BufferError, a.remove, 1) self.assertRaises(BufferError, a.__delitem__, slice(0, 8)) a.fill() self.assertEqual(v.tobytes(), a.tobytes()) self.check_obj(a) def test_frozenbitarray(self): a = frozenbitarray(40) v = memoryview(a) self.assertTrue(v.readonly) self.assertEqual(len(v), 5) self.assertEqual(v.tobytes(), a.tobytes()) self.check_obj(a) def test_write(self): a = zeros(8000) v = memoryview(a) self.assertFalse(v.readonly) v[500] = 255 self.assertEqual(a[3999:4009], bitarray('0111111110')) a[4003] = 0 self.assertEqual(a[3999:4009], bitarray('0111011110')) v[301:304] = b'ABC' self.assertEqual(a[300 * 8 : 305 * 8].tobytes(), b'\x00ABC\x00') self.check_obj(a) def test_write_memoryview_slice(self): a = zeros(40) m = memoryview(a) v = m[1:4] v[0] = 65 v[1] = 66 v[2] = 67 self.assertEqual(a.tobytes(), b'\x00ABC\x00') m[1:4] = b'XYZ' self.assertEqual(a.tobytes(), b'\x00XYZ\x00') self.check_obj(a) # --------------------------------------------------------------------------- class FrozenbitarrayTests(unittest.TestCase, Util): def test_init(self): a = frozenbitarray('110') self.assertEqual(a, bitarray('110')) self.assertEqual(a.to01(), '110') self.assertIsInstance(a, bitarray) self.assertEqual(type(a), frozenbitarray) self.assertTrue(a.readonly) self.check_obj(a) a = frozenbitarray(bitarray()) self.assertEQUAL(a, frozenbitarray()) self.assertEqual(type(a), frozenbitarray) for endian in 'big', 'little': a = frozenbitarray(0, endian) self.assertEqual(a.endian, endian) self.assertEqual(type(a), frozenbitarray) a = frozenbitarray(bitarray(0, endian)) self.assertEqual(a.endian, endian) self.assertEqual(type(a), frozenbitarray) def test_methods(self): # test a few methods which do not raise the TypeError a = frozenbitarray('1101100') self.assertEqual(a[2], 0) 
self.assertEqual(a[:4].to01(), '1101') self.assertEqual(a.count(), 4) self.assertEqual(a.index(0), 2) b = a.copy() self.assertEqual(b, a) self.assertEqual(type(b), frozenbitarray) self.assertEqual(len(b), 7) self.assertFalse(b.all()) self.assertTrue(b.any()) self.check_obj(a) def test_init_from_bitarray(self): for a in self.randombitarrays(): b = frozenbitarray(a) self.assertFalse(b is a) self.assertEQUAL(b, a) c = frozenbitarray(b) self.assertFalse(c is b) self.assertEQUAL(c, b) self.assertEqual(hash(c), hash(b)) self.check_obj(b) def test_init_from_misc(self): tup = 0, 1, 0, 1, 1, False, True for obj in list(tup), tup, iter(tup), bitarray(tup): a = frozenbitarray(obj) self.assertEqual(type(a), frozenbitarray) self.assertEqual(a, bitarray(tup)) a = frozenbitarray(b'AB', "big") self.assertEqual(a.to01(), "0100000101000010") self.assertEqual(a.endian, "big") def test_init_bytes_bytearray(self): for x in b'\x80ABCD', bytearray(b'\x80ABCD'): a = frozenbitarray(x, 'little') self.assertEqual(type(a), frozenbitarray) self.assertEqual(len(a), 8 * len(x)) self.assertEqual(a.tobytes(), x) self.assertEqual(a[:8].to01(), '00000001') self.check_obj(a) def test_repr(self): a = frozenbitarray() self.assertEqual(repr(a), "frozenbitarray()") self.assertEqual(str(a), "frozenbitarray()") a = frozenbitarray('10111') self.assertEqual(repr(a), "frozenbitarray('10111')") self.assertEqual(str(a), "frozenbitarray('10111')") def test_immutable(self): a = frozenbitarray('111') self.assertRaises(TypeError, a.append, True) self.assertRaises(TypeError, a.bytereverse) self.assertRaises(TypeError, a.clear) self.assertRaises(TypeError, a.encode, {'a': bitarray('0')}, 'aa') self.assertRaises(TypeError, a.extend, [0, 1, 0]) self.assertRaises(TypeError, a.fill) self.assertRaises(TypeError, a.frombytes, b'') self.assertRaises(TypeError, a.insert, 0, 1) self.assertRaises(TypeError, a.invert) self.assertRaises(TypeError, a.pack, b'\0\0\xff') self.assertRaises(TypeError, a.pop) self.assertRaises(TypeError, a.remove, 1) self.assertRaises(TypeError, a.reverse) self.assertRaises(TypeError, a.setall, 0) self.assertRaises(TypeError, a.sort) self.assertRaises(TypeError, a.__delitem__, 0) self.assertRaises(TypeError, a.__delitem__, slice(None, None, 2)) self.assertRaises(TypeError, a.__setitem__, 0, 0) self.assertRaises(TypeError, a.__iadd__, bitarray('010')) self.assertRaises(TypeError, a.__ior__, bitarray('100')) self.assertRaises(TypeError, a.__ixor__, bitarray('110')) self.assertRaises(TypeError, a.__irshift__, 1) self.assertRaises(TypeError, a.__ilshift__, 1) self.check_obj(a) def test_copy(self): a = frozenbitarray('101') # not only .copy() creates new frozenbitarray which are read-only for b in [a, a.copy(), 3 * a, 5 * a, a & bitarray('110'), a >> 2, ~a, a + bitarray(8*'1'), a[:], a[::2], a[[0, 1]], a[bitarray('011')]]: self.assertEqual(type(b), frozenbitarray) self.assertTrue(b.readonly) self.check_obj(b) def test_freeze(self): # not so much a test for frozenbitarray, but how it is initialized a = bitarray(78) self.assertFalse(a.readonly) # not readonly a._freeze() self.assertTrue(a.readonly) # readonly def test_memoryview(self): a = frozenbitarray('01000001 01000010', 'big') v = memoryview(a) self.assertEqual(v.tobytes(), b'AB') self.assertRaises(TypeError, v.__setitem__, 0, 0x7c) def test_buffer_import_readonly(self): b = bytes([15, 95, 128]) a = frozenbitarray(buffer=b, endian='big') self.assertEQUAL(a, bitarray('00001111 01011111 10000000', 'big')) info = a.buffer_info() self.assertTrue(info.readonly) 
self.assertTrue(info.imported) @skipIf(is_pypy) def test_buffer_import_writable(self): c = bytearray([15, 95]) self.assertRaisesMessage( TypeError, "cannot import writable buffer into frozenbitarray", frozenbitarray, buffer=c) def test_as_set(self): a = frozenbitarray('1') b = frozenbitarray('11') c = frozenbitarray('01') d = frozenbitarray('011') s = set([a, b, c, d]) self.assertEqual(len(s), 4) self.assertTrue(d in s) self.assertFalse(frozenbitarray('0') in s) def test_as_dictkey(self): a = frozenbitarray('01') b = frozenbitarray('1001') d = {a: 123, b: 345} self.assertEqual(d[frozenbitarray('01')], 123) self.assertEqual(d[frozenbitarray(b)], 345) def test_as_dictkey2(self): # taken slightly modified from issue #74 a1 = frozenbitarray("10") a2 = frozenbitarray("00") dct = {a1: "one", a2: "two"} a3 = frozenbitarray("10") self.assertEqual(a3, a1) self.assertEqual(dct[a3], 'one') def test_mix(self): a = bitarray('110') b = frozenbitarray('0011') self.assertEqual(a + b, bitarray('1100011')) a.extend(b) self.assertEqual(a, bitarray('1100011')) def test_hash_endianness_simple(self): a = frozenbitarray('1', 'big') b = frozenbitarray('1', 'little') self.assertEqual(a, b) self.assertEqual(hash(a), hash(b)) d = {a: 'value'} self.assertEqual(d[b], 'value') self.assertEqual(len(set([a, b])), 1) def test_hash_endianness_random(self): for a in self.randombitarrays(): a = frozenbitarray(a) b = frozenbitarray(a, self.opposite_endian(a.endian)) self.assertEqual(a, b) self.assertNotEqual(a.endian, b.endian) self.assertEqual(hash(a), hash(b)) d = {a: 1, b: 2} self.assertEqual(len(d), 1) def test_pickle(self): for a in self.randombitarrays(): f = frozenbitarray(a) f.foo = 42 # unlike bitarray itself, we can have attributes g = pickle.loads(pickle.dumps(f)) self.assertEqual(f, g) self.assertEqual(f.endian, g.endian) self.assertTrue(str(g).startswith('frozenbitarray')) self.assertTrue(g.readonly) self.check_obj(a) self.check_obj(f) self.check_obj(g) self.assertEqual(g.foo, 42) def test_bytes_bytearray(self): for a in self.randombitarrays(): a = frozenbitarray(a) self.assertEqual(bytes(a), a.tobytes()) self.assertEqual(bytearray(a), a.tobytes()) self.check_obj(a) # --------------------------------------------------------------------------- def run(verbosity=1): import bitarray.test_util default_endian = get_default_endian() print('bitarray is installed in: %s' % os.path.dirname(__file__)) print('bitarray version: %s' % __version__) print('sys.version: %s' % sys.version) print('sys.prefix: %s' % sys.prefix) print('pointer size: %d bit' % (8 * PTRSIZE)) print('sizeof(size_t): %d' % sysinfo("size_t")); print('sizeof(bitarrayobject): %d' % sysinfo("bitarrayobject")) print('HAVE_BUILTIN_BSWAP64: %d' % sysinfo("HAVE_BUILTIN_BSWAP64")) print('default bit-endianness: %s' % default_endian) print('machine byte-order: %s' % sys.byteorder) print('Py_DEBUG: %s' % sysinfo("Py_DEBUG")) print('DEBUG: %s' % sysinfo("DEBUG")) loader = unittest.TestLoader() suite = unittest.TestSuite() suite.addTests(loader.loadTestsFromModule(sys.modules[__name__])) suite.addTests(loader.loadTestsFromModule(bitarray.test_util)) runner = unittest.TextTestRunner(verbosity=verbosity) result = runner.run(suite) _set_default_endian(default_endian) return result if __name__ == '__main__': unittest.main() bitarray-3.7.1/bitarray/test_util.py000066400000000000000000003242631505414144000175520ustar00rootroot00000000000000# Copyright (c) 2019 - 2025, Ilan Schnell; All Rights Reserved # bitarray is published under the PSF license. 
# # Author: Ilan Schnell """ Tests for bitarray.util module """ from __future__ import absolute_import import os import sys import math import array import base64 import binascii import operator import struct import shutil import tempfile import unittest from io import StringIO from functools import reduce from random import (choice, getrandbits, randrange, randint, random, sample, seed) from string import hexdigits, whitespace from collections import Counter from bitarray import (bitarray, frozenbitarray, decodetree, bits2bytes, _set_default_endian) from bitarray.test_bitarray import Util, skipIf, is_pypy, urandom_2, PTRSIZE from bitarray.util import ( zeros, ones, urandom, random_k, random_p, pprint, strip, count_n, parity, gen_primes, sum_indices, xor_indices, count_and, count_or, count_xor, any_and, subset, correspond_all, byteswap, intervals, serialize, deserialize, ba2hex, hex2ba, ba2base, base2ba, ba2int, int2ba, sc_encode, sc_decode, vl_encode, vl_decode, _huffman_tree, huffman_code, canonical_huffman, canonical_decode, ) from bitarray.util import _Random, _ssqi # type: ignore # --------------------------- zeros() ones() ----------------------------- class ZerosOnesTests(unittest.TestCase): def test_basic(self): for _ in range(50): default_endian = choice(['little', 'big']) _set_default_endian(default_endian) a = choice([zeros(0), zeros(0, None), zeros(0, endian=None), ones(0), ones(0, None), ones(0, endian=None)]) self.assertEqual(a, bitarray()) self.assertEqual(a.endian, default_endian) self.assertEqual(type(a), bitarray) endian = choice(['little', 'big', None]) n = randrange(100) a = choice([zeros(n, endian), zeros(n, endian=endian)]) self.assertEqual(a.to01(), n * "0") self.assertEqual(a.endian, endian or default_endian) b = choice([ones(n, endian), ones(n, endian=endian)]) self.assertEqual(b.to01(), n * "1") self.assertEqual(b.endian, endian or default_endian) def test_errors(self): for f in zeros, ones: self.assertRaises(TypeError, f) # no argument self.assertRaises(TypeError, f, '') self.assertRaises(TypeError, f, bitarray()) self.assertRaises(TypeError, f, []) self.assertRaises(TypeError, f, 1.0) self.assertRaises(ValueError, f, -1) # endian not string for x in 0, 1, {}, [], False, True: self.assertRaises(TypeError, f, 0, x) # endian wrong string self.assertRaises(ValueError, f, 0, 'foo') # ----------------------------- urandom() --------------------------------- class URandomTests(unittest.TestCase): def test_basic(self): for _ in range(20): default_endian = choice(['little', 'big']) _set_default_endian(default_endian) a = choice([urandom(0), urandom(0, endian=None)]) self.assertEqual(a, bitarray()) self.assertEqual(a.endian, default_endian) endian = choice(['little', 'big', None]) n = randrange(100) a = choice([urandom(n, endian), urandom(n, endian=endian)]) self.assertEqual(len(a), n) self.assertEqual(a.endian, endian or default_endian) self.assertEqual(type(a), bitarray) def test_errors(self): U = urandom self.assertRaises(TypeError, U) self.assertRaises(TypeError, U, '') self.assertRaises(TypeError, U, bitarray()) self.assertRaises(TypeError, U, []) self.assertRaises(TypeError, U, 1.0) self.assertRaises(ValueError, U, -1) self.assertRaises(TypeError, U, 0, 1) self.assertRaises(ValueError, U, 0, 'foo') def test_count(self): a = urandom(10_000_000) # see if population is within expectation self.assertTrue(abs(a.count() - 5_000_000) <= 15_811) # ---------------------------- random_k() --------------------------------- HAVE_RANDBYTES = sys.version_info[:2] >= (3, 9) 
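# Illustrative sketch (added for exposition, not part of the test suite):
# a naive reference for what util.random_k(n, k) returns -- a bitarray of
# length n with exactly k bits set at randomly chosen positions.  The
# library implementation is far more efficient for large n; the tests
# below check length, count, endianness and the distribution of outcomes.
def _naive_random_k(n, k, endian=None):
    a = zeros(n, endian)
    a[sample(range(n), k)] = 1   # set k distinct, randomly chosen indices
    return a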
@skipIf(HAVE_RANDBYTES) class Random_K_Not_Implemented(unittest.TestCase): def test_not_implemented(self): self.assertRaises(NotImplementedError, random_k, 100, 60) @skipIf(not HAVE_RANDBYTES) class Random_K_Tests(unittest.TestCase): def test_basic(self): for _ in range(250): default_endian = choice(['little', 'big']) _set_default_endian(default_endian) endian = choice(['little', 'big', None]) n = randrange(120) k = randint(0, n) a = random_k(n, k, endian) self.assertTrue(type(a), bitarray) self.assertEqual(len(a), n) self.assertEqual(a.count(), k) self.assertEqual(a.endian, endian or default_endian) def test_inputs_and_edge_cases(self): R = random_k self.assertRaises(TypeError, R) self.assertRaises(TypeError, R, 4) self.assertRaises(TypeError, R, 1, "0.5") self.assertRaises(TypeError, R, 1, p=1) self.assertRaises(TypeError, R, 11, 5.5) # see issue #239 self.assertRaises(ValueError, R, -1, 0) for k in -1, 11: # k is not 0 <= k <= n self.assertRaises(ValueError, R, 10, k) self.assertRaises(ValueError, R, 10, 7, 'foo') self.assertRaises(ValueError, R, 10, 7, endian='foo') for n in range(20): self.assertEqual(R(n, k=0), zeros(n)) self.assertEqual(R(n, k=n), ones(n)) def test_count(self): for n in range(10): # test explicitly for small n for k in range(n + 1): a = random_k(n, k) self.assertEqual(len(a), n) self.assertEqual(a.count(), k) for _ in range(100): n = randrange(10_000) k = randint(0, n) a = random_k(n, k) self.assertEqual(len(a), n) self.assertEqual(a.count(), k) def test_active_bits(self): # test if all bits are active n = 240 cum = zeros(n) for _ in range(1000): k = randint(30, 40) a = random_k(n, k) self.assertEqual(a.count(), k) cum |= a if cum.all(): break else: self.fail() def test_combinations(self): # for entire range of 0 <= k <= n, validate that random_k() # generates all possible combinations n = 7 total = 0 for k in range(n + 1): expected = math.comb(n, k) combs = set() for _ in range(10_000): combs.add(frozenbitarray(random_k(n, k))) if len(combs) == expected: total += expected break else: self.fail() self.assertEqual(total, 2 ** n) def collect_code_branches(self): # return list of bitarrays from all code branches of random_k() res = [] # test small k (no .combine_half()) res.append(random_k(300, 10)) # general cases for k in 100, 500, 2_500, 4_000: res.append(random_k(5_000, k)) return res def test_seed(self): # We ensure that after setting a seed value, random_k() will # always return the same random bitarrays. However, we do not ensure # that these results will not change in future versions of bitarray. 
_set_default_endian("little") a = [] for val in 654321, 654322, 654321, 654322: seed(val) a.append(self.collect_code_branches()) self.assertEqual(a[0], a[2]) self.assertEqual(a[1], a[3]) self.assertNotEqual(a[0], a[1]) # initialize seed with current system time again seed() # ---------------- tests for internal _Random methods ------------------- def test_op_seq(self): r = _Random() G = r.op_seq K = r.K M = r.M # special cases self.assertRaises(ValueError, G, 0) self.assertEqual(G(1), zeros(M - 1)) self.assertEqual(G(K // 2), bitarray()) self.assertEqual(G(K - 1), ones(M - 1)) self.assertRaises(ValueError, G, K) # examples for p, s in [ (0.15625, '0100'), (0.25, '0'), # 1/2 AND -> 1/4 (0.375, '10'), # 1/2 OR -> 3/4 AND -> 3/8 (0.5, ''), (0.625, '01'), # 1/2 AND -> 1/4 OR -> 5/8 (0.6875, '101'), (0.75, '1'), # 1/2 OR -> 3/4 ]: seq = G(int(p * K)) self.assertEqual(seq.to01(), s) for i in range(1, K): seq = G(i) self.assertTrue(0 <= len(s) < M) q = 0.5 # a = random_half() for k in seq: # k=0: AND k=1: OR if k: q += 0.5 * (1.0 - q) # a |= random_half() else: q *= 0.5 # a &= random_half() self.assertEqual(q, i / K) def test_combine_half(self): r = _Random(1_000_000) for seq, mean in [ ([], 500_000), # .random_half() itself ([0], 250_000), # AND ([1], 750_000), # OR ([1, 0], 375_000), # OR followed by AND ]: a = r.combine_half(seq) self.assertTrue(abs(a.count() - mean) < 5_000) # ---------------------------- random_p() --------------------------------- HAVE_BINOMIALVARIATE = sys.version_info[:2] >= (3, 12) @skipIf(HAVE_BINOMIALVARIATE) class Random_P_Not_Implemented(unittest.TestCase): def test_not_implemented(self): self.assertRaises(NotImplementedError, random_p, 100, 0.25) @skipIf(not HAVE_BINOMIALVARIATE) class Random_P_Tests(unittest.TestCase): def test_basic(self): for _ in range(250): default_endian = choice(['little', 'big']) _set_default_endian(default_endian) endian = choice(['little', 'big', None]) n = randrange(120) p = choice([0.0, 0.0001, 0.2, 0.5, 0.9, 1.0]) a = random_p(n, p, endian) self.assertTrue(type(a), bitarray) self.assertEqual(len(a), n) self.assertEqual(a.endian, endian or default_endian) def test_inputs_and_edge_cases(self): R = random_p self.assertRaises(TypeError, R) self.assertRaises(TypeError, R, 0.25) self.assertRaises(TypeError, R, 1, "0.5") self.assertRaises(ValueError, R, -1) self.assertRaises(ValueError, R, 1, -0.5) self.assertRaises(ValueError, R, 1, p=1.5) self.assertRaises(ValueError, R, 1, 0.15, 'foo') self.assertRaises(ValueError, R, 10, 0.5, endian='foo') self.assertEqual(R(0), bitarray()) for n in range(20): self.assertEqual(R(n, 0), zeros(n)) self.assertEqual(len(R(n, 0.5)), n) self.assertEqual(R(n, p=1), ones(n)) def test_default(self): a = random_p(10_000_000) # p defaults to 0.5 # see if population is within expectation self.assertTrue(abs(a.count() - 5_000_000) <= 15_811) def test_count(self): for _ in range(500): n = choice([randrange(4, 120), randrange(100, 1000)]) p = choice([0.0001, 0.001, 0.01, 0.1, 0.25, 0.5, 0.9]) sigma = math.sqrt(n * p * (1.0 - p)) a = random_p(n, p) self.assertEqual(len(a), n) self.assertTrue(abs(a.count() - n * p) < max(4, 10 * sigma)) def collect_code_branches(self): # return list of bitarrays from all code branches of random_p() res = [] # for default p=0.5, random_p uses randbytes res.append(random_p(32)) # test small p res.append(random_p(5_000, 0.002)) # small n (note that p=0.4 will call the "literal definition" case) res.append(random_p(15, 0.4)) # general cases for p in 0.1, 0.2, 0.375, 0.4999, 0.7: 
res.append(random_p(150, p)) return res def test_seed(self): # We ensure that after setting a seed value, random_p() will always # return the same random bitarrays. However, we do not ensure that # these results will not change in future versions of bitarray. _set_default_endian("little") a = [] for val in 123456, 123457, 123456, 123457: seed(val) a.append(self.collect_code_branches()) self.assertEqual(a[0], a[2]) self.assertEqual(a[1], a[3]) self.assertNotEqual(a[0], a[1]) # initialize seed with current system time again seed() def test_small_p_limit(self): # For understanding how the algorithm works, see ./doc/random_p.rst # Also, see VerificationTests in devel/test_random.py r = _Random() limit = 1.0 / (r.K + 1) # lower limit for p self.assertTrue(r.SMALL_P > limit) # ---------------------------- gen_primes() ------------------------------- class PrimeTests(unittest.TestCase): primes = [ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, ] def test_errors(self): P = gen_primes self.assertRaises(TypeError, P, 3, 1) self.assertRaises(ValueError, P, "1.0") self.assertRaises(ValueError, P, -1) self.assertRaises(TypeError, P, 8, 4) self.assertRaises(TypeError, P, 8, foo="big") self.assertRaises(ValueError, P, 8, "foo") self.assertRaises(ValueError, P, 8, endian="foo") def test_explitcit(self): for n in range(230): default_endian = choice(['little', 'big']) _set_default_endian(default_endian) endian = choice(["little", "big", None]) odd = getrandbits(1) a = gen_primes(n, endian, odd) self.assertEqual(len(a), n) self.assertEqual(a.endian, endian or default_endian) if odd: lst = [2] + [2 * i + 1 for i in a.search(1)] else: lst = [i for i in a.search(1)] self.assertEqual(lst, self.primes[:len(lst)]) def test_cmp(self): N = 10_000 c = ones(N) c[:2] = 0 for i in range(int(math.sqrt(N) + 1.0)): if c[i]: c[i * i :: i] = 0 self.assertEqual(list(c.search(1, 0, 462)), self.primes) for _ in range(20): n = randrange(N) endian = choice(["little", "big"]) a = gen_primes(n, endian=endian) self.assertEqual(a, c[:n]) self.assertEqual(a.endian, endian) b = gen_primes(n // 2, endian, odd=True) self.assertEqual(b, a[1::2]) self.assertEqual(b, c[1:n:2]) for _ in range(20): i = randrange(10, 100) x = randint(-1, 1) n = i * i + x self.assertEqual(gen_primes(n), c[:n]) self.assertEqual(gen_primes(n // 2, odd=1), c[1:n:2]) self.assertEqual(gen_primes(N), c) self.assertEqual(gen_primes(N // 2, odd=1), c[1::2]) def test_count(self): for n, count, sum_p, sum_sqr_p in [ ( 10, 4, 17, 87), ( 100, 25, 1_060, 65_796), ( 1_000, 168, 76_127, 49_345_379), (10_000, 1229, 5_736_396, 37_546_387_960), ]: a = gen_primes(n) self.assertEqual(len(a), n) self.assertEqual(a.count(), count) self.assertEqual(sum_indices(a), sum_p) self.assertEqual(sum_indices(a, 2), sum_sqr_p) b = gen_primes(n // 2, odd=1) self.assertEqual(len(b), n // 2) self.assertEqual(b.count() + 1, count) # +1 because of prime 2 self.assertEqual(b, a[1::2]) # ----------------------------- pprint() ---------------------------------- class PPrintTests(unittest.TestCase): @staticmethod def get_code_string(a): f = StringIO() pprint(a, stream=f) return f.getvalue() def round_trip(self, a): b = 
eval(self.get_code_string(a)) self.assertEqual(b, a) self.assertEqual(type(b), type(a)) def test_bitarray(self): a = bitarray('110') self.assertEqual(self.get_code_string(a), "bitarray('110')\n") self.round_trip(a) def test_frozenbitarray(self): a = frozenbitarray('01') self.assertEqual(self.get_code_string(a), "frozenbitarray('01')\n") self.round_trip(a) def test_formatting(self): a = bitarray(200) for width in range(40, 130, 10): for n in range(1, 10): f = StringIO() pprint(a, stream=f, group=n, width=width) r = f.getvalue() self.assertEqual(eval(r), a) s = r.strip("bitary(')\n") for group in s.split()[:-1]: self.assertEqual(len(group), n) for line in s.split('\n'): self.assertTrue(len(line) < width) def test_fallback(self): for a in None, 'asd', [1, 2], bitarray(), frozenbitarray('1'): self.round_trip(a) def test_subclass(self): class Foo(bitarray): pass a = Foo() code = self.get_code_string(a) self.assertEqual(code, "Foo()\n") b = eval(code) self.assertEqual(b, a) self.assertEqual(type(b), type(a)) def test_random(self): for n in range(150): self.round_trip(urandom(n)) def test_file(self): tmpdir = tempfile.mkdtemp() tmpfile = os.path.join(tmpdir, 'testfile') a = urandom_2(1000) try: with open(tmpfile, 'w') as fo: pprint(a, fo) with open(tmpfile, 'r') as fi: b = eval(fi.read()) self.assertEqual(a, b) finally: shutil.rmtree(tmpdir) # ----------------------------- strip() ----------------------------------- class StripTests(unittest.TestCase, Util): def test_simple(self): self.assertRaises(TypeError, strip, '0110') self.assertRaises(TypeError, strip, bitarray(), 123) self.assertRaises(ValueError, strip, bitarray(), 'up') for default_endian in 'big', 'little': _set_default_endian(default_endian) a = bitarray('00010110000') self.assertEQUAL(strip(a), bitarray('0001011')) self.assertEQUAL(strip(a, 'left'), bitarray('10110000')) self.assertEQUAL(strip(a, 'both'), bitarray('1011')) b = frozenbitarray('00010110000') c = strip(b, 'both') self.assertEqual(c, bitarray('1011')) self.assertEqual(type(c), frozenbitarray) def test_zeros_ones(self): for _ in range(50): n = randrange(10) mode = choice(['left', 'right', 'both']) a = zeros(n) c = strip(a, mode) self.assertEqual(type(c), bitarray) self.assertEqual(len(c), 0) self.assertEqual(a, zeros(n)) b = frozenbitarray(a) c = strip(b, mode) self.assertEqual(type(c), frozenbitarray) self.assertEqual(len(c), 0) a.setall(1) c = strip(a, mode) self.assertEqual(c, ones(n)) def test_random(self): for a in self.randombitarrays(): b = a.copy() f = frozenbitarray(a) s = a.to01() for mode, res in [ ('left', bitarray(s.lstrip('0'), a.endian)), ('right', bitarray(s.rstrip('0'), a.endian)), ('both', bitarray(s.strip('0'), a.endian)), ]: c = strip(a, mode) self.assertEQUAL(c, res) self.assertEqual(type(c), bitarray) self.assertEQUAL(a, b) c = strip(f, mode) self.assertEQUAL(c, res) self.assertEqual(type(c), frozenbitarray) self.assertEQUAL(f, b) def test_one_set(self): for _ in range(10): n = randint(1, 10000) a = bitarray(n) a.setall(0) a[randrange(n)] = 1 self.assertEqual(strip(a, 'both'), bitarray('1')) self.assertEqual(len(a), n) # ----------------------------- count_n() --------------------------------- class CountN_Tests(unittest.TestCase, Util): @staticmethod def count_n(a, n): "return lowest index i for which a[:i].count() == n" i, j = n, a.count(1, 0, n) while j < n: j += a[i] i += 1 return i def check_result(self, a, n, i, v=1): self.assertEqual(a.count(v, 0, i), n) if i == 0: self.assertEqual(n, 0) else: self.assertEqual(a[i - 1], v) def 
test_empty(self): a = bitarray() self.assertEqual(count_n(a, 0), 0) self.assertEqual(count_n(a, 0, 0), 0) self.assertEqual(count_n(a, 0, 1), 0) self.assertRaises(ValueError, count_n, a, 1) self.assertRaises(TypeError, count_n, '', 0) self.assertRaises(TypeError, count_n, a, 7.0) self.assertRaises(ValueError, count_n, a, 0, 2) self.assertRaisesMessage(ValueError, "n = 1 larger than bitarray " "length 0", count_n, a, 1) def test_simple(self): a = bitarray('111110111110111110111110011110111110111110111000') b = a.copy() self.assertEqual(len(a), 48) self.assertEqual(a.count(), 37) self.assertEqual(a.count(0), 11) self.assertEqual(count_n(a, 0), 0) self.assertEqual(count_n(a, 0, 0), 0) self.assertEqual(count_n(a, 2, 0), 12) self.assertEqual(count_n(a, 10, 0), 47) self.assertEqual(count_n(a, 20), 23) self.assertEqual(count_n(a, 20, 1), 23) self.assertEqual(count_n(a, 37), 45) # n < 0 self.assertRaisesMessage(ValueError, "non-negative integer expected", count_n, a, -1) # n > len(a) self.assertRaisesMessage(ValueError, "n = 49 larger than bitarray " "length 48", count_n, a, 49) # n > a.count(0) self.assertRaisesMessage(ValueError, "n = 12 exceeds total count " "(a.count(0) = 11)", count_n, a, 12, 0) # n > a.count(1) self.assertRaisesMessage(ValueError, "n = 38 exceeds total count " "(a.count(1) = 37)", count_n, a, 38, 1) for v in 0, 1: for n in range(a.count(v) + 1): i = count_n(a, n, v) self.check_result(a, n, i, v) self.assertEqual(a[:i].count(v), n) self.assertEqual(i, self.count_n(a if v else ~a, n)) self.assertEQUAL(a, b) def test_frozenbitarray(self): a = frozenbitarray('001111101111101111101111100111100') self.assertEqual(len(a), 33) self.assertEqual(a.count(), 24) self.assertEqual(count_n(a, 0), 0) self.assertEqual(count_n(a, 10), 13) self.assertEqual(count_n(a, 24), 31) self.assertRaises(ValueError, count_n, a, -1) # n < 0 self.assertRaises(ValueError, count_n, a, 25) # n > a.count() self.assertRaises(ValueError, count_n, a, 34) # n > len(a) for n in range(25): self.check_result(a, n, count_n(a, n)) def test_ones(self): n = randint(1, 100_000) a = ones(n) self.assertEqual(count_n(a, n), n) self.assertRaises(ValueError, count_n, a, 1, 0) self.assertRaises(ValueError, count_n, a, n + 1) for _ in range(20): i = randint(0, n) self.assertEqual(count_n(a, i), i) def test_one_set(self): n = randint(1, 100_000) a = zeros(n) self.assertEqual(count_n(a, 0), 0) self.assertRaises(ValueError, count_n, a, 1) for _ in range(20): a.setall(0) i = randrange(n) a[i] = 1 self.assertEqual(count_n(a, 1), i + 1) self.assertRaises(ValueError, count_n, a, 2) def test_last(self): for N in range(1, 1000): a = zeros(N) a[-1] = 1 self.assertEqual(count_n(a, 1), N) if N == 1: msg = "n = 2 larger than bitarray length 1" else: msg = "n = 2 exceeds total count (a.count(1) = 1)" self.assertRaisesMessage(ValueError, msg, count_n, a, 2) def test_primes(self): a = gen_primes(10_000) # there are 1229 primes below 10,000 self.assertEqual(a.count(), 1229) for n, p in [( 10, 29), # the 10th prime number is 29 ( 100, 541), # the 100th prime number is 541 (1000, 7919)]: # the 1000th prime number is 7919 self.assertEqual(count_n(a, n) - 1, p) def test_large(self): for _ in range(100): N = randint(100_000, 250_000) a = bitarray(N) v = getrandbits(1) a.setall(not v) for _ in range(randrange(100)): a[randrange(N)] = v tc = a.count(v) # total count i = count_n(a, tc, v) self.check_result(a, tc, i, v) n = tc + 1 self.assertRaisesMessage(ValueError, "n = %d exceeds total count " "(a.count(%d) = %d)" % (n, v, tc), count_n, a, n, v) for _ 
in range(20): n = randint(0, tc) i = count_n(a, n, v) self.check_result(a, n, i, v) # --------------------------------------------------------------------------- class BitwiseCountTests(unittest.TestCase, Util): def test_count_byte(self): for i in range(256): a = bitarray(bytearray([i])) cnt = a.count() self.assertEqual(count_and(a, zeros(8)), 0) self.assertEqual(count_and(a, ones(8)), cnt) self.assertEqual(count_and(a, a), cnt) self.assertEqual(count_or(a, zeros(8)), cnt) self.assertEqual(count_or(a, ones(8)), 8) self.assertEqual(count_or(a, a), cnt) self.assertEqual(count_xor(a, zeros(8)), cnt) self.assertEqual(count_xor(a, ones(8)), 8 - cnt) self.assertEqual(count_xor(a, a), 0) def test_1(self): a = bitarray('001111') aa = a.copy() b = bitarray('010011') bb = b.copy() self.assertEqual(count_and(a, b), 2) self.assertEqual(count_or(a, b), 5) self.assertEqual(count_xor(a, b), 3) for f in count_and, count_or, count_xor: # not two arguments self.assertRaises(TypeError, f) self.assertRaises(TypeError, f, a) self.assertRaises(TypeError, f, a, b, 3) # wrong argument types self.assertRaises(TypeError, f, a, '') self.assertRaises(TypeError, f, '1', b) self.assertRaises(TypeError, f, a, 4) self.assertEQUAL(a, aa) self.assertEQUAL(b, bb) b.append(1) for f in count_and, count_or, count_xor: self.assertRaises(ValueError, f, a, b) self.assertRaises(ValueError, f, bitarray('110', 'big'), bitarray('101', 'little')) def test_frozen(self): a = frozenbitarray('001111') b = frozenbitarray('010011') self.assertEqual(count_and(a, b), 2) self.assertEqual(count_or(a, b), 5) self.assertEqual(count_xor(a, b), 3) def test_random(self): for _ in range(100): n = randrange(1000) a = urandom_2(n) b = urandom(n, a.endian) self.assertEqual(count_and(a, b), (a & b).count()) self.assertEqual(count_or(a, b), (a | b).count()) self.assertEqual(count_xor(a, b), (a ^ b).count()) def test_misc(self): for a in self.randombitarrays(): n = len(a) b = urandom(n, a.endian) # any and self.assertEqual(any(a & b), count_and(a, b) > 0) self.assertEqual(any_and(a, b), any(a & b)) # any or self.assertEqual(any(a | b), count_or(a, b) > 0) self.assertEqual(any(a | b), any(a) or any(b)) # any xor self.assertEqual(any(a ^ b), count_xor(a, b) > 0) self.assertEqual(any(a ^ b), a != b) # all and self.assertEqual(all(a & b), count_and(a, b) == n) self.assertEqual(all(a & b), all(a) and all(b)) # all or self.assertEqual(all(a | b), count_or(a, b) == n) # all xor self.assertEqual(all(a ^ b), count_xor(a, b) == n) self.assertEqual(all(a ^ b), a == ~b) # --------------------------- any_and() ----------------------------------- class BitwiseAnyTests(unittest.TestCase, Util): def test_basic(self): a = frozenbitarray('0101') b = bitarray('0111') self.assertTrue(any_and(a, b)) self.assertRaises(TypeError, any_and) self.assertRaises(TypeError, any_and, a, 4) b.append(1) self.assertRaises(ValueError, any_and, a, b) self.assertRaises(ValueError, any_and, bitarray('01', 'little'), bitarray('11', 'big')) def test_overlap(self): n = 100 for _ in range(500): i1 = randint(0, n) j1 = randint(i1, n) r1 = range(i1, j1) i2 = randint(0, n) j2 = randint(i2, n) r2 = range(i2, j2) # test if ranges r1 and r2 overlap res1 = bool(r1) and bool(r2) and (i2 in r1 or i1 in r2) res2 = bool(set(r1) & set(r2)) self.assertEqual(res1, res2) a1, a2 = bitarray(n), bitarray(n) a1[i1:j1] = a2[i2:j2] = 1 self.assertEqual(any_and(a1, a2), res1) def test_common(self): n = 100 for _ in range(500): s1 = self.random_slice(n) s2 = self.random_slice(n) r1 = range(n)[s1] r2 = range(n)[s2] # 
test if ranges r1 and r2 have common items a1, a2 = bitarray(n), bitarray(n) a1[s1] = a2[s2] = 1 self.assertEqual(any_and(a1, a2), bool(set(r1) & set(r2))) def check(self, a, b): r = any_and(a, b) self.assertEqual(type(r), bool) self.assertEqual(r, any_and(b, a)) # symmetry self.assertEqual(r, any(a & b)) self.assertEqual(r, (a & b).any()) self.assertEqual(r, count_and(a, b) > 0) def test_explitcit(self): for a, b , res in [ ('', '', False), ('0', '1', False), ('0', '0', False), ('1', '1', True), ('00011', '11100', False), ('00001011 1', '01000100 1', True)]: a = bitarray(a) b = bitarray(b) self.assertTrue(any_and(a, b) is res) self.check(a, b) def test_random(self): for a in self.randombitarrays(): n = len(a) b = urandom(n, a.endian) self.check(a, b) def test_one(self): for n in range(1, 300): a = zeros(n) b = urandom(n) i = randrange(n) a[i] = 1 self.assertEqual(b[i], any_and(a, b)) # ---------------------------- subset() ----------------------------------- class SubsetTests(unittest.TestCase, Util): def test_basic(self): a = frozenbitarray('0101') b = bitarray('0111') self.assertTrue(subset(a, b)) self.assertFalse(subset(b, a)) self.assertRaises(TypeError, subset) self.assertRaises(TypeError, subset, a, '') self.assertRaises(TypeError, subset, '1', b) self.assertRaises(TypeError, subset, a, 4) b.append(1) self.assertRaises(ValueError, subset, a, b) self.assertRaises(ValueError, subset, bitarray('01', 'little'), bitarray('11', 'big')) def check(self, a, b, res): r = subset(a, b) self.assertEqual(type(r), bool) self.assertEqual(r, res) self.assertEqual(a | b == b, res) self.assertEqual(a & b == a, res) def test_True(self): for a, b in [('', ''), ('0', '1'), ('0', '0'), ('1', '1'), ('000', '111'), ('0101', '0111'), ('000010111', '010011111')]: self.check(bitarray(a), bitarray(b), True) def test_False(self): for a, b in [('1', '0'), ('1101', '0111'), ('0000101111', '0100111011')]: self.check(bitarray(a), bitarray(b), False) def test_random(self): for a in self.randombitarrays(start=1): b = a.copy() # we set one random bit in b to 1, so a is always a subset of b b[randrange(len(a))] = 1 self.check(a, b, True) # but b is only a subset when they are equal self.check(b, a, a == b) # we set all bits in a, which ensures that b is a subset of a a.setall(1) self.check(b, a, True) # ------------------------- correspond_all() ------------------------------ class CorrespondAllTests(unittest.TestCase): def test_basic(self): a = frozenbitarray('0101') b = bitarray('0111') self.assertTrue(correspond_all(a, b), (1, 1, 1, 1)) self.assertRaises(TypeError, correspond_all) b.append(1) self.assertRaises(ValueError, correspond_all, a, b) self.assertRaises(ValueError, correspond_all, bitarray('01', 'little'), bitarray('11', 'big')) def test_explitcit(self): for a, b, res in [ ('', '', (0, 0, 0, 0)), ('0000011111', '0000100111', (4, 1, 2, 3)), ]: self.assertEqual(correspond_all(bitarray(a), bitarray(b)), res) def test_random(self): for _ in range(100): n = randrange(3000) a = urandom_2(n) b = urandom(n, a.endian) res = correspond_all(a, b) self.assertEqual(res[0], count_and(~a, ~b)) self.assertEqual(res[1], count_and(~a, b)) self.assertEqual(res[2], count_and(a, ~b)) self.assertEqual(res[3], count_and(a, b)) self.assertEqual(res[0], n - count_or(a, b)) self.assertEqual(res[1] + res[2], count_xor(a, b)) self.assertEqual(sum(res), n) # ----------------------------- byteswap() -------------------------------- @skipIf(is_pypy) class ByteSwapTests(unittest.TestCase): def test_basic_bytearray(self): a = 
bytearray(b"ABCD") byteswap(a, 2) self.assertEqual(a, bytearray(b"BADC")) byteswap(a) self.assertEqual(a, bytearray(b"CDAB")) a = bytearray(b"ABCDEF") byteswap(a, 3) self.assertEqual(a, bytearray(b"CBAFED")) byteswap(a, 1) self.assertEqual(a, bytearray(b"CBAFED")) def test_basic_bitarray(self): a = bitarray("11110000 01010101") byteswap(a) self.assertEqual(a, bitarray("01010101 11110000")) a = bitarray("01111000 1001") b = a.copy() a.tobytes() # clear padbits byteswap(a) self.assertEqual(a, bitarray("10010000 0111")) byteswap(a) self.assertEqual(a, b) def test_basic_array(self): r = os.urandom(64) for typecode in array.typecodes: # type code 'u' is deprecated and will be removed in Python 3.16 if typecode == 'u': continue a = array.array(typecode, r) self.assertEqual(len(a) * a.itemsize, 64) a.byteswap() byteswap(a, a.itemsize) self.assertEqual(a.tobytes(), r) def test_empty(self): a = bytearray() byteswap(a) self.assertEqual(a, bytearray()) for n in range(10): byteswap(a, n) self.assertEqual(a, bytearray()) def test_one_byte(self): a = bytearray(b'\xab') byteswap(a) self.assertEqual(a, bytearray(b'\xab')) for n in range(2): byteswap(a, n) self.assertEqual(a, bytearray(b'\xab')) def test_errors(self): # buffer not writable for a in b"AB", frozenbitarray(16): self.assertRaises(BufferError, byteswap, a) a = bytearray(b"ABCD") b = bitarray(32) for n in -1, 3, 5, 6: # byte size not multiple of n self.assertRaises(ValueError, byteswap, a, n) self.assertRaises(ValueError, byteswap, b, n) def test_range(self): for n in range(20): for m in range(20): r = os.urandom(m * n) a = bytearray(r) byteswap(a, n) lst = [] for i in range(m): x = r[i * n:i * n + n] lst.extend(x[::-1]) self.assertEqual(a, bytearray(lst)) def test_reverse_bytearray(self): for n in range(100): r = os.urandom(n) a = bytearray(r) byteswap(a) self.assertEqual(a, bytearray(r[::-1])) def test_reverse_bitarray(self): for n in range(100): a = urandom(8 * n) b = a.copy() byteswap(a) a.bytereverse() self.assertEqual(a, b[::-1]) # ------------------------------ parity() --------------------------------- class ParityTests(unittest.TestCase): def test_explitcit(self): for s, res in [('', 0), ('1', 1), ('0010011', 1), ('10100110', 0)]: self.assertTrue(parity(bitarray(s)) is res) self.assertTrue(parity(frozenbitarray(s)) is res) def test_zeros_ones(self): for n in range(2000): self.assertEqual(parity(zeros(n)), 0) self.assertEqual(parity(ones(n)), n % 2) def test_random(self): endian = choice(["little", "big"]) a = bitarray(endian=endian) par = 0 for i in range(2000): self.assertEqual(parity(a), par) self.assertEqual(par, a.count() % 2) self.assertEqual(a.endian, endian) self.assertEqual(len(a), i) v = getrandbits(1) a.append(v) par ^= v def test_wrong_args(self): self.assertRaises(TypeError, parity, '') self.assertRaises(TypeError, parity, 1) self.assertRaises(TypeError, parity) self.assertRaises(TypeError, parity, bitarray("110"), 1) # ---------------------------- sum_indices() ------------------------------ class SumIndicesUtil(unittest.TestCase): def check_explicit(self, S): for s, r1, r2 in [ ("", 0, 0), ("0", 0, 0), ("1", 0, 0), ("11", 1, 1), ("011", 3, 5), ("001", 2, 4), ("0001100", 7, 25), ("00001111", 22, 126), ("01100111 1101", 49, 381), ]: for a in [bitarray(s, choice(['little', 'big'])), frozenbitarray(s, choice(['little', 'big']))]: self.assertEqual(S(a, 1), r1) self.assertEqual(S(a, 2), r2) self.assertEqual(a, bitarray(s)) def check_wrong_args(self, S): self.assertRaises(TypeError, S, '') self.assertRaises(TypeError, S, 1.0) 
self.assertRaises(TypeError, S) for mode in -1, 0, 3, 4: self.assertRaises(ValueError, S, bitarray("110"), mode) def check_urandom(self, S, n): a = urandom_2(n) self.assertEqual(S(a, 1), sum(i for i, v in enumerate(a) if v)) self.assertEqual(S(a, 2), sum(i * i for i, v in enumerate(a) if v)) def check_sparse(self, S, n, k, mode=1, freeze=False, inv=False): a = zeros(n, choice(['little', 'big'])) self.assertEqual(S(a, mode), 0) self.assertFalse(a.any()) indices = sample(range(n), k) a[indices] = 1 res = sum(indices) if mode == 1 else sum(i * i for i in indices) if inv: a.invert() sum_ones = 3 if mode == 1 else 2 * n - 1 sum_ones *= n * (n - 1) sum_ones //= 6 res = sum_ones - res if freeze: a = frozenbitarray(a) c = a.copy() self.assertEqual(a.count(), n - k if inv else k) self.assertEqual(S(a, mode), res) self.assertEqual(a, c) class SSQI_Tests(SumIndicesUtil): # Additional tests for _ssqi() in: devel/test_sum_indices.py def test_explicit(self): self.check_explicit(_ssqi) def test_wrong_args(self): self.check_wrong_args(_ssqi) def test_small(self): a = bitarray() sm1 = sm2 = 0 for i in range(100): v = getrandbits(1) a.append(v) if v: sm1 += i sm2 += i * i self.assertEqual(_ssqi(a, 1), sm1) self.assertEqual(_ssqi(a, 2), sm2) def test_urandom(self): self.check_urandom(_ssqi, 10_037) def test_sparse(self): for _ in range(5): mode = randint(1, 2) freeze = getrandbits(1) inv = getrandbits(1) self.check_sparse(_ssqi, n=1_000_003, k=400, mode=mode, freeze=freeze, inv=inv) class SumIndicesTests(SumIndicesUtil): # Additional tests in: devel/test_sum_indices.py def test_explicit(self): self.check_explicit(sum_indices) a = gen_primes(100) self.assertEqual(sum_indices(a, mode=1), 1_060) self.assertEqual(sum_indices(a, mode=2), 65_796) def test_wrong_args(self): self.check_wrong_args(sum_indices) def test_ones(self): for mode in 1, 2: self.check_sparse(sum_indices, n=1_600_037, k=0, mode=mode, freeze=True, inv=True) def test_sparse(self): for _ in range(20): n = choice([500_029, 600_011]) # below and above block size k = randrange(1_000) mode = randint(1, 2) freeze = getrandbits(1) inv = getrandbits(1) self.check_sparse(sum_indices, n, k, mode, freeze, inv) # --------------------------------------------------------------------------- class XoredIndicesTests(unittest.TestCase, Util): def test_explicit(self): for s, r in [("", 0), ("0", 0), ("1", 0), ("11", 1), ("011", 3), ("001", 2), ("0001100", 7), ("01100111 1101", 13)]: for a in [bitarray(s, self.random_endian()), frozenbitarray(s, self.random_endian())]: self.assertEqual(xor_indices(a), r) def test_wrong_args(self): X = xor_indices self.assertRaises(TypeError, X, '') self.assertRaises(TypeError, X, 1) self.assertRaises(TypeError, X) self.assertRaises(TypeError, X, bitarray("110"), 1) def test_ones(self): # OEIS A003815 lst = [0, 1, 3, 0, 4, 1, 7, 0, 8, 1, 11, 0, 12, 1, 15, 0, 16, 1, 19] self.assertEqual([xor_indices(ones(i)) for i in range(1, 20)], lst) a = bitarray() x = 0 for i in range(1000): a.append(1) x ^= i self.assertEqual(xor_indices(a), x) if i < 19: self.assertEqual(lst[i], x) def test_primes(self): # OEIS A126084 lst = [0, 2, 1, 4, 3, 8, 5, 20, 7, 16, 13, 18, 55, 30, 53, 26, 47] primes = gen_primes(1000) x = 0 for i, p in enumerate(primes.search(1)): self.assertEqual(xor_indices(primes[:p]), x) if i < 17: self.assertEqual(lst[i], x) x ^= p def test_large_random(self): n = 10_037 for a in [urandom_2(n), frozenbitarray(urandom_2(n))]: res = reduce(operator.xor, (i for i, v in enumerate(a) if v)) b = a.copy() self.assertEqual(xor_indices(a), 
res) self.assertEqual(a, b) def test_random(self): for a in self.randombitarrays(): c = 0 for i, v in enumerate(a): c ^= i * v self.assertEqual(xor_indices(a), c) def test_flips(self): a = bitarray(128) c = 0 for _ in range(1000): self.assertEqual(xor_indices(a), c) i = randrange(len(a)) a.invert(i) c ^= i def test_error_correct(self): parity_bits = [1, 2, 4, 8, 16, 32, 64, 128] # parity bit positions a = urandom(256) a[parity_bits] = 0 c = xor_indices(a) # set parity bits such that block is well prepared a[parity_bits] = int2ba(c, length=8, endian="little") for i in range(0, 256): self.assertEqual(xor_indices(a), 0) # ensure well prepared a.invert(i) self.assertEqual(xor_indices(a), i) # index of the flipped bit! a.invert(i) # ------------------ intervals of uninterrupted runs -------------------- def runs(a): "return number of uninterrupted intervals of 1s and 0s" n = len(a) if n < 2: return n return 1 + count_xor(a[:-1], a[1:]) class IntervalsTests(unittest.TestCase, Util): def test_explicit(self): for s, lst in [ ('', []), ('0', [(0, 0, 1)]), ('1', [(1, 0, 1)]), ('00111100 0000011', [(0, 0, 2), (1, 2, 6), (0, 6, 13), (1, 13, 15)]), ]: a = bitarray(s) self.assertEqual(list(intervals(a)), lst) self.assertEqual(runs(a), len(lst)) def test_uniform(self): for n in range(1, 100): for v in 0, 1: a = n * bitarray([v], self.random_endian()) self.assertEqual(list(intervals(a)), [(v, 0, n)]) self.assertEqual(runs(a), 1) def test_random(self): for a in self.randombitarrays(): n = len(a) b = urandom(n) for value, start, stop in intervals(a): self.assertFalse(isinstance(value, bool)) self.assertTrue(0 <= start < stop <= n) b[start:stop] = value self.assertEqual(a, b) def test_list_runs(self): for a in self.randombitarrays(): # list of length of runs of alternating bits alt_runs = [stop - start for _, start, stop in intervals(a)] self.assertEqual(len(alt_runs), runs(a)) b = bitarray() v = a[0] if a else None # value of first run for length in alt_runs: self.assertTrue(length > 0) b.extend(length * bitarray([v])) v = not v self.assertEqual(a, b) # -------------------------- ba2hex() hex2ba() --------------------------- class HexlifyTests(unittest.TestCase, Util): def test_explicit(self): data = [ # little big ('', '', ''), ('1000', '1', '8'), ('0101 0110', 'a6', '56'), ('0100 1001 1101', '29b', '49d'), ('0000 1100 1110 1111', '037f', '0cef'), ] for bs, hex_le, hex_be in data: a_be = bitarray(bs, 'big') a_le = bitarray(bs, 'little') self.assertEQUAL(hex2ba(hex_be, 'big'), a_be) self.assertEQUAL(hex2ba(hex_le, 'little'), a_le) self.assertEqual(ba2hex(a_be), hex_be) self.assertEqual(ba2hex(a_le), hex_le) def test_ba2hex_group(self): a = bitarray('1000 0000 0101 1111', 'little') self.assertEqual(ba2hex(a), "10af") self.assertEqual(ba2hex(a, 0), "10af") self.assertEqual(ba2hex(a, 1, ""), "10af") self.assertEqual(ba2hex(a, 1), "1 0 a f") self.assertEqual(ba2hex(a, group=2), "10 af") self.assertEqual(ba2hex(a, 2, "-"), "10-af") self.assertEqual(ba2hex(a, group=3, sep="_"), "10a_f") self.assertEqual(ba2hex(a, 3, sep=", "), "10a, f") def test_ba2hex_errors(self): self.assertRaises(TypeError, ba2hex) self.assertRaises(TypeError, ba2hex, None) self.assertRaises(TypeError, ba2hex, '101') # length not multiple of 4 self.assertRaises(ValueError, ba2hex, bitarray('10')) a = bitarray('1000 0000 0101 1111', 'little') self.assertRaises(ValueError, ba2hex, a, -1) self.assertRaises(ValueError, ba2hex, a, group=-1) # sep not str self.assertRaises(TypeError, ba2hex, a, 1, b" ") # embedded null character in sep 
self.assertRaises(ValueError, ba2hex, a, 2, " \0") def test_hex2ba_whitespace(self): _set_default_endian('big') self.assertEqual(hex2ba("F1 FA %s f3 c0" % whitespace), bitarray("11110001 11111010 11110011 11000000")) self.assertEQUAL(hex2ba(b' a F ', 'big'), bitarray('1010 1111', 'big')) self.assertEQUAL(hex2ba(860 * " " + '0 1D' + 590 * " ", 'little'), bitarray('0000 1000 1011', 'little')) def test_hex2ba_errors(self): self.assertRaises(TypeError, hex2ba, 0) self.assertRaises(TypeError, hex2ba, "F", 1) self.assertRaises(ValueError, hex2ba, "F", "foo") for s in '01a7g89', '0\u20ac', '0 \0', b'\x00': self.assertRaises(ValueError, hex2ba, s) for s in 'g', 'ag', 'aag' 'aaaga', 'ag': msg = "invalid digit found for base16, got 'g' (0x67)" self.assertRaisesMessage(ValueError, msg, hex2ba, s, 'big') def test_hex2ba_types(self): for c in 'e', 'E', b'e', b'E', bytearray(b'e'), bytearray(b'E'): a = hex2ba(c, "big") self.assertEqual(a.to01(), '1110') self.assertEqual(a.endian, 'big') self.assertEqual(type(a), bitarray) def test_random(self): for _ in range(100): default_endian = self.random_endian() _set_default_endian(default_endian) endian = choice(["little", "big", None]) a = urandom_2(4 * randrange(100), endian) s = ba2hex(a, group=randrange(10), sep=choice(whitespace)) b = hex2ba(s, endian) self.assertEqual(b.endian, endian or default_endian) self.assertEqual(a, b) self.check_obj(b) def test_hexdigits(self): a = hex2ba(hexdigits) self.assertEqual(len(a), 4 * len(hexdigits)) self.assertEqual(type(a), bitarray) self.check_obj(a) t = ba2hex(a) self.assertEqual(t, hexdigits.lower()) self.assertEqual(type(t), str) self.assertEQUAL(a, hex2ba(t)) def test_binascii(self): a = urandom(80, 'big') s = binascii.hexlify(a.tobytes()).decode() self.assertEqual(ba2hex(a), s) b = bitarray(binascii.unhexlify(s), endian='big') self.assertEQUAL(hex2ba(s, 'big'), b) # -------------------------- ba2base() base2ba() ------------------------- class BaseTests(unittest.TestCase, Util): def test_explicit(self): data = [ # n little big ('', 2, '', ''), ('1 0 1', 2, '101', '101'), ('11 01 00', 4, '320', '310'), ('111 001', 8, '74', '71'), ('1111 0001', 16, 'f8', 'f1'), ('11111 00001', 32, '7Q', '7B'), ('111111 000001', 64, '/g', '/B'), ] for bs, n, s_le, s_be in data: a_le = bitarray(bs, 'little') a_be = bitarray(bs, 'big') self.assertEQUAL(base2ba(n, s_le, 'little'), a_le) self.assertEQUAL(base2ba(n, s_be, 'big'), a_be) self.assertEqual(ba2base(n, a_le), s_le) self.assertEqual(ba2base(n, a_be), s_be) def test_base2ba_types(self): for c in '7', b'7', bytearray(b'7'): a = base2ba(32, c) self.assertEqual(a.to01(), '11111') self.assertEqual(type(a), bitarray) def test_base2ba_whitespace(self): self.assertEqual(base2ba(8, bytearray(b"17 0"), "little"), bitarray("100 111 000")) self.assertEqual(base2ba(32, "7 A"), bitarray("11111 00000")) self.assertEqual(base2ba(64, b"A /"), bitarray("000000 111111")) for n in 2, 4, 8, 16, 32, 64: a = base2ba(n, whitespace) self.assertEqual(a, bitarray()) a = urandom(60) c = list(ba2base(n, a)) for _ in range(randrange(80)): c.insert(randint(0, len(c)), choice(whitespace)) s = ''.join(c) self.assertEqual(base2ba(n, s), a) def test_ba2base_group(self): a = bitarray("001 011 100 111", "little") self.assertEqual(ba2base(8, a, 3), "461 7") self.assertEqual(ba2base(8, a, group=2), "46 17") self.assertEqual(ba2base(8, a, sep="_", group=2), "46_17") self.assertEqual(ba2base(8, a, 2, sep="."), "46.17") for n, s, group, sep, res in [ (2, '10100', 2, '-', '10-10-0'), (4, '10 11 00 01', 1, "_", 
"2_3_0_1"), (8, "101 100 011 101 001 010", 3, " ", "543 512"), (8, "101 100 011 101 001 010", 3, "", "543512"), (16, '1011 0001 1101 1010 1111', 4, "+", "b1da+f"), (32, "10110 00111 01101 01111", 2, ", ", "WH, NP"), (64, "101100 011101 101011 111110 101110", 2, ".", "sd.r+.u"), ]: a = bitarray(s, "big") s = ba2base(n, a, group, sep) self.assertEqual(type(s), str) self.assertEqual(s, res) def test_empty(self): for n in 2, 4, 8, 16, 32, 64: a = base2ba(n, '') self.assertEqual(a, bitarray()) self.assertEqual(ba2base(n, a), '') def test_invalid_characters(self): for n, s in ((2, '2'), (4, '4'), (8, '8'), (16, 'g'), (32, '8'), (32, '1'), (32, 'a'), (64, '-'), (64, '_')): msg = ("invalid digit found for base%d, " "got '%s' (0x%02x)" % (n, s, ord(s))) self.assertRaisesMessage(ValueError, msg, base2ba, n, s) for n in 2, 4, 8, 16, 32, 64: for s in '_', '@', '[', '\u20ac', '\0', b'\0', b'\x80', b'\xff': self.assertRaises(ValueError, base2ba, n, s) msg = "invalid digit found for base%d, got '{' (0x7b)" % n self.assertRaisesMessage(ValueError, msg, base2ba, n, '{') def test_invalid_args(self): a = bitarray() self.assertRaises(TypeError, ba2base, None, a) self.assertRaises(TypeError, base2ba, None, '') self.assertRaises(TypeError, ba2base, 16.0, a) self.assertRaises(TypeError, base2ba, 16.0, '') self.assertRaises(TypeError, ba2base, 32, None) self.assertRaises(TypeError, base2ba, 32, None) for values, msg in [ ([-1023, -16, -1, 0, 3, 5, 31, 48, 63, 129, 511, 4123], "base must be a power of 2"), ([1, 128, 256, 512, 1024, 2048, 4096, 8192], "base must be 2, 4, 8, 16, 32 or 64")]: for i in values: self.assertRaisesMessage(ValueError, msg, ba2base, i, a) self.assertRaisesMessage(ValueError, msg, base2ba, i, '') a = bitarray(29) for m in range(2, 7): msg = "bitarray length 29 not multiple of %d" % m self.assertRaisesMessage(ValueError, msg, ba2base, 1 << m, a) def test_hexadecimal(self): a = base2ba(16, 'F61', 'big') self.assertEqual(a, bitarray('1111 0110 0001')) self.assertEqual(ba2base(16, a), 'f61') for n in range(50): s = ''.join(choice(hexdigits) for _ in range(n)) endian = self.random_endian() a = base2ba(16, s, endian) self.assertEQUAL(a, hex2ba(s, endian)) self.assertEqual(ba2base(16, a), ba2hex(a)) def test_base32(self): msg = os.urandom(randint(10, 100) * 5) s = base64.b32encode(msg).decode() a = base2ba(32, s, 'big') self.assertEqual(a.tobytes(), msg) self.assertEqual(ba2base(32, a), s) self.assertEqual(base64.b32decode(s), msg) def test_base64(self): msg = os.urandom(randint(10, 100) * 3) s = base64.standard_b64encode(msg).decode() a = base2ba(64, s, 'big') self.assertEqual(a.tobytes(), msg) self.assertEqual(ba2base(64, a), s) self.assertEqual(base64.standard_b64decode(s), msg) def test_primes(self): primes = gen_primes(60, odd=True) base_2 = primes.to01() for n, endian, rep in [ ( 2, "little", base_2), ( 2, "big", base_2), ( 4, "little", "232132030132012122122010132110"), ( 4, "big", "131231030231021211211020231220"), ( 8, "little", "65554155441515405550"), ( 8, "big", "35551455114545105550"), (16, "little", "e6bc4b46a921d61"), (16, "big", "76d32d265948b68"), (32, "little", "O3SJLSJTSI3C"), (32, "big", "O3JS2JSZJC3I"), (64, "little", "utMtkppEtF"), (64, "big", "dtMtJllIto"), ]: a = bitarray(primes, endian) s = ba2base(n, a) self.assertEqual(type(s), str) self.assertEqual(s, rep) b = base2ba(n, rep, endian) self.assertEqual(b, a) self.assertEqual(type(b), bitarray) self.assertEqual(b.endian, endian) alphabets = [ # m n alphabet (1, 2, '01'), (2, 4, '0123'), (3, 8, '01234567'), (4, 16, 
'0123456789abcdef'), (4, 16, '0123456789ABCDEF'), (5, 32, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'), (6, 64, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef' 'ghijklmnopqrstuvwxyz0123456789+/'), ] def test_alphabets(self): for m, n, alphabet in self.alphabets: self.assertEqual(1 << m, n) self.assertEqual(len(alphabet), n) for i, c in enumerate(alphabet): endian = self.random_endian() self.assertEqual(ba2int(base2ba(n, c, endian)), i) if m == 4 and c in "ABCDEF": c = chr(ord(c) + 32) self.assertEqual(ba2base(n, int2ba(i, m, endian)), c) def test_not_alphabets(self): for m, n, alphabet in self.alphabets: for i in range(256): c = chr(i) if c in alphabet or c.isspace(): continue if n == 16 and c in hexdigits: continue self.assertRaises(ValueError, base2ba, n, c) def test_random(self): for _ in range(100): m = randint(1, 6) a = urandom_2(m * randrange(100)) n = 1 << m s = ba2base(n, a, group=randrange(10), sep=randrange(5) * " ") if m == 4 and getrandbits(1): s = s.upper() if getrandbits(1): s = s.encode() b = base2ba(n, s, a.endian) self.assertEQUAL(a, b) self.check_obj(b) # --------------------------- sparse compression ---------------------------- class SC_Tests(unittest.TestCase, Util): def test_explicit(self): for b, bits, endian in [ (b'\x00\0', '', 'little'), (b'\x01\x03\x01\x03\0', '110', 'little'), (b'\x01\x07\x01\x40\0', '0000001', 'little'), (b'\x11\x07\x01\x02\0', '0000001', 'big'), (b'\x01\x10\x02\xf0\x0f\0', '00001111 11110000', 'little'), (b'\x11\x10\xa1\x0c\0', '00000000 00001000', 'big'), (b'\x11\x09\xa1\x08\0', '00000000 1', 'big'), (b'\x01g\xa4abde\0', 97 * '0' + '110110', 'little'), ]: a = bitarray(bits, endian) self.assertEqual(sc_encode(a), b) self.assertEQUAL(sc_decode(b), a) def test_encode_types(self): for a in bitarray('1', 'big'), frozenbitarray('1', 'big'): b = sc_encode(a) self.assertEqual(type(b), bytes) self.assertEqual(b, b'\x11\x01\x01\x80\0') for a in None, [], 0, 123, b'', b'\x00', 3.14: self.assertRaises(TypeError, sc_encode, a) def test_decode_types(self): blob = b'\x11\x03\x01\x20\0' for b in blob, bytearray(blob), list(blob), array.array('B', blob): a = sc_decode(b) self.assertEqual(type(a), bitarray) self.assertEqual(a.endian, 'big') self.assertEqual(a.to01(), '001') a = [17, 3, 1, 32, 0] self.assertEqual(sc_decode(a), bitarray("001")) for x in 256, -1: a[-1] = x self.assertRaises(ValueError, sc_decode, a) self.assertRaises(TypeError, sc_decode, [0x02, None]) for x in None, 3, 3.2, Ellipsis, 'foo': self.assertRaises(TypeError, sc_decode, x) def test_decode_header_nbits(self): for b, n in [ (b'\x00\0', 0), (b'\x01\x00\0', 0), (b'\x01\x01\0', 1), (b'\x02\x00\x00\0', 0), (b'\x02\x00\x01\0', 256), (b'\x03\x00\x00\x00\0', 0), (b'\x03\x00\x00\x01\0', 65536), ]: a = sc_decode(b) self.assertEqual(len(a), n) self.assertFalse(a.any()) def test_decode_untouch(self): stream = iter(b'\x01\x03\x01\x03\0XYZ') self.assertEqual(sc_decode(stream), bitarray('110')) self.assertEqual(next(stream), ord('X')) stream = iter([0x11, 0x05, 0x01, 0xff, 0, None, 'foo']) self.assertEqual(sc_decode(stream), bitarray('11111')) self.assertTrue(next(stream) is None) self.assertEqual(next(stream), 'foo') def test_decode_header_errors(self): # invalid header for c in 0x20, 0x21, 0x40, 0x80, 0xc0, 0xf0, 0xff: self.assertRaisesMessage(ValueError, "invalid header: 0x%02x" % c, sc_decode, [c]) # invalid block head for c in 0xc0, 0xc1, 0xc5, 0xff: self.assertRaisesMessage(ValueError, "invalid block head: 0x%02x" % c, sc_decode, [0x01, 0x10, c]) def test_decode_header_overflow(self): self.assertRaisesMessage( 
OverflowError, "sizeof(Py_ssize_t) = %d: cannot read 9 bytes" % PTRSIZE, sc_decode, b'\x09' + 9 * b'\x00') self.assertRaisesMessage( ValueError, "read %d bytes got negative value: -1" % PTRSIZE, sc_decode, [PTRSIZE] + PTRSIZE * [0xff]) if PTRSIZE == 4: self.assertRaisesMessage( OverflowError, "sizeof(Py_ssize_t) = 4: cannot read 5 bytes", sc_decode, b'\x05' + 5 * b'\x00') self.assertRaisesMessage( ValueError, "read 4 bytes got negative value: -2147483648", sc_decode, b'\x04\x00\x00\x00\x80') def test_decode_errors(self): # too many raw bytes self.assertRaisesMessage( ValueError, "decode error (raw): 0 + 2 > 1", sc_decode, b"\x01\x05\x02\xff\xff\0") self.assertRaisesMessage( ValueError, "decode error (raw): 32 + 3 > 34", sc_decode, b"\x02\x0f\x01\xa0\x03\xff\xff\xff\0") # sparse index too high self.assertRaisesMessage( ValueError, "decode error (n=1): 128 >= 128", sc_decode, b"\x01\x80\xa1\x80\0") self.assertRaisesMessage( ValueError, "decode error (n=2): 512 >= 512", sc_decode, b"\x02\x00\x02\xc2\x01\x00\x02\0") self.assertRaisesMessage( ValueError, "decode error (n=3): 32768 >= 32768", sc_decode, b"\x02\x00\x80\xc3\x01\x00\x80\x00\0") msg = {4: "read 4 bytes got negative value: -2147483648", 8: "decode error (n=4): 2147483648 >= 16"} self.assertRaisesMessage( ValueError, msg[PTRSIZE], sc_decode, b"\x01\x10\xc4\x01\x00\x00\x00\x80\0") msg = {4: "read 4 bytes got negative value: -1", 8: "decode error (n=4): 4294967295 >= 16"} self.assertRaisesMessage( ValueError, msg[PTRSIZE], sc_decode, b"\x01\x10\xc4\x01\xff\xff\xff\xff\0") def test_decode_end_of_stream(self): for stream in [b'', b'\x00', b'\x01', b'\x02\x77', b'\x01\x04\x01', b'\x01\x04\xa1', b'\x01\x04\xa0']: self.assertRaises(StopIteration, sc_decode, stream) def test_decode_ambiguity(self): for b in [ # raw: b'\x11\x03\x01\x20\0', # this is what sc_encode gives us b'\x11\x03\x01\x3f\0', # but we can set the pad bits to 1 # sparse: b'\x11\x03\xa1\x02\0', # block type 1 b'\x11\x03\xc2\x01\x02\x00\0', # block type 2 b'\x11\x03\xc3\x01\x02\x00\x00\0', # block type 3 b'\x11\x03\xc4\x01\x02\x00\x00\x00\0', # block type 4 ]: a = sc_decode(b) self.assertEqual(a.to01(), '001') def test_block_type0(self): for k in range(0x01, 0xa0): nbytes = k if k <= 32 else 32 * (k - 31) nbits = 8 * nbytes a = ones(nbits, "little") b = bytearray([0x01, nbits] if nbits < 256 else [0x02, nbits % 256, nbits // 256]) b.append(k) b.extend(a.tobytes()) b.append(0) # stop byte self.assertEqual(sc_decode(b), a) self.assertEqual(sc_encode(a), b) def test_block_type1(self): a = bitarray(256, 'little') for n in range(1, 32): a[getrandbits(8)] = 1 b = bytearray([0x02, 0x00, 0x01, 0xa0 + a.count()]) b.extend(list(a.search(1))) # sorted indices with no duplicates b.append(0) # stop byte self.assertEqual(sc_decode(b), a) self.assertEqual(sc_encode(a), b) def test_block_type2(self): a = bitarray(65536, 'little') for n in range(1, 256): a[getrandbits(16)] = 1 b = bytearray([0x03, 0x00, 0x00, 0x01, 0xc2, a.count()]) for i in a.search(1): b.extend(struct.pack("= 2 * 4) indices = sorted(set(randrange(len(a)) for _ in range(5))) a[indices] = 1 b = bytearray(b'\x04\x00\x00\x00\x04\xc4') b.append(len(indices)) for i in indices: b.extend(struct.pack(" 9) # count byte and second index byte m += bool(i > 16) # third index byte m += bool(i > 24) # fourth index byte self.check_blob_length(a, m) def test_encode_ones(self): for _ in range(10): nbits = randrange(100_000) a = ones(nbits) m = 2 # head byte and stop byte m += bits2bytes(nbits.bit_length()) # size bytes nbytes = 
bits2bytes(nbits) m += nbytes # actual raw bytes # number of head bytes, all of block type 0: m += bool(nbytes % 32) # number in 0x01 .. 0x1f m += (nbytes // 32 + 127) // 128 # number in 0x20 .. 0xbf self.check_blob_length(a, m) def round_trip(self, a): c = a.copy() i = iter(sc_encode(a)) b = sc_decode(i) self.assertTrue(a == b == c) self.assertTrue(a.endian == b.endian == c.endian) self.assertEqual(list(i), []) def test_random(self): for _ in range(10): n = randrange(100_000) endian = self.random_endian() a = ones(n, endian) while a.count(): a &= urandom(n, endian) self.round_trip(a) # --------------------------------------------------------------------------- class VLFTests(unittest.TestCase, Util): def test_explicit(self): for blob, s in [ (b'\x40', ''), (b'\x30', '0'), (b'\x38', '1'), (b'\x00', '0000'), (b'\x01', '0001'), (b'\xd3\x20', '001101'), (b'\xe0\x40', '0000 1'), (b'\x90\x02', '0000 000001'), (b'\xb5\xa7\x18', '0101 0100111 0011'), (b'\x95\xb7\x1c', '0101 0110111 001110'), ]: default_endian = self.random_endian() _set_default_endian(default_endian) a = bitarray(s) self.assertEqual(vl_encode(a), blob) c = vl_decode(blob) self.assertEqual(c, a) self.assertEqual(c.endian, default_endian) for endian in 'big', 'little', None: a = bitarray(s, endian) c = vl_encode(a) self.assertEqual(type(c), bytes) self.assertEqual(c, blob) c = vl_decode(blob, endian) self.assertEqual(c, a) self.assertEqual(c.endian, endian or default_endian) def test_encode_types(self): s = "0011 01" for a in bitarray(s), frozenbitarray(s): b = vl_encode(a) self.assertEqual(type(b), bytes) self.assertEqual(b, b'\xd3\x20') for a in None, [], 0, 123, b'', b'\x00', 3.14: self.assertRaises(TypeError, vl_encode, a) def test_decode_types(self): blob = b'\xd3\x20' for s in (blob, iter(blob), memoryview(blob), iter([0xd3, 0x20]), bytearray(blob)): a = vl_decode(s, endian=self.random_endian()) self.assertEqual(type(a), bitarray) self.assertEqual(a, bitarray('0011 01')) # these objects are not iterable for arg in None, 0, 1, 0.0: self.assertRaises(TypeError, vl_decode, arg) # these items cannot be interpreted as ints for item in None, 2.34, Ellipsis, 'foo': self.assertRaises(TypeError, vl_decode, iter([0x95, item])) def test_decode_args(self): # item not integer self.assertRaises(TypeError, vl_decode, iter([b'\x40'])) self.assertRaises(TypeError, vl_decode, b'\x40', 'big', 3) self.assertRaises(ValueError, vl_decode, b'\x40', 'foo') def test_decode_trailing(self): for s, bits in [(b'\x40ABC', ''), (b'\xe0\x40A', '00001')]: stream = iter(s) self.assertEqual(vl_decode(stream), bitarray(bits)) self.assertEqual(next(stream), 65) def test_decode_ambiguity(self): for s in b'\x40', b'\x4f', b'\x45': self.assertEqual(vl_decode(s), bitarray()) for s in b'\x1e', b'\x1f': self.assertEqual(vl_decode(s), bitarray('111')) def test_decode_stream(self): stream = iter(b'\x40\x30\x38\x40\x2c\xe0\x40\xd3\x20') for bits in '', '0', '1', '', '11', '0000 1', '0011 01': self.assertEqual(vl_decode(stream), bitarray(bits)) arrays = [urandom(randrange(30)) for _ in range(1000)] stream = iter(b''.join(vl_encode(a) for a in arrays)) for a in arrays: self.assertEqual(vl_decode(stream), a) def test_decode_errors(self): # decode empty bytes self.assertRaises(StopIteration, vl_decode, b'') # invalid head byte for s in [ b'\x70', b'\xf0', # padding = 7 b'\x50', b'\x60', b'\x70', # no second byte, but padding > 4 ]: self.assertRaisesMessage(ValueError, "invalid head byte: 0x%02x" % s[0], vl_decode, s) # high bit set, but no terminating byte for s in b'\x80', 
b'\x80\x80': self.assertRaises(StopIteration, vl_decode, s) # decode list with out of range items for i in -1, 256: self.assertRaises(ValueError, vl_decode, [i]) # wrong type self.assertRaises(TypeError, vl_decode, [None]) def test_decode_invalid_stream(self): N = 100 s = iter(N * (3 * [0x80] + ['XX']) + ['end.']) for _ in range(N): a = None try: a = vl_decode(s) except TypeError: pass self.assertTrue(a is None) self.assertEqual(next(s), 'end.') def test_explicit_zeros(self): for n in range(100): a = zeros(4 + n * 7) s = n * b'\x80' + b'\x00' self.assertEqual(vl_encode(a), s) self.assertEqual(vl_decode(s), a) def round_trip(self, a): c = a.copy() s = vl_encode(a) b = vl_decode(s) self.check_obj(b) self.assertTrue(a == b == c) LEN_PAD_BITS = 3 self.assertEqual(len(s), (len(a) + LEN_PAD_BITS + 6) // 7) head = s[0] padding = (head & 0x70) >> 4 self.assertEqual(len(a) + padding, 7 * len(s) - LEN_PAD_BITS) def test_large(self): for _ in range(10): a = urandom(randrange(100_000)) self.round_trip(a) def test_random(self): for a in self.randombitarrays(): self.round_trip(a) # --------------------------------------------------------------------------- class IntegerizationTests(unittest.TestCase, Util): def test_ba2int(self): self.assertEqual(ba2int(bitarray('0')), 0) self.assertEqual(ba2int(bitarray('1')), 1) self.assertEqual(ba2int(bitarray('00101', 'big')), 5) self.assertEqual(ba2int(bitarray('00101', 'little')), 20) self.assertEqual(ba2int(frozenbitarray('11')), 3) self.assertRaises(ValueError, ba2int, bitarray()) self.assertRaises(ValueError, ba2int, frozenbitarray()) self.assertRaises(TypeError, ba2int, '101') a = bitarray('111') b = a.copy() self.assertEqual(ba2int(a), 7) # ensure original object wasn't altered self.assertEQUAL(a, b) def test_ba2int_frozen(self): for a in self.randombitarrays(start=1): b = frozenbitarray(a) self.assertEqual(ba2int(b), ba2int(a)) self.assertEQUAL(a, b) def test_ba2int_random(self): for a in self.randombitarrays(start=1): b = bitarray(a, 'big') self.assertEqual(a, b) self.assertEqual(ba2int(b), int(b.to01(), 2)) def test_ba2int_bytes(self): for n in range(1, 50): a = urandom_2(8 * n) c = bytearray(a.tobytes()) i = 0 for x in (c if a.endian == 'big' else reversed(c)): i <<= 8 i |= x self.assertEqual(ba2int(a), i) def test_int2ba(self): self.assertEqual(int2ba(0), bitarray('0')) self.assertEqual(int2ba(1), bitarray('1')) self.assertEqual(int2ba(5), bitarray('101')) self.assertEQUAL(int2ba(6, endian='big'), bitarray('110', 'big')) self.assertEQUAL(int2ba(6, endian='little'), bitarray('011', 'little')) self.assertRaises(TypeError, int2ba, 1.0) self.assertRaises(TypeError, int2ba, 1, 3.0) self.assertRaises(ValueError, int2ba, 1, 0) self.assertRaises(TypeError, int2ba, 1, 10, 123) self.assertRaises(ValueError, int2ba, 1, 10, 'asd') # signed integer requires length self.assertRaises(TypeError, int2ba, 100, signed=True) def test_signed(self): for s, i in [ ('0', 0), ('1', -1), ('00', 0), ('10', 1), ('01', -2), ('11', -1), ('000', 0), ('100', 1), ('010', 2), ('110', 3), ('001', -4), ('101', -3), ('011', -2), ('111', -1), ('00000', 0), ('11110', 15), ('00001', -16), ('11111', -1), ('00000000 0', 0), ('11111111 0', 255), ('00000000 1', -256), ('11111111 1', -1), ]: self.assertEqual(ba2int(bitarray(s, 'little'), signed=1), i) self.assertEqual(ba2int(bitarray(s[::-1], 'big'), signed=1), i) len_s = len(bitarray(s)) self.assertEQUAL(int2ba(i, len_s, 'little', signed=1), bitarray(s, 'little')) self.assertEQUAL(int2ba(i, len_s, 'big', signed=1), bitarray(s[::-1], 'big')) def 
test_zero(self): for endian in "little", "big": a = int2ba(0, endian=endian) self.assertEQUAL(a, bitarray('0', endian=endian)) for n in range(1, 100): a = int2ba(0, length=n, endian=endian, signed=True) b = bitarray(n * '0', endian) self.assertEQUAL(a, b) for signed in 0, 1: self.assertEqual(ba2int(b, signed=signed), 0) def test_negative_one(self): for endian in "little", "big": for n in range(1, 100): a = int2ba(-1, length=n, endian=endian, signed=True) b = bitarray(n * '1', endian) self.assertEQUAL(a, b) self.assertEqual(ba2int(b, signed=True), -1) def test_int2ba_overflow(self): self.assertRaises(OverflowError, int2ba, -1) self.assertRaises(OverflowError, int2ba, -1, 4) self.assertRaises(OverflowError, int2ba, 128, 7) self.assertRaises(OverflowError, int2ba, 64, 7, signed=1) self.assertRaises(OverflowError, int2ba, -65, 7, signed=1) for n in range(1, 20): self.assertRaises(OverflowError, int2ba, 1 << n, n) self.assertRaises(OverflowError, int2ba, 1 << (n - 1), n, signed=1) self.assertRaises(OverflowError, int2ba, -(1 << (n - 1)) - 1, n, signed=1) def test_int2ba_length(self): self.assertRaises(TypeError, int2ba, 0, 1.0) self.assertRaises(ValueError, int2ba, 0, 0) self.assertEqual(int2ba(5, length=6, endian='big'), bitarray('000101')) for n in range(1, 100): ab = int2ba(1, n, 'big') al = int2ba(1, n, 'little') self.assertEqual(ab.endian, 'big') self.assertEqual(al.endian, 'little') self.assertEqual(len(ab), n), self.assertEqual(len(al), n) self.assertEqual(ab, bitarray((n - 1) * '0') + bitarray('1')) self.assertEqual(al, bitarray('1') + bitarray((n - 1) * '0')) ab = int2ba(0, n, 'big') al = int2ba(0, n, 'little') self.assertEqual(len(ab), n) self.assertEqual(len(al), n) self.assertEqual(ab, bitarray(n * '0', 'big')) self.assertEqual(al, bitarray(n * '0', 'little')) self.assertEqual(int2ba(2 ** n - 1), bitarray(n * '1')) self.assertEqual(int2ba(2 ** n - 1, endian='little'), bitarray(n * '1')) def test_explicit(self): _set_default_endian('big') for i, sa in [( 0, '0'), (1, '1'), ( 2, '10'), (3, '11'), (25, '11001'), (265, '100001001'), (3691038, '1110000101001000011110')]: ab = bitarray(sa, 'big') al = bitarray(sa[::-1], 'little') self.assertEQUAL(int2ba(i), ab) self.assertEQUAL(int2ba(i, endian='big'), ab) self.assertEQUAL(int2ba(i, endian='little'), al) self.assertEqual(ba2int(ab), ba2int(al), i) def check_round_trip(self, i): for endian in 'big', 'little': a = int2ba(i, endian=endian) self.check_obj(a) self.assertEqual(a.endian, endian) self.assertTrue(len(a) > 0) # ensure we have no leading zeros if a.endian == 'big': self.assertTrue(len(a) == 1 or a.index(1) == 0) self.assertEqual(ba2int(a), i) if i > 0: self.assertEqual(i.bit_length(), len(a)) # add a few trailing / leading zeros to bitarray if endian == 'big': a = zeros(randrange(4), endian) + a else: a = a + zeros(randrange(4), endian) self.assertEqual(a.endian, endian) self.assertEqual(ba2int(a), i) def test_many(self): for _ in range(20): self.check_round_trip(randrange(10 ** randint(3, 300))) @staticmethod def twos_complement(i, num_bits): # https://en.wikipedia.org/wiki/Two%27s_complement mask = 2 ** (num_bits - 1) return -(i & mask) + (i & ~mask) def test_random_signed(self): for a in self.randombitarrays(start=1): i = ba2int(a, signed=True) b = int2ba(i, len(a), a.endian, signed=True) self.assertEQUAL(a, b) j = ba2int(a, signed=False) # unsigned if i >= 0: self.assertEqual(i, j) self.assertEqual(i, self.twos_complement(j, len(a))) # --------------------------------------------------------------------------- class 
MixedTests(unittest.TestCase, Util): def test_bin(self): for _ in range(20): i = randrange(1000) s = bin(i) self.assertEqual(s[:2], '0b') a = bitarray(s[2:], 'big') self.assertEqual(ba2int(a), i) t = a.to01() self.assertEqual(t, s[2:]) self.assertEqual(int(t, 2), i) def test_oct(self): for _ in range(20): i = randrange(1000) s = oct(i) self.assertEqual(s[:2], '0o') a = base2ba(8, s[2:], 'big') self.assertEqual(ba2int(a), i) t = ba2base(8, a) self.assertEqual(t, s[2:]) self.assertEqual(int(t, 8), i) def test_hex(self): for _ in range(20): i = randrange(1000) s = hex(i) self.assertEqual(s[:2], '0x') a = hex2ba(s[2:], 'big') self.assertEqual(ba2int(a), i) t = ba2hex(a) self.assertEqual(t, s[2:]) self.assertEqual(int(t, 16), i) def test_bitwise(self): for a in self.randombitarrays(start=1): b = urandom(len(a), a.endian) aa = a.copy() bb = b.copy() i = ba2int(a) j = ba2int(b) self.assertEqual(ba2int(a & b), i & j) self.assertEqual(ba2int(a | b), i | j) self.assertEqual(ba2int(a ^ b), i ^ j) n = randint(0, len(a)) if a.endian == 'big': self.assertEqual(ba2int(a >> n), i >> n) c = zeros(len(a), 'big') + a self.assertEqual(ba2int(c << n), i << n) self.assertEQUAL(a, aa) self.assertEQUAL(b, bb) def test_bitwise_inplace(self): for a in self.randombitarrays(start=1): b = urandom(len(a), a.endian) bb = b.copy() i = ba2int(a) j = ba2int(b) c = a.copy() c &= b self.assertEqual(ba2int(c), i & j) c = a.copy() c |= b self.assertEqual(ba2int(c), i | j) c = a.copy() c ^= b self.assertEqual(ba2int(c), i ^ j) self.assertEQUAL(b, bb) n = randint(0, len(a)) if a.endian == 'big': c = a.copy() c >>= n self.assertEqual(ba2int(c), i >> n) c = zeros(len(a), 'big') + a c <<= n self.assertEqual(ba2int(c), i << n) # ---------------------- serialize() deserialize() ----------------------- class SerializationTests(unittest.TestCase, Util): def test_explicit(self): for blob, endian, bits in [ (b'\x00', 'little', ''), (b'\x07\x01', 'little', '1'), (b'\x17\x80', 'big', '1'), (b'\x13\xf8', 'big', '11111'), (b'\x00\x0f', 'little', '11110000'), (b'\x10\xf0', 'big', '11110000'), (b'\x12\x87\xd8', 'big', '10000111 110110') ]: a = bitarray(bits, endian) s = serialize(a) self.assertEqual(blob, s) self.assertEqual(type(s), bytes) b = deserialize(blob) self.assertEqual(b, a) self.assertEqual(b.endian, endian) self.assertEqual(type(b), bitarray) def test_serialize_args(self): for x in '0', 0, 1, b'\x00', 0.0, [0, 1], bytearray([0]): self.assertRaises(TypeError, serialize, x) # no arguments self.assertRaises(TypeError, serialize) # too many arguments self.assertRaises(TypeError, serialize, bitarray(), 1) for a in bitarray('0111', 'big'), frozenbitarray('0111', 'big'): self.assertEqual(serialize(a), b'\x14\x70') def test_deserialize_args(self): for x in 0, 1, False, True, None, '', '01', 0.0, [0, 1]: self.assertRaises(TypeError, deserialize, x) # no arguments self.assertRaises(TypeError, deserialize) # too many arguments self.assertRaises(TypeError, deserialize, b'\x00', 1) blob = b'\x03\x06' x = bitarray(blob) for s in blob, bytearray(blob), memoryview(blob), x: a = deserialize(s) self.assertEqual(a.to01(), '01100') self.assertEqual(a.endian, 'little') def test_invalid_bytes(self): self.assertRaises(ValueError, deserialize, b'') def check_msg(b): msg = "invalid header byte: 0x%02x" % b[0] self.assertRaisesMessage(ValueError, msg, deserialize, b) for i in range(256): b = bytearray([i]) if i == 0 or i == 16: self.assertEqual(deserialize(b), bitarray()) else: self.assertRaises(ValueError, deserialize, b) check_msg(b) b.append(0) if i < 32 
and i % 16 < 8: self.assertEqual(deserialize(b), zeros(8 - i % 8)) else: self.assertRaises(ValueError, deserialize, b) check_msg(b) def test_padbits_ignored(self): for blob, endian in [ (b'\x07\x01', 'little'), (b'\x07\x03', 'little'), (b'\x07\xff', 'little'), (b'\x17\x80', 'big'), (b'\x17\xc0', 'big'), (b'\x17\xff', 'big'), ]: a = deserialize(blob) self.assertEqual(a.to01(), '1') self.assertEqual(a.endian, endian) def test_random(self): for a in self.randombitarrays(): b = serialize(a) c = deserialize(b) self.assertEqual(a, c) self.assertEqual(a.endian, c.endian) self.check_obj(c) # --------------------------------------------------------------------------- class HuffmanTreeTests(unittest.TestCase): # tests for _huffman_tree() def test_empty(self): freq = {} self.assertRaises(IndexError, _huffman_tree, freq) def test_one_symbol(self): freq = {"A": 1} tree = _huffman_tree(freq) self.assertEqual(tree.symbol, "A") self.assertEqual(tree.freq, 1) self.assertRaises(AttributeError, getattr, tree, 'child') def test_two_symbols(self): freq = {"A": 1, "B": 1} tree = _huffman_tree(freq) self.assertRaises(AttributeError, getattr, tree, 'symbol') self.assertEqual(tree.freq, 2) self.assertEqual(tree.child[0].symbol, "A") self.assertEqual(tree.child[0].freq, 1) self.assertEqual(tree.child[1].symbol, "B") self.assertEqual(tree.child[1].freq, 1) class HuffmanTests(unittest.TestCase): def test_simple(self): freq = {0: 10, 'as': 2, None: 1.6} code = huffman_code(freq) self.assertEqual(len(code), 3) self.assertEqual(len(code[0]), 1) self.assertEqual(len(code['as']), 2) self.assertEqual(len(code[None]), 2) def test_endianness(self): freq = {'A': 10, 'B': 2, 'C': 5} for endian in 'big', 'little': code = huffman_code(freq, endian) self.assertEqual(len(code), 3) for v in code.values(): self.assertEqual(v.endian, endian) def test_wrong_arg(self): self.assertRaises(TypeError, huffman_code, [('a', 1)]) self.assertRaises(TypeError, huffman_code, 123) self.assertRaises(TypeError, huffman_code, None) # cannot compare 'a' with 1 self.assertRaises(TypeError, huffman_code, {'A': 'a', 'B': 1}) # frequency map cannot be empty self.assertRaises(ValueError, huffman_code, {}) def test_one_symbol(self): cnt = {'a': 1} code = huffman_code(cnt) self.assertEqual(code, {'a': bitarray('0')}) for n in range(4): msg = n * ['a'] a = bitarray() a.encode(code, msg) self.assertEqual(a.to01(), n * '0') self.assertEqual(list(a.decode(code)), msg) a.append(1) self.assertRaises(ValueError, list, a.decode(code)) def check_tree(self, code): n = len(code) tree = decodetree(code) self.assertEqual(tree.todict(), code) # ensure tree has 2n-1 nodes (n symbol nodes and n-1 internal nodes) self.assertEqual(tree.nodes(), 2 * n - 1) # a proper Huffman tree is complete self.assertTrue(tree.complete()) def test_balanced(self): n = 6 freq = {} for i in range(1 << n): freq[i] = 1 code = huffman_code(freq) self.assertEqual(len(code), 1 << n) self.assertTrue(all(len(v) == n for v in code.values())) self.check_tree(code) def test_unbalanced(self): n = 27 freq = {} for i in range(n): freq[i] = 1 << i code = huffman_code(freq) self.assertEqual(len(code), n) for i in range(n): self.assertEqual(len(code[i]), n - max(1, i)) self.check_tree(code) def test_counter(self): message = 'the quick brown fox jumps over the lazy dog.' 
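# build a Huffman code from the character frequencies of the message, then
# check that encoding and decoding gives back the original message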
code = huffman_code(Counter(message)) a = bitarray() a.encode(code, message) self.assertEqual(''.join(a.decode(code)), message) self.check_tree(code) def test_random_list(self): plain = [randrange(100) for _ in range(500)] code = huffman_code(Counter(plain)) a = bitarray() a.encode(code, plain) self.assertEqual(list(a.decode(code)), plain) self.check_tree(code) def test_random_freq(self): for n in 2, 3, 4, randint(5, 200): # create Huffman code for n symbols code = huffman_code({i: random() for i in range(n)}) self.check_tree(code) # --------------------------------------------------------------------------- class CanonicalHuffmanTests(unittest.TestCase, Util): def test_basic(self): plain = bytearray(b'the quick brown fox jumps over the lazy dog.') chc, count, symbol = canonical_huffman(Counter(plain)) self.assertEqual(type(chc), dict) self.assertEqual(type(count), list) self.assertEqual(type(symbol), list) a = bitarray() a.encode(chc, plain) self.assertEqual(bytearray(a.decode(chc)), plain) self.assertEqual(bytearray(canonical_decode(a, count, symbol)), plain) def test_example(self): cnt = {'a': 5, 'b': 3, 'c': 1, 'd': 1, 'r': 2} codedict, count, symbol = canonical_huffman(cnt) self.assertEqual(codedict, {'a': bitarray('0'), 'b': bitarray('10'), 'c': bitarray('1110'), 'd': bitarray('1111'), 'r': bitarray('110')}) self.assertEqual(count, [0, 1, 1, 1, 2]) self.assertEqual(symbol, ['a', 'b', 'r', 'c', 'd']) a = bitarray('01011001110011110101100') msg = "abracadabra" self.assertEqual(''.join(a.decode(codedict)), msg) self.assertEqual(''.join(canonical_decode(a, count, symbol)), msg) def test_canonical_huffman_errors(self): self.assertRaises(TypeError, canonical_huffman, []) # frequency map cannot be empty self.assertRaises(ValueError, canonical_huffman, {}) self.assertRaises(TypeError, canonical_huffman) cnt = huffman_code(Counter('aabc')) self.assertRaises(TypeError, canonical_huffman, cnt, 'a') def test_one_symbol(self): cnt = {'a': 1} chc, count, symbol = canonical_huffman(cnt) self.assertEqual(chc, {'a': bitarray('0')}) self.assertEqual(count, [0, 1]) self.assertEqual(symbol, ['a']) for n in range(4): msg = n * ['a'] a = bitarray() a.encode(chc, msg) self.assertEqual(a.to01(), n * '0') self.assertEqual(list(canonical_decode(a, count, symbol)), msg) a.append(1) self.assertRaises(ValueError, list, canonical_decode(a, count, symbol)) def test_canonical_decode_errors(self): a = bitarray('1101') s = ['a'] # bitarray not of bitarray type self.assertRaises(TypeError, canonical_decode, '11', [0, 1], s) # count not sequence self.assertRaises(TypeError, canonical_decode, a, {0, 1}, s) # count element not an int self.assertRaises(TypeError, canonical_decode, a, [0, 1.0], s) # count element overflow self.assertRaises(OverflowError, canonical_decode, a, [0, 1 << 65], s) # symbol not sequence self.assertRaises(TypeError, canonical_decode, a, [0, 1], 43) symbol = ['a', 'b', 'c', 'd'] # sum(count) != len(symbol) self.assertRaisesMessage(ValueError, "sum(count) = 3, but len(symbol) = 4", canonical_decode, a, [0, 1, 2], symbol) # count list too long self.assertRaisesMessage(ValueError, "len(count) cannot be larger than 32", canonical_decode, a, 33 * [0], symbol) def test_canonical_decode_count_range(self): a = bitarray() for i in range(1, 32): count = 32 * [0] # negative count count[i] = -1 self.assertRaisesMessage(ValueError, "count[%d] not in [0..%d], got -1" % (i, 1 << i), canonical_decode, a, count, []) maxbits = 1 << i count[i] = maxbits if i == 31 and PTRSIZE == 4: self.assertRaises(OverflowError, 
canonical_decode, a, count, []) continue self.assertRaisesMessage(ValueError, "sum(count) = %d, but len(symbol) = 0" % maxbits, canonical_decode, a, count, []) count[i] = maxbits + 1 self.assertRaisesMessage(ValueError, "count[%d] not in [0..%d], got %d" % (i, maxbits, count[i]), canonical_decode, a, count, []) iter = canonical_decode(a, 32 * [0], []) self.assertEqual(list(iter), []) def test_canonical_decode_simple(self): # symbols can be anything, they do not even have to be hashable here cnt = [0, 0, 4] s = ['A', 42, [1.2-3.7j, 4j], {'B': 6}] a = bitarray('00 01 10 11') # count can be a list self.assertEqual(list(canonical_decode(a, cnt, s)), s) # count can also be a tuple (any sequence object in fact) self.assertEqual(list(canonical_decode(a, (0, 0, 4), s)), s) self.assertEqual(list(canonical_decode(7 * a, cnt, s)), 7 * s) # the count list may have extra 0's at the end (but not too many) count = [0, 0, 4, 0, 0, 0, 0, 0] self.assertEqual(list(canonical_decode(a, count, s)), s) # the element count[0] is unused self.assertEqual(list(canonical_decode(a, [-47, 0, 4], s)), s) # in fact it can be anything, as it is entirely ignored self.assertEqual(list(canonical_decode(a, [None, 0, 4], s)), s) # the symbol argument can be any sequence object s = [65, 66, 67, 98] self.assertEqual(list(canonical_decode(a, cnt, s)), s) self.assertEqual(list(canonical_decode(a, cnt, bytearray(s))), s) self.assertEqual(list(canonical_decode(a, cnt, tuple(s))), s) self.assertEqual(list(canonical_decode(a, cnt, bytes(s))), s) # Implementation Note: # The symbol can even be an iterable. This was done because we # want to use PySequence_Fast in order to convert sequence # objects (like bytes and bytearray) to a list. This is faster # as all objects are now elements in an array of pointers (as # opposed to having the object's __getitem__ method called on # every iteration). 
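# hence passing a one-shot iterator over the symbols also works: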
self.assertEqual(list(canonical_decode(a, cnt, iter(s))), s) def test_canonical_decode_empty(self): a = bitarray() # count and symbol are empty, ok because sum([]) == len([]) self.assertEqual(list(canonical_decode(a, [], [])), []) a.append(0) self.assertRaisesMessage(ValueError, "reached end of bitarray", list, canonical_decode(a, [], [])) a = bitarray(31 * '0') self.assertRaisesMessage(ValueError, "ran out of codes", list, canonical_decode(a, [], [])) def test_canonical_decode_one_symbol(self): symbols = ['A'] count = [0, 1] a = bitarray('000') self.assertEqual(list(canonical_decode(a, count, symbols)), 3 * symbols) a.append(1) a.extend(bitarray(10 * '0')) iterator = canonical_decode(a, count, symbols) self.assertRaisesMessage(ValueError, "reached end of bitarray", list, iterator) a.extend(bitarray(20 * '0')) iterator = canonical_decode(a, count, symbols) self.assertRaisesMessage(ValueError, "ran out of codes", list, iterator) def test_canonical_decode_large(self): with open(__file__, 'rb') as f: msg = bytearray(f.read()) self.assertTrue(len(msg) > 50000) codedict, count, symbol = canonical_huffman(Counter(msg)) a = bitarray() a.encode(codedict, msg) self.assertEqual(bytearray(canonical_decode(a, count, symbol)), msg) self.check_code(codedict, count, symbol) def test_canonical_decode_symbol_change(self): msg = bytearray(b"Hello World!") codedict, count, symbol = canonical_huffman(Counter(msg)) self.check_code(codedict, count, symbol) a = bitarray() a.encode(codedict, 10 * msg) it = canonical_decode(a, count, symbol) def decode_one_msg(): return bytearray(next(it) for _ in range(len(msg))) self.assertEqual(decode_one_msg(), msg) symbol[symbol.index(ord("l"))] = ord("k") self.assertEqual(decode_one_msg(), bytearray(b"Hekko Workd!")) del symbol[:] self.assertRaises(IndexError, decode_one_msg) def ensure_sorted(self, chc, symbol): # ensure codes are sorted for i in range(len(symbol) - 1): a = chc[symbol[i]] b = chc[symbol[i + 1]] self.assertTrue(ba2int(a) < ba2int(b)) def ensure_consecutive(self, chc, count, symbol): start = 0 for nbits, cnt in enumerate(count): for i in range(start, start + cnt - 1): # ensure two consecutive codes (with same bit length) have # consecutive integer values a = chc[symbol[i]] b = chc[symbol[i + 1]] self.assertTrue(len(a) == len(b) == nbits) self.assertEqual(ba2int(a) + 1, ba2int(b)) start += cnt def ensure_count(self, chc, count): # ensure count list corresponds to length counts from codedict maxbits = len(count) - 1 self.assertEqual(maxbits, max(len(a) for a in chc.values())) my_count = (maxbits + 1) * [0] for a in chc.values(): self.assertEqual(a.endian, 'big') my_count[len(a)] += 1 self.assertEqual(my_count, count) def ensure_complete(self, count): # ensure code is complete and not oversubscribed len_c = len(count) x = sum(count[i] << (len_c - i) for i in range(1, len_c)) self.assertEqual(x, 1 << len_c) def ensure_complete_2(self, chc): # ensure code is complete dt = decodetree(chc) self.assertTrue(dt.complete()) def ensure_round_trip(self, chc, count, symbol): # create a short test message, encode and decode msg = [choice(symbol) for _ in range(10)] a = bitarray() a.encode(chc, msg) it = canonical_decode(a, count, symbol) # the iterator holds a reference to the bitarray and symbol list del a, count, symbol self.assertEqual(type(it).__name__, 'canonical_decodeiter') self.assertEqual(list(it), msg) def check_code(self, chc, count, symbol): self.assertTrue(len(chc) == len(symbol) == sum(count)) self.assertEqual(count[0], 0) # no codes have length 0 
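# codedict keys and symbol list must contain exactly the same symbols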
self.assertTrue(set(chc) == set(symbol)) # the code of the last symbol has all 1 bits self.assertTrue(chc[symbol[-1]].all()) # the code of the first symbol starts with bit 0 self.assertFalse(chc[symbol[0]][0]) self.ensure_sorted(chc, symbol) self.ensure_consecutive(chc, count, symbol) self.ensure_count(chc, count) self.ensure_complete(count) self.ensure_complete_2(chc) self.ensure_round_trip(chc, count, symbol) def test_simple_counter(self): plain = bytearray(b'the quick brown fox jumps over the lazy dog.') cnt = Counter(plain) self.check_code(*canonical_huffman(cnt)) def test_no_comp(self): freq = {None: 1, "A": 1} # None and "A" are not comparable self.check_code(*canonical_huffman(freq)) def test_balanced(self): n = 7 freq = {} for i in range(1 << n): freq[i] = 1 code, count, sym = canonical_huffman(freq) self.assertEqual(len(code), 1 << n) self.assertTrue(all(len(v) == n for v in code.values())) self.check_code(code, count, sym) def test_unbalanced(self): n = 32 freq = {} for i in range(n): freq[i] = 1 << i code = canonical_huffman(freq)[0] self.assertEqual(len(code), n) for i in range(n): self.assertEqual(len(code[i]), n - max(1, i)) self.check_code(*canonical_huffman(freq)) def test_random_freq(self): for n in 2, 3, 4, randint(5, 200): freq = {i: random() for i in range(n)} self.check_code(*canonical_huffman(freq)) # --------------------------------------------------------------------------- if __name__ == '__main__': unittest.main() bitarray-3.7.1/bitarray/util.py000066400000000000000000000520101505414144000164770ustar00rootroot00000000000000# Copyright (c) 2019 - 2025, Ilan Schnell; All Rights Reserved # bitarray is published under the PSF license. # # Author: Ilan Schnell """ Useful utilities for working with bitarrays. """ from __future__ import absolute_import import os import sys import math import random from bitarray import bitarray, bits2bytes from bitarray._util import ( zeros, ones, count_n, parity, _ssqi, xor_indices, count_and, count_or, count_xor, any_and, subset, correspond_all, byteswap, serialize, deserialize, ba2hex, hex2ba, ba2base, base2ba, sc_encode, sc_decode, vl_encode, vl_decode, canonical_decode, ) __all__ = [ 'zeros', 'ones', 'urandom', 'random_k', 'random_p', 'gen_primes', 'pprint', 'strip', 'count_n', 'parity', 'sum_indices', 'xor_indices', 'count_and', 'count_or', 'count_xor', 'any_and', 'subset', 'correspond_all', 'byteswap', 'intervals', 'ba2hex', 'hex2ba', 'ba2base', 'base2ba', 'ba2int', 'int2ba', 'serialize', 'deserialize', 'sc_encode', 'sc_decode', 'vl_encode', 'vl_decode', 'huffman_code', 'canonical_huffman', 'canonical_decode', ] def urandom(__length, endian=None): """urandom(n, /, endian=None) -> bitarray Return random bitarray of length `n` (uses `os.urandom()`). """ a = bitarray(os.urandom(bits2bytes(__length)), endian) del a[__length:] return a def random_k(__n, k, endian=None): """random_k(n, /, k, endian=None) -> bitarray Return (pseudo-) random bitarray of length `n` with `k` elements set to one. Mathematically equivalent to setting (in a bitarray of length `n`) all bits at indices `random.sample(range(n), k)` to one. The random bitarrays are reproducible when giving Python's `random.seed()` with a specific seed value. This function requires Python 3.9 or higher, as it depends on the standard library function `random.randbytes()`. Raises `NotImplementedError` when Python version is too low. 
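For example, `random_k(10, 3)` returns a bitarray of length 10 with exactly three bits set to one, e.g. `bitarray('0100100010')` (the exact pattern depends on the state of Python's random number generator).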
""" if sys.version_info[:2] < (3, 9): raise NotImplementedError("bitarray.util.random_k() requires " "Python 3.9 or higher") r = _Random(__n, endian) if not isinstance(k, int): raise TypeError("int expected, got '%s'" % type(k).__name__) return r.random_k(k) def random_p(__n, p=0.5, endian=None): """random_p(n, /, p=0.5, endian=None) -> bitarray Return (pseudo-) random bitarray of length `n`, where each bit has probability `p` of being one (independent of any other bits). Mathematically equivalent to `bitarray((random() < p for _ in range(n)), endian)`, but much faster for large `n`. The random bitarrays are reproducible when giving Python's `random.seed()` with a specific seed value. This function requires Python 3.12 or higher, as it depends on the standard library function `random.binomialvariate()`. Raises `NotImplementedError` when Python version is too low. """ if sys.version_info[:2] < (3, 12): raise NotImplementedError("bitarray.util.random_p() requires " "Python 3.12 or higher") r = _Random(__n, endian) return r.random_p(p) class _Random: # The main reason for this class it to enable testing functionality # individually in the test class Random_P_Tests in 'test_util.py'. # The test class also contains many comments and explanations. # To better understand how the algorithm works, see ./doc/random_p.rst # See also, VerificationTests in devel/test_random.py # maximal number of calls to .random_half() in .combine() M = 8 # number of resulting probability intervals K = 1 << M # limit for setting individual bits randomly SMALL_P = 0.01 def __init__(self, n=0, endian=None): self.n = n self.nbytes = bits2bytes(n) self.endian = endian def random_half(self): """ Return bitarray with each bit having probability p = 1/2 of being 1. """ # use randbytes() for reproducibility (not urandom()) a = bitarray(random.randbytes(self.nbytes), self.endian) del a[self.n:] return a def op_seq(self, i): """ Return bitarray containing operator sequence. Each item represents a bitwise operation: 0: AND 1: OR After applying the sequence (see .combine_half()), we obtain a bitarray with probability q = i / K """ if not 0 < i < self.K: raise ValueError("0 < i < %d, got i = %d" % (self.K, i)) # sequence of &, | operations - least significant operations first a = bitarray(i.to_bytes(2, byteorder="little"), "little") return a[a.index(1) + 1 : self.M] def combine_half(self, seq): """ Combine random bitarrays with probability 1/2 according to given operator sequence. 
""" a = self.random_half() for k in seq: if k: a |= self.random_half() else: a &= self.random_half() return a def random_k(self, k): n = self.n # error check inputs and handle edge cases if k <= 0 or k >= n: if k == 0: return zeros(n, self.endian) if k == n: return ones(n, self.endian) raise ValueError("k must be in range 0 <= k <= n, got %s" % k) # exploit symmetry to establish: k <= n // 2 if k > n // 2: a = self.random_k(n - k) a.invert() # use in-place to avoid copying return a # decide on sequence, see VerificationTests devel/test_random.py if k < 16 or k * self.K < 3 * n: i = 0 else: p = k / n # p <= 0.5 p -= (0.2 - 0.4 * p) / math.sqrt(n) i = int(p * (self.K + 1)) # combine random bitarrays using bitwise AND and OR operations if i < 3: a = zeros(n, self.endian) diff = -k else: a = self.combine_half(self.op_seq(i)) diff = a.count() - k randrange = random.randrange if diff < 0: # not enough bits 1 - increase count for _ in range(-diff): i = randrange(n) while a[i]: i = randrange(n) a[i] = 1 elif diff > 0: # too many bits 1 - decrease count for _ in range(diff): i = randrange(n) while not a[i]: i = randrange(n) a[i] = 0 return a def random_p(self, p): # error check inputs and handle edge cases if p <= 0.0 or p == 0.5 or p >= 1.0: if p == 0.0: return zeros(self.n, self.endian) if p == 0.5: return self.random_half() if p == 1.0: return ones(self.n, self.endian) raise ValueError("p must be in range 0.0 <= p <= 1.0, got %s" % p) # for small n, use literal definition if self.n < 16: return bitarray((random.random() < p for _ in range(self.n)), self.endian) # exploit symmetry to establish: p < 0.5 if p > 0.5: a = self.random_p(1.0 - p) a.invert() # use in-place to avoid copying return a # for small p, set randomly individual bits if p < self.SMALL_P: return self.random_k(random.binomialvariate(self.n, p)) # calculate operator sequence i = int(p * self.K) if p * (self.K + 1) > i + 1: # see devel/test_random.py i += 1 seq = self.op_seq(i) q = i / self.K # when n is small compared to number of operations, also use literal if self.n < 100 and self.nbytes <= len(seq) + 3 * bool(q != p): return bitarray((random.random() < p for _ in range(self.n)), self.endian) # combine random bitarrays using bitwise AND and OR operations a = self.combine_half(seq) if q < p: x = (p - q) / (1.0 - q) a |= self.random_p(x) elif q > p: x = p / q a &= self.random_p(x) return a def gen_primes(__n, endian=None, odd=False): """gen_primes(n, /, endian=None, odd=False) -> bitarray Generate a bitarray of length `n` in which active indices are prime numbers. By default (`odd=False`), active indices correspond to prime numbers directly. When `odd=True`, only odd prime numbers are represented in the resulting bitarray `a`, and `a[i]` corresponds to `2*i+1` being prime or not. 
""" n = int(__n) if n < 0: raise ValueError("bitarray length must be >= 0") if odd: a = ones(105, endian) # 105 = 3 * 5 * 7 a[1::3] = 0 a[2::5] = 0 a[3::7] = 0 f = "01110110" else: a = ones(210, endian) # 210 = 2 * 3 * 5 * 7 for i in 2, 3, 5, 7: a[::i] = 0 f = "00110101" # repeating the array many times is faster than setting the multiples # of the low primes to 0 a *= (n + len(a) - 1) // len(a) a[:8] = bitarray(f, endian) del a[n:] # perform sieve starting at 11 if odd: for i in a.search(1, 5, int(math.sqrt(n // 2) + 1.0)): # 11//2 = 5 j = 2 * i + 1 a[(j * j) // 2 :: j] = 0 else: # i*i is always odd, and even bits are already set to 0: use step 2*i for i in a.search(1, 11, int(math.sqrt(n) + 1.0)): a[i * i :: 2 * i] = 0 return a def sum_indices(__a, mode=1): """sum_indices(a, /, mode=1) -> int Return sum of indices of all active bits in bitarray `a`. Equivalent to `sum(i for i, v in enumerate(a) if v)`. `mode=2` sums square of indices. """ if mode not in (1, 2): raise ValueError("unexpected mode %r" % mode) # For details see: devel/test_sum_indices.py n = 1 << 19 # block size 512 Kbits if len(__a) <= n: # shortcut for single block return _ssqi(__a, mode) # Constants m = n // 8 # block size in bytes o1 = n * (n - 1) // 2 o2 = o1 * (2 * n - 1) // 3 nblocks = (len(__a) + n - 1) // n padbits = __a.padbits sm = 0 for i in range(nblocks): # use memoryview to avoid copying memory v = memoryview(__a)[i * m : (i + 1) * m] block = bitarray(None, __a.endian, buffer=v) if padbits and i == nblocks - 1: if block.readonly: block = bitarray(block) block[-padbits:] = 0 k = block.count() if k: y = n * i z1 = o1 if k == n else _ssqi(block) if mode == 1: sm += k * y + z1 else: z2 = o2 if k == n else _ssqi(block, 2) sm += (k * y + 2 * z1) * y + z2 return sm def pprint(__a, stream=None, group=8, indent=4, width=80): """pprint(bitarray, /, stream=None, group=8, indent=4, width=80) Pretty-print bitarray object to `stream`, defaults is `sys.stdout`. By default, bits are grouped in bytes (8 bits), and 64 bits per line. Non-bitarray objects are printed using `pprint.pprint()`. """ if stream is None: stream = sys.stdout if not isinstance(__a, bitarray): import pprint as _pprint _pprint.pprint(__a, stream=stream, indent=indent, width=width) return group = int(group) if group < 1: raise ValueError('group must be >= 1') indent = int(indent) if indent < 0: raise ValueError('indent must be >= 0') width = int(width) if width <= indent: raise ValueError('width must be > %d (indent)' % indent) gpl = (width - indent) // (group + 1) # groups per line epl = group * gpl # elements per line if epl == 0: epl = width - indent - 2 type_name = type(__a).__name__ # here 4 is len("'()'") multiline = len(type_name) + 4 + len(__a) + len(__a) // group >= width if multiline: quotes = "'''" elif __a: quotes = "'" else: quotes = "" stream.write("%s(%s" % (type_name, quotes)) for i, b in enumerate(__a): if multiline and i % epl == 0: stream.write('\n%s' % (indent * ' ')) if i % group == 0 and i % epl != 0: stream.write(' ') stream.write(str(b)) if multiline: stream.write('\n') stream.write("%s)\n" % quotes) stream.flush() def strip(__a, mode='right'): """strip(bitarray, /, mode='right') -> bitarray Return a new bitarray with zeros stripped from left, right or both ends. 
Allowed values for mode are the strings: `left`, `right`, `both` """ if not isinstance(mode, str): raise TypeError("str expected for mode, got '%s'" % type(__a).__name__) if mode not in ('left', 'right', 'both'): raise ValueError("mode must be 'left', 'right' or 'both', got %r" % mode) start = None if mode == 'right' else __a.find(1) if start == -1: return __a[:0] stop = None if mode == 'left' else __a.find(1, right=1) + 1 return __a[start:stop] def intervals(__a): """intervals(bitarray, /) -> iterator Compute all uninterrupted intervals of 1s and 0s, and return an iterator over tuples `(value, start, stop)`. The intervals are guaranteed to be in order, and their size is always non-zero (`stop - start > 0`). """ try: value = __a[0] # value of current interval except IndexError: return n = len(__a) stop = 0 # "previous" stop - becomes next start while stop < n: start = stop # assert __a[start] == value try: # find next occurrence of opposite value stop = __a.index(not value, start) except ValueError: stop = n yield int(value), start, stop value = not value # next interval has opposite value def ba2int(__a, signed=False): """ba2int(bitarray, /, signed=False) -> int Convert the given bitarray to an integer. The bit-endianness of the bitarray is respected. `signed` indicates whether two's complement is used to represent the integer. """ if not isinstance(__a, bitarray): raise TypeError("bitarray expected, got '%s'" % type(__a).__name__) length = len(__a) if length == 0: raise ValueError("non-empty bitarray expected") if __a.padbits: pad = zeros(__a.padbits, __a.endian) __a = __a + pad if __a.endian == "little" else pad + __a res = int.from_bytes(__a.tobytes(), byteorder=__a.endian) if signed and res >> length - 1: res -= 1 << length return res def int2ba(__i, length=None, endian=None, signed=False): """int2ba(int, /, length=None, endian=None, signed=False) -> bitarray Convert the given integer to a bitarray (with given bit-endianness, and no leading (big-endian) / trailing (little-endian) zeros), unless the `length` of the bitarray is provided. An `OverflowError` is raised if the integer is not representable with the given number of bits. `signed` determines whether two's complement is used to represent the integer, and requires `length` to be provided. 
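    For example (illustrative, assuming the default big bit-endianness):
    int2ba(6) gives bitarray('110'), int2ba(6, length=8) gives
    bitarray('00000110'), and int2ba(-6, length=8, signed=True) gives
    bitarray('11111010').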
""" if not isinstance(__i, int): raise TypeError("int expected, got '%s'" % type(__i).__name__) if length is not None: if not isinstance(length, int): raise TypeError("int expected for argument 'length'") if length <= 0: raise ValueError("length must be > 0") if signed: if length is None: raise TypeError("signed requires argument 'length'") m = 1 << length - 1 if not (-m <= __i < m): raise OverflowError("signed integer not in range(%d, %d), " "got %d" % (-m, m, __i)) if __i < 0: __i += 1 << length else: # unsigned if length and __i >> length: raise OverflowError("unsigned integer not in range(0, %d), " "got %d" % (1 << length, __i)) a = bitarray(0, endian) b = __i.to_bytes(bits2bytes(__i.bit_length()), byteorder=a.endian) a.frombytes(b) le = a.endian == 'little' if length is None: return strip(a, 'right' if le else 'left') if a else a + '0' if len(a) > length: return a[:length] if le else a[-length:] if len(a) == length: return a # len(a) < length, we need padding pad = zeros(length - len(a), a.endian) return a + pad if le else pad + a # ------------------------------ Huffman coding ----------------------------- def _huffman_tree(__freq_map): """_huffman_tree(dict, /) -> Node Given a dict mapping symbols to their frequency, construct a Huffman tree and return its root node. """ from heapq import heappush, heappop class Node(object): """ There are to tyes of Node instances (both have 'freq' attribute): * leaf node: has 'symbol' attribute * parent node: has 'child' attribute (tuple with both children) """ def __lt__(self, other): # heapq needs to be able to compare the nodes return self.freq < other.freq minheap = [] # create all leaf nodes and push them onto the queue for sym, f in __freq_map.items(): leaf = Node() leaf.symbol = sym leaf.freq = f heappush(minheap, leaf) # repeat the process until only one node remains while len(minheap) > 1: # take the two nodes with lowest frequencies from the queue # to construct a new parent node and push it onto the queue parent = Node() parent.child = heappop(minheap), heappop(minheap) parent.freq = parent.child[0].freq + parent.child[1].freq heappush(minheap, parent) # the single remaining node is the root of the Huffman tree return minheap[0] def huffman_code(__freq_map, endian=None): """huffman_code(dict, /, endian=None) -> dict Given a frequency map, a dictionary mapping symbols to their frequency, calculate the Huffman code, i.e. a dict mapping those symbols to bitarrays (with given bit-endianness). Note that the symbols are not limited to being strings. Symbols may be any hashable object. """ if not isinstance(__freq_map, dict): raise TypeError("dict expected, got '%s'" % type(__freq_map).__name__) if len(__freq_map) < 2: if len(__freq_map) == 0: raise ValueError("cannot create Huffman code with no symbols") # Only one symbol: Normally if only one symbol is given, the code # could be represented with zero bits. However here, the code should # be at least one bit for the .encode() and .decode() methods to work. # So we represent the symbol by a single code of length one, in # particular one 0 bit. This is an incomplete code, since if a 1 bit # is received, it has no meaning and will result in an error. 
sym = list(__freq_map)[0] return {sym: bitarray('0', endian)} result = {} def traverse(nd, prefix=bitarray(0, endian)): try: # leaf result[nd.symbol] = prefix except AttributeError: # parent, so traverse each child traverse(nd.child[0], prefix + '0') traverse(nd.child[1], prefix + '1') traverse(_huffman_tree(__freq_map)) return result def canonical_huffman(__freq_map): """canonical_huffman(dict, /) -> tuple Given a frequency map, a dictionary mapping symbols to their frequency, calculate the canonical Huffman code. Returns a tuple containing: 0. the canonical Huffman code as a dict mapping symbols to bitarrays 1. a list containing the number of symbols of each code length 2. a list of symbols in canonical order Note: the two lists may be used as input for `canonical_decode()`. """ if not isinstance(__freq_map, dict): raise TypeError("dict expected, got '%s'" % type(__freq_map).__name__) if len(__freq_map) < 2: if len(__freq_map) == 0: raise ValueError("cannot create Huffman code with no symbols") # Only one symbol: see note above in huffman_code() sym = list(__freq_map)[0] return {sym: bitarray('0', 'big')}, [0, 1], [sym] code_length = {} # map symbols to their code length def traverse(nd, length=0): # traverse the Huffman tree, but (unlike in huffman_code() above) we # now just simply record the length for reaching each symbol try: # leaf code_length[nd.symbol] = length except AttributeError: # parent, so traverse each child traverse(nd.child[0], length + 1) traverse(nd.child[1], length + 1) traverse(_huffman_tree(__freq_map)) # We now have a mapping of symbols to their code length, which is all we # need to construct a list of tuples (symbol, code length) sorted by # code length: table = sorted(code_length.items(), key=lambda item: item[1]) maxbits = table[-1][1] codedict = {} count = (maxbits + 1) * [0] code = 0 for i, (sym, length) in enumerate(table): codedict[sym] = int2ba(code, length, 'big') count[length] += 1 if i + 1 < len(table): code += 1 code <<= table[i + 1][1] - length return codedict, count, [item[0] for item in table] bitarray-3.7.1/bitarray/util.pyi000066400000000000000000000056031505414144000166560ustar00rootroot00000000000000# Copyright (c) 2021 - 2025, Ilan Schnell; All Rights Reserved from collections import Counter from collections.abc import Iterable, Iterator, Sequence from typing import Any, AnyStr, BinaryIO, Optional, Union from bitarray import bitarray, BytesLike, CodeDict FreqMap = Union[Counter[int], dict[Any, Union[int, float]]] def zeros(length: int, endian: Optional[str] = ...) -> bitarray: ... def ones(length: int, endian: Optional[str] = ...) -> bitarray: ... def urandom(length: int, endian: Optional[str] = ...) -> bitarray: ... def random_p(n: int, p = ..., endian: Optional[str] = ...) -> bitarray: ... def random_k(n: int, k: int, endian: Optional[str] = ...) -> bitarray: ... def gen_primes(n: int, endian: Optional[str] = ..., odd: Optional[bool] = ...) -> bitarray: ... def pprint(a: Any, stream: BinaryIO = ..., group: int = ..., indent: int = ..., width: int = ...) -> None: ... def strip(a: bitarray, mode: str = ...) -> bitarray: ... def count_n(a: bitarray, n: int, value: int = ...) -> int: ... def parity(a: bitarray) -> int: ... def sum_indices(a: bitarray, mode: int = ...) -> int: ... def xor_indices(a: bitarray) -> int: ... def count_and(a: bitarray, b: bitarray) -> int: ... def count_or(a: bitarray, b: bitarray) -> int: ... def count_xor(a: bitarray, b: bitarray) -> int: ... def any_and(a: bitarray, b: bitarray) -> bool: ... 
def subset(a: bitarray, b: bitarray) -> bool: ... def correspond_all(a: bitarray, b: bitarray) -> tuple: ... def byteswap(a: BytesLike, n: int) -> None: ... def intervals(a: bitarray) -> Iterator: ... def ba2hex(a: bitarray, group: int = ..., sep: str = ...) -> str: ... def hex2ba(s: AnyStr, endian: Optional[str] = ...) -> bitarray: ... def ba2base(n: int, a: bitarray, group: int = ..., sep: str = ...) -> str: ... def base2ba(n: int, s: AnyStr, endian: Optional[str] = ...) -> bitarray: ... def ba2int(a: bitarray, signed: int = ...) -> int: ... def int2ba(i: int, length: int = ..., endian: str = ..., signed: int = ...) -> bitarray: ... def serialize(a: bitarray) -> bytes: ... def deserialize(b: BytesLike) -> bitarray: ... def sc_encode(a: bitarray) -> bytes: ... def sc_decode(stream: Iterable[int]) -> bitarray: ... def vl_encode(a: bitarray) -> bytes: ... def vl_decode(stream: Iterable[int], endian: Optional[str] = ...) -> bitarray: ... def _huffman_tree(freq_map: FreqMap) -> Any: ... def huffman_code(freq_map: FreqMap, endian: Optional[str] = ...) -> CodeDict: ... def canonical_huffman(Freq_Map) -> tuple[CodeDict, list, list]: ... def canonical_decode(a: bitarray, count: Sequence[int], symbol: Iterable[Any]) -> Iterator: ... bitarray-3.7.1/contributing.md000066400000000000000000000033441505414144000163720ustar00rootroot00000000000000Contributing to bitarray ======================== The bitarray type is very stable and feature complete at this point, which means that pull requests to `bitarray/_bitarray.c` will most likely be rejected, unless they improve readability and performance. There may be room for improvements/additions in the `bitarray.util` module, added in the 1.2.0 release. However, due to the slow release cycle of this package, it may be more practical to create your own library which depends on bitarray. This is completely possible, even on the C-level. Please study the implementation of `bitarray/_util.c` for details. In particular for C extensions to work with the bitarray type, it is important that the `bitarrayobject` struct is defined in the same way: typedef struct { PyObject_VAR_HEAD char *ob_item; /* buffer */ Py_ssize_t allocated; /* allocated buffer size (in bytes) */ Py_ssize_t nbits; /* length of bitarray, i.e. elements */ int endian; /* bit-endianness of bitarray */ int ob_exports; /* how many buffer exports */ PyObject *weakreflist; /* list of weak references */ Py_buffer *buffer; /* used when importing a buffer */ int readonly; /* buffer is readonly */ } bitarrayobject; /* member endian may have these values */ #define ENDIAN_LITTLE 0 #define ENDIAN_BIG 1 The last two members, `buffer` and `readonly`, were introduced in bitarray version 2.3. If those are not needed, and one wants to maintain backward compatibility with earlier version, one can omit them. These essential (as well as other useful) declarations can be found in `bitarray/bitarray.h`. bitarray-3.7.1/devel/000077500000000000000000000000001505414144000144345ustar00rootroot00000000000000bitarray-3.7.1/devel/README000066400000000000000000000023011505414144000153100ustar00rootroot00000000000000This directory contains files that are relevant for development of bitarray, as well as additional tests and verifications. copy_n.py Illustrate how copy_n() in _bitarray.c works. This is essentially a Python implementation of copy_n() with output of the different stages of the bitarray we copy into. 
random/ development files for statistical tests resize/ Things to study the bitarray resize function, including the growth pattern it creates and tests for the current implementation. shift_r8.c C program is to illustrate and document shift_r8() test_debug.py Tests for internal C code which is exposed in debug builds. These tests will only work when bitarray is compiled in debug mode. test_sum_indices.py Additional tests for util.sum_indices() for very large n, as well as some verifications and test for the internal function _ssqi(). test_random.py * statistical tests for random functions in bitarray.util * verification of some statistical equations * verification of code used in random_k() and random_p() tricks.py Some little tricks and verifications for some code which is used mostly in the C implementation of bitarray. bitarray-3.7.1/devel/architecture.txt000066400000000000000000000015111505414144000176550ustar00rootroot00000000000000Dependency of files ------------------- A depends on B A --------> B A imports B in a function A - - - - > B +------------+ +------------+ | util.py |------->| _util.c | +------------+ +------------+ | ^ | | | +--------------------------------------------------+ | | | V v | +-------------+ - - - > +------------------+ - - - > +--------------+ | __init__.py | | test_bitarray.py | | test_util.py | +-------------+ <------ +------------------+ <------ +--------------+ | V +-------------+ | _bitarray.c | +-------------+ bitarray-3.7.1/devel/copy_n.py000066400000000000000000000156311505414144000163030ustar00rootroot00000000000000""" The purpose of this script is to illustrate how copy_n() in _bitarray.c works. This is a Python implementation of copy_n() with output of the different stages of the bitarray we copy into. Sample output: a = 21 b = 6 n = 31 p1 = 2 p2 = 6 p3 = 0 sa = 5 sb = -6 -> p3 = 1 -> sb = 2 other bitarray('00101110 11111001 01011101 11001011 10110000 01011110 011') b..b+n ^^ ^^^^^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^^ ======== ======== ======== ======== 33 self bitarray('01011101 11100101 01110101 01011001 01110100 10001010 01111011') a..a+n ^^^ ^^^^^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^ 11111 2222 memmove 4 ======== ======== ======== ======== bitarray('01011101 11100101 11111001 01011101 11001011 10110000 01111011') rshift 7 >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> bitarray('01011101 11100101 00000001 11110010 10111011 10010111 01100000') = ======== ======== ======== ======== 11111 2222 33 bitarray('01011101 11100101 01110101 11110010 10111011 10010111 01101011') """ from io import StringIO from bitarray import bitarray, bits2bytes from bitarray.util import pprint verbose = False def mark_range_n(i, n, c, text=''): a = bitarray(i * '0' + n * '1') f = StringIO() pprint(a, stream=f) s = f.getvalue() print("%-10s" % text + ''.join(c if e == '1' else ' ' for e in s[10:])) def mark_range(i, j, c, text=''): mark_range_n(i, j - i, c, text) def shift_r8(self, a, b, k): """ shift bits in byte-range(a, b) by k bits to right (in-place) """ assert 0 <= k < 8 assert 0 <= a <= self.nbytes assert 0 <= b <= self.nbytes if k == 0 or a >= b: return self[8 * a : 8 * b] >>= k def is_be(self): return self.endian == 'big' bitmask_table = [ [0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80], # little endian [0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01], # big endian ] ones_table = [ [0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f], # little endian [0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe], # big endian ] def copy_n(self, a, other, b, n): """ copy n bits from other (starting at 
b) onto self (starting at a) """ p1 = a // 8 # first byte to be copied to p2 = (a + n - 1) // 8 # last byte to be copied to p3 = b // 8 # first byte to be memmoved from sa = a % 8 sb = -(b % 8) t3 = 0 if verbose: print('a =', a) print('b =', b) print('n =', n) print('p1 =', p1) print('p2 =', p2) print('p3 =', p3) print('sa =', sa) print('sb =', sb) assert 0 <= n <= min(len(self), len(other)) assert 0 <= a <= len(self) - n assert 0 <= b <= len(other) - n if n == 0 or (self is other and a == b): return if sa + sb < 0: # In order to keep total right shift (sa + sb) positive, we # increase the first byte to be copied from (p3) by one byte, # such that memmove() will move all bytes one extra to the left. # As other may be self, we need to store this byte as its memory # location may be overwritten or changed by memmove or rshift. t3 = memoryview(other)[p3] p3 += 1 sb += 8 if verbose: print(' -> p3 =', p3) print(' -> sb =', sb) assert a - sa == 8 * p1 assert b + sb == 8 * p3 assert p1 <= p2 assert 8 * p2 < a + n <= 8 * (p2 + 1) if verbose: print('other') pprint(other) mark_range_n(b, n, '^', 'b..b+n') if n > sb: mark_range_n(8 * p3, 8 * bits2bytes(n - sb), '=') mark_range_n(b, sb, '3') print('self') pprint(self) mark_range_n(a, n, '^', 'a..a+n') if n > sb: mark_range(8 * p1, a, '1') mark_range(a + n, 8 * p2 + 8, '2') if n > sb: m = bits2bytes(n - sb) # number of bytes memmoved table = ones_table[is_be(self)] m1 = table[sa] m2 = table[(a + n) % 8] t1 = memoryview(self)[p1] t2 = memoryview(self)[p2] assert p1 + m in [p2, p2 + 1] assert p1 + m <= self.nbytes and p3 + m <= other.nbytes # aligned copy -- copy first sb bits (if any) later memoryview(self)[p1:p1 + m] = memoryview(other)[p3:p3 + m] if self.endian != other.endian: self.bytereverse(p1, p1 + m) if verbose: print('memmove', m) mark_range_n(8 * p1, 8 * m, '=') pprint(self) print('rshift', sa + sb) mark_range(8 * p1, 8 * (p2 + 1), '>') shift_r8(self, p1, p2 + 1, sa + sb) # right shift if verbose: pprint(self) mark_range(8 * p1 + sa + sb, 8 * (p2 + 1), '=') if m1: # restore bits at p1 if verbose: mark_range(8 * p1, a, '1') memoryview(self)[p1] = (memoryview(self)[p1] & ~m1) | (t1 & m1) if m2: # restore bits at p2 if verbose: mark_range(a + n, 8 * p2 + 8, '2') memoryview(self)[p2] = (memoryview(self)[p2] & m2) | (t2 & ~m2) if verbose: mark_range_n(a, sb, '3') for i in range(min(sb, n)): # copy first sb bits self[i + a] = bool(t3 & bitmask_table[is_be(other)][(i + b) % 8]) if verbose: pprint(self) def test_copy_n(): from random import choice, randrange, randint from bitarray.util import urandom def random_endian(): return choice(['little', 'big']) max_size = 56 for _ in range(10_000): N = randrange(max_size) M = randrange(max_size) n = randint(0, min(N, M)) a = randint(0, N - n) b = randint(0, M - n) x = urandom(N, random_endian()) y = urandom(M, random_endian()) z = x.copy() copy_n(x, a, y, b, n) z[a:a + n] = y[b:b + n] assert x == z for _ in range(10_000): N = randrange(max_size) n = randint(0, N) a = randint(0, N - n) b = randint(0, N - n) x = urandom(N, random_endian()) z = x.copy() copy_n(x, a, x, b, n) z[a:a + n] = z[b:b + n] assert x == z if __name__ == '__main__': test_copy_n() verbose = True other = bitarray( '00101110 11111001 01011101 11001011 10110000 01011110 011') self = bitarray( '01011101 11100101 01110101 01011001 01110100 10001010 01111011') copy_n(self, 21, other, 6, 31) assert self == bitarray( '01011101 11100101 01110101 11110010 10111011 10010111 01101011') #copy_n(self, 2, other, 12, 1) #copy_n(self, 9, other, 17, 23) 
bitarray-3.7.1/devel/random/000077500000000000000000000000001505414144000157145ustar00rootroot00000000000000bitarray-3.7.1/devel/random/binomial.py000066400000000000000000000103261505414144000200620ustar00rootroot00000000000000import sys from math import sqrt from scipy.special import betainc class BinomialDist: # This class describes the binomial distribution with parameters n and p. # That is, the (discrete) probability distribution of the number of # successes in a sequence of n independent Bernoulli trails, with each # trail having a probability p of success. def __init__(self, n, p): assert n > 0 assert 0.0 <= p <= 1.0 self.n = n self.p = p self.q = 1.0 - p self.mu = n * p self.sigma = sqrt(n * p * self.q) def print(self): print("n = %d p = %f" % (self.n, self.p)) print("mu = %f" % self.mu) print("sigma = %f" % self.sigma) def pmf(self, k): assert 0 <= k <= self.n, k # The reason we use .cdf() to calculate the PMF is because # comb(n, k) * p ** k * (1.0 - p) ** (n - k) will fail for large # n, whereas .cdf() uses the regularized incomplete beta function. return self.cdf(k) - self.cdf(k - 1) def cdf(self, k): return betainc(self.n - k, k + 1, self.q) def range_k(self, k1, k2): "probability for k being in k1 <= k <= k2" assert 0 <= k1 <= k2 <= self.n return self.cdf(k2) - self.cdf(k1 - 1) # --------------------------------------------------------------------------- import unittest from math import comb class BinomialDistTests(unittest.TestCase): def test_pmf_simple(self): b = BinomialDist(1, 0.7) self.assertAlmostEqual(b.pmf(0), 0.3) self.assertAlmostEqual(b.pmf(1), 0.7) b = BinomialDist(2, 0.5) self.assertAlmostEqual(b.pmf(0), 0.25) self.assertAlmostEqual(b.pmf(1), 0.50) self.assertAlmostEqual(b.pmf(2), 0.25) def test_pmf_sum(self): for n in 10, 100, 1_000, 10_000: b = BinomialDist(n, 0.5) self.assertAlmostEqual(sum(b.pmf(k) for k in range(n + 1)), 1.0) def test_pmf(self): for n in 10, 50, 100, 250: for p in 0.1, 0.2, 0.5, 0.7: b = BinomialDist(n, p) for k in range(n + 1): res = comb(n, k) * p ** k * (1.0 - p) ** (n - k) self.assertAlmostEqual(b.pmf(k), res, delta=1e-14) def test_cdf(self): for n in 5, 50, 500: b = BinomialDist(n, 0.3) self.assertAlmostEqual(b.cdf(-1), 0.0) self.assertAlmostEqual(b.cdf(n), 1.0) sm = 0.0 for k in range(n + 1): sm += b.pmf(k) self.assertAlmostEqual(b.cdf(k), sm) def test_range_k(self): n = 10_000 for p in 0.1, 0.2, 0.5, 0.7: b = BinomialDist(n, p) self.assertAlmostEqual(b.range_k(0, n), 1.0) for k in range(n + 1): self.assertAlmostEqual(b.range_k(k, k), b.pmf(k)) def test_range_half(self): n = 1_000_001 b = BinomialDist(n, 0.5) self.assertAlmostEqual(b.range_k(0, 500_000), 0.5) self.assertAlmostEqual(b.range_k(500_001, n), 0.5) if __name__ == '__main__': if len(sys.argv) == 1: unittest.main() # This code was used to create some of the tests for util.random_p() # in ../test_random.py # # python binomial.py 250_000 5 0.3 # python binomial.py 100_000 100 .375 37..48 # python binomial.py 25_000 100_000 .5 48_000..50_000 50_000..50_200 m, n = [int(i) for i in sys.argv[1:3]] p = float(sys.argv[3]) bd = BinomialDist(n, p) bd.print() if n <= 100_000: p_tot = 0.0 for k in range(n + 1): p = bd.pmf(k) p_tot += p if p < 0.01: continue bb = BinomialDist(m, p) fmt = "self.assertTrue(abs(C[{:d}] - {:6_d}) <= {:6_d}) # p = {:f}" print(fmt.format(k, round(bb.mu), round(10 * bb.sigma), p)) assert abs(p_tot - 1.0) < 1e-15, p_tot for s in sys.argv[4:]: k1, k2 = [int(t) for t in s.split("..")] p = bd.range_k(k1, k2) bb = BinomialDist(m, p) print("x = sum(C[k] for k in %s)" % 
range(k1, k2 + 1)) fmt = "self.assertTrue(abs(x - {:6_d}) <= {:6_d}) # p = {:f}" print(fmt.format(round(bb.mu), round(10 * bb.sigma), p)) bitarray-3.7.1/devel/random/plot.py000066400000000000000000000022211505414144000172410ustar00rootroot00000000000000from collections import Counter from statistics import fmean, pstdev from matplotlib import pyplot as plt from bitarray.util import random_k, sum_indices from sample import SampleMeanDist # The code will also work, if you change these parameters here: SMD = SampleMeanDist(n=1_000, k=600) M = 100_000 # number of trails DX = 2.5 # width used for counting trail outcomes def plot_pdf(plt, xmin, xmax): X = [] Y = [] n = 2_000 for i in range(n + 1): x = xmin + i * (xmax - xmin) / n X.append(x) Y.append(SMD.pdf(x) * DX) plt.plot(X, Y) def plot_count(plt, C): X = [] Y = [] for i in range(min(C), max(C) + 1): x = i * DX X.append(x) Y.append(C[i] / M) plt.scatter(X, Y, color='red') if __name__ == '__main__': SMD.print() C = Counter() values = [] for _ in range(M): x = sum_indices(random_k(SMD.n, SMD.k)) / SMD.k C[round(x / DX)] += 1 values.append(x) assert C.total() == M print("mean", fmean(values)) print("stdev", pstdev(values, SMD.mu)) plot_count(plt, C) plot_pdf(plt, min(C) * DX, max(C) * DX) plt.show() bitarray-3.7.1/devel/random/sample.py000066400000000000000000000106541505414144000175550ustar00rootroot00000000000000import sys from math import sqrt, erf, exp, pi class SampleMeanDist: # This class describes the mean of a discrete uniform distribution # without replacement of k integers in range(n). # That is, from the integers in range(n) (population), we take a random # sample of k (without replacement) and calculate their mean value x. # The distribution of x is what we are interested it. def __init__(self, n, k): assert n >= 1 assert 1 <= k < n self.n = n self.k = k # the sample mean is the population mean self.mu = 0.5 * (n - 1) # sum(range(n)) / n = (n - 1) / 2 # The standard deviation of a sample, is also called standard error. # The standard error of the mean is the standard deviation of the # population, divided by the square root of the sample size k. # The variance of the population is: # # (n**2 - 1) / 12 (see below) # # So for the standard deviation, we get: self.sigma = sqrt((n * n - 1) / (12 * k)) # Finite population correction (FPC) # ---------------------------------- # # Let us consider two cases: # # (a) For very small sample sizes k (compared to the population # size n), the FPC is close to one. That is, it has no effect # on the standard error. Our distribution is basically the same # as if we had replacement. # # (b) For sample sizes k close to the population size n, the FPC (and # hence the standard error) becomes zero. That is, when we # sample all elements, we always get the same sample mean (the # population mean) with no standard error. 
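        # Quick numerical check (illustrative): for a population of
        # n = 100, a sample of k = 1 gives fpc = sqrt(99/99) = 1.0
        # (case a), while k = 99 gives fpc = sqrt(1/99) ~= 0.1 (case b,
        # approaching zero as k approaches n).
        #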
# fpc = sqrt((n - k) / (n - 1)) self.sigma *= fpc def print(self): print("n = %d k = %d" % (self.n, self.k)) print("mu = %f" % self.mu) print("sigma = %f" % self.sigma) def pdf(self, x): return exp(-0.5 * ((x - self.mu) / self.sigma) ** 2) / ( sqrt(2.0 * pi) * self.sigma) def cdf(self, x): return 0.5 * (1.0 + erf((x - self.mu) / (self.sigma * sqrt(2.0)))) def range_p(self, x1, x2): "probability for x (the mean of the sample) being in x1 < x < x2" assert 0 <= x1 <= x2 <= self.n return self.cdf(x2) - self.cdf(x1) # --------------------------------------------------------------------------- import unittest class SampleMeanDistTests(unittest.TestCase): def test_pdf(self): n, k = 100, 30 smd = SampleMeanDist(n, k) N = 1000 dx = n / N self.assertAlmostEqual( sum(smd.pdf(i * dx) * dx for i in range(N + 1)), 1.0) def test_cdf(self): smd = SampleMeanDist(100, 30) self.assertAlmostEqual(smd.cdf( 0.0), 0.0) self.assertAlmostEqual(smd.cdf( 49.5), 0.5) self.assertAlmostEqual(smd.cdf(100.0), 1.0) def test_range_half(self): for k in 10, 20, 50, 100, 150: smd = SampleMeanDist(200, k) self.assertAlmostEqual(smd.range_p(0.0, 99.5), 0.5) self.assertAlmostEqual(smd.range_p(99.5, 200), 0.5) def test_verify_pop_mean(self): for n in range(1, 100): self.assertEqual((n - 1) /2, sum(range(n)) / n) def test_verify_pop_variance(self): for n in range(1, 100): mean = (n - 1) / 2 sigma2 = sum((j - mean)**2 for j in range(n)) / n self.assertEqual((n * n - 1) / 12, sigma2) if __name__ == '__main__': if len(sys.argv) == 1: unittest.main() # This code was used to create some of the tests for util.random_k() # in ../test_random.py # # python sample.py 100_000 1000 500 0 500 510 520 1000 # python sample.py 100_000 500 400 200 249.5 251 255 260 300 from itertools import pairwise m, n, k = [int(i) for i in sys.argv[1:4]] smd = SampleMeanDist(n, k) smd.print() ranges = [float(x) for x in sys.argv[4:]] print("ranges = %r" % ranges) p_tot = 0.0 for i, (x1, x2) in enumerate(pairwise(ranges)): p = smd.range_p(x1, x2) p_tot += p mu = m * p sigma = sqrt(m * p * (1.0 - p)) if mu < 10 * sigma: continue fmt = "self.assertTrue(abs(C[{:d}] - {:6_d}) <= {:6_d}) # p = {:f}" print(fmt.format(i, round(mu), round(10 * sigma), p)) if abs(p_tot - 1.0) > 1e-15: print(p_tot) bitarray-3.7.1/devel/random/test_sample.py000066400000000000000000000016461505414144000206150ustar00rootroot00000000000000import unittest from itertools import pairwise from collections import Counter from statistics import fmean from random import sample class RandomSampleTests(unittest.TestCase): def test_mean(self): # python sample.py 100_000 100 20 0 45.5 49.5 100 M = 100_000 # number of trails N = 100 # population size K = 20 # sample size C = Counter() ranges = [0.0, 45.5, 49.5, 100.0] for _ in range(M): x = fmean(sample(range(N), K)) for i, (x1, x2) in enumerate(pairwise(ranges)): if x1 <= x < x2: C[i] += 1 self.assertEqual(C.total(), M) self.assertTrue(abs(C[0] - 24_529) <= 1_361) # p = 0.245291 self.assertTrue(abs(C[1] - 25_471) <= 1_378) # p = 0.254709 self.assertTrue(abs(C[2] - 50_000) <= 1_581) # p = 0.500000 if __name__ == '__main__': unittest.main() bitarray-3.7.1/devel/resize/000077500000000000000000000000001505414144000157355ustar00rootroot00000000000000bitarray-3.7.1/devel/resize/.gitignore000066400000000000000000000000211505414144000177160ustar00rootroot00000000000000resize pattern-* bitarray-3.7.1/devel/resize/Makefile000066400000000000000000000003551505414144000174000ustar00rootroot00000000000000resize: resize.c gcc -Wall resize.c -o resize test: resize 
./resize >pattern-c.txt python resize.py >pattern-py.txt diff pattern-c.txt pattern-py.txt python test_resize.py clean: rm -f resize rm -f pattern-* rm -rf __pycache__ bitarray-3.7.1/devel/resize/README.md000066400000000000000000000007601505414144000172170ustar00rootroot00000000000000The bitarray resize function and growth pattern =============================================== Running `python resize.py` will display the bitarray growth pattern. This is done by appending one bit to a bitarray in a loop, and displaying the allocated size of the bitarray object each time it changes. The program `resize.c` contains a distilled version of the `resize()` function which contains the implementation of this growth pattern. Running this C program gives exactly the same output. bitarray-3.7.1/devel/resize/resize.c000066400000000000000000000044211505414144000174030ustar00rootroot00000000000000#include #include typedef struct { int size; int nbits; int allocated; } bitarrayobject; /* number of bytes necessary to store given bits */ #define BYTES(bits) (((bits) + 7) >> 3) uint64_t s = 290797; int bbs(void) { s *= s; s %= 50515093; return s % 8000; } void resize(bitarrayobject *self, int nbits) { int size = self->size, allocated = self->allocated; int newsize = BYTES(nbits), new_allocated; if (newsize == size) { self->nbits = nbits; return; } if (newsize == 0) { /* free(self->ob_item) */ self->size = 0; self->allocated = 0; self->nbits = 0; return; } if (allocated >= newsize) { if (newsize >= allocated / 2) { self->size = newsize; self->nbits = nbits; return; } new_allocated = newsize; } else { new_allocated = newsize; if (size != 0 && newsize / 2 <= allocated) { new_allocated += (newsize >> 4) + (newsize < 8 ? 3 : 7); new_allocated &= ~(int) 3; } } /* realloc(self->ob_item) */ self->size = newsize; self->allocated = new_allocated; self->nbits = nbits; } int main() { int i, nbits, prev_alloc = -1; bitarrayobject x; #define SHOW printf("%d %d\n", x.size, x.allocated) x.size = 0; x.allocated = 0; for (nbits = 0; nbits < 1000; nbits++) { if (prev_alloc != x.allocated) SHOW; prev_alloc = x.allocated; resize(&x, nbits); } resize(&x, 800000); SHOW; resize(&x, 400000); SHOW; resize(&x, 399992); SHOW; resize(&x, 500000); SHOW; resize(&x, 0); SHOW; resize(&x, 0); SHOW; resize(&x, 10000); SHOW; resize(&x, 400); SHOW; resize(&x, 600); SHOW; resize(&x, 2000); SHOW; for (nbits = 2000; nbits >= 0; nbits--) { if (prev_alloc != x.allocated) SHOW; prev_alloc = x.allocated; resize(&x, nbits); } SHOW; for (nbits = 0; nbits < 100; nbits += 8) { x.size = 0; x.allocated = 0; resize(&x, nbits); SHOW; } for (i = 0; i < 100000; i++) { resize(&x, bbs()); SHOW; } return 0; } bitarray-3.7.1/devel/resize/resize.py000066400000000000000000000013011505414144000176030ustar00rootroot00000000000000from bitarray import bitarray from test_resize import get_alloc, resize, show s = 290797 def bbs(): global s s = pow(s, 2, 50515093) return s % 8000 a = bitarray() prev = -1 while len(a) < 1_000: alloc = get_alloc(a) if prev != alloc: show(a) prev = alloc a.append(1) for i in 800_000, 400_000, 399_992, 500_000, 0, 0, 10_000, 400, 600, 2_000: resize(a, i) assert len(a) == i show(a) while len(a): alloc = get_alloc(a) if prev != alloc: show(a) prev = alloc a.pop() show(a) for nbits in range(0, 100, 8): a = bitarray() a.extend(bitarray(nbits)) show(a) for _ in range(100_000): resize(a, bbs()) show(a) bitarray-3.7.1/devel/resize/test_resize.py000066400000000000000000000044011505414144000206460ustar00rootroot00000000000000import unittest from 
bitarray import bitarray PATTERN = [0, 1, 4, 8, 16, 24, 32, 40, 48, 56, 64, 76, 88, 100, 112, 124, 136] def get_alloc(a): info = a.buffer_info() return info.alloc def resize(a, n): increase = n - len(a) if increase > 0: a.extend(bitarray(increase)) elif increase < 0: del a[n:] def show(a): info = a.buffer_info() print('%d %d' % (info.nbytes, info.alloc)) class ResizeTests(unittest.TestCase): def test_pattern(self): pat = [] a = bitarray() prev = -1 while len(a) < 1000: alloc = get_alloc(a) if prev != alloc: pat.append(alloc) prev = alloc a.append(0) self.assertEqual(pat, PATTERN) def test_increase(self): # make sure sequence of appends will always increase allocated size a = bitarray() prev = -1 while len(a) < 100_000: alloc = get_alloc(a) self.assertTrue(prev <= alloc) prev = alloc a.append(1) def test_decrease(self): # ensure that when we start from a large array and delete part, we # always get a decreasing allocation a = bitarray(10_000_000) prev = get_alloc(a) while a: del a[-100_000:] alloc = get_alloc(a) self.assertTrue(alloc <= prev) prev = alloc def test_no_overalloc(self): # initalizing a bitarray from a list or bitarray does not overallocate for n in range(1000): a = bitarray(8 * n * [1]) self.assertEqual(get_alloc(a), n) b = bitarray(a) self.assertEqual(get_alloc(b), n) c = bitarray(8 * n) self.assertEqual(get_alloc(c), n) def test_no_overalloc_large(self): # starting from a large bitarray, make we sure we don't realloc each # time we extend a = bitarray(1_000_000) # no overallocation self.assertEqual(get_alloc(a), 125_000) a.extend(bitarray(8)) # overallocation happens here alloc = get_alloc(a) for _ in range(1000): a.extend(bitarray(8)) self.assertEqual(get_alloc(a), alloc) if __name__ == '__main__': unittest.main() bitarray-3.7.1/devel/sc/000077500000000000000000000000001505414144000150415ustar00rootroot00000000000000bitarray-3.7.1/devel/sc/compress.py000066400000000000000000000030551505414144000172510ustar00rootroot00000000000000import bz2 import gzip import random from time import perf_counter from bitarray.util import ( ones, random_p, serialize, deserialize, sc_encode, sc_decode, vl_encode, vl_decode, ) from sc_stat import sc_stat def p_range(): n = 1 << 28 p = 1.0 a = ones(n) print(" p ratio raw" " type 1 type 2 type 3 type 4") print(" " + 73 *'-') while p > 1e-8: b = sc_encode(a) blocks = sc_stat(b)['blocks'] print(' %11.8f %11.8f %8d %8d %8d %8d %8d' % tuple([p, len(b) / (n / 8)] + blocks)) assert a == sc_decode(b) a &= random_p(n) p /= 2 def compare(): n = 1 << 26 a = random_p(n, 1.0 / 1024) raw = a.tobytes() print(20 * ' ' + "compress (ms) decompress (ms) ratio") print(70 * '-') for name, f_e, f_d in [ ('serialize', serialize, deserialize), ('vl', vl_encode, vl_decode), ('sc' , sc_encode, sc_decode), ('gzip', gzip.compress, gzip.decompress), ('bz2', bz2.compress, bz2.decompress)]: x = a if name in ('serialize', 'vl', 'sc') else raw t0 = perf_counter() b = f_e(x) # compression t1 = perf_counter() c = f_d(b) # decompression t2 = perf_counter() print(" %-11s %16.3f %16.3f %16.4f" % (name, 1000 * (t1 - t0), 1000 * (t2 - t1), len(b) / len(raw))) assert c == x if __name__ == '__main__': random.seed(123) compare() p_range() bitarray-3.7.1/devel/sc/sc_stat.py000066400000000000000000000120011505414144000170450ustar00rootroot00000000000000from itertools import islice from random import randrange bitcount_table = [bin(i)[2:].count('1') for i in range(256)] def read_n(n, stream): i = 0 for j in range(n): i |= next(stream) << 8 * j return i def sc_decode_header(stream): head = 
next(stream) if head & 0xe0: raise ValueError("invalid header: 0x%02x" % head) endian = 'big' if head & 0x10 else 'little' length = head & 0x0f nbits = read_n(length, stream) return endian, nbits def sc_decode_block(stream, stats): head = next(stream) if head < 0xa0: # type 0 - 0x00 -- 0x9f if head == 0: # stop byte return False n = 0 k = head if head <= 32 else 32 * (head - 31) elif head < 0xc0: # type 1 - 0xa0 .. 0xbf n = 1 k = head - 0xa0 elif 0xc2 <= head <= 0xc4: # type 2 .. 4 - 0xc2 .. 0xc4 n = head - 0xc0 k = next(stream) # index count byte else: raise ValueError("Invalid block head: 0x%02x" % head) stats['blocks'][n] += 1 # consume block data nconsume = max(1, n) * k # size of block data to consume below if stats.get('count'): if n == 0: stats['count'][0] += sum(bitcount_table[next(stream)] for _ in range(k)) nconsume = 0 else: stats['count'][n] += k next(islice(stream, nconsume, nconsume), None) return True def sc_stat(stream, count=False): """sc_stat(stream) -> dict Decode a compressed byte stream (generated by `sc_encode()` and return useful statistics. In particular, a list of length 5 with the count for each block type. """ stream = iter(stream) endian, nbits = sc_decode_header(stream) stats = {'endian': endian, 'nbits': nbits, 'blocks': 5 * [0]} if count: stats['count'] = 5 * [0] while sc_decode_block(stream, stats): pass return stats # --------------------------------------------------------------------------- import unittest from bitarray import bitarray from bitarray.util import sc_encode, sc_decode class Tests(unittest.TestCase): def test_empty(self): blob = b"\x01\x00\0" self.assertEqual(sc_stat(blob), {'endian': 'little', 'nbits': 0, 'blocks': [0, 0, 0, 0, 0]}) self.assertEqual(sc_decode(blob), bitarray()) def test_zeros_explitcit(self): for blob, blocks in [ (b"\x11\x08\0", [0, 0, 0, 0, 0]), (b"\x11\x08\x01\x00\0", [1, 0, 0, 0, 0]), (b"\x11\x08\xa0\0", [0, 1, 0, 0, 0]), (b"\x11\x08\xc2\x00\0", [0, 0, 1, 0, 0]), (b"\x11\x08\xc3\x00\0", [0, 0, 0, 1, 0]), (b"\x11\x08\xc4\x00\0", [0, 0, 0, 0, 1]), ]: stat = sc_stat(blob, count=True) self.assertEqual(stat['blocks'], blocks) self.assertEqual(stat['count'], 5 * [0]) self.assertEqual(sc_decode(blob), bitarray(8)) def test_untouch(self): stream = iter(b"\x01\x07\x01\x73\0XYZ") self.assertEqual(sc_decode(stream), bitarray("1100111")) self.assertEqual(next(stream), ord('X')) def test_random(self): n = 20_000_000 a = bitarray(n) for c in range(0, 21, 2): lst = [randrange(n) for _ in range(1 << c)] a[lst] = 1 stat = sc_stat(sc_encode(a), count=True) # print(c, len(a), a.count(), stat['blocks']) self.assertEqual(sum(stat['count']), a.count()) def test_end_of_stream(self): for blob in [b'', b'\x00', b'\x01', b'\x02\x77', b'\x01\x04\x01', b'\x01\x04\xa1', b'\x01\x04\xa0']: self.assertRaises(StopIteration, sc_stat, blob) self.assertRaises(StopIteration, sc_decode, blob) def test_values(self): b = [0x11, 3, 1, 32, 0] self.assertEqual(sc_decode(b), bitarray("001")) self.assertEqual(sc_stat(b), {'endian': 'big', 'nbits': 3, 'blocks': [1, 0, 0, 0, 0]}) for x in -1, 256: b[-1] = x self.assertRaises(ValueError, sc_stat, b) for x in None, "F", Ellipsis, []: b[-1] = x self.assertRaises(TypeError, sc_stat, b) def test_example(self): n = 1 << 26 a = bitarray(n, 'little') a[:1 << 16] = 1 for i in range(2, 1 << 16): a[n // i] = 1 b = sc_encode(a) stat = sc_stat(b, True) self.assertEqual(stat['blocks'], [2, 147, 3, 1, 1]) self.assertEqual(stat['count'], [1 << 16, 374, 427, 220, 2]) self.assertEqual(a, sc_decode(b)) a.reverse() b = sc_encode(a) 
self.assertEqual(sc_stat(b)['blocks'], [2, 256, 254, 3, 0]) self.assertEqual(a, sc_decode(b)) if __name__ == '__main__': unittest.main() bitarray-3.7.1/devel/set_range_opt.py000066400000000000000000000046451505414144000176500ustar00rootroot00000000000000import unittest from random import getrandbits, randint, randrange from time import perf_counter from bitarray import bitarray from bitarray.util import urandom def nxir(x, start, step): assert x >= start and step > 0 # in Python we can use a simpler expression than in C return (start - x) % step def set_range_opt(self, start, stop, step, value): ca = (start + 7) // 8 cb = stop // 8 m = (cb - ca) * 8 assert m >= 0 assert 0 <= 8 * ca - start < 8 assert 0 <= stop - 8 * cb < 8 mask = bitarray(step, self.endian) mask.setall(not value) mask[nxir(8 * ca, start, step)] = value mask *= (m - 1) // step + 1 del mask[m:] # in the C version we wouldn't bother assert len(mask) % 8 == 0 self[start : 8 * ca : step] = value if value: self[8 * ca : 8 * cb] |= mask else: self[8 * ca : 8 * cb] &= mask self[8 * cb + nxir(8 * cb, start, step) : stop : step] = value class Tests(unittest.TestCase): def test_nxir(self): for _ in range(1000): start = randrange(100) step = randrange(1, 20) x = randrange(start, start + 100) nx = nxir(x, start, step) self.assertTrue(0 <= nx < step) self.assertEqual((x + nx) % step, start % step) def test_setslice_bool_step(self): # this test exercises set_range() when stop is much larger than start for _ in range(5000): n = randrange(3000, 4000) a = urandom(n) aa = a.tolist() s = slice(randrange(1000), randrange(1000, n), randint(1, 100)) self.assertTrue(s.stop - s.start >= 0) slicelength = len(range(n)[s]) self.assertTrue(slicelength > 0) v = getrandbits(1) set_range_opt(a, s.start, s.stop, s.step, v) aa[s] = slicelength * [v] self.assertEqual(a.tolist(), aa) def speed_cmp(): n = 1_000_000 print("n=%d\ntimes in micro seconds\n" % n) print('%8s %12s %12s' % ("step", "this-code", "native")) for step in range(1, 20): a = bitarray(n) b = bitarray(n) t0 = perf_counter() set_range_opt(a, 0, n, step, 1) t1 = perf_counter() b[::step] = 1 t2 = perf_counter() print('%8d %12.3f %12.3f' % (step, 1E6 * (t1 - t0), 1E6 * (t2 - t1))) assert a == b if __name__ == '__main__': speed_cmp() unittest.main() bitarray-3.7.1/devel/shift_r8.c000066400000000000000000000113621505414144000163310ustar00rootroot00000000000000/* The purpose of this C program is to illustrate the functions shift_r8le() and shift_r8be(), which are called from shift_r8(). These functions are symmetrical with the following replacements: PY_LITTLE_ENDIAN <-> PY_BIG_ENDIAN <<= >>= >> << Creating a macro from which both functions can be created is not possible, unless one replaces the existing preprocessor introductions with ordinary if statements. For the sake of simplicity we do not want to do this here, even though it would avoid the spammish repetition. */ #include #include #include #define Py_ssize_t ssize_t static inline uint64_t builtin_bswap64(uint64_t word) { #if (defined(__clang__) || \ (defined(__GNUC__) \ && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 3)))) /* __builtin_bswap64() is available since GCC 4.3. 
*/ # define HAVE_BUILTIN_BSWAP64 1 return __builtin_bswap64(word); #elif defined(_MSC_VER) # define HAVE_BUILTIN_BSWAP64 1 return _byteswap_uint64(word); #else # define HAVE_BUILTIN_BSWAP64 0 abort() #endif } /* machine byte-order */ #define PY_LITTLE_ENDIAN 1 #define PY_BIG_ENDIAN 0 /* bit-endianness */ #define ENDIAN_LITTLE 0 #define ENDIAN_BIG 1 #define BITMASK(endian, i) (((char) 1) << (endian == ENDIAN_LITTLE ? \ ((i) % 8) : (7 - (i) % 8))) /* The following two functions operate on first n bytes in buffer. Within this region, they shift all bits by k positions to right, i.e. towards higher addresses. They operate on little-endian and bit-endian bitarrays respectively. As we shift right, we need to start with the highest address and loop downwards such that lower bytes are still unaltered. */ static void shift_r8le(unsigned char *buff, Py_ssize_t n, int k) { Py_ssize_t w = 0; #if HAVE_BUILTIN_BSWAP64 || PY_LITTLE_ENDIAN /* use shift word */ w = n / 8; /* number of words used for shifting */ n %= 8; /* number of remaining bytes */ #endif while (n--) { /* shift in byte-range(8 * w, n) */ Py_ssize_t i = n + 8 * w; buff[i] <<= k; /* shift byte */ if (n || w) /* add shifted next lower byte */ buff[i] |= buff[i - 1] >> (8 - k); } while (w--) { /* shift in word-range(0, w) */ uint64_t *p = ((uint64_t *) buff) + w; #if HAVE_BUILTIN_BSWAP64 && PY_BIG_ENDIAN *p = builtin_bswap64(*p); *p <<= k; *p = builtin_bswap64(*p); #else *p <<= k; #endif if (w) /* add shifted byte from next lower word */ buff[8 * w] |= buff[8 * w - 1] >> (8 - k); } } static void shift_r8be(unsigned char *buff, Py_ssize_t n, int k) { Py_ssize_t w = 0; #if HAVE_BUILTIN_BSWAP64 || PY_BIG_ENDIAN /* use shift word */ w = n / 8; /* number of words used for shifting */ n %= 8; /* number of remaining bytes */ #endif while (n--) { /* shift in byte-range(8 * w, n) */ Py_ssize_t i = n + 8 * w; buff[i] >>= k; /* shift byte */ if (n || w) /* add shifted next lower byte */ buff[i] |= buff[i - 1] << (8 - k); } while (w--) { /* shift in word-range(0, w) */ uint64_t *p = ((uint64_t *) buff) + w; #if HAVE_BUILTIN_BSWAP64 && PY_LITTLE_ENDIAN *p = builtin_bswap64(*p); *p >>= k; *p = builtin_bswap64(*p); #else *p >>= k; #endif if (w) /* add shifted byte from next lower word */ buff[8 * w] |= buff[8 * w - 1] << (8 - k); } } /* display first nbits bytes of buffer given assumed bit-endianness to one line in stdout */ void display(unsigned char *buffer, Py_ssize_t nbits, int endian) { Py_ssize_t i; for (i = 0; i < nbits; i++) printf("%d", (buffer[i / 8] & BITMASK(endian, i)) ? 
1 : 0); printf("\n"); } int main() { #define NBYTES 10 unsigned char array[NBYTES] = {1, 15, 0, 131, 0, 255, 0, 7, 0, 1}; ssize_t i; if ((PY_LITTLE_ENDIAN != (*((uint64_t *) "\xff\0\0\0\0\0\0\0") == 0xff)) || (PY_BIG_ENDIAN != (*((uint64_t *) "\0\0\0\0\0\0\0\xff") == 0xff))) { printf("Error: machine byte-order\n"); return 1; } for (i = 0; i < 15; i++) { display(array, 77, ENDIAN_LITTLE); shift_r8le(array, NBYTES, 1); } for (i = 0; i < 15; i++) { display(array, 77, ENDIAN_BIG); shift_r8be(array, NBYTES, 1); } return 0; } bitarray-3.7.1/devel/test_debug.py000066400000000000000000000374251505414144000171460ustar00rootroot00000000000000import os import sys import unittest from random import randint, randrange from bitarray import bitarray, _sysinfo from bitarray.util import zeros, ones, int2ba, parity from bitarray.test_bitarray import Util, urandom_2, skipIf, PTRSIZE # --------------------- internal C-level debug tests ------------------------ from bitarray._util import ( _setup_table, _zlw, # defined in bitarray.h _cfw, _d2i, _read_n, _write_n, _sc_rts, _SEGSIZE, # _util.h ) SEGBITS = 8 * _SEGSIZE # ---------------------------- bitarray.h ----------------------------------- class SetupTableTests(unittest.TestCase): def test_common(self): for kop in 'aAsSxXcpr': table = _setup_table(kop) self.assertEqual(type(table), bytes) self.assertEqual(len(table), 256) self.assertEqual(table[0], 0) # all tables start with 0 def test_add(self): table = _setup_table('a') self.assertEqual(max(table), 28) self.assertTrue(table[255] == sum(range(8)) == 28) self.assertEqual(table[15], 0+1+2+3) table = _setup_table('A') self.assertEqual(table[15], 4+5+6+7) for kop, endian in ('a', 'little'), ('A', 'big'): t = _setup_table(kop) for i in range(256): a = int2ba(i, 8, endian) self.assertEqual(t[i], sum(i for i, v in enumerate(a) if v)) def test_add_sqr(self): table = _setup_table('s') self.assertEqual(max(table), 140) for kop, endian in ('s', 'little'), ('S', 'big'): t = _setup_table(kop) for i in range(256): a = int2ba(i, 8, endian) self.assertEqual(t[i], sum(i * i for i, v in enumerate(a) if v)) def test_xor(self): table = _setup_table('x') self.assertEqual(max(table), 7) # max index is 7 self.assertTrue(table[255] == 0^1^2^3^4^5^6^7 == 0) self.assertEqual(table[2], 1) self.assertTrue(table[29] == table[0b11101] == 0^2^3^4 == 5) self.assertTrue(table[34] == table[0b100010] == 1^5 == 4) self.assertTrue(table[157] == table[0b10011101] == 2^3^4^7 == 2) table = _setup_table('X') self.assertEqual(table[2], 6) self.assertTrue(table[157] == 3^4^5^7 == 5) for kop, endian in ('x', 'little'), ('X', 'big'): t = _setup_table(kop) for i in range(256): a = int2ba(i, 8, endian) c = 0 for j, v in enumerate(a): c ^= j * v self.assertEqual(t[i], c) def test_count(self): table = _setup_table('c') self.assertEqual(max(table), 8) # 8 active bits the most self.assertEqual(table[255], 8) self.assertTrue(table[29] == table[0b11101] == 4) for endian in 'little', 'big': for i in range(256): a = int2ba(i, 8, endian) self.assertEqual(table[i], a.count()) def test_parity(self): table = _setup_table('p') self.assertEqual(max(table), 1) self.assertEqual(table[254], 1) self.assertEqual(table[255], 0) for endian in 'little', 'big': for i in range(256): a = int2ba(i, 8, endian) self.assertEqual(table[i], parity(a)) def test_reverse(self): table = _setup_table('r') self.assertEqual(max(table), 255) self.assertEqual(table[255], 255) # reversed is still 255 self.assertEqual(table[1], 128) for i in range(256): j = table[i] self.assertEqual(table[j], 
i) self.assertEqual(int2ba(i, 8, 'little'), int2ba(j, 8, 'big')) def test_opposite_endian(self): reverse_trans = _setup_table('r') for kop1, kop2 in 'aA', 'xX', 'sS': a = _setup_table(kop1) b = _setup_table(kop2) for i in range(256): j = reverse_trans[i] self.assertEqual(a[i], b[j]) class ZLW_Tests(unittest.TestCase, Util): def test_zeros(self): for n in range(200): a = zeros(n, self.random_endian()) self.assertEqual(_zlw(a), zeros(64)) def test_ones(self): for n in range(200): a = ones(n, self.random_endian()) b = _zlw(a) r = n % 64 self.assertEqual(b, ones(r) + zeros(64 - r)) def test_random(self): for n in range(200): a = urandom_2(n) b = _zlw(a) self.assertEqual(len(b), 64) self.assertEqual(a.endian, b.endian) self.assertEqual(b[63], 0) # last bit is always 0 q, r = divmod(n, 64) self.assertEqual(b, a[64 * q:] + zeros(64 - r)) # ---------------------------- _bitarray.c -------------------------------- class SysInfo_Tests(unittest.TestCase): def test_debug(self): self.assertTrue(_sysinfo("DEBUG")) class ShiftR8_Tests(unittest.TestCase, Util): def test_empty(self): a = bitarray() a._shift_r8(0, 0, 3) self.assertEqual(a, bitarray()) def test_explicit(self): x = bitarray('11000100 11111111 11100111 10111111 00001000') y = bitarray('11000100 00000111 11111111 00111101 00001000') x._shift_r8(1, 4, 5) self.assertEqual(x, y) x._shift_r8(2, 1, 5) # start > stop -- do nothing self.assertEqual(x, y) x._shift_r8(0, 5, 0) # shift = 0 -- do nothing self.assertEqual(x, y) x = bitarray('11000100 11110') y = bitarray('00011000 10011') x._shift_r8(0, 2, 3) self.assertEqual(x, y) x = bitarray('1100011') y = bitarray('0110001') x._shift_r8(0, 1, 1) self.assertEqual(x, y) def test_random(self): for _ in range(2000): n = randrange(200) x = urandom_2(n) a = randint(0, x.nbytes) b = randint(a, x.nbytes) k = randrange(8) y = x.copy() y[8 * a : 8 * b] >>= k s = x.to01() if a < b: s = s[:8 * a] + k * "0" + s[8 * a : 8 * b - k] + s[8 * b:] if 8 * b > n: s = s[:n] x._shift_r8(a, b, k) self.assertEqual(x.to01(), s) self.assertEqual(x, y) self.assertEqual(x.endian, y.endian) self.assertEqual(len(x), n) class CopyN_Tests(unittest.TestCase, Util): def test_explicit(self): x = bitarray('11000100 11110') # ^^^^ ^ y = bitarray('0101110001') # ^^^^^ x._copy_n(4, y, 1, 5) self.assertEqual(x, bitarray('11001011 11110')) # ^^^^ ^ x = bitarray('10110111 101', 'little') y = x.copy() x._copy_n(3, x, 3, 7) # copy region of x onto x self.assertEqual(x, y) x._copy_n(3, bitarray(x, 'big'), 3, 7) # as before but other endian self.assertEqual(x, y) x._copy_n(5, bitarray(), 0, 0) # copy empty bitarray onto x self.assertEqual(x, y) def test_example(self): # example given in devel/copy_n.py y = bitarray( '00101110 11111001 01011101 11001011 10110000 01011110 011') x = bitarray( '01011101 11100101 01110101 01011001 01110100 10001010 01111011') x._copy_n(21, y, 6, 31) self.assertEqual(x, bitarray( '01011101 11100101 01110101 11110010 10111011 10010111 01101011')) def check_copy_n(self, N, M, a, b, n): x = urandom_2(N) x_lst = x.tolist() y = x if M < 0 else urandom_2(M) y_lst = y.tolist() x_lst[a:a + n] = y_lst[b:b + n] x._copy_n(a, y, b, n) self.assertEqual(x, bitarray(x_lst)) self.assertEqual(len(x), N) self.check_obj(x) if M < 0: return self.assertEqual(y, bitarray(y_lst)) self.assertEqual(len(y), M) self.check_obj(y) def test_random(self): for _ in range(1000): N = randrange(1000) n = randint(0, N) a = randint(0, N - n) b = randint(0, N - n) self.check_copy_n(N, -1, a, b, n) M = randrange(1000) n = randint(0, min(N, M)) a = randint(0, 
N - n) b = randint(0, M - n) self.check_copy_n(N, M, a, b, n) @staticmethod def getslice(a, start, slicelength): # this is the Python eqivalent of __getitem__ for slices with step=1 b = bitarray(slicelength, a.endian) b._copy_n(0, a, start, slicelength) return b def test_getslice(self): for a in self.randombitarrays(): a_lst = a.tolist() n = len(a) i = randint(0, n) j = randint(i, n) b = self.getslice(a, i, j - i) self.assertEqual(b.tolist(), a_lst[i:j]) self.assertEQUAL(b, a[i:j]) class Overlap_Tests(unittest.TestCase, Util): def check_overlap(self, a, b, res): r1 = a._overlap(b) r2 = b._overlap(a) self.assertTrue(r1 is r2 is res) self.check_obj(a) self.check_obj(b) def test_empty(self): a = bitarray() self.check_overlap(a, a, False) b = bitarray() self.check_overlap(a, b, False) def test_distinct(self): for a in self.randombitarrays(): # buffers overlaps with itself, unless buffer is NULL self.check_overlap(a, a, bool(a)) b = a.copy() self.check_overlap(a, b, False) def test_shared(self): a = bitarray(64) b = bitarray(buffer=a) self.check_overlap(b, a, True) c = bitarray(buffer=memoryview(a)[2:4]) self.check_overlap(c, a, True) d = bitarray(buffer=memoryview(a)[5:]) self.check_overlap(d, c, False) self.check_overlap(d, b, True) e = bitarray(buffer=memoryview(a)[3:3]) self.check_overlap(e, c, False) self.check_overlap(e, d, False) def test_shared_random(self): n = 100 # buffer size in bytes a = bitarray(8 * n) for _ in range(1000): i1 = randint(0, n) j1 = randint(i1, n) b1 = bitarray(buffer=memoryview(a)[i1:j1]) i2 = randint(0, n) j2 = randint(i2, n) b2 = bitarray(buffer=memoryview(a)[i2:j2]) r1, r2 = range(i1, j1), range(i2, j2) res = bool(r1) and bool(r2) and (i2 in r1 or i1 in r2) self.check_overlap(b1, b2, res) # -------------------------------- _util.c ---------------------------------- class CountFromWord_Tests(unittest.TestCase, Util): def test_ones_zeros_empty(self): for _ in range(1000): n = randrange(1024) a = ones(n) # ones i = randrange(20) self.assertEqual(_cfw(a, i), max(0, n - i * 64)) a.setall(0) # zeros self.assertEqual(_cfw(a, i), 0) a.clear() # empty self.assertEqual(_cfw(a, i), 0) def test_random(self): for _ in range(1000): n = randrange(1024) a = urandom_2(n) i = randrange(20) res = _cfw(a, i) self.assertEqual(res, a[64 * i:].count()) class DigitToInt_Tests(unittest.TestCase): # digit_to_int() alphabets = [ (1, b'01'), (2, b'0123'), (3, b'01234567'), (4, b'0123456789abcdef'), (4, b'0123456789ABCDEF'), (5, b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'), (6, b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef' b'ghijklmnopqrstuvwxyz0123456789+/'), ] def test_alphabets(self): for m, alphabet in self.alphabets: self.assertEqual(len(alphabet), 1 << m) for i, c in enumerate(alphabet): self.assertEqual(_d2i(m, bytes([c])), i) def test_not_alphabets(self): for m, alphabet in self.alphabets: for c in range(256): if c in alphabet or (m == 4 and c in b'abcdefABCDEF'): continue self.assertEqual(_d2i(m, bytes([c])), -1) class RTS_Tests(unittest.TestCase): # _sc_rts() (running totals debug test) def test_segsize(self): self.assertEqual(type(_SEGSIZE), int) self.assertTrue(_SEGSIZE in [8, 16, 32]) def test_empty(self): rts = _sc_rts(bitarray()) self.assertEqual(len(rts), 1) self.assertEqual(rts, [0]) @skipIf(SEGBITS != 256) def test_example(self): # see example before sc_calc_rts() in _util.c a = zeros(987) a[:5] = a[512:515] = a[768:772] = 1 self.assertEqual(a.count(), 12) rts = _sc_rts(a) self.assertEqual(type(rts), list) self.assertEqual(len(rts), 5) self.assertEqual(rts, [0, 5, 5, 8, 12]) @staticmethod 
def nseg(a): # number of segments, see also SegmentTests in tricks.py return (a.nbytes + _SEGSIZE - 1) // _SEGSIZE def test_ones(self): for n in range(1000): a = ones(n) rts = _sc_rts(a) self.assertEqual(len(rts), self.nseg(a) + 1) self.assertEqual(rts[0], 0) self.assertEqual(rts[-1], n) for i, v in enumerate(rts): self.assertEqual(v, min(SEGBITS * i, n)) def test_random(self): for _ in range(200): a = urandom_2(randrange(10000)) rts = _sc_rts(a) self.assertEqual(len(rts), self.nseg(a) + 1) self.assertEqual(rts[0], 0) self.assertEqual(rts[-1], a.count()) for i in range(self.nseg(a)): seg_pop = a.count(1, SEGBITS * i, SEGBITS * (i + 1)) self.assertEqual(rts[i + 1] - rts[i], seg_pop) class ReadN_WriteN_Tests(unittest.TestCase, Util): # Regardless of machine byte-order, read_n() and write_n() use # little endian byte-order. def test_explicit(self): for blob, x in [(b"", 0), (b"\x00", 0), (b"\x01", 1), (b"\xff", 255), (b"\xff\x00", 255), (b"\xaa\xbb\xcc", 0xccbbaa)]: n = len(blob) self.assertEqual(_read_n(iter(blob), n), x) self.assertEqual(_write_n(n, x), blob) def test_zeros(self): for n in range(PTRSIZE): blob = n * b"\x00" self.assertEqual(_read_n(iter(blob), n), 0) self.assertEqual(_write_n(n, 0), blob) def test_max(self): blob = (PTRSIZE - 1) * b"\xff" + b"\x7f" self.assertEqual(_read_n(iter(blob), PTRSIZE), sys.maxsize) self.assertEqual(_write_n(PTRSIZE, sys.maxsize), blob) def test_round_trip_random(self): for _ in range(1000): n = randint(1, PTRSIZE - 1); blob = os.urandom(n) i = _read_n(iter(blob), n) self.assertEqual(_write_n(n, i), blob) def test_read_n_untouch(self): it = iter(b"\x00XY") self.assertEqual(_read_n(it, 1), 0) self.assertEqual(next(it), ord('X')) self.assertEqual(_read_n(it, 0), 0) self.assertEqual(next(it), ord('Y')) self.assertRaises(StopIteration, _read_n, it, 1) def test_read_n_item_errors(self): for v in -1, 256: self.assertRaises(ValueError, _read_n, iter([3, v]), 2) for v in None, "F", Ellipsis, []: self.assertRaises(TypeError, _read_n, iter([3, v]), 2) def test_read_n_negative(self): it = iter(PTRSIZE * b"\xff") self.assertRaisesMessage( ValueError, "read %d bytes got negative value: -1" % PTRSIZE, _read_n, it, PTRSIZE) # --------------------------------------------------------------------------- if __name__ == '__main__': unittest.main() bitarray-3.7.1/devel/test_random.py000066400000000000000000000735731505414144000173440ustar00rootroot00000000000000""" Statistical Tests for Random Functions in bitarray.util ------------------------------------------------------- These are statistical tests. They do not test any basic functionality of random functions. Those are already tested in the regular utility tests. Therefore, and because these tests take longer to run, we decided to put them in a separate file. In addition, this file contains some important verification tests that don't test actual functionality in random_p(), but rather verify some of the logic and establish some tricky equations. 
""" import sys import unittest from copy import deepcopy from collections import Counter from itertools import pairwise from math import comb, fmod, sqrt from statistics import fmean, stdev, pstdev from random import randint, randrange, random, binomialvariate from bitarray import bitarray, frozenbitarray from bitarray.util import ( zeros, ones, urandom, random_k, random_p, sum_indices, int2ba, count_and, count_or, count_xor, parity, ) from bitarray.util import _Random # type: ignore HEAVY = False # set True for heavy testing _r = _Random() M = _r.M K = _r.K limit = 1.0 / (K + 1) # lower limit for p SMALL_P = _r.SMALL_P def count_each_index(arrays): """ Given an iterable of bitarrays, count the sums of all bits at each index and return those counts in a Counter object. For example, for a returned Counter c, c[2] = 4 means that a sum of 2 across all bitarrays occurs at 4 indices. """ b = bitarray() n = None # length of each bitarray for a in arrays: if n is None: n = len(a) elif len(a) != n: raise ValueError("bitarrays of same length expected") b.extend(a) if n is None: return Counter() return Counter(b.count(1, i, len(b), n) for i in range(n)) class CountEachIndexTests(unittest.TestCase): def test_example(self): arrays = [bitarray("0011101"), bitarray("1010100"), bitarray("1011001")] # sums: 2032202 c = count_each_index(arrays) self.assertEqual(c.total(), 7) # length of each bitarray self.assertEqual(c[0], 2) self.assertEqual(c[1], 0) self.assertEqual(c[2], 4) self.assertEqual(c[3], 1) def test_random(self): for _ in range(1_000): m = randrange(10) n = randrange(10) if m else 0 arrays = [urandom(n) for _ in range(m)] c = count_each_index(arrays) self.assertEqual(c.total(), n) for j in range(m + 1): self.assertTrue(0 <= c[j] <= n) c2 = Counter(sum(arrays[j][i] for j in range(m)) for i in range(n)) self.assertEqual(c, c2) # generator gen = (arrays[j] for j in range(m)) self.assertEqual(count_each_index(gen), c) self.assertEqual(list(gen), []) def test_empty(self): arrays = [] for m in range(10): self.assertEqual(count_each_index(arrays), Counter()) arrays.append(bitarray()) def test_zeros_ones(self): for _ in range(1_000): m = randrange(10) n = randrange(10) if m else 0 c = count_each_index(zeros(n) for _ in range(m)) self.assertEqual(c[0], n) c = count_each_index(ones(n) for _ in range(m)) self.assertEqual(c[m], n) def test_errors(self): C = count_each_index self.assertRaises(ValueError, C, "ABC") self.assertRaises(TypeError, C, [0, 1]) self.assertRaises(ValueError, C, [bitarray("01"), bitarray("1")]) def create_masks(m): """ Create a list with m masks. Each mask has a length of 2**m bits. 
""" masks = [] for i in range(m): j = 1 << i mask = zeros(j) + ones(j) mask *= 1 << (m - i - 1) masks.append(mask) return masks class CreateMasksTests(unittest.TestCase): def test_explict(self): C = create_masks self.assertEqual(C(0), []) self.assertEqual(C(1), [bitarray("01")]) self.assertEqual(C(2), [bitarray("0101"), bitarray("0011")]) self.assertEqual(C(3), [bitarray("01010101"), bitarray("00110011"), bitarray("00001111")]) def test_11(self): m = 11 masks = create_masks(m) n = 1 << m self.assertEqual(len(masks), m) self.assertEqual(count_each_index(masks), Counter(int2ba(i).count() for i in range(n))) for i in range(m): a = masks[i] self.assertEqual(len(a), n) self.assertEqual(a.count(), n // 2) for j in range(i): b = masks[j] self.assertEqual(count_and(a, b), n // 4) self.assertEqual(count_or(a, b), 3 * n // 4) self.assertEqual(count_xor(a, b), n // 2) class Util(unittest.TestCase): def check_binomial_dist(self, n, p, x): mu = n * p sigma = sqrt(n * p * (1.0 - p)) msg = "n=%d p=%f mu=%f sigma=%f x=%f" % (n, p, mu, sigma, x) self.assertTrue(abs(x - mu) < 10.0 * sigma, msg) def check_probability(self, a, p): n = len(a) c = a.count() if p == 0: self.assertEqual(c, 0) elif p == 1: self.assertEqual(c, n) else: self.check_binomial_dist(n, p, c) class UtilTests(Util): def test_check_probability(self): C = self.check_probability N = 1_000_000 a = zeros(N) C(a, 0.0) a.setall(1) C(a, 1.0) a[::2] = 0 self.assertEqual(a.count(), N // 2) C(a, 0.501) C(a, 0.499) self.assertRaises(AssertionError, C, a, 0.506) self.assertRaises(AssertionError, C, a, 0.494) class URandomTests(Util): def test_count(self): a = urandom(10_000_000) self.check_probability(a, 0.5) def test_stat(self): for c in [ Counter(urandom(100).count() for _ in range(100_000)), count_each_index(urandom(100_000) for _ in range(100)), ]: self.assertTrue(set(c) <= set(range(101))) self.assertEqual(c.total(), 100_000) x = sum(c[k] for k in range(40, 51)) # p = 0.522195 mean = 52219.451858 stdev = 157.958033 self.assertTrue(abs(x - 52_219) <= 1_580) class Random_K_Tests(Util): def test_mean(self): M = 100_000 # number of trails N = 1_000 # bitarray length K = 500 # sample size C = Counter() ranges = [0.0, 500.0, 510.0, 520.0, 1000.0] for _ in range(M): x = sum_indices(random_k(N, K)) / K for i, (x1, x2) in enumerate(pairwise(ranges)): if x1 <= x < x2: C[i] += 1 self.assertEqual(C.total(), M) # python random/sample.py 100_000 1000 500 0 500 510 520 1000 self.assertTrue(abs(C[0] - 52_183) <= 1_580) # p = 0.521829 self.assertTrue(abs(C[1] - 35_303) <= 1_511) # p = 0.353025 self.assertTrue(abs(C[2] - 11_275) <= 1_000) # p = 0.112747 self.assertTrue(abs(C[3] - 1_240) <= 350) # p = 0.012399 def test_mean_2(self): M = 100_000 # number of trails N = 500 # bitarray length K = 400 # sample size C = Counter() ranges = [200.0, 249.5, 251.0, 255.0, 260.0, 300.0] for _ in range(M): x = sum_indices(random_k(N, K)) / K for i, (x1, x2) in enumerate(pairwise(ranges)): if x1 <= x < x2: C[i] += 1 self.assertEqual(C.total(), M) # python random/sample.py 100_000 500 400 200 249.5 251 255 260 300 self.assertTrue(abs(C[0] - 50_000) <= 1_581) # p = 0.500000 self.assertTrue(abs(C[1] - 17_878) <= 1_212) # p = 0.178781 self.assertTrue(abs(C[2] - 27_688) <= 1_415) # p = 0.276879 self.assertTrue(abs(C[3] - 4_376) <= 647) # p = 0.043762 def test_apply_masks(self): Na = 25_000 # number of bitarrays to test against masks Nm = 12 # number of masks n = 1 << Nm # length of each mask # Create masks for selecting half elements in random bitarray a. 
# For example, masks[0] selects all odd elements, and masks[-1] # selects the upper half of a. masks = create_masks(Nm) cm = Nm * [0] # counter for each mask for _ in range(Na): k = randrange(1, n, 2) # k is odd a = random_k(n, k) self.assertEqual(len(a), n) self.assertTrue(parity(a)) # count is odd for i in range(Nm): c1 = count_and(a, masks[i]) c0 = k - c1 # counts cannot be equal because k is odd self.assertNotEqual(c0, c1) # the probability for having more, e.g. even than # odd (masks[0]) elements should be 1/2, or having more bits # in upper vs lower half (mask(-1)) if c0 > c1: cm[i] += 1 for c in cm: # for each mask, check counter self.check_binomial_dist(Na, 0.5, c) def test_random_masks(self): Na = 10 # number of arrays to test Nm = 500_000 if HEAVY else 25_000 # number of masks n = 7000 # bitarray length # count for each array ka = [randrange(1, n, 2) for _ in range(Na)] arrays = [random_k(n, k) for k in ka] for k, a in zip(ka, arrays): # sanity check arrays self.assertEqual(len(a), n) self.assertEqual(a.count(), k) self.assertTrue(parity(a)) ca = Na * [0] # counter for each array for _ in range(Nm): # each mask has exactly half elements set to 1 mask = random_k(n, n//2) self.assertEqual(mask.count(0), mask.count(1)) # test each array against this masks for i in range(Na): c1 = count_and(arrays[i], mask) c0 = ka[i] - c1 # counts cannot be equal because k is odd self.assertNotEqual(c0, c1) if c0 > c1: ca[i] += 1 for c in ca: # for each array, check counter self.check_binomial_dist(Nm, 0.5, c) def test_elements_uniform(self): arrays = [random_k(100_000, 30_000) for _ in range(100)] for a in arrays: # for each bitarray check sample size k self.assertEqual(a.count(), 30_000) c = count_each_index(arrays) self.assertTrue(abs(c[30] - 8_678) <= 890) x = sum(c[k] for k in range(20, 31)) # p = 0.540236 mean = 54023.639245 stdev = 157.601089 self.assertTrue(abs(x - 54_024) <= 1_576) self.assertEqual(c.total(), 100_000) def test_all_bits_active(self): for _ in range(100): n = randrange(10, 10_000) cum = zeros(n) for _ in range(10_000): k = n // 7 a = random_k(n, k) self.assertEqual(len(a), n) self.assertEqual(a.count(), k) cum |= a if cum.all(): break else: self.fail() def test_combinations(self): # for entire range of 0 <= k <= n, validate that random_k() # generates all possible combinations n = 12 total = 0 for k in range(n + 1): expected = comb(n, k) combs = set() for _ in range(100_000): a = random_k(n, k) self.assertEqual(a.count(), k) combs.add(frozenbitarray(a)) if len(combs) == expected: total += expected break else: self.fail() self.assertEqual(total, 2 ** n) def test_evenly(self): # Calculate random_k(n, k) N times, and count each specific outcome. # We know that there are m=comb(n, k) possible outcomes, so each one # has a probability 1/m and the mean of the count should be N/m. N = 100_000 n = 9 k = 3 m = comb(n, k) c = Counter() for _ in range(N): a = frozenbitarray(random_k(n, k)) c[a] += 1 self.assertEqual(c.total(), N) self.assertEqual(len(c), m) p = 1.0 / m self.assertAlmostEqual(fmean(c.values()), N * p) if 0: print(m) print(N * p) print(sqrt(N * p * (1.0 - p))) print(stdev(c.values())) for x in c.values(): self.check_binomial_dist(N, p, x) def random_p_alt(self, n, p=0.5): """ Alternative implementation of random_p(). While the performance is about the same for large n, we found that for smaller n the handling of special cases leads to better overall performance in the current implementation. 
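Note that this alternative relies on random.binomialvariate(), which is only available in Python 3.12 and later.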
""" k = binomialvariate(n, p) self.assertTrue(0 <= k <= n) a = random_k(n, k) self.assertEqual(len(a), n) self.assertEqual(a.count(), k) return a def test_random_p_alt(self): n = 1_000_000 for _ in range(100): p = random() a = self.random_p_alt(n, p) self.check_probability(a, p) class Random_P_Tests(Util): def test_apply_masks(self): M = 12 # number of masks # Create masks for selecting half elements in the random bitarray a. # For example, masks[0] selects all odd elements, and masks[-1] # selects the upper half of a. masks = create_masks(M) n = M * [0] # sample size for each mask c = M * [0] # count for each mask for _ in range(25_000): p = 1.5 * SMALL_P * random() a = random_p(1 << M, p) tot = a.count() for i in range(M): c1 = count_and(a, masks[i]) c0 = tot - c1 if c0 == c1: # counts are equal -> continue # ignore this mask for this bitarray a n[i] += 1 # counts are not equal, the probability for having more, # e.g. even than odd (masks[0]) elements should be 1/2, # or having more bits in upper vs lower half (mask(-1)) if c0 > c1: c[i] += 1 for i in range(M): self.assertTrue(n[i] > 20_000, n[i]) self.check_binomial_dist(n[i], 0.5, c[i]) def test_elements_uniform(self): arrays = [random_p(100_000, 0.3) for _ in range(100)] for a in arrays: # for each bitarray see if population is within expectation self.check_probability(a, 0.3) c = count_each_index(arrays) self.assertTrue(abs(c[30] - 8_678) <= 890) x = sum(c[k] for k in range(20, 31)) # p = 0.540236 mean = 54023.639245 stdev = 157.601089 self.assertTrue(abs(x - 54_024) <= 1_576) self.assertEqual(c.total(), 100_000) def test_tiny_p(self): for n in 4, 10, 1000: for p in 1e-9, 1e-12, 1e-15, 1e-18: a = random_p(n, p) self.assertTrue(a.count() <= 1) def test_literal(self): # test "literal definition" case, n = 5 M = 250_000 # number of trails C = Counter(random_p(5, 0.3).count() for _ in range(M)) self.assertEqual(C.total(), M) # python random/binomial.py 250_000 5 0.3 self.assertTrue(abs(C[0] - 42_017) <= 1_870) # p = 0.168070 self.assertTrue(abs(C[1] - 90_037) <= 2_400) # p = 0.360150 self.assertTrue(abs(C[2] - 77_175) <= 2_310) # p = 0.308700 self.assertTrue(abs(C[3] - 33_075) <= 1_694) # p = 0.132300 self.assertTrue(abs(C[4] - 7_087) <= 830) # p = 0.028350 def test_small_p(self): # test small p case C = Counter(random_p(50, p=0.005).count() for _ in range(100_000)) self.assertEqual(C.total(), 100_000) # python random/binomial.py 100_000 50 .005 self.assertTrue(abs(C[0] - 77_831) <= 1_314) # p = 0.778313 self.assertTrue(abs(C[1] - 19_556) <= 1_254) # p = 0.195556 def test_small_p_symmetry(self): # same as above - exploiting symmetry C = Counter(random_p(50, p=0.995).count() for _ in range(100_000)) self.assertEqual(C.total(), 100_000) self.assertTrue(abs(C[49] - 19_556) <= 1_254) self.assertTrue(abs(C[50] - 77_831) <= 1_314) def test_small_p_uniform(self): C = count_each_index(random_p(100_000, 0.005) for _ in range(50)) self.assertEqual(C.total(), 100_000) self.assertTrue(abs(C[0] - 77_831) <= 1_314) self.assertTrue(abs(C[1] - 19_556) <= 1_254) def test_p375(self): # test .combine_half() M = 100_000 # number of trails C = Counter(random_p(100, 0.375).count() for _ in range(M)) self.assertEqual(C.total(), M) # python random/binomial.py 100_000 100 .375 37..48 self.assertTrue(abs(C[36] - 7_898) <= 853) # p = 0.078977 self.assertTrue(abs(C[37] - 8_196) <= 867) # p = 0.081965 self.assertTrue(abs(C[38] - 8_153) <= 865) # p = 0.081533 self.assertTrue(abs(C[39] - 7_777) <= 847) # p = 0.077770 x = sum(C[k] for k in range(37, 49)) 
self.assertTrue(abs(x - 56_614) <= 1_567) # p = 0.566139 def test_ne5(self): M = 25_000 # number of trails C = Counter(random_p(100_000, 0.5).count() for _ in range(M)) self.assertEqual(C.total(), M) # python binomial.py 25_000 100_000 .5 48_000..50_000 50_000..50_200 x = sum(C[k] for k in range(48000, 50001)) self.assertTrue(abs(x - 12_532) <= 791) # p = 0.501262 x = sum(C[k] for k in range(50000, 50201)) self.assertTrue(abs(x - 9_972) <= 774) # p = 0.398876 def test_probabilities(self): n = 100_000_000 special_p = [ 65 / 257 - 1e-9, # largest x for OR 65 / 257 + 1e-9, # smallest x for AND 0.0, 1e-12, 0.25, 1/3, 3/8, 127/257, 0.5, ] for j in range(100 if HEAVY else 2): sys.stdout.write('.') sys.stdout.flush() try: p = special_p[j] except IndexError: p = random() a = random_p(n, p) self.check_probability(a, p) class VerificationTests(Util): def test_uniform_stdev(self): # verify that the standard deviation of a uniform distribution # of population size n is given by: n / sqrt(12) for _ in range(100): n = randrange(10, 10_000) pop = list(range(n)) self.assertEqual(fmean(pop), (n - 1) / 2) self.assertAlmostEqual(pstdev(pop), n / sqrt(12), delta=0.1) def test_operations(self): C = self.check_probability n = 1_000_000 values = [i / 16.0 for i in range(17)] arrays0, arrays1 = ([(random_p(n, p), p) for p in values] for _ in range(2)) for a, p in arrays0: C(a, p) C(~a, 1.0 - p) # invert for b, q in arrays1: C(b, q) C(a & b, p * q) # AND C(a | b, p + q - p * q) # OR C(a ^ b, p + q - 2 * p * q) # XOR for b, q in arrays0: C(b, q) for a, p in deepcopy(arrays1): C(a, p) a &= b # in-place AND p *= q C(a, p) for a, p in deepcopy(arrays1): C(a, p) a |= b # in-place OR p += q * (1.0 - p) C(a, p) for a, p in deepcopy(arrays1): C(a, p) a ^= b # in-place XOR p += q * (1.0 - 2 * p) C(a, p) # ---------------- verifications relevant for random_k() ---------------- def test_decide_on_sequence(self): N = 100_000 cdiff = Counter() for _ in range(N): n = randrange(1, 10_000) k = randint(0, n // 2) self.assertTrue(0 <= k <= n // 2) if k < 16 or k * K < 3 * n: # for small k, we increase the count of a zeros(n) bitarray i = 0 else: # We could simply have `i = int(k / n * K)`. However, # when k is small, many reselections are required to # decrease the count. On the other hand, for k near n/2, # increasing and decreasing the count is equally expensive. p = k / n # p <= 0.5 # Numerator: f(p)=(1-2*p)*c -> f(0)=c, f(1/2)=0 # As the standard deviation of the .combine_half() bitarrays # gets smaller with larger n, we divide by sqrt(n). p -= (0.2 - 0.4 * p) / sqrt(n) # Note that we divide by K+1. This will round towards the # nearest probability as we get closer to p = 1/2. i = int(p * (K + 1)) if i < 3: # a = zeros(n), count is 0 diff = -k else: self.assertTrue(k >= 16) self.assertTrue(n >= 32) self.assertTrue(3 <= i <= K // 2) # a = self.combine_half(self.op_seq(i)) # count is given by binomialvariate(n, i / K) diff = binomialvariate(n, i / K) - k cdiff[diff] += 1 self.assertEqual(cdiff.total(), N) # count the number of cases where the count needs to be decreased above = sum(cdiff[i] for i in range(1, max(cdiff) + 1)) self.assertTrue(M != 8 or 0.28 < above / N < 0.34) # ---------------- verifications relevant for random_p() ---------------- def test_equal_x(self): """ Verify that the probabilities p for which final AND and OR result in equal x are: p = j / (K + 1) j in range(1, K) Also, verify these x are all: x = 1 / (K + 1) = limit These are also the maximal x. 
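Sketch of the algebra (with q = i / K and i = j - 1): the OR step gives
x1 = (p - q) / (1 - q) = 1 / (K + 1), while the AND step gives
x2 = 1 - p / ((i + 1) / K) = 1 - K / (K + 1) = 1 / (K + 1).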
""" for j in range(1, K): # probabilities p for which final AND and OR result in equal x p = j / (K + 1) i = int(p * K) self.assertEqual(i, j - 1) # as K / (K + 1) < 1 self.assertEqual(p * (K + 1), i + 1) q = i / K x1 = (p - q) / (1.0 - q) # OR x2 = 1.0 - p / (q + 1.0 / K) # AND x2 = 1 - p / next q self.assertAlmostEqual(x1, x2, delta=1e-14) self.assertAlmostEqual(x1, limit, delta=1e-14) def special_p(self): """ generate special test values of p < 0.5 """ EPS = 1e-12 for j in range(1, K // 2 + 1): # probabilities for which final AND and OR result in equal x p = j / (K + 1) for e in -EPS, EPS: yield p + e for j in range(1, K // 2): # probabilities for which no final AND or OR is not necessary p = j / K for e in -EPS, 0.0, EPS: yield p + e for p in 0.0, EPS, 0.5 - EPS: yield p for e in -EPS, 0.0, EPS: yield SMALL_P + e for _ in range(10_000): yield 0.5 * random() def test_decide_on_operation(self): """ Verify that `x1 > x2` equates to `p * (K + 1) > i + 1`. """ for p in self.special_p(): self.assertTrue(0 <= p < 0.5, p) i = int(p * K) q = i / K self.assertTrue(q <= p) x1 = (p - q) / (1.0 - q) # OR x2 = 1.0 - p / (q + 1.0 / K) # AND x2 = 1 - p / next q # decided whether to use next i (next q) self.assertEqual(x1 > x2, p * (K + 1) > i + 1) def test_decision_limit(self): """ Verify that decision operation works as desired, and that resulting probability q is within limit of p. """ # limit = 1/(K+1) is slightly smaller than 1/K: self.assertEqual(limit, 1.0 / K - 1.0 / (K * (K + 1))) self.assertTrue(1.0 / K - limit < K ** -2 == 1.0 / (1 << (2 * M))) for p in self.special_p(): i = int(p * K) q0 = i / K q1 = (i + 1) / K self.assertTrue(q0 <= p < q1) self.assertTrue(q1 - q0 == 1.0 / K > limit) self.assertTrue(q0 + 0.5 * limit < q1 - 0.5 * limit) if p * (K + 1) > i + 1: self.assertTrue(q1 - 0.5 * limit < p < q1) # implies: self.assertNotEqual(q0, p) q = q1 self.assertTrue(q > p) # use AND operation else: self.assertTrue(q0 <= p < q0 + limit) q = q0 self.assertTrue(q <= p) # use OR operation self.assertTrue(p - limit < q < p + 0.5 * limit) self.assertTrue(abs(p - q) < limit) self.assertEqual(bool(q != p), bool(fmod(p, 1.0 / K))) def test_final_op(self): """ Verify final operation always gives us the correct probability. """ for p in self.special_p(): i = int(p * K) if p * (K + 1) > i + 1: # see above i += 1 if p > limit: self.assertNotEqual(i, 0) # Note that all the below handles this case fine. # However, rather than extending .op_seq() and .combine_half() # to handle i=0, we decided to "filter out" i=0 by the small p # case (see test below). self.assertTrue(0 <= i <= K // 2) q = i / K self.assertTrue(abs(p - q) < limit) # see above if q < p: # increase probability - OR x = (p - q) / (1.0 - q) # ensure small p case is called self.assertTrue(0.0 < x < limit) q += x * (1.0 - q) # OR elif q > p: # decrease probability - AND x = p / q # ensure small p case is called (after symmetry is exploited) self.assertTrue(0.0 < 1.0 - x < limit) q *= x # AND self.assertEqual(q, p) def test_i_not_0(self): """ Verify that for `p > limit`, we always get `i > 0`. This is important, as the small p case has to "filter out" `i = 0`, as the sequence of operations do not handle `i = 0`. """ p = limit + 1e-12 i = int(p * K) self.assertEqual(i, 0) # as K / (K + 1) < 1 if p * (K + 1) > i + 1: i += 1 # So for i be non-zero we must have: # p * (K + 1) > 1 # or # p > 1 / (K + 1) = limit q.e.d. 
self.assertEqual(i, 1) def dummy_random_p(self, p=0.5, verbose=False): """ Unlike random_p(), this function returns the desired probability q itself, and not a random bitarray. The point of this function is to illustrate how random_p() essentially works. Instead of actual bitarray operations, we change q accordingly. This method is neither concerned with the bitarray length n nor endianness. """ # error check inputs and handle edge cases if p <= 0.0 or p == 0.5 or p >= 1.0: if p in (0.0, 0.5, 1.0): return p raise ValueError("p must be in range 0.0 <= p <= 1.0, got %f", p) # exploit symmetry to establish: p < 0.5 if p > 0.5: return 1.0 - self.dummy_random_p(1.0 - p, verbose) # for small p set randomly individual bits, which is much faster if p < SMALL_P: return p # random.binomialvariate() and .random_pop() # calculate operator sequence i = int(p * K) if p * (K + 1) > i + 1: i += 1 self.assertTrue(0 < i <= K // 2) a = bitarray(i.to_bytes(2, byteorder="little"), "little") seq = a[a.index(1) + 1 : M] # combine random bitarrays using bitwise AND and OR operations q = 0.5 # start with randbytes() for k in seq: if k: q += 0.5 * (1.0 - q) # OR else: q *= 0.5 # AND self.assertEqual(q, i / K) x = 0.0 if q < p: # increase probability x = (p - q) / (1.0 - q) self.assertTrue(0.0 < x < SMALL_P) q += x * (1.0 - q) # OR elif q > p: # decrease probability x = p / q self.assertTrue(0.0 < 1.0 - x < SMALL_P) q *= x # AND if verbose: print("%15.9f %9d %9d %15.9f" % (p, len(seq) + 1, i, x)) self.assertEqual(q, p) return q def test_dummy_random_p(self): for p in self.special_p(): self.assertEqual(self.dummy_random_p(p), p) # test 0 <= p < 1; self.special_p() only gives us 0 <= p < 0.5 for _ in range(10_000): p = random() self.assertEqual(self.dummy_random_p(p), p) def disp(): i = sys.argv.index('--disp') args = sys.argv[i + 1:] if args: plist = [float(eval(s)) for s in args] else: plist = [1/4, 1/8, 1/16, 1/32, 1/64, 3/128, 127/256, SMALL_P, 0.1, 0.2, 0.3, 0.4, 65/257, 127/257 + 1e-9, 0.5 - 1e-9] print(" p k i x") print(55 * '-') for p in plist: VerificationTests().dummy_random_p(p, True) if __name__ == '__main__': if '--disp' in sys.argv: disp() sys.exit() if "--heavy" in sys.argv: HEAVY = True sys.argv.remove("--heavy") unittest.main() bitarray-3.7.1/devel/test_sum_indices.py000066400000000000000000000225671505414144000203630ustar00rootroot00000000000000""" In both ssqi() (in _util.c) and sum_indices() (in util.py), we divide our bitarray into equally sized blocks in order to calculate the sum of active indices. We use the same trick but for different reasons: (a) in ssqi(), we want to loop over bytes (blocks of 8 bits) and use lookup tables (for sum z_j [**2]) (b) in sum_indices() we want to loop over blocks of smaller bitarrays in order to keep the summation in ssqi() from overflowing The trick is to write x_j = y_j + z_j where y_j = y : if bit j is active 0 : otherwise for each block. Here, j is the index within each block. That is, j is in range(block size). Using the above, we get: sum x_j = k * y + sum z_j where k is the bit count (per block). And: sum x_j**2 = k * y**2 + 2 * sum z_j * y + sum z_j**2 These are the sums for each block and their sum (over all blocks) is what we are interested in. 
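A small worked example (numbers chosen purely for illustration): take a block size of 8 and block index 2, so y = 16. Active bits at absolute indices 17 and 21 have within-block indices z_j = 1 and 5, hence k = 2, sum z_j = 6 and sum z_j**2 = 26. Then:

    sum x_j    = k * y + sum z_j                         = 2*16 + 6            = 38  = 17 + 21
    sum x_j**2 = k * y**2 + 2 * sum z_j * y + sum z_j**2 = 2*256 + 2*6*16 + 26 = 730 = 17**2 + 21**2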
(a) ssqi() (b) sum_indices() ------------------------------------------------------------ block c (char) block (bitarray) block size 8 n i byte index block index y 8 * i n * i k count_table[c] block.count() z1 = sum z_j sum_table[c] _ssqi(block) z2 = sum z_j**2 sum_sqr_table[c] _ssqi(block, 2) """ import math import unittest from random import choice, getrandbits, randint, randrange, sample from bitarray.util import zeros, ones, gen_primes, urandom, _ssqi, sum_indices from bitarray.test_util import SumIndicesUtil N19 = 1 << 19 # 512 Kbit = 64 KB N20 = 1 << 20 # 1 Mbit = 128 KB N21 = 1 << 21 # 2 Mbit = 256 KB N22 = 1 << 22 # 4 Mbit = 512 KB N23 = 1 << 23 # 8 Mbit = 1 MB N28 = 1 << 28 # 256 Mbit = 32 MB N30 = 1 << 30 # 1 Gbit = 128 MB N31 = 1 << 31 # 2 Gbit = 256 MB N32 = 1 << 32 # 4 Gbit = 512 MB N33 = 1 << 33 # 8 Gbit = 1 GB MAX_UINT64 = (1 << 64) - 1 def sum_range(n): "Return sum(range(n))" return n * (n - 1) // 2 def sum_sqr_range(n): "Return sum(i * i for i in range(n))" return n * (n - 1) * (2 * n - 1) // 6 class SumRangeTests(unittest.TestCase): def test_sum_range(self): for n in range(1000): self.assertEqual(sum_range(n), sum(range(n))) def test_sum_sqr_range(self): for n in range(1000): self.assertEqual(sum_sqr_range(n), sum(i * i for i in range(n))) def test_mode(self): for n in range(1000): for mode, f in [(1, sum_range), (2, sum_sqr_range)]: sum_ones = 3 if mode == 1 else 2 * n - 1 sum_ones *= n * (n - 1) sum_ones //= 6 self.assertEqual(sum_ones, f(n)) def test_o2(self): for n in range(1000): o1 = n * (n - 1) // 2 o2, r = divmod(o1 * (2 * n - 1), 3) self.assertEqual(r, 0) self.assertEqual(o2, sum_sqr_range(n)) class ExampleImplementationTests(unittest.TestCase): def sum_indices(self, a, mode=1): n = 503 # block size in bits nblocks = (len(a) + n - 1) // n # number of blocks sm = 0 for i in range(nblocks): y = n * i block = a[y : y + n] k = block.count() z1 = _ssqi(block) self.assertEqual( # Note that j are indices within each block. # Also note that we use len(block) instead of block_size, # as the last block may be truncated. z1, sum(j for j in range(len(block)) if block[j])) if mode == 1: x = k * y + z1 else: z2 = _ssqi(block, 2) x = (k * y + 2 * z1) * y + z2 # x is the sum [of squares] of indices for each block self.assertEqual( # Note that here t are indices of the full bitarray a. x, sum(t ** mode for t in range(y, y + len(block)) if a[t])) sm += x return sm def test_sum_indices(self): for _ in range(100): n = randrange(10_000) a = urandom(n) mode = randint(1, 2) self.assertEqual(self.sum_indices(a, mode), sum_indices(a, mode)) class SSQI_Tests(SumIndicesUtil): # Note carefully that the limits that are calculated and tested here # are limits used in internal function _ssqi(). # The public Python function sum_indices() does NOT impose any limits # on the size of bitarrays it can compute. def test_calculate_limits(self): # calculation of limits used in ssqi() (in _util.c) for f, limit in [(sum_range, 6_074_001_000), (sum_sqr_range, 3_810_778)]: lo = 0 hi = MAX_UINT64 while hi > lo + 1: n = (lo + hi) // 2 if f(n) > MAX_UINT64: hi = n else: lo = n self.assertTrue(f(n) < MAX_UINT64) self.assertTrue(f(n + 1) > MAX_UINT64) self.assertEqual(n, limit) def test_overflow(self): # _ssqi() is limited to bitarrays of about 6 Gbit (4 Mbit mode=2). # This limit is never reached because sum_indices() uses # a much smaller block size for practical reasons. 
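# 6_074_001_000 and 3_810_778 are the limits verified in test_calculate_limits
# above: the largest n for which sum(range(n)), resp. sum(i * i for i in
# range(n)), still fits into an unsigned 64-bit integer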
for mode, f, n in [(1, sum_range, 6_074_001_000), (2, sum_sqr_range, 3_810_778)]: a = ones(n) self.assertTrue(f(len(a)) <= MAX_UINT64) self.assertEqual(_ssqi(a, mode), f(n)) a.append(1) self.assertTrue(f(len(a)) > MAX_UINT64) self.assertRaises(OverflowError, _ssqi, a, mode) def test_primes(self): n = 3_800_000 endian = choice(['little', 'big']) a = gen_primes(n, endian) self.assertEqual(_ssqi(a, 1), 493_187_952_850) self.assertEqual(_ssqi(a, 2), 1_234_421_634_142_352_974) def test_sparse(self): for _ in range(500): n = randint(2, 3_810_778) k = randrange(min(1_000, n // 2)) mode = randint(1, 2) freeze = getrandbits(1) inv = getrandbits(1) self.check_sparse(_ssqi, n, k, mode, freeze, inv) class SumIndicesTests(SumIndicesUtil): def test_urandom(self): self.check_urandom(sum_indices, 1_000_003) def test_random_sample(self): n = N31 for k in 1, 31, 503: mode = randint(1, 2) freeze = getrandbits(1) inv = getrandbits(1) self.check_sparse(sum_indices, n, k, mode, freeze, inv) def test_primes(self): n = 10_000_000 endian = choice(['little', 'big']) a = gen_primes(n, endian) self.assertEqual(sum_indices(a, 1), 3_203_324_994_356) self.assertEqual(sum_indices(a, 2), 21_113_978_675_102_768_574) def test_ones(self): for m in range(19, 32): n = randrange(1 << m) mode = randint(1, 2) freeze = getrandbits(1) self.check_sparse(sum_indices, n, 0, mode, freeze, inv=True) def test_sum_random(self): for _ in range(50): n = randrange(1 << randrange(19, 32)) k = randrange(min(1_000, n // 2)) mode = randint(1, 2) freeze = getrandbits(1) inv = getrandbits(1) self.check_sparse(sum_indices, n, k, mode, freeze, inv) def test_hypot(self): a = urandom(10_000) self.assertAlmostEqual(math.sqrt(sum_indices(a, 2)), math.hypot(*list(a.search(1)))) class VarianceTests(unittest.TestCase): def variance(self, a, mu=None): si = sum_indices(a) k = a.count() if mu is None: mu = si / k return (sum_indices(a, 2) - 2 * mu * si) / k + mu * mu def variance_values(self, values, mu=None): k = len(values) if mu is None: mu = sum(values) / k return sum((x - mu) ** 2 for x in values) / k def test_variance(self): for _ in range(1_000): n = randrange(1, 1_000) k = randint(1, max(1, n // 2)) indices = sample(range(n), k) a = zeros(n) a[indices] = 1 mean = sum(indices) / len(indices) self.assertAlmostEqual(self.variance(a), self.variance_values(indices)) self.assertAlmostEqual(self.variance(a, mean), self.variance_values(indices, mean)) mean = 20.5 self.assertAlmostEqual(self.variance(a, mean), self.variance_values(indices, mean)) def test_ones(): for n in [3_810_778, 3_810_779, 6_074_001_000, 6_074_001_001, N33, 2 * N33]: a = ones(n) print("n = %32d %6.2f Gbit %6.2f GB" % (n, n / N30, n / N33)) print("2^64 = %32d" % (1 << 64)) res = sum_indices(a) print("sum = %32d" % res) assert res == sum_range(n) res = sum_indices(a, 2) print("sum2 = %32d" % res) assert res == sum_sqr_range(n) print() print("OK") if __name__ == "__main__": import sys if '--ones' in sys.argv: test_ones() sys.exit() unittest.main() bitarray-3.7.1/devel/tricks.py000066400000000000000000000201511505414144000163040ustar00rootroot00000000000000""" This file contains some little tricks and verifications for some code which is used in the C implementation of bitarray. 
""" from random import randint import unittest # ---------------------------- Range checks --------------------------------- class RangeTests(unittest.TestCase): def test_check_simple(self): r = range(0, 256) for k in range(-10, 300): self.assertEqual(k < 0 or k > 0xff, bool(k >> 8)) self.assertEqual(k not in r, bool(k >> 8)) def test_check(self): # used in various places in C code for i in range(0, 11): m = 1 << i for k in range(-10, 2000): res1 = k not in range(0, m) res2 = k < 0 or k >= m self.assertEqual(res1, res2) # simply shift i to right and see if anything remains res3 = bool(k >> i) self.assertEqual(res1, res3) def test_check_2(self): # this is used in _util.c in set_count() for i in range(0, 11): m = 1 << i for k in range(-10, 2000): res1 = k not in range(0, m + 1) res2 = k < 0 or k > m self.assertEqual(res1, res2) # same as above but combined with k substracted by 1 res3 = bool(k >> i) and bool((k - 1) >> i) self.assertEqual(res1, res3) # ------------------------------ Slicing ------------------------------------ def adjust_step_positive(slicelength, start, stop, step): """ This is the adjust_step_positive() implementation from bitarray.h. """ if step < 0: stop = start + 1 start = stop + step * (slicelength - 1) - 1 step = -step assert start >= 0 and stop >= 0 assert step > 0 assert slicelength >= 0 if slicelength == 0: assert stop <= start elif step == 1: assert stop - start == slicelength return start, stop, step def slicelength(start, stop, step): """ This is the slicelength implementation from PySlice_AdjustIndices(). a / b does integer division. If either a or b is negative, the result depends on the compiler (rounding can go toward 0 or negative infinity). Therefore, we are careful that both a and b are always positive. """ if step < 0: if stop < start: return (start - stop - 1) // (-step) + 1 else: if start < stop: return (stop - start - 1) // step + 1 return 0 class ListSliceTests(unittest.TestCase): def random_slices(self, max_len=100, repeat=10_000): for _ in range(repeat): n = randint(0, max_len) s = slice(randint(-n - 2, n + 2), randint(-n - 2, n + 2), randint(-5, 5) or 1) yield n, s, range(n)[s] def test_basic(self): for n, s, r in self.random_slices(): self.assertEqual(range(*s.indices(n)), r) def test_indices(self): for n, s, r in self.random_slices(): start, stop, step = s.indices(n) self.assertEqual(start, r.start) self.assertEqual(stop, r.stop) self.assertEqual(step, r.step) self.assertNotEqual(step, 0) if step > 0: self.assertTrue(0 <= start <= n) self.assertTrue(0 <= stop <= n) else: self.assertTrue(-1 <= start < n) self.assertTrue(-1 <= stop < n) self.assertEqual(range(start, stop, step), r) def test_list_get(self): for n, s, r in self.random_slices(): a = list(range(n)) b = a[s] self.assertEqual(len(b), len(r)) self.assertEqual(b, list(r)) def test_list_set(self): for n, s, r in self.random_slices(20): a = n * [None] b = list(a) a[s] = range(len(r)) for i, j in enumerate(r): b[j] = i self.assertEqual(a, b) def test_list_del(self): for n, s, r in self.random_slices(): a = list(range(n)) b = list(a) del a[s] self.assertEqual(len(a), n - len(r)) for i in sorted(r, reverse=True): del b[i] self.assertEqual(a, b) def test_adjust_step_positive(self): for n, s, r in self.random_slices(): if s.step < 0: r = r[::-1] start, stop, step = adjust_step_positive(len(r), *s.indices(n)) self.assertEqual(range(start, stop, step), r) self.assertTrue(step > 0) if r: self.assertTrue(0 <= start < n) self.assertTrue(0 < stop <= n) def test_slicelength(self): for n, s, r in 
self.random_slices(): self.assertEqual(slicelength(r.start, r.stop, r.step), len(r)) # ------------------------- Modular Arithmetic ------------------------------ class ModularTests(unittest.TestCase): def test_remainder(self): for _ in range(1000): a = randint(-20, 20) b = randint(1, 20) # integer division in Python returns the floor of the result # instead of truncating towards zero like C q = a // b if a < 0: self.assertTrue(q < 0) r = a % b self.assertEqual(b * q + r, a) self.assertTrue(0 <= r < b) def test_avoid_neg_numerator(self): # # equality: a % b = (b - (-a) % b) % b # for _ in range(1000): a = randint(-20, 20) b = randint(1, 20) r = a % b # Note that even though a may be negative, the remainder is # always positive: self.assertTrue(r >= 0) # The following equality: s = (b - (-a) % b) % b self.assertEqual(s, r) # can be used to implement a % b in C when a <= 0 if a <= 0: # here % always operates on positive numerator self.assertTrue(-a >= 0) self.assertTrue(b - (-a) % b > 0) # ----------------------------- Segments ------------------------------------ class SegmentTests(unittest.TestCase): def test_nseg(self): SEGSIZE = 32 # segment size in bytes SEGBITS = 8 * SEGSIZE for nbits in range(1000): nbytes = (nbits + 7) // 8 # number of segments in terms of bytes nseg = (nbytes + SEGSIZE - 1) // SEGSIZE # and in terms of bits self.assertEqual((nbits + SEGBITS - 1) // SEGBITS, nseg) # number of complete segments cseg = nbits // SEGBITS self.assertTrue(cseg <= nseg) # The number of complete segments cannot be calculated in terms # of bytes, as it isn't possible to tell how many bits are # actually used within the last byte of each segment. if (nbits % SEGBITS > SEGBITS - 8): self.assertNotEqual(nbytes // SEGSIZE, cseg) else: self.assertEqual(nbytes // SEGSIZE, cseg) # remaining bits rbits = nbits % SEGBITS self.assertEqual(cseg * SEGBITS + rbits, nbits) if cseg == nseg: self.assertEqual(rbits, 0) self.assertEqual(nbytes % SEGSIZE, 0) else: self.assertEqual(nseg, cseg + 1) self.assertTrue(rbits > 0) # ------------------------ Variable Length Format --------------------------- class VLFTests(unittest.TestCase): def test_padding(self): LEN_PAD_BITS = 3 for nbits in range(1000): n = (nbits + LEN_PAD_BITS + 6) // 7 # number of resulting bytes padding = 7 * n - LEN_PAD_BITS - nbits self.assertTrue(0 <= padding < 7) self.assertEqual(divmod(nbits + padding + LEN_PAD_BITS, 7), (n, 0)) # alternative equation for padding padding_2 = (7 - (nbits + LEN_PAD_BITS) % 7) % 7 self.assertEqual(padding_2, padding) if __name__ == '__main__': unittest.main() bitarray-3.7.1/doc/000077500000000000000000000000001505414144000141025ustar00rootroot00000000000000bitarray-3.7.1/doc/bitarray3.rst000066400000000000000000000023261505414144000165370ustar00rootroot00000000000000Bitarray 3 transition ===================== The bitarray version 3 release is bitarray's farewell to Python 2. Apart from removing Python 2 support, this release also migrates bitarray's ``.decode()`` and ``.search()`` methods to return iterators. This is similar to how Python's ``dict.keys()``, ``.values()`` and ``.items()`` methods were revamped in the Python 2 to 3 transition. In the following table, ``a`` is assumed to a bitarray object. 
+----------------------+----------------------+ | before version 3 | version 3 | +======================+======================+ | ``a.iterdecode()`` | ``a.decode()`` | +----------------------+----------------------+ | ``a.decode()`` | ``list(a.decode()`` | +----------------------+----------------------+ | ``a.itersearch()`` | ``a.search()`` | +----------------------+----------------------+ | ``a.search()`` | ``list(a.search()`` | +----------------------+----------------------+ Aside from these changes which will make bitarray 3 more pythonic, there are a few other minor changes (see changelog). It should be emphasized that in most common use cases the bitarray 3 transition will require only minor code changes, or no changes at all. bitarray-3.7.1/doc/buffer.rst000066400000000000000000000074261505414144000161160ustar00rootroot00000000000000Buffer protocol =============== Bitarray objects support the buffer protocol. They can both export their own buffer, as well as import another object's buffer. Exporting buffers ----------------- Here is an example where the bitarray's buffer is exported: .. code-block:: python >>> from bitarray import bitarray >>> a = bitarray('01000001 01000010 01000011', endian='big') >>> v = memoryview(a) >>> v.tobytes() b'ABC' >>> v[1] = 255 >>> a bitarray('010000011111111101000011') Note that it is possible to change the shared buffer from both ``a`` and ``v``: .. code-block:: python >>> a[6] = 1 >>> v.tobytes() b'C\xffC' However, as ``a``'s buffer is shared, it is not possible to resize it: .. code-block:: python >>> a.append(0) Traceback (most recent call last): ... BufferError: cannot resize bitarray that is exporting buffers When exporting the buffer of a ``frozenbitarray``, it is not possible to change its ``memoryview`` either: .. code-block:: python >>> from bitarray import frozenbitarray >>> a = frozenbitarray('01000001 01000010') >>> v = memoryview(a) >>> v.readonly True >>> v[0] = 15 Traceback (most recent call last): ... TypeError: cannot modify read-only memory Importing buffers ----------------- As of bitarray version 2.3, it is also possible to import the buffer from an object which exposes its buffer. Here the bytearray: .. code-block:: python >>> c = bytearray([0x41, 0xff, 0x01]) >>> a = bitarray(buffer=c, endian='big') >>> a bitarray('010000011111111100000001') >>> a <<= 3 # shift all bits by 3 to the left >>> c bytearray(b'\x0f\xf8\x08') >>> a[20:] = 1 >>> a bitarray('000011111111100000001111') Again, the shared buffer can be represented and modify by either object ``a`` and ``c``. When importing a buffer into a bitarray, the length of the bitarray will always be multiple of 8 bits, as buffers are bases on bytes. Also, we may specify the endianness of the bitarray: .. code-block:: python >>> b = bitarray(buffer=c, endian='little') >>> b bitarray('111100000001111111110000') The bytearray ``c`` is now exporting its buffer twice: to big-endian bitarray ``a``, and a little-endian bitarray ``b``. At this point all three object ``a``, ``b`` and ``c`` share the same buffer. Using the ``.buffer_info()`` method, we can actually verify that the bitarrays ``a`` and ``b`` point to the same address: .. code-block:: python >>> def address(a): ... info = a.buffer_info() ... return info[0] # using bitarray 3.7, we can also: info.address >>> assert address(a) == address(b) As bitarray's expose their buffer, we can also directly create a bitarray which imports the buffer from another bitarray: .. 
code-block:: python >>> a = bitarray(32) >>> b = bitarray(buffer=a) >>> # the buffer address is the same >>> assert address(a) == address(b) >>> a.setall(0) >>> assert a == b >>> b[::7] = 1 >>> assert a == b >>> a bitarray('10000001000000100000010000001000') We can also create bitarrays which share part of the buffer. Let's create a large bitarray ``a``, and then have ``b`` and ``c`` share different portions of ``a``'s buffer: .. code-block:: python >>> a = bitarray(1 << 23) >>> a.setall(0) >>> b = bitarray(buffer=memoryview(a)[0x10000:0x30000]) >>> assert address(a) + 0x10000 == address(b) >>> c = bitarray(buffer=memoryview(a)[0x20000:0x50000]) >>> assert address(a) + 0x20000 == address(c) >>> c[0] = 1 >>> assert b[8 * 0x10000] == 1 >>> assert a[8 * 0x20000] == 1 Finally, importing buffers allows creating bitarrays that are memory mapped to a file. Please see the `mmapped-file.py <../examples/mmapped-file.py>`__ example. bitarray-3.7.1/doc/canonical.rst000066400000000000000000000054531505414144000165720ustar00rootroot00000000000000Canonical Huffman Coding ======================== Bitarray supports creating, encoding and decoding canonical Huffman codes. Consider the following frequency map: .. code-block:: python >>> cnt = {'a': 5, 'b': 3, 'c': 1, 'd': 1, 'r': 2} We can now use ``canonical_huffman()`` to create a canonical Huffman code: .. code-block:: python >>> from pprint import pprint >>> from bitarray.util import canonical_huffman >>> codedict, count, symbol = canonical_huffman(cnt) >>> pprint(codedict) {'a': bitarray('0'), 'b': bitarray('10'), 'c': bitarray('1110'), 'd': bitarray('1111'), 'r': bitarray('110')} >>> count [0, 1, 1, 1, 2] >>> symbol ['a', 'b', 'r', 'c', 'd'] The output is tuple with the following elements: * A dictionary mapping each symbols to a ``bitarray`` * A list containing the number of symbols for each code length, e.g. `count[3] = 1` because there is one symbol (``r``) with code length ``3``. * A list of symbols in canonical order If we add up numbers in ``count``, we get the total number of symbols coded: .. code-block:: python >>> sum(count) == len(symbol) True The canonical Huffman code is: .. code-block:: index symbol code length --------------------------- 0 a 0 1 1 b 10 2 2 r 110 3 3 c 1110 4 4 d 1111 4 Encode a message using this code: .. code-block:: python >>> from bitarray import bitarray >>> msg = "abracadabra" >>> a = bitarray() >>> a.encode(codedict, msg) >>> a bitarray('01011001110011110101100') >>> assert ''.join(a.decode(codedict)) == msg And now decode using not ``codedict``, but the canonical decoding tables ``count`` and ``symbol`` instead: .. code-block:: python >>> from bitarray.util import canonical_decode >>> ''.join(canonical_decode(a, count, symbol)) 'abracadabra' Notes on DEFLATE: ----------------- DEFLATE is a lossless data compression file format that uses a combination of LZ77 and Huffman coding. It is used by ``gzip`` and implemented in ``zlib``. The format is organized in blocks, which contain Huffman encoded data (except for raw blocks). In addition to symbols that represent bytes, there is a stop symbol and up to 29 LZ77 match length symbols. When a LZ77 symbol is encountered, more bits are read from the stream before continuing with decoding the next element in the stream. The fact that extra bits are taken from the stream makes our decode function (``canonical_decode()``) unsuitable for DEFLATE decompression, or at least inefficient as we would have to create a new iterator for decoding each symbol. 
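For instance, pulling out one symbol at a time (a minimal sketch, reusing ``a``, ``count`` and ``symbol`` from the example above) would mean creating a fresh iterator whenever extra bits have to be consumed from the stream in between:

.. code-block:: python

    >>> next(canonical_decode(a, count, symbol))
    'a'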
A more efficient implementation can be found in `examples/puff `__ bitarray-3.7.1/doc/changelog.rst000066400000000000000000001071301505414144000165650ustar00rootroot00000000000000Change log ========== **3.7.1** (2025-08-28): * fix type hinting for memoryviews, see `#241 `__ * add `bit-endianness `__ documentation * improve testing, including debug mode test for ``digit_to_int()`` **3.7.0** (2025-08-24): * add ``util.gen_primes()``, generate bitarrays in which active indices are prime numbers * improve ``.buffer_info()`` to return named tuple * add optional ``mode`` argument to ``util.sum_indices()`` to sum square of active indices * improve internal ``_sysinfo()`` to include ``Py_DEBUG`` * add `Dubner's conjecture <../examples/dubner.rst>`__ (in memory of Harvey Dubner) * add `dynamically growing sieve <../examples/dyn_sieve.py>`__ **3.6.1** (2025-08-12): * add development files for statistical tests in ``devel/random/`` * optimize ``util.sum_indices()`` * fix RecursionError in ``util.random_k()``, see `#239 `__ * add ``devel/test_sum_indices.py`` **3.6.0** (2025-07-29): * add ``util.random_k()``, see `#237 `__ * add ``util.sum_indices()`` * optimize ``util.xor_indices()`` * move development files from ``examples/`` to ``devel/`` **3.5.2** (2025-07-21): * optimize ``util.random_p()`` by also using bitwise AND in final step * fix DeprecationWarning regarding ``u`` type code * add `verification tests <../devel/test_random.py>`__ for internals of ``util.random_p()`` **3.5.1** (2025-07-14): * optimize ``util.random_p()`` for ``n < 100`` * add `Random Bitarrays `__ documentation * add `statistical tests for random functions <../devel/test_random.py>`__ **3.5.0** (2025-07-06): * add ``util.random_p()`` * improve sparse compression testing **3.4.3** (2025-06-23): * minor updates to documentation * C-level: - simplify and speedup ``extend_unicode01()`` - customize ``resize_lite()`` - avoid unused code - use ``PyTypeObject`` for bitarray type object in ``_util.c`` to be consistent with ``_bitarray.c`` - add and improve comments to implementation of sparse bitarray compression - simplify ``sc_count()`` **3.4.2** (2025-05-21): * extend documentation of `compression of sparse bitarrays `__ * ``util.sc_decode()`` and ``util.vl_decode()`` now raise ``StopIteration`` instead of ``ValueError`` when unexpected end of stream is encountered * add debug mode tests for ``read_n()``, ``write_n()`` and ``count_from_word()`` **3.4.1** (2025-05-15): * add ``pyproject.toml``, see `#233 `__ * implement ``bits2bytes()`` in C * optimize ``delslice()`` when ``step`` is larger than about 5 * consistently name ``*_span()`` and ``*_range()`` in C for invert, set and count * organize and add tests (including debug mode tests for ``zlw()``) **3.4.0** (2025-05-06): * remove ``.endian()`` method in favor of data descriptor ``.endian`` * allow bitarray initializer ``bytes`` or ``bytearray`` to set buffer directly * allow calling ``.extend()`` with ``bytes`` object (although the only valid bytes are 0x00 and 0x01) * add ``util.byteswap()`` * add ``util.correspond_all()`` * fix ``.reverse()`` for imported buffer * drop Python 3.5 support * add tests **3.3.2** (2025-05-02): * fix off-by-one-error in check for length of count argument in ``util.canonical_decode()`` * simplify ``util.int2ba()`` * add tests * add `masked indexing example <../examples/masked.py>`__ * add `tricks example <../devel/tricks.py>`__ **3.3.1** (2025-04-04): * remove ``License`` classifier in favor of a SPDX license expression, `#231 `__ * reorganize and 
cleanup many tests **3.3.0** (2025-03-30): * add optional ``group`` and ``sep`` arguments' to ``.to01()``, `#230 `__ - as well as ``util.ba2hex()`` and ``util.ba2base()`` * ignore whitespace in ``util.base2ba()`` and ``util.hex2ba()`` * check for embedded nul characters when extending (and initializing) bitarray from string * improve testing * add `double precision floating point number example <../examples/double.py>`__ **3.2.0** (2025-03-19): * add ``util.xor_indices()``, `#229 `__ * add `Hamming code example <../examples/hamming.py>`__ **3.1.1** (2025-03-06): * updated ``pythoncapi_compat.h`` for pypy3.11 support, see `#227 `__ * use ``__builtin_parityll()`` when available in ``util.parity()`` * add ``parity_64()`` to header * simplify some tests * add `LFSR example <../examples/lfsr.py>`__ **3.1.0** (2025-02-19): * allow mask assignment to bitarrays, see `#225 `__ * add missing masked operations to pyi-file * refactor ``resize()`` and avoid overallocation when downsizing buffer * update ``build_wheels.yml`` * fix some typos * minor simplifications * rename ``growth/`` example to ``resize/`` and add tests for ``resize()`` * update gene example * add comments **3.0.0** (2024-10-15): * see `Bitarray 3 transition `__ * remove Python 2.7 support * ``.decode()`` now returns iterator (equivalent to past ``.iterdecode()``) * ``.search()`` now returns iterator (equivalent to past ``.itersearch()``) * remove ``.iterdecode()`` and ``.itersearch()`` * remove ``util.rindex()``, use ``.index(..., right=1)`` instead, deprecated since 2.9 * remove ``util.make_endian()``, use ``bitarray(..., endian=...)`` instead, deprecated since 2.9 * remove hackish support for ``bitarray()`` handling unpickling, see detailed explaination in `#207 `__ - closes `#206 `__ **2.9.3** (2024-10-10): * add official Python 3.13 support * update cibuildwheel to 2.21.3 * minor simplifications * fix some typos **2.9.2** (2024-01-01): * optimize initialization from strings by not constantly resizing buffer * optimize ``util.hex2ba()`` and ``util.base2ba()`` by avoiding unnecessary copies * optimize ``util.base2ba()`` and ``util.ba2base()`` for ``n=16`` (hexadecimal) **2.9.1** (2023-12-23): * avoid buffer being unnecessarily initialized with 0s in several functions of the ``bitarray.util`` module * fix ``.count()`` type hint in pyi-file * improve testing **2.9.0** (2023-12-17): * deprecate support for Python 2 - Python 2.7 support will be removed in bitarray version 3.0, see `roadmap `__ * ``bitarray(n)`` for integer initializer ``n`` will always return a bitarray of length ``n`` with all items initialized to ``0``, see `#212 `__ * allow sub-bitarray in ``.count()``, `#212 `__ * add ``util.ones()`` * ``.find()`` and ``.index()``: add keyword argument ``right`` for rightmost index * ``.itersearch()``: add start and stop argument, and keyword argument ``right`` (for descending order - starting with rightmost match) * deprecate ``util.rindex()`` (will be removed in 3.0 release), use ``.index(..., right=True)`` instead * deprecate ``util.make_endian()`` (will be removed in 3.0 release), use ``bitarray(..., endian=...)`` instead **2.8.5** (2023-12-09): * speedup unaligned copies by always using word shifts (in combination with builtin byte swap 64 when available) when bit-endianness and machine byte-order are opposite * add ``HAVE_BUILTIN_BSWAP64`` to header * avoid misaligned pointers when casting to ``(uint64_t *)`` * add tests **2.8.4** (2023-12-04): * simplify ``copy_n()`` (remove special cases), see `#d2d6fd53 `__ * add `word 
shift example C program <../devel/shift_r8.c>`__, and simplify ``shift_r8()`` * improve documentation and testing * add `roadmap `__ **2.8.3** (2023-11-11): * ensure readonly attribute is set correctly when creating new objects, see `#211 `__ * optimize sparse bitarray compression for raw block type * improve hash functions in Bloom filter example **2.8.2** (2023-10-03): * update cibuildwheel to 2.16.1 in order to provide cp312 wheels on PyPI * improve error messages for masked assignment * simplify test collection * added ``pytest.ini`` to allow running pytest with no additional arguments, see `#208 `__ * ``util.sc_encode()``: avoid writing empty blocks at end of compressed stream, i.e. skip encoding when total population count is reached **2.8.1** (2023-08-06): * use reconstructor function for pickling, see `#207 `__ * simplify implementation of ``.reverse()`` **2.8.0** (2023-07-22): * allow `integer sequence indexing `__ by list of indices, see `#204 `__ (addresses `#156 `__ and `#190 `__) * add `masked indexing `__ by bitarray masks, see `#205 `__ (addresses `#190 `__) * improve ``.bytereverse()`` docstring, see issue `#202 `__ **2.7.6** (2023-06-24): * remove caching hash value, fixes issue `#201 `__ **2.7.5** (2023-06-10): * fix for pypy3.9-v7.3.11, `#198 `__ (fixes `#188 `__) * enable building wheels for PyPy **2.7.4** (2023-05-29): * register ``bitarray`` as ``abc.MutableSequence``, see `#196 `__ * cibuildwheel: update macOS version to 11 from unsupported 10.15, see https://github.com/actions/runner-images/issues/5583 * improve documentation regarding type when indexing single bitarray items, `#192 `__ **2.7.3** (2023-02-20): * fix popcount64 name conflict on NetBSD, `#189 `__ * even though PyPy is not actively supported, allow running tests for PyPy 3.7 and 3.8, see also `#188 `__ * allow running ``python setup.py test`` * add official Python 3.12 support * simplify count functionality in util module * retire ``bitcount_lookup[256]`` table * improve ``util.count_n()`` error messages * avoid ``util`` module tests from being run more than once in each call to ``bitarray.test()`` when called multiple times in the same Python process, see `#6e52e49a `__ * improve testing **2.7.2** (2023-02-12): * speedup all count functionality by using ``__builtin_popcountll`` when available, see `#187 `__ * add ``popcount64()`` to ``bitarray.h`` - we assume now that ``uint64_t`` is always available * improve testing **2.7.1** (2023-02-10): * optimize ``util.sc_encode()`` **2.7.0** (2023-02-05): * add ``util.sc_encode()`` and ``util.sc_decode()`` for `compression of sparse bitarrays `__ * add ``util.any_and()`` * add ``util.intervals()`` * move functionality of the following utility functions entirely to C: ``hex2ba()``, ``base2ba()``, ``deserialize()``, ``vl_decode()``, ``zeros()`` * improve error checking for unpickling * add `distance metrics <../examples/distance.py>`__ example: dice, hamming, jaccard, kulczynski1, rogerstanimoto, russellrao, sokalmichener, sokalsneath, yule * add example `sparse bitarray <../examples/sparse>`__ implementations **2.6.2** (2023-01-01): * optimize ``richcompare()`` for opposite endianness * improve some docstrings add tests **2.6.1** (2022-12-18): * add documentation on shift operators, `#181 `__ * fix typo in iterable initializer description, `#179 `__ * optimize ``richcompare()`` **2.6.0** (2022-07-19): * add data descriptors: ``.nbytes``, ``.padbits``, ``.readonly`` * allow optional ``endian`` argument to be ``None`` when creating bitarrays * fix type annotation 
for ``canonical_decode()``, `#178 `__ * frozenbitarray's pad bits are now guaranteed to be zero * add tests **2.5.1** (2022-05-10): * optimize ``.reverse()``, see `#177 `__ * allow negative (byte) indices in ``.bytereverse()`` **2.5.0** (2022-05-04): * add calculating of canonical Huffman codes ``util.canonical_huffman()`` and decoding thereof ``util.canonical_decode()``, see `#173 `__ * allow creating "Huffman codes" from frequency maps with a single symbol in ``util.huffman_code()`` and ``util.canonical_huffman()``, see `#172 `__ * allow bytes-like argument in ``.frombytes()`` and ``.pack()`` - previously, the arguments were limited to the ``bytes`` object, see `#174 `__ * allow bytes-like arguments in ``util.deserialize()`` * add official `pyodide `__ support * add `DEFLATE decompression <../examples/puff/>`__ example * optimize ``.bytereverse()`` * optimize ``delslice()`` for cases like ``del a[1:17:2]`` when ``a`` is large * fix ``examples/huffman/compress.py`` to handle files with 0 or 1 characters, see also `#172 `__ * add ``skipIf`` decorator for skipping tests * add tests **2.4.1** (2022-03-29): * improve ``resize()``, see `#167 `__ * optimize ``copy_n()`` by avoiding loops, `#171 `__ * ``.bytereverse()`` no longer sets unused pad bits to zero **2.4.0** (2022-03-01): * enable building wheels for multiple platforms and Python versions using pypa/cibuildwheel, see `#165 `__ and `#170 `__ (thanks Brian Thorne, @hardbyte) * use setuptools instead of distutils in ``setup.py``, `#168 `__ * add missing type hinting for ``.count()`` step argument **2.3.7** (2022-02-21): * add optional step argument to ``.count()`` method, see `#162 `__ and `#163 `__ * add tests **2.3.6** (2022-02-07): * add optional value parameter to ``util.count_n()``, see `#154 `__ and `#161 `__ * determine machine endianness at build time when possible, by using the ``PY_LITTLE_ENDIAN`` macro, in order to optimize ``shift_r8()`` * add official Python 3.11 support **2.3.5** (2022-01-07): * Fixed bug for big-endian systems (such as IBM s390), see `#159 `__ and `#160 `__ * Pass ``zip_safe=False`` to ``setup()``, see `#151 `__ **2.3.4** (2021-09-12): * Fix ``util.ba2int()`` for frozenbitarrays. A bug was introduced in 2.3.0 as ``.tobytes()`` no longer treats pad bits for read-only buffers as zero. * add tests **2.3.3** (2021-09-05): * improve some error messages * add tests **2.3.2** (2021-08-23): * fix slice assignment for shared buffer with offset case, see `#3ba05687 `__ and `#73081e98 `__ * add tests (including internal debug mode tests for ``buffers_overlap()``) **2.3.1** (2021-08-20): * fix special shared buffer copy case, see `#815c2a11 `__ * add and improve tests **2.3.0** (2021-08-15): * add optional ``buffer`` argument to ``bitarray()`` to import the buffer of another object, `#141 `__, `#146 `__, see also: `buffer protocol `__ * update ``.buffer_info()`` to include: a read-only flag, an imported buffer flag, and the number of buffer exports * add optional start and stop arguments to ``util.rindex()`` * add `memory-mapped file <../examples/mmapped-file.py>`__ example * ignore underscore (``_``) in string input, e.g. 
``bitarray('1100_0111')`` * add missing type hinting for new ``.bytereverse()`` arguments * fix ``.extend()`` type annotations, `#145 `__ * avoid ``.reverse()`` using temporary memory * make ``.unpack()``, ``util.serialize()``, ``util.vl_encode()`` and ``.__reduce__()`` more memory efficient * add and improve tests **2.2.5** (2021-08-07): * speedup ``find_bit()`` and ``find_last()`` using uint64 checking, this means a speedup for ``.find()``, ``.index()``, ``.search()`` and ``util.rindex()`` * add optional start and stop arguments to ``.bytereverse()`` * add example to illustrate how `unaligned copying <../devel/copy_n.py>`__ works internally * add documentation * add tests **2.2.4** (2021-07-29): * use shift operations to speedup all unaligned copy operations, `#142 `__ * expose functionality to Python level only in debug mode for testing * add and improve tests **2.2.3** (2021-07-22): * speedup ``repeat()``, `#136 `__ * speedup shift operations, `#139 `__ * optimize slice assignment with negative step, e.g.: ``a[::-1] = 1`` * add tests **2.2.2** (2021-07-16): * speedup slice assignment, see `#132 `__ and `#135 `__ * speedup bitwise operations, `#133 `__ * optimize ``getbit()`` and ``setbit()`` in ``bitarray.h`` * fix TypeError messages when bitarray or int (0, 1) are expected (bool is a subclass of int) * add and improve tests **2.2.1** (2021-07-06): * improve documentation * speedup ``vl_encode()`` * ``bitarray.h``: make ``getbit()`` always an (inline) function * add assertions in C code **2.2.0** (2021-07-03): * add ``bitarray.util.vl_encode()`` and ``bitarray.util.vl_decode()`` which uses a `variable length bitarray format `__, `#131 `__ **2.1.3** (2021-06-15): * Fix building with MSVC / Bullseye, `#129 `__ **2.1.2** (2021-06-13): * support type hinting for all Python 3 versions (that bitarray supports, 3.5 and higher currently), fixed `#128 `__ * add explicit endianness to two tests, fixes `#127 `__ **2.1.1** (2021-06-11): * add type hinting (see PEP 484, 561) using stub (``.pyi``) files * add tests **2.1.0** (2021-05-05): * add ``.find()`` method, see `#122 `__ * ``.find()``, ``.index()``, ``.search()`` and ``.itersearch()`` now all except both (sub-) bitarray as well as bool items to be searched for * improve encode/decode error messages * add `lexicographical permutations example <../examples/lexico.py>`__ * add tests **2.0.1** (2021-04-19): * update documentation * improve some error messages **2.0.0** (2021-04-14): * require more specific objects, int (0 or 1) or bool, see `#119 `__ * items are always returned as int 0 or 1, `#119 `__ * remove ``.length()`` method (deprecated since 1.5.1 - use ``len()``) * in ``.unpack()`` the ``one`` argument now defaults to 0x01 (was 0xff) * ``.tolist()`` now always returns a list of integers (0 or 1) * fix frozenbitarray hash function, see `#121 `__ * fix frozenbitarray being mutable by ``<<=`` and ``>>=`` * support sequence protocol in ``.extend()`` (and bitarray creation) * improve OverflowError messages from ``util.int2ba()`` * add `hexadecimal example <../examples/hexadecimal.py>`__ **1.9.2** (2021-04-10): * update pythoncapi_compat: Fix support with PyPy 3.7, `#120 `__ * update readme **1.9.1** (2021-04-05): * switch documentation from markdown to reStructuredText * add tests **1.9.0** (2021-04-03): * add shift operations (``<<``, ``>>``, ``<<=``, ``>>=``), see `#117 `__ * add ``bitarray.util.ba2base()`` and ``bitarray.util.base2ba()``, see last paragraph in `Bitarray representations `__ * documentation and tests **1.8.2** (2021-03-31): 
* fix crash caused by unsupported types in binary operations, `#116 `__ * speedup initializing or extending a bitarray from another with different bit-endianness * add formatting options to ``bitarray.util.pprint()`` * add documentation on `bitarray representations `__ * add and improve tests (all 291 tests run in less than half a second on a modern machine) **1.8.1** (2021-03-25): * moved implementation of and ``hex2ba()`` and ``ba2hex()`` to C-level * add ``bitarray.util.parity()`` **1.8.0** (2021-03-21): * add ``bitarray.util.serialize()`` and ``bitarray.util.deserialize()`` * allow whitespace (ignore space and ``\n\r\t\v``) in input strings, e.g. ``bitarray('01 11')`` or ``a += '10 00'`` * add ``bitarray.util.pprint()`` * When initializing a bitarray from another with different bit-endianness, e.g. ``a = bitarray('110', 'little')`` and ``b = bitarray(a, 'big')``, the buffer used to be simply copied, with consequence that ``a == b`` would result in ``False``. This is fixed now, that is ``a == b`` will always evaluate to ``True``. * add test for loading existing pickle file (created using bitarray 1.5.0) * add example showing how to `jsonize bitarrays <../examples/extend_json.py>`__ * add tests **1.7.1** (2021-03-12): * fix issue `#114 `__, raise TypeError when incorrect index is used during assignment, e.g. ``a[1.5] = 1`` * raise TypeError (not IndexError) when assigning slice to incorrect type, e.g. ``a[1:4] = 1.2`` * improve some docstrings and tests **1.7.0** (2021-02-27): * add ``bitarray.util.urandom()`` * raise TypeError when trying to extend bitarrays from bytes on Python 3, i.e. ``bitarray(b'011')`` and ``.extend(b'110')``. (Deprecated since 1.4.1) **1.6.3** (2021-01-20): * add missing .h files to sdist tarball, `#113 `__ **1.6.2** (2021-01-20): * use ``Py_SET_TYPE()`` and ``Py_SET_SIZE()`` for Python 3.10, `#109 `__ * add official Python 3.10 support * fix slice assignment to same object, e.g. ``a[2::] = a`` or ``a[::-1] = a``, `#112 `__ * add bitarray.h, `#110 `__ **1.6.1** (2020-11-05): * use PyType_Ready for all types: bitarray, bitarrayiterator, decodeiterator, decodetree, searchiterator **1.6.0** (2020-10-17): * add ``decodetree`` object, for speeding up consecutive calls to ``.decode()`` and ``.iterdecode()``, in particular when dealing with large prefix codes, see `#103 `__ * add optional parameter to ``.tolist()`` which changes the items in the returned list to integers (0 or 1), as opposed to Booleans * remove deprecated ``bitdiff()``, which has been deprecated since version 1.2.0, use ``bitarray.util.count_xor()`` instead * drop Python 2.6 support * update license file, `#104 `__ **1.5.3** (2020-08-24): * add optional index parameter to ``.index()`` to invert single bit * fix ``sys.getsizeof(bitarray)`` by adding ``.__sizeof__()``, see issue `#100 `__ **1.5.2** (2020-08-16): * add PyType_Ready usage, issue `#66 `__ * speedup search() for bitarrays with length 1 in sparse bitarrays, see issue `#67 `__ * add tests **1.5.1** (2020-08-10): * support signed integers in ``util.ba2int()`` and ``util.int2ba()``, see issue `#85 `__ * deprecate ``.length()`` in favor of ``len()`` **1.5.0** (2020-08-05): * Use ``Py_ssize_t`` for bitarray index. This means that on 32bit systems, the maximum number of elements in a bitarray is 2 GBits. We used to have a special 64bit index type for all architectures, but this prevented us from using Python's sequence, mapping and number methods, and made those method lookups slow. 
* speedup slice operations when step size = 1 (if alignment allows copying whole bytes) * Require equal endianness for operations: ``&``, ``|``, ``^``, ``&=``, ``|=``, ``^=``. This should have always been the case but was overlooked in the past. * raise TypeError when trying to create bitarray from boolean * This will be last release to still support Python 2.6 (which was retired in 2013). We do NOT plan to stop support for Python 2.7 anytime soon. **1.4.2** (2020-07-15): * add more tests * C-level: - simplify pack/unpack code - fix memory leak in ``~`` operation (bitarray_cpinvert) **1.4.1** (2020-07-14): * add official Python 3.9 support * improve many docstrings * add DeprecationWarning for ``bitdiff()`` * add DeprecationWarning when trying to extend bitarrays from bytes on Python 3 (``bitarray(b'011')`` and ``.extend(b'110')``) * C-level: - Rewrote ``.fromfile()`` and ``.tofile()`` implementation, such that now the same code is used for Python 2 and 3. The new implementation is more memory efficient on Python 3. - use ``memcmp()`` in ``richcompare()`` to shortcut EQ/NE, when comparing two very large bitarrays for equality the speedup can easily be 100x - simplify how unpacking is handled * add more tests **1.4.0** (2020-07-11): * add ``.clear()`` method (Python 3.3 added this method to lists) * avoid over-allocation when bitarray objects are initially created * raise BufferError when resizing bitarrays which is exporting buffers * add example to study the resize() function * improve some error messages * add more tests * raise ``NotImplementedError`` with (useful message) when trying to call the ``.fromstring()`` or ``.tostring()`` methods, which have been removed in the last release **1.3.0** (2020-07-06): * add ``bitarray.util.make_endian()`` * ``util.ba2hex()`` and ``util.hex2ba()`` now also support little-endian * add ``bitarray.get_default_endian()`` * made first argument of initializer a positional-only parameter * remove ``.fromstring()`` and ``.tostring()`` methods, these have been deprecated 8 years ago, since version 0.4.0 * add ``__all__`` in ``bitarray/__init__.py`` * drop Python 3.3 and 3.4 support **1.2.2** (2020-05-18): * ``util.ba2hex()`` now always return a string object (instead of bytes object for Python 3), see issue `#94 `__ * ``util.hex2ba`` allows a unicode object as input on Python 2 * Determine 64-bitness of interpreter in a cross-platform fashion `#91 `__, in order to better support PyPy **1.2.1** (2020-01-06): * simplify markdown of readme so PyPI renders better * make tests for bitarray.util required (instead of warning when they cannot be imported) **1.2.0** (2019-12-06): * add bitarray.util module which provides useful utility functions * deprecate ``bitarray.bitdiff()`` in favor of ``bitarray.util.count_xor()`` * use markdown for documentation * fix bug in ``.count()`` on 32bit systems in special cases when array size is 2^29 bits or larger * simplified tests by using bytes syntax * update smallints and sieve example to use new utility module * simplified mandel example to use numba * use file context managers in tests **1.1.0** (2019-11-07): * add frozenbitarray object * add optional start and stop arguments to ``.count()`` method * add official Python 3.8 support * optimize ``setrange()`` (C-function) by using ``memset()`` * fix issue `#74 `__, bitarray is hashable on Python 2 * fix issue `#68 `__, ``unittest.TestCase.assert_`` deprecated * improved test suite - tests should run in about 1 second * update documentation to use positional-only syntax in 
docstrings * update readme to pass Python 3 doctest * add utils module to examples **1.0.1** (2019-07-19): * fix readme to pass ``twine check`` **1.0.0** (2019-07-15): * fix bitarrays beings created from unicode in Python 2 * use ``PyBytes_*`` in C code, treating the Py3k function names as default, which also removes all redefinitions of ``PyString_*`` * handle negative arguments of .index() method consistently with how they are treated for lists * add a few more comments to the C code * move imports outside tests: pickle, io, etc. * drop Python 2.5 support **0.9.3** (2019-05-20): * refactor resize() - only shrink allocated memory if new size falls lower than half the allocated size * improve error message when trying to initialize from float or complex **0.9.2** (2019-04-29): * fix to compile on Windows with VS 2015, issue `#72 `__ **0.9.1** (2019-04-28): * fix types to actually be types, `#29 `__ * check for ambiguous prefix codes when building binary tree for decoding * remove Python level methods: encode, decode, iterdecode (in favor of having these implemented on the C-level along with check_codedict) * fix self tests for Python 2.5 and 2.6 * move all Huffman code related example code into examples/huffman * add code to generate graphviz .dot file of Huffman tree to examples **0.9.0** (2019-04-22): * more efficient decode and iterdecode by using C-level binary tree instead of a python one, `#54 `__ * added buffer protocol support for Python 3, `#55 `__ * fixed invalid pointer exceptions in pypy, `#47 `__ * made all examples Py3k compatible * add gene sequence example * add official Python 3.7 support * drop Python 2.4, 3.1 and 3.2 support **0.8.3** (2018-07-06): * add exception to setup.py when README.rst cannot be opened **0.8.2** (2018-05-30): * add official Python 3.6 support (although it was already working) * fix description of ``fill()``, `#52 `__ * handle extending self correctly, `#28 `__ * ``copy_n()``: fast copy with ``memmove()`` fixed, `#43 `__ * minor clarity/wording changes to README, `#23 `__ **0.8.1** (2013-03-30): * fix issue `#10 `__, i.e. ``int(bitarray())`` segfault * added tests for using a bitarray object as an argument to functions like int, long (on Python 2), float, list, tuple, dict **0.8.0** (2012-04-04): * add Python 2.4 support * add (module level) function bitdiff for calculating the difference between two bitarrays **0.7.0** (2012-02-15): * add iterdecode method (C level), which returns an iterator but is otherwise like the decode method * improve memory efficiency and speed of pickling large bitarray objects **0.6.0** (2012-02-06): * add buffer protocol to bitarray objects (Python 2.7 only) * allow slice assignment to 0 or 1, e.g. 
``a[::3] = 0`` (in addition to booleans) * moved implementation of itersearch method to C level (Lluis Pamies) * search, itersearch now only except bitarray objects, whereas ``__contains__`` excepts either booleans or bitarrays * use a priority queue for Huffman tree example (thanks to Ushma Bhatt) * improve documentation **0.5.2** (2012-02-02): * fixed MSVC compile error on Python 3 (thanks to Chris Gohlke) * add missing start and stop optional parameters to index() method * add examples/compress.py **0.5.1** (2012-01-31): * update documentation to use tobytes and frombytes, rather than tostring and fromstring (which are now deprecated) * simplified how tests are run **0.5.0** (2012-01-23): * added itersearch method * added Bloom filter example * minor fixes in docstrings, added more tests **0.4.0** (2011-12-29): * porting to Python 3.x (Roland Puntaier) * introduced ``.tobytes()`` and ``.frombytes()`` (``.tostring()`` and ``.fromstring()`` are now deprecated) * updated development status * added sieve prime number example * moved project to github: https://github.com/ilanschnell/bitarray **0.3.5** (2009-04-06): * fixed reference counts bugs * added possibility to slice assign to ``True`` or ``False``, e.g. ``a[::3] = True`` will set every third element to ``True`` **0.3.4** (2009-01-15): * Made C code less ambiguous, such that the package compiles on Visual Studio, with all tests passing. **0.3.3** (2008-12-14): * Made changes to the C code to allow compilation with more compilers. Compiles on Visual Studio, although there are still a few tests failing. **0.3.2** (2008-10-19): * Added sequential search method. * The special method ``__contains__`` now also takes advantage of the sequential search. **0.3.1** (2008-10-12): * Simplified state information for pickling. Argument for count is now optional, defaults to True. Fixed typos. **0.3.0** (2008-09-30): * Fixed a severe bug for 64-bit machines. Implemented all methods in C, improved tests. * Removed deprecated methods ``.from01()`` and ``.fromlist()``. **0.2.5** (2008-09-23): * Added section in README about prefix codes. Implemented _multiply method for faster ``__mul__`` and ``__imul__``. Fixed some typos. **0.2.4** (2008-09-22): * Implemented encode and decode method (in C) for variable-length prefix codes. * Added more examples, wrote README for the examples. * Added more tests, fixed some typos. **0.2.3** (2008-09-16): * Fixed a memory leak, implemented a number of methods in C. These include __getitem__, __setitem__, __delitem__, pop, remove, insert. The methods implemented on the Python level is very limit now. * Implemented bitwise operations. **0.2.2** (2008-09-09): * Rewrote parts of the README * Implemented memory efficient algorithm for the reverse method * Fixed typos, added a few tests, more C refactoring. **0.2.1** (2008-09-07): * Improved tests, in particular added checking for memory leaks. * Refactored many things on the C level. * Implemented a few more methods. **0.2.0** (2008-09-02): * Added bit-endianness property to the bitarray object * Added the examples to the release package. **0.1.0** (2008-08-17): * First official release; put project to http://pypi.python.org/pypi/bitarray/ May 2008: Wrote the initial code, and put it on my personal web-site: http://ilan.schnell-web.net/prog/ bitarray-3.7.1/doc/endianness.rst000066400000000000000000000104601505414144000167640ustar00rootroot00000000000000Bit-endianness ============== Unless explicitly converting to machine representation, i.e. 
initializing the buffer directly, using ``.tobytes()``, ``.frombytes()``, ``.tofile()`` or ``.fromfile()``, as well as using ``memoryview()``, the bit-endianness will have no effect on any computation, and one can skip this section. Since bitarrays allow addressing individual bits, where the machine represents 8 bits in one byte, there are two obvious choices for this mapping: little-endian and big-endian. When dealing with the machine representation of bitarray objects, it is recommended to always explicitly specify the endianness. By default, bitarrays use big-endian representation: .. code-block:: python >>> from bitarray import bitarray >>> a = bitarray(b'A') >>> a.endian 'big' >>> a bitarray('01000001') >>> a[6] = 1 >>> a.tobytes() b'C' Big-endian means that the most-significant bit comes first. Here, ``a[0]`` is the lowest address (index) and most significant bit, and ``a[7]`` is the highest address and least significant bit. When creating a new bitarray object, the endianness can always be specified explicitly: .. code-block:: python >>> a = bitarray(b'A', endian='little') >>> a bitarray('10000010') >>> a.endian 'little' Here, the low bit comes first because little-endian means that increasing numeric significance corresponds to an increasing address. So ``a[0]`` is the lowest address and least significant bit, and ``a[7]`` is the highest address and most significant bit. The bit-endianness is a property of the bitarray object. The endianness cannot be changed once a bitarray object has been created. When comparing bitarray objects, the endianness (and hence the machine representation) is irrelevant; what matters is the mapping from indices to bits: .. code-block:: python >>> bitarray('11001', endian='big') == bitarray('11001', endian='little') True >>> a = bitarray(b'\x01', endian='little') >>> b = bitarray(b'\x80', endian='big') >>> a == b True >>> a.tobytes() == b.tobytes() False Bitwise operations (``|``, ``^``, ``&=``, ``|=``, ``^=``, ``~``) are implemented efficiently using the corresponding byte operations in C, i.e. the operators act on the machine representation of the bitarray objects. Therefore, it is not possible to perform bitwise operations on bitarrays with different endianness. As mentioned above, the endianness cannot be changed once an object is created. However, you can create a new bitarray with different endianness: .. code-block:: python >>> a = bitarray('111000', endian='little') >>> b = bitarray(a, endian='big') >>> b bitarray('111000') >>> a == b True Utility functions ----------------- A number of utility functions take the bit-endianness into account. For example, consider: .. code-block:: python >>> from bitarray.util import ba2int, int2ba >>> int2ba(12) bitarray('1100') This is what one would normally expect, as Python's built-in ``bin()`` gives the same result: .. code-block:: python >>> bin(12) '0b1100' However, this is only true because big-endian is the default bit-endianness. When explicitly requesting a little-endian bitarray, we get: .. code-block:: python >>> int2ba(12, endian="little") bitarray('0011') Similarly, the function ``ba2int()`` takes into account the bit-endianness of the bitarray it is provided with: .. code-block:: python >>> a = bitarray("11001", "little") >>> ba2int(a) 19 >>> ba2int(bitarray(a, "big")) 25 The same behavior is valid for ``hex2ba()``, ``ba2hex()``, ``base2ba()`` and ``ba2base()``. Regardless of bit-endianness, these are always inverse functions of each other: ..
code-block:: python >>> from bitarray.util import ba2hex, hex2ba, ba2base, base2ba >>> for endian in "little", "big": ... a = bitarray("1010 0011 1110", endian) ... assert int2ba(ba2int(a), len(a), a.endian) == a ... assert hex2ba(ba2hex(a), a.endian) == a ... assert base2ba(64, ba2base(64, a), a.endian) == a or: .. code-block:: python >>> for endian in "little", "big": ... assert ba2int(int2ba(29, endian=endian)) == 29 ... assert ba2hex(hex2ba("e7a", endian)) == "e7a" ... assert ba2base(64, base2ba(64, "h+E7", endian)) == "h+E7" bitarray-3.7.1/doc/indexing.rst000066400000000000000000000036041505414144000164440ustar00rootroot00000000000000Bitarray indexing ================= Bitarrays can be indexed like usual Python lists. They support slice indexing and assignment: .. code-block:: python >>> from bitarray import bitarray >>> a = bitarray('01000001 01000010 01000011') >>> a[1::3] bitarray('10100001') >>> a[8:20:2] = bitarray('110111') >>> a bitarray('010000011110001011100011') >>> del a[::2] # remove every second element >>> a bitarray('100110001001') >>> a[::3] = 0 # set every third element to 0 >>> a bitarray('000010001001') Integer sequence indexing ------------------------- As of bitarray version 2.8, indices may also be lists of arbitrary indices (like in NumPy). Negative values are permitted in the index list and work as they do with single indices or slices. For example: .. code-block:: python >>> a = bitarray(12) >>> a.setall(0) >>> a[[1, 2, 5, 7]] = 1 # set elements 1, 2, 5, 7 to value 1 >>> a bitarray('011001010000') >>> a[[-1, -2, 1, 0]] bitarray('0010') >>> del a[[0, 1, 5, 8, 9]] >>> a bitarray('1000100') >>> a[[1, 2, 4]] = bitarray('010') # assign indices to elements >>> a bitarray('1010000') Masked indexing --------------- Also, as of bitarray version 2.8, indices may be bitarrays which are considered masks. For example: .. code-block:: python >>> a = bitarray('1001001') >>> mask = bitarray('1010111') >>> a[mask] # create bitarray with items from `a` whose mask is 1 bitarray('10001') >>> del a[mask] # delete items in `a` whose mask is 1 >>> a bitarray('01') Note that ``del a[mask]`` is equivalent to the in-place version of selecting the reverse mask ``a = a[~mask]``. Also note that masked assignment is not implemented, as ``a[mask] = 1`` would be equivalent to the bitwise operation ``a |= mask``. And ``a[mask] = 0`` would be equivalent to ``a &= ~mask``. bitarray-3.7.1/doc/random_p.rst000066400000000000000000000112731505414144000164370ustar00rootroot00000000000000Random Bitarrays ================ Bitarray 3.5 introduced the utility function ``util.random_p(n, p=0.5)``. It returns a pseudo-random bitarray (of length ``n``) for which each bit has probability ``p`` of being one. This is mathematically equivalent to: .. code-block:: python bitarray(random() < p for _ in range(n)) While this expression work well for small ``n``, it is quite slow when ``n`` is large. In the following we focus on the case of ``n`` being large. When ``p`` is small, a fast implementation of ``random_p()`` is to (a) calculate the population of the bitarray, and then (b) set the required number of bits, using ``random.randrange()`` for each bit. Python 3.12 introduced ``random.binomialvariate()`` which is exactly what we need to determine the bitarray's population. When ``p == 0.5``, we use ``random.randbytes()`` to initialize our bitarray buffer. 
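The following sketch illustrates these two building blocks in pure Python. It is a simplified illustration only, not the library's actual implementation; the helper names ``small_p_random()`` and ``half_random()`` are made up for this example:

.. code-block:: python

    import random
    from bitarray import bitarray
    from bitarray.util import zeros

    def small_p_random(n, p):
        # (a) draw the population from a binomial distribution (Python 3.12+),
        # (b) set that many distinct bits at uniformly random positions
        k = random.binomialvariate(n, p)
        a = zeros(n)
        while k:
            i = random.randrange(n)
            if not a[i]:
                a[i] = 1
                k -= 1
        return a

    def half_random(n):
        # p == 0.5: fill the buffer directly from randbytes() and trim pad bits
        a = bitarray(random.randbytes((n + 7) // 8))
        return a[:n]
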
It should be noted that ``util.urandom()`` uses ``os.urandom()``, but since ``util.random_p()`` is designed to give reproducible pseudo-random bitarrays, it uses ``randbytes()``. Taking two (independent) such bitarrays and combining them using the bitwise AND operation gives us a random bitarray with probability 1/4. Likewise, combining two such bitarrays using the bitwise OR operation gives us probability 3/4. Without going into too much further detail, it is possible to combine more than two "randbytes" bitarrays to get probabilities ``i / 2**M``, where ``M`` is the maximal number of "randbytes" bitarrays we combine, and ``i`` is an integer. The required sequence of AND and OR operations is calculated from the desired probability ``p`` and ``M``. Once we have calculated our sequence, and obtained a bitarray with probability ``q = i / 2**M``, we perform a final OR or AND operation with a random bitarray of probability ``x``. In order to arrive at exactly the requested probability ``p``, it can be verified that: .. code-block:: python x = (p - q) / (1.0 - q) # OR x = p / q # AND It should be noted that ``x`` is always small (once symmetry is applied in case of AND) such that it always uses the "small p" case. Unlike the combinations, this gives us a bitarray with exact probability ``x``. Therefore, the requested probability ``p`` is exactly obtained. For more details, see ``VerificationTests`` in the additional `random tests <../devel/test_random.py>`__. Speedup ------- The speedup is largest when the number of random numbers our algorithm uses is smallest. In the following, let ``k`` be the number of calls to ``randbytes()``. For example, when ``p=0.5`` we have ``k=1``. When ``p`` is below our limit for using the procedure of setting individual bits (we call this limit ``small_p``), we have ``k=0``. In our implementation, we are using ``M=8`` and ``small_p=0.01``. These parameters have been carefully selected to optimize the average (with respect to ``p``) execution time. The following table shows execution times (in milliseconds) of ``random_p()`` for different values of ``p`` for ``n=100_000_000``: .. code-block:: p t/ms k notes ----------------------------------------------------------------------- edge cases: 0.0 0.4 0 0.5 21.7 1 1.0 0.4 0 pure combinations: 1/4 44.6 2 1/8 65.2 3 1/16 88.7 4 1/32 108.6 5 1/64 132.4 6 3/128 151.9 7 p = 1/128 < small_p, so we take different p 127/256 174.9 8 priciest pure combinations case(s) small p: 0.0001 2.2 0 0.001 18.7 0 0.003891051 72.9 0 p = 1/257 - largest x in mixed case 0.009999999 192.3 0 priciest small p case mixed: x (final operation) 0.01 194.3 7 0.002204724 OR smallest p for mixed case 0.1 223.4 8 0.002597403 OR 0.2 194.7 8 0.000975610 OR 0.3 213.7 8 0.997402597 AND 0.4 203.3 7 0.002597403 OR 0.252918288 118.7 2 0.003891051 OR p=65/257 0.494163425 249.5 8 0.996108951 AND priciest mixed case(s) 0.499999999 22.4 1 0.999999998 AND cheapest mixed case literal: any 3740.2 - bitarray(random() < p for _ in range(n)) Using the literal definition one always uses ``n`` calls to ``random()``, regardless of ``p``. For 1000 random values of ``p`` (between 0 and 1), we get an average speedup of about 19. In summary: Even in the worst cases ``random_p()`` performs about 15 times better than the literal definition for large ``n``, while on average we get a speedup of almost 20. For very small ``p``, and for special values of ``p``, the speedup is significantly higher.
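A simple way to compare ``random_p()`` against the literal definition on your own machine is sketched below. The timing harness is illustrative only, and results will differ from the table above; ``n`` is kept smaller here so the literal definition finishes quickly:

.. code-block:: python

    from random import random, seed
    from time import perf_counter
    from bitarray import bitarray
    from bitarray.util import random_p   # requires Python 3.12+

    n = 10_000_000
    for p in (0.5, 0.1, 0.001):
        seed(12345)
        t0 = perf_counter()
        a = random_p(n, p)                              # fast path
        t1 = perf_counter()
        b = bitarray(random() < p for _ in range(n))    # literal definition
        t2 = perf_counter()
        print("p=%-8g  random_p: %7.1f ms   literal: %8.1f ms" %
              (p, 1000.0 * (t1 - t0), 1000.0 * (t2 - t1)))
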
bitarray-3.7.1/doc/reference.rst000066400000000000000000000530301505414144000165730ustar00rootroot00000000000000Reference ========= bitarray version: 3.7.1 -- `change log `__ In the following, ``item`` and ``value`` are usually a single bit - an integer 0 or 1. Also, ``sub_bitarray`` refers to either a bitarray, or an ``item``. The bitarray object: -------------------- ``bitarray(initializer=0, /, endian='big', buffer=None)`` -> bitarray Return a new bitarray object whose items are bits initialized from the optional initializer, and bit-endianness. The initializer may be one of the following types: a.) ``int`` bitarray, initialized to zeros, of given length b.) ``bytes`` or ``bytearray`` to initialize buffer directly c.) ``str`` of 0s and 1s, ignoring whitespace and "_" d.) iterable of integers 0 or 1. Optional keyword arguments: ``endian``: Specifies the bit-endianness of the created bitarray object. Allowed values are ``big`` and ``little`` (the default is ``big``). The bit-endianness effects the buffer representation of the bitarray. ``buffer``: Any object which exposes a buffer. When provided, ``initializer`` cannot be present (or has to be ``None``). The imported buffer may be read-only or writable, depending on the object type. New in version 2.3: optional ``buffer`` argument New in version 3.4: allow initializer ``bytes`` or ``bytearray`` to set buffer directly bitarray methods: ----------------- ``all()`` -> bool Return ``True`` when all bits in bitarray are 1. ``a.all()`` is a faster version of ``all(a)``. ``any()`` -> bool Return ``True`` when any bit in bitarray is 1. ``a.any()`` is a faster version of ``any(a)``. ``append(item, /)`` Append ``item`` to the end of the bitarray. ``buffer_info()`` -> BufferInfo Return named tuple with following fields: 0. ``address``: memory address of buffer 1. ``nbytes``: buffer size (in bytes) 2. ``endian``: bit-endianness as a string 3. ``padbits``: number of pad bits 4. ``alloc``: allocated memory for buffer (in bytes) 5. ``readonly``: memory is read-only (bool) 6. ``imported``: buffer is imported (bool) 7. ``exports``: number of buffer exports New in version 3.7: return named tuple ``bytereverse(start=0, stop=, /)`` For each byte in byte-range(``start``, ``stop``) reverse bits in-place. The start and stop indices are given in terms of bytes (not bits). Also note that this method only changes the buffer; it does not change the bit-endianness of the bitarray object. Pad bits are left unchanged such that two consecutive calls will always leave the bitarray unchanged. New in version 2.2.5: optional start and stop arguments ``clear()`` Remove all items from bitarray. New in version 1.4 ``copy()`` -> bitarray Return copy of bitarray (with same bit-endianness). ``count(value=1, start=0, stop=, step=1, /)`` -> int Number of occurrences of ``value`` bitarray within ``[start:stop:step]``. Optional arguments ``start``, ``stop`` and ``step`` are interpreted in slice notation, meaning ``a.count(value, start, stop, step)`` equals ``a[start:stop:step].count(value)``. The ``value`` may also be a sub-bitarray. In this case non-overlapping occurrences are counted within ``[start:stop]`` (``step`` must be 1). New in version 1.1.0: optional start and stop arguments New in version 2.3.7: optional step argument New in version 2.9: add non-overlapping sub-bitarray count ``decode(code, /)`` -> iterator Given a prefix code (a dict mapping symbols to bitarrays, or ``decodetree`` object), decode content of bitarray and return an iterator over corresponding symbols. 
See also: `Bitarray 3 transition `__ New in version 3.0: returns iterator (equivalent to past ``.iterdecode()``) ``encode(code, iterable, /)`` Given a prefix code (a dict mapping symbols to bitarrays), iterate over the iterable object with symbols, and extend bitarray with corresponding bitarray for each symbol. ``extend(iterable, /)`` Append items from to the end of the bitarray. If ``iterable`` is a (Unicode) string, each ``0`` and ``1`` are appended as bits (ignoring whitespace and underscore). New in version 3.4: allow ``bytes`` object ``fill()`` -> int Add zeros to the end of the bitarray, such that the length will be a multiple of 8, and return the number of bits added [0..7]. ``find(sub_bitarray, start=0, stop=, /, right=False)`` -> int Return lowest (or rightmost when ``right=True``) index where sub_bitarray is found, such that sub_bitarray is contained within ``[start:stop]``. Return -1 when sub_bitarray is not found. New in version 2.1 New in version 2.9: add optional keyword argument ``right`` ``frombytes(bytes, /)`` Extend bitarray with raw bytes from a bytes-like object. Each added byte will add eight bits to the bitarray. New in version 2.5.0: allow bytes-like argument ``fromfile(f, n=-1, /)`` Extend bitarray with up to ``n`` bytes read from file object ``f`` (or any other binary stream what supports a ``.read()`` method, e.g. ``io.BytesIO``). Each read byte will add eight bits to the bitarray. When ``n`` is omitted or negative, reads and extends all data until EOF. When ``n`` is non-negative but exceeds the available data, ``EOFError`` is raised. However, the available data is still read and extended. ``index(sub_bitarray, start=0, stop=, /, right=False)`` -> int Return lowest (or rightmost when ``right=True``) index where sub_bitarray is found, such that sub_bitarray is contained within ``[start:stop]``. Raises ``ValueError`` when sub_bitarray is not present. New in version 2.9: add optional keyword argument ``right`` ``insert(index, value, /)`` Insert ``value`` into bitarray before ``index``. ``invert(index=, /)`` Invert all bits in bitarray (in-place). When the optional ``index`` is given, only invert the single bit at ``index``. New in version 1.5.3: optional index argument ``pack(bytes, /)`` Extend bitarray from a bytes-like object, where each byte corresponds to a single bit. The byte ``b'\x00'`` maps to bit 0 and all other bytes map to bit 1. This method, as well as the ``.unpack()`` method, are meant for efficient transfer of data between bitarray objects to other Python objects (for example NumPy's ndarray object) which have a different memory view. New in version 2.5.0: allow bytes-like argument ``pop(index=-1, /)`` -> item Remove and return item at ``index`` (default last). Raises ``IndexError`` if index is out of range. ``remove(value, /)`` Remove the first occurrence of ``value``. Raises ``ValueError`` if value is not present. ``reverse()`` Reverse all bits in bitarray (in-place). ``search(sub_bitarray, start=0, stop=, /, right=False)`` -> iterator Return iterator over indices where sub_bitarray is found, such that sub_bitarray is contained within ``[start:stop]``. The indices are iterated in ascending order (from lowest to highest), unless ``right=True``, which will iterate in descending order (starting with rightmost match). 
See also: `Bitarray 3 transition `__ New in version 2.9: optional start and stop arguments - add optional keyword argument ``right`` New in version 3.0: returns iterator (equivalent to past ``.itersearch()``) ``setall(value, /)`` Set all elements in bitarray to ``value``. Note that ``a.setall(value)`` is equivalent to ``a[:] = value``. ``sort(reverse=False)`` Sort all bits in bitarray (in-place). ``to01(group=0, sep=' ')`` -> str Return bitarray as (Unicode) string of ``0``s and ``1``s. The bits are grouped into ``group`` bits (default is no grouping). When grouped, the string ``sep`` is inserted between groups of ``group`` characters, default is a space. New in version 3.3: optional ``group`` and ``sep`` arguments ``tobytes()`` -> bytes Return the bitarray buffer (pad bits are set to zero). ``tofile(f, /)`` Write bitarray buffer to file object ``f``. ``tolist()`` -> list Return bitarray as list of integers. ``a.tolist()`` equals ``list(a)``. Note that the list object being created will require 32 or 64 times more memory (depending on the machine architecture) than the bitarray object, which may cause a memory error if the bitarray is very large. ``unpack(zero=b'\x00', one=b'\x01')`` -> bytes Return bytes that contain one byte for each bit in the bitarray, using specified mapping. bitarray data descriptors: -------------------------- Data descriptors were added in version 2.6. ``endian`` -> str bit-endianness as Unicode string New in version 3.4: replaces former ``.endian()`` method ``nbytes`` -> int buffer size in bytes ``padbits`` -> int number of pad bits ``readonly`` -> bool bool indicating whether buffer is read-only Other objects: -------------- ``frozenbitarray(initializer=0, /, endian='big', buffer=None)`` -> frozenbitarray Return a ``frozenbitarray`` object. Initialized the same way a ``bitarray`` object is initialized. A ``frozenbitarray`` is immutable and hashable, and may therefore be used as a dictionary key. New in version 1.1 ``decodetree(code, /)`` -> decodetree Given a prefix code (a dict mapping symbols to bitarrays), create a binary tree object to be passed to ``.decode()``. New in version 1.6 Functions defined in the `bitarray` module: ------------------------------------------- ``bits2bytes(n, /)`` -> int Return the number of bytes necessary to store n bits. ``get_default_endian()`` -> str Return the default bit-endianness for new bitarray objects being created. Unless ``_set_default_endian('little')`` was called, the default bit-endianness is ``big``. New in version 1.3 ``test(verbosity=1)`` -> TextTestResult Run self-test, and return ``unittest.runner.TextTestResult`` object. Functions defined in `bitarray.util` module: -------------------------------------------- This sub-module was added in version 1.2. ``any_and(a, b, /)`` -> bool Efficient implementation of ``any(a & b)``. New in version 2.7 ``ba2base(n, bitarray, /, group=0, sep=' ')`` -> str Return a string containing the base ``n`` ASCII representation of the bitarray. Allowed values for ``n`` are 2, 4, 8, 16, 32 and 64. The bitarray has to be multiple of length 1, 2, 3, 4, 5 or 6 respectively. For ``n=32`` the RFC 4648 Base32 alphabet is used, and for ``n=64`` the standard base 64 alphabet is used. When grouped, the string ``sep`` is inserted between groups of ``group`` characters, default is a space. 
See also: `Bitarray representations `__ New in version 1.9 New in version 3.3: optional ``group`` and ``sep`` arguments ``ba2hex(bitarray, /, group=0, sep=' ')`` -> hexstr Return a string containing the hexadecimal representation of the bitarray (which has to be multiple of 4 in length). When grouped, the string ``sep`` is inserted between groups of ``group`` characters, default is a space. New in version 3.3: optional ``group`` and ``sep`` arguments ``ba2int(bitarray, /, signed=False)`` -> int Convert the given bitarray to an integer. The bit-endianness of the bitarray is respected. ``signed`` indicates whether two's complement is used to represent the integer. ``base2ba(n, asciistr, /, endian=None)`` -> bitarray Bitarray of base ``n`` ASCII representation. Allowed values for ``n`` are 2, 4, 8, 16, 32 and 64. For ``n=32`` the RFC 4648 Base32 alphabet is used, and for ``n=64`` the standard base 64 alphabet is used. Whitespace is ignored. See also: `Bitarray representations `__ New in version 1.9 New in version 3.3: ignore whitespace ``byteswap(a, n=, /)`` Reverse every ``n`` consecutive bytes of ``a`` in-place. By default, all bytes are reversed. Note that ``n`` is not limited to 2, 4 or 8, but can be any positive integer. Also, ``a`` may be any object that exposes a writable buffer. Nothing about this function is specific to bitarray objects. New in version 3.4 ``canonical_decode(bitarray, count, symbol, /)`` -> iterator Decode bitarray using canonical Huffman decoding tables where ``count`` is a sequence containing the number of symbols of each length and ``symbol`` is a sequence of symbols in canonical order. See also: `Canonical Huffman Coding `__ New in version 2.5 ``canonical_huffman(dict, /)`` -> tuple Given a frequency map, a dictionary mapping symbols to their frequency, calculate the canonical Huffman code. Returns a tuple containing: 0. the canonical Huffman code as a dict mapping symbols to bitarrays 1. a list containing the number of symbols of each code length 2. a list of symbols in canonical order Note: the two lists may be used as input for ``canonical_decode()``. See also: `Canonical Huffman Coding `__ New in version 2.5 ``correspond_all(a, b, /)`` -> tuple Return tuple with counts of: ~a & ~b, ~a & b, a & ~b, a & b New in version 3.4 ``count_and(a, b, /)`` -> int Return ``(a & b).count()`` in a memory efficient manner, as no intermediate bitarray object gets created. ``count_n(a, n, value=1, /)`` -> int Return lowest index ``i`` for which ``a[:i].count(value) == n``. Raises ``ValueError`` when ``n`` exceeds total count (``a.count(value)``). New in version 2.3.6: optional value argument ``count_or(a, b, /)`` -> int Return ``(a | b).count()`` in a memory efficient manner, as no intermediate bitarray object gets created. ``count_xor(a, b, /)`` -> int Return ``(a ^ b).count()`` in a memory efficient manner, as no intermediate bitarray object gets created. This is also known as the Hamming distance. ``deserialize(bytes, /)`` -> bitarray Return a bitarray given a bytes-like representation such as returned by ``serialize()``. See also: `Bitarray representations `__ New in version 1.8 New in version 2.5.0: allow bytes-like argument ``gen_primes(n, /, endian=None, odd=False)`` -> bitarray Generate a bitarray of length ``n`` in which active indices are prime numbers. By default (``odd=False``), active indices correspond to prime numbers directly. 
When ``odd=True``, only odd prime numbers are represented in the resulting bitarray ``a``, and ``a[i]`` corresponds to ``2*i+1`` being prime or not. Apart from working with prime numbers, this function is useful for testing, as it provides a simple way to create a well-defined bitarray of any length. New in version 3.7 ``hex2ba(hexstr, /, endian=None)`` -> bitarray Bitarray of hexadecimal representation. hexstr may contain any number (including odd numbers) of hex digits (upper or lower case). Whitespace is ignored. New in version 3.3: ignore whitespace ``huffman_code(dict, /, endian=None)`` -> dict Given a frequency map, a dictionary mapping symbols to their frequency, calculate the Huffman code, i.e. a dict mapping those symbols to bitarrays (with given bit-endianness). Note that the symbols are not limited to being strings. Symbols may be any hashable object. ``int2ba(int, /, length=None, endian=None, signed=False)`` -> bitarray Convert the given integer to a bitarray (with given bit-endianness, and no leading (big-endian) / trailing (little-endian) zeros), unless the ``length`` of the bitarray is provided. An ``OverflowError`` is raised if the integer is not representable with the given number of bits. ``signed`` determines whether two's complement is used to represent the integer, and requires ``length`` to be provided. ``intervals(bitarray, /)`` -> iterator Compute all uninterrupted intervals of 1s and 0s, and return an iterator over tuples ``(value, start, stop)``. The intervals are guaranteed to be in order, and their size is always non-zero (``stop - start > 0``). New in version 2.7 ``ones(n, /, endian=None)`` -> bitarray Create a bitarray of length ``n``, with all values ``1``, and optional bit-endianness (``little`` or ``big``). New in version 2.9 ``parity(a, /)`` -> int Return parity of bitarray ``a``. ``parity(a)`` is equivalent to ``a.count() % 2`` but more efficient. New in version 1.9 ``pprint(bitarray, /, stream=None, group=8, indent=4, width=80)`` Pretty-print bitarray object to ``stream``, defaults is ``sys.stdout``. By default, bits are grouped in bytes (8 bits), and 64 bits per line. Non-bitarray objects are printed using ``pprint.pprint()``. New in version 1.8 ``random_k(n, /, k, endian=None)`` -> bitarray Return (pseudo-) random bitarray of length ``n`` with ``k`` elements set to one. Mathematically equivalent to setting (in a bitarray of length ``n``) all bits at indices ``random.sample(range(n), k)`` to one. The random bitarrays are reproducible when giving Python's ``random.seed()`` with a specific seed value. This function requires Python 3.9 or higher, as it depends on the standard library function ``random.randbytes()``. Raises ``NotImplementedError`` when Python version is too low. New in version 3.6 ``random_p(n, /, p=0.5, endian=None)`` -> bitarray Return (pseudo-) random bitarray of length ``n``, where each bit has probability ``p`` of being one (independent of any other bits). Mathematically equivalent to ``bitarray((random() < p for _ in range(n)), endian)``, but much faster for large ``n``. The random bitarrays are reproducible when giving Python's ``random.seed()`` with a specific seed value. This function requires Python 3.12 or higher, as it depends on the standard library function ``random.binomialvariate()``. Raises ``NotImplementedError`` when Python version is too low. 
See also: `Random Bitarrays `__ New in version 3.5 ``sc_decode(stream, /)`` -> bitarray Decompress binary stream (an integer iterator, or bytes-like object) of a sparse compressed (``sc``) bitarray, and return the decoded bitarray. This function consumes only one bitarray and leaves the remaining stream untouched. Use ``sc_encode()`` for compressing (encoding). See also: `Compression of sparse bitarrays `__ New in version 2.7 ``sc_encode(bitarray, /)`` -> bytes Compress a sparse bitarray and return its binary representation. This representation is useful for efficiently storing sparse bitarrays. Use ``sc_decode()`` for decompressing (decoding). See also: `Compression of sparse bitarrays `__ New in version 2.7 ``serialize(bitarray, /)`` -> bytes Return a serialized representation of the bitarray, which may be passed to ``deserialize()``. It efficiently represents the bitarray object (including its bit-endianness) and is guaranteed not to change in future releases. See also: `Bitarray representations `__ New in version 1.8 ``strip(bitarray, /, mode='right')`` -> bitarray Return a new bitarray with zeros stripped from left, right or both ends. Allowed values for mode are the strings: ``left``, ``right``, ``both`` ``subset(a, b, /)`` -> bool Return ``True`` if bitarray ``a`` is a subset of bitarray ``b``. ``subset(a, b)`` is equivalent to ``a | b == b`` (and equally ``a & b == a``) but more efficient as no intermediate bitarray object is created and the buffer iteration is stopped as soon as one mismatch is found. ``sum_indices(a, /, mode=1)`` -> int Return sum of indices of all active bits in bitarray ``a``. Equivalent to ``sum(i for i, v in enumerate(a) if v)``. ``mode=2`` sums square of indices. New in version 3.6 New in version 3.7: add optional mode argument ``urandom(n, /, endian=None)`` -> bitarray Return random bitarray of length ``n`` (uses ``os.urandom()``). New in version 1.7 ``vl_decode(stream, /, endian=None)`` -> bitarray Decode binary stream (an integer iterator, or bytes-like object), and return the decoded bitarray. This function consumes only one bitarray and leaves the remaining stream untouched. Use ``vl_encode()`` for encoding. See also: `Variable length bitarray format `__ New in version 2.2 ``vl_encode(bitarray, /)`` -> bytes Return variable length binary representation of bitarray. This representation is useful for efficiently storing small bitarray in a binary stream. Use ``vl_decode()`` for decoding. See also: `Variable length bitarray format `__ New in version 2.2 ``xor_indices(a, /)`` -> int Return xor reduced indices of all active bits in bitarray ``a``. This is essentially equivalent to ``reduce(operator.xor, (i for i, v in enumerate(a) if v))``. New in version 3.2 ``zeros(n, /, endian=None)`` -> bitarray Create a bitarray of length ``n``, with all values ``0``, and optional bit-endianness (``little`` or ``big``). bitarray-3.7.1/doc/represent.rst000066400000000000000000000126601505414144000166500ustar00rootroot00000000000000Bitarray representations ======================== The bitarray library offers many ways to represent bitarray objects. Here, we take a closer look at those representations and discuss their advantages and disadvantages. Binary representation --------------------- The most common representation of bitarrays is its native binary string representation, which is great for interactively analyzing bitarray objects: .. 
code-block:: python >>> from bitarray import bitarray >>> a = bitarray('11001') >>> repr(a) # same as str(a) "bitarray('11001')" >>> a.to01() # the raw string of 0's and 1's '11001' However, this representation is very large compared to the bitarray object itself, and it is not efficient for large bitarrays. Byte representation ------------------- As bitarray objects are stored in a byte buffer in memory, it is very efficient (in terms of size and time) to use this representation of large bitarrays. However, this representation is not very human readable. .. code-block:: python >>> a = bitarray('11001110000011010001110001111000010010101111000111100') >>> a.tobytes() # raw buffer b'\xce\r\x1cxJ\xf1\xe0' Here, the number of pad bits within the last byte, as well as the bit-endianness, is not part of the byte buffer itself. Therefore, extra work is required to store this information. The utility function ``serialize()`` adds this information to a header byte: .. code-block:: python >>> from bitarray.util import serialize, deserialize >>> x = serialize(a) >>> x b'\x13\xce\r\x1cxJ\xf1\xe0' >>> b = deserialize(x) >>> assert a == b and a.endian == b.endian The header byte is structured the following way: .. code-block:: python >>> x[0] # 0x13 19 >>> x[0] % 16 # number of pad bits (0..7) within last byte 3 >>> x[0] // 16 # bit-endianness: 0 little, 1 big 1 Hence, valid values for the header byte are in the ranges 0 .. 7 or 16 .. 23 (inclusive). Moreover, if the serialized bitarray is empty (``x`` only consists of a single byte - the header byte), the only valid values for the header are 0 or 16 (corresponding to a little-endian and big-endian empty bitarray). The functions ``serialize()`` and ``deserialize()`` are the recommended and fastest way to (de-) serialize bitarray objects to bytes objects (and vice versa). The exact format of this representation is guaranteed to not change in future releases. Hexadecimal representation -------------------------- As four bits of a bitarray may be represented by a hexadecimal digit, we can represent bitarrays (whose length is a multiple of 4) as a hexadecimal string: .. code-block:: python >>> from bitarray.util import ba2hex, hex2ba >>> a = bitarray('1100 1110 0001 1010 0011 1000 1111') >>> ba2hex(a) 'ce1a38f' >>> hex2ba('ce1a38f') bitarray('1100111000011010001110001111') Note that the representation is different for the same bitarray if the endianness changes: .. code-block:: python >>> a.endian 'big' >>> b = bitarray(a, 'little') >>> assert a == b >>> b.endian 'little' >>> ba2hex(b) '3785c1f' The functions ``ba2hex()`` and ``hex2ba()`` are very efficiently implemented in C, and take advantage of byte level operations. Base 2, 4, 8, 16, 32 and 64 representation ------------------------------------------ The utility function ``ba2base()`` allows representing bitarrays by base ``n``, with possible bases 2, 4, 8, 16, 32 and 64. The bitarray has to be a multiple of length 1, 2, 3, 4, 5 or 6 respectively: ..
code-block:: python >>> from bitarray.util import ba2base >>> a = bitarray('001010111111100000111011100110110001111100101110111110010010') >>> len(a) # divisible by 2, 3, 4, 5 and 6 60 >>> ba2base(2, a) # binary '001010111111100000111011100110110001111100101110111110010010' >>> ba2base(4, a) # quaternary '022333200323212301330232332102' >>> ba2base(8, a) # octal '12774073466174567622' >>> ba2base(16, a) # hexadecimal '2bf83b9b1f2ef92' >>> ba2base(32, a) # base 32 (using RFC 4648 Base32 alphabet) 'FP4DXGY7F34S' >>> ba2base(64, a) # base 64 (using standard base 64 alphabet) 'K/g7mx8u+S' Note that ``ba2base(2, a)`` is equivalent to ``a.to01()`` and that ``ba2base(16, a)`` is equivalent to ``ba2hex(a)``. Unlike ``ba2hex()``, ``ba2base()`` does not take advantage of byte level operations and is therefore slower, although it is also implemented in C. The inverse function is called ``base2ba()``. Variable length representation ------------------------------ In some cases, it is useful to represent bitarrays in a binary format that is "self terminating" (in the same way that C strings are NUL terminated). That is, when an encoded bitarray of unknown length is encountered in a stream of binary data, the format lets us know when the end of the encoded bitarray is reached. See `variable length format <./variable_length.rst>`__ for this representation. Compressed sparse bitarrays --------------------------- Another representation is `compressed sparse bitarrays <./sparse_compression.rst>`__, whose format is also "self terminating". This format actually uses different representations depending on how sparse the population of the bitarray (or even sections of the bitarray) is. For large sparse bitarrays, the format reduces (compresses) the amount of data very efficiently, while only requiring a very tiny overhead for non-sparsely populated bitarrays. bitarray-3.7.1/doc/sparse_compression.rst000066400000000000000000000153471505414144000205630ustar00rootroot00000000000000Compression of sparse bitarrays =============================== In a ``bitarray`` object each byte in memory represents eight bits. While this representation is very compact and efficient when dealing with most data, there are situations when this representation is inefficient. One such situation is sparsely populated bitarrays. That is, bitarrays in which only a few bits are 1, but most bits are 0. In this situation, one might consider using a data structure which stores the indices of the 1 bits and not using the ``bitarray`` object at all. However, having all of bitarray's functionality is very convenient. It may be desired to convert ``bitarray`` objects into a more compact (index based) format when storing objects on disk or sending them over the network. This is the use case of the utility functions ``sc_encode()`` and ``sc_decode()``. The lower the population count, the more efficient the compression will be: .. code-block:: python >>> from bitarray import bitarray >>> from bitarray.util import zeros, sc_encode, sc_decode >>> a = zeros(1 << 24, 'little') # 16 mbits >>> a[0xaa] = a[0xbbcc] = a[0xddeeff] = 1 >>> blob = sc_encode(a) >>> blob b'\x04\x00\x00\x00\x01\xc3\x03\xaa\x00\x00\xcc\xbb\x00\xff\xee\xdd\x00' >>> assert sc_decode(blob) == a How it works ------------ Consider a ``bitarray`` of length 256, that is 32 bytes of memory. If we represent this object by the indices of 1 bits as one byte each, the object will be represented more efficiently when the population (number of 1 bits) is less than 32.
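As a rough illustration of this break-even point (ignoring block headers, and not reflecting the actual ``sc`` wire format), compare the two sizes for a 256-bit block; the helper ``naive_cost()`` below is made up for this example:

.. code-block:: python

    from bitarray import bitarray
    from bitarray.util import urandom, zeros

    def naive_cost(block):
        # block: bitarray of length 256, i.e. 32 bytes of raw buffer
        assert len(block) == 256
        raw = 32                 # store the raw bytes
        indices = block.count()  # store one byte per index of a 1 bit
        return ("indices" if indices < raw else "raw", min(raw, indices))

    sparse = zeros(256)
    sparse[[3, 77, 200]] = 1
    print(naive_cost(sparse))        # ('indices', 3) - only 3 bytes needed
    print(naive_cost(urandom(256)))  # ('raw', 32) - about 128 ones on average
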
Based on the population, the function ``sc_encode()`` chooses to represent the object as either raw bytes or as bytes of indices of 1 bits. These are the block types 0 and 1. Next, we consider a ``bitarray`` of length 65536. When each section of 256 bits has a population below 32, it would be stored as 256 blocks of type 1. That is, we need 256 block headers and one (index) byte for each 1 bit. However, when the total population is below 256, we could also introduce a new block type 2 in which each index is represented by two bytes and represent the entire bitarray as a single block (of type 2). This saves us the 256 block headers (of type 1). Similarly, with even less populated bitarrays, it will become more efficient to move to blocks representing each index using 3 or more bytes. The encoding algorithm starts at the front of the ``bitarray``, inspects the population and decides which block type to use to encode the following bits. Once the first block is written, the algorithm moves on to inspecting the remaining population, and so on. This way, a large bitarray with densely and sparsely populated areas will be compressed efficiently using different block types. The binary blob consists of a header which encodes the bit-endianness and the total length of the bitarray, i.e. the number of bits. The header is followed by an arbitrary number of blocks. There are 5 block types. Each block starts with a block header encoding the block type and specifying the size of the block data that follows. .. code-block:: block head count count bytes block size type byte byte per index (encoded) (decoded) ------------------------------------------------------------------------- 0 0x00..0x9f 0..4096 no raw 1..4097 0..4096 1 0xa0..0xbf 0..31 no 1 1..32 32 2 0xc2 0..255 yes 2 2..512 8,192 3 0xc3 0..255 yes 3 2..767 2,097,152 4 0xc4 0..255 yes 4 2..1022 536,870,912 As the decoder stops whenever the decoded block size is 0, the head byte 0x00 (type 0 with no raw bytes) is considered the stop byte. Speed ----- We create a 64 mbit (8mb) random bitarray with a probability of 1/1024 for each bit being 1. The table shows a comparison of different compression methods: .. code-block:: compress (ms) decompress (ms) ratio ---------------------------------------------------------- serialize 3.876 1.002 1.0000 sc 5.502 2.703 0.0158 gzip 918.937 10.057 0.0169 bz2 59.500 32.611 0.0117 Statistics ---------- We create 256 mbit (32mb) random bitarrays with varying probability ``p`` for elements being 1. After compression, we look at the compression ratio, and the number of blocks of each type: .. 
code-block:: p ratio raw type 1 type 2 type 3 type 4 ------------------------------------------------------------------------- 1.00000000 1.00024432 8192 0 0 0 0 0.50000000 1.00024432 8192 0 0 0 0 0.25000000 1.00024432 8192 0 0 0 0 0.12500000 0.95681512 261581 494833 0 0 0 0.06250000 0.53125548 176 1048400 0 0 0 0.03125000 0.28118369 0 1048576 0 0 0 0.01562500 0.15618166 0 1048576 0 0 0 0.00781250 0.09370023 0 1048576 0 0 0 0.00390625 0.06188744 0 281719 2996 0 0 0.00195312 0.03140587 0 0 4096 0 0 0.00097656 0.01582754 0 0 4096 0 0 0.00048828 0.00804502 0 0 4096 0 0 0.00024414 0.00415075 0 0 4096 0 0 0.00012207 0.00220025 0 0 4096 0 0 0.00006104 0.00122154 0 0 4096 0 0 0.00003052 0.00072917 0 0 3958 1 0 0.00001526 0.00038746 0 0 817 13 0 0.00000763 0.00018182 0 0 0 16 0 0.00000381 0.00009358 0 0 0 16 0 0.00000191 0.00004664 0 0 0 16 0 0.00000095 0.00002545 0 0 0 16 0 0.00000048 0.00001267 0 0 0 16 0 0.00000024 0.00000739 0 0 0 16 0 0.00000012 0.00000426 0 0 0 16 0 0.00000006 0.00000226 0 0 0 0 1 0.00000003 0.00000107 0 0 0 0 1 0.00000001 0.00000060 0 0 0 0 1 bitarray-3.7.1/doc/variable_length.rst000066400000000000000000000035601505414144000177660ustar00rootroot00000000000000Variable length bitarray format =============================== In some cases, it is useful to represent bitarrays in a binary format that is "self terminating" (in the same way that C strings are NUL terminated). That is, when an encoded bitarray of unknown length is encountered in a stream of binary data, the format lets us know when the end of the encoded bitarray is reached. Such a "variable length format" (most memory efficient for small bitarrays) is implemented in ``vl_encode()`` and ``vl_decode()``: .. code-block:: python >>> from bitarray import bitarray >>> from bitarray.util import vl_encode, vl_decode >>> a = bitarray('0110001111') >>> b = bitarray('001') >>> data = vl_encode(a) + vl_encode(b) + b'other stuff' >>> data b'\x96\x1e\x12other stuff' >>> stream = iter(data) >>> vl_decode(stream) # the remaining stream is untouched bitarray('0110001111') >>> vl_decode(stream) bitarray('001') >>> bytes(stream) b'other stuff' The variable length format is similar to LEB128. A single byte can store bitarrays up to 4 element, every additional byte stores up to 7 more elements. The most significant bit of each byte indicated whether more bytes follow. In addition, the first byte contains 3 bits which indicate the number of padding bits at the end of the stream. Here is an example of encoding ``bitarray('01010110111001110')``: .. code-block:: 01010110111001110 raw bitarray 0101 0110111 001110 grouped (4, 7, 7, ...) 0101 0110111 0011100 pad last group with zeros 0010101 0110111 0011100 add number of pad bits (1) to front (001) 10010101 10110111 00011100 add high bits (1, except 0 for last group) 0x95 0xb7 0x1c in hexadecimal - output stream .. code-block:: python >>> vl_encode(bitarray('01010110111001110')) b'\x95\xb7\x1c' bitarray-3.7.1/examples/000077500000000000000000000000001505414144000151535ustar00rootroot00000000000000bitarray-3.7.1/examples/README000066400000000000000000000056301505414144000160370ustar00rootroot00000000000000All files under this 'examples/' directory are unsupported. While bitarray itself supports Python 3.6+, some examples may require higher Python versions. bloom.py Demonstrates the implementation of a "Bloom filter", see: http://en.wikipedia.org/wiki/Bloom_filter distance.py Implementation of distance functions and comparison to the corresponding functions in the scipy.spatial.distance module. 
double.py Functionality to analyze double precision floating point numbers (IEEE 754 binary64). dubner.rst We show how bitarrays can be used to calculate twin primes and "middle numbers" very efficiently, and verify the some conjectures in: https://oeis.org/A007534/a007534.pdf dyn_sieve.py Prime numbers, implemented as a dynamically growing sieve of Eratosthenes. Similar to prime number sieve in SymPy, but implemented using a bitarray. extend_json.py Demonstrates how to construct a json encoder and decoder (using the 'json' standard library) which can handle extended Python data structures containing bitarrays. gene.py Shows how gene sequences (ATGC) can be very easily and efficiently represented by bitarrays. hamming.py Implementation of Hamming codes for error correction with send and receive functionality. huffman/ Directory containing a library and examples for working with Huffman trees and codes. lexico.py Contains two functions, all_perm and next_perm, which are handle lexicographical permutations of bitarrays. lfsr.py Linear Feedback Shift Register mandel.py Generates a .ppm image file of size 4000 x 3000 of the Mandelbrot set. Despite its size, the output image file has only a size of slightly over 1.5 Million bytes (uncompressed) because each pixel is stored in one bit. Requires: numba masked.py Illustrate masked indexing, i.e. using bitarrays to select indices mmapped-file.py Demonstrates how to memory map a file into a bitarray. ndarray.py Demonstrates how to efficiently convert boolean data from a bitarray to a numpy.ndarray of dtype bool. Requires: numpy pbm.py Defines a simple class called PBM (Portable Bit Map) which allows: - addressing pixels by their coordinates - storing and loading .ppm (P4), which is the same as .pbm, files puff/ DEFLATE decompression implementation based on Mark Adler's excellent https://github.com/madler/zlib/blob/master/contrib/puff/puff.c sieve.py Demonstrates the "Sieve of Eratosthenes" algorithm for finding all prime numbers up to a specified integer. smallints.py A class is defined which allows efficiently storing an array of integers represented by a specified number of bits (1 through 8). For example, an array with 1000 5 bit integers can be created, allowing each element in the array to take values form 0 to 31, while the size of the object is 625 (5000/8) bytes. bitarray-3.7.1/examples/bloom.py000066400000000000000000000055231505414144000166420ustar00rootroot00000000000000import hashlib from math import ceil, exp, log, log2 from bitarray import bitarray class BloomFilter(object): """ Implementation of a Bloom filter. An instance is initialized by its capacity `n` and error rate `p`. The capacity tells how many elements can be stored while maintaining no more than `p` false positives. """ def __init__(self, n, p=0.01): assert 0 < p < 1 self.n = n # number of hash functions self.k = ceil(-log2(p)) # size of array self.m = ceil(-n * log2(p) / log(2)) self.array = bitarray(self.m) def calculate_p(self): """ Calculate the actual false positive error rate `p` from the number of hashes `k` and the size if the bitarray `m`. This is slightly different from the given `p`, because the integer value of `k` is being used. """ return pow(1 - exp(-self.k * self.n / self.m), self.k) def approx_items(self): """ Return the approximate number of items in the Bloom filter. """ x = self.array.count() if x == 0: return 0.0 return -self.m / self.k * log(1 - x / self.m) def population(self): """ Return the current relative array population as a float. 
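        (i.e. the fraction of bits in the underlying bitarray that are set)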
""" return self.array.count() / self.m def add(self, key): self.array[list(self._hashes(key))] = 1 def __contains__(self, key): return all(self.array[i] for i in self._hashes(key)) def _hashes(self, key): """ generate k different hashes, each of which maps a key to one of the m array positions with a uniform random distribution """ n = 1 << self.m.bit_length() h = hashlib.new('sha1') h.update(str(key).encode()) x = 0 i = 0 while i < self.k: if x < n: h.update(b'X') x = int.from_bytes(h.digest(), 'little') x, y = divmod(x, n) if y < self.m: yield y i += 1 def test_bloom(n, p): print("Testing Bloom filter:") print("capacity n = %d" % n) print("given p = %.3f%%" % (100.0 * p)) b = BloomFilter(n, p) print("hashes k = %d = ceil(%.3f)" % (b.k, -log2(p))) print("array size m = %d" % b.m) for i in range(n): b.add(i) assert i in b print("population: %.2f%%" % (100.0 * b.population())) print("approx_items(): %.2f" % b.approx_items()) print("calculate_p(): %.3f%%" % (100.0 * b.calculate_p())) N = 100_000 false_pos = sum(i in b for i in range(n, n + N)) print("experimental : %.3f%%\n" % (100.0 * false_pos / N)) if __name__ == '__main__': test_bloom( 5_000, 0.05) test_bloom( 10_000, 0.01) test_bloom( 50_000, 0.005) test_bloom(100_000, 0.002) bitarray-3.7.1/examples/distance.py000066400000000000000000000034621505414144000173240ustar00rootroot00000000000000""" In this module, we implement distance functions and compare them to the corresponding functions in the scipy.spatial.distance module. The functions using bitarray are typically around 50 to 200 times faster. """ from time import perf_counter from bitarray.util import correspond_all, count_and, count_xor, urandom import numpy import scipy.spatial.distance as distance # type: ignore def dice(u, v): x = count_xor(u, v) return x / (2 * count_and(u, v) + x) def hamming(u, v): return count_xor(u, v) / len(u) def jaccard(u, v): x = count_xor(u, v) return x / (count_and(u, v) + x) def rogerstanimoto(u, v): x = count_xor(u, v) return 2 * x / (len(u) + x) def russellrao(u, v): n = len(u) return (n - count_and(u, v)) / n def sokalsneath(u, v): R = 2 * count_xor(u, v) return R / (count_and(u, v) + R) def yule(u, v): nff, nft, ntf, ntt = correspond_all(u, v) half_R = ntf * nft if half_R == 0: return 0.0 else: return 2 * half_R / (ntt * nff + half_R) def test(n): a = urandom(n) b = urandom(n) aa = numpy.frombuffer(a.unpack(), dtype=bool) bb = numpy.frombuffer(b.unpack(), dtype=bool) for name in ['dice', 'hamming', 'jaccard', 'rogerstanimoto', 'russellrao', 'sokalsneath', 'yule']: f1 = eval(name) # function defined above t0 = perf_counter() x1 = f1(a, b) t1 = perf_counter() - t0 print('%.14f %6.3f ms %s' % (x1, 1000.0 * t1, name)) f2 = getattr(distance, name) # scipy.spatial.distance function t0 = perf_counter() x2 = f2(aa, bb) t2 = perf_counter() - t0 print('%.14f %6.3f ms %9.2f' % (x2, 1000.0 * t2, t2 / t1)) assert abs(x1 - x2) < 1E-14 if __name__ == "__main__": test(2 ** 22 + 67) bitarray-3.7.1/examples/double.py000066400000000000000000000123241505414144000170010ustar00rootroot00000000000000from struct import pack, unpack from bitarray import bitarray from bitarray.util import ba2int, int2ba class Double: def __init__(self, x=0.0): if isinstance(x, (float, int)): self.from_float(float(x)) elif isinstance(x, str): self.from_string(x) else: raise TypeError("float or str expected") def __float__(self): a = self.to_bitarray() return unpack(" %r" % x) # --------------------------------------------------------------------------- from math import pi, inf, nan, isnan 
from random import getrandbits, randint import unittest from bitarray.util import urandom EXAMPLES = [ ( 0.0, "0 00000000000 " + 52 * "0"), ( 1.0, "0 01111111111 " + 52 * "0"), ( 1.5, "0 01111111111 1" + 51 * "0"), ( 2.0, "0 10000000000 " + 52 * "0"), ( 5.0, "0 10000000001 01" + 50 * "0"), (-5.0, "1 10000000001 01" + 50 * "0"), # smallest number > 1 (1.0000000000000002, "0 01111111111 " + 51 * "0" + "1"), # minimal subnormal double (4.9406564584124654e-324, "0 00000000000 " + 51 * "0" + "1"), # maximal subnormal double (2.2250738585072009e-308, "0 00000000000 " + 52 * "1"), # minimal normal double (2.2250738585072014e-308, "0 00000000001 " + 52 * "0"), # maximal (normal) double (1.7976931348623157e+308, "0 11111111110 " + 52 * "1"), ( inf, "0 11111111111 " + 52 * "0"), (-inf, "1 11111111111 " + 52 * "0"), ( 1/3, "0 01111111101 " + 26 * "01"), ( pi, "0 10000000000 " "1001001000011111101101010100010001000010110100011000"), # largest number exactly representated as integer (2 ** 53 - 1, "0 10000110011 " + 52 * "1"), ] class DoubleTests(unittest.TestCase): def test_zero(self): d = Double() self.assertEqual(float(d), 0.0) self.assertEqual(d.sign, 0) self.assertEqual(d.exponent, -1023) self.assertEqual(d.fraction, bitarray(52)) def test_examples(self): for x, s in EXAMPLES: for d in Double(x), Double(s): self.assertEqual(float(d), x) self.assertEqual(str(d), s) def test_nan(self): s = "0 11111111111 1" + 51 * "0" for x in nan, s: d = Double(x) self.assertEqual(str(d), s) self.assertTrue(isnan(float(d))) def test_nan_msg(self): msg = urandom(52) d = Double() d.exponent = 1024 d.fraction = msg x = float(d) self.assertEqual(type(x), float) self.assertTrue(isnan(x)) e = Double(x) self.assertEqual(e.exponent, 1024) self.assertEqual(e.fraction, msg) def test_exponent52(self): for _ in range(1000): d = Double() d.fraction = urandom(52, endian="little") d.exponent = 52 d.sign = getrandbits(1) i = (1 << 52) + ba2int(d.fraction) if d.sign: i = -i self.assertEqual(float(d), i) def test_exact_ints(self): for _ in range(1000): i = getrandbits(randint(1, 53)) if i == 0: continue d = Double(i) self.assertEqual(d.sign, 0) a = int2ba(i, endian="little") a.pop() n = len(a) self.assertEqual(d.exponent, n) a = bitarray(52 - n, endian="little") + a self.assertEqual(d.fraction, a) if __name__ == '__main__': import sys if len(sys.argv) > 1: for arg in sys.argv[1:]: d = Double(eval(arg)) d.info() else: unittest.main() bitarray-3.7.1/examples/dubner.rst000066400000000000000000000067021505414144000171710ustar00rootroot00000000000000Dubner's conjecture =================== Harvey Dubner proposed a strengthening of the Goldbach conjecture: every even integer greater than 4208 is the sum of two twin primes (not necessarily belonging to the same pair). Only 34 even integers less than 4208 are not the sum of two twin primes. Dubner has verified computationally that this list is complete up to 2e10. A proof of this stronger conjecture would imply not only Goldbach's conjecture but also the twin prime conjecture. For more details, see `Dubner's conjecture `__. In this document, we want to show how bitarrays can be used to calculate twin primes and "middle numbers" very efficiently, and with very little code. We start by calculating all primes up to a limit ``N`` using the `Sieve of Eratosthenes <../examples/sieve.py>`__: .. 
code-block:: python >>> from bitarray.util import zeros, gen_primes >>> N = 1_000_000 >>> primes = gen_primes(N) >>> list(primes.search(1, 0, 50)) [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47] In order to save memory and compute time, we only consider odd numbers being twin primes. Note that 2 is not considered a twin prime. .. code-block:: python >>> twins = primes[1::2] >>> # we have a twin when next odd number is also prime: >>> twins &= twins << 1 >>> # The first twin primes (only lower of each pair) are: >>> [2 * i + 1 for i in twins.search(1, 0, 60)] [3, 5, 11, 17, 29, 41, 59, 71, 101, 107] We define a "middle number" to be ``m``, the number sandwiched between a pair of twin primes ``m − 1`` and ``m + 1``. It is obvious from the characteristics of twin primes that all such ``m`` greater that 4 are divisible by 6. Again, to save memory and compute time, we only consider multiples of 6. If we let ``i=m//6``, we are looking for the numbers ``i`` such that ``6*i-1``, ``6*i+1`` are twin primes. The numbers ``i`` are given in `sequence A002822 `__. .. code-block:: python >>> middles = zeros(1) # middle numbers >>> middles += twins[2::3] >>> list(middles.search(1, 0, 46)) # sequence A002822 [1, 2, 3, 5, 7, 10, 12, 17, 18, 23, 25, 30, 32, 33, 38, 40, 45] Although not as memory efficient, a very elegant alternative to calculate the middle numbers directly from primes is: .. code-block:: python >>> ((primes >> 1) & (primes << 1))[::6] == middles True We now mark multiples of 6 that are sum of two middle numbers: .. code-block:: python >>> M = len(middles) >>> mark = zeros(M) >>> for i in middles.search(1): ... mark[i:] |= middles[:M - i] Positive integers divisible by 6 and greater than 6 that are not the sum of two middle numbers (greater than 4): .. code-block:: python >>> [6 * i for i in mark.search(0, 2)] [96, 402, 516, 786, 906, 1116, 1146, 1266, 1356, 3246, 4206] This is `sequence A179825 `__, the multiples of 6 which are not the sum of a pair of twin primes. None of the above values are middle numbers themselves (this would contradict Conjecture 1): .. code-block:: python >>> any(middles[m] for m in mark.search(0, 2)) False As `A007534 `__, is the sequence of positive even numbers that are not the sum of a pair of twin primes (not just multiples of 6), A179825 is a subset of A007534. .. image:: https://github.com/ilanschnell/visual/blob/master/dubner/image.png?raw=true :alt: visualization of middle numbers bitarray-3.7.1/examples/dyn_sieve.py000066400000000000000000000073771505414144000175300ustar00rootroot00000000000000import math import itertools from bitarray import bitarray from bitarray.util import ones, count_n class Sieve: """ Prime numbers, implemented as a dynamically growing sieve of Eratosthenes. Similar to prime number sieve in SymPy, but implemented using a bitarray. """ a = ones(105) a[1::3] = 0 a[2::5] = 0 a[3::7] = 0 def __init__(self): self.data = bitarray() def extend(self, i): "grow to accomodate i, ie. 
self.data[i//2] will not raise IndexError" if i < 0: raise ValueError("positive integer expected") if i == 0: # reset self.data = bitarray() return n = i // 2 + 1 # n is minimal length of self.data if n <= len(self.data): return fresh_data = not self.data if fresh_data: self.data = self.a.copy() self.data[:8] = bitarray("01110110") a = self.data n1 = len(a) m = (n - n1 + 105 - 1) // 105 assert fresh_data or m > 0 a += m * self.a if fresh_data: n1 = 60 for i in a.search(1, 5, int(math.sqrt(len(a) // 2) + 1.0)): j = 2 * i + 1 j2 = (j * j) // 2 k = (j2 - n1) % j + n1 if j2 < n1 else j2 assert k >= n1 a[k :: j] = 0 def extend_to_no(self, n): while self.data.count() + 1 < n: self.extend(3 * len(self.data) + 1) def __contains__(self, i): if i < 0: raise ValueError("positive integer expected") if i % 2 == 0: return i == 2 self.extend(i) return self.data[i // 2] def __iter__(self): yield 2 for i in itertools.count(start=3, step=2): self.extend(i) if self.data[i // 2]: yield i def __getitem__(self, n): "return n-th prime" if n < 1: # offset is one, so forbid explicit access to sieve[0] raise IndexError("Sieve indices start at 1") if n == 1: return 2 self.extend_to_no(n) i = count_n(self.data, n - 1) - 1 assert self.data[i] return 2 * i + 1 # --------------------------------------------------------------------------- import unittest from random import randrange from bitarray.util import gen_primes N = 1_000_000 PRIMES = gen_primes(N) ODD_PRIMES = PRIMES[1::2] class SieveTests(unittest.TestCase): def check_data(self, s, i): if i == 0: self.assertEqual(len(s.data), 0) return n = i // 2 + 1 if n <= len(s.data): n = len(s.data) n = 105 * ((n + 105 - 1) // 105) self.assertEqual(len(s.data), n) self.assertEqual(s.data, ODD_PRIMES[:n]) def test_random(self): s = Sieve() for _ in range(1000): i = randrange(1000) if randrange(10) else 0 s.extend(i) self.check_data(s, i) #print(n, len(s.data)) def test_contains(self): s = Sieve() for i, v in enumerate(PRIMES[:1000]): self.assertEqual(i in s, v) for _ in range(1000): i = randrange(1_000_000) self.assertEqual(i in s, PRIMES[i]) def test_iter(self): s = Sieve() a = [] for i in s: if len(a) >= 168: break a.append(i) self.assertEqual(a[-1], 997) self.assertEqual(a, list(PRIMES.search(1, 0, 1000))) def test_getitem(self): s = Sieve() self.assertEqual(s[1], 2) self.assertEqual(s[2], 3) self.assertEqual(s[3], 5) self.assertEqual(s[10], 29) self.assertEqual(s[1_000_000], 15_485_863) if __name__ == '__main__': unittest.main() bitarray-3.7.1/examples/extend_json.py000066400000000000000000000026361505414144000200540ustar00rootroot00000000000000import json from base64 import b64encode, b64decode from bitarray import bitarray from bitarray.util import serialize, deserialize class JSONEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, bitarray): if len(obj) > 50: return {'bitarray_b64': b64encode(serialize(obj)).decode()} else: return {'bitarray': obj.to01()} return json.JSONEncoder.default(self, obj) class JSONDecoder(json.JSONDecoder): def __init__(self, *args, **kwargs): json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) def object_hook(self, obj): if isinstance(obj, dict) and len(obj) == 1: if 'bitarray_b64' in obj: return deserialize(b64decode(obj['bitarray_b64'])) if 'bitarray' in obj: return bitarray(obj['bitarray']) return obj def test(): from random import getrandbits from bitarray.util import urandom a = [urandom(n * n, endian=['little', 'big'][getrandbits(1)]) for n in range(12)] a.append({'key1': bitarray('010'), 
'key2': 'value2', 'key3': urandom(300)}) j = JSONEncoder(indent=2).encode(a) print(j) b = JSONDecoder().decode(j) assert a == b assert b[-1]['key1'] == bitarray('010') if __name__ == '__main__': test() bitarray-3.7.1/examples/gene.py000066400000000000000000000007621505414144000164500ustar00rootroot00000000000000# gene sequence example from @yoch, see # https://github.com/ilanschnell/bitarray/pull/54 from random import choice from timeit import timeit from bitarray import bitarray trans = { "A": bitarray("00"), "T": bitarray("01"), "G": bitarray("10"), "C": bitarray("11") } N = 10_000 seq = [choice("ATGC") for _ in range(N)] arr = bitarray() arr.encode(trans, seq) assert list(arr.decode(trans)) == seq # decodage t = timeit(lambda: list(arr.decode(trans)), number=1000) print(t) bitarray-3.7.1/examples/hamming.py000066400000000000000000000072721505414144000171550ustar00rootroot00000000000000# https://www.youtube.com/watch?v=b3NxrZOu_CE # https://en.wikipedia.org/wiki/Hamming_code from bitarray.util import xor_indices, int2ba, parity class Hamming: def __init__(self, r): self.r = r self.n = 1 << r # block length self.k = self.n - r - 1 # message length self.parity_bits = [0] # the 0th bit is to make overall parity 0 i = 1 while i < self.n: self.parity_bits.append(i) i <<= 1 def send(self, a): "encode message inplace" if len(a) != self.k: raise ValueError("expected bitarray of message length %d" % self.k) for i in self.parity_bits: a.insert(i, 0) # prepare block c = xor_indices(a) a[self.parity_bits[1:]] = int2ba(c, length=self.r, endian="little") a[0] = parity(a) def receive(self, a): "decode inplace and return number of bit errors" if len(a) != self.n: raise ValueError("expected bitarray of block length %d" % self.n) p = parity(a) c = xor_indices(a) a.invert(c) # fix bit error del a[self.parity_bits] if p: # overall parity is wrong, so we have a 1 bit error return 1 if c: # overall parity is OK, but since we have wrong partial # parities, there must have been 2 bit errors return 2 # overall parity as well as partial parities as fine, so no error return 0 # --------------------------------------------------------------------------- from random import getrandbits, randint import unittest from bitarray import bitarray from bitarray.util import urandom, count_xor class HammingTests(unittest.TestCase): def test_init(self): for r, n, k in [ (1, 2, 0), ( 8, 256, 247), (2, 4, 1), (16, 65536, 65519), (3, 8, 4), (4, 16, 11), (5, 32, 26), (6, 64, 57), ]: h = Hamming(r) self.assertEqual(h.r, r) self.assertEqual(h.n, n) self.assertEqual(h.k, k) self.assertEqual(len(h.parity_bits), h.r + 1) def check_well_prepared(self, a): n = len(a) self.assertEqual(n & (n - 1), 0) # n is power of 2 self.assertEqual(xor_indices(a), 0) # partial parity bits are 0 self.assertEqual(parity(a), 0) # overall parity is 0 def test_example(self): a = bitarray(" 0 010 111 0110") # 012 4 8 c = a.copy() b = bitarray("1100 1010 1111 0110") # 012 4 8 h = Hamming(4) self.check_well_prepared(b) h.send(a) self.assertEqual(a, b) a.invert(10) self.assertEqual(h.receive(a), 1) self.assertEqual(a, c) def test_send(self): for _ in range(1000): h = Hamming(randint(2, 10)) a = urandom(h.k) h.send(a) self.assertEqual(len(a), h.n) self.check_well_prepared(a) def test_receive(self): for _ in range(1000): h = Hamming(randint(2, 10)) a = urandom(h.k) b = a.copy() h.send(a) t = a.copy() for _ in range(randint(0, 2)): a.invert(getrandbits(h.r)) dist = count_xor(a, t) self.assertTrue(0 <= dist <= 2) res = h.receive(a) self.assertEqual(len(a), h.k) if 
dist <= 1: self.check_well_prepared(t) self.assertEqual(a, b) self.assertEqual(res, dist) if __name__ == '__main__': unittest.main() bitarray-3.7.1/examples/hexadecimal.py000066400000000000000000000032231505414144000177710ustar00rootroot00000000000000from time import perf_counter from bitarray import bitarray, get_default_endian from bitarray.util import urandom, ba2hex, hex2ba # ----- conversion using prefix codes CODEDICT = {'little': {}, 'big': { '0': bitarray('0000'), '1': bitarray('0001'), '2': bitarray('0010'), '3': bitarray('0011'), '4': bitarray('0100'), '5': bitarray('0101'), '6': bitarray('0110'), '7': bitarray('0111'), '8': bitarray('1000'), '9': bitarray('1001'), 'a': bitarray('1010'), 'b': bitarray('1011'), 'c': bitarray('1100'), 'd': bitarray('1101'), 'e': bitarray('1110'), 'f': bitarray('1111'), }} for k, v in CODEDICT['big'].items(): # type: ignore CODEDICT['little'][k] = v[::-1] # type: ignore def prefix_ba2hex(a): return ''.join(a.decode(CODEDICT[a.endian])) def prefix_hex2ba(s, endian=None): a = bitarray(0, endian or get_default_endian()) a.encode(CODEDICT[a.endian], s) return a # ----- test def test_round(f, g, n, endian): # f: function which takes bitarray and returns hexstr # g: function which takes hexstr and returns bitarray # n: size of random bitarray a = urandom(n, endian) t0 = perf_counter() s = f(a) print('%s: %6.3f ms' % (f.__name__, 1000.0 * (perf_counter() - t0))) t0 = perf_counter() b = g(s, endian) print('%s: %6.3f ms' % (g.__name__, 1000.0 * (perf_counter() - t0))) assert b == a if __name__ == '__main__': n = 100_000_004 for endian in 'little', 'big': print('%s-endian:' % endian) for f in ba2hex, prefix_ba2hex: for g in hex2ba, prefix_hex2ba: test_round(f, g, n, endian) print() bitarray-3.7.1/examples/huffman/000077500000000000000000000000001505414144000165775ustar00rootroot00000000000000bitarray-3.7.1/examples/huffman/README000066400000000000000000000013201505414144000174530ustar00rootroot00000000000000compress.py: Demonstrates how Huffman codes can be used to efficiently compress and uncompress files (text or binary). Given an input file, calculates the number of occurrences for each character; from those frequencies, a Huffman tree is build. Also allows encoding and decoding of a file, see -h option. decodetree.py: Demonstrates how to use the `decodetree` object, and how much speedup may be achieved when using very large prefix codes. decoding.py: This example demonstrates how much faster bitarray's decoing is as opposed to traversing the Huffman tree using Python. huffman.py: Library containing useful functionality for working with Huffman trees and codes. 
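
For orientation, here is a minimal sketch of the library-level calls these
examples build on (the input string below is made up; the sketch only uses
huffman_code() from bitarray.util together with the bitarray
encode()/decode() methods, as done in compress.py):

    from collections import Counter
    from bitarray import bitarray
    from bitarray.util import huffman_code

    plain = b"abracadabra"
    code = huffman_code(Counter(plain))     # symbol -> prefix code (bitarray)
    a = bitarray()
    a.encode(code, plain)                   # compress
    assert bytes(a.decode(code)) == plain   # decompress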
bitarray-3.7.1/examples/huffman/canonical.py000066400000000000000000000023201505414144000210750ustar00rootroot00000000000000from collections import Counter from bitarray import bitarray from bitarray.util import canonical_huffman, canonical_decode from huffman import write_dot, print_code, make_tree def main(): from optparse import OptionParser p = OptionParser("usage: %prog [options] [FILE]") p.add_option( '-p', '--print', action="store_true", help="print Huffman code") p.add_option( '-t', '--tree', action="store_true", help="store the tree as a .dot file") opts, args = p.parse_args() if len(args) == 0: filename = 'README' elif len(args) == 1: filename = args[0] else: p.error('only one argument expected') with open(filename, 'rb') as fi: plain = bytearray(fi.read()) freq = Counter(plain) code, count, symbol = canonical_huffman(freq) if opts.print: print_code(freq, code) if opts.tree: # create tree from code (no frequencies) write_dot(make_tree(code), 'tree_raw.dot', 0 in plain) a = bitarray() a.encode(code, plain) assert bytearray(a.decode(code)) == plain assert bytearray(canonical_decode(a, count, symbol)) == plain if __name__ == '__main__': main() bitarray-3.7.1/examples/huffman/compress.py000066400000000000000000000057311505414144000210120ustar00rootroot00000000000000""" This program demonstrates how Huffman codes can be used to efficiently compress and uncompress files (text or binary). """ import os import struct from itertools import islice from optparse import OptionParser from collections import Counter from bitarray import bitarray from bitarray.util import (serialize, deserialize, vl_encode, vl_decode, huffman_code) def encode_code(code): res = bytearray(struct.pack(" 0: return huffman_code(cnt) # special case for empty file return {0: bitarray('0')} def encode(filename): with open(filename, 'rb') as fi: plain = fi.read() code = create_code(Counter(plain)) with open(filename + '.huff', 'wb') as fo: fo.write(encode_code(code)) a = bitarray(endian='little') a.encode(code, plain) fo.write(serialize(a)) if len(plain) == 0: assert len(a) == 0 else: print('Bits: %d / %d' % (len(a), 8 * len(plain))) print('Ratio =%6.2f%%' % (100.0 * a.nbytes / len(plain))) def decode(filename): assert filename.endswith('.huff') with open(filename, 'rb') as fi: stream = iter(fi.read()) code = decode_code(stream) a = deserialize(bytes(stream)) with open(filename[:-5] + '.out', 'wb') as fo: fo.write(bytearray(a.decode(code))) def main(): p = OptionParser("usage: %prog [options] FILE") p.add_option( '-e', '--encode', action="store_true", help="encode (compress) FILE using the Huffman code calculated for " "the frequency of characters in FILE itself. 
" "The output is FILE.huff which contains both the Huffman " "code and the bitarray resulting from the encoding.") p.add_option( '-d', '--decode', action="store_true", help="decode (decompress) FILE.huff and write the output to FILE.out") p.add_option( '-t', '--test', action="store_true", help="encode FILE, decode FILE.huff, compare FILE with FILE.out, " "and unlink created files.") opts, args = p.parse_args() if len(args) != 1: p.error('exactly one argument required') filename = args[0] if opts.encode: encode(filename) elif opts.decode: decode(filename + '.huff') elif opts.test: huff = filename + '.huff' out = filename + '.out' encode(filename) decode(huff) assert open(filename, 'rb').read() == open(out, 'rb').read() os.unlink(huff) os.unlink(out) else: p.error("no option provided") if __name__ == '__main__': main() bitarray-3.7.1/examples/huffman/compress2.py000066400000000000000000000060501505414144000210670ustar00rootroot00000000000000""" This program demonstrates how Huffman codes can be used to efficiently compress and uncompress files (text or binary). """ import os import struct from itertools import islice from optparse import OptionParser from collections import Counter from bitarray import bitarray from bitarray.util import (serialize, deserialize, canonical_huffman, canonical_decode) def create_code(cnt): if len(cnt) > 0: return canonical_huffman(cnt) # special case for empty file return {0: bitarray('0')}, [0, 1], [0] def encode(filename): with open(filename, 'rb') as fi: plain = fi.read() code, count, symbol = create_code(Counter(plain)) with open(filename + '.huff2', 'wb') as fo: fo.write(struct.pack(" 0: print('Bits: %d / %d' % (len(a), 8 * len(plain))) print('Ratio =%6.2f%%' % (100.0 * a.nbytes / len(plain))) def decode(filename): assert filename.endswith('.huff2') with open(filename, 'rb') as fi: stream = iter(fi.read()) maxbits = struct.unpack("%d;\n' % (id(nd), id(nd.child[k]))) for k in range(2): if nd.child[k]: write_nd(fo, nd.child[k]) with open(fn, 'w') as fo: # dot -Tpng tree.dot -O fo.write('digraph BT {\n') fo.write(' node [shape=box, fontsize=20, fontname="Arial"];\n') write_nd(fo, tree) fo.write('}\n') def print_code(freq, codedict): """ Given a frequency map (dictionary mapping symbols to their frequency) and a codedict, print them in a readable form. 
""" special_ascii = {0: 'NUL', 9: 'TAB', 10: 'LF', 13: 'CR', 127: 'DEL'} def disp_char(i): if 32 <= i < 127: return repr(chr(i)) return special_ascii.get(i, '') print(' symbol char hex frequency Huffman code') print(70 * '-') for i in sorted(codedict, key=lambda c: (freq[c], c), reverse=True): print('%7r %-4s 0x%02x %10i %s' % ( i, disp_char(i), i, freq[i], codedict[i].to01())) def test(): from bitarray.util import _huffman_tree freq = {'a': 10, 'b': 2, 'c': 1} tree = _huffman_tree(freq) code = huff_code(tree) assert len(code['a']) == 1 assert len(code['b']) == len(code['c']) == 2 code = {'a': bitarray('0'), 'b': bitarray('10'), 'c': bitarray('11')} tree = make_tree(code) txt = 'abca' a = bitarray() a.encode(code, txt) assert a == bitarray('010110') assert ''.join(iterdecode(tree, a)) == txt if __name__ == '__main__': test() bitarray-3.7.1/examples/lexico.py000066400000000000000000000163151505414144000170160ustar00rootroot00000000000000# issue 6 # http://www-graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation from bitarray import bitarray from bitarray.util import zeros, ba2int, int2ba def lexico_all(n, k, endian=None): """lexico_all(n, k, endian=None) -> iterator Return an iterator over all bitarrays of length `n` and population count `k` in lexicographical order. """ if n < 0: raise ValueError("length must be >= 0") # error check inputs and handle edge cases if k <= 0 or k > n: if k == 0: yield zeros(n, endian) return raise ValueError("k must be in range 0 <= k <= n, got %s" % k) v = (1 << k) - 1 while True: try: yield int2ba(v, length=n, endian=endian) except OverflowError: return t = (v | (v - 1)) + 1 v = t | ((((t & -t) // (v & -v)) >> 1) - 1) def lexico_next(__a): """lexico_next(a, /) -> bitarray Return the next lexicographical permutation of bitarray `a`. The length and population count of the result remains unchanged. The integer value (`ba2int()`) of the next permutation will always increase, except when the cycle is completed. In that case, the lowest lexicographical permutation will be returned. 
""" if not __a: return __a v = ba2int(__a) if v == 0: return __a t = (v | (v - 1)) + 1 v = t | ((((t & -t) // (v & -v)) >> 1) - 1) try: return int2ba(v, length=len(__a), endian=__a.endian) except OverflowError: return __a[::-1] # ---------------------- lexicographical permutations ----------------------- import math import unittest from random import choice, getrandbits, randrange from itertools import pairwise from bitarray import frozenbitarray from bitarray.util import random_k class LexicoTests(unittest.TestCase): def test_errors(self): N = lexico_next self.assertRaises(TypeError, N) self.assertRaises(TypeError, N, bitarray('1'), 1) self.assertRaises(TypeError, N, '1') A = lexico_all self.assertRaises(TypeError, A) self.assertRaises(TypeError, A, 4) self.assertRaises(TypeError, next, A("4", 2)) self.assertRaises(TypeError, next, A(1, "0.5")) self.assertRaises(TypeError, A, 1, p=1) self.assertRaises(TypeError, next, A(11, 5.5)) self.assertRaises(ValueError, next, A(-1, 0)) for k in -1, 11: # k is not 0 <= k <= n self.assertRaises(ValueError, next, A(10, k)) self.assertRaises(ValueError, next, A(10, 7, 'foo')) self.assertRaises(ValueError, next, A(10, 7, endian='foo')) def test_zeros_ones(self): for n in range(30): endian = choice(["little", "big"]) a = bitarray(n, endian) a.setall(getrandbits(1)) b = lexico_next(a) self.assertEqual(b.endian, endian) # nothing to permute self.assertEqual(b, a) def test_next_explicit(self): for perm, endian in [ (['100', '010', '001', '100'], 'little'), (['00001', '00010', '00100', '01000', '10000'], 'big'), (['0011', '0101', '0110', '1001', '1010', '1100', '0011'], 'big'), (['0000111', '1110000'], 'little'), ]: a = bitarray(perm[0], endian) for s in perm[1:]: a = lexico_next(a) self.assertEqual(a, bitarray(s)) def check_cycle(self, n, k): endian = choice(["little", "big"]) a0 = bitarray(n, endian) a0[:k] = 1 if endian == "big": a0.reverse() ncycle = math.comb(n, k) # cycle length self.assertTrue(ncycle >= 2) coll = set() a = a0.copy() for i in range(ncycle): coll.add(frozenbitarray(a)) b = lexico_next(a) self.assertEqual(len(b), n) self.assertEqual(b.count(), k) self.assertEqual(b.endian, endian) self.assertNotEqual(a, b) if b == a0: self.assertEqual(i, ncycle - 1) self.assertTrue(ba2int(b) < ba2int(a)) break self.assertTrue(ba2int(b) > ba2int(a)) a = b else: self.fail() self.assertEqual(len(coll), ncycle) def test_cycle(self): for _ in range(20): n = randrange(2, 10) k = randrange(1, n) self.check_cycle(n, k) def test_next_random(self): for _ in range(100): endian = choice(["little", "big"]) n = randrange(2, 1_000) k = randrange(1, n) a = random_k(n, k, endian) b = lexico_next(a) self.assertEqual(len(b), n) self.assertEqual(b.count(), k) self.assertEqual(b.endian, endian) self.assertNotEqual(a, b) if ba2int(a) > ba2int(b): c = a.copy() c.sort(endian == 'big') self.assertEqual(a, c) self.assertEqual(b, a[::-1]) # ------------------- lexico_all ------------------- def test_all_explicit(self): for n, k, res in [ (0, 0, ['']), (1, 0, ['0']), (1, 1, ['1']), (2, 0, ['00']), (2, 1, ['01', '10']), (2, 2, ['11']), (3, 0, ['000']), (3, 1, ['001', '010', '100']), (3, 2, ['011', '101', '110']), (3, 3, ['111']), (4, 2, ['0011', '0101', '0110', '1001', '1010', '1100']), ]: lst = list(lexico_all(n, k, 'big')) self.assertEqual(len(lst), math.comb(n, k)) self.assertEqual(lst, [bitarray(s) for s in res]) if n == 0: continue a = lst[0] for i in range(20): self.assertEqual(a, bitarray(res[i % len(lst)])) a = lexico_next(a) def test_all_perm(self): n, k = 17, 5 
endian=choice(["little", "big"]) prev = None cnt = 0 coll = set() for a in lexico_all(n, k, endian): self.assertEqual(type(a), bitarray) self.assertEqual(len(a), n) self.assertEqual(a.count(), k) self.assertEqual(a.endian, endian) coll.add(frozenbitarray(a)) if prev is None: first = a.copy() c = a.copy() c.sort(c.endian == "little") self.assertEqual(a, c) else: self.assertNotEqual(a, first) self.assertEqual(lexico_next(prev), a) self.assertTrue(ba2int(prev) < ba2int(a)) prev = a cnt += 1 self.assertEqual(cnt, math.comb(n, k)) self.assertEqual(len(coll), cnt) # a is now the last permutation last = a.copy() self.assertTrue(ba2int(first) < ba2int(last)) self.assertEqual(last, first[::-1]) def test_all_order(self): n, k = 10, 5 for a, b in pairwise(lexico_all(n, k, 'little')): self.assertTrue(ba2int(b) > ba2int(a)) self.assertEqual(lexico_next(a), b) if __name__ == '__main__': unittest.main() bitarray-3.7.1/examples/lfsr.py000066400000000000000000000041271505414144000164770ustar00rootroot00000000000000# Linear Feedback Shift Register # https://www.youtube.com/watch?v=Ks1pw1X22y4 # https://en.wikipedia.org/wiki/Linear-feedback_shift_register from bitarray import bitarray from bitarray.util import parity def get_period(tabs, verbose=False): "given a bitarray of tabs return period of lfsr" n = len(tabs) state0 = bitarray(n) state0[0] = state0[-1] = 1 state = state0.copy() period = 0 while True: if verbose: print(state[0], end='') newbit = parity(state & tabs) state <<= 1 state[-1] = newbit period += 1 if state == state0: break if verbose: print() return period def simple(): "example from computerphile" state = 0b1001 for _ in range(20): print(state & 1, end='') newbit = (state ^ (state >> 1)) & 1 state = (state >> 1) | (newbit << 3) print() get_period(bitarray("1100"), True) def test_wiki(): "test list of tabs shown on Wikipedia" all_tabs = [ "11", "110", "1100", "10100", "110000", "1100000", "10111000", "100010000", "1001000000", "10100000000", "111000001000", "1110010000000", "11100000000010", "110000000000000", "1101000000001000", "10010000000000000", "100000010000000000", "1110010000000000000", "10010000000000000000", "101000000000000000000", "1100000000000000000000", "10000100000000000000000", "111000010000000000000000", ] for tabs in all_tabs: tabs = bitarray(tabs) period = get_period(tabs) print(period) n = len(tabs) assert period == 2 ** n - 1 assert parity(tabs) == 0 def n128(): tabs = bitarray(128) tabs[[0, 1, 2, 7]] = 1 state = bitarray(128) state[0] = state[-1] = 1 while True: print(state[0], end='') newbit = parity(state & tabs) state <<= 1 state[-1] = newbit if __name__ == "__main__": simple() test_wiki() #n128() bitarray-3.7.1/examples/mandel.py000066400000000000000000000015141505414144000167660ustar00rootroot00000000000000import sys from bitarray import bitarray from numba import jit # type: ignore width, height = 4000, 3000 maxdepth = 500 @jit(nopython=True) def mandel(c): d = 0 z = c while abs(z) < 4.0 and d <= maxdepth: d += 1 z = z * z + c return d def main(): data = bitarray(endian='big') for j in range(height): sys.stdout.write('.') sys.stdout.flush() y = +1.5 - 3.0 * j / height for i in range(width): x = -2.75 + 4.0 * i / width c = mandel(complex(x, y)) % 2 data.append(c) print("done") with open('out.ppm', 'wb') as fo: fo.write(b'P4\n') fo.write(b'# partable bitmap image of the Mandelbrot set\n') fo.write(b'%i %i\n' % (width, height)) data.tofile(fo) if __name__ == '__main__': main() 
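
# Note: the output size quoted in examples/README follows from one bit per
# pixel: the P4 pixel data is 4000 * 3000 / 8 = 1_500_000 bytes, plus a few
# dozen bytes for the header lines written above.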
bitarray-3.7.1/examples/masked.py000066400000000000000000000005421505414144000167720ustar00rootroot00000000000000# example to illustrate masked indexing from bitarray import bitarray a = bitarray('1110000') b = bitarray('1100110') # select bits from a where b is 1 assert a[b] == bitarray('1100') # set bits in a where b is 1 a[b] = bitarray('1010') assert a == bitarray('1010100') # delete bits in a where b is 1 del a[b] assert a == bitarray('100') print("Ok") bitarray-3.7.1/examples/mmapped-file.py000066400000000000000000000020641505414144000200670ustar00rootroot00000000000000""" Demonstrates how to memory map a file into a bitarray. """ import os import mmap from bitarray import bitarray filename = 'big.data' filesize = 10_000_000 # create a large file with zeros with open(filename, 'wb') as fo: fo.write(filesize * b'\0') # open file in binary read-write mode for mapping into bitarray with open(filename, 'r+b') as f: mapping = mmap.mmap(f.fileno(), 0) a = bitarray(buffer=mapping, endian='little') assert len(a) == 8 * filesize assert not a.any() # no bits 1 a[-1] = 1 # set the last bit in the array to 1 # open in binary read-only mode with open(filename, 'rb') as fi: m = mmap.mmap(fi.fileno(), 0, access=mmap.ACCESS_READ) b = bitarray(buffer=m, endian='little') assert len(b) == 8 * filesize assert b.count() == 1 # only one bit is set assert b[-1] == 1 # the last one try: b[0] = 1 # TypeError: cannot modify read-only memory except TypeError: pass assert b[0] == 0 # wasn't changed, still 0 os.unlink(filename) print("OK") bitarray-3.7.1/examples/ndarray.py000066400000000000000000000006161505414144000171700ustar00rootroot00000000000000# # This example illusatrates how binary data can be efficiently be passed # between a bitarray object and an ndarray with dtype bool # import bitarray import numpy # type: ignore a = bitarray.bitarray('100011001001') print(a) # bitarray -> ndarray b = numpy.frombuffer(a.unpack(), dtype=bool) print(repr(b)) # ndarray -> bitarray c = bitarray.bitarray() c.pack(b.tobytes()) assert a == c bitarray-3.7.1/examples/pbm.py000066400000000000000000000041531505414144000163060ustar00rootroot00000000000000from bitarray import bitarray, bits2bytes class PBM: # Portable Bitmap def __init__(self, w = 0, h = 0): self.size = w, h self.update() self.data = bitarray(self.bits, endian='big') def update(self): w, h = self.size self.bytes_per_row = bits2bytes(w) self.bits_per_row = 8 * self.bytes_per_row self.bytes = self.bytes_per_row * h self.bits = 8 * self.bytes def info(self): print('size: %d x %d' % self.size) print('bytes per row: %d' % self.bytes_per_row) print('bits per row: %d' % self.bits_per_row) print('bitarray: ' + repr(self.data.buffer_info())) def clear(self): self.data.setall(0) def save(self, filename): with open(filename, 'wb') as fo: fo.write(b'P4\n') fo.write(b'# This is a partable bitmap (pbm) file.\n') fo.write(('%d %d\n' % self.size).encode()) self.data.tofile(fo) def load(self, filename): with open(filename, 'rb') as fi: assert fi.readline().strip() == b'P4' while True: line = fi.readline() if not line.startswith(b'#'): self.size = tuple(map(int, line.split())) break self.update() self.data = bitarray(endian='big') self.data.fromfile(fi) fi.close() assert self.data.nbytes == self.bytes def address(self, pt): x, y = pt return x + self.bits_per_row * y def __getitem__(self, pt): return self.data[self.address(pt)] def __setitem__(self, pt, val): self.data[self.address(pt)] = val if __name__ == '__main__': # draw picture with straight line from (10, 10) to (390, 
390) a = PBM(500, 400) a.info() a.clear() for x in range(10, 391): a[x, x] = True a.save('pic1.ppm') # copy the picture b = PBM() b.load('pic1.ppm') b.save('pic2.ppm') # draw a straight line from (490, 10) to (110, 390) on top for i in range(381): b[490 - i, 10 + i] = 1 b.save('pic3.ppm') bitarray-3.7.1/examples/puff/000077500000000000000000000000001505414144000161135ustar00rootroot00000000000000bitarray-3.7.1/examples/puff/.gitignore000066400000000000000000000000211505414144000200740ustar00rootroot00000000000000build/ *.so *.gz bitarray-3.7.1/examples/puff/Makefile000066400000000000000000000002361505414144000175540ustar00rootroot00000000000000PYTHON=python _puff.so: _puff.c $(PYTHON) setup.py build_ext --inplace test: _puff.so $(PYTHON) test_puff.py clean: rm -rf *.o *.so __pycache__/ build/ bitarray-3.7.1/examples/puff/README.txt000066400000000000000000000014401505414144000176100ustar00rootroot00000000000000Ilan Schnell April, 2022 This is a simple implemention of inflate based on Mark Adler's excellent Puff: https://github.com/madler/zlib/blob/master/contrib/puff/puff.c While writing this, I also found useful: https://github.com/nayuki/Simple-DEFLATE-decompressor/tree/master/python I wrote this to better understand the DEFLATE format, and also to give an example on how to write a Python C extension which makes use of bitarray on the C-level. To try it out (you need to have bitarray installed into your Python 3): $ make test ... $ python gunzip.py ... Files: _puff.c - an object State (similar to the struct state in puff.c) puff.py - a class Puff which inherits from State gunzip.py - a class GunZip which inherits from Puff - a simple CLI bitarray-3.7.1/examples/puff/_puff.c000066400000000000000000000524251505414144000173660ustar00rootroot00000000000000/* Much of the code below is copied and/or derived from Mark Adler's Puff: https://github.com/madler/zlib/blob/master/contrib/puff This is Marks's copyright notice: Copyright (C) 2002-2013 Mark Adler, all rights reserved version 2.3, 21 Jan 2013 This software is provided 'as-is', without any express or implied warranty. In no event will the author be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. 
*/ #define PY_SSIZE_T_CLEAN #include "Python.h" #include "bitarray.h" #define MAXBITS 15 /* maximum bits in a code */ #define MAXLCODES 286 /* maximum number of literal/length codes */ #define MAXDCODES 30 /* maximum number of distance codes */ #define MAXCODES (MAXLCODES+MAXDCODES) /* maximum codes lengths to read */ #define FIXLCODES 288 /* number of fixed literal/length codes */ /* input and output state */ typedef struct { PyObject_HEAD /* input */ bitarrayobject *in; /* bitarray we're decoding */ Py_ssize_t incnt; /* current index in bitarray */ /* output */ PyObject *out; /* bytearray output buffer */ Py_ssize_t outcnt; /* bytes written to out so far */ } state_obj; static PyTypeObject state_type; static int read_uint(state_obj *s, int numbits) { long res = 0; int i; if (s->incnt + numbits > s->in->nbits) Py_FatalError("not enough bits in buffer"); for (i = 0; i < numbits; i++) res |= (long) getbit(s->in, s->incnt++) << i; return (int) res; } struct huffman { short *count; /* number of symbols of each length */ short *symbol; /* canonically ordered symbols */ }; static int decode(state_obj *s, const struct huffman *h) { Py_ssize_t nbits = s->in->nbits; int len; /* current number of bits in code */ int code; /* len bits being decoded */ int first; /* first code of length len */ int count; /* number of codes of length len */ int index; /* index of first code of length len in symbol table */ if (s->incnt >= nbits) { PyErr_SetString(PyExc_ValueError, "no more bits to decode"); return -1; } code = first = index = 0; for (len = 1; len <= MAXBITS; len++) { code |= getbit(s->in, s->incnt++); /* get next bit */ count = h->count[len]; if (code - count < first) /* if length len, return symbol */ return h->symbol[index + (code - first)]; index += count; /* else update for next length */ first += count; first <<= 1; code <<= 1; if (s->incnt >= nbits && len != MAXBITS) { PyErr_SetString(PyExc_ValueError, "reached end of bitarray"); return -1; } } PyErr_SetString(PyExc_ValueError, "ran out of codes"); return -1; } /* add a byte to s->out */ static int append_byte(state_obj *s, int byte) { char *cp; if (byte >> 8) { PyErr_Format(PyExc_ValueError, "invalid byte: %d", byte); return -1; } if (PyByteArray_Resize(s->out, s->outcnt + 1) < 0) { PyErr_NoMemory(); return -1; } cp = PyByteArray_AS_STRING(s->out) + s->outcnt; *cp = (char) byte; s->outcnt++; return 0; } /* copy 'len' bytes starting at 'dist' bytes ago in s->out */ static int dist_len_copy(state_obj *s, int dist, int len) { char *str; if (len < 0) { PyErr_SetString(PyExc_ValueError, "length cannot be negative"); return -1; } if (dist <= 0) { PyErr_SetString(PyExc_ValueError, "distance cannot be negative or 0"); return -1; } if (dist > s->outcnt) { PyErr_SetString(PyExc_ValueError, "distance too far back"); return -1; } if (PyByteArray_Resize(s->out, s->outcnt + len) < 0) { PyErr_NoMemory(); return -1; } str = PyByteArray_AS_STRING(s->out); while (len--) { str[s->outcnt] = str[s->outcnt - dist]; s->outcnt++; } return 0; } /* Given the list of code lengths length[0..n-1] representing a canonical Huffman code for n symbols, construct the tables required to decode those codes. 
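   The resulting tables are h->count[] (the number of codes of each bit
   length) and h->symbol[] (the symbols sorted by code length, and by symbol
   value within each length); decode() walks these tables one input bit at
   a time.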
*/ static int construct(struct huffman *h, const short *length, int n) { int symbol; /* current symbol when stepping through length[] */ int len; /* current length when stepping through h->count[] */ int left; /* number of possible codes left of current length */ short offs[MAXBITS+1]; /* offsets in symbol table for each length */ /* count number of codes of each length */ for (len = 0; len <= MAXBITS; len++) h->count[len] = 0; for (symbol = 0; symbol < n; symbol++) (h->count[length[symbol]])++; /* assumes lengths are within bounds */ if (h->count[0] == n) /* no codes! */ return 0; /* complete, but decode() will fail */ /* check for an over-subscribed or incomplete set of lengths */ left = 1; /* one possible code of zero length */ for (len = 1; len <= MAXBITS; len++) { left <<= 1; /* one more bit, double codes left */ left -= h->count[len]; /* deduct count from possible codes */ if (left < 0) return left; /* over-subscribed--return negative */ } /* left > 0 means incomplete */ /* generate offsets into symbol table for each length for sorting */ offs[1] = 0; for (len = 1; len < MAXBITS; len++) offs[len + 1] = offs[len] + h->count[len]; /* put symbols in table sorted by length, by symbol order within each length */ for (symbol = 0; symbol < n; symbol++) if (length[symbol] != 0) h->symbol[offs[length[symbol]]++] = symbol; /* return zero for complete set, positive for incomplete set */ return left; } /* decode literal/length and distance codes until an end-of-block code */ static int codes(state_obj *s, const struct huffman *lencode, const struct huffman *distcode) { int symbol; /* decoded symbol */ static const short lens[29] = { /* size base for length codes 257..285 */ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258}; static const short lext[29] = { /* extra bits for length codes 257..285 */ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0}; static const short dists[30] = { /* offset base for distance codes 0..29 */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577}; static const short dext[30] = { /* extra bits for distance codes 0..29 */ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; /* decode literals and length/distance pairs */ do { symbol = decode(s, lencode); if (symbol < 0) /* error in decode() */ return -1; if (symbol < 256) { /* literal: symbol is the byte */ /* write out the literal */ if (append_byte(s, symbol) < 0) return -1; } else if (symbol > 256) { int len; /* length for copy */ unsigned dist; /* distance for copy */ /* get and compute length */ symbol -= 257; if (symbol >= 29) { PyErr_Format(PyExc_ValueError, "invalid fixed code: %d", symbol); return -1; } len = lens[symbol] + read_uint(s, lext[symbol]); /* get and check distance */ symbol = decode(s, distcode); if (symbol < 0) /* error in decode() */ return -1; dist = dists[symbol] + read_uint(s, dext[symbol]); /* copy length bytes from distance bytes back */ if (dist_len_copy(s, dist, len) < 0) return -1; } } while (symbol != 256); /* end of block symbol */ /* done with a valid fixed or dynamic block */ return 0; } /* ------------------------ State Python interface ------------------ */ /* set during module init */ static PyTypeObject *bitarray_type; /* create a new initialized canonical Huffman decode iterator object */ static PyObject * state_new(PyTypeObject *type, PyObject 
*args, PyObject *kwds) { PyObject *in, *out; state_obj *self; if (!PyArg_ParseTuple(args, "OO:State", &in, &out)) return NULL; if (!PyObject_TypeCheck(in, bitarray_type)) { PyErr_SetString(PyExc_TypeError, "bitarray expected"); return NULL; } if (!PyByteArray_Check(out)) { PyErr_SetString(PyExc_TypeError, "bytearary expected"); return NULL; } self = (state_obj *) type->tp_alloc(type, 0); if (self == NULL) return NULL; Py_INCREF(in); self->in = (bitarrayobject *) in; self->incnt = 0; Py_INCREF(out); self->out = out; self->outcnt = PyByteArray_Size(out); return (PyObject *) self; } /* append one byte to self->out */ static PyObject * state_append_byte(state_obj *self, PyObject *obj) { Py_ssize_t byte; byte = PyNumber_AsSsize_t(obj, NULL); if (byte == -1 && PyErr_Occurred()) return NULL; if (append_byte(self, (int) byte) < 0) return NULL; Py_RETURN_NONE; } /* extend self->out with n bytes from self->in */ static PyObject * state_extend_block(state_obj *self, PyObject *value) { Py_ssize_t nbytes; nbytes = PyNumber_AsSsize_t(value, NULL); if (nbytes == -1 && PyErr_Occurred()) return NULL; if (nbytes >> 16) return PyErr_Format(PyExc_ValueError, "invalid block size: %zd", nbytes); if (self->incnt % 8 != 0) { PyErr_SetString(PyExc_ValueError, "bits not aligned"); return NULL; } if (self->incnt + 8 * nbytes > self->in->nbits) { PyErr_SetString(PyExc_ValueError, "not enough input"); return NULL; } if (PyByteArray_Resize(self->out, self->outcnt + nbytes) < 0) return PyErr_NoMemory(); memcpy(PyByteArray_AS_STRING(self->out) + self->outcnt, self->in->ob_item + self->incnt / 8, (size_t) nbytes); self->incnt += 8 * nbytes; self->outcnt += nbytes; Py_RETURN_NONE; } /* set array[0..n-1] from the n items of the Python sequence */ static int set_lengths(PyObject *sequence, Py_ssize_t n, short *array) { Py_ssize_t i, len; if (!PySequence_Check(sequence)) { PyErr_SetString(PyExc_TypeError, "sequence expected"); return -1; } if (PySequence_Size(sequence) != n) { PyErr_Format(PyExc_ValueError, "sequence of size %zd expected", n); return -1; } for (i = 0; i < n; i++) { PyObject *item = PySequence_GetItem(sequence, i); if (item == NULL) return -1; len = PyNumber_AsSsize_t(item, PyExc_OverflowError); Py_DECREF(item); if (len == -1 && PyErr_Occurred()) return -1; if (len < 0 || len > MAXBITS) { PyErr_Format(PyExc_ValueError, "length cannot be negative or " "larger than %d, got %zd", MAXBITS, len); return -1; } array[i] = (short) len; } return 0; } #define CHECK_MAX(n, maxcodes) \ if (n < 0) \ return PyErr_Format(PyExc_ValueError, \ "size of length list cannot be negative: %zd", n); \ if (n > maxcodes) \ return PyErr_Format(PyExc_ValueError, \ "size of length list too large: %zd > %d", n, maxcodes) /* given the liter/lengths and distance lengths as one big list, decode literal/length and distance codes until an end-of-block code */ static PyObject * state_decode_block(state_obj *self, PyObject *args) { PyObject *sequence; int nlen, ndist; struct huffman lencode, distcode; /* length and distance codes */ short lengths[FIXLCODES + MAXDCODES]; /* descriptor code lengths */ short lencnt[MAXBITS+1], lensym[FIXLCODES]; /* lencode memory */ short distcnt[MAXBITS+1], distsym[MAXDCODES]; /* distcode memory */ int err; /* construct() return value */ if (!PyArg_ParseTuple(args, "Oii:decode_block", &sequence, &nlen, &ndist)) return NULL; /* check arguments and set values in lengths[0..nlen+ndist-1] */ CHECK_MAX(nlen, FIXLCODES); CHECK_MAX(ndist, MAXDCODES); if (set_lengths(sequence, nlen + ndist, lengths) < 0) return NULL; /* 
build huffman table for literal/length codes */ lencode.count = lencnt; lencode.symbol = lensym; err = construct(&lencode, lengths, nlen); if (err && (err < 0 || nlen != lencode.count[0] + lencode.count[1])) { PyErr_SetString(PyExc_ValueError, "incomplete literal/lengths code"); return NULL; } /* build huffman table for distance codes */ distcode.count = distcnt; distcode.symbol = distsym; err = construct(&distcode, lengths + nlen, ndist); /* Fixed distance codes also have two invalid symbols that should result in an error if received. This can be implemented as an incomplete code, which is why the error is ignored for fixed codes. */ if (nlen != FIXLCODES && err && (err < 0 || ndist != distcode.count[0] + distcode.count[1])) { PyErr_SetString(PyExc_ValueError, "incomplete distance code"); return NULL; } /* decode data until end-of-block code */ if (codes(self, &lencode, &distcode) < 0) return NULL; Py_RETURN_NONE; } /* create a Python list from array[0..n-1] with n elements */ static PyObject * list_from_shorts(const short *array, Py_ssize_t n) { PyObject *list, *item; Py_ssize_t i; list = PyList_New(n); if (list == NULL) return NULL; for (i = 0; i < n; i++) { item = PyLong_FromLong((long) array[i]); if (item == NULL) { Py_DECREF(list); return NULL; } PyList_SET_ITEM(list, i, item); } return list; } /* given the code length code lengths (always 19 of them), decode the liter/lengths and distance lengths into one big list */ static PyObject * state_decode_lengths(state_obj *self, PyObject *args) { PyObject *sequence; int ncode; /* number of lengths in descriptor (nlen + ndist) */ int index; /* index of lengths[] */ int err; /* construct() return value */ short lengths[MAXCODES]; /* descriptor code lengths */ short cnt[MAXBITS+1], sym[19]; /* codelencode memory */ struct huffman codelencode; /* length and distance code length code */ if (!PyArg_ParseTuple(args, "Oi:decode_lengths", &sequence, &ncode)) return NULL; /* check arguments and set lengths[0..18] */ if (set_lengths(sequence, 19, lengths) < 0) return NULL; CHECK_MAX(ncode, MAXCODES); /* build huffman table for code lengths codes (codelencode) */ codelencode.count = cnt; codelencode.symbol = sym; err = construct(&codelencode, lengths, 19); if (err != 0) { PyErr_SetString(PyExc_ValueError, "require complete code"); return NULL; } /* as the coding information from lengths[] is now in codelencode, we can now use lengths[] to write the decoded codelencode into */ /* read length/literal and distance code length tables */ index = 0; while (index < ncode) { int symbol; /* decoded value */ symbol = decode(self, &codelencode); if (symbol < 0) { PyErr_SetString(PyExc_ValueError, "invalid symbol"); return NULL; } if (symbol < 16) /* length in 0..15 */ lengths[index++] = symbol; else { /* repeat instruction */ int len = 0; /* last length to repeat, assume repeating zeros */ int n; /* time to repeat last length */ if (symbol == 16) { /* repeat last length 3..6 times */ if (index == 0) { PyErr_SetString(PyExc_ValueError, "no last length!"); return NULL; } len = lengths[index - 1]; /* last length */ n = 3 + read_uint(self, 2); } else if (symbol == 17) /* repeat zero 3..10 times */ n = 3 + read_uint(self, 3); else /* == 18, repeat zero 11..138 times */ n = 11 + read_uint(self, 7); if (index + n > ncode) { PyErr_SetString(PyExc_ValueError, "too many lengths!"); return NULL; } while (n--) /* repeat last or zero n times */ lengths[index++] = len; } } /* check for end-of-block code -- there better be one! 
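       (symbol 256 is the end-of-block code; if the descriptor assigns it
       length zero, the block could never terminate)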
*/ if (lengths[256] == 0) { PyErr_SetString(PyExc_ValueError, "no end-of-block code!"); return NULL; } return list_from_shorts(lengths, ncode); } /* copy 'len' bytes starting at 'dist' bytes ago in self->out; if the count 'len' exceeds the distance 'dist', then some of the output data will be a copy of data that was copied earlier in the process */ static PyObject * state_copy(state_obj *self, PyObject *args) { int dist, len; if (!PyArg_ParseTuple(args, "ii:copy", &dist, &len)) return NULL; if (dist_len_copy(self, dist, len) < 0) return NULL; Py_RETURN_NONE; } /* return the value of the bit input counter */ static PyObject * state_get_incnt(state_obj *self) { return PyLong_FromSsize_t(self->incnt); } /* read numbits from the bit input and return them as an integer */ static PyObject * state_read_uint(state_obj *self, PyObject *obj) { Py_ssize_t numbits, res = 0; int i; numbits = PyNumber_AsSsize_t(obj, NULL); if (numbits == -1 && PyErr_Occurred()) return NULL; if (numbits < 0) { PyErr_SetString(PyExc_ValueError, "number of bits cannot be negative"); return NULL; } if (self->incnt + numbits > self->in->nbits) { PyErr_SetString(PyExc_ValueError, "not enough bits in buffer"); return NULL; } for (i = 0; i < numbits; i++) res |= (Py_ssize_t) getbit(self->in, self->incnt++) << i; return PyLong_FromSsize_t(res); } static PyMethodDef state_methods[] = { {"append_byte", (PyCFunction) state_append_byte, METH_O, 0}, {"extend_block", (PyCFunction) state_extend_block, METH_O, 0}, {"decode_block", (PyCFunction) state_decode_block, METH_VARARGS, 0}, {"decode_lengths", (PyCFunction) state_decode_lengths, METH_VARARGS, 0}, {"copy", (PyCFunction) state_copy, METH_VARARGS, 0}, {"get_incnt", (PyCFunction) state_get_incnt, METH_NOARGS, 0}, {"read_uint", (PyCFunction) state_read_uint, METH_O, 0}, {NULL, NULL} /* sentinel */ }; static void state_dealloc(state_obj *self) { Py_TYPE(self)->tp_free((PyObject *) self); } static PyTypeObject state_type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "State", .tp_basicsize = sizeof(state_obj), .tp_dealloc = (destructor) state_dealloc, .tp_hash = PyObject_HashNotImplemented, .tp_getattro = PyObject_GenericGetAttr, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, .tp_methods = state_methods, .tp_alloc = PyType_GenericAlloc, .tp_new = state_new, .tp_free = PyObject_Del, }; /* --------------------------------------------------------------------- */ static PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "_puff", 0, -1, }; PyMODINIT_FUNC PyInit__puff(void) { PyObject *m; bitarray_type = (PyTypeObject *) bitarray_module_attr("bitarray"); if (bitarray_type == NULL) return NULL; if ((m = PyModule_Create(&moduledef)) == NULL) return NULL; Py_SET_TYPE(&state_type, &PyType_Type); Py_INCREF((PyObject *) &state_type); PyModule_AddObject(m, "State", (PyObject *) &state_type); PyModule_AddObject(m, "MAXLCODES", PyLong_FromSsize_t(MAXLCODES)); PyModule_AddObject(m, "MAXDCODES", PyLong_FromSsize_t(MAXDCODES)); PyModule_AddObject(m, "FIXLCODES", PyLong_FromSsize_t(FIXLCODES)); return m; } bitarray-3.7.1/examples/puff/gunzip.py000066400000000000000000000111451505414144000200030ustar00rootroot00000000000000import sys import datetime import zlib from pprint import pprint from bitarray import bitarray from puff import Puff class GunZip(Puff): operating_system = { 0: "FAT", 1: "Amiga", 2: "VMS", 3: "Unix", 4: "VM/CMS", 5: "Atari TOS", 6: "HPFS", 7: "Macintosh", 8: "Z-System", 9: "CP/M", 10: "TOPS-20", 11: "NTFS", 12: "QDOS", 13: "Acorn RISCOS", 255: "Unknown", } def
read_nul_terminated_string(self) -> str: a = bytearray() while True: b: int = self.read_uint(8) if b == 0: return a.decode("UTF-8") a.append(b) def read_header(self, verbose=False) -> None: def vprint(txt): if verbose: print(txt) if self.read_uint(16) != 0x8b1f: raise ValueError("Invalid GZIP magic number") cmeth = self.read_uint(8) if cmeth != 8: raise ValueError(f"Unsupported compression method: {str(cmeth)}") # reserved flags flags: int = self.read_uint(8) if flags & 0xe0 != 0: vprint("Reserved flags are set") # modification time mtime = self.read_uint(32) if mtime != 0: dt = datetime.datetime.fromtimestamp(mtime, datetime.timezone.utc) vprint(f"Last modified: {dt}") else: vprint("Last modified: N/A") # extra flags extraflags = self.read_uint(8) if extraflags == 2: vprint("Extra flags: Maximum compression") elif extraflags == 4: vprint("Extra flags: Fastest compression") else: vprint(f"Extra flags: Unknown ({extraflags})") osbyte = self.read_uint(8) osstr: str = self.operating_system.get(osbyte, "Really unknown") vprint(f"Operating system: {osstr}") # handle assorted flags if flags & 0x01: vprint("Flag: Text") if flags & 0x04: vprint("Flag: Extra") count: int = self.read_uint(16) while count > 0: # Skip extra data self.read_uint(8) count -= 1 if flags & 0x08: vprint(f"File name: {self.read_nul_terminated_string()}") if flags & 0x02: vprint(f"Header CRC-16: {self.read_uint(16):04X}") if flags & 0x10: vprint(f"Comment: {self.read_nul_terminated_string()}") def check_footer(self, decomp): self.align_byte_boundary() crc = self.read_uint(32) size = self.read_uint(32) # check decompressed data's length and CRC if size != len(decomp): raise ValueError(f"Size mismatch: expected={size}, " f"actual={len(decomp)}") actualcrc = zlib.crc32(decomp) & 0xffffffff if crc != actualcrc: raise ValueError(f"CRC-32 mismatch: expected={crc:08X}, " f"actual={actualcrc:08X}") def print_dot(*args): sys.stdout.write('.') sys.stdout.flush() def decompress_file(infile, outfile, opts): # read input file and store content in little endian bitarray input_bits = bitarray(0, 'little') with open(infile, "rb") as fi: input_bits.fromfile(fi) # gunzip: the output is accumulated in a bytearray output = bytearray() d = GunZip(input_bits, output) d.read_header(verbose=opts.verbose) stats = d.process_blocks(print_dot if opts.progress else None) d.check_footer(output) if opts.progress: sys.stdout.write('\n') if opts.stats: pprint(stats) # write output to file with open(outfile, "wb") as fo: fo.write(output) def main(): from argparse import ArgumentParser p = ArgumentParser() p.add_argument('-p', '--progress', action="store_true", help="show progress while decoding") p.add_argument('-s', '--stats', action="store_true", help="show block statistics") p.add_argument('-v', '--verbose', action="store_true") p.add_argument('-o', '--out', action="store", dest='dst', help='output filename') p.add_argument(dest='src', metavar='SRC') args = p.parse_args() if args.dst is None: if args.src.endswith('.gz'): args.dst = args.src[:-3] elif args.src.endswith('.tgz'): args.dst = '%s.tar' % args.src[:-4] else: p.error('cannot guess uncompressed filename from %r, ' 'please provide -o/-out option' % args.src) decompress_file(args.src, args.dst, args) if __name__ == "__main__": main() bitarray-3.7.1/examples/puff/puff.py000066400000000000000000000054431505414144000174330ustar00rootroot00000000000000from collections import Counter, defaultdict from _puff import State, MAXLCODES, MAXDCODES, FIXLCODES # fixed literal/lengths and distance lengths 
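# (Added note: these are the fixed Huffman code lengths given in RFC 1951, section 3.2.6 - literal/length symbols 0..143 get 8 bits, 144..255 get 9 bits, 256..279 get 7 bits, 280..287 get 8 bits, and every distance symbol gets 5 bits.)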
FIXED_LENGTHS = tuple( # literal/length lengths (FIXLCODES elements) [8] * 144 + [9] * 112 + [7] * 24 + [8] * 8 + # distance lengths (MAXDCODES elements) [5] * MAXDCODES ) class Puff(State): def process_blocks(self, callback=None): self.stats = defaultdict(Counter) while True: # read the three bit block header last = self.read_uint(1) # 1 if last block btype = self.read_uint(2) # block type self.stats['btype'][btype] += 1 if btype == 0: # process stored block self.process_stored_block() elif btype == 1: # process fixed block self.decode_block(FIXED_LENGTHS, FIXLCODES, MAXDCODES) elif btype == 2: # process dynamic block self.process_dynamic_block() else: raise ValueError("Reserved block type: %d" % btype) if callback: callback(self.stats) if last: break return self.stats def process_stored_block(self): # uncompressed block self.align_byte_boundary() # read length blen: int = self.read_uint(16) nlen: int = self.read_uint(16) if blen ^ 0xffff != nlen: raise ValueError("Invalid length in uncompressed block") self.stats['stored blen'][blen] += 1 # copy bytes self.extend_block(blen) def process_dynamic_block(self) -> None: # permutation of code length codes order = [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] # get number of lengths in each table, check lengths nlen = self.read_uint(5) + 257 ndist = self.read_uint(5) + 1 ncode = self.read_uint(4) + 4 if nlen > MAXLCODES or ndist > MAXDCODES: raise ValueError("bad counts") self.stats['dynamic nlen'][nlen] += 1 self.stats['dynamic ndist'][ndist] += 1 self.stats['dynamic ncode'][ncode] += 1 # read code length code lengths (really), missing lengths are zero lengths = 19 * [0] for index in range(ncode): lengths[order[index]] = self.read_uint(3) # decode literal/lengths and distance lengths lengths = self.decode_lengths(lengths, nlen + ndist) # decode actual block data self.decode_block(lengths, nlen, ndist) def align_byte_boundary(self): # discard bits to align to byte boundary skip = 8 - self.get_incnt() % 8 # bits to skip if skip != 8: self.read_uint(skip) bitarray-3.7.1/examples/puff/setup.py000066400000000000000000000005231505414144000176250ustar00rootroot00000000000000from os.path import dirname try: from setuptools import setup, Extension except ImportError: from distutils.core import setup, Extension import bitarray setup( name = "puff", ext_modules = [Extension( name = "_puff", sources = ["_puff.c"], include_dirs = [dirname(bitarray.__file__)], )], ) bitarray-3.7.1/examples/puff/test_puff.py000066400000000000000000000227101505414144000204660ustar00rootroot00000000000000import os import sys import zlib import unittest from bitarray import bitarray from puff import State, Puff, FIXLCODES, MAXDCODES, FIXED_LENGTHS class TestState(unittest.TestCase): def test_simple(self): a = bitarray(80) b = bytearray() s = State(a, b) self.assertEqual(s.get_incnt(), 0) self.assertEqual(len(b), 0) s.extend_block(4) self.assertEqual(s.get_incnt(), 32) self.assertEqual(len(b), 4) a[32:35] = bitarray('011') self.assertEqual(s.read_uint(3), 6) self.assertEqual(s.get_incnt(), 35) def test_read_uint(self): # works for either bit-endianness inp = bitarray('11011100 1') out = bytearray() s = State(inp, out) self.assertRaises(ValueError, s.read_uint, -1) # negative bits self.assertEqual(s.read_uint(0), 0) # reading zero bits is OK self.assertEqual(s.read_uint(3), 3) self.assertEqual(s.read_uint(5), 7) self.assertEqual(s.get_incnt(), 8) self.assertRaises(ValueError, s.read_uint, 2) # end of input self.assertEqual(s.read_uint(1), 1) 
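# all 9 input bits are consumed at this point; zero-bit reads still succeed, but reading any further bit must fail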
self.assertEqual(s.read_uint(0), 0) self.assertEqual(s.get_incnt(), 9) self.assertRaises(ValueError, s.read_uint, 1) # end of input self.assertEqual(len(out), 0) # nothing in output def test_read_uint32(self): a = bitarray(b'\x7e\xae\xd4\xbb', endian='little') s = State(a, bytearray()) self.assertEqual(s.read_uint(32), 0xbbd4ae7e) self.assertEqual(s.get_incnt(), 32) a = bitarray(32 * '1', endian='little') s = State(a, bytearray()) self.assertEqual(s.read_uint(32), (1 << 32) - 1) self.assertEqual(s.get_incnt(), 32) def test_copy(self): a = bitarray() # nothing is read from input in this test out = bytearray(b'ABC') s = State(a, out) s.copy(3, 2) self.assertEqual(bytes(out), b'ABCAB') self.assertRaises(ValueError, s.copy, 6, 1) # distance too far back s.copy(5, 10) s.copy(6, 0) # does nothing (length is zero) self.assertEqual(bytes(out), b'ABCABABCABABCAB') self.assertRaises(ValueError, s.copy, 0, 1) # distance zero self.assertRaises(ValueError, s.copy, -1, 1) # distance negative self.assertRaises(ValueError, s.copy, 16, 1) # distance too far back self.assertRaises(ValueError, s.copy, 1, -1) # length negative def test_append_byte(self): out = bytearray() s = State(bitarray(), out) s.append_byte(0) self.assertRaises(ValueError, s.append_byte, -1) self.assertRaises(ValueError, s.append_byte, 256) s.append_byte(255) self.assertEqual(bytes(out), b'\0\xff') def test_extend_block(self): a = bitarray(b'ABCDEF') b = bytearray() s = State(a, b) s.extend_block(0) self.assertEqual(bytes(b), b'') s.extend_block(2) self.assertEqual(bytes(b), b'AB') self.assertRaises(ValueError, s.extend_block, 5) # not enough input s.extend_block(1) self.assertEqual(bytes(b), b'ABC') self.assertEqual(s.get_incnt(), 24) s.read_uint(3) self.assertRaises(ValueError, s.extend_block, 1) # input unaligned s.read_uint(5) s.extend_block(2) self.assertEqual(bytes(b), b'ABCEF') # we skipped 'D' # invalid block size self.assertRaises(ValueError, s.extend_block, -1) self.assertRaises(ValueError, s.extend_block, 0x10000) def test_decode_lengths(self): # this is taken from the stream of dynamic header bits - after nlen, # ndist, ncode and the (up to 19) code length code lengths are read a = bitarray(''' 11001100 00001100 00011101 11011101 11000011 00000111 00001000 00101100 00011100 10000011 01100000 11101110 11101011 00000111 01011000 00111011 10111100 00000010 00011000 00111011 10111010 01000010 00001110 11100001 00000111 01110000 10010010 00001001 10000011 10110100 00100001 10000011 10111011 10101100 00011101 11011100 00100111 10111111 11011001 11011110 11111011 01010111 10110100 11111010 11101010 11101010 10101110 11110100 01011110 10001110 01010101 11010101 01011111 11010111 1100 ''') ncode = 279 + 23 b = bytearray() s = State(a, b) length = s.decode_lengths([4, 0, 6, 5, 4, 0, 0, 4, 2, 3, 6, 0, 5, 5, 0, 0, 2, 4, 0], ncode) # no bytes were added to the output stream self.assertEqual(len(b), 0) # the code lengths list contains literal/lengths and distance codes self.assertEqual(len(length), ncode) # we've exhausted the input array exactly self.assertEqual(s.get_incnt(), len(a)) # simple sum check, as I didn't want to cut and paste the whole list self.assertEqual(sum(length), 2183) def test_decode_lengths_error(self): a = bitarray(1000) b = bytearray() s = State(a, b) lengths = 19 * [0] # nlen > 316 (MAXCODES) self.assertRaises(ValueError, s.decode_lengths, lengths, 317) # sequence length not 19 self.assertRaises(ValueError, s.decode_lengths, 20 * [0], 316) lengths[1] = 16 # length[1] > MAXBITS self.assertRaises(ValueError, 
s.decode_lengths, lengths, 316) def test_decode_block_error(self): a = bitarray(1000) b = bytearray() s = State(a, b) lengths = 302 * [0] # nlen > 288 (FIXLCODES) self.assertRaises(ValueError, s.decode_block, lengths, 289, 23) # ndist > 30 (MAXDCODES) self.assertRaises(ValueError, s.decode_block, lengths, 279, 31) # sequence length not 279 + 23 = 302 self.assertRaises(ValueError, s.decode_block, 301 * [0], 279, 23) lengths[1] = 16 # length[1] > MAXBITS self.assertRaises(ValueError, s.decode_block, lengths, 279, 23) class TestFixedBlock(unittest.TestCase): @staticmethod def decode(a, buffer=b''): res = bytearray(buffer) s = State(a, res) s.decode_block(FIXED_LENGTHS, FIXLCODES, MAXDCODES) return bytes(res) def test_literal(self): a = bitarray('01111001 10011100 10010001 10011110 0000000') # I l a n end-of-block self.assertEqual(self.decode(a), b"Ilan") def test_rle(self): a = bitarray('01110001 0000001 00000 0000000') # A len=3 dist=1 end-of-block self.assertEqual(self.decode(a), b"AAAA") def test_rle_258(self): a = bitarray('01110001 11000101 00000 0000000') # A len=258 dist=1 end-of-block self.assertEqual(self.decode(a), 259 * b"A") a = bitarray('01110010 11000100 11111 00000 0000000') # B len=227 31 dist=1 end-of-block # here len = 227 + 31 = 258, same as before self.assertEqual(self.decode(a), 259 * b"B") def test_max_back(self): a = bitarray('0000001 11101 1111111111111 0000000') # len=3 dist=24577 + 8191 buffer = b'ABCD' + 32764 * b'-' self.assertEqual(len(buffer), 1 << 15) out = self.decode(a, buffer) self.assertEqual(out, buffer + b'ABC') def test_too_far_back(self): a = bitarray('01110001 0000001 00001 0000000') # A len=3 dist=2 end-of-block self.assertRaises(ValueError, self.decode, a) def test_invalid_length_symbols(self): a = bitarray('11000110') # symbol 286 self.assertRaises(ValueError, self.decode, a) a = bitarray('11000111') # symbol 287 self.assertRaises(ValueError, self.decode, a) class TestPuff(unittest.TestCase): def test_constants(self): self.assertEqual(len(FIXED_LENGTHS), FIXLCODES + MAXDCODES) def test_align_byte_boundary(self): a = bitarray(15) d = Puff(a, bytearray()) d.read_uint(5) d.align_byte_boundary() self.assertEqual(d.get_incnt(), 8) d.align_byte_boundary() self.assertEqual(d.get_incnt(), 8) d.read_uint(1) self.assertRaises(ValueError, d.align_byte_boundary) def round_trip(self, data, level=-1): compressed = zlib.compress(data, level=level) a = bitarray(compressed, 'little') out = bytearray() p = Puff(a, out) # check zlib header self.assertEqual(p.read_uint(8), 0x78) self.assertTrue(p.read_uint(8) in (0x01, # no compression 0x5e, # low compression 0x9c, # default compression 0xda)) # best compression p.process_blocks() self.assertEqual(bytes(out), data) def test_zeros(self): for n in 0, 1, 10, 100, 1000, 10_000: self.round_trip(n * b'\0') def test_this_file(self): with open(__file__, 'rb') as f: data = f.read() for level in range(1, 10): self.round_trip(data, level) @unittest.skipIf(sys.platform == "win32", "Windows doesn't have this file") def test_words(self): with open('/usr/share/dict/words', 'rb') as f: data = f.read() self.round_trip(data) def test_random_data(self): self.round_trip(os.urandom(2000)) if __name__ == '__main__': unittest.main() bitarray-3.7.1/examples/sieve.py000066400000000000000000000030431505414144000166400ustar00rootroot00000000000000""" Demonstrates the implementation of "Sieve of Eratosthenes" algorithm for finding all prime numbers up to any given limit. It should be noted that bitarray version 3.7 added util.gen_primes(). 
The library function basically uses an optimized version of the same algorithm. """ from math import isqrt from bitarray import bitarray from bitarray.util import ones, count_n, sum_indices N = 100_000_000 # Each bit a[i] corresponds to whether or not i is a prime a = ones(N) # Zero and one are not prime a[:2] = False # Perform sieve for i in range(2, isqrt(N) + 1): if a[i]: # i is prime, so all multiples are not a[i*i::i] = False stop = 50 print('primes up to %d are:' % stop) print(list(a.search(1, 0, stop))) # There are 5,761,455 primes up to 100 million x = a.count() print('there are {:,d} primes up to {:,d}'.format(x, N)) assert x == 5_761_455 or N != 100_000_000 # The number of twin primes up to 100 million is 440,312 # we need to add 1 as .count() only counts non-overlapping sub_bitarrays # and (3, 5) as well as (5, 7) are both twin primes x = a.count(bitarray('101')) + 1 print('number of twin primes up to {:,d} is {:,d}'.format(N, x)) assert x == 440_312 or N != 100_000_000 # The 1 millionth prime number is 15,485,863 m = 1_000_000 x = count_n(a, m) - 1 print('the {:,d}-th prime is {:,d}'.format(m, x)) assert x == 15_485_863 # The sum of all prime numbers below one million is 37,550,402,023. m = 1_000_000 x = sum_indices(a[:m]) print('the sum of prime numbers below {:,d} is {:,d}'.format(m, x)) assert x == 37_550_402_023 bitarray-3.7.1/examples/smallints.py000066400000000000000000000024221505414144000175330ustar00rootroot00000000000000from bitarray import bitarray from bitarray.util import int2ba, ba2int, pprint class SmallIntArray(object): """ Class which allows efficiently storing an array of integers represented by a specified number of bits. For example, an array with 1000 5-bit integers can be created, allowing each element in the array to take values from 0 to 31, while the size of the object is 625 (5000/8) bytes. """ def __init__(self, N, k): self.N = N # number of integers self.k = k # bits for each integer self.array = bitarray(N * k) def slice_i(self, i): assert 0 <= i < self.N return slice(self.k * i, self.k * (i + 1)) def __getitem__(self, i): return ba2int(self.array[self.slice_i(i)]) def __setitem__(self, i, v): self.array[self.slice_i(i)] = int2ba(v, self.k) if __name__ == '__main__': from random import randrange # define array with 1000 integers, each represented by 5 bits a = SmallIntArray(1000, 5) b = [] # store values, for assertion below for i in range(1000): v = randrange(32) b.append(v) a[i] = v print(b[:5]) print(a.array.buffer_info()) pprint(a.array[:25], group=5) for i in range(1000): assert a[i] == b[i] bitarray-3.7.1/examples/sparse/000077500000000000000000000000001505414144000164505ustar00rootroot00000000000000bitarray-3.7.1/examples/sparse/Makefile000066400000000000000000000001631505414144000201100ustar00rootroot00000000000000test: python tests.py - python tests.py flips python tests.py ones clean: rm -f *.pyc *~ rm -rf __pycache__ bitarray-3.7.1/examples/sparse/README.txt000066400000000000000000000007751505414144000201550ustar00rootroot00000000000000Ilan Schnell January, 2023 Here are two different implementations of sparse bitarrays. Makefile: Run: $ make test common.py Common functionality used by both the flips and ones implementations. flips.py The bitarray is represented by a list of positions at which a bit changes from 1 to 0 or vice versa. ones.py: The bitarray is represented by a (sorted) list containing the positions of the 1 bits (as well as the length of the array).
tests.py Tests for both implementations bitarray-3.7.1/examples/sparse/common.py000066400000000000000000000021441505414144000203130ustar00rootroot00000000000000class Common: def __repr__(self): return "SparseBitarray('%s')" % (''.join(str(v) for v in self)) def pop(self, i = -1): if i < 0: i += len(self) res = self[i] del self[i] return res def remove(self, value): i = self.find(value) if i < 0: raise ValueError del self[i] def sort(self, reverse=False): if reverse: c1 = self.count(1) self[:c1:] = 1 self[c1::] = 0 else: c0 = self.count(0) self[:c0:] = 0 self[c0::] = 1 def _get_start_stop(self, key): if key.step not in (1, None): raise ValueError("only step = 1 allowed, got %r" % key) start = key.start if start is None: start = 0 stop = key.stop if stop is None: stop = len(self) return start, stop def _adjust_index(self, i): n = len(self) if i < 0: i += n if i < 0: i = 0 elif i > n: i = n return i bitarray-3.7.1/examples/sparse/flips.py000066400000000000000000000125571505414144000201470ustar00rootroot00000000000000""" Implementation of a sparse bitarray Internally we store a list of positions at which a bit changes from 1 to 0 or vice versa. Moreover, we start with bit 0, meaning that if the first bit in the bitarray is 1 our list starts with position 0. For example: bitarray('110011111000') is represented as: flips: [0, 2, 4, 9, 12] The last element in the list is always the length of the bitarray, such that an empty bitarray is represented as [0]. """ from bisect import bisect, bisect_left from bitarray import bitarray from common import Common class SparseBitarray(Common): def __init__(self, x = 0): if isinstance(x, int): self.flips = [x] # bitarray with x zeros else: self.flips = [0] for v in x: self.append(int(v)) def __len__(self): return self.flips[-1] def __getitem__(self, key): if isinstance(key, slice): start, stop = self._get_start_stop(key) if stop <= start: return SparseBitarray() i = bisect(self.flips, start) j = bisect_left(self.flips, stop) res = SparseBitarray() res.flips = [0] if i % 2 else [] for k in range(i, j): res.flips.append(self.flips[k] - start) res.flips.append(stop - start) return res elif isinstance(key, int): if not 0 <= key < len(self): raise IndexError return bisect(self.flips, key) % 2 else: raise TypeError def __setitem__(self, key, value): if isinstance(key, slice): start, stop = self._get_start_stop(key) if stop <= start: return i = bisect(self.flips, start) j = bisect_left(self.flips, stop) self.flips[i:j] = ( ([] if i % 2 == value else [start]) + ([] if j % 2 == value else [stop]) ) elif isinstance(key, int): if not 0 <= key < len(self): raise IndexError p = bisect(self.flips, key) if p % 2 == value: return self.flips[p:p] = [key, key + 1] else: raise TypeError self._reduce() def __delitem__(self, key): if isinstance(key, slice): start, stop = self._get_start_stop(key) if stop <= start: return i = bisect(self.flips, start) j = bisect_left(self.flips, stop) size = stop - start for k in range(j, len(self.flips)): self.flips[k] -= size self.flips[i:j] = [start] if (j - i) % 2 else [] elif isinstance(key, int): if not 0 <= key < len(self): raise IndexError p = bisect(self.flips, key) for j in range(p, len(self.flips)): self.flips[j] -= 1 else: raise TypeError self._reduce() def _reduce(self): n = self.flips[-1] # length of bitarray lst = [] # new representation list i = 0 while True: c = self.flips[i] # current element (at index i) if c == n: # element with bitarray length reached break j = i + 1 # find next value (at index j) while self.flips[j] == c: j
+= 1 if (j - i) % 2: # only append index if repeated odd times lst.append(c) i = j lst.append(n) self.flips = lst def _intervals(self): v = 0 start = 0 for stop in self.flips: yield v, start, stop v = 1 - v start = stop def append(self, value): if value == len(self.flips) % 2: # opposite value as last element self.flips.append(len(self) + 1) else: # same value as last element self.flips[-1] += 1 def extend(self, other): n = len(self) m = len(other.flips) if len(self.flips) % 2: self.flips.append(n) for i in range(m): self.flips.append(other.flips[i] + n) self._reduce() def find(self, value): if len(self) == 0: return -1 flips = self.flips if value: return -1 if len(flips) == 1 else flips[0] else: if flips[0] > 0: return 0 return -1 if len(flips) == 2 else flips[1] def to_bitarray(self): a = bitarray(len(self)) for v, start, stop in self._intervals(): a[start:stop] = v return a def invert(self): self.flips.insert(0, 0) self._reduce() def insert(self, i, value): i = self._adjust_index(i) p = bisect_left(self.flips, i) for j in range(p, len(self.flips)): self.flips[j] += 1 self[i] = value def count(self, value=1): cnt = 0 for v, start, stop in self._intervals(): if v == value: cnt += stop - start return cnt def reverse(self): n = len(self) lst = [0] if len(self.flips) % 2 else [] lst.extend(n - p for p in reversed(self.flips)) lst.append(n) self.flips = lst self._reduce() bitarray-3.7.1/examples/sparse/ones.py000066400000000000000000000102411505414144000177640ustar00rootroot00000000000000""" Implementation of a sparse bitarray For example: bitarray('110011111000') is represented as: length: 12 ones: [0, 1, 4, 5, 6, 7, 8] """ from bisect import bisect_left from bitarray import bitarray from common import Common class SparseBitarray(Common): def __init__(self, x = 0): if isinstance(x, int): self.n = x self.ones = [] else: self.n = 0 self.ones = [] for v in x: self.append(int(v)) def __len__(self): return self.n def __getitem__(self, key): if isinstance(key, slice): start, stop = self._get_start_stop(key) if stop <= start: return SparseBitarray() i = bisect_left(self.ones, start) j = bisect_left(self.ones, stop) res = SparseBitarray(stop - start) for k in range(i, j): res.ones.append(self.ones[k] - start) return res elif isinstance(key, int): if not 0 <= key < self.n: raise IndexError i = bisect_left(self.ones, key) return int(i != len(self.ones) and self.ones[i] == key) else: raise TypeError def __setitem__(self, key, value): if isinstance(key, slice): start, stop = self._get_start_stop(key) if stop <= start: return i = bisect_left(self.ones, start) j = bisect_left(self.ones, stop) del self.ones[i:j] if value == 0: return self.ones.extend((range(start, stop))) self.ones.sort() elif isinstance(key, int): if not 0 <= key < self.n: raise IndexError i = bisect_left(self.ones, key) if i != len(self.ones) and self.ones[i] == key: # key present if value == 0: del self.ones[i] else: # key not present if value == 1: self.ones.insert(i, key) else: raise TypeError def __delitem__(self, key): if isinstance(key, slice): start, stop = self._get_start_stop(key) if stop <= start: return i = bisect_left(self.ones, start) j = bisect_left(self.ones, stop) del self.ones[i:j] size = stop - start for k in range(i, len(self.ones)): self.ones[k] -= size self.n -= size elif isinstance(key, int): if not 0 <= key < len(self): raise IndexError i = bisect_left(self.ones, key) if i != len(self.ones) and self.ones[i] == key: del self.ones[i] for k in range(i, len(self.ones)): self.ones[k] -= 1 self.n -= 1 else: raise
TypeError def append(self, value): if value: self.ones.append(self.n) self.n += 1 def find(self, value): ones = self.ones if value: return ones[0] if ones else -1 else: m = len(ones) if m == self.n: return -1 for i in range(m): if ones[i] != i: return i return m def extend(self, other): self.ones.extend(other.ones[i] + self.n for i in range(len(other.ones))) self.n += other.n def to_bitarray(self): a = bitarray(self.n) a.setall(0) a[self.ones] = 1 return a def insert(self, k, value): k = self._adjust_index(k) i = bisect_left(self.ones, k) for j in range(i, len(self.ones)): self.ones[j] += 1 self.n += 1 self[k] = value def invert(self): self.ones = sorted(set(range(self.n)) - set(self.ones)) def count(self, value=1): if value: return len(self.ones) else: return self.n - len(self.ones) def reverse(self): lst = [self.n - i - 1 for i in self.ones] lst.reverse() self.ones = lst bitarray-3.7.1/examples/sparse/tests.py000066400000000000000000000147401505414144000201720ustar00rootroot00000000000000import sys import unittest from itertools import pairwise from random import getrandbits, randint, randrange from bitarray import bitarray from bitarray.util import intervals from bitarray.test_bitarray import Util if len(sys.argv) != 2 or sys.argv[1] not in ('flips', 'ones', '-'): sys.exit("Argument 'flips' or 'ones' expected.") MODE = sys.argv[1] del sys.argv[1] class TestsSparse(unittest.TestCase, Util): def check(self, s, a): if MODE == 'flips': self.assertTrue(len(s.flips) > 0) self.assertTrue(s.flips[0] >= 0) for x, y in pairwise(s.flips): self.assertTrue(y > x) self.assertEqual(s.to_bitarray(), a) elif MODE == 'ones': if s.ones: self.assertTrue(s.ones[-1] < s.n) for x, y in pairwise(s.ones): self.assertTrue(y > x) self.assertEqual(s.to_bitarray(), a) else: self.assertEqual(s, a) def test_init(self): if MODE != '-': for n in 0, 1, 2, 3, 99: a = bitarray(n) a.setall(0) t = BitArray(n) self.check(t, a) for s in '', '0', '1', '01110001': a = bitarray(s) t = BitArray(s) self.check(t, a) def test_repr(self): s = BitArray('01001') if MODE != '-': self.assertEqual(repr(s), "SparseBitarray('01001')") def test_len(self): for a in self.randombitarrays(): s = BitArray(a) self.assertEqual(len(s), len(a)) def test_getitem_index(self): for a in self.randombitarrays(start=1): s = BitArray(a) for i in range(len(a)): self.assertEqual(s[i], a[i]) def test_getitem_slice(self): for a in self.randombitarrays(): s = BitArray(a) i = randint(0, len(s)) j = randint(0, len(s)) self.check(s[i:j], a[i:j]) def test_setitem_index(self): for a in self.randombitarrays(start=1): s = BitArray(a) for _ in range(10): i = randrange(len(s)) v = getrandbits(1) s[i] = a[i] = v self.check(s, a) def test_setitem_slice(self): for a in self.randombitarrays(): s = BitArray(a) for _ in range(10): i = randint(0, len(s)) j = randint(0, len(s)) v = getrandbits(1) s[i:j] = a[i:j] = v self.check(s, a) def test_delitem_index(self): for a in self.randombitarrays(start=1): s = BitArray(a) i = randrange(len(s)) del s[i] del a[i] self.check(s, a) def test_delitem_slice(self): for a in self.randombitarrays(): s = BitArray(a) i = randint(0, len(s)) j = randint(0, len(s)) del s[i:j] del a[i:j] self.check(s, a) def test_append(self): for a in self.randombitarrays(): s = BitArray() for v in a: s.append(v) self.check(s, a) def test_find(self): for a in self.randombitarrays(): s = BitArray(a) for v in 0, 1: self.assertEqual(s.find(v), a.find(v)) def test_extend(self): for aa in self.randombitarrays(): for b in self.randombitarrays(): a = aa.copy() s =
BitArray(a) t = BitArray(b) s.extend(t) a.extend(b) self.check(s, a) s = BitArray(aa) s.extend(s) self.check(s, 2 * aa) def test_count(self): for a in self.randombitarrays(): s = BitArray(a) for v in 0, 1: self.assertEqual(s.count(v), a.count(v)) def test_insert(self): for a in self.randombitarrays(): s = BitArray(a) i = randint(-2, len(s) + 2) v = getrandbits(1) s.insert(i, v) a.insert(i, v) self.check(s, a) def test_invert(self): for a in self.randombitarrays(): s = BitArray(a) s.invert() a.invert() self.check(s, a) def test_pop(self): for a in self.randombitarrays(start=1): s = BitArray(a) i = randrange(-len(a), len(a)) self.assertEqual(s.pop(i), a.pop(i)) self.check(s, a) def test_remove(self): for a in self.randombitarrays(): s = BitArray(a) v = getrandbits(1) error = 0 try: s.remove(v) except ValueError: error += 1 try: a.remove(v) except ValueError: error += 1 self.assertTrue(error % 2 == 0) self.check(s, a) def test_reverse(self): for a in self.randombitarrays(): s = BitArray(a) s.reverse() a.reverse() self.check(s, a) def test_sort(self): for a in self.randombitarrays(): s = BitArray(a) for rev in 0, 1: s.sort(rev) a.sort(rev) self.check(s, a) if MODE == 'flips': def test_flips(self): for a in self.randombitarrays(): lst = [] if a and a[0] == 0 else [0] lst.extend(t[2] for t in intervals(a)) s = BitArray(a) self.assertEqual(s.flips, lst) def test_reduce(self): for a, b in [ ([0], [0]), ([0, 0], [0]), ([3, 7], [3, 7]), ([3, 7, 7], [3, 7]), ([3, 3, 7, 7, 7], [7]), ([3, 3, 3, 7, 7], [3, 7]), ([0, 0, 2, 2], [2]), ([0, 2, 2, 2, 2, 3], [0, 3]), ([0, 0, 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, 5], [0, 2, 3, 5]), ]: s = BitArray() s.flips = a s._reduce() self.assertEqual(s.flips, b) if __name__ == '__main__': if MODE == '-': BitArray = bitarray else: BitArray = __import__(MODE).SparseBitarray # type: ignore unittest.main() bitarray-3.7.1/examples/utf-8.py000066400000000000000000000021341505414144000164700ustar00rootroot00000000000000from bitarray import bitarray from bitarray.util import ba2int, pprint # See: https://en.wikipedia.org/wiki/UTF-8 def code_point(u): print('character:', u) b = u.encode('utf-8') print('hexadecimal:', ' '.join('%02x' % i for i in b)) a = bitarray(b, endian='big') pprint(a) # calculate binary code point from binary UTF-8 representation if a[0:1] == bitarray('0'): c = a[1:8] assert len(a) == 8 elif a[0:3] == bitarray('110'): c = a[3:8] + a[10:16] assert a[8:10] == bitarray('10') assert len(a) == 16 elif a[0:4] == bitarray('1110'): c = a[4:8] + a[10:16] + a[18:24] assert a[8:10] == a[16:18] == bitarray('10') assert len(a) == 24 elif a[0:5] == bitarray('11110'): c = a[5:8] + a[10:16] + a[18:24] + a[26:32] assert a[8:10] == a[16:18] == a[24:26] == bitarray('10') assert len(a) == 32 else: raise code_point = ba2int(c) print('code point:', hex(code_point)) print() for u in u'\u0024 \u00a2 \u20ac \ud55c \U00010348 \U0010ffff'.split(): code_point(u) bitarray-3.7.1/pyproject.toml000066400000000000000000000001541505414144000162510ustar00rootroot00000000000000# pyproject.toml [build-system] requires = ["setuptools >= 42.0.0"] build-backend = "setuptools.build_meta" bitarray-3.7.1/pytest.ini000066400000000000000000000007361505414144000153740ustar00rootroot00000000000000# This file merely exists to allow running the tests using pytest without # additional options. 
That is (from the source tree): # # $ pytest # # The bitarray tests themselves do not depend on pytest, and the recommended # way to run the tests is (from the source tree or in an installed system): # # $ python -c 'import bitarray; assert bitarray.test().wasSuccessful()' # # Unlike just running pytest, this will provide useful debug information. [pytest] testpaths = bitarray bitarray-3.7.1/setup.py000066400000000000000000000047261505414144000150560ustar00rootroot00000000000000import re import sys import platform if sys.version_info[:3] < (3, 6, 1): sys.exit("""\ **************************************************************************** * bitarray requires Python 3.6.1 or later. * The last version supporting Python 2 is bitarray 2.9.3. **************************************************************************** """) if "test" in sys.argv: import bitarray # when test was successful, return 0 (hence not) sys.exit(not bitarray.test().wasSuccessful()) try: from setuptools import setup, Extension except ImportError: from distutils.core import setup, Extension kwds = {} kwds['long_description'] = open('README.rst').read() # Read version from bitarray/bitarray.h pat = re.compile(r'#define\s+BITARRAY_VERSION\s+"(\S+)"', re.M) data = open('bitarray/bitarray.h').read() kwds['version'] = pat.search(data).group(1) macros = [] if platform.python_implementation() == 'PyPy': macros.append(("PY_LITTLE_ENDIAN", str(int(sys.byteorder == 'little')))) macros.append(("PY_BIG_ENDIAN", str(int(sys.byteorder == 'big')))) setup( name = "bitarray", author = "Ilan Schnell", author_email = "ilanschnell@gmail.com", url = "https://github.com/ilanschnell/bitarray", license = "PSF-2.0", classifiers = [ "Development Status :: 6 - Mature", "Intended Audience :: Developers", "Operating System :: OS Independent", "Programming Language :: C", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Topic :: Utilities", ], description = "efficient arrays of booleans -- C extension", packages = ["bitarray"], package_data = {"bitarray": ["*.h", "*.pickle", "py.typed", # see PEP 561 "*.pyi"]}, ext_modules = [Extension(name = "bitarray._bitarray", define_macros = macros, sources = ["bitarray/_bitarray.c"]), Extension(name = "bitarray._util", sources = ["bitarray/_util.c"])], zip_safe = False, **kwds ) bitarray-3.7.1/update_doc.py000066400000000000000000000242561505414144000160270ustar00rootroot00000000000000import re import sys from doctest import testfile from glob import glob from io import StringIO import bitarray.util BASE_URL = "https://github.com/ilanschnell/bitarray" NEW_IN = { 'bitarray': ['2.3: optional `buffer` argument', '3.4: allow initializer `bytes` or `bytearray` ' 'to set buffer directly'], 'bitarray.buffer_info': '3.7: return named tuple', 'bitarray.bytereverse': '2.2.5: optional start and stop arguments', 'bitarray.clear': '1.4', 'bitarray.count': ['1.1.0: optional start and stop arguments', '2.3.7: optional step argument', '2.9: add non-overlapping sub-bitarray count'], 'bitarray.decode': '3.0: returns iterator (equivalent to past ' '`.iterdecode()`)', 'bitarray.endian': '3.4: replaces former `.endian()` method', 'bitarray.extend': '3.4: allow `bytes` object', 'bitarray.find': ['2.1', '2.9: add
optional keyword argument `right`'], 'bitarray.frombytes': '2.5.0: allow bytes-like argument', 'bitarray.index': '2.9: add optional keyword argument `right`', 'bitarray.invert': '1.5.3: optional index argument', 'bitarray.pack': '2.5.0: allow bytes-like argument', 'bitarray.search': ['2.9: optional start and stop arguments - ' 'add optional keyword argument `right`', '3.0: returns iterator (equivalent to past ' '`.itersearch()`)'], 'bitarray.to01': '3.3: optional `group` and `sep` arguments', 'decodetree': '1.6', 'frozenbitarray': '1.1', 'get_default_endian': '1.3', 'util.any_and': '2.7', 'util.byteswap': '3.4', 'util.ba2base': ['1.9', '3.3: optional `group` and `sep` arguments'], 'util.base2ba': ['1.9', '3.3: ignore whitespace'], 'util.ba2hex': '3.3: optional `group` and `sep` arguments', 'util.hex2ba': '3.3: ignore whitespace', 'util.correspond_all': '3.4', 'util.count_n': '2.3.6: optional value argument', 'util.deserialize': ['1.8', '2.5.0: allow bytes-like argument'], 'util.intervals': '2.7', 'util.ones': '2.9', 'util.parity': '1.9', 'util.sum_indices': ['3.6', '3.7: add optional mode argument'], 'util.xor_indices': '3.2', 'util.pprint': '1.8', 'util.serialize': '1.8', 'util.urandom': '1.7', 'util.random_k': '3.6', 'util.random_p': '3.5', 'util.gen_primes': '3.7', 'util.sc_encode': '2.7', 'util.sc_decode': '2.7', 'util.vl_decode': '2.2', 'util.vl_encode': '2.2', 'util.canonical_huffman': '2.5', 'util.canonical_decode': '2.5', } DOCS = { 'ba3': ('Bitarray 3 transition', 'bitarray3.rst'), 'chc': ('Canonical Huffman Coding', 'canonical.rst'), 'rep': ('Bitarray representations', 'represent.rst'), 'rnd': ('Random Bitarrays', 'random_p.rst'), 'sc': ('Compression of sparse bitarrays', 'sparse_compression.rst'), 'vlf': ('Variable length bitarray format', 'variable_length.rst'), } DOC_LINKS = { 'bitarray.decode': 'ba3', 'bitarray.search': 'ba3', 'util.canonical_huffman': 'chc', 'util.canonical_decode': 'chc', 'util.ba2base': 'rep', 'util.base2ba': 'rep', 'util.deserialize': 'rep', 'util.random_p': 'rnd', 'util.serialize': 'rep', 'util.sc_decode': 'sc', 'util.sc_encode': 'sc', 'util.vl_decode': 'vlf', 'util.vl_encode': 'vlf', } NOTES = { 'bitarray.pack': """\ This method, as well as the ``.unpack()`` method, are meant for efficient transfer of data between bitarray objects to other Python objects (for example NumPy's ndarray object) which have a different memory view.""", 'bitarray.tolist': """\ Note that the list object being created will require 32 or 64 times more memory (depending on the machine architecture) than the bitarray object, which may cause a memory error if the bitarray is very large.""", 'util.gen_primes': """\ Apart from working with prime numbers, this function is useful for testing, as it provides a simple way to create a well-defined bitarray of any length.""", 'util.count_xor': " This is also known as the Hamming distance." } GETSET = { 'bitarray.endian': 'str', 'bitarray.nbytes': 'int', 'bitarray.padbits': 'int', 'bitarray.readonly': 'bool', } _NAMES = set() sig_pat = re.compile(r""" ( # group 1 (\w+) # function name, group 2 \([^()]*\) # (...) ) ( # optional group 3 \s->\s(.+) # return type, group 4 )? 
""", re.VERBOSE) def get_doc(name): parts = name.split('.') obj = bitarray while parts: obj = getattr(obj, parts.pop(0)) lines = obj.__doc__.splitlines() if len(lines) == 1: sig = '``%s`` -> %s' % (obj.__name__, GETSET[name]) return sig, lines m = sig_pat.match(lines[0]) if m is None: raise Exception("signature invalid: %r" % lines[0]) sig = '``%s``' % m.group(1) assert m.group(2) == obj.__name__, lines[0] if m.group(4): sig += ' -> %s' % m.group(4) assert lines[1] == '' return sig, lines[2:] def write_doc(fo, name): _NAMES.add(name) sig, lines = get_doc(name) fo.write(sig + '\n') for line in lines: out = line.rstrip() fo.write(" %s\n" % out.replace('`', '``') if out else "\n") note = NOTES.get(name) if note: fo.write("\n%s\n" % note) link = DOC_LINKS.get(name) if link: title, filename = DOCS[link] url = BASE_URL + '/blob/master/doc/' + filename fo.write("\n See also: `%s <%s>`__\n" % (title, url)) new_in = NEW_IN.get(name) if new_in: for line in new_in if isinstance(new_in, list) else [new_in]: fo.write("\n New in version %s\n" % line.replace('`', '``')) fo.write('\n\n') def write_reference(fo): fo.write("""\ Reference ========= bitarray version: %s -- `change log <%s>`__ In the following, ``item`` and ``value`` are usually a single bit - an integer 0 or 1. Also, ``sub_bitarray`` refers to either a bitarray, or an ``item``. The bitarray object: -------------------- """ % (bitarray.__version__, BASE_URL + "/blob/master/doc/changelog.rst")) write_doc(fo, 'bitarray') fo.write("bitarray methods:\n" "-----------------\n\n") for method in sorted(dir(bitarray.bitarray)): if method.startswith('_'): continue name = 'bitarray.%s' % method if name not in GETSET: write_doc(fo, name) fo.write("bitarray data descriptors:\n" "--------------------------\n\n" "Data descriptors were added in version 2.6.\n\n") for getset in sorted(dir(bitarray.bitarray)): name = 'bitarray.%s' % getset if name in GETSET: write_doc(fo, name) fo.write("Other objects:\n" "--------------\n\n") write_doc(fo, 'frozenbitarray') write_doc(fo, 'decodetree') fo.write("Functions defined in the `bitarray` module:\n" "-------------------------------------------\n\n") for func in sorted(['test', 'bits2bytes', 'get_default_endian']): write_doc(fo, func) fo.write("Functions defined in `bitarray.util` module:\n" "--------------------------------------------\n\n" "This sub-module was added in version 1.2.\n\n") for func in sorted(bitarray.util.__all__): write_doc(fo, 'util.%s' % func) for name in list(NEW_IN) + list(DOC_LINKS): assert name in _NAMES, name def update_readme(path): ver_pat = re.compile(r'(bitarray.+?)\s(\d+\.\d+\.\d+)') with open(path, 'r') as fi: data = fi.read() with StringIO() as fo: for line in data.splitlines(): if line == 'Reference': break line = ver_pat.sub(r'\1 ' + bitarray.__version__, line) fo.write("%s\n" % line.rstrip()) write_reference(fo) new_data = fo.getvalue() if new_data == data: print("already up-to-date") else: with open(path, 'w') as f: f.write(new_data) def write_changelog(fo): ver_pat = re.compile(r'(\d{4}-\d{2}-\d{2})\s+(\d+\.\d+\.\d+)') hash_pat = re.compile(r'#([0-9a-f]+)') link_pat = re.compile(r'\[(.+)\]\((.+)\)') def hash_replace(match): group1 = match.group(1) if len(group1) >= 7: if len(group1) != 8: print("Warning: commit hash length != 8, got", len(group1)) url = "%s/commit/%s" % (BASE_URL, group1) else: url = "%s/issues/%d" % (BASE_URL, int(group1)) return "`%s <%s>`__" % (match.group(0), url) fo.write("Change log\n" "==========\n\n") for line in open('./CHANGE_LOG'): line = line.rstrip() 
match = ver_pat.match(line) if match: line = match.expand(r'**\2** (\1):') elif line.startswith('-----'): line = '' elif line.startswith(' '): line = line[2:] line = line.replace('`', '``') line = hash_pat.sub(hash_replace, line) line = link_pat.sub(r"`\1 <\2>`__", line) fo.write(line + '\n') def main(): if len(sys.argv) > 1: sys.exit("no arguments expected") update_readme('./README.rst') with open('./doc/reference.rst', 'w') as fo: write_reference(fo) with open('./doc/changelog.rst', 'w') as fo: write_changelog(fo) testfile('./README.rst') for path in glob("./doc/*.rst"): testfile(path) for path in glob("./examples/*.rst"): testfile(path) if __name__ == '__main__': main()