bitarray-0.8.1/0000755000076500000240000000000012125715156013410 5ustar ilanstaff00000000000000bitarray-0.8.1/AUTHORS0000644000076500000240000000113012125715114014445 0ustar ilanstaff00000000000000bitarray is written and maintained by Ilan Schnell The author would like to thank the following people for patches, feedback, suggestions, and discussions: - Roland Puntaier (Porting to Python 3.x) - Daniel Stutzbach (for help with reference counts) - Lluís Pàmies (C level itersearch) - Vangelis Koukis (Python 2.4 support) - Kris Kennaway - Travis Oliphant - Gael Varoquaux - David Ormsbee - David Kammeyer - Bryce Hendrix - Judah De Paula - Corran Webster - Chris Mueller - Koen van de Sande - Paul McGuire - Chris Gohlke - Ushma Bhatt bitarray-0.8.1/bitarray/0000755000076500000240000000000012125715114015217 5ustar ilanstaff00000000000000bitarray-0.8.1/bitarray/__init__.py0000644000076500000240000001053012125715114017327 0ustar ilanstaff00000000000000""" This package defines an object type which can efficiently represent a bitarray. Bitarrays are sequence types and behave very much like lists. 
Please find a description of this package at: http://pypi.python.org/pypi/bitarray/ Author: Ilan Schnell """ from bitarray._bitarray import _bitarray, bitdiff, bits2bytes, _sysinfo __version__ = '0.8.1' def _tree_insert(tree, sym, ba): """ Insert symbol which is mapped to bitarray into tree """ v = ba[0] if len(ba) > 1: if tree[v] == []: tree[v] = [[], []] _tree_insert(tree[v], sym, ba[1:]) else: if tree[v] != []: raise ValueError("prefix code ambiguous") tree[v] = sym def _mk_tree(codedict): # Generate tree from codedict tree = [[], []] for sym, ba in codedict.items(): _tree_insert(tree, sym, ba) return tree def _check_codedict(codedict): if not isinstance(codedict, dict): raise TypeError("dictionary expected") if len(codedict) == 0: raise ValueError("prefix code empty") for k, v in codedict.items(): if not isinstance(v, bitarray): raise TypeError("bitarray expected for dictionary value") if v.length() == 0: raise ValueError("non-empty bitarray expected") class bitarray(_bitarray): """bitarray([initial], [endian=string]) Return a new bitarray object whose items are bits initialized from the optional initial, and endianness. If no object is provided, the bitarray is initialized to have length zero. The initial object may be of the following types: int, long Create bitarray of length given by the integer. The initial values in the array are random, because only the memory allocated. string Create bitarray from a string of '0's and '1's. list, tuple, iterable Create bitarray from a sequence, each element in the sequence is converted to a bit using truth value value. bitarray Create bitarray from another bitarray. This is done by copying the memory holding the bitarray data, and is hence very fast. The optional keyword arguments 'endian' specifies the bit endianness of the created bitarray object. Allowed values are 'big' and 'little' (default is 'big'). 
Note that setting the bit endianness only has an effect when accessing the machine representation of the bitarray, i.e. when using the methods: tofile, fromfile, tobytes, frombytes.""" def fromstring(self, string): """fromstring(string) Append from a string, interpreting the string as machine values. Deprecated since version 0.4.0, use ``frombytes()`` instead.""" return self.frombytes(string.encode()) def tostring(self): """tostring() -> string Return the string representing (machine values) of the bitarray. When the length of the bitarray is not a multiple of 8, the few remaining bits (1..7) are set to 0. Deprecated since version 0.4.0, use ``tobytes()`` instead.""" return self.tobytes().decode() def decode(self, codedict): """decode(code) -> list Given a prefix code (a dict mapping symbols to bitarrays), decode the content of the bitarray and return the list of symbols.""" _check_codedict(codedict) return self._decode(_mk_tree(codedict)) def iterdecode(self, codedict): """iterdecode(code) -> iterator Given a prefix code (a dict mapping symbols to bitarrays), decode the content of the bitarray and iterate over the symbols.""" _check_codedict(codedict) return self._iterdecode(_mk_tree(codedict)) def encode(self, codedict, iterable): """encode(code, iterable) Given a prefix code (a dict mapping symbols to bitarrays), iterates over iterable object with symbols, and extends the bitarray with the corresponding bitarray for each symbols.""" _check_codedict(codedict) self._encode(codedict, iterable) def __int__(self): raise TypeError("int() argument cannot be a bitarray") def __long__(self): raise TypeError("long() argument cannot be a bitarray") def __float__(self): raise TypeError("float() argument cannot be a bitarray") def test(verbosity=1, repeat=1): """test(verbosity=1, repeat=1) -> TextTestResult Run self-test, and return unittest.runner.TextTestResult object. 
""" from bitarray import test_bitarray return test_bitarray.run(verbosity=verbosity, repeat=repeat) bitarray-0.8.1/bitarray/_bitarray.c0000644000076500000240000024546412125715114017356 0ustar ilanstaff00000000000000/* This file is the C part of the bitarray package. Almost all functionality is implemented here. Author: Ilan Schnell */ #define PY_SSIZE_T_CLEAN #include "Python.h" #if PY_MAJOR_VERSION >= 3 #define IS_PY3K #endif #ifdef IS_PY3K #include "bytesobject.h" #define PyString_FromStringAndSize PyBytes_FromStringAndSize #define PyString_FromString PyBytes_FromString #define PyString_Check PyBytes_Check #define PyString_Size PyBytes_Size #define PyString_AsString PyBytes_AsString #define PyString_ConcatAndDel PyBytes_ConcatAndDel #define Py_TPFLAGS_HAVE_WEAKREFS 0 #endif #if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 5 /* Py_ssize_t was introduced in Python 2.5, substitute long for it */ typedef long Py_ssize_t; #define PY_SSIZE_T_MAX LONG_MAX #define PY_SSIZE_T_MIN LONG_MIN Py_ssize_t PyNumber_AsSsize_t(PyObject *o, PyObject *exc) { return PyLong_AsLong(o); } int PyIndex_Check(PyObject *o) { return 0; } #define PY_SSIZE_T_FMT "l" #else /* Python 2.5 and up uses 'n' as the format char for Py_ssize_t */ #define PY_SSIZE_T_FMT "n" #endif #if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 6 /* backward compatibility with Python 2.5 */ #define Py_TYPE(ob) (((PyObject *) (ob))->ob_type) #define Py_SIZE(ob) (((PyVarObject *) (ob))->ob_size) #endif #if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION == 7 /* (new) buffer protocol */ #define WITH_BUFFER #endif #ifdef STDC_HEADERS #include #else /* !STDC_HEADERS */ #ifdef HAVE_SYS_TYPES_H #include /* For size_t */ #endif /* HAVE_SYS_TYPES_H */ #endif /* !STDC_HEADERS */ typedef long long int idx_t; /* throughout: 0 = little endian 1 = big endian */ #define DEFAULT_ENDIAN 1 typedef struct { PyObject_VAR_HEAD #ifdef WITH_BUFFER int ob_exports; /* how many buffer exports */ #endif char *ob_item; Py_ssize_t allocated; /* how many 
bytes allocated */ idx_t nbits; /* length og bitarray */ int endian; /* bit endianness of bitarray */ PyObject *weakreflist; /* list of weak references */ } bitarrayobject; static PyTypeObject Bitarraytype; #define bitarray_Check(obj) PyObject_TypeCheck(obj, &Bitarraytype) #define BITS(bytes) (((idx_t) 8) * ((idx_t) (bytes))) #define BYTES(bits) (((bits) == 0) ? 0 : (((bits) - 1) / 8 + 1)) #define BITMASK(endian, i) (((char) 1) << ((endian) ? (7 - (i)%8) : (i)%8)) /* ------------ low level access to bits in bitarrayobject ------------- */ #define GETBIT(self, i) \ ((self)->ob_item[(i) / 8] & BITMASK((self)->endian, i) ? 1 : 0) static void setbit(bitarrayobject *self, idx_t i, int bit) { char *cp, mask; mask = BITMASK(self->endian, i); cp = self->ob_item + i / 8; if (bit) *cp |= mask; else *cp &= ~mask; } static int check_overflow(idx_t nbits) { idx_t max_bits; assert(nbits >= 0); if (sizeof(void *) == 4) { /* 32bit machine */ max_bits = ((idx_t) 1) << 34; /* 2^34 = 16 Gbits*/ if (nbits > max_bits) { char buff[256]; sprintf(buff, "cannot create bitarray of size %lld, " "max size is %lld", nbits, max_bits); PyErr_SetString(PyExc_OverflowError, buff); return -1; } } return 0; } static int resize(bitarrayobject *self, idx_t nbits) { Py_ssize_t newsize; size_t _new_size; /* for allocation */ if (check_overflow(nbits) < 0) return -1; newsize = (Py_ssize_t) BYTES(nbits); /* Bypass realloc() when a previous overallocation is large enough to accommodate the newsize. If the newsize is 16 smaller than the current size, then proceed with the realloc() to shrink the list. */ if (self->allocated >= newsize && Py_SIZE(self) < newsize + 16 && self->ob_item != NULL) { Py_SIZE(self) = newsize; self->nbits = nbits; return 0; } if (newsize >= Py_SIZE(self) + 65536) /* Don't overallocate when the size increase is very large. */ _new_size = newsize; else /* This over-allocates proportional to the bitarray size, making room for additional growth. 
The over-allocation is mild, but is enough to give linear-time amortized behavior over a long sequence of appends() in the presence of a poorly-performing system realloc(). The growth pattern is: 0, 4, 8, 16, 25, 34, 44, 54, 65, 77, ... Note, the pattern starts out the same as for lists but then grows at a smaller rate so that larger bitarrays only overallocate by about 1/16th -- this is done because bitarrays are assumed to be memory critical. */ _new_size = (newsize >> 4) + (Py_SIZE(self) < 8 ? 3 : 7) + newsize; self->ob_item = PyMem_Realloc(self->ob_item, _new_size); if (self->ob_item == NULL) { PyErr_NoMemory(); return -1; } Py_SIZE(self) = newsize; self->allocated = _new_size; self->nbits = nbits; return 0; } /* create new bitarray object without initialization of buffer */ static PyObject * newbitarrayobject(PyTypeObject *type, idx_t nbits, int endian) { bitarrayobject *obj; Py_ssize_t nbytes; if (check_overflow(nbits) < 0) return NULL; obj = (bitarrayobject *) type->tp_alloc(type, 0); if (obj == NULL) return NULL; nbytes = (Py_ssize_t) BYTES(nbits); Py_SIZE(obj) = nbytes; obj->nbits = nbits; obj->endian = endian; if (nbytes == 0) { obj->ob_item = NULL; } else { obj->ob_item = PyMem_Malloc((size_t) nbytes); if (obj->ob_item == NULL) { PyObject_Del(obj); PyErr_NoMemory(); return NULL; } } obj->allocated = nbytes; obj->weakreflist = NULL; return (PyObject *) obj; } static void bitarray_dealloc(bitarrayobject *self) { if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) self); if (self->ob_item != NULL) PyMem_Free((void *) self->ob_item); Py_TYPE(self)->tp_free((PyObject *) self); } /* copy n bits from other (starting at b) onto self (starting at a) */ static void copy_n(bitarrayobject *self, idx_t a, bitarrayobject *other, idx_t b, idx_t n) { idx_t i; assert(0 <= n && n <= self->nbits && n <= other->nbits); assert(0 <= a && a <= self->nbits - n); assert(0 <= b && b <= other->nbits - n); /* XXX if (self->endian == other->endian && a % 8 == 0 && b % 
8 == 0 && n >= 8) { Py_ssize_t bytes; idx_t bits; bytes = n / 8; bits = 8 * bytes; copy_n(self, bits + a, other, bits + b, n - bits); memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes); return; } */ /* the different type of looping is only relevant when other and self are the same object, i.e. when copying a piece of an bitarrayobject onto itself */ if (a < b) { for (i = 0; i < n; i++) /* loop forward (delete) */ setbit(self, i + a, GETBIT(other, i + b)); } else { for (i = n - 1; i >= 0; i--) /* loop backwards (insert) */ setbit(self, i + a, GETBIT(other, i + b)); } } /* starting at start, delete n bits from self */ static int delete_n(bitarrayobject *self, idx_t start, idx_t n) { assert(0 <= start && start <= self->nbits); assert(0 <= n && n <= self->nbits - start); if (n == 0) return 0; copy_n(self, start, self, start + n, self->nbits - start - n); return resize(self, self->nbits - n); } /* starting at start, insert n (uninitialized) bits into self */ static int insert_n(bitarrayobject *self, idx_t start, idx_t n) { assert(0 <= start && start <= self->nbits); assert(n >= 0); if (n == 0) return 0; if (resize(self, self->nbits + n) < 0) return -1; copy_n(self, start + n, self, start, self->nbits - start - n); return 0; } /* sets ususet bits to 0, i.e. 
the ones in the last byte (if any), and return the number of bits set -- self->nbits is unchanged */ static int setunused(bitarrayobject *self) { idx_t i, n; int res = 0; n = BITS(Py_SIZE(self)); for (i = self->nbits; i < n; i++) { setbit(self, i, 0); res++; } assert(res < 8); return res; } /* repeat self n times */ static int repeat(bitarrayobject *self, idx_t n) { idx_t nbits, i; if (n <= 0) { if (resize(self, 0) < 0) return -1; } if (n > 1) { nbits = self->nbits; if (resize(self, nbits * n) < 0) return -1; for (i = 1; i < n; i++) copy_n(self, i * nbits, self, 0, nbits); } return 0; } enum op_type { OP_and, OP_or, OP_xor, }; /* perform bitwise operation */ static int bitwise(bitarrayobject *self, PyObject *arg, enum op_type oper) { bitarrayobject *other; Py_ssize_t i; if (!bitarray_Check(arg)) { PyErr_SetString(PyExc_TypeError, "bitarray object expected for bitwise operation"); return -1; } other = (bitarrayobject *) arg; if (self->nbits != other->nbits) { PyErr_SetString(PyExc_ValueError, "bitarrays of equal length expected for bitwise operation"); return -1; } setunused(self); setunused(other); switch (oper) { case OP_and: for (i = 0; i < Py_SIZE(self); i++) self->ob_item[i] &= other->ob_item[i]; break; case OP_or: for (i = 0; i < Py_SIZE(self); i++) self->ob_item[i] |= other->ob_item[i]; break; case OP_xor: for (i = 0; i < Py_SIZE(self); i++) self->ob_item[i] ^= other->ob_item[i]; break; } return 0; } /* set the bits from start to stop (excluding) in self to val */ static void setrange(bitarrayobject *self, idx_t start, idx_t stop, int val) { idx_t i; assert(0 <= start && start <= self->nbits); assert(0 <= stop && stop <= self->nbits); for (i = start; i < stop; i++) setbit(self, i, val); } static void invert(bitarrayobject *self) { Py_ssize_t i; for (i = 0; i < Py_SIZE(self); i++) self->ob_item[i] = ~self->ob_item[i]; } /* reverse the order of bits in each byte of the buffer */ static void bytereverse(bitarrayobject *self) { static char trans[256]; static int 
setup = 0; Py_ssize_t i; unsigned char c; if (!setup) { /* setup translation table, which maps each byte to it's reversed: trans = {0, 128, 64, 192, 32, 160, ..., 255} */ int j, k; for (k = 0; k < 256; k++) { trans[k] = 0x00; for (j = 0; j < 8; j++) if (1 << (7 - j) & k) trans[k] |= 1 << j; } setup = 1; } setunused(self); for (i = 0; i < Py_SIZE(self); i++) { c = self->ob_item[i]; self->ob_item[i] = trans[c]; } } static int bitcount_lookup[256] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, }; /* returns number of 1 bits */ static idx_t count(bitarrayobject *self) { Py_ssize_t i; idx_t res = 0; unsigned char c; setunused(self); for (i = 0; i < Py_SIZE(self); i++) { c = self->ob_item[i]; res += bitcount_lookup[c]; } return res; } /* return index of first occurrence of vi, -1 when x is not in found. */ static idx_t findfirst(bitarrayobject *self, int vi, idx_t start, idx_t stop) { Py_ssize_t j; idx_t i; char c; if (Py_SIZE(self) == 0) return -1; if (start < 0 || start > self->nbits) start = 0; if (stop < 0 || stop > self->nbits) stop = self->nbits; if (start >= stop) return -1; if (stop > start + 8) { /* seraching for 1 means: break when byte is not 0x00 searching for 0 means: break when byte is not 0xff */ c = vi ? 
0x00 : 0xff; /* skip ahead by checking whole bytes */ for (j = (Py_ssize_t) (start / 8); j < BYTES(stop); j++) if (c ^ self->ob_item[j]) break; if (j == Py_SIZE(self)) j--; assert(0 <= j && j < Py_SIZE(self)); if (start < BITS(j)) start = BITS(j); } /* fine grained search */ for (i = start; i < stop; i++) if (GETBIT(self, i) == vi) return i; return -1; } /* search for the first occurrence bitarray xa (in self), starting at p, and return its position (-1 when not found) */ static idx_t search(bitarrayobject *self, bitarrayobject *xa, idx_t p) { idx_t i; assert(p >= 0); while (p < self->nbits - xa->nbits + 1) { for (i = 0; i < xa->nbits; i++) if (GETBIT(self, p + i) != GETBIT(xa, i)) goto next; return p; next: p++; } return -1; } static int set_item(bitarrayobject *self, idx_t i, PyObject *v) { long vi; assert(0 <= i && i < self->nbits); vi = PyObject_IsTrue(v); if (vi < 0) return -1; setbit(self, i, vi); return 0; } static int append_item(bitarrayobject *self, PyObject *item) { if (resize(self, self->nbits + 1) < 0) return -1; return set_item(self, self->nbits - 1, item); } static PyObject * unpack(bitarrayobject *self, char zero, char one) { PyObject *res; Py_ssize_t i; char *str; if (self->nbits > PY_SSIZE_T_MAX) { PyErr_SetString(PyExc_OverflowError, "bitarray too large to unpack"); return NULL; } str = PyMem_Malloc((size_t) self->nbits); if (str == NULL) { PyErr_NoMemory(); return NULL; } for (i = 0; i < self->nbits; i++) { *(str + i) = GETBIT(self, i) ? 
one : zero; } res = PyString_FromStringAndSize(str, (Py_ssize_t) self->nbits); PyMem_Free((void *) str); return res; } static int extend_bitarray(bitarrayobject *self, bitarrayobject *other) { idx_t n_sum; if (other->nbits == 0) return 0; n_sum = self->nbits + other->nbits; if (resize(self, n_sum) < 0) return -1; copy_n(self, n_sum - other->nbits, other, 0, other->nbits); return 0; } static int extend_iter(bitarrayobject *self, PyObject *iter) { PyObject *item; assert(PyIter_Check(iter)); while ((item = PyIter_Next(iter)) != NULL) { if (append_item(self, item) < 0) { Py_DECREF(item); return -1; } Py_DECREF(item); } if (PyErr_Occurred()) return -1; return 0; } static int extend_list(bitarrayobject *self, PyObject *list) { PyObject *item; Py_ssize_t n, i; assert(PyList_Check(list)); n = PyList_Size(list); if (n == 0) return 0; if (resize(self, self->nbits + n) < 0) return -1; for (i = 0; i < n; i++) { item = PyList_GetItem(list, i); if (item == NULL) return -1; if (set_item(self, self->nbits - n + i, item) < 0) return -1; } return 0; } static int extend_tuple(bitarrayobject *self, PyObject *tuple) { PyObject *item; Py_ssize_t n, i; assert(PyTuple_Check(tuple)); n = PyTuple_Size(tuple); if (n == 0) return 0; if (resize(self, self->nbits + n) < 0) return -1; for (i = 0; i < n; i++) { item = PyTuple_GetItem(tuple, i); if (item == NULL) return -1; if (set_item(self, self->nbits - n + i, item) < 0) return -1; } return 0; } /* extend_string(): extend the bitarray from a string, where each whole characters is converted to a single bit */ enum conv_tp { STR_01, /* '0' -> 0 '1' -> 1 no other characters allowed */ STR_RAW, /* 0x00 -> 0 other -> 1 */ }; static int extend_string(bitarrayobject *self, PyObject *string, enum conv_tp conv) { Py_ssize_t strlen, i; char c, *str; int vi = 0; assert(PyString_Check(string)); strlen = PyString_Size(string); if (strlen == 0) return 0; if (resize(self, self->nbits + strlen) < 0) return -1; str = PyString_AsString(string); for (i = 0; i < 
strlen; i++) { c = *(str + i); /* depending on conv, map c to bit */ switch (conv) { case STR_01: switch (c) { case '0': vi = 0; break; case '1': vi = 1; break; default: PyErr_Format(PyExc_ValueError, "character must be '0' or '1', found '%c'", c); return -1; } break; case STR_RAW: vi = c ? 1 : 0; break; } setbit(self, self->nbits - strlen + i, vi); } return 0; } static int extend_rawstring(bitarrayobject *self, PyObject *string) { Py_ssize_t strlen; char *str; assert(PyString_Check(string) && self->nbits % 8 == 0); strlen = PyString_Size(string); if (strlen == 0) return 0; if (resize(self, self->nbits + BITS(strlen)) < 0) return -1; str = PyString_AsString(string); memcpy(self->ob_item + (Py_SIZE(self) - strlen), str, strlen); return 0; } static int extend_dispatch(bitarrayobject *self, PyObject *obj) { PyObject *iter; int ret; /* dispatch on type */ if (bitarray_Check(obj)) /* bitarray */ return extend_bitarray(self, (bitarrayobject *) obj); if (PyList_Check(obj)) /* list */ return extend_list(self, obj); if (PyTuple_Check(obj)) /* tuple */ return extend_tuple(self, obj); if (PyString_Check(obj)) /* str01 */ return extend_string(self, obj, STR_01); #ifdef IS_PY3K if (PyUnicode_Check(obj)) { /* str01 */ PyObject *string; string = PyUnicode_AsEncodedString(obj, NULL, NULL); ret = extend_string(self, string, STR_01); Py_DECREF(string); return ret; } #endif if (PyIter_Check(obj)) /* iter */ return extend_iter(self, obj); /* finally, try to get the iterator of the object */ iter = PyObject_GetIter(obj); if (iter == NULL) { PyErr_SetString(PyExc_TypeError, "could not extend bitarray"); return -1; } ret = extend_iter(self, iter); Py_DECREF(iter); return ret; } /* --------- helper functions NOT involving bitarrayobjects ------------ */ #define ENDIAN_STR(ba) (((ba)->endian) ? 
"big" : "little") #ifdef IS_PY3K #define IS_INDEX(x) (PyLong_Check(x) || PyIndex_Check(x)) #define IS_INT_OR_BOOL(x) (PyBool_Check(x) || PyLong_Check(x)) #else #define IS_INDEX(x) (PyInt_Check(x) || PyLong_Check(x) || PyIndex_Check(x)) #define IS_INT_OR_BOOL(x) (PyBool_Check(x) || PyInt_Check(x) || \ PyLong_Check(x)) #endif /* given an PyLong (which must be 0 or 1), or a PyBool, return 0 or 1, or -1 on error */ static int IntBool_AsInt(PyObject *v) { long x; if (PyBool_Check(v)) return PyObject_IsTrue(v); #ifndef IS_PY3K if (PyInt_Check(v)) { x = PyInt_AsLong(v); } else #endif if (PyLong_Check(v)) { x = PyLong_AsLong(v); } else { PyErr_SetString(PyExc_TypeError, "integer or bool expected"); return -1; } if (x < 0 || x > 1) { PyErr_SetString(PyExc_ValueError, "integer value between 0 and 1 expected"); return -1; } return (int) x; } /* Extract a slice index from a PyInt or PyLong or an object with the nb_index slot defined, and store in *i. However, this function returns -1 on error and 0 on success. This is almost _PyEval_SliceIndex() with Py_ssize_t replaced by idx_t */ static int getIndex(PyObject *v, idx_t *i) { idx_t x; #ifndef IS_PY3K if (PyInt_Check(v)) { x = PyInt_AS_LONG(v); } else #endif if (PyLong_Check(v)) { x = PyLong_AsLongLong(v); } else if (PyIndex_Check(v)) { x = PyNumber_AsSsize_t(v, NULL); if (x == -1 && PyErr_Occurred()) return -1; } else { PyErr_SetString(PyExc_TypeError, "slice indices must be integers or " "None or have an __index__ method"); return -1; } *i = x; return 0; } /* this is PySlice_GetIndicesEx() with Py_ssize_t replaced by idx_t */ static int slice_GetIndicesEx(PySliceObject *r, idx_t length, idx_t *start, idx_t *stop, idx_t *step, idx_t *slicelength) { idx_t defstart, defstop; if (r->step == Py_None) { *step = 1; } else { if (getIndex(r->step, step) < 0) return -1; if (*step == 0) { PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); return -1; } } defstart = *step < 0 ? length - 1 : 0; defstop = *step < 0 ? 
-1 : length; if (r->start == Py_None) { *start = defstart; } else { if (getIndex(r->start, start) < 0) return -1; if (*start < 0) *start += length; if (*start < 0) *start = (*step < 0) ? -1 : 0; if (*start >= length) *start = (*step < 0) ? length - 1 : length; } if (r->stop == Py_None) { *stop = defstop; } else { if (getIndex(r->stop, stop) < 0) return -1; if (*stop < 0) *stop += length; if (*stop < 0) *stop = -1; if (*stop > length) *stop = length; } if ((*step < 0 && *stop >= *start) || (*step > 0 && *start >= *stop)) { *slicelength = 0; } else if (*step < 0) { *slicelength = (*stop - *start + 1) / (*step) + 1; } else { *slicelength = (*stop - *start - 1) / (*step) + 1; } return 0; } /************************************************************************** Implementation of API methods **************************************************************************/ static PyObject * bitarray_length(bitarrayobject *self) { return PyLong_FromLongLong(self->nbits); } PyDoc_STRVAR(length_doc, "length() -> int\n\ \n\ Return the length, i.e. number of bits stored in the bitarray.\n\ This method is preferred over __len__ (used when typing ``len(a)``),\n\ since __len__ will fail for a bitarray object with 2^31 or more elements\n\ on a 32bit machine, whereas this method will return the correct value,\n\ on 32bit and 64bit machines."); PyDoc_STRVAR(len_doc, "__len__() -> int\n\ \n\ Return the length, i.e. number of bits stored in the bitarray.\n\ This method will fail for a bitarray object with 2^31 or more elements\n\ on a 32bit machine. 
Use bitarray.length() instead."); static PyObject * bitarray_copy(bitarrayobject *self) { PyObject *res; res = newbitarrayobject(Py_TYPE(self), self->nbits, self->endian); if (res == NULL) return NULL; memcpy(((bitarrayobject *) res)->ob_item, self->ob_item, Py_SIZE(self)); return res; } PyDoc_STRVAR(copy_doc, "copy() -> bitarray\n\ \n\ Return a copy of the bitarray."); static PyObject * bitarray_count(bitarrayobject *self, PyObject *args) { idx_t n1; long x = 1; if (!PyArg_ParseTuple(args, "|i:count", &x)) return NULL; n1 = count(self); return PyLong_FromLongLong(x ? n1 : (self->nbits - n1)); } PyDoc_STRVAR(count_doc, "count([value]) -> int\n\ \n\ Return number of occurrences of value (defaults to True) in the bitarray."); static PyObject * bitarray_index(bitarrayobject *self, PyObject *args) { PyObject *x; idx_t i, start = 0, stop = -1; long vi; if (!PyArg_ParseTuple(args, "O|LL:index", &x, &start, &stop)) return NULL; vi = PyObject_IsTrue(x); if (vi < 0) return NULL; i = findfirst(self, vi, start, stop); if (i < 0) { PyErr_SetString(PyExc_ValueError, "index(x): x not in bitarray"); return NULL; } return PyLong_FromLongLong(i); } PyDoc_STRVAR(index_doc, "index(value, [start, [stop]]) -> int\n\ \n\ Return index of the first occurrence of bool(value) in the bitarray.\n\ Raises ValueError if the value is not present."); static PyObject * bitarray_extend(bitarrayobject *self, PyObject *obj) { if (extend_dispatch(self, obj) < 0) return NULL; Py_RETURN_NONE; } PyDoc_STRVAR(extend_doc, "extend(object)\n\ \n\ Append bits to the end of the bitarray. 
The objects which can be passed\n\ to this method are the same iterable objects which can given to a bitarray\n\ object upon initialization."); static PyObject * bitarray_contains(bitarrayobject *self, PyObject *x) { long res; if (IS_INT_OR_BOOL(x)) { int vi; vi = IntBool_AsInt(x); if (vi < 0) return NULL; res = findfirst(self, vi, 0, -1) >= 0; } else if (bitarray_Check(x)) { res = search(self, (bitarrayobject *) x, 0) >= 0; } else { PyErr_SetString(PyExc_TypeError, "bitarray or bool expected"); return NULL; } return PyBool_FromLong(res); } PyDoc_STRVAR(contains_doc, "__contains__(x) -> bool\n\ \n\ Return True if bitarray contains x, False otherwise.\n\ The value x may be a boolean (or integer between 0 and 1), or a bitarray."); static PyObject * bitarray_search(bitarrayobject *self, PyObject *args) { PyObject *list = NULL; /* list of matching positions to be returned */ PyObject *x, *item = NULL; Py_ssize_t limit = -1; bitarrayobject *xa; idx_t p; if (!PyArg_ParseTuple(args, "O|" PY_SSIZE_T_FMT ":_search", &x, &limit)) return NULL; if (!bitarray_Check(x)) { PyErr_SetString(PyExc_TypeError, "bitarray expected for search"); return NULL; } xa = (bitarrayobject *) x; if (xa->nbits == 0) { PyErr_SetString(PyExc_ValueError, "can't search for empty bitarray"); return NULL; } list = PyList_New(0); if (list == NULL) return NULL; if (xa->nbits > self->nbits || limit == 0) return list; p = 0; while (1) { p = search(self, xa, p); if (p < 0) break; item = PyLong_FromLongLong(p); p++; if (item == NULL || PyList_Append(list, item) < 0) { Py_XDECREF(item); Py_XDECREF(list); return NULL; } Py_DECREF(item); if (limit > 0 && PyList_Size(list) >= limit) break; } return list; } PyDoc_STRVAR(search_doc, "search(bitarray, [limit]) -> list\n\ \n\ Searches for the given a bitarray in self, and returns the start positions\n\ where bitarray matches self as a list.\n\ The optional argument limits the number of search results to the integer\n\ specified. 
By default, all search results are returned."); static PyObject * bitarray_buffer_info(bitarrayobject *self) { PyObject *res, *ptr; ptr = PyLong_FromVoidPtr(self->ob_item), res = Py_BuildValue("OLsiL", ptr, (idx_t) Py_SIZE(self), ENDIAN_STR(self), (int) (BITS(Py_SIZE(self)) - self->nbits), (idx_t) self->allocated); Py_DECREF(ptr); return res; } PyDoc_STRVAR(buffer_info_doc, "buffer_info() -> tuple\n\ \n\ Return a tuple (address, size, endianness, unused, allocated) giving the\n\ current memory address, the size (in bytes) used to hold the bitarray's\n\ contents, the bit endianness as a string, the number of unused bits\n\ (e.g. a bitarray of length 11 will have a buffer size of 2 bytes and\n\ 5 unused bits), and the size (in bytes) of the allocated memory."); static PyObject * bitarray_endian(bitarrayobject *self) { #ifdef IS_PY3K return PyUnicode_FromString(ENDIAN_STR(self)); #else return PyString_FromString(ENDIAN_STR(self)); #endif } PyDoc_STRVAR(endian_doc, "endian() -> string\n\ \n\ Return the bit endianness as a string (either 'little' or 'big')."); static PyObject * bitarray_append(bitarrayobject *self, PyObject *v) { if (append_item(self, v) < 0) return NULL; Py_RETURN_NONE; } PyDoc_STRVAR(append_doc, "append(item)\n\ \n\ Append the value bool(item) to the end of the bitarray."); static PyObject * bitarray_all(bitarrayobject *self) { if (findfirst(self, 0, 0, -1) >= 0) Py_RETURN_FALSE; else Py_RETURN_TRUE; } PyDoc_STRVAR(all_doc, "all() -> bool\n\ \n\ Returns True when all bits in the array are True."); static PyObject * bitarray_any(bitarrayobject *self) { if (findfirst(self, 1, 0, -1) >= 0) Py_RETURN_TRUE; else Py_RETURN_FALSE; } PyDoc_STRVAR(any_doc, "any() -> bool\n\ \n\ Returns True when any bit in the array is True."); static PyObject * bitarray_reduce(bitarrayobject *self) { PyObject *dict, *repr = NULL, *result = NULL; char *str; dict = PyObject_GetAttrString((PyObject *) self, "__dict__"); if (dict == NULL) { PyErr_Clear(); dict = Py_None; 
Py_INCREF(dict); } /* the first byte indicates the number of unused bits at the end, and the rest of the bytes consist of the raw binary data */ str = PyMem_Malloc(Py_SIZE(self) + 1); if (str == NULL) { PyErr_NoMemory(); goto error; } str[0] = (char) setunused(self); memcpy(str + 1, self->ob_item, Py_SIZE(self)); repr = PyString_FromStringAndSize(str, Py_SIZE(self) + 1); if (repr == NULL) goto error; PyMem_Free((void *) str); result = Py_BuildValue("O(Os)O", Py_TYPE(self), repr, ENDIAN_STR(self), dict); error: Py_DECREF(dict); Py_XDECREF(repr); return result; } PyDoc_STRVAR(reduce_doc, "state information for pickling"); static PyObject * bitarray_reverse(bitarrayobject *self) { PyObject *t; /* temp bitarray to store lower half of self */ idx_t i, m; if (self->nbits < 2) Py_RETURN_NONE; t = newbitarrayobject(Py_TYPE(self), self->nbits / 2, self->endian); if (t == NULL) return NULL; #define tt ((bitarrayobject *) t) /* copy lower half of array into temporary array */ memcpy(tt->ob_item, self->ob_item, Py_SIZE(tt)); m = self->nbits - 1; /* reverse the upper half onto the lower half. */ for (i = 0; i < tt->nbits; i++) setbit(self, i, GETBIT(self, m - i)); /* revert the stored away lower half onto the upper half. */ for (i = 0; i < tt->nbits; i++) setbit(self, m - i, GETBIT(tt, i)); #undef tt Py_DECREF(t); Py_RETURN_NONE; } PyDoc_STRVAR(reverse_doc, "reverse()\n\ \n\ Reverse the order of bits in the array (in-place)."); static PyObject * bitarray_fill(bitarrayobject *self) { long p; p = setunused(self); self->nbits += p; #ifdef IS_PY3K return PyLong_FromLong(p); #else return PyInt_FromLong(p); #endif } PyDoc_STRVAR(fill_doc, "fill() -> int\n\ \n\ Adds zeros to the end of the bitarray, such that the length of the bitarray\n\ is not a multiple of 8. Returns the number of bits added (0..7)."); static PyObject * bitarray_invert(bitarrayobject *self) { invert(self); Py_RETURN_NONE; } PyDoc_STRVAR(invert_doc, "invert()\n\ \n\ Invert all bits in the array (in-place),\n\ i.e. 
convert each 1-bit into a 0-bit and vice versa."); static PyObject * bitarray_bytereverse(bitarrayobject *self) { bytereverse(self); Py_RETURN_NONE; } PyDoc_STRVAR(bytereverse_doc, "bytereverse()\n\ \n\ For all bytes representing the bitarray, reverse the bit order (in-place).\n\ Note: This method changes the actual machine values representing the\n\ bitarray; it does not change the endianness of the bitarray object."); static PyObject * bitarray_setall(bitarrayobject *self, PyObject *v) { long vi; vi = PyObject_IsTrue(v); if (vi < 0) return NULL; memset(self->ob_item, vi ? 0xff : 0x00, Py_SIZE(self)); Py_RETURN_NONE; } PyDoc_STRVAR(setall_doc, "setall(value)\n\ \n\ Set all bits in the bitarray to bool(value)."); static PyObject * bitarray_sort(bitarrayobject *self, PyObject *args, PyObject *kwds) { idx_t n, n0, n1; int reverse = 0; static char* kwlist[] = {"reverse", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:sort", kwlist, &reverse)) return NULL; n = self->nbits; n1 = count(self); if (reverse) { setrange(self, 0, n1, 1); setrange(self, n1, n, 0); } else { n0 = n - n1; setrange(self, 0, n0, 0); setrange(self, n0, n, 1); } Py_RETURN_NONE; } PyDoc_STRVAR(sort_doc, "sort(reverse=False)\n\ \n\ Sort the bits in the array (in-place)."); #ifdef IS_PY3K static PyObject * bitarray_fromfile(bitarrayobject *self, PyObject *args) { PyObject *f; Py_ssize_t newsize, nbytes = -1; PyObject *reader, *rargs, *result; size_t nread; idx_t t, p; if (!PyArg_ParseTuple(args, "O|n:fromfile", &f, &nbytes)) return NULL; if (nbytes == 0) Py_RETURN_NONE; reader = PyObject_GetAttrString(f, "read"); if (reader == NULL) { PyErr_SetString(PyExc_TypeError, "first argument must be an open file"); return NULL; } rargs = Py_BuildValue("(n)", nbytes); if (rargs == NULL) { Py_DECREF(reader); return NULL; } result = PyEval_CallObject(reader, rargs); if (result != NULL) { if (!PyBytes_Check(result)) { PyErr_SetString(PyExc_TypeError, "first argument must be an open file"); 
Py_DECREF(result); Py_DECREF(rargs); Py_DECREF(reader); return NULL; } nread = PyBytes_Size(result); t = self->nbits; p = setunused(self); self->nbits += p; newsize = Py_SIZE(self) + nread; if (resize(self, BITS(newsize)) < 0) { Py_DECREF(result); Py_DECREF(rargs); Py_DECREF(reader); return NULL; } memcpy(self->ob_item + (Py_SIZE(self) - nread), PyBytes_AS_STRING(result), nread); if (nbytes > 0 && nread < (size_t) nbytes) { PyErr_SetString(PyExc_EOFError, "not enough items read"); return NULL; } if (delete_n(self, t, p) < 0) return NULL; Py_DECREF(result); } Py_DECREF(rargs); Py_DECREF(reader); Py_RETURN_NONE; } #else static PyObject * bitarray_fromfile(bitarrayobject *self, PyObject *args) { PyObject *f; FILE *fp; Py_ssize_t newsize, nbytes = -1; size_t nread; idx_t t, p; long cur; if (!PyArg_ParseTuple(args, "O|" PY_SSIZE_T_FMT ":fromfile", &f, &nbytes)) return NULL; fp = PyFile_AsFile(f); if (fp == NULL) { PyErr_SetString(PyExc_TypeError, "first argument must be an open file"); return NULL; } /* find number of bytes till EOF */ if (nbytes < 0) { if ((cur = ftell(fp)) < 0) goto EOFerror; if (fseek(fp, 0L, SEEK_END) || (nbytes = ftell(fp)) < 0) goto EOFerror; nbytes -= cur; if (fseek(fp, cur, SEEK_SET)) { EOFerror: PyErr_SetString(PyExc_EOFError, "could not find EOF"); return NULL; } } if (nbytes == 0) Py_RETURN_NONE; /* file exists and there are more than zero bytes to read */ t = self->nbits; p = setunused(self); self->nbits += p; newsize = Py_SIZE(self) + nbytes; if (resize(self, BITS(newsize)) < 0) return NULL; nread = fread(self->ob_item + (Py_SIZE(self) - nbytes), 1, nbytes, fp); if (nread < (size_t) nbytes) { newsize -= nbytes - nread; if (resize(self, BITS(newsize)) < 0) return NULL; PyErr_SetString(PyExc_EOFError, "not enough items in file"); return NULL; } if (delete_n(self, t, p) < 0) return NULL; Py_RETURN_NONE; } #endif PyDoc_STRVAR(fromfile_doc, "fromfile(f, [n])\n\ \n\ Read n bytes from the file object f and append them to the bitarray\n\ 
interpreted as machine values. When n is omitted, as many bytes are\n\ read until EOF is reached."); #ifdef IS_PY3K static PyObject * bitarray_tofile(bitarrayobject *self, PyObject *f) { PyObject *writer, *value, *args, *result; if (f == NULL) { PyErr_SetString(PyExc_TypeError, "writeobject with NULL file"); return NULL; } writer = PyObject_GetAttrString(f, "write"); if (writer == NULL) return NULL; setunused(self); value = PyBytes_FromStringAndSize(self->ob_item, Py_SIZE(self)); if (value == NULL) { Py_DECREF(writer); return NULL; } args = PyTuple_Pack(1, value); if (args == NULL) { Py_DECREF(value); Py_DECREF(writer); return NULL; } result = PyEval_CallObject(writer, args); Py_DECREF(args); Py_DECREF(value); Py_DECREF(writer); if (result == NULL) { PyErr_SetString(PyExc_TypeError, "open file expected"); return NULL; } Py_DECREF(result); Py_RETURN_NONE; } #else static PyObject * bitarray_tofile(bitarrayobject *self, PyObject *f) { FILE *fp; fp = PyFile_AsFile(f); if (fp == NULL) { PyErr_SetString(PyExc_TypeError, "open file expected"); return NULL; } if (Py_SIZE(self) == 0) Py_RETURN_NONE; setunused(self); if (fwrite(self->ob_item, 1, Py_SIZE(self), fp) != (size_t) Py_SIZE(self)) { PyErr_SetFromErrno(PyExc_IOError); clearerr(fp); return NULL; } Py_RETURN_NONE; } #endif PyDoc_STRVAR(tofile_doc, "tofile(f)\n\ \n\ Write all bits (as machine values) to the file object f.\n\ When the length of the bitarray is not a multiple of 8,\n\ the remaining bits (1..7) are set to 0."); static PyObject * bitarray_tolist(bitarrayobject *self) { PyObject *list; idx_t i; list = PyList_New((Py_ssize_t) self->nbits); if (list == NULL) return NULL; for (i = 0; i < self->nbits; i++) if (PyList_SetItem(list, (Py_ssize_t) i, PyBool_FromLong(GETBIT(self, i))) < 0) return NULL; return list; } PyDoc_STRVAR(tolist_doc, "tolist() -> list\n\ \n\ Return an ordinary list with the items in the bitarray.\n\ Note that the list object being created will require 32 or 64 times more\n\ memory than the 
bitarray object, which may cause a memory error if the\n\ bitarray is very large.\n\ Also note that to extend a bitarray with elements from a list,\n\ use the extend method."); static PyObject * bitarray_frombytes(bitarrayobject *self, PyObject *string) { idx_t t, p; if (!PyString_Check(string)) { PyErr_SetString(PyExc_TypeError, "byte string expected"); return NULL; } t = self->nbits; p = setunused(self); self->nbits += p; if (extend_rawstring(self, string) < 0) return NULL; if (delete_n(self, t, p) < 0) return NULL; Py_RETURN_NONE; } PyDoc_STRVAR(frombytes_doc, "frombytes(bytes)\n\ \n\ Append from a byte string, interpreted as machine values."); static PyObject * bitarray_tobytes(bitarrayobject *self) { setunused(self); return PyString_FromStringAndSize(self->ob_item, Py_SIZE(self)); } PyDoc_STRVAR(tobytes_doc, "tobytes() -> bytes\n\ \n\ Return the byte representation of the bitarray.\n\ When the length of the bitarray is not a multiple of 8, the few remaining\n\ bits (1..7) are set to 0."); static PyObject * bitarray_to01(bitarrayobject *self) { #ifdef IS_PY3K PyObject *string, *unpacked; unpacked = unpack(self, '0', '1'); string = PyUnicode_FromEncodedObject(unpacked, NULL, NULL); Py_DECREF(unpacked); return string; #else return unpack(self, '0', '1'); #endif } PyDoc_STRVAR(to01_doc, "to01() -> string\n\ \n\ Return a string containing '0's and '1's, representing the bits in the\n\ bitarray object.\n\ Note: To extend a bitarray from a string containing '0's and '1's,\n\ use the extend method."); static PyObject * bitarray_unpack(bitarrayobject *self, PyObject *args, PyObject *kwds) { char zero = 0x00, one = 0xff; static char* kwlist[] = {"zero", "one", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|cc:unpack", kwlist, &zero, &one)) return NULL; return unpack(self, zero, one); } PyDoc_STRVAR(unpack_doc, "unpack(zero=b'\\x00', one=b'\\xff') -> bytes\n\ \n\ Return a byte string containing one character for each bit in the bitarray,\n\ using the specified 
mapping.\n\ See also the pack method."); static PyObject * bitarray_pack(bitarrayobject *self, PyObject *string) { if (!PyString_Check(string)) { PyErr_SetString(PyExc_TypeError, "byte string expected"); return NULL; } if (extend_string(self, string, STR_RAW) < 0) return NULL; Py_RETURN_NONE; } PyDoc_STRVAR(pack_doc, "pack(bytes)\n\ \n\ Extend the bitarray from a byte string, where each characters corresponds to\n\ a single bit. The character b'\\x00' maps to bit 0 and all other characters\n\ map to bit 1.\n\ This method, as well as the unpack method, are meant for efficient\n\ transfer of data between bitarray objects to other python objects\n\ (for example NumPy's ndarray object) which have a different view of memory."); static PyObject * bitarray_repr(bitarrayobject *self) { PyObject *string; #ifdef IS_PY3K PyObject *decoded; #endif if (self->nbits == 0) { string = PyString_FromString("bitarray()"); if (string == NULL) return NULL; } else { string = PyString_FromString("bitarray(\'"); if (string == NULL) return NULL; PyString_ConcatAndDel(&string, unpack(self, '0', '1')); PyString_ConcatAndDel(&string, PyString_FromString("\')")); } #ifdef IS_PY3K decoded = PyUnicode_FromEncodedObject(string, NULL, NULL); Py_DECREF(string); string = decoded; #endif return string; } static PyObject * bitarray_insert(bitarrayobject *self, PyObject *args) { idx_t i; PyObject *v; if (!PyArg_ParseTuple(args, "LO:insert", &i, &v)) return NULL; if (i < 0) { i += self->nbits; if (i < 0) i = 0; } if (i > self->nbits) i = self->nbits; if (insert_n(self, i, 1) < 0) return NULL; if (set_item(self, i, v) < 0) return NULL; Py_RETURN_NONE; } PyDoc_STRVAR(insert_doc, "insert(i, item)\n\ \n\ Insert bool(item) into the bitarray before position i."); static PyObject * bitarray_pop(bitarrayobject *self, PyObject *args) { idx_t i = -1; long vi; if (!PyArg_ParseTuple(args, "|L:pop", &i)) return NULL; if (self->nbits == 0) { /* special case -- most common failure cause */ 
PyErr_SetString(PyExc_IndexError, "pop from empty bitarray"); return NULL; } if (i < 0) i += self->nbits; if (i < 0 || i >= self->nbits) { PyErr_SetString(PyExc_IndexError, "pop index out of range"); return NULL; } vi = GETBIT(self, i); if (delete_n(self, i, 1) < 0) return NULL; return PyBool_FromLong(vi); } PyDoc_STRVAR(pop_doc, "pop([i]) -> item\n\ \n\ Return the i-th (default last) element and delete it from the bitarray.\n\ Raises IndexError if bitarray is empty or index is out of range."); static PyObject * bitarray_remove(bitarrayobject *self, PyObject *v) { idx_t i; long vi; vi = PyObject_IsTrue(v); if (vi < 0) return NULL; i = findfirst(self, vi, 0, -1); if (i < 0) { PyErr_SetString(PyExc_ValueError, "remove(x): x not in bitarray"); return NULL; } if (delete_n(self, i, 1) < 0) return NULL; Py_RETURN_NONE; } PyDoc_STRVAR(remove_doc, "remove(item)\n\ \n\ Remove the first occurrence of bool(item) in the bitarray.\n\ Raises ValueError if item is not present."); /* --------- special methods ----------- */ static PyObject * bitarray_getitem(bitarrayobject *self, PyObject *a) { PyObject *res; idx_t start, stop, step, slicelength, j, i = 0; if (IS_INDEX(a)) { if (getIndex(a, &i) < 0) return NULL; if (i < 0) i += self->nbits; if (i < 0 || i >= self->nbits) { PyErr_SetString(PyExc_IndexError, "bitarray index out of range"); return NULL; } return PyBool_FromLong(GETBIT(self, i)); } if (PySlice_Check(a)) { if (slice_GetIndicesEx((PySliceObject *) a, self->nbits, &start, &stop, &step, &slicelength) < 0) { return NULL; } res = newbitarrayobject(Py_TYPE(self), slicelength, self->endian); if (res == NULL) return NULL; for (i = 0, j = start; i < slicelength; i++, j += step) setbit((bitarrayobject *) res, i, GETBIT(self, j)); return res; } PyErr_SetString(PyExc_TypeError, "index or slice expected"); return NULL; } /* Sets the elements, specified by slice, in self to the value(s) given by v which is either a bitarray or a boolean. 
*/ static int setslice(bitarrayobject *self, PySliceObject *slice, PyObject *v) { idx_t start, stop, step, slicelength, j, i = 0; if (slice_GetIndicesEx(slice, self->nbits, &start, &stop, &step, &slicelength) < 0) return -1; if (bitarray_Check(v)) { #define vv ((bitarrayobject *) v) if (vv->nbits == slicelength) { for (i = 0, j = start; i < slicelength; i++, j += step) setbit(self, j, GETBIT(vv, i)); return 0; } if (step != 1) { char buff[256]; sprintf(buff, "attempt to assign sequence of size %lld " "to extended slice of size %lld", vv->nbits, (idx_t) slicelength); PyErr_SetString(PyExc_ValueError, buff); return -1; } /* make self bigger or smaller */ if (vv->nbits > slicelength) { if (insert_n(self, start, vv->nbits - slicelength) < 0) return -1; } else { if (delete_n(self, start, slicelength - vv->nbits) < 0) return -1; } /* copy the new values into self */ copy_n(self, start, vv, 0, vv->nbits); #undef vv return 0; } if (IS_INT_OR_BOOL(v)) { int vi; vi = IntBool_AsInt(v); if (vi < 0) return -1; for (i = 0, j = start; i < slicelength; i++, j += step) setbit(self, j, vi); return 0; } PyErr_SetString(PyExc_IndexError, "bitarray or bool expected for slice assignment"); return -1; } static PyObject * bitarray_setitem(bitarrayobject *self, PyObject *args) { PyObject *a, *v; idx_t i = 0; if (!PyArg_ParseTuple(args, "OO:__setitem__", &a, &v)) return NULL; if (IS_INDEX(a)) { if (getIndex(a, &i) < 0) return NULL; if (i < 0) i += self->nbits; if (i < 0 || i >= self->nbits) { PyErr_SetString(PyExc_IndexError, "bitarray index out of range"); return NULL; } if (set_item(self, i, v) < 0) return NULL; Py_RETURN_NONE; } if (PySlice_Check(a)) { if (setslice(self, (PySliceObject *) a, v) < 0) return NULL; Py_RETURN_NONE; } PyErr_SetString(PyExc_TypeError, "index or slice expected"); return NULL; } static PyObject * bitarray_delitem(bitarrayobject *self, PyObject *a) { idx_t start, stop, step, slicelength, j, i = 0; if (IS_INDEX(a)) { if (getIndex(a, &i) < 0) return NULL; if (i < 
0) i += self->nbits; if (i < 0 || i >= self->nbits) { PyErr_SetString(PyExc_IndexError, "bitarray index out of range"); return NULL; } if (delete_n(self, i, 1) < 0) return NULL; Py_RETURN_NONE; } if (PySlice_Check(a)) { if (slice_GetIndicesEx((PySliceObject *) a, self->nbits, &start, &stop, &step, &slicelength) < 0) { return NULL; } if (slicelength == 0) Py_RETURN_NONE; if (step < 0) { stop = start + 1; start = stop + step * (slicelength - 1) - 1; step = -step; } if (step == 1) { assert(stop - start == slicelength); if (delete_n(self, start, slicelength) < 0) return NULL; Py_RETURN_NONE; } /* this is the only complicated part when step > 1 */ for (i = j = start; i < self->nbits; i++) if ((i - start) % step != 0 || i >= stop) { setbit(self, j, GETBIT(self, i)); j++; } if (resize(self, self->nbits - slicelength) < 0) return NULL; Py_RETURN_NONE; } PyErr_SetString(PyExc_TypeError, "index or slice expected"); return NULL; } /* ---------- number methods ---------- */ static PyObject * bitarray_add(bitarrayobject *self, PyObject *other) { PyObject *res; res = bitarray_copy(self); if (extend_dispatch((bitarrayobject *) res, other) < 0) { Py_DECREF(res); return NULL; } return res; } static PyObject * bitarray_iadd(bitarrayobject *self, PyObject *other) { if (extend_dispatch(self, other) < 0) return NULL; Py_INCREF(self); return (PyObject *) self; } static PyObject * bitarray_mul(bitarrayobject *self, PyObject *v) { PyObject *res; idx_t vi = 0; if (!IS_INDEX(v)) { PyErr_SetString(PyExc_TypeError, "integer value expected for bitarray repetition"); return NULL; } if (getIndex(v, &vi) < 0) return NULL; res = bitarray_copy(self); if (repeat((bitarrayobject *) res, vi) < 0) { Py_DECREF(res); return NULL; } return res; } static PyObject * bitarray_imul(bitarrayobject *self, PyObject *v) { idx_t vi = 0; if (!IS_INDEX(v)) { PyErr_SetString(PyExc_TypeError, "integer value expected for in-place bitarray repetition"); return NULL; } if (getIndex(v, &vi) < 0) return NULL; if 
(repeat(self, vi) < 0) return NULL; Py_INCREF(self); return (PyObject *) self; } static PyObject * bitarray_cpinvert(bitarrayobject *self) { PyObject *res; res = bitarray_copy(self); invert((bitarrayobject *) res); return res; } #define BITWISE_FUNC(oper) \ static PyObject * \ bitarray_ ## oper (bitarrayobject *self, PyObject *other) \ { \ PyObject *res; \ \ res = bitarray_copy(self); \ if (bitwise((bitarrayobject *) res, other, OP_ ## oper) < 0) { \ Py_DECREF(res); \ return NULL; \ } \ return res; \ } BITWISE_FUNC(and) BITWISE_FUNC(or) BITWISE_FUNC(xor) #define BITWISE_IFUNC(oper) \ static PyObject * \ bitarray_i ## oper (bitarrayobject *self, PyObject *other) \ { \ if (bitwise(self, other, OP_ ## oper) < 0) \ return NULL; \ Py_INCREF(self); \ return (PyObject *) self; \ } BITWISE_IFUNC(and) BITWISE_IFUNC(or) BITWISE_IFUNC(xor) /******************* variable length encoding and decoding ***************/ static PyObject * bitarray_encode(bitarrayobject *self, PyObject *args) { PyObject *codedict, *iterable, *iter, *symbol, *bits; if (!PyArg_ParseTuple(args, "OO:_encode", &codedict, &iterable)) return NULL; iter = PyObject_GetIter(iterable); if (iter == NULL) { PyErr_SetString(PyExc_TypeError, "iterable object expected"); return NULL; } /* extend self with the bitarrays from codedict */ while ((symbol = PyIter_Next(iter)) != NULL) { bits = PyDict_GetItem(codedict, symbol); Py_DECREF(symbol); if (bits == NULL) { PyErr_SetString(PyExc_ValueError, "symbol not in prefix code"); goto error; } if (extend_bitarray(self, (bitarrayobject *) bits) < 0) goto error; } Py_DECREF(iter); if (PyErr_Occurred()) return NULL; Py_RETURN_NONE; error: Py_DECREF(iter); return NULL; } PyDoc_STRVAR(encode_doc, "_encode(code, iterable)\n\ \n\ like the encode method without code checking"); /* return the leave node resulting from traversing the (binary) tree, or, when the iteration is finished, NULL */ static PyObject * tree_traverse(bitarrayobject *self, idx_t *indexp, PyObject *tree) { 
PyObject *subtree; long vi; if (*indexp == self->nbits) /* stop iterator */ return NULL; vi = GETBIT(self, *indexp); (*indexp)++; subtree = PyList_GetItem(tree, vi); if (PyList_Check(subtree) && PyList_Size(subtree) == 2) return tree_traverse(self, indexp, subtree); else return subtree; } #define IS_EMPTY_LIST(x) (PyList_Check(x) && PyList_Size(x) == 0) static PyObject * bitarray_decode(bitarrayobject *self, PyObject *tree) { PyObject *symbol, *list; idx_t index = 0; list = PyList_New(0); if (list == NULL) return NULL; /* traverse binary tree and append symbols to the result list */ while ((symbol = tree_traverse(self, &index, tree)) != NULL) { if (IS_EMPTY_LIST(symbol)) { PyErr_SetString(PyExc_ValueError, "prefix code does not match data in bitarray"); goto error; } if (PyList_Append(list, symbol) < 0) goto error; } return list; error: Py_DECREF(list); return NULL; } PyDoc_STRVAR(decode_doc, "_decode(tree) -> list\n\ \n\ Given a tree, decode the content of the bitarray and return the list of\n\ symbols."); /*********************** (Bitarray) Decode Iterator *********************/ typedef struct { PyObject_HEAD bitarrayobject *bao; /* bitarray we're searching in */ PyObject *tree; /* prefix tree containing symbols */ idx_t index; /* current index in bitarray */ } decodeiterobject; static PyTypeObject DecodeIter_Type; #define DecodeIter_Check(op) PyObject_TypeCheck(op, &DecodeIter_Type) /* create a new initialized bitarray search iterator object */ static PyObject * bitarray_iterdecode(bitarrayobject *self, PyObject *tree) { decodeiterobject *it; /* iterator to be returned */ it = PyObject_GC_New(decodeiterobject, &DecodeIter_Type); if (it == NULL) return NULL; Py_INCREF(self); it->bao = self; Py_INCREF(tree); it->tree = tree; it->index = 0; PyObject_GC_Track(it); return (PyObject *) it; } PyDoc_STRVAR(iterdecode_doc, "_iterdecode(tree) -> iterator\n\ \n\ Given a tree, decode the content of the bitarray and iterate over the\n\ symbols."); static PyObject * 
/* tp_traverse for the decode iterator: report every object reference the
   iterator owns to the cyclic garbage collector.

   The original only visited it->bao.  it->tree is a list, i.e. a
   container that can take part in reference cycles, and the iterator
   holds a strong reference to it (taken in bitarray_iterdecode and
   released in decodeiter_dealloc), so it has to be visited as well. */
static int
decodeiter_traverse(decodeiterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->bao);
    Py_VISIT(it->tree);
    return 0;
}
/* itersearch(x): create a search iterator that yields the positions at
   which the bitarray x occurs in self.

   Raises TypeError when x is not a bitarray and ValueError when x is
   empty.  The iterator keeps strong references to both self and x and
   starts searching at position 0.
   Returns a new reference, or NULL with an exception set. */
static PyObject *
bitarray_itersearch(bitarrayobject *self, PyObject *x)
{
    searchiterobject *iterator;
    bitarrayobject *pattern;

    if (!bitarray_Check(x)) {
        PyErr_SetString(PyExc_TypeError, "bitarray expected for itersearch");
        return NULL;
    }

    pattern = (bitarrayobject *) x;
    if (pattern->nbits == 0) {
        PyErr_SetString(PyExc_ValueError, "can't search for empty bitarray");
        return NULL;
    }

    iterator = PyObject_GC_New(searchiterobject, &SearchIter_Type);
    if (iterator == NULL)
        return NULL;

    /* fill in all fields before the collector gets to see the object */
    iterator->p = 0;
    iterator->bao = self;
    Py_INCREF(self);
    iterator->xa = pattern;
    Py_INCREF(pattern);

    PyObject_GC_Track(iterator);
    return (PyObject *) iterator;
}
Py_TPFLAGS_HAVE_GC, /* tp_flags */ 0, /* tp_doc */ (traverseproc) searchiter_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc) searchiter_next, /* tp_iternext */ 0, /* tp_methods */ }; /*************************** Method definitions *************************/ static PyMethodDef bitarray_methods[] = { {"all", (PyCFunction) bitarray_all, METH_NOARGS, all_doc}, {"any", (PyCFunction) bitarray_any, METH_NOARGS, any_doc}, {"append", (PyCFunction) bitarray_append, METH_O, append_doc}, {"buffer_info", (PyCFunction) bitarray_buffer_info, METH_NOARGS, buffer_info_doc}, {"bytereverse", (PyCFunction) bitarray_bytereverse, METH_NOARGS, bytereverse_doc}, {"copy", (PyCFunction) bitarray_copy, METH_NOARGS, copy_doc}, {"count", (PyCFunction) bitarray_count, METH_VARARGS, count_doc}, {"_decode", (PyCFunction) bitarray_decode, METH_O, decode_doc}, {"_iterdecode", (PyCFunction) bitarray_iterdecode, METH_O, iterdecode_doc}, {"_encode", (PyCFunction) bitarray_encode, METH_VARARGS, encode_doc}, {"endian", (PyCFunction) bitarray_endian, METH_NOARGS, endian_doc}, {"extend", (PyCFunction) bitarray_extend, METH_O, extend_doc}, {"fill", (PyCFunction) bitarray_fill, METH_NOARGS, fill_doc}, {"fromfile", (PyCFunction) bitarray_fromfile, METH_VARARGS, fromfile_doc}, {"frombytes", (PyCFunction) bitarray_frombytes, METH_O, frombytes_doc}, {"index", (PyCFunction) bitarray_index, METH_VARARGS, index_doc}, {"insert", (PyCFunction) bitarray_insert, METH_VARARGS, insert_doc}, {"invert", (PyCFunction) bitarray_invert, METH_NOARGS, invert_doc}, {"length", (PyCFunction) bitarray_length, METH_NOARGS, length_doc}, {"pack", (PyCFunction) bitarray_pack, METH_O, pack_doc}, {"pop", (PyCFunction) bitarray_pop, METH_VARARGS, pop_doc}, {"remove", (PyCFunction) bitarray_remove, METH_O, remove_doc}, {"reverse", (PyCFunction) bitarray_reverse, METH_NOARGS, reverse_doc}, {"setall", (PyCFunction) bitarray_setall, METH_O, 
setall_doc}, {"search", (PyCFunction) bitarray_search, METH_VARARGS, search_doc}, {"itersearch", (PyCFunction) bitarray_itersearch, METH_O, itersearch_doc}, {"sort", (PyCFunction) bitarray_sort, METH_VARARGS | METH_KEYWORDS, sort_doc}, {"tofile", (PyCFunction) bitarray_tofile, METH_O, tofile_doc}, {"tolist", (PyCFunction) bitarray_tolist, METH_NOARGS, tolist_doc}, {"tobytes", (PyCFunction) bitarray_tobytes, METH_NOARGS, tobytes_doc}, {"to01", (PyCFunction) bitarray_to01, METH_NOARGS, to01_doc}, {"unpack", (PyCFunction) bitarray_unpack, METH_VARARGS | METH_KEYWORDS, unpack_doc}, /* special methods */ {"__copy__", (PyCFunction) bitarray_copy, METH_NOARGS, copy_doc}, {"__deepcopy__", (PyCFunction) bitarray_copy, METH_O, copy_doc}, {"__len__", (PyCFunction) bitarray_length, METH_NOARGS, len_doc}, {"__contains__", (PyCFunction) bitarray_contains, METH_O, contains_doc}, {"__reduce__", (PyCFunction) bitarray_reduce, METH_NOARGS, reduce_doc}, /* slice methods */ {"__delitem__", (PyCFunction) bitarray_delitem, METH_O, 0}, {"__getitem__", (PyCFunction) bitarray_getitem, METH_O, 0}, {"__setitem__", (PyCFunction) bitarray_setitem, METH_VARARGS, 0}, /* number methods */ {"__add__", (PyCFunction) bitarray_add, METH_O, 0}, {"__iadd__", (PyCFunction) bitarray_iadd, METH_O, 0}, {"__mul__", (PyCFunction) bitarray_mul, METH_O, 0}, {"__rmul__", (PyCFunction) bitarray_mul, METH_O, 0}, {"__imul__", (PyCFunction) bitarray_imul, METH_O, 0}, {"__and__", (PyCFunction) bitarray_and, METH_O, 0}, {"__or__", (PyCFunction) bitarray_or, METH_O, 0}, {"__xor__", (PyCFunction) bitarray_xor, METH_O, 0}, {"__iand__", (PyCFunction) bitarray_iand, METH_O, 0}, {"__ior__", (PyCFunction) bitarray_ior, METH_O, 0}, {"__ixor__", (PyCFunction) bitarray_ixor, METH_O, 0}, {"__invert__", (PyCFunction) bitarray_cpinvert, METH_NOARGS, 0}, {NULL, NULL} /* sentinel */ }; static PyObject * bitarray_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *a; /* to be returned in some cases */ PyObject 
*initial = NULL; char *endian_str = NULL; int endian; static char* kwlist[] = {"initial", "endian", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Os:bitarray", kwlist, &initial, &endian_str)) return NULL; if (endian_str == NULL) { endian = DEFAULT_ENDIAN; /* use default value */ } else if (strcmp(endian_str, "little") == 0) { endian = 0; } else if (strcmp(endian_str, "big") == 0) { endian = 1; } else { PyErr_SetString(PyExc_ValueError, "endian must be 'little' or 'big'"); return NULL; } /* no arg or None */ if (initial == NULL || initial == Py_None) return newbitarrayobject(type, 0, endian); /* int, long */ if (IS_INDEX(initial)) { idx_t nbits = 0; if (getIndex(initial, &nbits) < 0) return NULL; if (nbits < 0) { PyErr_SetString(PyExc_ValueError, "cannot create bitarray with negative length"); return NULL; } return newbitarrayobject(type, nbits, endian); } /* from bitarray itself */ if (bitarray_Check(initial)) { #define np ((bitarrayobject *) initial) a = newbitarrayobject(type, np->nbits, endian_str == NULL ? 
np->endian : endian);
        if (a == NULL)
            return NULL;
        memcpy(((bitarrayobject *) a)->ob_item, np->ob_item, Py_SIZE(np));
#undef np
        return a;
    }

    /* string */
    if (PyString_Check(initial)) {
        Py_ssize_t strlen;
        char *str;

        strlen = PyString_Size(initial);
        if (strlen == 0)        /* empty string */
            return newbitarrayobject(type, 0, endian);

        str = PyString_AsString(initial);
        if (0 <= str[0] && str[0] < 8) {
            /* when the first character is smaller than 8, it indicates the
               number of unused bits at the end, and rest of the bytes
               consist of the raw binary data, this is used for pickling */
            if (strlen == 1 && str[0] > 0) {
                /* a lone header byte announcing unused bits has no data
                   byte those bits could belong to */
                PyErr_Format(PyExc_ValueError,
                             "did not expect 0x0%d", (int) str[0]);
                return NULL;
            }
            a = newbitarrayobject(type, BITS(strlen - 1) - ((idx_t) str[0]),
                                  endian);
            if (a == NULL)
                return NULL;
            memcpy(((bitarrayobject *) a)->ob_item, str + 1, strlen - 1);
            return a;
        }
    }

    /* leave remaining type dispatch to the extend method */
    a = newbitarrayobject(type, 0, endian);
    if (a == NULL)
        return NULL;

    if (extend_dispatch((bitarrayobject *) a, initial) < 0) {
        Py_DECREF(a);
        return NULL;
    }
    return a;
}

/* Rich comparison, installed below as tp_richcompare of Bitarraytype.

   Returns a new reference to Py_NotImplemented unless both operands pass
   bitarray_Check().  Otherwise the operands are compared bit by bit,
   starting at index 0: the first position at which the bits differ decides
   the result.  When no bit differs within the common length, the result is
   obtained by comparing the two lengths (nbits) with the same operator. */
static PyObject *
richcompare(PyObject *v, PyObject *w, int op)
{
    int cmp, vi, wi;
    idx_t i, vs, ws;

    if (!bitarray_Check(v) || !bitarray_Check(w)) {
        Py_INCREF(Py_NotImplemented);
        return Py_NotImplemented;
    }
#define va  ((bitarrayobject *) v)
#define wa  ((bitarrayobject *) w)
    vs = va->nbits;
    ws = wa->nbits;
    if (vs != ws) {
        /* shortcut for EQ/NE: if sizes differ, the bitarrays differ */
        if (op == Py_EQ)
            Py_RETURN_FALSE;
        if (op == Py_NE)
            Py_RETURN_TRUE;
    }

    /* to avoid uninitialized warning for some compilers */
    vi = wi = 0;
    /* search for the first index where items are different */
    for (i = 0; i < vs && i < ws; i++) {
        vi = GETBIT(va, i);
        wi = GETBIT(wa, i);
        if (vi != wi) {
            /* we have an item that differs -- first, shortcut for EQ/NE */
            if (op == Py_EQ)
                Py_RETURN_FALSE;
            if (op == Py_NE)
                Py_RETURN_TRUE;
            /* compare the final item using the proper operator */
            switch (op) {
            case Py_LT: cmp = vi <  wi; break;
            case Py_LE: cmp = vi <= wi; break;
            case Py_EQ: cmp = vi == wi; break;
            case Py_NE: cmp = vi != wi; break;
            case Py_GT: cmp = vi >  wi; break;
            case Py_GE: cmp = vi >= wi; break;
            default: return NULL;  /* cannot happen */
            }
            return PyBool_FromLong((long) cmp);
        }
    }
#undef va
#undef wa

    /* no more items to compare -- compare sizes */
    switch (op) {
    case Py_LT: cmp = vs <  ws; break;
    case Py_LE: cmp = vs <= ws; break;
    case Py_EQ: cmp = vs == ws; break;
    case Py_NE: cmp = vs != ws; break;
    case Py_GT: cmp = vs >  ws; break;
    case Py_GE: cmp = vs >= ws; break;
    default: return NULL;  /* cannot happen */
    }
    return PyBool_FromLong((long) cmp);
}

/************************** Bitarray Iterator **************************/

/* State of one iteration over a bitarray.  The iterator owns a reference
   to the bitarray (taken in bitarray_iter, dropped in
   bitarrayiter_dealloc). */
typedef struct {
    PyObject_HEAD
    bitarrayobject *bao;        /* bitarray we're iterating over */
    idx_t index;                /* current index in bitarray */
} bitarrayiterobject;

static PyTypeObject BitarrayIter_Type;

#define BitarrayIter_Check(op)  PyObject_TypeCheck(op, &BitarrayIter_Type)

/* create a new initialized bitarray iterator object, this object is
   returned when calling iter(a) (this function is the tp_iter slot of
   Bitarraytype).  Returns NULL if the allocation fails. */
static PyObject *
bitarray_iter(bitarrayobject *self)
{
    bitarrayiterobject *it;

    assert(bitarray_Check(self));
    it = PyObject_GC_New(bitarrayiterobject, &BitarrayIter_Type);
    if (it == NULL)
        return NULL;

    Py_INCREF(self);
    it->bao = self;
    it->index = 0;
    PyObject_GC_Track(it);
    return (PyObject *) it;
}

/* tp_iternext: return the bit at the current index as a Python bool and
   advance.  The bound is re-read from the bitarray on every call.  NULL
   without an exception set signals the end of the iteration. */
static PyObject *
bitarrayiter_next(bitarrayiterobject *it)
{
    long vi;

    assert(BitarrayIter_Check(it));
    if (it->index < it->bao->nbits) {
        vi = GETBIT(it->bao, it->index);
        it->index++;
        return PyBool_FromLong(vi);
    }
    return NULL;  /* stop iteration */
}

/* tp_dealloc: untrack from the GC, release the bitarray, free the object */
static void
bitarrayiter_dealloc(bitarrayiterobject *it)
{
    PyObject_GC_UnTrack(it);
    Py_XDECREF(it->bao);
    PyObject_GC_Del(it);
}

/* tp_traverse: the bitarray is the only object reference the iterator
   holds */
static int
bitarrayiter_traverse(bitarrayiterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->bao);
    return 0;
}

/* Type object of the iterator.  The initialiser is positional: the order of
   the entries must follow the PyTypeObject field order. */
static PyTypeObject BitarrayIter_Type = {
#ifdef IS_PY3K
    PyVarObject_HEAD_INIT(&BitarrayIter_Type, 0)
#else
    PyObject_HEAD_INIT(NULL)
    0,                                        /* ob_size */
#endif
    "bitarrayiterator",                       /* tp_name */
    sizeof(bitarrayiterobject),               /* tp_basicsize */
    0,                                        /* tp_itemsize */
    /* methods */
    (destructor) bitarrayiter_dealloc,        /* tp_dealloc */
    0,                                        /* tp_print */
    0,                                        /* tp_getattr */
    0,                                        /* tp_setattr */
    0,                                        /* tp_compare */
    0,                                        /* tp_repr */
    0,                                        /* tp_as_number */
    0,                                        /* tp_as_sequence */
    0,                                        /* tp_as_mapping */
    0,                                        /* tp_hash */
    0,                                        /* tp_call */
    0,                                        /* tp_str */
    PyObject_GenericGetAttr,                  /* tp_getattro */
    0,                                        /* tp_setattro */
    0,                                        /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,  /* tp_flags */
    0,                                        /* tp_doc */
    (traverseproc) bitarrayiter_traverse,     /* tp_traverse */
    0,                                        /* tp_clear */
    0,                                        /* tp_richcompare */
    0,                                        /* tp_weaklistoffset */
    PyObject_SelfIter,                        /* tp_iter */
    (iternextfunc) bitarrayiter_next,         /* tp_iternext */
    0,                                        /* tp_methods */
};

/********************* Bitarray Buffer Interface ************************/
#ifdef WITH_BUFFER

/* Old-style buffer slot: expose ob_item as the single segment (index 0).
   Returns Py_SIZE(self), or -1 with SystemError set for any other index. */
static Py_ssize_t
bitarray_buffer_getreadbuf(bitarrayobject *self,
                           Py_ssize_t index, const void **ptr)
{
    if (index != 0) {
        PyErr_SetString(PyExc_SystemError, "accessing non-existent segment");
        return -1;
    }
    *ptr = (void *) self->ob_item;
    return Py_SIZE(self);
}

/* Old-style buffer slot for write access; same body as the read variant */
static Py_ssize_t
bitarray_buffer_getwritebuf(bitarrayobject *self,
                            Py_ssize_t index, const void **ptr)
{
    if (index != 0) {
        PyErr_SetString(PyExc_SystemError, "accessing non-existent segment");
        return -1;
    }
    *ptr = (void *) self->ob_item;
    return Py_SIZE(self);
}

/* Old-style buffer slot: there is always exactly one segment; when lenp is
   given it receives Py_SIZE(self) */
static Py_ssize_t
bitarray_buffer_getsegcount(bitarrayobject *self, Py_ssize_t *lenp)
{
    if (lenp)
        *lenp = Py_SIZE(self);
    return 1;
}

/* Old-style buffer slot: like getreadbuf, but hands out a char pointer */
static Py_ssize_t
bitarray_buffer_getcharbuf(bitarrayobject *self,
                           Py_ssize_t index, const char **ptr)
{
    if (index != 0) {
        PyErr_SetString(PyExc_SystemError, "accessing non-existent segment");
        return -1;
    }
    *ptr = self->ob_item;
    return Py_SIZE(self);
}

/* New-style buffer slot: fill `view` with a writable (readonly = 0) view of
   ob_item spanning Py_SIZE(self).  Every successful call increments
   ob_exports, including the call with view == NULL; the matching decrement
   happens in bitarray_releasebuffer.
   NOTE(review): ob_exports is presumably consulted elsewhere to refuse
   resizing while a buffer is exported -- not visible in this part of the
   file, confirm before relying on it. */
static int
bitarray_getbuffer(bitarrayobject *self, Py_buffer *view, int flags)
{
    int ret;
    void *ptr;

    if (view == NULL) {
        self->ob_exports++;
        return 0;
    }
    ptr = (void *) self->ob_item;
    ret = PyBuffer_FillInfo(view, (PyObject *) self, ptr,
                            Py_SIZE(self), 0, flags);
    if (ret >= 0) {
        self->ob_exports++;
    }
    return ret;
}

/* New-style buffer slot: undo the export counted by bitarray_getbuffer */
static void
bitarray_releasebuffer(bitarrayobject *self, Py_buffer *view)
{
    self->ob_exports--;
}

/* Positional initialiser: four old-style slots followed by the two
   new-style ones */
static PyBufferProcs bitarray_as_buffer = {
    (readbufferproc) bitarray_buffer_getreadbuf,
    (writebufferproc) bitarray_buffer_getwritebuf,
    (segcountproc) bitarray_buffer_getsegcount,
    (charbufferproc) bitarray_buffer_getcharbuf,
    (getbufferproc) bitarray_getbuffer,
    (releasebufferproc) bitarray_releasebuffer,
};

#endif  /* WITH_BUFFER */

/************************** Bitarray Type *******************************/

/* Type object of the C level bitarray ("bitarray._bitarray").  The
   initialiser is positional: the order of the entries must follow the
   PyTypeObject field order, so the preprocessor conditionals below may only
   select between alternatives for the same slot. */
static PyTypeObject Bitarraytype = {
#ifdef IS_PY3K
    PyVarObject_HEAD_INIT(&Bitarraytype, 0)
#else
    PyObject_HEAD_INIT(NULL)
    0,                                        /* ob_size */
#endif
    "bitarray._bitarray",                     /* tp_name */
    sizeof(bitarrayobject),                   /* tp_basicsize */
    0,                                        /* tp_itemsize */
    /* methods */
    (destructor) bitarray_dealloc,            /* tp_dealloc */
    0,                                        /* tp_print */
    0,                                        /* tp_getattr */
    0,                                        /* tp_setattr */
    0,                                        /* tp_compare */
    (reprfunc) bitarray_repr,                 /* tp_repr */
    0,                                        /* tp_as_number */
    0,                                        /* tp_as_sequence */
    0,                                        /* tp_as_mapping */
    0,                                        /* tp_hash */
    0,                                        /* tp_call */
    0,                                        /* tp_str */
    PyObject_GenericGetAttr,                  /* tp_getattro */
    0,                                        /* tp_setattro */
#ifdef WITH_BUFFER
    &bitarray_as_buffer,                      /* tp_as_buffer */
#else
    0,                                        /* tp_as_buffer */
#endif
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS
#ifdef WITH_BUFFER
    | Py_TPFLAGS_HAVE_NEWBUFFER
#endif
    ,                                         /* tp_flags */
    0,                                        /* tp_doc */
    0,                                        /* tp_traverse */
    0,                                        /* tp_clear */
    richcompare,                              /* tp_richcompare */
    offsetof(bitarrayobject, weakreflist),    /* tp_weaklistoffset */
    (getiterfunc) bitarray_iter,              /* tp_iter */
    0,                                        /* tp_iternext */
    bitarray_methods,                         /* tp_methods */
    0,                                        /* tp_members */
    0,                                        /* tp_getset */
    0,                                        /* tp_base */
    0,                                        /* tp_dict */
    0,                                        /* tp_descr_get */
    0,                                        /* tp_descr_set */
    0,                                        /* tp_dictoffset */
    0,                                        /* tp_init */
    PyType_GenericAlloc,                      /* tp_alloc */
    bitarray_new,                             /* tp_new */
    PyObject_Del,                             /* tp_free */
};
/*************************** Module functions **********************/ static PyObject * bitdiff(PyObject *self, PyObject *args) { PyObject *a, *b; Py_ssize_t i; idx_t res = 0; unsigned char c; if (!PyArg_ParseTuple(args, "OO:bitdiff", &a, &b)) return NULL; if (!(bitarray_Check(a) && bitarray_Check(b))) { PyErr_SetString(PyExc_TypeError, "bitarray object expected"); return NULL; } #define aa ((bitarrayobject *) a) #define bb ((bitarrayobject *) b) if (aa->nbits != bb->nbits) { PyErr_SetString(PyExc_ValueError, "bitarrays of equal length expected"); return NULL; } setunused(aa); setunused(bb); for (i = 0; i < Py_SIZE(aa); i++) { c = aa->ob_item[i] ^ bb->ob_item[i]; res += bitcount_lookup[c]; } #undef aa #undef bb return PyLong_FromLongLong(res); } PyDoc_STRVAR(bitdiff_doc, "bitdiff(a, b) -> int\n\ \n\ Return the difference between two bitarrays a and b.\n\ This is function does the same as (a ^ b).count(), but is more memory\n\ efficient, as no intermediate bitarray object gets created"); static PyObject * bits2bytes(PyObject *self, PyObject *v) { idx_t n = 0; if (!IS_INDEX(v)) { PyErr_SetString(PyExc_TypeError, "integer expected"); return NULL; } if (getIndex(v, &n) < 0) return NULL; if (n < 0) { PyErr_SetString(PyExc_ValueError, "positive value expected"); return NULL; } return PyLong_FromLongLong(BYTES(n)); } PyDoc_STRVAR(bits2bytes_doc, "bits2bytes(n) -> int\n\ \n\ Return the number of bytes necessary to store n bits."); static PyObject * sysinfo(void) { return Py_BuildValue("iiiiL", (int) sizeof(void *), (int) sizeof(size_t), (int) sizeof(Py_ssize_t), (int) sizeof(idx_t), (idx_t) PY_SSIZE_T_MAX); } PyDoc_STRVAR(sysinfo_doc, "_sysinfo() -> tuple\n\ \n\ tuple(sizeof(void *),\n\ sizeof(size_t),\n\ sizeof(Py_ssize_t),\n\ sizeof(idx_t),\n\ PY_SSIZE_T_MAX)"); static PyMethodDef module_functions[] = { {"bitdiff", (PyCFunction) bitdiff, METH_VARARGS, bitdiff_doc }, {"bits2bytes", (PyCFunction) bits2bytes, METH_O, bits2bytes_doc}, {"_sysinfo", (PyCFunction) sysinfo, 
METH_NOARGS, sysinfo_doc }, {NULL, NULL} /* sentinel */ }; /*********************** Install Module **************************/ #ifdef IS_PY3K static PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "_bitarray", 0, -1, module_functions, }; PyMODINIT_FUNC PyInit__bitarray(void) #else PyMODINIT_FUNC init_bitarray(void) #endif { PyObject *m; Py_TYPE(&Bitarraytype) = &PyType_Type; Py_TYPE(&BitarrayIter_Type) = &PyType_Type; #ifdef IS_PY3K m = PyModule_Create(&moduledef); if (m == NULL) return NULL; #else m = Py_InitModule3("_bitarray", module_functions, 0); if (m == NULL) return; #endif Py_INCREF((PyObject *) &Bitarraytype); PyModule_AddObject(m, "_bitarray", (PyObject *) &Bitarraytype); #ifdef IS_PY3K return m; #endif } bitarray-0.8.1/bitarray/test_bitarray.py0000644000076500000240000020564412125715114020460 0ustar ilanstaff00000000000000""" Tests for bitarray Author: Ilan Schnell """ import os import sys import unittest import tempfile import shutil from random import randint is_py3k = bool(sys.version_info[0] == 3) if is_py3k: from io import StringIO else: from cStringIO import StringIO from bitarray import bitarray, bitdiff, bits2bytes, __version__ tests = [] if sys.version_info[:2] < (2, 6): def next(x): return x.next() def to_bytes(s): if is_py3k: return bytes(s.encode('latin1')) elif sys.version_info[:2] >= (2, 6): return bytes(s) else: return s class Util(object): def randombitarrays(self): for n in list(range(25)) + [randint(1000, 2000)]: a = bitarray(endian=['little', 'big'][randint(0, 1)]) a.frombytes(os.urandom(bits2bytes(n))) del a[n:] yield a def randomlists(self): for n in list(range(25)) + [randint(1000, 2000)]: yield [bool(randint(0, 1)) for d in range(n)] def rndsliceidx(self, length): if randint(0, 1): return None else: return randint(-2 * length, 2 * length - 1) def slicelen(self, r, length): return getIndicesEx(r, length)[-1] def check_obj(self, a): self.assertEqual(repr(type(a)), "") unused = 8 * a.buffer_info()[1] - len(a) self.assert_(0 <= unused 
< 8) self.assertEqual(unused, a.buffer_info()[3]) def assertEQUAL(self, a, b): self.assertEqual(a, b) self.assertEqual(a.endian(), b.endian()) self.check_obj(a) self.check_obj(b) def assertStopIteration(self, it): if is_py3k: return self.assertRaises(StopIteration, it.next) def getIndicesEx(r, length): if not isinstance(r, slice): raise TypeError("slice object expected") start = r.start stop = r.stop step = r.step if r.step is None: step = 1 else: if step == 0: raise ValueError("slice step cannot be zero") if step < 0: defstart = length - 1 defstop = -1 else: defstart = 0 defstop = length if r.start is None: start = defstart else: if start < 0: start += length if start < 0: start = [0, -1][step < 0] if start >= length: start = [length, length - 1][step < 0] if r.stop is None: stop = defstop else: if stop < 0: stop += length if stop < 0: stop = -1 if stop > length: stop = length if (step < 0 and stop >= length) or (step > 0 and start >= stop): slicelength = 0 elif step < 0: slicelength = (stop - start + 1) / step + 1 else: slicelength = (stop - start - 1) / step + 1 if slicelength < 0: slicelength = 0 return start, stop, step, slicelength # --------------------------------------------------------------------------- class TestsModuleFunctions(unittest.TestCase, Util): def test_bitdiff(self): a = bitarray('0011') b = bitarray('0101') self.assertEqual(bitdiff(a, b), 2) self.assertRaises(TypeError, bitdiff, a, '') self.assertRaises(TypeError, bitdiff, '1', b) self.assertRaises(TypeError, bitdiff, a, 4) b.append(1) self.assertRaises(ValueError, bitdiff, a, b) for n in list(range(50)) + [randint(1000, 2000)]: a = bitarray() a.frombytes(os.urandom(bits2bytes(n))) del a[n:] b = bitarray() b.frombytes(os.urandom(bits2bytes(n))) del b[n:] diff = sum(a[i] ^ b[i] for i in range(n)) self.assertEqual(bitdiff(a, b), diff) def test_bits2bytes(self): for arg in ['foo', [], None, {}]: self.assertRaises(TypeError, bits2bytes, arg) self.assertRaises(TypeError, bits2bytes) 
self.assertRaises(TypeError, bits2bytes, 1, 2) self.assertRaises(ValueError, bits2bytes, -1) self.assertRaises(ValueError, bits2bytes, -924) self.assertEqual(bits2bytes(0), 0) for n in range(1, 1000): self.assertEqual(bits2bytes(n), (n - 1) // 8 + 1) for n, m in [(0, 0), (1, 1), (2, 1), (7, 1), (8, 1), (9, 2), (10, 2), (15, 2), (16, 2), (64, 8), (65, 9), (0, 0), (1, 1), (65, 9), (2**29, 2**26), (2**31, 2**28), (2**32, 2**29), (2**34, 2**31), (2**34+793, 2**31+100), (2**35-8, 2**32-1), (2**62, 2**59), (2**63-8, 2**60-1)]: self.assertEqual(bits2bytes(n), m) tests.append(TestsModuleFunctions) # --------------------------------------------------------------------------- class CreateObjectTests(unittest.TestCase, Util): def test_noInitializer(self): a = bitarray() self.assertEqual(len(a), 0) self.assertEqual(a.tolist(), []) self.check_obj(a) def test_endian1(self): a = bitarray(endian='little') a.fromstring('A') self.assertEqual(a.endian(), 'little') self.check_obj(a) b = bitarray(endian='big') b.fromstring('A') self.assertEqual(b.endian(), 'big') self.check_obj(b) self.assertEqual(a.tostring(), b.tostring()) def test_endian2(self): a = bitarray(endian='little') a.fromstring(' ') self.assertEqual(a.endian(), 'little') self.check_obj(a) b = bitarray(endian='big') b.fromstring(' ') self.assertEqual(b.endian(), 'big') self.check_obj(b) self.assertEqual(a.tostring(), b.tostring()) self.assertRaises(TypeError, bitarray.__new__, bitarray, endian=0) self.assertRaises(ValueError, bitarray.__new__, bitarray, endian='') self.assertRaises(ValueError, bitarray.__new__, bitarray, endian='foo') def test_integers(self): for n in range(50): a = bitarray(n) self.assertEqual(len(a), n) self.check_obj(a) a = bitarray(int(n)) self.assertEqual(len(a), n) self.check_obj(a) self.assertRaises(ValueError, bitarray.__new__, bitarray, -1) self.assertRaises(ValueError, bitarray.__new__, bitarray, -924) def test_list(self): lst = ['foo', None, [1], {}] a = bitarray(lst) self.assertEqual(a.tolist(), 
[True, False, True, False]) self.check_obj(a) for n in range(50): lst = [bool(randint(0, 1)) for d in range(n)] a = bitarray(lst) self.assertEqual(a.tolist(), lst) self.check_obj(a) def test_tuple(self): tup = ('', True, [], {1:2}) a = bitarray(tup) self.assertEqual(a.tolist(), [False, True, False, True]) self.check_obj(a) for n in range(50): lst = [bool(randint(0, 1)) for d in range(n)] a = bitarray(tuple(lst)) self.assertEqual(a.tolist(), lst) self.check_obj(a) def test_iter1(self): for n in range(50): lst = [bool(randint(0, 1)) for d in range(n)] a = bitarray(iter(lst)) self.assertEqual(a.tolist(), lst) self.check_obj(a) def test_iter2(self): for lst in self.randomlists(): def foo(): for x in lst: yield x a = bitarray(foo()) self.assertEqual(a, bitarray(lst)) self.check_obj(a) def test_01(self): a = bitarray('0010111') self.assertEqual(a.tolist(), [0, 0, 1, 0, 1, 1, 1]) self.check_obj(a) for n in range(50): lst = [bool(randint(0, 1)) for d in range(n)] s = ''.join([['0', '1'][x] for x in lst]) a = bitarray(s) self.assertEqual(a.tolist(), lst) self.check_obj(a) self.assertRaises(ValueError, bitarray.__new__, bitarray, '01012100') def test_rawbytes(self): # this representation is used for pickling for s, r in [('\x00', ''), ('\x07\xff', '1'), ('\x03\xff', '11111'), ('\x01\x87\xda', '10000111' '1101101')]: self.assertEqual(bitarray(to_bytes(s), endian='big'), bitarray(r)) for i in range(1, 8): self.assertRaises(ValueError, bitarray.__new__, bitarray, to_bytes(chr(i))) def test_bitarray(self): for n in range(50): a = bitarray(n) b = bitarray(a) self.assert_(a is not b) self.assertEQUAL(a, b) for end in ('little', 'big'): a = bitarray(endian=end) c = bitarray(a) self.assertEqual(c.endian(), end) c = bitarray(a, endian='little') self.assertEqual(c.endian(), 'little') c = bitarray(a, endian='big') self.assertEqual(c.endian(), 'big') def test_None(self): self.assertEQUAL(bitarray(), bitarray(0)) self.assertEQUAL(bitarray(), bitarray(None)) def test_WrongArgs(self): 
self.assertRaises(TypeError, bitarray.__new__, bitarray, 'A', 42, 69) self.assertRaises(TypeError, bitarray.__new__, bitarray, Ellipsis) self.assertRaises(TypeError, bitarray.__new__, bitarray, slice(0)) self.assertRaises(TypeError, bitarray.__new__, bitarray, 2.345) self.assertRaises(TypeError, bitarray.__new__, bitarray, 4+3j) self.assertRaises(TypeError, bitarray.__new__, bitarray, '', 0, 42) self.assertRaises(ValueError, bitarray.__new__, bitarray, 0, 'foo') tests.append(CreateObjectTests) # --------------------------------------------------------------------------- class ToObjectsTests(unittest.TestCase, Util): def test_int(self): a = bitarray() self.assertRaises(TypeError, int, a) if not is_py3k: self.assertRaises(TypeError, long, a) def test_float(self): a = bitarray() self.assertRaises(TypeError, float, a) def test_complext(self): a = bitarray() self.assertRaises(TypeError, complex, a) def test_list(self): for a in self.randombitarrays(): self.assertEqual(list(a), a.tolist()) def test_tuple(self): for a in self.randombitarrays(): self.assertEqual(tuple(a), tuple(a.tolist())) tests.append(ToObjectsTests) # --------------------------------------------------------------------------- class MetaDataTests(unittest.TestCase): def test_buffer_info1(self): a = bitarray('0000111100001', endian='little') self.assertEqual(a.buffer_info()[1:4], (2, 'little', 3)) a = bitarray() self.assertRaises(TypeError, a.buffer_info, 42) bi = a.buffer_info() self.assert_(isinstance(bi, tuple)) self.assertEqual(len(bi), 5) self.assert_(isinstance(bi[0], int)) if is_py3k: self.assert_(isinstance(bi[1], int)) self.assert_(isinstance(bi[2], str)) self.assert_(isinstance(bi[3], int)) if is_py3k: self.assert_(isinstance(bi[4], int)) def test_buffer_info2(self): for n in range(50): bi = bitarray(n).buffer_info() self.assertEqual(bi[1], bits2bytes(n)) self.assertEqual(bi[3] + n, 8 * bi[1]) self.assert_(bi[4] >= bi[1]) def test_buffer_info3(self): a = bitarray(endian='little') 
self.assertEqual(a.buffer_info()[2], 'little') a = bitarray(endian='big') self.assertEqual(a.buffer_info()[2], 'big') def test_endian(self): a = bitarray(endian='little') self.assertEqual(a.endian(), 'little') a = bitarray(endian='big') self.assertEqual(a.endian(), 'big') def test_length(self): for n in range(1000): a = bitarray(n) self.assertEqual(len(a), n) self.assertEqual(a.length(), n) tests.append(MetaDataTests) # --------------------------------------------------------------------------- class SliceTests(unittest.TestCase, Util): def test_getitem1(self): a = bitarray() self.assertRaises(IndexError, a.__getitem__, 0) a.append(True) self.assertEqual(a[0], True) self.assertRaises(IndexError, a.__getitem__, 1) self.assertRaises(IndexError, a.__getitem__, -2) a.append(False) self.assertEqual(a[1], False) self.assertRaises(IndexError, a.__getitem__, 2) self.assertRaises(IndexError, a.__getitem__, -3) def test_getitem2(self): a = bitarray('1100010') for i, b in enumerate([True, True, False, False, False, True, False]): self.assertEqual(a[i], b) self.assertEqual(a[i-7], b) self.assertRaises(IndexError, a.__getitem__, 7) self.assertRaises(IndexError, a.__getitem__, -8) def test_getitem3(self): a = bitarray('0100000100001') self.assertEQUAL(a[:], a) self.assert_(a[:] is not a) aa = a.tolist() self.assertEQUAL(a[11:2:-3], bitarray(aa[11:2:-3])) self.check_obj(a[:]) self.assertRaises(ValueError, a.__getitem__, slice(None, None, 0)) self.assertRaises(TypeError, a.__getitem__, (1, 2)) def test_getitem4(self): for a in self.randombitarrays(): aa = a.tolist() la = len(a) if la == 0: continue for dum in range(10): step = self.rndsliceidx(la) if step == 0: step = None s = slice(self.rndsliceidx(la), self.rndsliceidx(la), step) self.assertEQUAL(a[s], bitarray(aa[s], endian=a.endian())) def test_setitem1(self): a = bitarray([False]) a[0] = 1 self.assertEqual(a.tolist(), [True]) a = bitarray(2) a[0] = 0 a[1] = 1 self.assertEqual(a.tolist(), [False, True]) a[-1] = 0 a[-2] = 1 
self.assertEqual(a.tolist(), [True, False]) self.assertRaises(IndexError, a.__setitem__, 2, True) self.assertRaises(IndexError, a.__setitem__, -3, False) def test_setitem2(self): for a in self.randombitarrays(): la = len(a) if la == 0: continue i = randint(0, la - 1) aa = a.tolist() ida = id(a) val = bool(randint(0, 1)) a[i] = val aa[i] = val self.assertEqual(a.tolist(), aa) self.assertEqual(id(a), ida) self.check_obj(a) b = bitarray(la) b[0:la] = bitarray(a) self.assertEqual(a, b) self.assertNotEqual(id(a), id(b)) b = bitarray(la) b[:] = bitarray(a) self.assertEqual(a, b) self.assertNotEqual(id(a), id(b)) b = bitarray(la) b[::-1] = bitarray(a) self.assertEqual(a.tolist()[::-1], b.tolist()) def test_setitem3(self): a = bitarray(5 * [False]) a[0] = 1 a[-2] = 1 self.assertEqual(a, bitarray('10010')) self.assertRaises(IndexError, a.__setitem__, 5, 'foo') self.assertRaises(IndexError, a.__setitem__, -6, 'bar') def test_setitem4(self): for a in self.randombitarrays(): la = len(a) if la == 0: continue for dum in range(3): step = self.rndsliceidx(la) if step == 0: step = None s = slice(self.rndsliceidx(la), self.rndsliceidx(la), step) for b in self.randombitarrays(): if len(b) == self.slicelen(s, len(a)) or step is None: c = bitarray(a) d = c c[s] = b self.assert_(c is d) self.check_obj(c) cc = a.tolist() cc[s] = b.tolist() self.assertEqual(c, bitarray(cc)) def test_setslice_to_bool(self): a = bitarray('11111111') a[::2] = False self.assertEqual(a, bitarray('01010101')) a[4::] = True self.assertEqual(a, bitarray('01011111')) a[-2:] = False self.assertEqual(a, bitarray('01011100')) a[:2:] = True self.assertEqual(a, bitarray('11011100')) a[:] = True self.assertEqual(a, bitarray('11111111')) def test_setslice_to_int(self): a = bitarray('11111111') a[::2] = 0 self.assertEqual(a, bitarray('01010101')) a[4::] = 1 self.assertEqual(a, bitarray('01011111')) a.__setitem__(slice(-2, None, None), 0) self.assertEqual(a, bitarray('01011100')) self.assertRaises(ValueError, 
a.__setitem__, slice(None, None, 2), 3) self.assertRaises(ValueError, a.__setitem__, slice(None, 2, None), -1) def test_delitem1(self): a = bitarray('100110') del a[1] self.assertEqual(len(a), 5) del a[3] del a[-2] self.assertEqual(a, bitarray('100')) self.assertRaises(IndexError, a.__delitem__, 3) self.assertRaises(IndexError, a.__delitem__, -4) def test_delitem2(self): for a in self.randombitarrays(): la = len(a) if la == 0: continue for dum in range(10): step = self.rndsliceidx(la) if step == 0: step = None s = slice(self.rndsliceidx(la), self.rndsliceidx(la), step) c = bitarray(a) d = c del c[s] self.assert_(c is d) self.check_obj(c) cc = a.tolist() del cc[s] self.assertEQUAL(c, bitarray(cc, endian=c.endian())) tests.append(SliceTests) # --------------------------------------------------------------------------- class MiscTests(unittest.TestCase, Util): def test_instancecheck(self): a = bitarray('011') self.assertTrue(isinstance(a, bitarray)) self.assertFalse(isinstance(a, str)) def test_booleanness(self): self.assertEqual(bool(bitarray('')), False) self.assertEqual(bool(bitarray('0')), True) self.assertEqual(bool(bitarray('1')), True) def test_iterate(self): for lst in self.randomlists(): acc = [] for b in bitarray(lst): acc.append(b) self.assertEqual(acc, lst) def test_iter1(self): it = iter(bitarray('011')) self.assertEqual(next(it), False) self.assertEqual(next(it), True) self.assertEqual(next(it), True) self.assertStopIteration(it) def test_iter2(self): for a in self.randombitarrays(): aa = a.tolist() self.assertEqual(list(a), aa) self.assertEqual(list(iter(a)), aa) def test_assignment(self): a = bitarray('00110111001') a[1:3] = a[7:9] a[-1:] = a[:1] b = bitarray('01010111000') self.assertEqual(a, b) def test_compare(self): for a in self.randombitarrays(): aa = a.tolist() for b in self.randombitarrays(): bb = b.tolist() self.assertEqual(a == b, aa == bb) self.assertEqual(a != b, aa != bb) self.assertEqual(a <= b, aa <= bb) self.assertEqual(a < b, aa < bb) 
self.assertEqual(a >= b, aa >= bb) self.assertEqual(a > b, aa > bb) def test_subclassing(self): class ExaggeratingBitarray(bitarray): def __new__(cls, data, offset): return bitarray.__new__(cls, data) def __init__(self, data, offset): self.offset = offset def __getitem__(self, i): return bitarray.__getitem__(self, i - self.offset) for a in self.randombitarrays(): if len(a) == 0: continue b = ExaggeratingBitarray(a, 1234) for i in range(len(a)): self.assertEqual(a[i], b[i+1234]) def test_endianness1(self): a = bitarray(endian='little') a.frombytes(to_bytes('\x01')) self.assertEqual(a.to01(), '10000000') b = bitarray(endian='little') b.frombytes(to_bytes('\x80')) self.assertEqual(b.to01(), '00000001') c = bitarray(endian='big') c.frombytes(to_bytes('\x80')) self.assertEqual(c.to01(), '10000000') d = bitarray(endian='big') d.frombytes(to_bytes('\x01')) self.assertEqual(d.to01(), '00000001') self.assertEqual(a, c) self.assertEqual(b, d) def test_endianness2(self): a = bitarray(8, endian='little') a.setall(False) a[0] = True self.assertEqual(a.tobytes(), to_bytes('\x01')) a[1] = True self.assertEqual(a.tobytes(), to_bytes('\x03')) a.frombytes(to_bytes(' ')) self.assertEqual(a.tobytes(), to_bytes('\x03 ')) self.assertEqual(a.to01(), '1100000000000100') def test_endianness3(self): a = bitarray(8, endian='big') a.setall(False) a[7] = True self.assertEqual(a.tobytes(), to_bytes('\x01')) a[6] = True self.assertEqual(a.tobytes(), to_bytes('\x03')) a.frombytes(to_bytes(' ')) self.assertEqual(a.tobytes(), to_bytes('\x03 ')) self.assertEqual(a.to01(), '0000001100100000') def test_endianness4(self): a = bitarray('00100000', endian='big') self.assertEqual(a.tobytes(), to_bytes(' ')) b = bitarray('00000100', endian='little') self.assertEqual(b.tobytes(), to_bytes(' ')) self.assertNotEqual(a, b) def test_endianness5(self): a = bitarray('11100000', endian='little') b = bitarray(a, endian='big') self.assertNotEqual(a, b) self.assertEqual(a.tobytes(), b.tobytes()) def 
test_pickle(self): from pickle import loads, dumps for v in range(3): for a in self.randombitarrays(): b = loads(dumps(a, v)) self.assert_(b is not a) self.assertEQUAL(a, b) def test_cPickle(self): if is_py3k: return for v in range(3): from cPickle import loads, dumps for a in self.randombitarrays(): b = loads(dumps(a, v)) self.assert_(b is not a) self.assertEQUAL(a, b) def test_overflow(self): from platform import architecture if architecture()[0] == '64bit': return self.assertRaises(OverflowError, bitarray.__new__, bitarray, 2**34 + 1) a = bitarray(10 ** 6) self.assertRaises(OverflowError, a.__imul__, 17180) tests.append(MiscTests) # --------------------------------------------------------------------------- class SpecialMethodTests(unittest.TestCase, Util): def test_all(self): a = bitarray() self.assertTrue(a.all()) if sys.version_info[:2] < (2, 5): return for a in self.randombitarrays(): self.assertEqual(all(a), a.all()) self.assertEqual(all(a.tolist()), a.all()) def test_any(self): a = bitarray() self.assertFalse(a.any()) if sys.version_info[:2] < (2, 5): return for a in self.randombitarrays(): self.assertEqual(any(a), a.any()) self.assertEqual(any(a.tolist()), a.any()) def test_repr(self): a = bitarray() self.assertEqual(repr(a), "bitarray()") a = bitarray('10111') self.assertEqual(repr(a), "bitarray('10111')") for a in self.randombitarrays(): b = eval(repr(a)) self.assert_(b is not a) self.assertEqual(a, b) self.check_obj(b) def test_copy(self): import copy for a in self.randombitarrays(): b = a.copy() self.assert_(b is not a) self.assertEQUAL(a, b) b = copy.copy(a) self.assert_(b is not a) self.assertEQUAL(a, b) b = copy.deepcopy(a) self.assert_(b is not a) self.assertEQUAL(a, b) def assertReallyEqual(self, a, b): # assertEqual first, because it will have a good message if the # assertion fails. 
self.assertEqual(a, b) self.assertEqual(b, a) self.assertTrue(a == b) self.assertTrue(b == a) self.assertFalse(a != b) self.assertFalse(b != a) if not is_py3k: self.assertEqual(0, cmp(a, b)) self.assertEqual(0, cmp(b, a)) def assertReallyNotEqual(self, a, b): # assertNotEqual first, because it will have a good message if the # assertion fails. self.assertNotEqual(a, b) self.assertNotEqual(b, a) self.assertFalse(a == b) self.assertFalse(b == a) self.assertTrue(a != b) self.assertTrue(b != a) if not is_py3k: self.assertNotEqual(0, cmp(a, b)) self.assertNotEqual(0, cmp(b, a)) def test_equality(self): self.assertReallyEqual(bitarray(''), bitarray('')) self.assertReallyEqual(bitarray('0'), bitarray('0')) self.assertReallyEqual(bitarray('1'), bitarray('1')) def test_not_equality(self): self.assertReallyNotEqual(bitarray(''), bitarray('1')) self.assertReallyNotEqual(bitarray(''), bitarray('0')) self.assertReallyNotEqual(bitarray('0'), bitarray('1')) tests.append(SpecialMethodTests) # --------------------------------------------------------------------------- class NumberTests(unittest.TestCase, Util): def test_add(self): c = bitarray('001') + bitarray('110') self.assertEQUAL(c, bitarray('001110')) for a in self.randombitarrays(): aa = a.copy() for b in self.randombitarrays(): bb = b.copy() c = a + b self.assertEqual(c, bitarray(a.tolist() + b.tolist())) self.assertEqual(c.endian(), a.endian()) self.check_obj(c) self.assertEQUAL(a, aa) self.assertEQUAL(b, bb) a = bitarray() self.assertRaises(TypeError, a.__add__, 42) def test_iadd(self): c = bitarray('001') c += bitarray('110') self.assertEQUAL(c, bitarray('001110')) for a in self.randombitarrays(): for b in self.randombitarrays(): c = bitarray(a) d = c d += b self.assertEqual(d, a + b) self.assert_(c is d) self.assertEQUAL(c, d) self.assertEqual(d.endian(), a.endian()) self.check_obj(d) a = bitarray() self.assertRaises(TypeError, a.__iadd__, 42) def test_mul(self): c = 0 * bitarray('1001111') self.assertEQUAL(c, 
bitarray()) c = 3 * bitarray('001') self.assertEQUAL(c, bitarray('001001001')) c = bitarray('110') * 3 self.assertEQUAL(c, bitarray('110110110')) for a in self.randombitarrays(): b = a.copy() for n in range(-10, 20): c = a * n self.assertEQUAL(c, bitarray(n * a.tolist(), endian=a.endian())) c = n * a self.assertEqual(c, bitarray(n * a.tolist(), endian=a.endian())) self.assertEQUAL(a, b) a = bitarray() self.assertRaises(TypeError, a.__mul__, None) def test_imul(self): c = bitarray('1101110011') idc = id(c) c *= 0 self.assertEQUAL(c, bitarray()) self.assertEqual(idc, id(c)) c = bitarray('110') c *= 3 self.assertEQUAL(c, bitarray('110110110')) for a in self.randombitarrays(): for n in range(-10, 10): b = a.copy() idb = id(b) b *= n self.assertEQUAL(b, bitarray(n * a.tolist(), endian=a.endian())) self.assertEqual(idb, id(b)) a = bitarray() self.assertRaises(TypeError, a.__imul__, None) tests.append(NumberTests) # --------------------------------------------------------------------------- class BitwiseTests(unittest.TestCase, Util): def test_misc(self): for a in self.randombitarrays(): b = ~a c = a & b self.assertEqual(c.any(), False) self.assertEqual(a, a ^ c) d = a ^ b self.assertEqual(d.all(), True) b &= d self.assertEqual(~b, a) def test_and(self): a = bitarray('11001') b = bitarray('10011') self.assertEQUAL(a & b, bitarray('10001')) b = bitarray('1001') self.assertRaises(ValueError, a.__and__, b) # not same length self.assertRaises(TypeError, a.__and__, 42) def test_iand(self): a = bitarray('110010110') ida = id(a) a &= bitarray('100110011') self.assertEQUAL(a, bitarray('100010010')) self.assertEqual(ida, id(a)) def test_or(self): a = bitarray('11001') b = bitarray('10011') self.assertEQUAL(a | b, bitarray('11011')) def test_ior(self): a = bitarray('110010110') a |= bitarray('100110011') self.assertEQUAL(a, bitarray('110110111')) def test_xor(self): a = bitarray('11001') b = bitarray('10011') self.assertEQUAL(a ^ b, bitarray('01010')) def test_ixor(self): a = 
bitarray('110010110') a ^= bitarray('100110011') self.assertEQUAL(a, bitarray('010100101')) def test_invert(self): a = bitarray() a.invert() self.assertEQUAL(a, bitarray()) a = bitarray('11011') a.invert() self.assertEQUAL(a, bitarray('00100')) a = bitarray('11011') b = ~a self.assertEQUAL(b, bitarray('00100')) self.assertEQUAL(a, bitarray('11011')) self.assert_(a is not b) for a in self.randombitarrays(): aa = a.tolist() b = bitarray(a) b.invert() for i in range(len(a)): self.assertEqual(b[i], not aa[i]) self.check_obj(b) c = ~a self.assert_(c is not a) self.assertEQUAL(a, bitarray(aa, endian=a.endian())) for i in range(len(a)): self.assertEqual(c[i], not aa[i]) self.check_obj(b) tests.append(BitwiseTests) # --------------------------------------------------------------------------- class SequenceTests(unittest.TestCase, Util): def test_contains1(self): a = bitarray() self.assert_(False not in a) self.assert_(True not in a) self.assert_(bitarray() in a) a.append(True) self.assert_(True in a) self.assert_(False not in a) a = bitarray([False]) self.assert_(False in a) self.assert_(True not in a) a.append(True) self.assert_(0 in a) self.assert_(1 in a) if not is_py3k: self.assert_(long(0) in a) self.assert_(long(1) in a) def test_contains2(self): a = bitarray() self.assertEqual(a.__contains__(1), False) a.append(1) self.assertEqual(a.__contains__(1), True) a = bitarray('0011') self.assertEqual(a.__contains__(bitarray('01')), True) self.assertEqual(a.__contains__(bitarray('10')), False) self.assertRaises(TypeError, a.__contains__, 'asdf') self.assertRaises(ValueError, a.__contains__, 2) self.assertRaises(ValueError, a.__contains__, -1) if not is_py3k: self.assertRaises(ValueError, a.__contains__, long(2)) def test_contains3(self): for n in range(2, 100): a = bitarray(n) a.setall(0) self.assert_(False in a) self.assert_(True not in a) a[randint(0, n - 1)] = 1 self.assert_(True in a) self.assert_(False in a) a.setall(1) self.assert_(True in a) self.assert_(False not in 
class ExtendTests(unittest.TestCase, Util):
    # Tests for bitarray.extend() with every accepted argument kind:
    # bitarray, list, tuple, generator, iterator and '01' string.

    def _check_extend(self, wrap):
        # Shared random check: for every pair of random lists, build a
        # bitarray from the first and extend it with wrap(second).  The
        # object identity must not change, the contents must equal the
        # list concatenation, and the result must pass check_obj().
        for head in self.randomlists():
            for tail in self.randomlists():
                target = bitarray(head)
                ident = id(target)
                target.extend(wrap(tail))
                self.assertEqual(id(target), ident)
                self.assertEqual(target.tolist(), head + tail)
                self.check_obj(target)

    def test_wrongArgs(self):
        target = bitarray()
        # missing argument
        self.assertRaises(TypeError, target.extend)
        # arguments of an unsupported type
        for bad in (None, True, 24):
            self.assertRaises(TypeError, target.extend, bad)
        # right type, but the string contains a character other than 0/1
        self.assertRaises(ValueError, target.extend, '0011201')

    def test_bitarray(self):
        grown = bitarray()
        steps = [(bitarray(), bitarray()),
                 (bitarray('110'), bitarray('110')),
                 (bitarray('1110'), bitarray('1101110'))]
        for piece, expected in steps:
            grown.extend(piece)
            self.assertEqual(grown, expected)

        # extending with a bitarray of the other endianness
        little = bitarray('00001111', endian='little')
        little.extend(bitarray('00111100', endian='big'))
        self.assertEqual(little, bitarray('0000111100111100'))

        for left in self.randombitarrays():
            for right in self.randombitarrays():
                target = bitarray(left)
                ident = id(target)
                target.extend(right)
                self.assertEqual(id(target), ident)
                self.assertEqual(target, left + right)

    def test_list(self):
        # items are converted by truth value
        bits = bitarray()
        bits.extend([0, 1, 3, None, {}])
        self.assertEqual(bits, bitarray('01100'))
        bits.extend([True, False])
        self.assertEqual(bits, bitarray('0110010'))

        self._check_extend(lambda items: items)

    def test_tuple(self):
        bits = bitarray()
        bits.extend((0, 1, 2, 0, 3))
        self.assertEqual(bits, bitarray('01101'))

        self._check_extend(tuple)

    def test_generator(self):
        def as_generator(items):
            for item in items:
                yield item

        bits = bitarray()
        bits.extend(as_generator(('', '1', None, True, [])))
        self.assertEqual(bits, bitarray('01010'))

        self._check_extend(as_generator)

    def test_iterator1(self):
        bits = bitarray()
        bits.extend(iter([3, 9, 0, 1, -2]))
        self.assertEqual(bits, bitarray('11011'))

        self._check_extend(iter)

    def test_iterator2(self):
        # silently skipped when itertools cannot be imported
        try:
            import itertools
        except ImportError:
            return
        bits = bitarray()
        bits.extend(itertools.repeat(True, 23))
        self.assertEqual(bits, bitarray(23 * '1'))

    def test_string01(self):
        bits = bitarray()
        bits.extend('0110111')
        self.assertEqual(bits, bitarray('0110111'))

        # render each list item as the character '0' or '1'
        self._check_extend(lambda items: ''.join('01'[x] for x in items))
def test_index3(self):
    # Cross-check bitarray.index(1, start, stop) against list.index() on
    # the same data for 100 random (start, stop) pairs.  A ValueError
    # from either side is mapped to None so the two outcomes can be
    # compared directly.
    size = 2000
    bits = bitarray(size)
    bits.setall(0)
    for _ in range(3):
        bits[randint(0, size - 1)] = 1
    reference = bits.tolist()

    def first_one(seq, lo, hi):
        try:
            return seq.index(1, lo, hi)
        except ValueError:
            return None

    for _ in range(100):
        lo = randint(0, size)
        hi = randint(0, size)
        found = first_one(bits, lo, hi)
        expected = first_one(reference, lo, hi)
        self.assertEqual(found, expected)
def test_search3(self):
    # search() and itersearch() must report the same start positions for
    # each pattern, including overlapping matches.
    haystack = bitarray('10010101110011111001011')
    expectations = (
        ('011', [6, 11, 20]),
        ('111', [7, 12, 13, 14]),  # note the overlap
        ('1011', [5, 19]),
        ('100', [0, 9, 16]),
    )
    for pattern, positions in expectations:
        needle = bitarray(pattern)
        self.assertEqual(haystack.search(needle), positions)
        self.assertEqual(list(haystack.itersearch(needle)), positions)
        self.assertEqual([pos for pos in haystack.itersearch(needle)],
                         positions)
def test_sort(self):
    # sort() works in place; 'reverse' may be given by keyword or by
    # position, and a non-boolean-like argument is rejected.
    ascending = bitarray('0000111')
    descending = bitarray('1110000')

    bits = bitarray('1101000')
    bits.sort()
    self.assertEqual(bits, ascending)

    bits = bitarray('1101000')
    calls = [((), {'reverse': True}, descending),
             ((), {'reverse': False}, ascending),
             ((True,), {}, descending),
             ((False,), {}, ascending)]
    for args, kwargs, expected in calls:
        bits.sort(*args, **kwargs)
        self.assertEqual(bits, expected)

    self.assertRaises(TypeError, bits.sort, 'A')

    # random data: compare with sorted() on the list form and make sure
    # the very same object was sorted
    for bits in self.randombitarrays():
        ident = id(bits)
        descending_order = randint(0, 1)
        bits.sort(descending_order)
        expected = bitarray(sorted(bits.tolist(), reverse=descending_order))
        self.assertEqual(bits, expected)
        self.assertEqual(id(bits), ident)
def test_pop(self):
    # fixed cases: (initial bits, index, popped value, remaining bits)
    cases = (('1', 0, True, ''),
             ('0', -1, False, ''),
             ('0011100', 3, True, '001100'))
    for before, index, popped, after in cases:
        bits = bitarray(before)
        self.assertEqual(bits.pop(index), popped)
        self.assertEqual(bits, bitarray(after))

    # pop() without an argument removes the last item; popping from an
    # empty bitarray raises IndexError
    bits = bitarray('01')
    self.assertEqual(bits.pop(), True)
    self.assertEqual(bits.pop(), False)
    self.assertRaises(IndexError, bits.pop)

    # out-of-range indices, then the default pop on random data
    for bits in self.randombitarrays():
        size = len(bits)
        self.assertRaises(IndexError, bits.pop, size)
        self.assertRaises(IndexError, bits.pop, -size - 1)
        if not size:
            continue
        reference = bits.tolist()
        endianness = bits.endian()
        self.assertEqual(bits.pop(), reference[-1])
        self.check_obj(bits)
        self.assertEqual(bits.endian(), endianness)

    # pop at a random valid index, mirrored on a plain list
    for bits in self.randombitarrays():
        size = len(bits)
        if not size:
            continue
        index = randint(-size, size - 1)
        reference = bits.tolist()
        self.assertEqual(bits.pop(index), reference[index])
        reference.pop(index)
        self.assertEqual(bits, bitarray(reference))
        self.check_obj(bits)
def test_tobytes(self):
    # an empty bitarray serialises to an empty byte string
    self.assertEqual(bitarray().tobytes(), to_bytes(''))

    # frombytes() followed by tobytes() is the identity for both
    # endiannesses
    for endianness in ('big', 'little'):
        bits = bitarray(endian=endianness)
        bits.frombytes(to_bytes('foo'))
        self.assertEqual(bits.tobytes(), to_bytes('foo'))

        for payload in self.randombytes():
            bits = bitarray(endian=endianness)
            bits.frombytes(payload)
            self.assertEqual(bits.tobytes(), payload)

    # n one-bits in a little-endian bitarray: expected byte string
    expected_bytes = [
        (1, '\x01'), (2, '\x03'), (3, '\x07'), (4, '\x0f'),
        (5, '\x1f'), (6, '\x3f'), (7, '\x7f'), (8, '\xff'),
        (12, '\xff\x0f'), (15, '\xff\x7f'), (16, '\xff\xff'),
        (17, '\xff\xff\x01'), (24, '\xff\xff\xff'),
    ]
    for nbits, raw in expected_bytes:
        ones = bitarray(nbits, endian='little')
        ones.setall(1)
        self.assertEqual(ones.tobytes(), to_bytes(raw))
def test_pickle(self):
    # Round-trip random bitarrays through pickle protocols 0, 1 and 2
    # using a temporary file.  The loaded object must be a distinct
    # object that compares equal under assertEQUAL.
    from pickle import load, dump
    for v in range(3):
        for a in self.randombitarrays():
            fo = open(self.tmpfname, 'wb')
            try:
                dump(a, fo, v)
            finally:
                fo.close()

            # The original passed open(...) straight to load() and never
            # closed the handle.  Closing it explicitly avoids leaking
            # one descriptor per iteration and keeps tearDown's rmtree()
            # from running while the file is still open.  try/finally is
            # used instead of 'with' because this test-suite deliberately
            # avoids post-2.4 syntax.
            fi = open(self.tmpfname, 'rb')
            try:
                b = load(fi)
            finally:
                fi.close()

            self.assert_(b is not a)
            self.assertEQUAL(a, b)
def test_from_large_file(self):
    # Read N bytes from a file into an empty bitarray and verify the
    # resulting length and the buffer bookkeeping.
    N = 100000

    fo = open(self.tmpfname, 'wb')
    try:
        fo.write(N * to_bytes('X'))
    finally:
        fo.close()

    a = bitarray()
    # The original handed an anonymous open(...) to fromfile() and never
    # closed it.  Keep a reference and close it so the descriptor is
    # released before tearDown removes the directory.
    fi = open(self.tmpfname, 'rb')
    try:
        a.fromfile(fi)
    finally:
        fi.close()

    self.assertEqual(len(a), 8 * N)
    self.assertEqual(a.buffer_info()[1], N)
    # make sure there is no over-allocation
    self.assertEqual(a.buffer_info()[4], N)
def test_encode_errors(self):
    # Each row is (expected exception, code dictionary, symbols) for one
    # encode() call that must fail.
    target = bitarray()
    failing_calls = [
        (TypeError, 0, ''),
        (ValueError, {}, ''),
        (TypeError, {'a': 42}, ''),
        (ValueError, {'a': bitarray()}, ''),
        (TypeError, {'a': bitarray('0')}, 42),  # 42 not iterable
    ]
    for expected_error, codedict, symbols in failing_calls:
        self.assertRaises(expected_error, target.encode, codedict, symbols)

    # none of the failed calls may have appended anything
    self.assertEqual(len(target), 0)
def test_iterdecode_buggybitarray(self):
    # Data that the code dictionary cannot decode: the iterator returned
    # by iterdecode() must raise ValueError when advanced, and neither
    # the bitarray nor the dictionary may be modified.
    d = {'a': bitarray('0')}
    a = bitarray('1')
    it = a.iterdecode(d)
    # The original asserted through the Python-2-only bound method
    # `it.next` behind `if not is_py3k`, so on Python 3 the failure path
    # was never exercised.  next(it) works on both major versions and is
    # already used unguarded by other tests in this file.
    self.assertRaises(ValueError, next, it)
    self.assertEqual(a, bitarray('1'))
    self.assertEqual(d, {'a': bitarray('0')})
def test_read2(self):
    # A memoryview over a bitarray of 8000 random bits is expected to
    # expose 1000 bytes.  Byte slices of the view must agree with
    # tobytes() of the corresponding bit slices.
    nbytes = 1000
    bits = bitarray([randint(0, 1) for _ in range(8 * nbytes)])
    view = memoryview(bits)
    self.assertEqual(len(view), nbytes)

    first, last = 345, 657
    chunk = bits[first * 8 : last * 8]
    self.assertEqual(view[first:last].tobytes(), chunk.tobytes())
    self.assertEqual(view[:].tobytes(), bits.tobytes())
def run(verbosity=1, repeat=1):
    """
    Run every test class registered in the module-level `tests` list.

    Prints the installation directory, the bitarray version and the
    interpreter version, then executes each registered TestCase class
    `repeat` times in a single suite.

    verbosity -- passed through to unittest.TextTestRunner
    repeat    -- number of times each test class is added to the suite

    Returns the result object produced by TextTestRunner.run().
    """
    print('bitarray is installed in: ' + os.path.dirname(__file__))
    print('bitarray version: ' + __version__)
    print(sys.version)

    # unittest.makeSuite() is deprecated and no longer exists in Python
    # 3.13.  TestLoader.loadTestsFromTestCase() collects the same
    # 'test*' methods and is available on all Python versions.
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    for cls in tests:
        for _ in range(repeat):
            suite.addTest(loader.loadTestsFromTestCase(cls))

    runner = unittest.TextTestRunner(verbosity=verbosity)
    return runner.run(suite)
a[::3] = 0 (in addition to booleans) * moved implementation of itersearch method to C level (Lluis Pamies) * search, itersearch now only accept bitarray objects, whereas __contains__ accepts either booleans or bitarrays * use a priority queue for Huffman tree example (thanks to Ushma Bhatt) * improve documentation 2012-02-02 0.5.2: ------------------- * fixed MSVC compile error on Python 3 (thanks to Chris Gohlke) * add missing start and stop optional parameters to index() method * add examples/compress.py 2012-01-31 0.5.1: ------------------- * update documentation to use tobytes and frombytes, rather than tostring and fromstring (which are now deprecated) * simplified how tests are run 2012-01-23 0.5.0: ------------------- * added itersearch method * added Bloom filter example * minor fixes in docstrings, added more tests 2011-12-29 0.4.0: ------------------- * porting to Python 3.x (Roland Puntaier) * introduced tobytes, frombytes (tostring, fromstring are now deprecated) * updated development status * added sieve prime number example * moved project to github: https://github.com/ilanschnell/bitarray 2009-04-06 0.3.5: ------------------- * fixed reference count bugs * added possibility to slice assign to True or False, e.g. a[::3] = True will set every third element to True 2009-01-15 0.3.4: ------------------- * Made C code less ambiguous, such that the package compiles on Visual Studio, with all tests passing. 2008-12-14 0.3.3: ------------------- * Made changes to the C code to allow compilation with more compilers. Compiles on Visual Studio, although there are still a few tests failing. 2008-10-19 0.3.2: ------------------- * Added sequential search method. * The special method __contains__ now also takes advantage of the sequential search. 2008-10-12 0.3.1: ------------------- * Simplified state information for pickling. Argument for count is now optional, defaults to True. Fixed typos.
2008-09-30 0.3.0: ------------------- * Fixed a severe bug for 64-bit machines. Implemented all methods in C, improved tests. * Removed deprecated methods from01 and fromlist. 2008-09-23 0.2.5: ------------------- * Added section in README about prefix codes. Implemented _multiply method for faster __mul__ and __imul__. Fixed some typos. 2008-09-22 0.2.4: ------------------- * Implemented encode and decode methods (in C) for variable-length prefix codes. * Added more examples, wrote README for the examples. * Added more tests, fixed some typos. 2008-09-16 0.2.3: ------------------- * Fixed a memory leak, implemented a number of methods in C. These include __getitem__, __setitem__, __delitem__, pop, remove, insert. The set of methods implemented on the Python level is very limited now. * Implemented bitwise operations. 2008-09-09 0.2.2: ------------------- * Rewrote parts of the README * Implemented memory efficient algorithm for the reverse method * Fixed typos, added a few tests, more C refactoring. 2008-09-07 0.2.1: ------------------- * Improved tests, in particular added checking for memory leaks. * Refactored many things on the C level. * Implemented a few more methods. 2008-09-02 0.2.0: ------------------- * Added bit endianness property to the bitarray object * Added the examples to the release package.
class BloomFilter(object):
    """
    Bloom filter backed by a bitarray of m bits, using k hash positions
    per key.

    Membership answers may be false positives but never false negatives:
    contains(key) is True for every key that was add()-ed.

    NOTE(review): this class uses the Python 2 names `long` and `xrange`
    and feeds a str to hashlib, like the rest of this example file.
    """

    def __init__(self, m, k):
        # m: number of bits in the filter; k: positions set per key
        self.m = m
        self.k = k
        bits = bitarray(m)
        bits.setall(0)
        self.array = bits

    def add(self, key):
        """Set the k bit positions derived from key."""
        bits = self.array
        for position in self._hashes(key):
            bits[position] = 1

    def contains(self, key):
        """Return True when all k positions derived from key are set."""
        for position in self._hashes(key):
            if not self.array[position]:
                # stop at the first cleared bit
                return False
        return True

    def _hashes(self, key):
        """
        Yield k array positions (each in range(m)) for key.

        The MD5 digest of str(key) is treated as one large integer and
        consumed base-m: every step takes the remainder as the next
        position and keeps the quotient.  Whenever the remaining integer
        drops below m, the digest is extended with '.' and re-read.
        """
        digest = hashlib.new('md5')
        digest.update(str(key))
        pool = long(digest.hexdigest(), 16)
        for _ in xrange(self.k):
            if pool < self.m:
                # not enough entropy left for another position
                digest.update('.')
                pool = long(digest.hexdigest(), 16)
            pool, position = divmod(pool, self.m)
            yield position
if __name__ == '__main__':
    # Round-trip self-check: compress a sparse 12345-bit array, print the
    # compressed representation, and verify that decompress() restores
    # both the bits and the endianness.
    a = bitarray(12345)
    a.setall(0)
    a[::10] = True
    c = compress(a)
    # print(c) with a single argument prints the same text under the
    # Python 2 print statement and the Python 3 print function
    print(c)
    b = decompress(c)
    # The original `assert a == b, a.endian() == b.endian()` used the
    # endianness comparison as the assertion *message*, so it was never
    # actually checked.  Assert the two conditions separately.
    assert a == b
    assert a.endian() == b.endian()
time.time() - start_time assert ''.join(res) == sample print('Py_time: %.6f sec' % Py_time) # Time the decode method which is implemented in C start_time = time.time() res = a.decode(code) C_time = time.time() - start_time assert ''.join(res) == sample print('C_time: %.6f sec' % C_time) print('Ratio: %f' % (Py_time / C_time)) if __name__ == '__main__': main() bitarray-0.8.1/examples/huffman.py0000644000076500000240000000732112125715114017221 0ustar ilanstaff00000000000000""" The non-trivial part of the code is derived from: http://en.literateprograms.org/Huffman_coding_(Python) The link also contains a good description of the algorithm. """ import os, sys from collections import defaultdict from bitarray import bitarray from heapq import heappush, heappop def huffCode(freq): """ Given a dictionary mapping symbols to thier frequency, return the Huffman code in the form of a dictionary mapping the symbols to bitarrays. """ minheap = [] for s in freq: heappush(minheap, (freq[s], s)) while len(minheap) > 1: childR, childL = heappop(minheap), heappop(minheap) parent = (childL[0] + childR[0], childL, childR) heappush(minheap, parent) # Now minheap[0] is the root node of the Huffman tree def traverse(tree, prefix=bitarray()): if len(tree) == 2: result[tree[1]] = prefix else: for i in range(2): traverse(tree[i+1], prefix + bitarray([i])) result = {} traverse(minheap[0]) return result def freq_string(s): """ Given a string, return a dictionary mapping characters to thier frequency. 
""" res = defaultdict(int) for c in s: res[c] += 1 return res def print_code(filename): freq = freq_string(open(filename).read()) code = huffCode(freq) print(' char frequency Huffman code') print(70*'-') for c in sorted(code, key=lambda c: freq[c], reverse=True): print('%7r %8i %s' % (c, freq[c], code[c].to01())) def encode(filename): s = open(filename, 'rb').read() code = huffCode(freq_string(s)) fo = open(filename + '.huff', 'wb') fo.write(repr(code) + '\n') a = bitarray(endian='little') a.encode(code, s) fo.write(str(a.buffer_info()[3])) # write unused bits as one char string a.tofile(fo) fo.close() print('Ratio =%6.2f%%' % (100.0 * a.buffer_info()[1] / len(s))) def decode(filename): fi = open(filename, 'rb') code = eval(fi.readline()) u = int(fi.read(1)) # number of unused bits in last byte stored in file a = bitarray(endian='little') a.fromfile(fi) fi.close() if u: del a[-u:] assert filename.endswith('.huff') fo = open(filename[:-5] + '.out', 'wb') fo.write(''.join(a.decode(code))) fo.close() def usage(): print("""Usage: %s command FILE print -- calculate and display the Huffman code for the frequency of characters in FILE. encode -- encode FILE using the Huffman code calculated for the frequency of characters in FILE itself. The output is FILE.huff which contains both the Huffman code and the bitarray resulting from the encoding. decode -- decode FILE, which has .huff extension generated with the encode command. The output is written in a filename where .huff is replaced by .out test -- encode FILE, decode FILE.huff, compare FILE with FILE.out, and unlink created files. 
""" % sys.argv[0]) sys.exit(0) if __name__ == '__main__': if len(sys.argv) != 3: usage() cmd, filename = sys.argv[1:3] if cmd == 'print': print_code(filename) elif cmd == 'encode': encode(filename) elif cmd == 'decode': if filename.endswith('.huff'): decode(filename) else: print('Filename has no .huff extension') elif cmd == 'test': huff = filename + '.huff' out = filename + '.out' encode(filename) decode(huff) assert open(filename, 'rb').read() == open(out, 'rb').read() os.unlink(huff) os.unlink(out) else: print('Unknown command %r' % cmd) usage() bitarray-0.8.1/examples/mandel.py0000644000076500000240000000452212125715114017035 0ustar ilanstaff00000000000000# does not work with Python 3, because weave is not yet supported import hashlib from bitarray import bitarray import numpy from scipy import weave support_code = ''' #define D 501 int color(double cr, double ci) { int d = 1; double zr=cr, zi=ci, zr2, zi2; for(;;) { zr2 = zr * zr; zi2 = zi * zi; if( zr2+zi2 > 16.0 ) goto finish; if( ++d == D ) goto finish; zi = 2.0 * zr * zi + ci; zr = zr2 - zi2 + cr; } finish: return d % 2; } static void PyUFunc_0(char **args, npy_intp *dimensions, npy_intp *steps, void *func) { npy_intp i, n; npy_intp is0 = steps[0]; npy_intp is1 = steps[1]; npy_intp os = steps[2]; char *ip0 = args[0]; char *ip1 = args[1]; char *op = args[2]; n = dimensions[0]; for(i = 0; i < n; i++) { *(long *)op = color(*(double *)ip0, *(double *)ip1); ip0 += is0; ip1 += is1; op += os; } } static PyUFuncGenericFunction f_functions[] = { PyUFunc_0, }; static char f_types[] = { NPY_DOUBLE, NPY_DOUBLE, NPY_BOOL, }; ''' ufunc_info = weave.base_info.custom_info() ufunc_info.add_header('"numpy/ufuncobject.h"') mandel = weave.inline('/* ' + hashlib.md5(support_code).hexdigest() + ''' */ import_ufunc(); return_val = PyUFunc_FromFuncAndData(f_functions, NULL, f_types, 1, /* ntypes */ 2, /* nin */ 1, /* nout */ PyUFunc_None, /* identity */ "mandel", /* name */ "doc", /* doc */ 0); ''', support_code=support_code, 
verbose=0, customize=ufunc_info) # ---------------------------------------------------------------------------- w, h = 8000, 6000 y, x = numpy.ogrid[-1.5:+1.5:h*1j, -2.75:+1.25:w*1j] data = mandel(x, y) bitdata = bitarray(endian='big') bitdata.pack(data.tostring()) fo = open('mandel.ppm', 'wb') fo.write('P4\n') fo.write('# This is a partable bitmap image of the Mandelbrot set.\n') fo.write('%i %i\n' % (w, h)) bitdata.tofile(fo) fo.close() bitarray-0.8.1/examples/ndarray.py0000644000076500000240000000057612125715114017242 0ustar ilanstaff00000000000000# # This example illusatrates how binary data can be efficiently be passed # between a bitarray object and an ndarray with dtype bool # import bitarray import numpy a = bitarray.bitarray('100011001001') print a # bitarray -> ndarray b = numpy.fromstring(a.unpack(), dtype=bool) print repr(b) # ndarray -> bitarray c = bitarray.bitarray() c.pack(b.tostring()) assert a == c bitarray-0.8.1/examples/pbm.py0000644000076500000240000000412112125715114016346 0ustar ilanstaff00000000000000from bitarray import bitarray, bits2bytes class PBM: # Portable Bitmap def __init__(self, w=0, h=0): self.size = (w, h) self.update() self.data = bitarray(self.bits, endian='big') def update(self): w, h = self.size self.bytes_per_row = bits2bytes(w) self.bits_per_row = 8 * self.bytes_per_row self.bytes = self.bytes_per_row * h self.bits = 8 * self.bytes def info(self): print('size: %s x %s' % self.size) print('bytes per row: %s' % self.bytes_per_row) print('bits per row: %s' % self.bits_per_row) print('bitarray: ' + repr(self.data.buffer_info())) def clear(self): self.data.setall(0) def save(self, filename): fo = open(filename, 'wb') fo.write(b'P4\n') fo.write(b'# This is a partable bitmap (pbm) file.\n') fo.write(('%i %i\n' % (self.size)).encode()) self.data.tofile(fo) fo.close() def load(self, filename): fi = open(filename, 'rb') assert fi.readline().strip() == b'P4' while True: line = fi.readline() if not line.startswith(b'#'): self.size = 
tuple(map(int, line.split())) break self.update() self.data = bitarray(endian='big') self.data.fromfile(fi) fi.close() assert self.data.buffer_info()[1] == self.bytes def address(self, x, y): return x + self.bits_per_row * y def __getitem__(self, s): x, y = s return self.data[self.address(x, y)] def __setitem__(self, s, val): x, y = s self.data[self.address(x, y)] = val if __name__ == '__main__': # draw picture with straight line from (10, 10) to (390, 390) a = PBM(500, 400) a.info() a.clear() for x in range(10, 391): a[x, x] = True a.save('pic1.ppm') # copy the picture b = PBM() b.load('pic1.ppm') b.save('pic2.ppm') # draw a straight line from (490, 10) to (110, 390) on top for i in range(381): b[490-i, 10+i] = 1 b.save('pic3.ppm') bitarray-0.8.1/examples/README0000644000076500000240000000427112125715114016104 0ustar ilanstaff00000000000000bloom.py: Demonstrates the implementation of a Bloom filter, see: http://en.wikipedia.org/wiki/Bloom_filter compress.py: Demonstrates how the bz2 module may be used to create a compressed object which represents a bitarray decoding.py Bitarray's decode method is implemented in C. Since the C code might be hard to read, we have implemented exactly the same algorithm in Python. It is about 20 times slower than it's C counterpart, since (recursive) function calls are more expensive in Python than in C. huffman.py Demonstrates building a Huffman tree. Given an input file, calculates the number of occurrences for each character; from those frequencies, a Huffman tree is build; and by traversing the tree, the Huffman code is evaluated. Also allows encoding and decoding of a file, see -h option. mandel.py Generates a .ppm image file of size 8000x6000 of the Mandelbrot set. Despite it's size, the output image file has only a size of slightly over 6 Million bytes (uncompressed) because each pixel is stored in one bit. Requires numpy and scipy (see http://scipy.org/). Not supported by Python 3.x. 
ndarray.py Demonstrates how to efficiently convert boolean data from a bitarray to a numpy.ndarray of dtype bool. Requires numpy. pbm.py Defines a simple class called PBM (Portable Bit Map) which allows: - addressing pixels by their coordinates - storing and loading .ppm (P4), which is the same as .pbm, files sieve.py Sieve of Eratosthenes is a simple, ancient algorithm for finding all prime numbers up to a specified integer. In this exmaple, the algorithm is implemented using the numpy ndarray as well as the bitarray object. Thanks Steve for emailing this example. smallints.py A class is defined which allows efficiently storing an array of integers represented by a specified number of bits (1 through 8). For example, an array with 1000 5 bit integers can be created, allowing each element in the array to take values form 0 to 31, while the size of the object is 625 (5000/8) bytes. Thanks to David Kammeyer for the idea to apply a bitarray in this way. bitarray-0.8.1/examples/runall0000755000076500000240000000021412125715114016440 0ustar ilanstaff00000000000000#!/bin/bash -x python huffman.py test README || exit 1 for x in *.py do echo Running: $x python $x || exit 1 done rm *.ppm *.pyc bitarray-0.8.1/examples/sieve.py0000644000076500000240000000167512125715114016716 0ustar ilanstaff00000000000000import time import numpy import bitarray def primesToN1(n): # use numpy: 8-bit array of boolean flags if n < 2: return [] print 'init numpy' A = numpy.ones(n+1, numpy.bool) # set to 1 == True A[:2] = A[2*2::2] = 0 print 'sieve' for i in xrange(3, int(n**.5)+1, 2): # odd numbers if A[i]: # i is prime A[i*i::i*2] = 0 print 'counting' print numpy.sum(A) def primesToN2(n): # use bitarray: 1-bit boolean flags if n < 2: return [] print 'init bitarray' A = bitarray.bitarray(n+1) A.setall(1) A[:2] = A[2*2::2] = 0 print 'sieve' for i in xrange(3, int(n**.5)+1, 2): # odd numbers if A[i]: # i is prime A[i*i::i*2] = 0 print 'counting' print A.count() N = 100 * 1000 * 1000 def run(func): 
start_time = time.time() func(N) print 'time: %.6f sec\n' % (time.time() - start_time) run(primesToN1) run(primesToN2) bitarray-0.8.1/examples/smallints.py0000644000076500000240000000267012125715114017605 0ustar ilanstaff00000000000000# # Thanks to David Kammeyer for the idea to apply a bitarray in this way. # from bitarray import bitarray class SmallIntArray(object): """ A class which allows efficiently storeing an array of integers represented by a specified number of bits (1..8). For example, an array with 1000 5 bit integers can be created, allowing each element in the array to take values form 0 to 31, while the size of the object is 625 (5000/8) bytes. """ def __init__(self, N, k): assert 0 < k <= 8 self.N = N # number of integers self.k = k # bits for each integer self.data = bitarray(N*k, endian='little') def slice_i(self, i): assert 0 <= i < self.N return slice(self.k * i, self.k * (i + 1)) def __getitem__(self, i): return ord(self.data[self.slice_i(i)].tostring()) def __setitem__(self, i, v): assert 0 <= v < 2 ** self.k a = bitarray(endian='little') a.fromstring(chr(v)) self.data[self.slice_i(i)] = a[:self.k] if __name__ == '__main__': from random import randint # define array with 1000 integers, each represented by 5 bits a = SmallIntArray(1000, 5) b = [] # store values, for assertion below for i in range(1000): v = randint(0, 31) b.append(v) a[i] = v print(b[:5]) print(a.data.buffer_info()) print(a.data[:25]) for i in range(1000): assert a[i] == b[i] bitarray-0.8.1/LICENSE0000644000076500000240000000452412125715114014414 0ustar ilanstaff00000000000000PYTHON SOFTWARE FOUNDATION LICENSE ---------------------------------- 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("Python") in source or binary form and its associated documentation. 2. 
Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Python Software Foundation; All Rights Reserved" are retained in Python alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python. 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between PSF and Licensee. This License Agreement does not grant permission to use PSF trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. 
By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. bitarray-0.8.1/Makefile0000644000076500000240000000055312125715114015045 0ustar ilanstaff00000000000000bitarray/_bitarray.so: bitarray/_bitarray.c $(PYTHON) setup.py build_ext --inplace test: bitarray/_bitarray.so $(PYTHON) -c "import bitarray; bitarray.test()" doc: bitarray/_bitarray.so $(python) update_readme.py clean: rm -rf build dist rm -f bitarray/*.o bitarray/*.so rm -f bitarray/*.pyc rm -rf bitarray/__pycache__ *.egg-info rm -f README.html bitarray-0.8.1/README.rst0000644000076500000240000004322312125715114015075 0ustar ilanstaff00000000000000====================================== bitarray: efficient arrays of booleans ====================================== This module provides an object type which efficiently represents an array of booleans. Bitarrays are sequence types and behave very much like usual lists. Eight bits are represented by one byte in a contiguous block of memory. The user can select between two representations; little-endian and big-endian. All of the functionality is implemented in C. Methods for accessing the machine representation are provided. This can be useful when bit level access to binary files is required, such as portable bitmap image files (.pbm). Also, when dealing with compressed data which uses variable bit length encoding, you may find this module useful. Key features ------------ * All functionality implemented in C. * Bitarray objects behave very much like a list object, in particular slicing (including slice assignment and deletion) is supported. * The bit endianness can be specified for each bitarray object, see below. * On 32bit systems, a bitarray object can contain up to 2^34 elements, that is 16 Gbits (on 64bit machines up to 2^63 elements in theory -- on Python 2.4 only 2^31 elements, see `PEP 353 `_ (added in Python 2.5)). * Packing and unpacking to other binary data formats, e.g. 
`numpy.ndarray `_, is possible. * Fast methods for encoding and decoding variable bit length prefix codes * Sequential search (as list or iterator) * Bitwise operations: ``&, |, ^, &=, |=, ^=, ~`` * Pickling and unpickling of bitarray objects possible. * Bitarray objects support the buffer protocol (Python 2.7 only) Installation ------------ bitarray can be installed from source:: $ tar xzf bitarray-0.8.1.tar.gz $ cd bitarray-0.8.1 $ python setup.py install On Unix systems, the latter command may have to be executed with root privileges. If you have `distribute `_ installed, you can easy_install bitarray. Once you have installed the package, you may want to test it:: $ python -c 'import bitarray; bitarray.test()' bitarray is installed in: /usr/local/lib/python2.7/site-packages/bitarray bitarray version: 0.8.1 2.7.2 (r271:86832, Nov 29 2010) [GCC 4.2.1 (SUSE Linux)] ......................................................................... ........................................... ---------------------------------------------------------------------- Ran 134 tests in 1.396s OK You can always import the function test, and ``test().wasSuccessful()`` will return True when the test went well. Using the module ---------------- As mentioned above, bitarray objects behave very much like lists, so there is not too new to learn. The biggest difference to list objects is the ability to access the machine representation of the object. When doing so, the bit endianness is of importance, this issue is explained in detail in the section below. 
Here, we demonstrate the basic usage of bitarray objects: >>> from bitarray import bitarray >>> a = bitarray() # create empty bitarray >>> a.append(True) >>> a.extend([False, True, True]) >>> a bitarray('1011') Bitarray objects can be instantiated in different ways: >>> a = bitarray(2**20) # bitarray of length 1048576 (uninitialized) >>> bitarray('1001011') # from a string bitarray('1001011') >>> lst = [True, False, False, True, False, True, True] >>> bitarray(lst) # from list, tuple, iterable bitarray('1001011') Bits can be assigned from any Python object, if the value can be interpreted as a truth value. You can think of this as Python's built-in function bool() being applied, whenever casting an object: >>> a = bitarray([42, '', True, {}, 'foo', None]) >>> a bitarray('101010') >>> a.append(a) # note that bool(a) is True >>> a.count(42) # counts occurrences of True (not 42) 4L >>> a.remove('') # removes first occurrence of False >>> a bitarray('110101') Like lists, bitarray objects support slice assignment and deletion: >>> a = bitarray(50) >>> a.setall(False) >>> a[11:37:3] = 9 * bitarray([True]) >>> a bitarray('00000000000100100100100100100100100100000000000000') >>> del a[12::3] >>> a bitarray('0000000000010101010101010101000000000') >>> a[-6:] = bitarray('10011') >>> a bitarray('000000000001010101010101010100010011') >>> a += bitarray('000111') >>> a[9:] bitarray('001010101010101010100010011000111') In addition, slices can be assigned to booleans, which is easier (and faster) than assigning to a bitarray in which all values are the same: >>> a = 20 * bitarray('0') >>> a[1:15:3] = True >>> a bitarray('01001001001001000000') This is easier and faster than: >>> a = 20 * bitarray('0') >>> a[1:15:3] = 5 * bitarray('1') >>> a bitarray('01001001001001000000') Note that in the latter we have to create a temporary bitarray whose length must be known or calculated. 
Bit endianness -------------- Since a bitarray allows addressing of individual bits, where the machine represents 8 bits in one byte, there two obvious choices for this mapping; little- and big-endian. When creating a new bitarray object, the endianness can always be specified explicitly: >>> a = bitarray(endian='little') >>> a.frombytes(b'A') >>> a bitarray('10000010') >>> b = bitarray('11000010', endian='little') >>> b.tobytes() 'C' Here, the low-bit comes first because little-endian means that increasing numeric significance corresponds to an increasing address (index). So a[0] is the lowest and least significant bit, and a[7] is the highest and most significant bit. >>> a = bitarray(endian='big') >>> a.frombytes(b'A') >>> a bitarray('01000001') >>> a[6] = 1 >>> a.tobytes() 'C' Here, the high-bit comes first because big-endian means "most-significant first". So a[0] is now the lowest and most significant bit, and a[7] is the highest and least significant bit. The bit endianness is a property attached to each bitarray object. When comparing bitarray objects, the endianness (and hence the machine representation) is irrelevant; what matters is the mapping from indices to bits: >>> bitarray('11001', endian='big') == bitarray('11001', endian='little') True Bitwise operations (``&, |, ^, &=, |=, ^=, ~``) are implemented efficiently using the corresponding byte operations in C, i.e. the operators act on the machine representation of the bitarray objects. Therefore, one has to be cautious when applying the operation to bitarrays with different endianness. 
When converting to and from machine representation, using the ``tobytes``, ``frombytes``, ``tofile`` and ``fromfile`` methods, the endianness matters: >>> a = bitarray(endian='little') >>> a.frombytes(b'\x01') >>> a bitarray('10000000') >>> b = bitarray(endian='big') >>> b.frombytes(b'\x80') >>> b bitarray('10000000') >>> a == b True >>> a.tobytes() == b.tobytes() False The endianness can not be changed once an object is created. However, since creating a bitarray from another bitarray just copies the memory representing the data, you can create a new bitarray with different endianness: >>> a = bitarray('11100000', endian='little') >>> a bitarray('11100000') >>> b = bitarray(a, endian='big') >>> b bitarray('00000111') >>> a == b False >>> a.tobytes() == b.tobytes() True The default bit endianness is currently big-endian, however this may change in the future, and when dealing with the machine representation of bitarray objects, it is recommended to always explicitly specify the endianness. Unless, explicitly converting to machine representation, using the ``tobytes``, ``frombytes``, ``tofile`` and ``fromfile`` methods, the bit endianness will have no effect on any computation, and one can safely ignore setting the endianness, and other details of this section. Buffer protocol --------------- Python 2.7 provides memoryview objects, which allow Python code to access the internal data of an object that supports the buffer protocol without copying. Bitarray objects support this protocol, with the memory being interpreted as simple bytes. 
>>> a = bitarray('01000001' '01000010' '01000011', endian='big') >>> v = memoryview(a) >>> len(v) 3 >>> v[-1] 'C' >>> v[:2].tobytes() 'AB' >>> v.readonly # changing a bitarray's memory is also possible False >>> v[1] = 'o' >>> a bitarray('010000010110111101000011') Variable bit length prefix codes -------------------------------- The method ``encode`` takes a dictionary mapping symbols to bitarrays and an iterable, and extends the bitarray object with the encoded symbols found while iterating. For example: >>> d = {'H':bitarray('111'), 'e':bitarray('0'), ... 'l':bitarray('110'), 'o':bitarray('10')} ... >>> a = bitarray() >>> a.encode(d, 'Hello') >>> a bitarray('111011011010') Note that the string ``'Hello'`` is an iterable, but the symbols are not limited to characters, in fact any immutable Python object can be a symbol. Taking the same dictionary, we can apply the ``decode`` method which will return a list of the symbols: >>> a.decode(d) ['H', 'e', 'l', 'l', 'o'] >>> ''.join(a.decode(d)) 'Hello' Since symbols are not limited to being characters, it is necessary to return them as elements of a list, rather than simply returning the joined string. Reference --------- **The bitarray class:** ``bitarray([initial], [endian=string])`` Return a new bitarray object whose items are bits initialized from the optional initial, and endianness. If no object is provided, the bitarray is initialized to have length zero. The initial object may be of the following types: int, long Create bitarray of length given by the integer. The initial values in the array are random, because only the memory allocated. string Create bitarray from a string of '0's and '1's. list, tuple, iterable Create bitarray from a sequence, each element in the sequence is converted to a bit using truth value value. bitarray Create bitarray from another bitarray. This is done by copying the memory holding the bitarray data, and is hence very fast. 
The optional keyword arguments 'endian' specifies the bit endianness of the created bitarray object. Allowed values are 'big' and 'little' (default is 'big'). Note that setting the bit endianness only has an effect when accessing the machine representation of the bitarray, i.e. when using the methods: tofile, fromfile, tobytes, frombytes. **A bitarray object supports the following methods:** ``all()`` -> bool Returns True when all bits in the array are True. ``any()`` -> bool Returns True when any bit in the array is True. ``append(item)`` Append the value bool(item) to the end of the bitarray. ``buffer_info()`` -> tuple Return a tuple (address, size, endianness, unused, allocated) giving the current memory address, the size (in bytes) used to hold the bitarray's contents, the bit endianness as a string, the number of unused bits (e.g. a bitarray of length 11 will have a buffer size of 2 bytes and 5 unused bits), and the size (in bytes) of the allocated memory. ``bytereverse()`` For all bytes representing the bitarray, reverse the bit order (in-place). Note: This method changes the actual machine values representing the bitarray; it does not change the endianness of the bitarray object. ``copy()`` -> bitarray Return a copy of the bitarray. ``count([value])`` -> int Return number of occurrences of value (defaults to True) in the bitarray. ``decode(code)`` -> list Given a prefix code (a dict mapping symbols to bitarrays), decode the content of the bitarray and return the list of symbols. ``encode(code, iterable)`` Given a prefix code (a dict mapping symbols to bitarrays), iterates over iterable object with symbols, and extends the bitarray with the corresponding bitarray for each symbols. ``endian()`` -> string Return the bit endianness as a string (either 'little' or 'big'). ``extend(object)`` Append bits to the end of the bitarray. The objects which can be passed to this method are the same iterable objects which can given to a bitarray object upon initialization. 
``fill()`` -> int Adds zeros to the end of the bitarray, such that the length of the bitarray is not a multiple of 8. Returns the number of bits added (0..7). ``frombytes(bytes)`` Append from a byte string, interpreted as machine values. ``fromfile(f, [n])`` Read n bytes from the file object f and append them to the bitarray interpreted as machine values. When n is omitted, as many bytes are read until EOF is reached. ``fromstring(string)`` Append from a string, interpreting the string as machine values. Deprecated since version 0.4.0, use ``frombytes()`` instead. ``index(value, [start, [stop]])`` -> int Return index of the first occurrence of bool(value) in the bitarray. Raises ValueError if the value is not present. ``insert(i, item)`` Insert bool(item) into the bitarray before position i. ``invert()`` Invert all bits in the array (in-place), i.e. convert each 1-bit into a 0-bit and vice versa. ``iterdecode(code)`` -> iterator Given a prefix code (a dict mapping symbols to bitarrays), decode the content of the bitarray and iterate over the symbols. ``itersearch(bitarray)`` -> iterator Searches for the given a bitarray in self, and return an iterator over the start positions where bitarray matches self. ``length()`` -> int Return the length, i.e. number of bits stored in the bitarray. This method is preferred over __len__ (used when typing ``len(a)``), since __len__ will fail for a bitarray object with 2^31 or more elements on a 32bit machine, whereas this method will return the correct value, on 32bit and 64bit machines. ``pack(bytes)`` Extend the bitarray from a byte string, where each characters corresponds to a single bit. The character b'\x00' maps to bit 0 and all other characters map to bit 1. This method, as well as the unpack method, are meant for efficient transfer of data between bitarray objects to other python objects (for example NumPy's ndarray object) which have a different view of memory. 
``pop([i])`` -> item Return the i-th (default last) element and delete it from the bitarray. Raises IndexError if bitarray is empty or index is out of range. ``remove(item)`` Remove the first occurrence of bool(item) in the bitarray. Raises ValueError if item is not present. ``reverse()`` Reverse the order of bits in the array (in-place). ``search(bitarray, [limit])`` -> list Searches for the given a bitarray in self, and returns the start positions where bitarray matches self as a list. The optional argument limits the number of search results to the integer specified. By default, all search results are returned. ``setall(value)`` Set all bits in the bitarray to bool(value). ``sort(reverse=False)`` Sort the bits in the array (in-place). ``to01()`` -> string Return a string containing '0's and '1's, representing the bits in the bitarray object. Note: To extend a bitarray from a string containing '0's and '1's, use the extend method. ``tobytes()`` -> bytes Return the byte representation of the bitarray. When the length of the bitarray is not a multiple of 8, the few remaining bits (1..7) are set to 0. ``tofile(f)`` Write all bits (as machine values) to the file object f. When the length of the bitarray is not a multiple of 8, the remaining bits (1..7) are set to 0. ``tolist()`` -> list Return an ordinary list with the items in the bitarray. Note that the list object being created will require 32 or 64 times more memory than the bitarray object, which may cause a memory error if the bitarray is very large. Also note that to extend a bitarray with elements from a list, use the extend method. ``tostring()`` -> string Return the string representing (machine values) of the bitarray. When the length of the bitarray is not a multiple of 8, the few remaining bits (1..7) are set to 0. Deprecated since version 0.4.0, use ``tobytes()`` instead. 
``unpack(zero=b'\x00', one=b'\xff')`` -> bytes Return a byte string containing one character for each bit in the bitarray, using the specified mapping. See also the pack method. **Functions defined in the module:** ``test(verbosity=1, repeat=1)`` -> TextTestResult Run self-test, and return unittest.runner.TextTestResult object. ``bitdiff(a, b)`` -> int Return the difference between two bitarrays a and b. This is function does the same as (a ^ b).count(), but is more memory efficient, as no intermediate bitarray object gets created ``bits2bytes(n)`` -> int Return the number of bytes necessary to store n bits. Change log ---------- **0.8.1** (2013-03-30): * fix issue #10, i.e. int(bitarray()) segfault * added tests for using a bitarray object as an argument to functions like int, long (on Python 2), float, list, tuple, dict **0.8.0** (2012-04-04): * add Python 2.4 support * add (module level) function bitdiff for calculating the difference between two bitarrays **0.7.0** (2012-02-15): * add iterdecode method (C level), which returns an iterator but is otherwise like the decode method * improve memory efficiency and speed of pickling large bitarray objects Please find the complete change log `here `_. 
bitarray-0.8.1/setup.py0000644000076500000240000000304312125715114015114 0ustar ilanstaff00000000000000import re from os.path import join try: from setuptools import setup, Extension except ImportError: from distutils.core import setup, Extension kwds = {} kwds['long_description'] = open('README.rst').read() # Read version from bitarray/__init__.py pat = re.compile(r'__version__\s*=\s*(\S+)', re.M) data = open(join('bitarray', '__init__.py')).read() kwds['version'] = eval(pat.search(data).group(1)) setup( name = "bitarray", author = "Ilan Schnell", author_email = "ilanschnell@gmail.com", url = "https://github.com/ilanschnell/bitarray", license = "PSF", classifiers = [ "License :: OSI Approved :: Python Software Foundation License", "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Operating System :: OS Independent", "Programming Language :: C", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.4", "Programming Language :: Python :: 2.5", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.1", "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Topic :: Utilities", ], description = "efficient arrays of booleans -- C extension", packages = ["bitarray"], ext_modules = [Extension(name = "bitarray._bitarray", sources = ["bitarray/_bitarray.c"])], **kwds ) bitarray-0.8.1/TODO0000644000076500000240000000523412125715114014076 0ustar ilanstaff00000000000000* compute the lexicographically next bit permutation, see http://www-graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation * Write special tests for really large bitarrays, for which the length exceeds 2**31 bits --------------------------- RANDOM NOTES --------------------------------- #!/bin/bash for PY in py26 py26d py27 py32 py33 do export PYTHON=$HOME/$PY/bin/python make clean make test || exit 1 done #!/bin/bash 
for PY in 2.4 2.5 2.6 2.7 3.1 3.2 3.3 do PYBIN=$HOME/$PY/bin/python rm -rf $HOME/$PY/lib/python$PY/site-packages $PYBIN -V rm -rf build dist $PYBIN setup.py install >/dev/null 2>&1 || exit 1 pushd /tmp $PYBIN -c "import bitarray; assert bitarray.test(repeat=1).wasSuccessful()" || \ exit 1 popd done # checking the growth pattern: a = bitarray() sizes = set() for i in xrange(1000000): sizes.add(a.buffer_info()[4]) a.append(0) print sorted(sizes) If I should ever decide to use pre-calculated tables: static char bytereverse_trans[256] = { 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, 0x0f, 0x8f, 0x4f, 
0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, }; bitarray-0.8.1/update_readme.py0000644000076500000240000000460512125715114016560 0ustar ilanstaff00000000000000import os import re import doctest from cStringIO import StringIO import bitarray fo = StringIO() def write_changelog(): fo.write("Change log\n" "----------\n\n") ver_pat = re.compile(r'(\d{4}-\d{2}-\d{2})\s+(\d+\.\d+\.\d+)') count = 0 for line in open('CHANGE_LOG'): m = ver_pat.match(line) if m: if count == 3: break count += 1 fo.write(m.expand(r'**\2** (\1):\n')) elif line.startswith('---'): fo.write('\n') else: fo.write(line) url = "https://github.com/ilanschnell/bitarray/blob/master/CHANGE_LOG" fo.write("Please find the complete change log\n" "`here <%s>`_.\n" % url) sig_pat = re.compile(r'(\w+\([^()]*\))( -> (.+))?') def write_doc(name): doc = eval('bitarray.%s.__doc__' % name) lines = doc.splitlines() m = sig_pat.match(lines[0]) if m is None: raise Exception("signature line invalid: %r" % lines[0]) s = '``%s``' % m.group(1) if m.group(3): s += ' -> %s' % m.group(3) fo.write(s + '\n') assert lines[1] == '' for line in lines[2:]: fo.write(' %s\n' % line) fo.write('\n\n') def write_reference(): fo.write("Reference\n" "---------\n\n" "**The bitarray class:**\n\n") write_doc('bitarray') fo.write("**A bitarray object supports the following methods:**\n\n") for method in sorted(dir(bitarray.bitarray)): if method.startswith('_'): continue write_doc('bitarray.%s' % method) fo.write("**Functions defined in the module:**\n\n") write_doc('test') write_doc('bitdiff') write_doc('bits2bytes') def write_all(data): ver_pat = re.compile(r'(bitarray.+?)(\d+\.\d+\.\d+)') for line in data.splitlines(): if line == 'Reference': break line = ver_pat.sub(lambda m: m.group(1) + bitarray.__version__, line) fo.write(line + '\n') write_reference() write_changelog() def main(): data = open('README.rst').read() write_all(data) new_data = fo.getvalue() fo.close() if new_data == data: print "already 
up-to-date" else: with open('README.rst', 'w') as f: f.write(new_data) doctest.testfile('README.rst') os.system('rst2html.py README.rst >README.html') if __name__ == '__main__': main()