pax_global_header00006660000000000000000000000064136257345700014526gustar00rootroot0000000000000052 comment=7747fc2486a712f1516fd5a1da03ae0e0a56a248 flor-1.1.3/000077500000000000000000000000001362573457000124725ustar00rootroot00000000000000flor-1.1.3/.gitignore000066400000000000000000000000731362573457000144620ustar00rootroot00000000000000.* *.egg-info dist/ build/ .cache *.pyc *~ *.bloom venv*/* flor-1.1.3/.travis.yml000066400000000000000000000020131362573457000145770ustar00rootroot00000000000000language: python python: - '2.7' - '3.4' - '3.5' - '3.6' - '3.7' - '3.8' - pypy - pypy3 install: - pip install . - pip install -r requirements.txt script: python -m unittest discover deploy: provider: pypi user: "__token__" on: tags: true python: 3.6 distributions: "sdist bdist_wheel" skip_existing: true password: secure: "H8L6Pz7zI9K2IhaStM47kX0B5aUoAsX5gDN615V04W8LuWByENTIRSdjebTzk/A9FcOnppW84XzbTvLhJV3VJ1VD3SK+tw5Cqa73Q//nYaIo1xE4AixiCMfFh9xg/FxO4d9TUXMzLpF3hRqB0l9DaGAbDgyo24jgQ55oj5WUuv4XL4ZvjxK6l8dQJV1sKdM6MuRtVTy+GqcR3nX0vLTzz/MzVygKst6heVFzKEGv7IR3iy4S4xhsRl2aRU7iq2k+59au1FfjsttaRCWKOO69+TfVHSz4k6aNQPuIk9Iu81pVORzJAe9swTzshyfl3SVOtDgBECzwI4VM5HbFXKA08XJVzcnAy0c1BozhSub3Ao9W5XPOEzv4G88f5H/dgTlIOd7AcwzM267E4TsJUgzjWlDameTM+LhpaT44uhw/I6yw1Pl9Znjz1dltxujrpakyTXQkwzb456L0jhvE/ErwI3D06BVj6ZOvUvkEpGyMYJP0RRRzH/VjKNt3M1B+Swf/+qBFjGVahHU2Dn3jR/cy+gYKasAz87Q6E1r/xlZfIsI/1LTa9psIReDYhEc1vFqMIk6kKQaKURF+JEkAy0u9B/uunVIdDTpDVFOD991EdlQSpaiXaff77Ydw1uxcgahzZpDB1NwCJK4YuuKNasU1kQsvopTs7XlaROUEG/KXiz4=" flor-1.1.3/LICENSE000066400000000000000000000030261362573457000135000ustar00rootroot00000000000000Copyright (c) 2017, DCSO Deutsche Cyber-Sicherheitsorganisation GmbH All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the DCSO Deutsche Cyber-Sicherheitsorganisation GmbH nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flor-1.1.3/README.md000066400000000000000000000031141362573457000137500ustar00rootroot00000000000000# Flor - A Bloom filter implementation in Python [![Build Status](https://travis-ci.org/DCSO/flor.svg?branch=master)](https://travis-ci.org/DCSO/flor) Flor implements a Bloom filter class that is fully compatible with our [Go Bloom filter implementation](https://github.com/DCSO/bloom). # Requirements Flor is compatible with Python 2.7+ and Python 3.2+ as well as PyPy2/3 and does not require any non-standard modules. 
# Installation Flor can be installed via PyPi/pip: pip install flor Alternatively, you can install it from source: git clone https://github.com/DCSO/flor.git cd flor #add "sudo" if you're not in a virtual environment python setup.py install # Basics A Bloom filter has a capacity `n` and a false positive probability `p` that gives the probability that a filter filled to capacity (i.e. with `n` distinct values inserted) will return `True` for an element that is not in the filter. # Usage Creating a new Bloom filter: from flor import BloomFilter bf = BloomFilter(n=100000, p=0.001) bf.add(b"foo") bf.add(b"bar") bf.add(b"baz") b"baz" in bf #returns True b"nope" in bf #returns False Writing a Bloom filter to a file: bf = BloomFilter() with open('test.bloom', 'wb') as f: bf.write(f) Reading a Bloom filter from a file: bf = BloomFilter() with open('test.bloom', 'rb') as f: bf.read(f) The binary format of the filter is compatible with that generated by our Go library, so you can use the two interchangeably. # License Flor is licensed under the BSD 3 Clause license (see LICENSE). flor-1.1.3/flor/000077500000000000000000000000001362573457000134345ustar00rootroot00000000000000flor-1.1.3/flor/__init__.py000066400000000000000000000002111362573457000155370ustar00rootroot00000000000000# DCSO - Flor # Copyright (c) 2016, 2017, DCSO GmbH. All rights reserved. from .filter import BloomFilter from .fnv import fnv_1, fnv_1aflor-1.1.3/flor/filter.py000066400000000000000000000066741362573457000153100ustar00rootroot00000000000000# DCSO - Flor # Copyright (c) 2016, 2017, DCSO GmbH. All rights reserved. 
import math from struct import unpack, pack from .fnv import fnv_1 m = 18446744073709551557 g = 18446744073709550147 class BloomFilter(object): class CapacityError(BaseException): pass def __init__(self, n=100000, p=0.001, data=b''): self.p = p self.n = n self.N = 0 self.m = int(abs(math.ceil(float(n) * math.log(float(p)) / math.pow(math.log(2.0), 2.0)))) #we work in 64 bit blocks as this is the format of the Go filter. self.M = int(math.ceil(float(self.m) / 64.0))*8 self.k = int(math.ceil(math.log(2) * float(self.m) / float(n))) self._bytes = bytearray([0 for i in range(self.M)]) self.data = data def __contains__(self, value): return self.check(value) def read(self, input_file): bs8 = input_file.read(8) if len(bs8) != 8: raise IOError("Invalid filter!") flags = unpack('= self.n: raise BloomFilter.CapacityError("Bloom filter is full!") def check(self, value): fp = self.fingerprint(value) for fpe in fp: k = int(fpe / 8) l = fpe % 8 if self._bytes[k] & (1 << l) == 0: return False return True def fingerprint(self, value): bvalue = bytes(value) hn = fnv_1(bvalue) % m fp = [] for i in range(self.k): hn = (hn*g & 0xFFFFFFFFFFFFFFFF) % m fp.append((hn % self.m) & 0xFFFFFFFFFFFFFFFF) return fp flor-1.1.3/flor/fnv.py000066400000000000000000000011671362573457000146040ustar00rootroot00000000000000# DCSO - Flor # Copyright (c) 2016, 2017, DCSO GmbH. All rights reserved. 
offset = 14695981039346656037 prime = 1099511628211 def fnv_1(value): if not isinstance(value, bytes): raise TypeError("Value must be a bytes object!") hash = offset for byte in bytearray(value): hash = (hash*prime) & 0xFFFFFFFFFFFFFFFF hash ^= byte return hash def fnv_1a(value): if not isinstance(value, bytes): raise TypeError("Value must be a bytes object!") hash = offset for byte in bytearray(value): hash ^= byte hash = (hash*prime) & 0xFFFFFFFFFFFFFFFF return hash flor-1.1.3/flor_test/000077500000000000000000000000001362573457000144735ustar00rootroot00000000000000flor-1.1.3/flor_test/__init__.py000066400000000000000000000000001362573457000165720ustar00rootroot00000000000000flor-1.1.3/flor_test/test_filter.py000066400000000000000000000042231362573457000173720ustar00rootroot00000000000000# DCSO - Flor # Copyright (c) 2016, 2017, DCSO GmbH. All rights reserved. import unittest import math from io import BytesIO from flor.filter import BloomFilter class TestFingerprints(unittest.TestCase): def test_fingerprints(self): bf = BloomFilter(n=100000, p=0.01) fp = bf.fingerprint(b"bar") assert fp == [20311, 36825, 412501, 835777, 658914, 853361, 307361] class TestFilter(unittest.TestCase): def test_creation(self): bf = BloomFilter(n=100000, p=0.01) assert bf.n == 100000 assert bf.p == 0.01 assert bf.m == 958505 #we work in 64 bit blocks as this is the format of the Go filter. 
assert bf.M == int(math.ceil(bf.m/64.0))*8 assert bf.k == 7 assert bf.N == 0 def test_add_and_check(self): bf = BloomFilter(n=100000, p=0.01) values = (b'bar', b'baz', b'boo', b'bam') for value in values: bf.add(value) assert bf.N == len(values) #repeatedly inserting the same values should not increase the count for value in values: bf.add(value) assert bf.N == len(values) for value in values: assert value in bf #this might occasionally fail (very seldom though) assert not value+b'sdfsfds2asd' in bf def test_read_and_write(self): fs = BytesIO() bf = BloomFilter(n=100000, p=0.01, data=b'foobar') values = (b'bar', b'baz', b'boo', b'bam') for value in values: bf.add(value) bf.write(fs) assert len(fs.getvalue()) > 0 new_bf = BloomFilter(n=1,p=0.1) #we rewind the file to the beginning fs.seek(0) new_bf.read(fs) assert new_bf.n == bf.n assert new_bf.p == bf.p assert new_bf.k == bf.k assert new_bf.m == bf.m assert new_bf.N == bf.N assert new_bf.M == bf.M assert new_bf.data == bf.data assert new_bf._bytes == bf._bytes for value in values: assert value in new_bf and value in bf assert not value+b'343243' in bf if __name__ == '__main__': unittest.main() flor-1.1.3/flor_test/test_fnv.py000066400000000000000000000005601362573457000166760ustar00rootroot00000000000000# DCSO - Flor # Copyright (c) 2016, 2017, DCSO GmbH. All rights reserved. import unittest from flor.fnv import fnv_1, fnv_1a class TestFNV(unittest.TestCase): def test_fnv_1(self): assert fnv_1(b"test") == 0x8c093f7e9fccbf69 def test_fnv_1a(self): assert fnv_1a(b"test") == 0xf9e6e6ef197c2b25 if __name__ == '__main__': unittest.main()flor-1.1.3/requirements.txt000066400000000000000000000000001362573457000157440ustar00rootroot00000000000000flor-1.1.3/setup.py000066400000000000000000000006571362573457000142140ustar00rootroot00000000000000#!/usr/bin/env python # DCSO - Flor # Copyright (c) 2016, 2017, DCSO GmbH. All rights reserved. 
from setuptools import setup

# Distribution metadata for the Flor package, gathered in one mapping so the
# individual fields are easy to scan and diff. The values are passed to
# setuptools unchanged as keyword arguments.
_PACKAGE_METADATA = {
    'name': 'Flor',
    'version': '1.1.3',
    'description': 'Flor - An efficient Bloom filter implementation in Python',
    'author': 'Andreas Dewes - DCSO GmbH',
    'author_email': 'andreas.dewes@dcso.de',
    'url': 'https://github.com/DCSO/flor',
    'license': 'BSD3',
    # Only the library package is listed for distribution.
    'packages': ['flor'],
}

setup(**_PACKAGE_METADATA)