pax_global_header00006660000000000000000000000064135423633210014514gustar00rootroot0000000000000052 comment=58fd84aceb91ab664f51b3bd37a7d93c63a84599 preshed-3.0.2/000077500000000000000000000000001354236332100131505ustar00rootroot00000000000000preshed-3.0.2/.gitignore000066400000000000000000000002211354236332100151330ustar00rootroot00000000000000*.egg *.egg-info preshed/.maps.pxd.swm preshed/.maps.pyx.swl *.sw[a-z] *.so *.pyc *.swp *.swo *.html *.c *.cpp .env/ .denv MANIFEST build/ dist/ preshed-3.0.2/LICENSE000066400000000000000000000021221354236332100141520ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2016 ExplosionAI GmbH, 2014 Matthew Honnibal Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. preshed-3.0.2/MANIFEST.in000066400000000000000000000001001354236332100146750ustar00rootroot00000000000000recursive-include include *.h include LICENSE include README.md preshed-3.0.2/README.md000066400000000000000000000017671354236332100144420ustar00rootroot00000000000000 # preshed: Cython Hash Table for Pre-Hashed Keys Simple but high performance Cython hash table mapping pre-randomized keys to `void*` values. Inspired by [Jeff Preshing](http://preshing.com/20130107/this-hash-table-is-faster-than-a-judy-array/). [![Azure Pipelines](https://img.shields.io/azure-devops/build/explosion-ai/public/3/master.svg?logo=azure-devops&style=flat-square)](https://dev.azure.com/explosion-ai/public/_build?definitionId=3) [![pypi Version](https://img.shields.io/pypi/v/preshed.svg?style=flat-square)](https://pypi.python.org/pypi/preshed) [![conda Version](https://img.shields.io/conda/vn/conda-forge/preshed.svg?style=flat-square)](https://anaconda.org/conda-forge/preshed) [![Python wheels](https://img.shields.io/badge/wheels-%E2%9C%93-4c1.svg?longCache=true&style=flat-square&logo=python&logoColor=white)](https://github.com/explosion/wheelwright/releases) preshed-3.0.2/azure-pipelines.yml000066400000000000000000000034371354236332100170160ustar00rootroot00000000000000trigger: batch: true branches: include: - '*' jobs: - job: 'Test' strategy: matrix: Python27Linux: imageName: 'ubuntu-16.04' python.version: '2.7' Python27Mac: imageName: 'macos-10.13' python.version: '2.7' Python35Linux: imageName: 'ubuntu-16.04' python.version: '3.5' Python35Windows: imageName: 'vs2017-win2016' python.version: '3.5' Python35Mac: imageName: 'macos-10.13' python.version: '3.5' Python36Linux: imageName: 'ubuntu-16.04' python.version: '3.6' Python36Windows: imageName: 'vs2017-win2016' python.version: '3.6' Python36Mac: imageName: 'macos-10.13' python.version: '3.6' Python37Linux: imageName: 'ubuntu-16.04' python.version: '3.7' Python37Windows: imageName: 'vs2017-win2016' python.version: '3.7' Python37Mac: imageName: 'macos-10.13' python.version: '3.7' maxParallel: 4 pool: vmImage: $(imageName) steps: - task: UsePythonVersion@0 inputs: versionSpec: '$(python.version)' architecture: 'x64' - script: | python -m pip install --upgrade pip wheel pip install -r requirements.txt displayName: 'Install dependencies' - script: | python setup.py build_ext --inplace python setup.py sdist displayName: 'Build sdist' - script: pip install dist/*.tar.gz condition: in( variables['Agent.OS'], 'Linux', 'Darwin') displayName: 'Install from sdist (Linux, Mac)' - script: pip install -e . condition: eq( variables['Agent.OS'], 'Windows_NT') displayName: 'Install with pip (Windows)' - script: | python -m pytest preshed displayName: 'Run tests' preshed-3.0.2/bin/000077500000000000000000000000001354236332100137205ustar00rootroot00000000000000preshed-3.0.2/bin/cythonize.py000077500000000000000000000104621354236332100163140ustar00rootroot00000000000000#!/usr/bin/env python """ cythonize.py Cythonize pyx files into C++ files as needed. Usage: cythonize.py [root] Checks pyx files to see if they have been changed relative to their corresponding C++ files. If they have, then runs cython on these files to recreate the C++ files. Additionally, checks pxd files and setup.py if they have been changed. If they have, rebuilds everything. Change detection based on file hashes stored in JSON format. For now, this script should be run by developers when changing Cython files and the resulting C++ files checked in, so that end-users (and Python-only developers) do not get the Cython dependencies. Based upon: https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py https://raw.githubusercontent.com/numpy/numpy/master/tools/cythonize.py Note: this script does not check any of the dependent C++ libraries. """ from __future__ import print_function import os import sys import json import hashlib import subprocess import argparse HASH_FILE = 'cythonize.json' def process_pyx(fromfile, tofile): print('Processing %s' % fromfile) try: from Cython.Compiler.Version import version as cython_version from distutils.version import LooseVersion if LooseVersion(cython_version) < LooseVersion('0.19'): raise Exception('Require Cython >= 0.19') except ImportError: pass flags = ['--fast-fail'] if tofile.endswith('.cpp'): flags += ['--cplus'] try: try: r = subprocess.call(['cython'] + flags + ['-o', tofile, fromfile]) if r != 0: raise Exception('Cython failed') except OSError: # There are ways of installing Cython that don't result in a cython # executable on the path, see gh-2397. r = subprocess.call([sys.executable, '-c', 'import sys; from Cython.Compiler.Main import ' 'setuptools_main as main; sys.exit(main())'] + flags + ['-o', tofile, fromfile]) if r != 0: raise Exception('Cython failed') except OSError: raise OSError('Cython needs to be installed') def preserve_cwd(path, func, *args): orig_cwd = os.getcwd() try: os.chdir(path) func(*args) finally: os.chdir(orig_cwd) def load_hashes(filename): try: return json.load(open(filename)) except (ValueError, IOError): return {} def save_hashes(hash_db, filename): with open(filename, 'w') as f: f.write(json.dumps(hash_db)) def get_hash(path): return hashlib.md5(open(path, 'rb').read()).hexdigest() def hash_changed(base, path, db): full_path = os.path.normpath(os.path.join(base, path)) return not get_hash(full_path) == db.get(full_path) def hash_add(base, path, db): full_path = os.path.normpath(os.path.join(base, path)) db[full_path] = get_hash(full_path) def process(base, filename, db): root, ext = os.path.splitext(filename) if ext in ['.pyx', '.cpp']: if hash_changed(base, filename, db) or not os.path.isfile(os.path.join(base, root + '.cpp')): preserve_cwd(base, process_pyx, root + '.pyx', root + '.cpp') hash_add(base, root + '.cpp', db) hash_add(base, root + '.pyx', db) def check_changes(root, db): res = False new_db = {} setup_filename = 'setup.py' hash_add('.', setup_filename, new_db) if hash_changed('.', setup_filename, db): res = True for base, _, files in os.walk(root): for filename in files: if filename.endswith('.pxd'): hash_add(base, filename, new_db) if hash_changed(base, filename, db): res = True if res: db.clear() db.update(new_db) return res def run(root): db = load_hashes(HASH_FILE) try: check_changes(root, db) for base, _, files in os.walk(root): for filename in files: process(base, filename, db) finally: save_hashes(db, HASH_FILE) if __name__ == '__main__': parser = argparse.ArgumentParser(description='Cythonize pyx files into C++ files as needed') parser.add_argument('root', help='root directory') args = parser.parse_args() run(args.root) preshed-3.0.2/bin/push-tag.sh000077500000000000000000000005561354236332100160150ustar00rootroot00000000000000#!/usr/bin/env bash set -e # Insist repository is clean git diff-index --quiet HEAD git checkout $1 git pull origin $1 git push origin $1 version=$(grep "__version__ = " preshed/about.py) version=${version/__version__ = } version=${version/\'/} version=${version/\'/} version=${version/\"/} version=${version/\"/} git tag "v$version" git push origin "v$version" preshed-3.0.2/fabfile.py000066400000000000000000000031421354236332100151120ustar00rootroot00000000000000from fabric.api import local, run, lcd, cd, env import os from os import path from os.path import exists as file_exists from fabtools.python import virtualenv PWD = path.dirname(__file__) VENV_DIR = path.join(PWD, '.env') DEV_ENV_DIR = path.join(PWD, '.denv') def dev(): # Allow this to persist, since we aren't as rigorous about keeping state clean if not file_exists('.denv'): local('virtualenv .denv') with virtualenv(DEV_ENV_DIR): local('pip install -r requirements.txt') def sdist(): if file_exists('dist/'): local('rm -rf dist/') local('mkdir dist') with virtualenv(VENV_DIR): local('python setup.py sdist') def publish(): with virtualenv(VENV_DIR): local('python setup.py register') local('twine upload dist/*.tar.gz') def setup(): if file_exists('.env'): local('rm -rf .env') local('rm -rf *.egg') local('virtualenv .env') def install(): with virtualenv(VENV_DIR): local('pip install --upgrade setuptools') local('pip install dist/*.tar.gz') local('pip install pytest') def make(): with virtualenv(DEV_ENV_DIR): with lcd(path.dirname(__file__)): local('python setup.py build') def clean(): with lcd(os.path.dirname(__file__)): local('python setup.py clean --all') with virtualenv(DEV_ENV_DIR): with lcd(os.path.dirname(__file__)): local('python setup.py clean --all') def test(): with virtualenv(VENV_DIR): local('python -m pytest -x') def travis(): local('open https://travis-ci.org/spacy-io/preshed') preshed-3.0.2/include/000077500000000000000000000000001354236332100145735ustar00rootroot00000000000000preshed-3.0.2/include/msvc9/000077500000000000000000000000001354236332100156345ustar00rootroot00000000000000preshed-3.0.2/include/msvc9/stdint.h000066400000000000000000000176451354236332100173270ustar00rootroot00000000000000// ISO C9x compliant stdint.h for Microsoft Visual Studio // Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 // // Copyright (c) 2006-2013 Alexander Chemeris // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // 1. Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the product nor the names of its contributors may // be used to endorse or promote products derived from this software // without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // /////////////////////////////////////////////////////////////////////////////// #ifndef _MSC_VER // [ #error "Use this header only with Microsoft Visual C++ compilers!" #endif // _MSC_VER ] #ifndef _MSC_STDINT_H_ // [ #define _MSC_STDINT_H_ #if _MSC_VER > 1000 #pragma once #endif #if _MSC_VER >= 1600 // [ #include #else // ] _MSC_VER >= 1600 [ #include // For Visual Studio 6 in C++ mode and for many Visual Studio versions when // compiling for ARM we should wrap include with 'extern "C++" {}' // or compiler give many errors like this: // error C2733: second C linkage of overloaded function 'wmemchr' not allowed #ifdef __cplusplus extern "C" { #endif # include #ifdef __cplusplus } #endif // Define _W64 macros to mark types changing their size, like intptr_t. #ifndef _W64 # if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 # define _W64 __w64 # else # define _W64 # endif #endif // 7.18.1 Integer types // 7.18.1.1 Exact-width integer types // Visual Studio 6 and Embedded Visual C++ 4 doesn't // realize that, e.g. char has the same size as __int8 // so we give up on __intX for them. #if (_MSC_VER < 1300) typedef signed char int8_t; typedef signed short int16_t; typedef signed int int32_t; typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; #else typedef signed __int8 int8_t; typedef signed __int16 int16_t; typedef signed __int32 int32_t; typedef unsigned __int8 uint8_t; typedef unsigned __int16 uint16_t; typedef unsigned __int32 uint32_t; #endif typedef signed __int64 int64_t; typedef unsigned __int64 uint64_t; // 7.18.1.2 Minimum-width integer types typedef int8_t int_least8_t; typedef int16_t int_least16_t; typedef int32_t int_least32_t; typedef int64_t int_least64_t; typedef uint8_t uint_least8_t; typedef uint16_t uint_least16_t; typedef uint32_t uint_least32_t; typedef uint64_t uint_least64_t; // 7.18.1.3 Fastest minimum-width integer types typedef int8_t int_fast8_t; typedef int16_t int_fast16_t; typedef int32_t int_fast32_t; typedef int64_t int_fast64_t; typedef uint8_t uint_fast8_t; typedef uint16_t uint_fast16_t; typedef uint32_t uint_fast32_t; typedef uint64_t uint_fast64_t; // 7.18.1.4 Integer types capable of holding object pointers #ifdef _WIN64 // [ typedef signed __int64 intptr_t; typedef unsigned __int64 uintptr_t; #else // _WIN64 ][ typedef _W64 signed int intptr_t; typedef _W64 unsigned int uintptr_t; #endif // _WIN64 ] // 7.18.1.5 Greatest-width integer types typedef int64_t intmax_t; typedef uint64_t uintmax_t; // 7.18.2 Limits of specified-width integer types #if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 // 7.18.2.1 Limits of exact-width integer types #define INT8_MIN ((int8_t)_I8_MIN) #define INT8_MAX _I8_MAX #define INT16_MIN ((int16_t)_I16_MIN) #define INT16_MAX _I16_MAX #define INT32_MIN ((int32_t)_I32_MIN) #define INT32_MAX _I32_MAX #define INT64_MIN ((int64_t)_I64_MIN) #define INT64_MAX _I64_MAX #define UINT8_MAX _UI8_MAX #define UINT16_MAX _UI16_MAX #define UINT32_MAX _UI32_MAX #define UINT64_MAX _UI64_MAX // 7.18.2.2 Limits of minimum-width integer types #define INT_LEAST8_MIN INT8_MIN #define INT_LEAST8_MAX INT8_MAX #define INT_LEAST16_MIN INT16_MIN #define INT_LEAST16_MAX INT16_MAX #define INT_LEAST32_MIN INT32_MIN #define INT_LEAST32_MAX INT32_MAX #define INT_LEAST64_MIN INT64_MIN #define INT_LEAST64_MAX INT64_MAX #define UINT_LEAST8_MAX UINT8_MAX #define UINT_LEAST16_MAX UINT16_MAX #define UINT_LEAST32_MAX UINT32_MAX #define UINT_LEAST64_MAX UINT64_MAX // 7.18.2.3 Limits of fastest minimum-width integer types #define INT_FAST8_MIN INT8_MIN #define INT_FAST8_MAX INT8_MAX #define INT_FAST16_MIN INT16_MIN #define INT_FAST16_MAX INT16_MAX #define INT_FAST32_MIN INT32_MIN #define INT_FAST32_MAX INT32_MAX #define INT_FAST64_MIN INT64_MIN #define INT_FAST64_MAX INT64_MAX #define UINT_FAST8_MAX UINT8_MAX #define UINT_FAST16_MAX UINT16_MAX #define UINT_FAST32_MAX UINT32_MAX #define UINT_FAST64_MAX UINT64_MAX // 7.18.2.4 Limits of integer types capable of holding object pointers #ifdef _WIN64 // [ # define INTPTR_MIN INT64_MIN # define INTPTR_MAX INT64_MAX # define UINTPTR_MAX UINT64_MAX #else // _WIN64 ][ # define INTPTR_MIN INT32_MIN # define INTPTR_MAX INT32_MAX # define UINTPTR_MAX UINT32_MAX #endif // _WIN64 ] // 7.18.2.5 Limits of greatest-width integer types #define INTMAX_MIN INT64_MIN #define INTMAX_MAX INT64_MAX #define UINTMAX_MAX UINT64_MAX // 7.18.3 Limits of other integer types #ifdef _WIN64 // [ # define PTRDIFF_MIN _I64_MIN # define PTRDIFF_MAX _I64_MAX #else // _WIN64 ][ # define PTRDIFF_MIN _I32_MIN # define PTRDIFF_MAX _I32_MAX #endif // _WIN64 ] #define SIG_ATOMIC_MIN INT_MIN #define SIG_ATOMIC_MAX INT_MAX #ifndef SIZE_MAX // [ # ifdef _WIN64 // [ # define SIZE_MAX _UI64_MAX # else // _WIN64 ][ # define SIZE_MAX _UI32_MAX # endif // _WIN64 ] #endif // SIZE_MAX ] // WCHAR_MIN and WCHAR_MAX are also defined in #ifndef WCHAR_MIN // [ # define WCHAR_MIN 0 #endif // WCHAR_MIN ] #ifndef WCHAR_MAX // [ # define WCHAR_MAX _UI16_MAX #endif // WCHAR_MAX ] #define WINT_MIN 0 #define WINT_MAX _UI16_MAX #endif // __STDC_LIMIT_MACROS ] // 7.18.4 Limits of other integer types #if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 // 7.18.4.1 Macros for minimum-width integer constants #define INT8_C(val) val##i8 #define INT16_C(val) val##i16 #define INT32_C(val) val##i32 #define INT64_C(val) val##i64 #define UINT8_C(val) val##ui8 #define UINT16_C(val) val##ui16 #define UINT32_C(val) val##ui32 #define UINT64_C(val) val##ui64 // 7.18.4.2 Macros for greatest-width integer constants // These #ifndef's are needed to prevent collisions with . // Check out Issue 9 for the details. #ifndef INTMAX_C // [ # define INTMAX_C INT64_C #endif // INTMAX_C ] #ifndef UINTMAX_C // [ # define UINTMAX_C UINT64_C #endif // UINTMAX_C ] #endif // __STDC_CONSTANT_MACROS ] #endif // _MSC_VER >= 1600 ] #endif // _MSC_STDINT_H_ ] preshed-3.0.2/preshed/000077500000000000000000000000001354236332100146025ustar00rootroot00000000000000preshed-3.0.2/preshed/__init__.pxd000066400000000000000000000000001354236332100170440ustar00rootroot00000000000000preshed-3.0.2/preshed/__init__.py000066400000000000000000000000251354236332100167100ustar00rootroot00000000000000from .about import * preshed-3.0.2/preshed/about.py000066400000000000000000000004121354236332100162630ustar00rootroot00000000000000__title__ = "preshed" __version__ = "3.0.2" __summary__ = "Cython hash table that trusts the keys are pre-hashed" __uri__ = "https://github.com/explosion/preshed" __author__ = "Matthew Honnibal" __email__ = "matt@explosion.ai" __license__ = "MIT" __release__ = True preshed-3.0.2/preshed/bloom.pxd000066400000000000000000000012211354236332100164230ustar00rootroot00000000000000from libc.stdint cimport uint64_t, uint32_t from cymem.cymem cimport Pool ctypedef uint64_t key_t cdef struct BloomStruct: key_t* bitfield key_t hcount # hash count, number of hash functions key_t length uint32_t seed cdef class BloomFilter: cdef Pool mem cdef BloomStruct* c_bloom cdef inline bint contains(self, key_t item) nogil cdef void bloom_init(Pool mem, BloomStruct* bloom, key_t hcount, key_t length, uint32_t seed) except * cdef void bloom_add(BloomStruct* bloom, key_t item) nogil cdef bint bloom_contains(const BloomStruct* bloom, key_t item) nogil cdef void bloom_add(BloomStruct* bloom, key_t item) nogil preshed-3.0.2/preshed/bloom.pyx000066400000000000000000000106611354236332100164600ustar00rootroot00000000000000# cython: infer_types=True # cython: cdivision=True # from murmurhash.mrmr cimport hash128_x86 import math from array import array try: import copy_reg except ImportError: import copyreg as copy_reg def calculate_size_and_hash_count(members, error_rate): """Calculate the optimal size in bits and number of hash functions for a given number of members and error rate. """ base = math.log(1 / (2 ** math.log(2))) bit_count = math.ceil((members * math.log(error_rate)) / base) hash_count = math.floor((bit_count / members) * math.log(2)) return (bit_count, hash_count) cdef class BloomFilter: """Bloom filter that allows for basic membership tests. Only integers are supported as keys. """ def __init__(self, key_t size=(2 ** 10), key_t hash_funcs=23, uint32_t seed=0): self.mem = Pool() self.c_bloom = self.mem.alloc(1, sizeof(BloomStruct)) bloom_init(self.mem, self.c_bloom, hash_funcs, size, seed) @classmethod def from_error_rate(cls, members, error_rate=1E-4): params = calculate_size_and_hash_count(members, error_rate) return cls(*params) def add(self, key_t item): bloom_add(self.c_bloom, item) def __contains__(self, item): return bloom_contains(self.c_bloom, item) cdef inline bint contains(self, key_t item) nogil: return bloom_contains(self.c_bloom, item) def to_bytes(self): return bloom_to_bytes(self.c_bloom) def from_bytes(self, bytes byte_string): bloom_from_bytes(self.mem, self.c_bloom, byte_string) return self cdef bytes bloom_to_bytes(const BloomStruct* bloom): py = array("L") py.append(bloom.hcount) py.append(bloom.length) py.append(bloom.seed) for i in range(bloom.length // sizeof(key_t)): py.append(bloom.bitfield[i]) if hasattr(py, "tobytes"): return py.tobytes() else: # Python 2 :( return py.tostring() cdef void bloom_from_bytes(Pool mem, BloomStruct* bloom, bytes data): py = array("L") if hasattr(py, "frombytes"): py.frombytes(data) else: py.fromstring(data) bloom.hcount = py[0] bloom.length = py[1] bloom.seed = py[2] bloom.bitfield = mem.alloc(bloom.length // sizeof(key_t), sizeof(key_t)) for i in range(bloom.length // sizeof(key_t)): bloom.bitfield[i] = py[3+i] cdef void bloom_init(Pool mem, BloomStruct* bloom, key_t hcount, key_t length, uint32_t seed) except *: # size should be a multiple of the container size - round up if length % sizeof(key_t): length = math.ceil(length / sizeof(key_t)) * sizeof(key_t) bloom.length = length bloom.hcount = hcount bloom.bitfield = mem.alloc(length // sizeof(key_t), sizeof(key_t)) bloom.seed = seed # Instead of calling MurmurHash with a different seed for each hash function, this # generates two initial hash values and then combines them to create the correct # number of hashes. This technique is faster than just doing MurmurhHash # repeatedly and has been shown to work as well as full hashing. # For details see "Less Hashing, Same Performance: Building a Better Bloom # Filter", Kirsch & Mitzenmacher. # https://www.semanticscholar.org/paper/Less-hashing%2C-same-performance%3A-Building-a-better-Kirsch-Mitzenmacher/65c43afbfc064705bdc40d3473f32518e9306429 # The choice of seeds is arbitrary. cdef void bloom_add(BloomStruct* bloom, key_t item) nogil: cdef key_t hv cdef key_t[2] keys cdef key_t one = 1 # We want this explicitly typed, because bits hash128_x86(&item, sizeof(key_t), 0, &keys) for hiter in range(bloom.hcount): hv = (keys[0] + (hiter * keys[1])) % bloom.length bloom.bitfield[hv // sizeof(key_t)] |= one << (hv % sizeof(key_t)) cdef bint bloom_contains(const BloomStruct* bloom, key_t item) nogil: cdef key_t hv cdef key_t[2] keys cdef key_t one = 1 # We want this explicitly typed, because bits hash128_x86(&item, sizeof(key_t), 0, &keys) for hiter in range(bloom.hcount): hv = (keys[0] + (hiter * keys[1])) % bloom.length if not (bloom.bitfield[hv // sizeof(key_t)] & one << (hv % sizeof(key_t))): return False return True def pickle_bloom(BloomFilter bloom): return unpickle_bloom, (bloom.to_bytes(),) def unpickle_bloom(byte_string): return BloomFilter().from_bytes(byte_string) copy_reg.pickle(BloomFilter, pickle_bloom, unpickle_bloom) preshed-3.0.2/preshed/counter.pxd000066400000000000000000000006141354236332100167770ustar00rootroot00000000000000from libc.stdint cimport int64_t from cymem.cymem cimport Pool from .maps cimport MapStruct from .maps cimport map_init, map_get, map_set, map_iter from .maps cimport key_t ctypedef int64_t count_t cdef class PreshCounter: cdef Pool mem cdef MapStruct* c_map cdef public object smoother cdef readonly count_t total cpdef int inc(self, key_t key, count_t inc) except -1 preshed-3.0.2/preshed/counter.pyx000066400000000000000000000137441354236332100170340ustar00rootroot00000000000000"""Count occurrences of uint64-valued keys.""" from __future__ import division cimport cython from libc.math cimport log, exp, sqrt cdef class PreshCounter: def __init__(self, initial_size=8): assert initial_size != 0 assert initial_size & (initial_size - 1) == 0 self.mem = Pool() self.c_map = self.mem.alloc(1, sizeof(MapStruct)) map_init(self.mem, self.c_map, initial_size) self.smoother = None self.total = 0 property length: def __get__(self): return self.c_map.length def __len__(self): return self.c_map.length def __iter__(self): cdef int i = 0 cdef key_t key cdef void* value while map_iter(self.c_map, &i, &key, &value): yield key, value def __getitem__(self, key_t key): return map_get(self.c_map, key) cpdef int inc(self, key_t key, count_t inc) except -1: cdef count_t c = map_get(self.c_map, key) c += inc map_set(self.mem, self.c_map, key, c) self.total += inc return c def prob(self, key_t key): cdef GaleSmoother smoother cdef void* value = map_get(self.c_map, key) if self.smoother is not None: smoother = self.smoother r_star = self.smoother(value) return r_star / self.smoother.total elif value == NULL: return 0 else: return value / self.total def smooth(self): self.smoother = GaleSmoother(self) cdef class GaleSmoother: cdef Pool mem cdef count_t* Nr cdef double gradient cdef double intercept cdef readonly count_t cutoff cdef count_t Nr0 cdef readonly double total def __init__(self, PreshCounter counts): count_counts = PreshCounter() cdef double total = 0 for _, count in counts: count_counts.inc(count, 1) total += count # If we have no items seen 1 or 2 times, this doesn't work. But, this # won't be true in real data... assert count_counts[1] != 0 and count_counts[2] != 0, "Cannot smooth your weird data" # Extrapolate Nr0 from Nr1 and Nr2. self.Nr0 = count_counts[1] + (count_counts[1] - count_counts[2]) self.mem = Pool() cdef double[2] mb cdef int n_counts = 0 for _ in count_counts: n_counts += 1 sorted_r = count_counts.mem.alloc(n_counts, sizeof(count_t)) self.Nr = self.mem.alloc(n_counts, sizeof(count_t)) for i, (count, count_count) in enumerate(sorted(count_counts)): sorted_r[i] = count self.Nr[i] = count_count _fit_loglinear_model(mb, sorted_r, self.Nr, n_counts) self.cutoff = _find_when_to_switch(sorted_r, self.Nr, mb[0], mb[1], n_counts) self.gradient = mb[0] self.intercept = mb[1] self.total = self(0) * self.Nr0 for count, count_count in count_counts: self.total += self(count) * count_count def __call__(self, count_t r): if r == 0: return self.Nr[1] / self.Nr0 elif r < self.cutoff: return turing_estimate_of_r(r, self.Nr[r-1], self.Nr[r]) else: return gale_estimate_of_r(r, self.gradient, self.intercept) def count_count(self, count_t r): if r == 0: return self.Nr0 else: return self.Nr[r-1] @cython.cdivision(True) cdef double turing_estimate_of_r(double r, double Nr, double Nr1) except -1: return ((r + 1) * Nr1) / Nr @cython.cdivision(True) cdef double gale_estimate_of_r(double r, double gradient, double intercept) except -1: cdef double e_nr = exp(gradient * log(r) + intercept) cdef double e_nr1 = exp(gradient * log(r+1) + intercept) return (r + 1) * (e_nr1 / e_nr) @cython.cdivision(True) cdef void _fit_loglinear_model(double* output, count_t* sorted_r, count_t* Nr, int length) except *: cdef double x_mean = 0.0 cdef double y_mean = 0.0 cdef Pool mem = Pool() x = mem.alloc(length, sizeof(double)) y = mem.alloc(length, sizeof(double)) cdef int i for i in range(length): r = sorted_r[i] x[i] = log(r) y[i] = log(_get_zr(i, sorted_r, Nr[i], length)) x_mean += x[i] y_mean += y[i] x_mean /= length y_mean /= length cdef double ss_xy = 0.0 cdef double ss_xx = 0.0 for i in range(length): x_dist = x[i] - x_mean y_dist = y[i] - y_mean # SS_xy = sum the product of the distances from the mean ss_xy += x_dist * y_dist # SS_xx = sum the squares of the x distance ss_xx += x_dist * x_dist # Gradient output[0] = ss_xy / ss_xx # Intercept output[1] = y_mean - output[0] * x_mean @cython.cdivision(True) cdef double _get_zr(int j, count_t* sorted_r, count_t Nr_j, int n_counts) except -1: cdef double r_i = sorted_r[j-1] if j >= 1 else 0 cdef double r_j = sorted_r[j] cdef double r_k = sorted_r[j+1] if (j+1) < n_counts else (2 * r_i - 1) return 2 * Nr_j / (r_k - r_i) @cython.cdivision(True) cdef double _variance(double r, double Nr, double Nr1) nogil: return 1.96 * sqrt((r+1)**2 * (Nr1 / Nr**2) * (1.0 + (Nr1 / Nr))) @cython.cdivision(True) cdef count_t _find_when_to_switch(count_t* sorted_r, count_t* Nr, double m, double b, int length) except -1: cdef int i cdef count_t r for i in range(length-1): r = sorted_r[i] if sorted_r[i+1] != r+1: return r g_r = gale_estimate_of_r(r, m, b) t_r = turing_estimate_of_r(r, Nr[i], Nr[i+1]) if abs(t_r - g_r) <= _variance(r, Nr[i], Nr[i+1]): return r else: return length - 1 preshed-3.0.2/preshed/maps.pxd000066400000000000000000000024661354236332100162670ustar00rootroot00000000000000from libc.stdint cimport uint64_t from cymem.cymem cimport Pool ctypedef uint64_t key_t cdef struct Cell: key_t key void* value cdef struct Result: int found void* value cdef struct MapStruct: Cell* cells void* value_for_empty_key void* value_for_del_key key_t length key_t filled bint is_empty_key_set bint is_del_key_set cdef void* map_bulk_get(const MapStruct* map_, const key_t* keys, void** values, int n) nogil cdef Result map_get_unless_missing(const MapStruct* map_, const key_t key) nogil cdef void* map_get(const MapStruct* map_, const key_t key) nogil cdef void map_set(Pool mem, MapStruct* map_, key_t key, void* value) except * cdef void map_init(Pool mem, MapStruct* pmap, size_t length) except * cdef bint map_iter(const MapStruct* map_, int* i, key_t* key, void** value) nogil cdef void* map_clear(MapStruct* map_, const key_t key) nogil cdef class PreshMap: cdef MapStruct* c_map cdef Pool mem cdef inline void* get(self, key_t key) nogil cdef void set(self, key_t key, void* value) except * cdef class PreshMapArray: cdef Pool mem cdef MapStruct* maps cdef size_t length cdef inline void* get(self, size_t i, key_t key) nogil cdef void set(self, size_t i, key_t key, void* value) except * preshed-3.0.2/preshed/maps.pyx000066400000000000000000000210231354236332100163020ustar00rootroot00000000000000# cython: infer_types=True # cython: cdivision=True # cimport cython DEF EMPTY_KEY = 0 DEF DELETED_KEY = 1 cdef class PreshMap: """Hash map that assumes keys come pre-hashed. Maps uint64_t --> uint64_t. Uses open addressing with linear probing. Usage map = PreshMap() # Create a table map = PreshMap(initial_size=1024) # Create with initial size (efficiency) map[key] = value # Set a value to a key value = map[key] # Get a value given a key for key, value in map.items(): # Iterate over items len(map) # Get number of inserted keys """ def __init__(self, size_t initial_size=8): # Size must be power of two if initial_size == 0: initial_size = 8 if initial_size & (initial_size - 1) != 0: power = 1 while power < initial_size: power *= 2 initial_size = power self.mem = Pool() self.c_map = self.mem.alloc(1, sizeof(MapStruct)) map_init(self.mem, self.c_map, initial_size) property capacity: def __get__(self): return self.c_map.length def items(self): cdef key_t key cdef void* value cdef int i = 0 while map_iter(self.c_map, &i, &key, &value): yield key, value def keys(self): for key, _ in self.items(): yield key def values(self): for _, value in self.items(): yield value def pop(self, key_t key, default=None): cdef Result result = map_get_unless_missing(self.c_map, key) map_clear(self.c_map, key) if result.found: return result.value else: return default def __getitem__(self, key_t key): cdef Result result = map_get_unless_missing(self.c_map, key) if result.found: return result.value else: return None def __setitem__(self, key_t key, size_t value): map_set(self.mem, self.c_map, key, value) def __delitem__(self, key_t key): map_clear(self.c_map, key) def __len__(self): return self.c_map.filled def __contains__(self, key_t key): cdef Result result = map_get_unless_missing(self.c_map, key) return True if result.found else False def __iter__(self): for key in self.keys(): yield key cdef inline void* get(self, key_t key) nogil: return map_get(self.c_map, key) cdef void set(self, key_t key, void* value) except *: map_set(self.mem, self.c_map, key, value) cdef class PreshMapArray: """An array of hash tables that assume keys come pre-hashed. Each table uses open addressing with linear probing. """ def __init__(self, size_t length, size_t initial_size=8): self.mem = Pool() self.length = length self.maps = self.mem.alloc(length, sizeof(MapStruct)) for i in range(length): map_init(self.mem, &self.maps[i], initial_size) cdef inline void* get(self, size_t i, key_t key) nogil: return map_get(&self.maps[i], key) cdef void set(self, size_t i, key_t key, void* value) except *: map_set(self.mem, &self.maps[i], key, value) cdef void map_init(Pool mem, MapStruct* map_, size_t length) except *: map_.length = length map_.filled = 0 map_.cells = mem.alloc(length, sizeof(Cell)) cdef void map_set(Pool mem, MapStruct* map_, key_t key, void* value) except *: cdef Cell* cell if key == EMPTY_KEY: map_.value_for_empty_key = value map_.is_empty_key_set = True elif key == DELETED_KEY: map_.value_for_del_key = value map_.is_del_key_set = True else: cell = _find_cell_for_insertion(map_.cells, map_.length, key) if cell.key == EMPTY_KEY: map_.filled += 1 cell.key = key cell.value = value if (map_.filled + 1) * 5 >= (map_.length * 3): _resize(mem, map_) cdef void* map_get(const MapStruct* map_, const key_t key) nogil: if key == EMPTY_KEY: return map_.value_for_empty_key elif key == DELETED_KEY: return map_.value_for_del_key cdef Cell* cell = _find_cell(map_.cells, map_.length, key) return cell.value cdef Result map_get_unless_missing(const MapStruct* map_, const key_t key) nogil: cdef Result result cdef Cell* cell result.found = 0 result.value = NULL if key == EMPTY_KEY: if map_.is_empty_key_set: result.found = 1 result.value = map_.value_for_empty_key elif key == DELETED_KEY: if map_.is_del_key_set: result.found = 1 result.value = map_.value_for_del_key else: cell = _find_cell(map_.cells, map_.length, key) if cell.key == key: result.found = 1 result.value = cell.value return result cdef void* map_clear(MapStruct* map_, const key_t key) nogil: if key == EMPTY_KEY: value = map_.value_for_empty_key if map_.is_empty_key_set else NULL map_.is_empty_key_set = False return value elif key == DELETED_KEY: value = map_.value_for_del_key if map_.is_del_key_set else NULL map_.is_del_key_set = False return value else: cell = _find_cell(map_.cells, map_.length, key) cell.key = DELETED_KEY # We shouldn't decrement the "filled" value here, as we're not actually # making "empty" values -- deleted values aren't quite the same. # Instead if we manage to insert into a deleted slot, we don't increment # the fill rate. return cell.value cdef void* map_bulk_get(const MapStruct* map_, const key_t* keys, void** values, int n) nogil: cdef int i for i in range(n): values[i] = map_get(map_, keys[i]) cdef bint map_iter(const MapStruct* map_, int* i, key_t* key, void** value) nogil: '''Iterate over the filled items, setting the current place in i, and the key and value. Return False when iteration finishes. ''' cdef const Cell* cell while i[0] < map_.length: cell = &map_.cells[i[0]] i[0] += 1 if cell[0].key != EMPTY_KEY and cell[0].key != DELETED_KEY: key[0] = cell[0].key value[0] = cell[0].value return True # Remember to check for cells keyed by the special empty and deleted keys if i[0] == map_.length: i[0] += 1 if map_.is_empty_key_set: key[0] = EMPTY_KEY value[0] = map_.value_for_empty_key return True if i[0] == map_.length + 1: i[0] += 1 if map_.is_del_key_set: key[0] = DELETED_KEY value[0] = map_.value_for_del_key return True return False @cython.cdivision cdef inline Cell* _find_cell(Cell* cells, const key_t size, const key_t key) nogil: # Modulo for powers-of-two via bitwise & cdef key_t i = (key & (size - 1)) while cells[i].key != EMPTY_KEY and cells[i].key != key: i = (i + 1) & (size - 1) return &cells[i] @cython.cdivision cdef inline Cell* _find_cell_for_insertion(Cell* cells, const key_t size, const key_t key) nogil: """Find the correct cell to insert a value, which could be a previously deleted cell. If we cross a deleted cell and the key is in the table, we mark the later cell as deleted, and return the earlier one.""" cdef Cell* deleted = NULL # Modulo for powers-of-two via bitwise & cdef key_t i = (key & (size - 1)) while cells[i].key != EMPTY_KEY and cells[i].key != key: if cells[i].key == DELETED_KEY: deleted = &cells[i] i = (i + 1) & (size - 1) if deleted is not NULL: if cells[i].key == key: # We need to ensure we don't end up with the key in the table twice. # If we're using a deleted cell and we also have the key, we mark # the later cell as deleted. cells[i].key = DELETED_KEY return deleted return &cells[i] cdef void _resize(Pool mem, MapStruct* map_) except *: cdef size_t new_size = map_.length * 2 cdef Cell* old_cells = map_.cells cdef size_t old_size = map_.length map_.length = new_size map_.filled = 0 map_.cells = mem.alloc(new_size, sizeof(Cell)) cdef size_t i cdef size_t slot for i in range(old_size): if old_cells[i].key != EMPTY_KEY and old_cells[i].key != DELETED_KEY: map_set(mem, map_, old_cells[i].key, old_cells[i].value) mem.free(old_cells) preshed-3.0.2/preshed/tests/000077500000000000000000000000001354236332100157445ustar00rootroot00000000000000preshed-3.0.2/preshed/tests/__init__.py000066400000000000000000000000001354236332100200430ustar00rootroot00000000000000preshed-3.0.2/preshed/tests/test_bloom.py000066400000000000000000000023501354236332100204650ustar00rootroot00000000000000from __future__ import division import pytest import pickle from preshed.bloom import BloomFilter def test_contains(): bf = BloomFilter() assert 23 not in bf bf.add(23) assert 23 in bf bf.add(5) bf.add(42) bf.add(1002) assert 5 in bf assert 42 in bf assert 1002 in bf def test_no_false_negatives(): bf = BloomFilter(size=100, hash_funcs=2) for ii in range(0,1000,20): bf.add(ii) for ii in range(0,1000,20): assert ii in bf def test_from_error(): bf = BloomFilter.from_error_rate(1000) for ii in range(0,1000,20): bf.add(ii) for ii in range(0,1000,20): assert ii in bf def test_to_from_bytes(): bf = BloomFilter(size=100, hash_funcs=2) for ii in range(0,1000,20): bf.add(ii) data = bf.to_bytes() bf2 = BloomFilter() for ii in range(0,1000,20): assert ii not in bf2 bf2.from_bytes(data) for ii in range(0,1000,20): assert ii in bf2 assert bf2.to_bytes() == data def test_bloom_pickle(): bf = BloomFilter(size=100, hash_funcs=2) for ii in range(0,1000,20): bf.add(ii) data = pickle.dumps(bf) bf2 = pickle.loads(data) for ii in range(0,1000,20): assert ii in bf2 preshed-3.0.2/preshed/tests/test_counter.py000066400000000000000000000037201354236332100210360ustar00rootroot00000000000000from __future__ import division import pytest from preshed.counter import PreshCounter def test_count(): counter = PreshCounter() assert counter[12] == 0 counter.inc(12, 1) assert counter[12] == 1 counter.inc(14, 10) counter.inc(9, 10) counter.inc(12, 4) assert counter[12] == 5 assert counter[14] == 10 assert counter[9] == 10 def test_unsmooth_prob(): counter = PreshCounter() assert counter.prob(12) == 0.0 counter.inc(12, 1) assert counter.prob(12) == 1.0 counter.inc(14, 10) assert counter.prob(14) == 10 / 11 assert counter.prob(12) == 1.0 / 11 def test_smooth_prob(): p = PreshCounter() # 1 10 # 2 6 # 3 4 # 5 2 # 8 1 for i in range(10): p.inc(100-i, 1) # 10 items of freq 1 for i in range(6): p.inc(90 - i, 2) # 6 items of freq 2 for i in range(4): p.inc(80 - i, 3) # 4 items of freq 3 for i in range(2): p.inc(70 - i, 5) # 2 items of freq 5 for i in range(1): p.inc(60 - i, 8) # 1 item of freq 8 assert p.total == (10 * 1) + (6 * 2) + (4 * 3) + (2 * 5) + (1 * 8) assert p.prob(100) == 1.0 / p.total assert p.prob(200) == 0.0 assert p.prob(60) == 8.0 / p.total p.smooth() assert p.smoother(1) < 1.0 assert p.smoother(8) < 8.0 assert p.prob(1000) < p.prob(100) for event, count in reversed(sorted(p, key=lambda it: it[1])): assert p.smoother(count) < count import os def test_large_freqs(): if 'TEST_FILE_LOC' in os.environ: loc = os.environ['TEST_FILE_LOC'] else: return None counts = PreshCounter() for i, line in enumerate(open(loc)): line = line.strip() if not line: continue freq = int(line.split()[0]) counts.inc(i+1, freq) oov = i+2 assert counts.prob(oov) == 0.0 assert counts.prob(1) < 0.1 counts.smooth() assert counts.prob(oov) > 0 assert counts.prob(oov) < counts.prob(i) preshed-3.0.2/preshed/tests/test_hashing.py000066400000000000000000000025741354236332100210060ustar00rootroot00000000000000import pytest from preshed.maps import PreshMap import random def test_insert(): h = PreshMap() assert h[1] is None h[1] = 5 assert h[1] == 5 h[2] = 6 assert h[1] == 5 assert h[2] == 6 def test_resize(): h = PreshMap(4) h[4] = 12 for i in range(10, 100): value = int(i * (random.random() + 1)) h[i] = value assert h[4] == 12 def test_zero_key(): h = PreshMap() h[0] = 6 h[5] = 12 assert h[0] == 6 assert h[5] == 12 for i in range(500, 1000): h[i] = i * random.random() assert h[0] == 6 assert h[5] == 12 def test_iter(): key_sum = 0 val_sum = 0 h = PreshMap() for i in range(56, 24, -3): h[i] = i * 2 key_sum += i val_sum += i * 2 for key, value in h.items(): key_sum -= key val_sum -= value assert key_sum == 0 assert val_sum == 0 def test_one_and_empty(): # See Issue #21 table = PreshMap() for i in range(100, 110): table[i] = i del table[i] assert table[0] == None def test_many_and_empty(): # See Issue #21 table = PreshMap() for i in range(100, 110): table[i] = i for i in range(100, 110): del table[i] assert table[0] == None def test_zero_values(): table = PreshMap() table[10] = 0 assert table[10] == 0 assert table[11] is None preshed-3.0.2/preshed/tests/test_pop.py000066400000000000000000000003171354236332100201540ustar00rootroot00000000000000from ..maps import PreshMap def test_pop1(): table = PreshMap() table[10] = 20 table[30] = 25 assert table[10] == 20 assert table[30] == 25 table.pop(30) assert table[10] == 20 preshed-3.0.2/requirements.txt000066400000000000000000000001021354236332100164250ustar00rootroot00000000000000cymem>=2.0.2,<2.1.0 cython>=0.28 pytest murmurhash>=0.28.0,<1.1.0 preshed-3.0.2/setup.py000077500000000000000000000113621354236332100146700ustar00rootroot00000000000000#!/usr/bin/env python from __future__ import print_function import os import subprocess import sys import contextlib from distutils.command.build_ext import build_ext from distutils.sysconfig import get_python_inc from distutils import ccompiler, msvccompiler try: from setuptools import Extension, setup except ImportError: from distutils.core import Extension, setup PACKAGES = ["preshed", "preshed.tests"] MOD_NAMES = ["preshed.maps", "preshed.counter", "preshed.bloom"] # By subclassing build_extensions we have the actual compiler that will be used which is really known only after finalize_options # http://stackoverflow.com/questions/724664/python-distutils-how-to-get-a-compiler-that-is-going-to-be-used compile_options = { "msvc": ["/Ox", "/EHsc"], "other": ["-O3", "-Wno-strict-prototypes", "-Wno-unused-function"], } link_options = {"msvc": [], "other": []} class build_ext_options: def build_options(self): for e in self.extensions: e.extra_compile_args = compile_options.get( self.compiler.compiler_type, compile_options["other"] ) for e in self.extensions: e.extra_link_args = link_options.get( self.compiler.compiler_type, link_options["other"] ) class build_ext_subclass(build_ext, build_ext_options): def build_extensions(self): build_ext_options.build_options(self) build_ext.build_extensions(self) def generate_cython(root, source): print("Cythonizing sources") p = subprocess.call( [sys.executable, os.path.join(root, "bin", "cythonize.py"), source] ) if p != 0: raise RuntimeError("Running cythonize failed") def is_source_release(path): return os.path.exists(os.path.join(path, "PKG-INFO")) def clean(path): for name in MOD_NAMES: name = name.replace(".", "/") for ext in [".so", ".html", ".cpp", ".c"]: file_path = os.path.join(path, name + ext) if os.path.exists(file_path): os.unlink(file_path) @contextlib.contextmanager def chdir(new_dir): old_dir = os.getcwd() try: os.chdir(new_dir) sys.path.insert(0, new_dir) yield finally: del sys.path[0] os.chdir(old_dir) def setup_package(): root = os.path.abspath(os.path.dirname(__file__)) if len(sys.argv) > 1 and sys.argv[1] == "clean": return clean(root) with chdir(root): with open(os.path.join(root, "preshed", "about.py")) as f: about = {} exec(f.read(), about) with open(os.path.join(root, "README.md")) as f: readme = f.read() include_dirs = [get_python_inc(plat_specific=True)] if ( ccompiler.new_compiler().compiler_type == "msvc" and msvccompiler.get_build_version() == 9 ): include_dirs.append(os.path.join(root, "include", "msvc9")) ext_modules = [] for mod_name in MOD_NAMES: mod_path = mod_name.replace(".", "/") + ".cpp" ext_modules.append( Extension( mod_name, [mod_path], language="c++", include_dirs=include_dirs ) ) if not is_source_release(root): generate_cython(root, "preshed") setup( name="preshed", zip_safe=False, packages=PACKAGES, package_data={"": ["*.pyx", "*.pxd"]}, description=about["__summary__"], long_description=readme, long_description_content_type="text/markdown", author=about["__author__"], author_email=about["__email__"], version=about["__version__"], url=about["__uri__"], license=about["__license__"], ext_modules=ext_modules, setup_requires=[], install_requires=["cymem>=2.0.2,<2.1.0", "murmurhash>=0.28.0,<1.1.0"], classifiers=[ "Environment :: Console", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "License :: OSI Approved :: MIT License", "Operating System :: POSIX :: Linux", "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", "Programming Language :: Cython", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Topic :: Scientific/Engineering", ], cmdclass={"build_ext": build_ext_subclass}, ) if __name__ == "__main__": setup_package()