==> preshed-3.0.2/.gitignore <==
*.egg
*.egg-info
preshed/.maps.pxd.swm
preshed/.maps.pyx.swl
*.sw[a-z]
*.so
*.pyc
*.swp
*.swo
*.html
*.c
*.cpp
.env/
.denv
MANIFEST
build/
dist/

==> preshed-3.0.2/LICENSE <==
The MIT License (MIT)
Copyright (c) 2016 ExplosionAI GmbH, 2014 Matthew Honnibal
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

==> preshed-3.0.2/MANIFEST.in <==
recursive-include include *.h
include LICENSE
include README.md

==> preshed-3.0.2/README.md <==
# preshed: Cython Hash Table for Pre-Hashed Keys
A simple but high-performance Cython hash table that maps pre-randomized keys to `void*` values. Inspired by [Jeff Preshing](http://preshing.com/20130107/this-hash-table-is-faster-than-a-judy-array/).
[Azure Pipelines](https://dev.azure.com/explosion-ai/public/_build?definitionId=3)
[PyPI](https://pypi.python.org/pypi/preshed)
[conda-forge](https://anaconda.org/conda-forge/preshed)
[Wheels](https://github.com/explosion/wheelwright/releases)
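
A minimal usage sketch, based on the package's own test suite (keys are
`uint64` values that are assumed to be pre-hashed/randomized by you):

```python
from preshed.maps import PreshMap
from preshed.bloom import BloomFilter

table = PreshMap()        # or PreshMap(initial_size=1024); sizes round up to a power of two
table[12] = 84            # maps uint64 keys to machine-word-sized values
assert table[12] == 84
assert table[6] is None   # missing keys come back as None

bf = BloomFilter()        # or BloomFilter(size=100, hash_funcs=2)
bf.add(23)
assert 23 in bf
```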

==> preshed-3.0.2/azure-pipelines.yml <==
trigger:
batch: true
branches:
include:
- '*'
jobs:
- job: 'Test'
strategy:
matrix:
Python27Linux:
imageName: 'ubuntu-16.04'
python.version: '2.7'
Python27Mac:
imageName: 'macos-10.13'
python.version: '2.7'
Python35Linux:
imageName: 'ubuntu-16.04'
python.version: '3.5'
Python35Windows:
imageName: 'vs2017-win2016'
python.version: '3.5'
Python35Mac:
imageName: 'macos-10.13'
python.version: '3.5'
Python36Linux:
imageName: 'ubuntu-16.04'
python.version: '3.6'
Python36Windows:
imageName: 'vs2017-win2016'
python.version: '3.6'
Python36Mac:
imageName: 'macos-10.13'
python.version: '3.6'
Python37Linux:
imageName: 'ubuntu-16.04'
python.version: '3.7'
Python37Windows:
imageName: 'vs2017-win2016'
python.version: '3.7'
Python37Mac:
imageName: 'macos-10.13'
python.version: '3.7'
maxParallel: 4
pool:
vmImage: $(imageName)
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '$(python.version)'
architecture: 'x64'
- script: |
python -m pip install --upgrade pip wheel
pip install -r requirements.txt
displayName: 'Install dependencies'
- script: |
python setup.py build_ext --inplace
python setup.py sdist
displayName: 'Build sdist'
- script: pip install dist/*.tar.gz
condition: in( variables['Agent.OS'], 'Linux', 'Darwin')
displayName: 'Install from sdist (Linux, Mac)'
- script: pip install -e .
condition: eq( variables['Agent.OS'], 'Windows_NT')
displayName: 'Install with pip (Windows)'
- script: |
python -m pytest preshed
displayName: 'Run tests'

==> preshed-3.0.2/bin/cythonize.py <==
#!/usr/bin/env python
""" cythonize.py
Cythonize pyx files into C++ files as needed.
Usage: cythonize.py [root]
Checks pyx files to see if they have been changed relative to their
corresponding C++ files. If they have, then runs cython on these files to
recreate the C++ files.
Additionally, checks pxd files and setup.py if they have been changed. If
they have, rebuilds everything.
Change detection based on file hashes stored in JSON format.
For now, this script should be run by developers when changing Cython files
and the resulting C++ files checked in, so that end-users (and Python-only
developers) do not get the Cython dependencies.
Based upon:
https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py
https://raw.githubusercontent.com/numpy/numpy/master/tools/cythonize.py
Note: this script does not check any of the dependent C++ libraries.
"""
from __future__ import print_function
import os
import sys
import json
import hashlib
import subprocess
import argparse
HASH_FILE = 'cythonize.json'
def process_pyx(fromfile, tofile):
print('Processing %s' % fromfile)
try:
from Cython.Compiler.Version import version as cython_version
from distutils.version import LooseVersion
if LooseVersion(cython_version) < LooseVersion('0.19'):
raise Exception('Require Cython >= 0.19')
except ImportError:
pass
flags = ['--fast-fail']
if tofile.endswith('.cpp'):
flags += ['--cplus']
try:
try:
r = subprocess.call(['cython'] + flags + ['-o', tofile, fromfile])
if r != 0:
raise Exception('Cython failed')
except OSError:
# There are ways of installing Cython that don't result in a cython
# executable on the path, see gh-2397.
r = subprocess.call([sys.executable, '-c',
'import sys; from Cython.Compiler.Main import '
'setuptools_main as main; sys.exit(main())'] + flags +
['-o', tofile, fromfile])
if r != 0:
raise Exception('Cython failed')
except OSError:
raise OSError('Cython needs to be installed')
def preserve_cwd(path, func, *args):
orig_cwd = os.getcwd()
try:
os.chdir(path)
func(*args)
finally:
os.chdir(orig_cwd)
def load_hashes(filename):
try:
return json.load(open(filename))
except (ValueError, IOError):
return {}
def save_hashes(hash_db, filename):
with open(filename, 'w') as f:
f.write(json.dumps(hash_db))
def get_hash(path):
return hashlib.md5(open(path, 'rb').read()).hexdigest()
def hash_changed(base, path, db):
full_path = os.path.normpath(os.path.join(base, path))
return not get_hash(full_path) == db.get(full_path)
def hash_add(base, path, db):
full_path = os.path.normpath(os.path.join(base, path))
db[full_path] = get_hash(full_path)
def process(base, filename, db):
root, ext = os.path.splitext(filename)
if ext in ['.pyx', '.cpp']:
if hash_changed(base, filename, db) or not os.path.isfile(os.path.join(base, root + '.cpp')):
preserve_cwd(base, process_pyx, root + '.pyx', root + '.cpp')
hash_add(base, root + '.cpp', db)
hash_add(base, root + '.pyx', db)
def check_changes(root, db):
res = False
new_db = {}
setup_filename = 'setup.py'
hash_add('.', setup_filename, new_db)
if hash_changed('.', setup_filename, db):
res = True
for base, _, files in os.walk(root):
for filename in files:
if filename.endswith('.pxd'):
hash_add(base, filename, new_db)
if hash_changed(base, filename, db):
res = True
if res:
db.clear()
db.update(new_db)
return res
def run(root):
db = load_hashes(HASH_FILE)
try:
check_changes(root, db)
for base, _, files in os.walk(root):
for filename in files:
process(base, filename, db)
finally:
save_hashes(db, HASH_FILE)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Cythonize pyx files into C++ files as needed')
parser.add_argument('root', help='root directory')
args = parser.parse_args()
run(args.root)

==> preshed-3.0.2/bin/push-tag.sh <==
#!/usr/bin/env bash
set -e
# Insist repository is clean
git diff-index --quiet HEAD
git checkout $1
git pull origin $1
git push origin $1
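# Extract the version number from preshed/about.py, turning a line like
# __version__ = "3.0.2" into the bare string 3.0.2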
version=$(grep "__version__ = " preshed/about.py)
version=${version/__version__ = }
version=${version/\'/}
version=${version/\'/}
version=${version/\"/}
version=${version/\"/}
git tag "v$version"
git push origin "v$version"

==> preshed-3.0.2/fabfile.py <==
from fabric.api import local, run, lcd, cd, env
import os
from os import path
from os.path import exists as file_exists
from fabtools.python import virtualenv
PWD = path.dirname(__file__)
VENV_DIR = path.join(PWD, '.env')
DEV_ENV_DIR = path.join(PWD, '.denv')
def dev():
# Allow this to persist, since we aren't as rigorous about keeping state clean
if not file_exists('.denv'):
local('virtualenv .denv')
with virtualenv(DEV_ENV_DIR):
local('pip install -r requirements.txt')
def sdist():
if file_exists('dist/'):
local('rm -rf dist/')
local('mkdir dist')
with virtualenv(VENV_DIR):
local('python setup.py sdist')
def publish():
with virtualenv(VENV_DIR):
local('python setup.py register')
local('twine upload dist/*.tar.gz')
def setup():
if file_exists('.env'):
local('rm -rf .env')
local('rm -rf *.egg')
local('virtualenv .env')
def install():
with virtualenv(VENV_DIR):
local('pip install --upgrade setuptools')
local('pip install dist/*.tar.gz')
local('pip install pytest')
def make():
with virtualenv(DEV_ENV_DIR):
with lcd(path.dirname(__file__)):
local('python setup.py build')
def clean():
with lcd(os.path.dirname(__file__)):
local('python setup.py clean --all')
with virtualenv(DEV_ENV_DIR):
with lcd(os.path.dirname(__file__)):
local('python setup.py clean --all')
def test():
with virtualenv(VENV_DIR):
local('python -m pytest -x')
def travis():
local('open https://travis-ci.org/spacy-io/preshed')

==> preshed-3.0.2/include/msvc9/stdint.h <==
// ISO C9x compliant stdint.h for Microsoft Visual Studio
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
//
// Copyright (c) 2006-2013 Alexander Chemeris
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the product nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_STDINT_H_ // [
#define _MSC_STDINT_H_
#if _MSC_VER > 1000
#pragma once
#endif
#if _MSC_VER >= 1600 // [
#include <stdint.h>
#else // ] _MSC_VER >= 1600 [
#include <limits.h>
// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
// or compiler give many errors like this:
// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
#ifdef __cplusplus
extern "C" {
#endif
#   include <wchar.h>
#ifdef __cplusplus
}
#endif
// Define _W64 macros to mark types changing their size, like intptr_t.
#ifndef _W64
# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
# define _W64 __w64
# else
# define _W64
# endif
#endif
// 7.18.1 Integer types
// 7.18.1.1 Exact-width integer types
// Visual Studio 6 and Embedded Visual C++ 4 doesn't
// realize that, e.g. char has the same size as __int8
// so we give up on __intX for them.
#if (_MSC_VER < 1300)
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
#else
typedef signed __int8 int8_t;
typedef signed __int16 int16_t;
typedef signed __int32 int32_t;
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
#endif
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
// 7.18.1.2 Minimum-width integer types
typedef int8_t int_least8_t;
typedef int16_t int_least16_t;
typedef int32_t int_least32_t;
typedef int64_t int_least64_t;
typedef uint8_t uint_least8_t;
typedef uint16_t uint_least16_t;
typedef uint32_t uint_least32_t;
typedef uint64_t uint_least64_t;
// 7.18.1.3 Fastest minimum-width integer types
typedef int8_t int_fast8_t;
typedef int16_t int_fast16_t;
typedef int32_t int_fast32_t;
typedef int64_t int_fast64_t;
typedef uint8_t uint_fast8_t;
typedef uint16_t uint_fast16_t;
typedef uint32_t uint_fast32_t;
typedef uint64_t uint_fast64_t;
// 7.18.1.4 Integer types capable of holding object pointers
#ifdef _WIN64 // [
typedef signed __int64 intptr_t;
typedef unsigned __int64 uintptr_t;
#else // _WIN64 ][
typedef _W64 signed int intptr_t;
typedef _W64 unsigned int uintptr_t;
#endif // _WIN64 ]
// 7.18.1.5 Greatest-width integer types
typedef int64_t intmax_t;
typedef uint64_t uintmax_t;
// 7.18.2 Limits of specified-width integer types
#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
// 7.18.2.1 Limits of exact-width integer types
#define INT8_MIN ((int8_t)_I8_MIN)
#define INT8_MAX _I8_MAX
#define INT16_MIN ((int16_t)_I16_MIN)
#define INT16_MAX _I16_MAX
#define INT32_MIN ((int32_t)_I32_MIN)
#define INT32_MAX _I32_MAX
#define INT64_MIN ((int64_t)_I64_MIN)
#define INT64_MAX _I64_MAX
#define UINT8_MAX _UI8_MAX
#define UINT16_MAX _UI16_MAX
#define UINT32_MAX _UI32_MAX
#define UINT64_MAX _UI64_MAX
// 7.18.2.2 Limits of minimum-width integer types
#define INT_LEAST8_MIN INT8_MIN
#define INT_LEAST8_MAX INT8_MAX
#define INT_LEAST16_MIN INT16_MIN
#define INT_LEAST16_MAX INT16_MAX
#define INT_LEAST32_MIN INT32_MIN
#define INT_LEAST32_MAX INT32_MAX
#define INT_LEAST64_MIN INT64_MIN
#define INT_LEAST64_MAX INT64_MAX
#define UINT_LEAST8_MAX UINT8_MAX
#define UINT_LEAST16_MAX UINT16_MAX
#define UINT_LEAST32_MAX UINT32_MAX
#define UINT_LEAST64_MAX UINT64_MAX
// 7.18.2.3 Limits of fastest minimum-width integer types
#define INT_FAST8_MIN INT8_MIN
#define INT_FAST8_MAX INT8_MAX
#define INT_FAST16_MIN INT16_MIN
#define INT_FAST16_MAX INT16_MAX
#define INT_FAST32_MIN INT32_MIN
#define INT_FAST32_MAX INT32_MAX
#define INT_FAST64_MIN INT64_MIN
#define INT_FAST64_MAX INT64_MAX
#define UINT_FAST8_MAX UINT8_MAX
#define UINT_FAST16_MAX UINT16_MAX
#define UINT_FAST32_MAX UINT32_MAX
#define UINT_FAST64_MAX UINT64_MAX
// 7.18.2.4 Limits of integer types capable of holding object pointers
#ifdef _WIN64 // [
# define INTPTR_MIN INT64_MIN
# define INTPTR_MAX INT64_MAX
# define UINTPTR_MAX UINT64_MAX
#else // _WIN64 ][
# define INTPTR_MIN INT32_MIN
# define INTPTR_MAX INT32_MAX
# define UINTPTR_MAX UINT32_MAX
#endif // _WIN64 ]
// 7.18.2.5 Limits of greatest-width integer types
#define INTMAX_MIN INT64_MIN
#define INTMAX_MAX INT64_MAX
#define UINTMAX_MAX UINT64_MAX
// 7.18.3 Limits of other integer types
#ifdef _WIN64 // [
# define PTRDIFF_MIN _I64_MIN
# define PTRDIFF_MAX _I64_MAX
#else // _WIN64 ][
# define PTRDIFF_MIN _I32_MIN
# define PTRDIFF_MAX _I32_MAX
#endif // _WIN64 ]
#define SIG_ATOMIC_MIN INT_MIN
#define SIG_ATOMIC_MAX INT_MAX
#ifndef SIZE_MAX // [
# ifdef _WIN64 // [
# define SIZE_MAX _UI64_MAX
# else // _WIN64 ][
# define SIZE_MAX _UI32_MAX
# endif // _WIN64 ]
#endif // SIZE_MAX ]
// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
#ifndef WCHAR_MIN // [
# define WCHAR_MIN 0
#endif // WCHAR_MIN ]
#ifndef WCHAR_MAX // [
# define WCHAR_MAX _UI16_MAX
#endif // WCHAR_MAX ]
#define WINT_MIN 0
#define WINT_MAX _UI16_MAX
#endif // __STDC_LIMIT_MACROS ]
// 7.18.4 Limits of other integer types
#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
// 7.18.4.1 Macros for minimum-width integer constants
#define INT8_C(val) val##i8
#define INT16_C(val) val##i16
#define INT32_C(val) val##i32
#define INT64_C(val) val##i64
#define UINT8_C(val) val##ui8
#define UINT16_C(val) val##ui16
#define UINT32_C(val) val##ui32
#define UINT64_C(val) val##ui64
// 7.18.4.2 Macros for greatest-width integer constants
// These #ifndef's are needed to prevent collisions with <inttypes.h>.
// Check out Issue 9 for the details.
#ifndef INTMAX_C // [
# define INTMAX_C INT64_C
#endif // INTMAX_C ]
#ifndef UINTMAX_C // [
# define UINTMAX_C UINT64_C
#endif // UINTMAX_C ]
#endif // __STDC_CONSTANT_MACROS ]
#endif // _MSC_VER >= 1600 ]
#endif // _MSC_STDINT_H_ ]

==> preshed-3.0.2/preshed/__init__.pxd <==

==> preshed-3.0.2/preshed/__init__.py <==
from .about import *

==> preshed-3.0.2/preshed/about.py <==
__title__ = "preshed"
__version__ = "3.0.2"
__summary__ = "Cython hash table that trusts the keys are pre-hashed"
__uri__ = "https://github.com/explosion/preshed"
__author__ = "Matthew Honnibal"
__email__ = "matt@explosion.ai"
__license__ = "MIT"
__release__ = True

==> preshed-3.0.2/preshed/bloom.pxd <==
from libc.stdint cimport uint64_t, uint32_t
from cymem.cymem cimport Pool
ctypedef uint64_t key_t
cdef struct BloomStruct:
key_t* bitfield
key_t hcount # hash count, number of hash functions
key_t length
uint32_t seed
cdef class BloomFilter:
cdef Pool mem
cdef BloomStruct* c_bloom
cdef inline bint contains(self, key_t item) nogil
cdef void bloom_init(Pool mem, BloomStruct* bloom, key_t hcount, key_t length, uint32_t seed) except *
cdef void bloom_add(BloomStruct* bloom, key_t item) nogil
cdef bint bloom_contains(const BloomStruct* bloom, key_t item) nogil

==> preshed-3.0.2/preshed/bloom.pyx <==
# cython: infer_types=True
# cython: cdivision=True
#
from murmurhash.mrmr cimport hash128_x86
import math
from array import array
try:
import copy_reg
except ImportError:
import copyreg as copy_reg
def calculate_size_and_hash_count(members, error_rate):
"""Calculate the optimal size in bits and number of hash functions for a
given number of members and error rate.
"""
base = math.log(1 / (2 ** math.log(2)))
bit_count = math.ceil((members * math.log(error_rate)) / base)
hash_count = math.floor((bit_count / members) * math.log(2))
return (bit_count, hash_count)
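
# For example (assuming Python 3 semantics, where math.ceil/math.floor return
# ints), 1000 members at a 1e-4 error rate work out to 19171 bits and 13 hash
# functions:
#
#     calculate_size_and_hash_count(1000, 1e-4)  # -> (19171, 13)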
cdef class BloomFilter:
"""Bloom filter that allows for basic membership tests.
Only integers are supported as keys.
"""
def __init__(self, key_t size=(2 ** 10), key_t hash_funcs=23, uint32_t seed=0):
self.mem = Pool()
        self.c_bloom = <BloomStruct*>self.mem.alloc(1, sizeof(BloomStruct))
bloom_init(self.mem, self.c_bloom, hash_funcs, size, seed)
@classmethod
def from_error_rate(cls, members, error_rate=1E-4):
params = calculate_size_and_hash_count(members, error_rate)
return cls(*params)
def add(self, key_t item):
bloom_add(self.c_bloom, item)
def __contains__(self, item):
return bloom_contains(self.c_bloom, item)
cdef inline bint contains(self, key_t item) nogil:
return bloom_contains(self.c_bloom, item)
def to_bytes(self):
return bloom_to_bytes(self.c_bloom)
def from_bytes(self, bytes byte_string):
bloom_from_bytes(self.mem, self.c_bloom, byte_string)
return self
cdef bytes bloom_to_bytes(const BloomStruct* bloom):
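    # Serialized layout: hcount, length, seed, then the raw bitfield words,
    # all written as an array of unsigned longs ("L").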
py = array("L")
py.append(bloom.hcount)
py.append(bloom.length)
py.append(bloom.seed)
for i in range(bloom.length // sizeof(key_t)):
py.append(bloom.bitfield[i])
if hasattr(py, "tobytes"):
return py.tobytes()
else:
# Python 2 :(
return py.tostring()
cdef void bloom_from_bytes(Pool mem, BloomStruct* bloom, bytes data):
py = array("L")
if hasattr(py, "frombytes"):
py.frombytes(data)
else:
py.fromstring(data)
bloom.hcount = py[0]
bloom.length = py[1]
bloom.seed = py[2]
    bloom.bitfield = <key_t*>mem.alloc(bloom.length // sizeof(key_t), sizeof(key_t))
for i in range(bloom.length // sizeof(key_t)):
bloom.bitfield[i] = py[3+i]
cdef void bloom_init(Pool mem, BloomStruct* bloom, key_t hcount, key_t length, uint32_t seed) except *:
# size should be a multiple of the container size - round up
if length % sizeof(key_t):
length = math.ceil(length / sizeof(key_t)) * sizeof(key_t)
bloom.length = length
bloom.hcount = hcount
    bloom.bitfield = <key_t*>mem.alloc(length // sizeof(key_t), sizeof(key_t))
bloom.seed = seed
# Instead of calling MurmurHash with a different seed for each hash function, this
# generates two initial hash values and then combines them to create the correct
# number of hashes. This technique is faster than just doing MurmurhHash
# repeatedly and has been shown to work as well as full hashing.
# For details see "Less Hashing, Same Performance: Building a Better Bloom
# Filter", Kirsch & Mitzenmacher.
# https://www.semanticscholar.org/paper/Less-hashing%2C-same-performance%3A-Building-a-better-Kirsch-Mitzenmacher/65c43afbfc064705bdc40d3473f32518e9306429
# The choice of seeds is arbitrary.
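# Concretely, the i-th probe position is g_i(x) = (h1(x) + i * h2(x)) % m,
# where h1 and h2 are the two 64-bit halves returned by hash128_x86 below.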
cdef void bloom_add(BloomStruct* bloom, key_t item) nogil:
cdef key_t hv
cdef key_t[2] keys
cdef key_t one = 1 # We want this explicitly typed, because bits
hash128_x86(&item, sizeof(key_t), 0, &keys)
for hiter in range(bloom.hcount):
hv = (keys[0] + (hiter * keys[1])) % bloom.length
bloom.bitfield[hv // sizeof(key_t)] |= one << (hv % sizeof(key_t))
cdef bint bloom_contains(const BloomStruct* bloom, key_t item) nogil:
cdef key_t hv
cdef key_t[2] keys
cdef key_t one = 1 # We want this explicitly typed, because bits
hash128_x86(&item, sizeof(key_t), 0, &keys)
for hiter in range(bloom.hcount):
hv = (keys[0] + (hiter * keys[1])) % bloom.length
if not (bloom.bitfield[hv // sizeof(key_t)] & one << (hv % sizeof(key_t))):
return False
return True
def pickle_bloom(BloomFilter bloom):
return unpickle_bloom, (bloom.to_bytes(),)
def unpickle_bloom(byte_string):
return BloomFilter().from_bytes(byte_string)
copy_reg.pickle(BloomFilter, pickle_bloom, unpickle_bloom)

==> preshed-3.0.2/preshed/counter.pxd <==
from libc.stdint cimport int64_t
from cymem.cymem cimport Pool
from .maps cimport MapStruct
from .maps cimport map_init, map_get, map_set, map_iter
from .maps cimport key_t
ctypedef int64_t count_t
cdef class PreshCounter:
cdef Pool mem
cdef MapStruct* c_map
cdef public object smoother
cdef readonly count_t total
cpdef int inc(self, key_t key, count_t inc) except -1

==> preshed-3.0.2/preshed/counter.pyx <==
"""Count occurrences of uint64-valued keys."""
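# Rough usage sketch (see preshed/tests/test_counter.py for full examples):
#
#     counter = PreshCounter()
#     counter.inc(12, 1)   # increment the count for key 12 by 1
#     counter[12]          # -> 1
#     counter.prob(12)     # relative frequency (smoothed after .smooth())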
from __future__ import division
cimport cython
from libc.math cimport log, exp, sqrt
cdef class PreshCounter:
def __init__(self, initial_size=8):
assert initial_size != 0
assert initial_size & (initial_size - 1) == 0
self.mem = Pool()
        self.c_map = <MapStruct*>self.mem.alloc(1, sizeof(MapStruct))
map_init(self.mem, self.c_map, initial_size)
self.smoother = None
self.total = 0
property length:
def __get__(self):
return self.c_map.length
def __len__(self):
return self.c_map.length
def __iter__(self):
cdef int i = 0
cdef key_t key
cdef void* value
while map_iter(self.c_map, &i, &key, &value):
            yield key, <count_t>value
def __getitem__(self, key_t key):
        return <count_t>map_get(self.c_map, key)
cpdef int inc(self, key_t key, count_t inc) except -1:
        cdef count_t c = <count_t>map_get(self.c_map, key)
c += inc
map_set(self.mem, self.c_map, key, c)
self.total += inc
return c
def prob(self, key_t key):
cdef GaleSmoother smoother
cdef void* value = map_get(self.c_map, key)
if self.smoother is not None:
smoother = self.smoother
            r_star = self.smoother(<count_t>value)
return r_star / self.smoother.total
elif value == NULL:
return 0
else:
            return <count_t>value / self.total
def smooth(self):
self.smoother = GaleSmoother(self)
cdef class GaleSmoother:
cdef Pool mem
cdef count_t* Nr
cdef double gradient
cdef double intercept
cdef readonly count_t cutoff
cdef count_t Nr0
cdef readonly double total
def __init__(self, PreshCounter counts):
count_counts = PreshCounter()
cdef double total = 0
for _, count in counts:
count_counts.inc(count, 1)
total += count
# If we have no items seen 1 or 2 times, this doesn't work. But, this
# won't be true in real data...
assert count_counts[1] != 0 and count_counts[2] != 0, "Cannot smooth your weird data"
# Extrapolate Nr0 from Nr1 and Nr2.
self.Nr0 = count_counts[1] + (count_counts[1] - count_counts[2])
self.mem = Pool()
cdef double[2] mb
cdef int n_counts = 0
for _ in count_counts:
n_counts += 1
        sorted_r = <count_t*>count_counts.mem.alloc(n_counts, sizeof(count_t))
        self.Nr = <count_t*>self.mem.alloc(n_counts, sizeof(count_t))
for i, (count, count_count) in enumerate(sorted(count_counts)):
sorted_r[i] = count
self.Nr[i] = count_count
_fit_loglinear_model(mb, sorted_r, self.Nr, n_counts)
self.cutoff = _find_when_to_switch(sorted_r, self.Nr, mb[0], mb[1],
n_counts)
self.gradient = mb[0]
self.intercept = mb[1]
self.total = self(0) * self.Nr0
for count, count_count in count_counts:
self.total += self(count) * count_count
def __call__(self, count_t r):
if r == 0:
return self.Nr[1] / self.Nr0
elif r < self.cutoff:
return turing_estimate_of_r(r, self.Nr[r-1], self.Nr[r])
else:
return gale_estimate_of_r(r, self.gradient, self.intercept)
def count_count(self, count_t r):
if r == 0:
return self.Nr0
else:
return self.Nr[r-1]
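
# turing_estimate_of_r below is the standard Good-Turing adjusted count,
# r* = (r + 1) * N_{r+1} / N_r, computed from adjacent count-of-count values.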
@cython.cdivision(True)
cdef double turing_estimate_of_r(double r, double Nr, double Nr1) except -1:
return ((r + 1) * Nr1) / Nr
@cython.cdivision(True)
cdef double gale_estimate_of_r(double r, double gradient, double intercept) except -1:
cdef double e_nr = exp(gradient * log(r) + intercept)
cdef double e_nr1 = exp(gradient * log(r+1) + intercept)
return (r + 1) * (e_nr1 / e_nr)
@cython.cdivision(True)
cdef void _fit_loglinear_model(double* output, count_t* sorted_r, count_t* Nr,
int length) except *:
cdef double x_mean = 0.0
cdef double y_mean = 0.0
cdef Pool mem = Pool()
    x = <double*>mem.alloc(length, sizeof(double))
    y = <double*>mem.alloc(length, sizeof(double))
cdef int i
for i in range(length):
r = sorted_r[i]
x[i] = log(r)
y[i] = log(_get_zr(i, sorted_r, Nr[i], length))
x_mean += x[i]
y_mean += y[i]
x_mean /= length
y_mean /= length
cdef double ss_xy = 0.0
cdef double ss_xx = 0.0
for i in range(length):
x_dist = x[i] - x_mean
y_dist = y[i] - y_mean
# SS_xy = sum the product of the distances from the mean
ss_xy += x_dist * y_dist
# SS_xx = sum the squares of the x distance
ss_xx += x_dist * x_dist
# Gradient
output[0] = ss_xy / ss_xx
# Intercept
output[1] = y_mean - output[0] * x_mean
@cython.cdivision(True)
cdef double _get_zr(int j, count_t* sorted_r, count_t Nr_j, int n_counts) except -1:
cdef double r_i = sorted_r[j-1] if j >= 1 else 0
cdef double r_j = sorted_r[j]
cdef double r_k = sorted_r[j+1] if (j+1) < n_counts else (2 * r_i - 1)
return 2 * Nr_j / (r_k - r_i)
@cython.cdivision(True)
cdef double _variance(double r, double Nr, double Nr1) nogil:
return 1.96 * sqrt((r+1)**2 * (Nr1 / Nr**2) * (1.0 + (Nr1 / Nr)))
@cython.cdivision(True)
cdef count_t _find_when_to_switch(count_t* sorted_r, count_t* Nr, double m, double b,
int length) except -1:
cdef int i
cdef count_t r
for i in range(length-1):
r = sorted_r[i]
if sorted_r[i+1] != r+1:
return r
g_r = gale_estimate_of_r(r, m, b)
t_r = turing_estimate_of_r(r, Nr[i], Nr[i+1])
if abs(t_r - g_r) <= _variance(r, Nr[i], Nr[i+1]):
return r
else:
return length - 1

==> preshed-3.0.2/preshed/maps.pxd <==
from libc.stdint cimport uint64_t
from cymem.cymem cimport Pool
ctypedef uint64_t key_t
cdef struct Cell:
key_t key
void* value
cdef struct Result:
int found
void* value
cdef struct MapStruct:
Cell* cells
void* value_for_empty_key
void* value_for_del_key
key_t length
key_t filled
bint is_empty_key_set
bint is_del_key_set
cdef void* map_bulk_get(const MapStruct* map_, const key_t* keys, void** values,
int n) nogil
cdef Result map_get_unless_missing(const MapStruct* map_, const key_t key) nogil
cdef void* map_get(const MapStruct* map_, const key_t key) nogil
cdef void map_set(Pool mem, MapStruct* map_, key_t key, void* value) except *
cdef void map_init(Pool mem, MapStruct* pmap, size_t length) except *
cdef bint map_iter(const MapStruct* map_, int* i, key_t* key, void** value) nogil
cdef void* map_clear(MapStruct* map_, const key_t key) nogil
cdef class PreshMap:
cdef MapStruct* c_map
cdef Pool mem
cdef inline void* get(self, key_t key) nogil
cdef void set(self, key_t key, void* value) except *
cdef class PreshMapArray:
cdef Pool mem
cdef MapStruct* maps
cdef size_t length
cdef inline void* get(self, size_t i, key_t key) nogil
cdef void set(self, size_t i, key_t key, void* value) except *

==> preshed-3.0.2/preshed/maps.pyx <==
# cython: infer_types=True
# cython: cdivision=True
#
cimport cython
DEF EMPTY_KEY = 0
DEF DELETED_KEY = 1
cdef class PreshMap:
"""Hash map that assumes keys come pre-hashed. Maps uint64_t --> uint64_t.
Uses open addressing with linear probing.
Usage
map = PreshMap() # Create a table
map = PreshMap(initial_size=1024) # Create with initial size (efficiency)
map[key] = value # Set a value to a key
value = map[key] # Get a value given a key
for key, value in map.items(): # Iterate over items
len(map) # Get number of inserted keys
"""
def __init__(self, size_t initial_size=8):
# Size must be power of two
if initial_size == 0:
initial_size = 8
if initial_size & (initial_size - 1) != 0:
power = 1
while power < initial_size:
power *= 2
initial_size = power
self.mem = Pool()
        self.c_map = <MapStruct*>self.mem.alloc(1, sizeof(MapStruct))
map_init(self.mem, self.c_map, initial_size)
property capacity:
def __get__(self):
return self.c_map.length
def items(self):
cdef key_t key
cdef void* value
cdef int i = 0
while map_iter(self.c_map, &i, &key, &value):
            yield key, <size_t>value
def keys(self):
for key, _ in self.items():
yield key
def values(self):
for _, value in self.items():
yield value
def pop(self, key_t key, default=None):
cdef Result result = map_get_unless_missing(self.c_map, key)
map_clear(self.c_map, key)
if result.found:
            return <size_t>result.value
else:
return default
def __getitem__(self, key_t key):
cdef Result result = map_get_unless_missing(self.c_map, key)
if result.found:
            return <size_t>result.value
else:
return None
def __setitem__(self, key_t key, size_t value):
        map_set(self.mem, self.c_map, key, <void*>value)
def __delitem__(self, key_t key):
map_clear(self.c_map, key)
def __len__(self):
return self.c_map.filled
def __contains__(self, key_t key):
cdef Result result = map_get_unless_missing(self.c_map, key)
return True if result.found else False
def __iter__(self):
for key in self.keys():
yield key
cdef inline void* get(self, key_t key) nogil:
return map_get(self.c_map, key)
cdef void set(self, key_t key, void* value) except *:
map_set(self.mem, self.c_map, key, value)
cdef class PreshMapArray:
"""An array of hash tables that assume keys come pre-hashed. Each table
uses open addressing with linear probing.
"""
def __init__(self, size_t length, size_t initial_size=8):
self.mem = Pool()
self.length = length
        self.maps = <MapStruct*>self.mem.alloc(length, sizeof(MapStruct))
for i in range(length):
map_init(self.mem, &self.maps[i], initial_size)
cdef inline void* get(self, size_t i, key_t key) nogil:
return map_get(&self.maps[i], key)
cdef void set(self, size_t i, key_t key, void* value) except *:
map_set(self.mem, &self.maps[i], key, value)
cdef void map_init(Pool mem, MapStruct* map_, size_t length) except *:
map_.length = length
map_.filled = 0
    map_.cells = <Cell*>mem.alloc(length, sizeof(Cell))
cdef void map_set(Pool mem, MapStruct* map_, key_t key, void* value) except *:
cdef Cell* cell
if key == EMPTY_KEY:
map_.value_for_empty_key = value
map_.is_empty_key_set = True
elif key == DELETED_KEY:
map_.value_for_del_key = value
map_.is_del_key_set = True
else:
cell = _find_cell_for_insertion(map_.cells, map_.length, key)
if cell.key == EMPTY_KEY:
map_.filled += 1
cell.key = key
cell.value = value
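        # Grow once the table would be at least 3/5 (60%) full, counting
        # this insertion.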
if (map_.filled + 1) * 5 >= (map_.length * 3):
_resize(mem, map_)
cdef void* map_get(const MapStruct* map_, const key_t key) nogil:
if key == EMPTY_KEY:
return map_.value_for_empty_key
elif key == DELETED_KEY:
return map_.value_for_del_key
cdef Cell* cell = _find_cell(map_.cells, map_.length, key)
return cell.value
cdef Result map_get_unless_missing(const MapStruct* map_, const key_t key) nogil:
cdef Result result
cdef Cell* cell
result.found = 0
result.value = NULL
if key == EMPTY_KEY:
if map_.is_empty_key_set:
result.found = 1
result.value = map_.value_for_empty_key
elif key == DELETED_KEY:
if map_.is_del_key_set:
result.found = 1
result.value = map_.value_for_del_key
else:
cell = _find_cell(map_.cells, map_.length, key)
if cell.key == key:
result.found = 1
result.value = cell.value
return result
cdef void* map_clear(MapStruct* map_, const key_t key) nogil:
if key == EMPTY_KEY:
value = map_.value_for_empty_key if map_.is_empty_key_set else NULL
map_.is_empty_key_set = False
return value
elif key == DELETED_KEY:
value = map_.value_for_del_key if map_.is_del_key_set else NULL
map_.is_del_key_set = False
return value
else:
cell = _find_cell(map_.cells, map_.length, key)
cell.key = DELETED_KEY
# We shouldn't decrement the "filled" value here, as we're not actually
# making "empty" values -- deleted values aren't quite the same.
# Instead if we manage to insert into a deleted slot, we don't increment
# the fill rate.
return cell.value
cdef void* map_bulk_get(const MapStruct* map_, const key_t* keys, void** values,
int n) nogil:
cdef int i
for i in range(n):
values[i] = map_get(map_, keys[i])
cdef bint map_iter(const MapStruct* map_, int* i, key_t* key, void** value) nogil:
'''Iterate over the filled items, setting the current place in i, and the
key and value. Return False when iteration finishes.
'''
cdef const Cell* cell
while i[0] < map_.length:
cell = &map_.cells[i[0]]
i[0] += 1
if cell[0].key != EMPTY_KEY and cell[0].key != DELETED_KEY:
key[0] = cell[0].key
value[0] = cell[0].value
return True
# Remember to check for cells keyed by the special empty and deleted keys
if i[0] == map_.length:
i[0] += 1
if map_.is_empty_key_set:
key[0] = EMPTY_KEY
value[0] = map_.value_for_empty_key
return True
if i[0] == map_.length + 1:
i[0] += 1
if map_.is_del_key_set:
key[0] = DELETED_KEY
value[0] = map_.value_for_del_key
return True
return False
@cython.cdivision
cdef inline Cell* _find_cell(Cell* cells, const key_t size, const key_t key) nogil:
# Modulo for powers-of-two via bitwise &
cdef key_t i = (key & (size - 1))
while cells[i].key != EMPTY_KEY and cells[i].key != key:
i = (i + 1) & (size - 1)
return &cells[i]
@cython.cdivision
cdef inline Cell* _find_cell_for_insertion(Cell* cells, const key_t size, const key_t key) nogil:
"""Find the correct cell to insert a value, which could be a previously
deleted cell. If we cross a deleted cell and the key is in the table, we
mark the later cell as deleted, and return the earlier one."""
cdef Cell* deleted = NULL
# Modulo for powers-of-two via bitwise &
cdef key_t i = (key & (size - 1))
while cells[i].key != EMPTY_KEY and cells[i].key != key:
if cells[i].key == DELETED_KEY:
deleted = &cells[i]
i = (i + 1) & (size - 1)
if deleted is not NULL:
if cells[i].key == key:
# We need to ensure we don't end up with the key in the table twice.
# If we're using a deleted cell and we also have the key, we mark
# the later cell as deleted.
cells[i].key = DELETED_KEY
return deleted
return &cells[i]
cdef void _resize(Pool mem, MapStruct* map_) except *:
cdef size_t new_size = map_.length * 2
cdef Cell* old_cells = map_.cells
cdef size_t old_size = map_.length
map_.length = new_size
map_.filled = 0
    map_.cells = <Cell*>mem.alloc(new_size, sizeof(Cell))
cdef size_t i
cdef size_t slot
for i in range(old_size):
if old_cells[i].key != EMPTY_KEY and old_cells[i].key != DELETED_KEY:
map_set(mem, map_, old_cells[i].key, old_cells[i].value)
mem.free(old_cells)

==> preshed-3.0.2/preshed/tests/__init__.py <==

==> preshed-3.0.2/preshed/tests/test_bloom.py <==
from __future__ import division
import pytest
import pickle
from preshed.bloom import BloomFilter
def test_contains():
bf = BloomFilter()
assert 23 not in bf
bf.add(23)
assert 23 in bf
bf.add(5)
bf.add(42)
bf.add(1002)
assert 5 in bf
assert 42 in bf
assert 1002 in bf
def test_no_false_negatives():
bf = BloomFilter(size=100, hash_funcs=2)
for ii in range(0,1000,20):
bf.add(ii)
for ii in range(0,1000,20):
assert ii in bf
def test_from_error():
bf = BloomFilter.from_error_rate(1000)
for ii in range(0,1000,20):
bf.add(ii)
for ii in range(0,1000,20):
assert ii in bf
def test_to_from_bytes():
bf = BloomFilter(size=100, hash_funcs=2)
for ii in range(0,1000,20):
bf.add(ii)
data = bf.to_bytes()
bf2 = BloomFilter()
for ii in range(0,1000,20):
assert ii not in bf2
bf2.from_bytes(data)
for ii in range(0,1000,20):
assert ii in bf2
assert bf2.to_bytes() == data
def test_bloom_pickle():
bf = BloomFilter(size=100, hash_funcs=2)
for ii in range(0,1000,20):
bf.add(ii)
data = pickle.dumps(bf)
bf2 = pickle.loads(data)
for ii in range(0,1000,20):
assert ii in bf2

==> preshed-3.0.2/preshed/tests/test_counter.py <==
from __future__ import division
import pytest
from preshed.counter import PreshCounter
def test_count():
counter = PreshCounter()
assert counter[12] == 0
counter.inc(12, 1)
assert counter[12] == 1
counter.inc(14, 10)
counter.inc(9, 10)
counter.inc(12, 4)
assert counter[12] == 5
assert counter[14] == 10
assert counter[9] == 10
def test_unsmooth_prob():
counter = PreshCounter()
assert counter.prob(12) == 0.0
counter.inc(12, 1)
assert counter.prob(12) == 1.0
counter.inc(14, 10)
assert counter.prob(14) == 10 / 11
assert counter.prob(12) == 1.0 / 11
def test_smooth_prob():
p = PreshCounter()
# 1 10
# 2 6
# 3 4
# 5 2
# 8 1
for i in range(10):
p.inc(100-i, 1) # 10 items of freq 1
for i in range(6):
p.inc(90 - i, 2) # 6 items of freq 2
for i in range(4):
p.inc(80 - i, 3) # 4 items of freq 3
for i in range(2):
p.inc(70 - i, 5) # 2 items of freq 5
for i in range(1):
p.inc(60 - i, 8) # 1 item of freq 8
assert p.total == (10 * 1) + (6 * 2) + (4 * 3) + (2 * 5) + (1 * 8)
assert p.prob(100) == 1.0 / p.total
assert p.prob(200) == 0.0
assert p.prob(60) == 8.0 / p.total
p.smooth()
assert p.smoother(1) < 1.0
assert p.smoother(8) < 8.0
assert p.prob(1000) < p.prob(100)
for event, count in reversed(sorted(p, key=lambda it: it[1])):
assert p.smoother(count) < count
import os
def test_large_freqs():
if 'TEST_FILE_LOC' in os.environ:
loc = os.environ['TEST_FILE_LOC']
else:
return None
counts = PreshCounter()
for i, line in enumerate(open(loc)):
line = line.strip()
if not line:
continue
freq = int(line.split()[0])
counts.inc(i+1, freq)
oov = i+2
assert counts.prob(oov) == 0.0
assert counts.prob(1) < 0.1
counts.smooth()
assert counts.prob(oov) > 0
assert counts.prob(oov) < counts.prob(i)

==> preshed-3.0.2/preshed/tests/test_hashing.py <==
import pytest
from preshed.maps import PreshMap
import random
def test_insert():
h = PreshMap()
assert h[1] is None
h[1] = 5
assert h[1] == 5
h[2] = 6
assert h[1] == 5
assert h[2] == 6
def test_resize():
h = PreshMap(4)
h[4] = 12
for i in range(10, 100):
value = int(i * (random.random() + 1))
h[i] = value
assert h[4] == 12
def test_zero_key():
h = PreshMap()
h[0] = 6
h[5] = 12
assert h[0] == 6
assert h[5] == 12
for i in range(500, 1000):
h[i] = i * random.random()
assert h[0] == 6
assert h[5] == 12
def test_iter():
key_sum = 0
val_sum = 0
h = PreshMap()
for i in range(56, 24, -3):
h[i] = i * 2
key_sum += i
val_sum += i * 2
for key, value in h.items():
key_sum -= key
val_sum -= value
assert key_sum == 0
assert val_sum == 0
def test_one_and_empty():
# See Issue #21
table = PreshMap()
for i in range(100, 110):
table[i] = i
del table[i]
assert table[0] == None
def test_many_and_empty():
# See Issue #21
table = PreshMap()
for i in range(100, 110):
table[i] = i
for i in range(100, 110):
del table[i]
assert table[0] == None
def test_zero_values():
table = PreshMap()
table[10] = 0
assert table[10] == 0
assert table[11] is None

==> preshed-3.0.2/preshed/tests/test_pop.py <==
from ..maps import PreshMap
def test_pop1():
table = PreshMap()
table[10] = 20
table[30] = 25
assert table[10] == 20
assert table[30] == 25
table.pop(30)
assert table[10] == 20

==> preshed-3.0.2/requirements.txt <==
cymem>=2.0.2,<2.1.0
cython>=0.28
pytest
murmurhash>=0.28.0,<1.1.0

==> preshed-3.0.2/setup.py <==
#!/usr/bin/env python
from __future__ import print_function
import os
import subprocess
import sys
import contextlib
from distutils.command.build_ext import build_ext
from distutils.sysconfig import get_python_inc
from distutils import ccompiler, msvccompiler
try:
from setuptools import Extension, setup
except ImportError:
from distutils.core import Extension, setup
PACKAGES = ["preshed", "preshed.tests"]
MOD_NAMES = ["preshed.maps", "preshed.counter", "preshed.bloom"]
# By subclassing build_extensions we have the actual compiler that will be used which is really known only after finalize_options
# http://stackoverflow.com/questions/724664/python-distutils-how-to-get-a-compiler-that-is-going-to-be-used
compile_options = {
"msvc": ["/Ox", "/EHsc"],
"other": ["-O3", "-Wno-strict-prototypes", "-Wno-unused-function"],
}
link_options = {"msvc": [], "other": []}
class build_ext_options:
def build_options(self):
for e in self.extensions:
e.extra_compile_args = compile_options.get(
self.compiler.compiler_type, compile_options["other"]
)
for e in self.extensions:
e.extra_link_args = link_options.get(
self.compiler.compiler_type, link_options["other"]
)
class build_ext_subclass(build_ext, build_ext_options):
def build_extensions(self):
build_ext_options.build_options(self)
build_ext.build_extensions(self)
def generate_cython(root, source):
print("Cythonizing sources")
p = subprocess.call(
[sys.executable, os.path.join(root, "bin", "cythonize.py"), source]
)
if p != 0:
raise RuntimeError("Running cythonize failed")
def is_source_release(path):
return os.path.exists(os.path.join(path, "PKG-INFO"))
def clean(path):
for name in MOD_NAMES:
name = name.replace(".", "/")
for ext in [".so", ".html", ".cpp", ".c"]:
file_path = os.path.join(path, name + ext)
if os.path.exists(file_path):
os.unlink(file_path)
@contextlib.contextmanager
def chdir(new_dir):
old_dir = os.getcwd()
try:
os.chdir(new_dir)
sys.path.insert(0, new_dir)
yield
finally:
del sys.path[0]
os.chdir(old_dir)
def setup_package():
root = os.path.abspath(os.path.dirname(__file__))
if len(sys.argv) > 1 and sys.argv[1] == "clean":
return clean(root)
with chdir(root):
with open(os.path.join(root, "preshed", "about.py")) as f:
about = {}
exec(f.read(), about)
with open(os.path.join(root, "README.md")) as f:
readme = f.read()
include_dirs = [get_python_inc(plat_specific=True)]
if (
ccompiler.new_compiler().compiler_type == "msvc"
and msvccompiler.get_build_version() == 9
):
include_dirs.append(os.path.join(root, "include", "msvc9"))
ext_modules = []
for mod_name in MOD_NAMES:
mod_path = mod_name.replace(".", "/") + ".cpp"
ext_modules.append(
Extension(
mod_name, [mod_path], language="c++", include_dirs=include_dirs
)
)
if not is_source_release(root):
generate_cython(root, "preshed")
setup(
name="preshed",
zip_safe=False,
packages=PACKAGES,
package_data={"": ["*.pyx", "*.pxd"]},
description=about["__summary__"],
long_description=readme,
long_description_content_type="text/markdown",
author=about["__author__"],
author_email=about["__email__"],
version=about["__version__"],
url=about["__uri__"],
license=about["__license__"],
ext_modules=ext_modules,
setup_requires=[],
install_requires=["cymem>=2.0.2,<2.1.0", "murmurhash>=0.28.0,<1.1.0"],
classifiers=[
"Environment :: Console",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: MIT License",
"Operating System :: POSIX :: Linux",
"Operating System :: MacOS :: MacOS X",
"Operating System :: Microsoft :: Windows",
"Programming Language :: Cython",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Topic :: Scientific/Engineering",
],
cmdclass={"build_ext": build_ext_subclass},
)
if __name__ == "__main__":
setup_package()