pax_global_header 0000666 0000000 0000000 00000000064 14134461607 0014520 g ustar 00root root 0000000 0000000 52 comment=364495cbc5ec2ab59c522f8c785683603d1b2500
cymem-2.0.6/ 0000775 0000000 0000000 00000000000 14134461607 0012637 5 ustar 00root root 0000000 0000000 cymem-2.0.6/.gitignore 0000664 0000000 0000000 00000001612 14134461607 0014627 0 ustar 00root root 0000000 0000000 # Cython / C extensions
cythonize.json
spacy/*.html
*.cpp
*.so
# Vim / VSCode / editors
*.swp
*.sw*
Profile.prof
.vscode
.sass-cache
# Python
.Python
.python-version
__pycache__/
.pytest_cache
*.py[cod]
.env/
.env*
.~env/
.venv
venv/
.dev
.denv
.pypyenv
.pytest_cache/
# Distribution / packaging
env/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
pip-wheel-metadata/
Pipfile.lock
.installed.cfg
*.egg
.eggs
MANIFEST
# Temporary files
*.~*
tmp/
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# Rope
.ropeproject
# Django stuff:
*.log
*.pot
# Windows
*.bat
Thumbs.db
Desktop.ini
# Mac OS X
*.DS_Store
# Komodo project files
*.komodoproject
# Other
*.tgz
# Pycharm project files
*.idea
cymem-2.0.6/LICENSE 0000664 0000000 0000000 00000002122 14134461607 0013641 0 ustar 00root root 0000000 0000000 The MIT License (MIT)
Copyright (C) 2016 ExplosionAI GmbH, 2014 Matthew Honnibal
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
cymem-2.0.6/MANIFEST.in 0000664 0000000 0000000 00000000100 14134461607 0014364 0 ustar 00root root 0000000 0000000 include LICENSE
include README.md
recursive-exclude cymem *.cpp
cymem-2.0.6/README.md 0000664 0000000 0000000 00000015702 14134461607 0014123 0 ustar 00root root 0000000 0000000
# cymem: A Cython Memory Helper
cymem provides two small memory-management helpers for Cython. They make it
easy to tie memory to a Python object's life-cycle, so that the memory is freed
when the object is garbage collected.
[Azure Pipelines build status](https://dev.azure.com/explosion-ai/public/_build?definitionId=2)
[PyPI version](https://pypi.python.org/pypi/cymem)
[conda-forge version](https://anaconda.org/conda-forge/cymem)
[Python wheels](https://github.com/explosion/wheelwright/releases)
## Overview
The most useful is `cymem.Pool`, which acts as a thin wrapper around the calloc
function:
```python
from cymem.cymem cimport Pool
cdef Pool mem = Pool()
data1 = mem.alloc(10, sizeof(int))
data2 = mem.alloc(12, sizeof(float))
```
The `Pool` object saves the memory addresses internally, and frees them when the
object is garbage collected. Typically you'll attach the `Pool` to some cdef'd
class. This is particularly handy for deeply nested structs, which have
complicated initialization functions. Just pass the `Pool` object into the
initializer, and you don't have to worry about freeing your struct at all —
all of the calls to `Pool.alloc` will be automatically freed when the `Pool`
expires.
## Installation
Installation is via [pip](https://pypi.python.org/pypi/pip), and requires [Cython](http://cython.org). Before installing, make sure that your `pip`, `setuptools` and `wheel` are up to date.
```bash
pip install -U pip setuptools wheel
pip install cymem
```
## Example Use Case: An array of structs
Let's say we want a sequence of sparse matrices. We need fast access, and
a Python list isn't performing well enough. So, we want a C-array or C++
vector, which means we need the sparse matrix to be a C-level struct — it
can't be a Python class. We can write this easily enough in Cython:
```python
"""Example without Cymem
To use an array of structs, we must carefully walk the data structure when
we deallocate it.
"""
from libc.stdlib cimport calloc, free
# One sparse row, stored as two parallel C arrays of `length` entries each.
cdef struct SparseRow:
    size_t length      # number of stored entries (len() of the source dict)
    size_t* indices    # keys of the source dict, one per entry
    double* values     # values of the source dict, one per entry

# One sparse matrix: an array of `length` rows.
cdef struct SparseMatrix:
    size_t length      # number of rows
    SparseRow* rows    # array of `length` SparseRow structs
cdef class MatrixArray:
    """Owns a C array of pointers to manually allocated SparseMatrix structs.

    Every matrix is created with sparse_matrix_init() and must be released
    again with sparse_matrix_free() when the object is deallocated.
    """
    cdef size_t length
    cdef SparseMatrix** matrices

    def __cinit__(self, list py_matrices):
        # Start from a state that __dealloc__ can always handle.
        self.length = 0
        self.matrices = NULL

    def __init__(self, list py_matrices):
        cdef size_t n = len(py_matrices)
        # calloc returns void*, so an explicit cast is required.
        self.matrices = <SparseMatrix**>calloc(n, sizeof(SparseMatrix*))
        if self.matrices == NULL and n != 0:
            raise MemoryError()
        # Only publish the length once the array really exists.
        self.length = n
        for i, py_matrix in enumerate(py_matrices):
            self.matrices[i] = sparse_matrix_init(py_matrix)

    def __dealloc__(self):
        cdef size_t i
        for i in range(self.length):
            # If __init__ failed part-way, the remaining slots are still the
            # NULLs written by calloc and must not be dereferenced.
            if self.matrices[i] != NULL:
                sparse_matrix_free(self.matrices[i])
        free(self.matrices)
cdef SparseMatrix* sparse_matrix_init(list py_matrix) except NULL:
    """Build a calloc-allocated SparseMatrix from a list of dicts.

    Each dict in py_matrix becomes one SparseRow: its keys are stored in
    `indices` and its values in `values`. The caller owns the result and must
    release it with sparse_matrix_free().

    Checks for failed allocations are omitted here to keep the example short.
    """
    cdef size_t i, j
    cdef dict py_row
    cdef size_t idx
    cdef double value
    # calloc returns void*; Cython requires an explicit cast to the target type.
    cdef SparseMatrix* sm = <SparseMatrix*>calloc(1, sizeof(SparseMatrix))
    sm.length = len(py_matrix)
    sm.rows = <SparseRow*>calloc(sm.length, sizeof(SparseRow))
    for i, py_row in enumerate(py_matrix):
        sm.rows[i].length = len(py_row)
        sm.rows[i].indices = <size_t*>calloc(sm.rows[i].length, sizeof(size_t))
        sm.rows[i].values = <double*>calloc(sm.rows[i].length, sizeof(double))
        for j, (idx, value) in enumerate(py_row.items()):
            sm.rows[i].indices[j] = idx
            sm.rows[i].values[j] = value
    return sm
cdef void* sparse_matrix_free(SparseMatrix* sm) except *:
    """Release a SparseMatrix: each row's two arrays first, then the row
    array, then the matrix struct itself."""
    cdef size_t i
    for i in range(sm.length):
        free(sm.rows[i].indices)
        free(sm.rows[i].values)
    # The row array may only go once every row's buffers have been released.
    free(sm.rows)
    free(sm)
```
We wrap the data structure in a Python ref-counted class at as low a level as
we can, given our performance constraints. This allows us to allocate and free
the memory in the `__cinit__` and `__dealloc__` Cython special methods.
However, it's very easy to make mistakes when writing the `__dealloc__` and
`sparse_matrix_free` functions, leading to memory leaks. cymem removes the need
to write these deallocators at all. Instead, you write the following:
```python
"""Example with Cymem.
Memory allocation is hidden behind the Pool class, which remembers the
addresses it gives out. When the Pool object is garbage collected, all of
its addresses are freed.
We don't need to write MatrixArray.__dealloc__ or sparse_matrix_free,
eliminating a common class of bugs.
"""
from cymem.cymem cimport Pool
# One sparse row, stored as two parallel C arrays of `length` entries each.
cdef struct SparseRow:
    size_t length      # number of stored entries (len() of the source dict)
    size_t* indices    # keys of the source dict, one per entry
    double* values     # values of the source dict, one per entry

# One sparse matrix: an array of `length` rows.
cdef struct SparseMatrix:
    size_t length      # number of rows
    SparseRow* rows    # array of `length` SparseRow structs
cdef class MatrixArray:
    """Owns a C array of SparseMatrix pointers whose memory belongs to a Pool.

    No __dealloc__ is needed: every block is allocated through `self.mem`.
    """
    cdef size_t length
    cdef SparseMatrix** matrices
    cdef Pool mem

    def __cinit__(self, list py_matrices):
        self.mem = None
        self.length = 0
        self.matrices = NULL

    def __init__(self, list py_matrices):
        self.mem = Pool()
        self.length = len(py_matrices)
        # Pool.alloc returns void*, so an explicit cast is required.
        self.matrices = <SparseMatrix**>self.mem.alloc(self.length, sizeof(SparseMatrix*))
        for i, py_matrix in enumerate(py_matrices):
            # Call the Pool-based initializer defined in this example.
            self.matrices[i] = sparse_matrix_init_cymem(self.mem, py_matrix)
cdef SparseMatrix* sparse_matrix_init_cymem(Pool mem, list py_matrix) except NULL:
    """Build a SparseMatrix from a list of dicts, allocating through `mem`.

    Each dict in py_matrix becomes one SparseRow: its keys are stored in
    `indices` and its values in `values`. Every block is requested from the
    Pool, so nothing here has to be freed by hand.
    """
    cdef size_t i, j
    cdef dict py_row
    cdef size_t idx
    cdef double value
    # Pool.alloc returns void*; Cython requires an explicit cast to the target type.
    cdef SparseMatrix* sm = <SparseMatrix*>mem.alloc(1, sizeof(SparseMatrix))
    sm.length = len(py_matrix)
    sm.rows = <SparseRow*>mem.alloc(sm.length, sizeof(SparseRow))
    for i, py_row in enumerate(py_matrix):
        sm.rows[i].length = len(py_row)
        sm.rows[i].indices = <size_t*>mem.alloc(sm.rows[i].length, sizeof(size_t))
        sm.rows[i].values = <double*>mem.alloc(sm.rows[i].length, sizeof(double))
        for j, (idx, value) in enumerate(py_row.items()):
            sm.rows[i].indices[j] = idx
            sm.rows[i].values[j] = value
    return sm
```
All that the `Pool` class does is remember the addresses it gives out. When the
`MatrixArray` object is garbage-collected, the `Pool` object will also be garbage
collected, which triggers a call to `Pool.__dealloc__`. The `Pool` then frees all of
its addresses. This saves you from walking back over your nested data structures
to free them, eliminating a common class of errors.
## Custom Allocators
Sometimes external C libraries use private functions to allocate and free objects,
but we'd still like the laziness of the `Pool`.
```python
from cymem.cymem cimport Pool, WrapMalloc, WrapFree
cdef Pool mem = Pool(WrapMalloc(priv_malloc), WrapFree(priv_free))
```
cymem-2.0.6/azure-pipelines.yml 0000664 0000000 0000000 00000004451 14134461607 0016502 0 ustar 00root root 0000000 0000000 trigger:
batch: true
branches:
include:
- '*'
jobs:
- job: 'Test'
strategy:
matrix:
Python36Linux:
imageName: 'ubuntu-18.04'
python.version: '3.6'
Python36Windows:
imageName: 'vs2017-win2016'
python.version: '3.6'
Python36Mac:
imageName: 'macos-10.14'
python.version: '3.6'
Python37Linux:
imageName: 'ubuntu-18.04'
python.version: '3.7'
Python37Windows:
imageName: 'vs2017-win2016'
python.version: '3.7'
Python37Mac:
imageName: 'macos-10.14'
python.version: '3.7'
Python38Linux:
imageName: 'ubuntu-18.04'
python.version: '3.8'
Python38Windows:
imageName: 'vs2017-win2016'
python.version: '3.8'
Python38Mac:
imageName: 'macos-10.14'
python.version: '3.8'
Python39Linux:
imageName: 'ubuntu-18.04'
python.version: '3.9'
Python39Windows:
imageName: 'vs2017-win2016'
python.version: '3.9'
Python39Mac:
imageName: 'macos-10.14'
python.version: '3.9'
Python310Linux:
imageName: 'ubuntu-18.04'
python.version: '3.10'
Python310Windows:
imageName: 'vs2017-win2016'
python.version: '3.10'
Python310Mac:
imageName: 'macos-10.15'
python.version: '3.10'
maxParallel: 4
pool:
vmImage: $(imageName)
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '$(python.version)'
architecture: 'x64'
- script: |
python -m pip install --upgrade pip setuptools
pip install -r requirements.txt
displayName: 'Install dependencies'
- script: |
python setup.py build_ext --inplace
python setup.py sdist
displayName: 'Build sdist'
- script: |
pip freeze > installed.txt
pip uninstall -y -r installed.txt
displayName: 'Uninstall all packages'
- script: pip install dist/*.tar.gz
condition: in( variables['Agent.OS'], 'Linux', 'Darwin')
displayName: 'Install from sdist (Linux, Mac)'
- script: pip install -e .
condition: eq( variables['Agent.OS'], 'Windows_NT')
displayName: 'Install with pip (Windows)'
- script: |
pip install -r requirements.txt
python -m pytest cymem
displayName: 'Run tests'
cymem-2.0.6/bin/ 0000775 0000000 0000000 00000000000 14134461607 0013407 5 ustar 00root root 0000000 0000000 cymem-2.0.6/bin/push-tag.sh 0000775 0000000 0000000 00000000504 14134461607 0015475 0 ustar 00root root 0000000 0000000 #!/usr/bin/env bash
set -e

# Insist repository is clean
git diff-index --quiet HEAD

# Make sure local and remote master are in sync before tagging.
git checkout master
git pull origin master
git push origin master

# Read the line `__version__ = "X.Y.Z"` from cymem/about.py and reduce it to
# the bare version number.
version=$(grep "__version__ = " cymem/about.py)
version=${version/__version__ = }
# about.py quotes the version with double quotes; strip those as well as
# single quotes so the tag never contains a quote character.
version=${version//[\"\']/}

if [ -z "$version" ]; then
    echo "Could not determine version from cymem/about.py" >&2
    exit 1
fi

git tag "v$version"
git push origin --tags
cymem-2.0.6/cymem/ 0000775 0000000 0000000 00000000000 14134461607 0013751 5 ustar 00root root 0000000 0000000 cymem-2.0.6/cymem/__init__.pxd 0000664 0000000 0000000 00000000000 14134461607 0016213 0 ustar 00root root 0000000 0000000 cymem-2.0.6/cymem/__init__.py 0000664 0000000 0000000 00000000025 14134461607 0016057 0 ustar 00root root 0000000 0000000 from .about import *
cymem-2.0.6/cymem/about.py 0000664 0000000 0000000 00000000350 14134461607 0015433 0 ustar 00root root 0000000 0000000 __title__ = "cymem"
__version__ = "2.0.6"
__summary__ = "Manage calls to calloc/free through Cython"
__uri__ = "https://github.com/explosion/cymem"
__author__ = "Matthew Honnibal"
__email__ = "matt@explosion.ai"
__license__ = "MIT"
cymem-2.0.6/cymem/cymem.pxd 0000664 0000000 0000000 00000001413 14134461607 0015577 0 ustar 00root root 0000000 0000000 ctypedef void* (*malloc_t)(size_t n)
ctypedef void (*free_t)(void *p)
# Extension type holding a C allocation function pointer (malloc_t), so that
# an allocator can be handed around as a Python object.
cdef class PyMalloc:
    cdef malloc_t malloc
    cdef void _set(self, malloc_t malloc)

# Returns a PyMalloc that holds `malloc`.
cdef PyMalloc WrapMalloc(malloc_t malloc)
# Extension type holding a C deallocation function pointer (free_t), so that
# a deallocator can be handed around as a Python object.
cdef class PyFree:
    cdef free_t free
    cdef void _set(self, free_t free)

# Returns a PyFree that holds `free`.
cdef PyFree WrapFree(free_t free)
# Allocator that records every block it hands out and frees them all when the
# Pool object itself is deallocated.
cdef class Pool:
    cdef readonly size_t size          # bytes currently allocated by the pool
    cdef readonly dict addresses       # allocated address -> size in bytes
    cdef readonly list refs            # Python objects appended via own_pyref()
    cdef readonly PyMalloc pymalloc    # allocator used by alloc()
    cdef readonly PyFree pyfree        # deallocator used by free()/__dealloc__

    # Allocates number * size zero-initialised bytes; raises instead of returning NULL.
    cdef void* alloc(self, size_t number, size_t size) except NULL
    # Frees a block previously returned by alloc().
    cdef void free(self, void* addr) except *
    # Moves a block to a new allocation of n bytes; raises instead of returning NULL.
    cdef void* realloc(self, void* addr, size_t n) except NULL
# A single memory block tied to the lifetime of a Python object.
cdef class Address:
    cdef void* ptr                     # the block itself; NULL until allocated
    cdef readonly PyMalloc pymalloc    # allocator used to obtain ptr
    cdef readonly PyFree pyfree        # deallocator used to release ptr
cymem-2.0.6/cymem/cymem.pyx 0000664 0000000 0000000 00000012730 14134461607 0015630 0 ustar 00root root 0000000 0000000 # cython: embedsignature=True
from cpython.mem cimport PyMem_Malloc, PyMem_Free
from cpython.ref cimport Py_INCREF, Py_DECREF
from libc.string cimport memset
from libc.string cimport memcpy
import warnings
WARN_ZERO_ALLOC = False
cdef class PyMalloc:
    """Python-object wrapper around a C allocation function pointer."""
    cdef void _set(self, malloc_t malloc):
        # Store the raw function pointer on the instance.
        self.malloc = malloc
cdef PyMalloc WrapMalloc(malloc_t malloc):
    """Return a new PyMalloc object that holds the given allocation function."""
    cdef PyMalloc wrapper = PyMalloc()
    wrapper._set(malloc)
    return wrapper
cdef class PyFree:
    """Python-object wrapper around a C deallocation function pointer."""
    cdef void _set(self, free_t free):
        # Store the raw function pointer on the instance.
        self.free = free
cdef PyFree WrapFree(free_t free):
    """Return a new PyFree object that holds the given deallocation function."""
    cdef PyFree wrapper = PyFree()
    wrapper._set(free)
    return wrapper
Default_Malloc = WrapMalloc(PyMem_Malloc)
Default_Free = WrapFree(PyMem_Free)
cdef class Pool:
    """Track allocated memory addresses, and free them all when the Pool is
    garbage collected.  This provides an easy way to avoid memory leaks, and
    removes the need for deallocation functions for complicated structs.

    >>> from cymem.cymem cimport Pool
    >>> cdef Pool mem = Pool()
    >>> data1 = <int*>mem.alloc(10, sizeof(int))
    >>> data2 = <float*>mem.alloc(12, sizeof(float))

    Attributes:
        size (size_t): The current size (in bytes) allocated by the pool.
        addresses (dict): The currently allocated addresses and their sizes. Read-only.
        refs (list): Python objects registered through own_pyref().
        pymalloc (PyMalloc): The allocator to use (default uses PyMem_Malloc).
        pyfree (PyFree): The free to use (default uses PyMem_Free).
    """

    def __cinit__(self, PyMalloc pymalloc=Default_Malloc,
                  PyFree pyfree=Default_Free):
        self.size = 0
        self.addresses = {}
        self.refs = []
        self.pymalloc = pymalloc
        self.pyfree = pyfree

    def __dealloc__(self):
        cdef size_t addr
        # Guard against an object whose __cinit__ never populated `addresses`.
        if self.addresses is not None:
            for addr in self.addresses:
                if addr != 0:
                    self.pyfree.free(<void*>addr)

    cdef void* alloc(self, size_t number, size_t elem_size) except NULL:
        """Allocate a 0-initialized number*elem_size-byte block of memory, and
        remember its address. The block will be freed when the Pool is garbage
        collected. Throw warning when allocating zero-length size and
        WARN_ZERO_ALLOC was set to True.

        Raises MemoryError if number*elem_size does not fit in a size_t or if
        the underlying allocator returns NULL.
        """
        cdef void* p
        cdef size_t n_bytes = number * elem_size
        # size_t multiplication wraps silently; refuse the request rather than
        # hand out a block that is smaller than the caller believes.
        if elem_size != 0 and n_bytes // elem_size != number:
            raise MemoryError(
                "Error assigning %d * %d bytes: size overflows size_t"
                % (number, elem_size)
            )
        if WARN_ZERO_ALLOC and n_bytes == 0:
            warnings.warn("Allocating zero bytes")
        p = self.pymalloc.malloc(n_bytes)
        if p == NULL:
            raise MemoryError("Error assigning %d bytes" % n_bytes)
        memset(p, 0, n_bytes)
        # Addresses are stored as integers so they can be used as dict keys.
        self.addresses[<size_t>p] = n_bytes
        self.size += n_bytes
        return p

    cdef void* realloc(self, void* p, size_t new_size) except NULL:
        """Resizes the memory block pointed to by p to new_size bytes, returning
        a non-NULL pointer to the new block. new_size must be larger than the
        original. The old contents are copied and the old block is freed.

        Raises ValueError if p is not in the Pool or new_size is 0,
        AssertionError if new_size is not larger than the current size, and
        MemoryError if the new block cannot be allocated.
        """
        cdef size_t old_size
        cdef void* new_ptr
        if <size_t>p not in self.addresses:
            raise ValueError("Pointer %d not found in Pool %s" % (<size_t>p, self.addresses))
        if new_size == 0:
            raise ValueError("Realloc requires new_size > 0")
        old_size = self.addresses[<size_t>p]
        assert new_size > old_size
        # alloc() raises MemoryError on failure and already records the new
        # block in `addresses` and `size`, so no further bookkeeping is needed.
        new_ptr = self.alloc(1, new_size)
        memcpy(new_ptr, p, old_size)
        self.free(p)
        return new_ptr

    cdef void free(self, void* p) except *:
        """Frees the memory block pointed to by p, which must have been returned
        by a previous call to Pool.alloc.  You don't necessarily need to free
        memory addresses manually --- you can instead let the Pool be garbage
        collected, at which point all the memory will be freed.

        If p is not in Pool.addresses, a KeyError is raised.
        """
        # pop() first: an unknown pointer raises before anything is freed.
        self.size -= self.addresses.pop(<size_t>p)
        self.pyfree.free(p)

    def own_pyref(self, object py_ref):
        """Hold a reference to py_ref in `refs` for as long as the Pool exists."""
        self.refs.append(py_ref)
cdef class Address:
    """A block of number * size-bytes of 0-initialized memory, tied to a Python
    ref-counted object. When the object is garbage collected, the memory is freed.

    >>> from cymem.cymem cimport Address
    >>> cdef Address address = Address(10, sizeof(double))
    >>> d10 = <double*>address.ptr

    Args:
        number (size_t): The number of elements in the memory block.
        elem_size (size_t): The size of each element.
        pymalloc (PyMalloc): Optional allocator (default uses PyMem_Malloc).
        pyfree (PyFree): Optional deallocator (default uses PyMem_Free).

    Attributes:
        ptr (void*): Pointer to the memory block.
        addr (size_t): Read-only size_t cast of the pointer.
        pymalloc (PyMalloc): The allocator to use (default uses PyMem_Malloc).
        pyfree (PyFree): The free to use (default uses PyMem_Free).
    """

    def __cinit__(self, size_t number, size_t elem_size,
                  PyMalloc pymalloc=Default_Malloc, PyFree pyfree=Default_Free):
        self.ptr = NULL
        self.pymalloc = pymalloc
        self.pyfree = pyfree

    def __init__(self, size_t number, size_t elem_size,
                 PyMalloc pymalloc=Default_Malloc, PyFree pyfree=Default_Free):
        # The constructor arguments are passed to both __cinit__ and __init__,
        # so __init__ must accept the allocator arguments too. They were
        # already stored by __cinit__ and are deliberately not used again here.
        cdef size_t n_bytes = number * elem_size
        # size_t multiplication wraps silently; refuse rather than under-allocate.
        if elem_size != 0 and n_bytes // elem_size != number:
            raise MemoryError(
                "Error assigning %d * %d bytes: size overflows size_t"
                % (number, elem_size)
            )
        # A repeated __init__ call must not leak the previous block.
        if self.ptr != NULL:
            self.pyfree.free(self.ptr)
            self.ptr = NULL
        self.ptr = self.pymalloc.malloc(n_bytes)
        if self.ptr == NULL:
            # Parenthesised: `"%d" % number * elem_size` would format `number`
            # and then repeat the resulting string elem_size times.
            raise MemoryError("Error assigning %d bytes" % n_bytes)
        memset(self.ptr, 0, n_bytes)

    property addr:
        def __get__(self):
            return <size_t>self.ptr

    def __dealloc__(self):
        if self.ptr != NULL:
            self.pyfree.free(self.ptr)
cymem-2.0.6/cymem/tests/ 0000775 0000000 0000000 00000000000 14134461607 0015113 5 ustar 00root root 0000000 0000000 cymem-2.0.6/cymem/tests/test_import.py 0000664 0000000 0000000 00000000407 14134461607 0020037 0 ustar 00root root 0000000 0000000 # This is a very weak test, but testing Cython code can be hard. So, at least check
# we can create the object...
from cymem.cymem import Pool, Address
def test_pool():
    """A freshly constructed Pool reports zero allocated bytes."""
    pool = Pool()
    assert pool.size == 0
def test_address():
    """Constructing an Address of one two-byte element must not raise."""
    block = Address(1, 2)
cymem-2.0.6/pyproject.toml 0000664 0000000 0000000 00000000154 14134461607 0015553 0 ustar 00root root 0000000 0000000 [build-system]
requires = [
"setuptools",
"cython>=0.25",
]
build-backend = "setuptools.build_meta"
cymem-2.0.6/requirements.txt 0000664 0000000 0000000 00000000024 14134461607 0016117 0 ustar 00root root 0000000 0000000 cython>=0.25
pytest
cymem-2.0.6/setup.py 0000775 0000000 0000000 00000010501 14134461607 0014351 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
from __future__ import print_function
import io
import os
import sys
import contextlib
from setuptools import Extension, setup
from distutils.command.build_ext import build_ext
from distutils.sysconfig import get_python_inc
from Cython.Build import cythonize
PACKAGES = ["cymem", "cymem.tests"]
MOD_NAMES = ["cymem.cymem"]
# By overriding build_extensions in a build_ext subclass we have access to the actual compiler that will be used, which is only known after finalize_options
# http://stackoverflow.com/questions/724664/python-distutils-how-to-get-a-compiler-that-is-going-to-be-used
compile_options = {
"msvc": ["/Ox", "/EHsc"],
"other": ["-O3", "-Wno-strict-prototypes", "-Wno-unused-function"],
}
link_options = {"msvc": [], "other": []}
class build_ext_options:
    """Mixin that applies the per-compiler flag tables to every extension."""

    def build_options(self):
        """Set extra compile and link arguments on each extension.

        The flags are looked up in the module-level ``compile_options`` and
        ``link_options`` tables by ``self.compiler.compiler_type``, falling
        back to the ``"other"`` entry for unknown compilers.
        """

        def flags_from(table):
            return table.get(self.compiler.compiler_type, table["other"])

        for extension in self.extensions:
            extension.extra_compile_args = flags_from(compile_options)
        for extension in self.extensions:
            extension.extra_link_args = flags_from(link_options)
class build_ext_subclass(build_ext, build_ext_options):
    """build_ext command that applies per-compiler flags before building."""

    def build_extensions(self):
        # Resolve the flag tables against self.compiler first, then hand over
        # to the regular build_ext implementation.
        build_ext_options.build_options(self)
        build_ext.build_extensions(self)
def clean(path):
    """Delete generated build artefacts for every module in MOD_NAMES.

    For each dotted module name, the ``.so``, ``.html``, ``.cpp`` and ``.c``
    files under *path* are removed if they exist.
    """
    suffixes = [".so", ".html", ".cpp", ".c"]
    for dotted_name in MOD_NAMES:
        stem = dotted_name.replace(".", "/")
        candidates = (os.path.join(path, stem + suffix) for suffix in suffixes)
        for candidate in candidates:
            if os.path.exists(candidate):
                os.unlink(candidate)
@contextlib.contextmanager
def chdir(new_dir):
    """Temporarily switch the working directory and prepend it to sys.path.

    On exit the previous working directory is restored and the entry that was
    added to ``sys.path`` is removed again. If ``new_dir`` cannot be entered,
    the exception from ``os.chdir`` propagates and ``sys.path`` is untouched.
    """
    old_dir = os.getcwd()
    # Do the fallible step before touching sys.path, so a failure here leaves
    # nothing to undo.
    os.chdir(new_dir)
    sys.path.insert(0, new_dir)
    try:
        yield
    finally:
        # Remove our own entry by value: the body may have prepended other
        # entries, so index 0 is not guaranteed to still be ours.
        try:
            sys.path.remove(new_dir)
        except ValueError:
            pass
        os.chdir(old_dir)
def setup_package():
    """Run ``setup()`` for cymem, or remove build artefacts for ``clean``.

    When the first command-line argument is ``clean``, the generated files are
    deleted and nothing else happens. Otherwise the package metadata is read
    from ``cymem/about.py``, the long description from ``README.md``, and the
    Cython extensions listed in MOD_NAMES are built, all with the project root
    as working directory.
    """
    root = os.path.abspath(os.path.dirname(__file__))

    wants_clean = len(sys.argv) > 1 and sys.argv[1] == "clean"
    if wants_clean:
        return clean(root)

    with chdir(root):
        # about.py is executed rather than imported so that its metadata is
        # collected in a plain dict.
        about = {}
        with io.open(os.path.join(root, "cymem", "about.py"), encoding="utf8") as about_file:
            exec(about_file.read(), about)

        with io.open(os.path.join(root, "README.md"), encoding="utf8") as readme_file:
            readme = readme_file.read()

        include_dirs = [get_python_inc(plat_specific=True)]

        # One C++ extension per module; the .pyx path is relative to the root.
        ext_modules = [
            Extension(
                dotted_name,
                [dotted_name.replace(".", "/") + ".pyx"],
                language="c++",
                include_dirs=include_dirs,
            )
            for dotted_name in MOD_NAMES
        ]

        classifiers = [
            "Environment :: Console",
            "Intended Audience :: Developers",
            "Intended Audience :: Science/Research",
            "License :: OSI Approved :: MIT License",
            "Operating System :: POSIX :: Linux",
            "Operating System :: MacOS :: MacOS X",
            "Operating System :: Microsoft :: Windows",
            "Programming Language :: Cython",
            "Programming Language :: Python :: 2.6",
            "Programming Language :: Python :: 2.7",
            "Programming Language :: Python :: 3.3",
            "Programming Language :: Python :: 3.4",
            "Programming Language :: Python :: 3.5",
            "Programming Language :: Python :: 3.6",
            "Programming Language :: Python :: 3.7",
            "Programming Language :: Python :: 3.8",
            "Programming Language :: Python :: 3.9",
            "Programming Language :: Python :: 3.10",
            "Topic :: Scientific/Engineering",
        ]

        setup(
            name="cymem",
            zip_safe=False,
            packages=PACKAGES,
            package_data={"": ["*.pyx", "*.pxd"]},
            description=about["__summary__"],
            long_description=readme,
            long_description_content_type="text/markdown",
            author=about["__author__"],
            author_email=about["__email__"],
            version=about["__version__"],
            url=about["__uri__"],
            license=about["__license__"],
            ext_modules=cythonize(ext_modules, language_level=2),
            setup_requires=["cython>=0.25"],
            classifiers=classifiers,
            cmdclass={"build_ext": build_ext_subclass},
        )
if __name__ == "__main__":
setup_package()