pax_global_header00006660000000000000000000000064145727525570014535gustar00rootroot0000000000000052 comment=ade5bc0f55dc01a06d4146d71f880d3819ad2ee7 flexcache-0.3/000077500000000000000000000000001457275255700133215ustar00rootroot00000000000000flexcache-0.3/.coveragerc000066400000000000000000000002761457275255700154470ustar00rootroot00000000000000[run] source = flexcache omit = flexcache/testsuite/* [report] exclude_lines = @abstractmethod @abc.abstractmethod # Have to re-enable the standard pragma pragma: no cover flexcache-0.3/.github/000077500000000000000000000000001457275255700146615ustar00rootroot00000000000000flexcache-0.3/.github/pull_request_template.md000066400000000000000000000003031457275255700216160ustar00rootroot00000000000000- [ ] Closes # (insert issue number) - [ ] Executed `pre-commit run --all-files` with no errors - [ ] The change is fully covered by automated unit tests - [ ] Added an entry to the CHANGES file flexcache-0.3/.github/workflows/000077500000000000000000000000001457275255700167165ustar00rootroot00000000000000flexcache-0.3/.github/workflows/ci.yml000066400000000000000000000034211457275255700200340ustar00rootroot00000000000000name: CI on: [push, pull_request] jobs: test-linux: strategy: matrix: python-version: [3.9, '3.10', '3.11', '3.12'] runs-on: ubuntu-latest env: TEST_OPTS: "-rfsxEX -s --cov=flexcache --cov-config=.coveragerc" steps: - uses: actions/checkout@v2 with: fetch-depth: 100 - name: Get tags run: git fetch --depth=1 origin +refs/tags/*:refs/tags/* - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Get pip cache dir id: pip-cache run: echo "::set-output name=dir::$(pip cache dir)" - name: Setup caching uses: actions/cache@v2 with: path: ${{ steps.pip-cache.outputs.dir }} key: pip-${{ matrix.python-version }} restore-keys: | pip-${{ matrix.python-version }} - name: Install dependencies run: | pip install .[test] - name: Run Tests run: | pytest $TEST_OPTS - name: Coverage report run: coverage report -m - name: Coveralls Parallel env: COVERALLS_FLAG_NAME: ${{ matrix.test-number }} COVERALLS_PARALLEL: true GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COVERALLS_SERVICE_NAME: github run: | pip install coveralls coveralls coveralls: needs: test-linux runs-on: ubuntu-latest steps: - uses: actions/setup-python@v2 with: python-version: 3.x - name: Coveralls Finished env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COVERALLS_SERVICE_NAME: github run: | pip install coveralls coveralls --finish flexcache-0.3/.github/workflows/lint-autoupdate.yml000066400000000000000000000027011457275255700225600ustar00rootroot00000000000000name: pre-commit on: schedule: - cron: "0 0 * * 0" # every Sunday at 00:00 UTC workflow_dispatch: jobs: autoupdate: name: autoupdate runs-on: ubuntu-latest if: github.repository == 'hgrecco/flexcache' steps: - name: checkout uses: actions/checkout@v2 - name: Cache pip and pre-commit uses: actions/cache@v2 with: path: | ~/.cache/pre-commit ~/.cache/pip key: ${{ runner.os }}-pre-commit-autoupdate - name: setup python uses: actions/setup-python@v2 with: python-version: 3.x - name: upgrade pip run: python -m pip install --upgrade pip - name: install dependencies run: python -m pip install --upgrade pre-commit - name: version info run: python -m pip list - name: autoupdate uses: technote-space/create-pr-action@bfd4392c80dbeb54e0bacbcf4750540aecae6ed4 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} EXECUTE_COMMANDS: | python -m pre_commit autoupdate python -m pre_commit run --all-files COMMIT_MESSAGE: 'pre-commit: autoupdate hook versions' COMMIT_NAME: 'github-actions[bot]' COMMIT_EMAIL: 'github-actions[bot]@users.noreply.github.com' PR_TITLE: 'pre-commit: autoupdate hook versions' PR_BRANCH_PREFIX: 'pre-commit/' PR_BRANCH_NAME: 'autoupdate-${PR_ID}' flexcache-0.3/.github/workflows/lint.yml000066400000000000000000000005201457275255700204040ustar00rootroot00000000000000name: Lint on: [push, pull_request] jobs: lint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 with: python-version: 3.x - name: Lint uses: pre-commit/action@v2.0.0 with: extra_args: --all-files --show-diff-on-failure flexcache-0.3/.github/workflows/publish.yml000066400000000000000000000007651457275255700211170ustar00rootroot00000000000000name: Build and publish to PyPI on: push: tags: - '*' jobs: publish: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: '3.x' - name: Install dependencies run: python -m pip install build - name: Build package run: python -m build - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: password: ${{ secrets.PYPI_API_TOKEN }} flexcache-0.3/.gitignore000066400000000000000000000003531457275255700153120ustar00rootroot00000000000000*~ __pycache__ *egg-info* *.pyc .DS_Store docs/_build/ .idea .vscode build/ dist/ MANIFEST *pytest_cache* .eggs # WebDAV file system cache files .DAV/ # tags files (from ctags) tags .coverage* # notebook stuff *.ipynb_checkpoints* flexcache-0.3/.pre-commit-config.yaml000066400000000000000000000014541457275255700176060ustar00rootroot00000000000000exclude: '^pint/_vendor' repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/psf/black rev: 23.1.0 hooks: - id: black - id: black-jupyter - repo: https://github.com/charliermarsh/ruff-pre-commit rev: 'v0.0.240' hooks: - id: ruff args: ["--fix"] - repo: https://github.com/executablebooks/mdformat rev: 0.7.16 hooks: - id: mdformat additional_dependencies: - mdformat-gfm # GitHub-flavored Markdown - mdformat-black - repo: https://github.com/kynan/nbstripout rev: 0.6.1 hooks: - id: nbstripout args: [--extra-keys=metadata.kernelspec metadata.language_info.version] flexcache-0.3/AUTHORS000066400000000000000000000001231457275255700143650ustar00rootroot00000000000000flexcache is written and maintained by Hernan E. Grecco . flexcache-0.3/BADGES.rst000066400000000000000000000016171457275255700150050ustar00rootroot00000000000000.. image:: https://img.shields.io/pypi/v/flexcache.svg :target: https://pypi.python.org/pypi/flexcache :alt: Latest Version .. image:: https://img.shields.io/pypi/l/flexcache.svg :target: https://pypi.python.org/pypi/flexcache :alt: License .. image:: https://img.shields.io/pypi/pyversions/flexcache.svg :target: https://pypi.python.org/pypi/flexcache :alt: Python Versions .. image:: https://github.com/hgrecco/flexcache/workflows/CI/badge.svg :target: https://github.com/hgrecco/flexcache/actions?query=workflow%3ACI :alt: CI .. image:: https://github.com/hgrecco/flexcache/workflows/Lint/badge.svg :target: https://github.com/hgrecco/flexcache/actions?query=workflow%3ALint :alt: LINTER .. image:: https://coveralls.io/repos/github/hgrecco/flexcache/badge.svg?branch=main :target: https://coveralls.io/github/hgrecco/flexcache?branch=main :alt: Coverage flexcache-0.3/CHANGES000066400000000000000000000012661457275255700143210ustar00rootroot00000000000000flexcache Changelog =================== 0.3 (2023-08-03) ---------------- - Better typing support - Tests against Python 3.11 and 3.12 - Updated package infrastructure. 0.2 (2022-02-28) ---------------- - Split header into Header, Invalidate and Naming classes. - Renamed reader to converter, in docs and variables. - Allow passing directly a converter_id as string. - Allow converters with one (source_object) or two parameters (source_object, cache_path_stem). - Added NameByHashIter. - Added type checking on source_object using __post_init__. - Added docstrings. - Added checks to fail fast when creating Header classes. 0.1 (2022-02-27) ---------------- - first public release. flexcache-0.3/LICENSE000066400000000000000000000030601457275255700143250ustar00rootroot00000000000000Copyright (c) 2022 by Hernan E. Grecco and contributors. See AUTHORS for more details. Some rights reserved. Redistribution and use in source and binary forms of the software as well as documentation, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * The names of the contributors may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE AND DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flexcache-0.3/MANIFEST.in000066400000000000000000000003371457275255700150620ustar00rootroot00000000000000include AUTHORS CHANGES LICENSE README.rst BADGES.rst version.py include *.yaml recursive-include flexcache *.py exclude pull_request_template.md version.py .coveragerc global-exclude *.pyc *~ .DS_Store *__pycache__* *.pyo flexcache-0.3/README.rst000066400000000000000000000127411457275255700150150ustar00rootroot00000000000000.. image:: https://img.shields.io/pypi/v/flexcache.svg :target: https://pypi.python.org/pypi/flexcache :alt: Latest Version .. image:: https://img.shields.io/pypi/l/flexcache.svg :target: https://pypi.python.org/pypi/flexcache :alt: License .. image:: https://img.shields.io/pypi/pyversions/flexcache.svg :target: https://pypi.python.org/pypi/flexcache :alt: Python Versions .. image:: https://github.com/hgrecco/flexcache/workflows/CI/badge.svg :target: https://github.com/hgrecco/flexcache/actions?query=workflow%3ACI :alt: CI .. image:: https://github.com/hgrecco/flexcache/workflows/Lint/badge.svg :target: https://github.com/hgrecco/flexcache/actions?query=workflow%3ALint :alt: LINTER .. image:: https://coveralls.io/repos/github/hgrecco/flexcache/badge.svg?branch=main :target: https://coveralls.io/github/hgrecco/flexcache?branch=main :alt: Coverage flexcache ========= An robust and extensible package to cache on disk the result of expensive calculations. Consider an expensive function `parse` that takes a path and returns a parsed version: .. code-block:: python >>> content = parse("source.txt") It would be nice to automatically and persistently cache this result and this is where flexcache comes in. First, we create a `DiskCache` object: .. code-block:: python >>> from flexcache import DiskCacheByMTime >>> dc = DiskCacheByMTime(cache_folder="/my/cache/folder") and then is loaded: .. code-block:: python >>> content, basename = dc.load("source.txt", converter=parse) If this is the first call, as the cached result is not available, `parse` will be called on `source.txt` and the output will be saved and returned. The next time, the cached will be loaded and returned. When the source is changed, the DiskCache detects that the cached file is older, calls `parse` again storing and returning the new result. In certain cases you would rather detect that the file has changed by hashing the file. Simply use `DiskCacheByHash` instead of `DiskCacheByMTime`. Cached files are saved using the pickle protocol, and each has a companion json file with the header content. This idea is completely flexible, and apply not only to parser. In **flexcache** we say there are two types of objects: **source object** and **converted object**. The conversion function maps the former in to the latter. The cache stores the latter by looking a customizable aspect of the former. Building your own caching logic ------------------------------- In certain cases you would like to customize how caching and invalidation is done. You can achieve this by subclassing the `DiskCache`. .. code-block:: python >>> from flexcache import DiskCache >>> class MyDiskCache(DiskCache): ... ... @dataclass(frozen=True) ... class MyHeader(NameByPathHeader, InvalidateByExist, BasicPythonHeader): ... pass ... ... _header_classes = {pathlib.Path: MyHeader} Here we created a custom Header class and use it to handle `pathlib.Path` objects. You can even have multiple headers registered in the same class to handle different source object types. We provide a convenient set of mixable classes to achieve almost any behavior. These are divided in three categories and you must choose at least one from every kind. Headers ~~~~~~~ These classes store the information that will be saved along side the cached file. - **BaseHeader**: source object and identifier of the converter function. - **BasicPythonHeader**: source and identifier of the converter function, platform, python implementation, python version. Invalidate ~~~~~~~~~~ These classes define how the cache will decide if the cached converted object is an actual representation of the source object. - **InvalidateByExist**: the cached file must exists. - **InvalidateByPathMTime**: the cached file exists and is newer than the source object (which has to be `pathlib.Path`) - **InvalidateByMultiPathsMtime**: the cached file exists and is newer than the each path in the source object (which has to be `tuple[pathlib.Path]`) Naming ~~~~~~ These classes define how the name is generated. The basename for the cache file is a hash hexdigest built by feeding a collection of values determined by the Header object. - **NameByFields**: all fields except the `source_object`. - **NameByPath**: resolved path of the source object (which has to be `pathlib.Path`). - **NameByMultiPaths**: resolved path of each path source object (which has to be `tuple[pathlib.Path]`), sorted in ascending order. - **NameByFileContent**: the bytes content of the file referred by the source object (which has to be `pathlib.Path`). - **NameByHashIter**: the values in the source object. (which has to be `tuple[str]`), sorted in ascending order - **NameByObj**: the pickled version of the source object (which has to be pickable), using the highest available protocol. This also adds `pickle_protocol` to the header. You can mix and match as you see it fit, and of course, you can make your own. Finally, you can also avoid saving the header by setting the `_store_header` class attribute to `False`. ---- This project was started as a part of Pint_, the python units package. See AUTHORS_ for a list of the maintainers. To review an ordered list of notable changes for each version of a project, see CHANGES_ .. _`AUTHORS`: https://github.com/hgrecco/flexcache/blob/main/AUTHORS .. _`CHANGES`: https://github.com/hgrecco/flexcache/blob/main/CHANGES .. _`Pint`: https://github.com/hgrecco/pint flexcache-0.3/flexcache/000077500000000000000000000000001457275255700152435ustar00rootroot00000000000000flexcache-0.3/flexcache/__init__.py000066400000000000000000000024541457275255700173610ustar00rootroot00000000000000""" flexcache ~~~~~~~~~ Classes for persistent caching and invalidating cached objects, which are built from a source object and a (potentially expensive) conversion function. :copyright: 2022 by flexcache Authors, see AUTHORS for more details. :license: BSD, see LICENSE for more details. """ from __future__ import annotations from importlib.metadata import version try: # pragma: no cover __version__ = version("flexcache") except Exception: # pragma: no cover # we seem to have a local copy not installed without setuptools # so the reported version will be unknown __version__ = "unknown" from .flexcache import ( BaseHeader, BasicPythonHeader, DiskCache, DiskCacheByHash, DiskCacheByMTime, InvalidateByExist, InvalidateByMultiPathsMtime, InvalidateByPathMTime, NameByFields, NameByFileContent, NameByHashIter, NameByMultiPaths, NameByObj, NameByPath, ) __all__ = ( "__version__", "BaseHeader", "BasicPythonHeader", "NameByFields", "NameByFileContent", "NameByObj", "NameByPath", "NameByMultiPaths", "NameByHashIter", "DiskCache", "DiskCacheByHash", "DiskCacheByMTime", "InvalidateByExist", "InvalidateByPathMTime", "InvalidateByMultiPathsMtime", ) flexcache-0.3/flexcache/flexcache.py000066400000000000000000000344051457275255700175450ustar00rootroot00000000000000""" flexcache.flexcache ~~~~~~~~~~~~~~~~~~~ Classes for persistent caching and invalidating cached objects, which are built from a source object and a (potentially expensive) conversion function. Header ------ Contains summary information about the source object that will be saved together with the cached file. It's capabilities are divided in three groups: - The Header itself which contains the information that will be saved alongside the cached file - The Naming logic which indicates how the cached filename is built. - The Invalidation logic which indicates whether a cached file is valid (i.e. truthful to the actual source file). DiskCache --------- Saves and loads to the cache a transformed versions of a source object. :copyright: 2022 by flexcache Authors, see AUTHORS for more details. :license: BSD, see LICENSE for more details. """ from __future__ import annotations import abc import hashlib import json import pathlib import pickle import platform import sys from dataclasses import asdict as dc_asdict from dataclasses import dataclass from dataclasses import fields as dc_fields from typing import Any, Iterable, Union, Generator, Optional, Callable, ClassVar if sys.version_info >= (3, 10): from typing import TypeAlias # noqa else: from typing_extensions import TypeAlias # noqa if sys.version_info >= (3, 11): from typing import Self # noqa else: from typing_extensions import Self # noqa Pickable: TypeAlias = Any Converter1: TypeAlias = Callable[ [ Any, ], Any, ] Converter2: TypeAlias = Callable[[Any, str], Any] Converter: TypeAlias = Union[Converter1, Converter2] ######### # Header ######### @dataclass(frozen=True) class BaseHeader(abc.ABC): """Header with no information except the converter_id All header files must inherit from this. """ # The actual source of the data (or a reference to it) # that is going to be converted. source: Any # An identification of the function that is used to # convert the source into the result object. converter_id: str _source_type: ClassVar[Any] = object def __post_init__(self): # TODO: In more modern python versions it would be # good to check for things like tuple[str]. if not isinstance(self.source, self._source_type): raise TypeError( f"Source must be {self._source_type}, " f"not {type(self.source)}" ) def for_cache_name(self) -> Generator[bytes, None, None]: """The basename for the cache file is a hash hexdigest built by feeding this collection of values. A class can provide it's own set of values by rewriting `_for_cache_name`. """ for el in self._for_cache_name(): if isinstance(el, str): yield el.encode("utf-8") else: yield el def _for_cache_name(self) -> Generator[Union[bytes, str], None, None]: """The basename for the cache file is a hash hexdigest built by feeding this collection of values. Change the behavior by writing your own. """ yield self.converter_id @abc.abstractmethod def is_valid(self, cache_path: pathlib.Path) -> bool: """Return True if the cache_path is an cached version of the source_object represented by this header. """ HeaderBuilder: TypeAlias = Union[type[BaseHeader], Callable[[Any, str], BaseHeader]] @dataclass(frozen=True) class BasicPythonHeader(BaseHeader): """Header with basic Python information.""" system: str = platform.system() python_implementation: str = platform.python_implementation() python_version: str = platform.python_version() ##################### # Invalidation logic ##################### @dataclass(frozen=True) class InvalidateByExist(BaseHeader): """The cached file is valid if exists and is newer than the source file.""" def is_valid(self, cache_path: pathlib.Path) -> bool: return cache_path.exists() @dataclass(frozen=True) class InvalidateByPathMTime(BaseHeader): """The cached file is valid if exists and is newer than the source file.""" @property @abc.abstractmethod def source_path(self) -> pathlib.Path: ... def is_valid(self, cache_path: pathlib.Path) -> bool: return ( cache_path.exists() and cache_path.stat().st_mtime > self.source_path.stat().st_mtime ) @dataclass(frozen=True) class InvalidateByMultiPathsMtime(BaseHeader): """The cached file is valid if exists and is newer than the newest source file.""" @property @abc.abstractmethod def source_paths(self) -> tuple[pathlib.Path, ...]: ... @property def newest_date(self) -> float: return max((p.stat().st_mtime for p in self.source_paths), default=0.0) def is_valid(self, cache_path: pathlib.Path) -> bool: return cache_path.exists() and cache_path.stat().st_mtime > self.newest_date ############### # Naming logic ############### @dataclass(frozen=True) class NameByFields(BaseHeader): """Name is built taking into account all fields in the Header (except the source itself). """ def _for_cache_name(self) -> Generator[Any, None, None]: yield from super()._for_cache_name() for field in dc_fields(self): if field.name not in ("source", "converter_id"): yield getattr(self, field.name) @dataclass(frozen=True) class NameByFileContent(BaseHeader): """Given a file source object, the name is built from its content.""" _source_type = pathlib.Path @property def source_path(self) -> pathlib.Path: return self.source def _for_cache_name(self) -> Generator[Any, None, None]: yield from super()._for_cache_name() yield self.source_path.read_bytes() @classmethod def from_string(cls, s: str, converter_id: str) -> Self: return cls(pathlib.Path(s), converter_id) @dataclass(frozen=True) class NameByObj(BaseHeader): """Given a pickable source object, the name is built from its content.""" pickle_protocol: int = pickle.HIGHEST_PROTOCOL def _for_cache_name(self) -> Generator[Any, None, None]: yield from super()._for_cache_name() yield pickle.dumps(self.source, protocol=self.pickle_protocol) @dataclass(frozen=True) class NameByPath(BaseHeader): """Given a file source object, the name is built from its resolved path.""" _source_type = pathlib.Path @property def source_path(self) -> pathlib.Path: return self.source def _for_cache_name(self) -> Generator[Any, None, None]: yield from super()._for_cache_name() yield bytes(self.source_path.resolve()) @classmethod def from_string(cls, s: str, converter_id: str) -> Self: return cls(pathlib.Path(s), converter_id) @dataclass(frozen=True) class NameByMultiPaths(BaseHeader): """Given multiple file source object, the name is built from their resolved path in ascending order. """ _source_type = tuple @property def source_paths(self) -> tuple[pathlib.Path, ...]: return self.source def _for_cache_name(self) -> Generator[Any, None, None]: yield from super()._for_cache_name() yield from sorted(bytes(p.resolve()) for p in self.source_paths) @classmethod def from_strings(cls, ss: Iterable[str], converter_id: str): return cls(tuple(pathlib.Path(s) for s in ss), converter_id) @dataclass(frozen=True) class NameByHashIter(BaseHeader): """Given multiple hashes, the name is built from them in ascending order.""" _source_type = tuple def _for_cache_name(self) -> Generator[Any, None, None]: yield from super()._for_cache_name() yield from sorted(h for h in self.source) class DiskCache: """A class to store and load cached objects to disk, which are built from a source object and conversion function. The basename for the cache file is a hash hexdigest built by feeding a collection of values determined by the Header object. Parameters ---------- cache_folder indicates where the cache files will be saved. """ # Maps classes to header class _header_classes: dict[type, HeaderBuilder] # Hasher object constructor (e.g. a member of hashlib) # must implement update(b: bytes) and hexdigest() methods _hasher = hashlib.sha1 # If True, for each cached file the header is also stored. _store_header: bool = True def __init__(self, cache_folder: str | pathlib.Path): self.cache_folder = pathlib.Path(cache_folder) self.cache_folder.mkdir(parents=True, exist_ok=True) self._header_classes = getattr(self, "_header_classes", {}) def register_header_class(self, object_class: type, header_class: HeaderBuilder): self._header_classes[object_class] = header_class def cache_stem_for(self, header: BaseHeader) -> str: """Generate a hash representing the basename of a memoized file for a given header. The naming strategy is defined by the header class used. """ hd = self._hasher() for value in header.for_cache_name(): hd.update(value) return hd.hexdigest() def cache_path_for(self, header: BaseHeader) -> pathlib.Path: """Generate a Path representing the location of a memoized file for a given filepath or object. The naming strategy is defined by the header class used. """ h = self.cache_stem_for(header) return self.cache_folder.joinpath(h).with_suffix(".pickle") def _get_header_class(self, source_object: Any) -> HeaderBuilder: for k, v in self._header_classes.items(): if isinstance(source_object, k): return v raise TypeError(f"Cannot find header class for {type(source_object)}") def load( self, source_object: Any, converter: Optional[Union[str, Converter]] = None, pass_hash: bool = False, ) -> tuple[Any, str]: """Given a source_object, return the converted value stored in the cache together with the cached path stem When the cache is not found: - If a converter callable is given, use it on the source object, store the result in the cache and return it. - Return None, otherwise. Two signatures for the converter are valid: - source_object -> transformed object - (source_object, cached_path_stem) -> transformed_object To use the second one, use `pass_hash=True`. If you want to do the conversion yourself outside this class, use the converter argument to provide a name for it. This is important as the cached_path_stem depends on the converter name. """ header_class = self._get_header_class(source_object) converter_id: str if isinstance(converter, str): converter_id = converter converter = None else: converter_id = getattr(converter, "__name__", "") header = header_class(source_object, converter_id) cache_path = self.cache_path_for(header) converted_object = self.rawload(header, cache_path) if converted_object: return converted_object, cache_path.stem if converter is None: return None, cache_path.stem if pass_hash: converted_object = converter(source_object, cache_path.stem) else: converted_object = converter(source_object) self.rawsave(header, converted_object, cache_path) return converted_object, cache_path.stem def save( self, converted_object: Pickable, source_object: Any, converter_id: str = "" ) -> str: """Given a converted_object and its corresponding source_object, store it in the cache and return the cached_path_stem. """ header_class = self._get_header_class(source_object) header = header_class(source_object, converter_id) return self.rawsave(header, converted_object, self.cache_path_for(header)).stem def rawload( self, header: BaseHeader, cache_path: Optional[pathlib.Path] = None ) -> Optional[Pickable]: """Load the converted_object from the cache if it is valid. The invalidating strategy is defined by the header class used. The cache_path is optional, it will be calculated from the header if not given. """ if cache_path is None: cache_path = self.cache_path_for(header) if header.is_valid(cache_path): with cache_path.open(mode="rb") as fi: return pickle.load(fi) return None def rawsave( self, header: BaseHeader, converted_object: Pickable, cache_path: Optional[pathlib.Path] = None, ) -> pathlib.Path: """Save the converted object (in pickle format) and its header (in json format) to the cache folder. The cache_path is optional, it will be calculated from the header if not given. """ if cache_path is None: cache_path = self.cache_path_for(header) if self._store_header: with cache_path.with_suffix(".json").open("w", encoding="utf-8") as fo: json.dump({k: str(v) for k, v in dc_asdict(header).items()}, fo) with cache_path.open(mode="wb") as fo: pickle.dump(converted_object, fo) return cache_path class DiskCacheByHash(DiskCache): """Convenience class used for caching conversions that take a path, naming by hashing its content. """ @dataclass(frozen=True) class Header(NameByFileContent, InvalidateByExist, BaseHeader): pass _header_classes: dict[type, HeaderBuilder] = { pathlib.Path: Header, str: Header.from_string, } class DiskCacheByMTime(DiskCache): """Convenience class used for caching conversions that take a path, naming by hashing its full path and invalidating by the file modification time. """ @dataclass(frozen=True) class Header(NameByPath, InvalidateByPathMTime, BaseHeader): pass _header_classes: dict[type, HeaderBuilder] = { pathlib.Path: Header, str: Header.from_string, } flexcache-0.3/flexcache/py.typed000066400000000000000000000000001457275255700167300ustar00rootroot00000000000000flexcache-0.3/flexcache/testsuite/000077500000000000000000000000001457275255700172745ustar00rootroot00000000000000flexcache-0.3/flexcache/testsuite/__init__.py000066400000000000000000000000001457275255700213730ustar00rootroot00000000000000flexcache-0.3/flexcache/testsuite/test_byhash.py000066400000000000000000000060251457275255700221660ustar00rootroot00000000000000import pathlib import pickle import time from flexcache import DiskCacheByHash # These sleep time is needed when run on GitHub Actions # If not given or too short, some mtime changes are not visible. FS_SLEEP = 0.010 def parser(p: pathlib.Path): return p.read_bytes() def test_file_changed(tmp_path): # Generate a definition file dfile = tmp_path / "definitions.txt" dfile.write_bytes(b"1234") dc = DiskCacheByHash(tmp_path) content = dc.load(dfile)[0] assert len(tuple(tmp_path.glob("*.pickle"))) == 0 assert len(tuple(tmp_path.glob("*.json"))) == 0 time.sleep(FS_SLEEP) # First, the cache should be missed assert content is None dc.save(pickle.dumps(dfile.read_bytes()), dfile) # There should be a cache file now assert len(tuple(tmp_path.glob("*.pickle"))) == 1 assert len(tuple(tmp_path.glob("*.json"))) == 1 content = pickle.loads(dc.load(dfile)[0]) # Now, the cache should be hit assert content == b"1234" # Modify the definition file # Add some sleep to make sure that the time stamp difference is significant. dfile.write_bytes(b"1235") # Verify that the cache was not loaded as the content of the original file # has changed. assert dc.load(dfile)[0] is None def test_cache_miss(tmp_path): # Generate a definition file dfile = tmp_path / "definitions.txt" dfile.write_bytes(b"1234") dc = DiskCacheByHash(tmp_path) content = dc.load(dfile)[0] assert len(tuple(tmp_path.glob("*.pickle"))) == 0 assert len(tuple(tmp_path.glob("*.json"))) == 0 time.sleep(FS_SLEEP) # First, the cache should be missed assert content is None dc.save(pickle.dumps(dfile.read_bytes()), dfile) # There should be a cache file now assert len(tuple(tmp_path.glob("*.pickle"))) == 1 assert len(tuple(tmp_path.glob("*.json"))) == 1 content = pickle.loads(dc.load(dfile)[0]) # Now, the cache should be hit assert content == b"1234" # Modify the definition file # Add some sleep to make sure that the time stamp difference is significant. time.sleep(FS_SLEEP) dfile.write_bytes(b"1235") # Verify that the cached was not loaded content = dc.load(dfile)[0] assert content is None def test_func(tmp_path): # Generate a definition file dfile = tmp_path / "definitions.txt" dfile.write_bytes(b"1234") dc = DiskCacheByHash(tmp_path) assert dc.load(dfile, converter=parser)[0] == b"1234" # There should be a cache file now assert len(tuple(tmp_path.glob("*.pickle"))) == 1 assert len(tuple(tmp_path.glob("*.json"))) == 1 # Modify the definition file # Add some sleep to make sure that the time stamp difference is significant. dfile.write_bytes(b"1235") # Verify that the cache was not loaded as the content of the original file # has changed. assert dc.load(dfile, converter=parser)[0] == b"1235" # There should be TWO cache files now assert len(tuple(tmp_path.glob("*.pickle"))) == 2 assert len(tuple(tmp_path.glob("*.json"))) == 2 flexcache-0.3/flexcache/testsuite/test_bymtime.py000066400000000000000000000063271457275255700223630ustar00rootroot00000000000000import pathlib import pickle import time from flexcache import DiskCacheByMTime # These sleep time is needed when run on GitHub Actions # If not given or too short, some mtime changes are not visible. FS_SLEEP = 0.010 def parser(p: pathlib.Path): return p.read_bytes() def test_new_cache_date(tmp_path): # Generate a definition file dfile = tmp_path / "definitions.txt" dfile.write_bytes(b"1234") dc = DiskCacheByMTime(tmp_path) content = dc.load(dfile)[0] time.sleep(FS_SLEEP) assert len(tuple(tmp_path.glob("*.pickle"))) == 0 assert len(tuple(tmp_path.glob("*.json"))) == 0 # First, the cache should be missed assert content is None dc.save(pickle.dumps(dfile.read_bytes()), dfile) # There should be a cache file now assert len(tuple(tmp_path.glob("*.pickle"))) == 1 assert len(tuple(tmp_path.glob("*.json"))) == 1 content = pickle.loads(dc.load(dfile)[0]) # Now, the cache should be hit assert content == b"1234" # Modify the definition file # Add some sleep to make sure that the time stamp difference is significant. time.sleep(FS_SLEEP) dfile.write_bytes(b"1235") time.sleep(FS_SLEEP) # Modify the time stamp of the old cache files so they are newer for p in tmp_path.glob("*.pickle"): p.touch() # Verify that the cached file (not the definition file), was loaded. d = pickle.loads(dc.load(dfile)[0]) assert d == b"1234" def test_file_changed(tmp_path): # Generate a definition file dfile = tmp_path / "definitions.txt" dfile.write_bytes(b"1234") dc = DiskCacheByMTime(tmp_path) content = dc.load(dfile)[0] assert len(tuple(tmp_path.glob("*.pickle"))) == 0 assert len(tuple(tmp_path.glob("*.json"))) == 0 time.sleep(FS_SLEEP) # First, the cache should be missed assert content is None dc.save(pickle.dumps(dfile.read_bytes()), dfile) # There should be a cache file now assert len(tuple(tmp_path.glob("*.pickle"))) == 1 assert len(tuple(tmp_path.glob("*.json"))) == 1 content = pickle.loads(dc.load(dfile)[0]) # Now, the cache should be hit assert content == b"1234" # Modify the definition file # Add some sleep to make sure that the time stamp difference is significant. time.sleep(FS_SLEEP) dfile.write_bytes(b"1235") # Verify that the cached was not loaded content = dc.load(dfile)[0] assert content is None def test_func(tmp_path): # Generate a definition file dfile = tmp_path / "definitions.txt" dfile.write_bytes(b"1234") dc = DiskCacheByMTime(tmp_path) assert dc.load(dfile, converter=parser)[0] == b"1234" # There should be a cache file now assert len(tuple(tmp_path.glob("*.pickle"))) == 1 assert len(tuple(tmp_path.glob("*.json"))) == 1 # Modify the definition file # Add some sleep to make sure that the time stamp difference is significant. dfile.write_bytes(b"1235") # Verify that the cache was not loaded as the content of the original file # has changed. assert dc.load(dfile, converter=parser)[0] == b"1235" # There should be still ONE cache files now assert len(tuple(tmp_path.glob("*.pickle"))) == 1 assert len(tuple(tmp_path.glob("*.json"))) == 1 flexcache-0.3/flexcache/testsuite/test_custom_diskcache.py000066400000000000000000000040171457275255700242170ustar00rootroot00000000000000import pytest import flexcache from flexcache import DiskCache def test_register(tmp_path): c = DiskCache(tmp_path) class Header( flexcache.InvalidateByExist, flexcache.NameByFields, flexcache.BaseHeader ): @classmethod def from_int(cls, source, converter_id): return cls(bytes(source), converter_id) c.register_header_class(int, Header.from_int) c.load(3) with pytest.raises(TypeError): c.load(3j) def test_missing_cache_path(tmp_path): c = DiskCache(tmp_path) class Header( flexcache.InvalidateByExist, flexcache.NameByFields, flexcache.BaseHeader ): @classmethod def from_int(cls, source, converter_id): return cls(bytes(source), converter_id) hdr = Header("123", "456") assert c.rawsave(hdr, "789").stem == c.cache_stem_for(hdr) assert c.rawload(hdr) == "789" def test_converter_id(tmp_path): c = DiskCache(tmp_path) class Header( flexcache.InvalidateByExist, flexcache.NameByFields, flexcache.BaseHeader ): @classmethod def from_int(cls, source, converter_id): return cls(bytes(source), converter_id) c.register_header_class(int, Header.from_int) def func(n): return n * 2 content, this_hash = c.load(21, func) assert content == 42 assert c.load(21, "func") == (content, this_hash) assert c.save(content, 21, "func") == this_hash def test_converter_pass_hash(tmp_path): c = DiskCache(tmp_path) class Header( flexcache.InvalidateByExist, flexcache.NameByFields, flexcache.BaseHeader ): @classmethod def from_int(cls, source, converter_id): return cls(bytes(source), converter_id) c.register_header_class(int, Header.from_int) def func(n, a_hash): return (n, a_hash) content, this_hash = c.load(21, func, True) assert content == (21, this_hash) assert c.load(21, "func", True) == (content, this_hash) assert c.save(content, 21, "func") == this_hash flexcache-0.3/flexcache/testsuite/test_header.py000066400000000000000000000113721457275255700221410ustar00rootroot00000000000000import json import pathlib import pickle import time from dataclasses import asdict as dc_asdict from dataclasses import dataclass import pytest import flexcache # These sleep time is needed when run on GitHub Actions # If not given or too short, some mtime changes are not visible. FS_SLEEP = 0.010 def test_empty(tmp_path): @dataclass(frozen=True) class Hdr( flexcache.InvalidateByExist, flexcache.NameByFields, flexcache.BaseHeader ): pass hdr = Hdr("123", "myconverter") assert tuple(hdr.for_cache_name()) == ("myconverter".encode("utf-8"),) p1 = tmp_path / "cache.pickle" assert not hdr.is_valid(p1) p1.touch() assert hdr.is_valid(p1) try: json.dumps({k: str(v) for k, v in dc_asdict(hdr).items()}) except Exception: assert False def test_basic_python(): @dataclass(frozen=True) class Hdr( flexcache.InvalidateByExist, flexcache.NameByFields, flexcache.BasicPythonHeader ): pass hdr = Hdr("123", "myconverter") cn = tuple(hdr.for_cache_name()) assert len(cn) == 4 try: json.dumps({k: str(v) for k, v in dc_asdict(hdr).items()}) except Exception: assert False def test_name_by_content(tmp_path): @dataclass(frozen=True) class Hdr( flexcache.InvalidateByExist, flexcache.NameByFileContent, flexcache.BaseHeader ): pass p = tmp_path / "source.txt" p.write_bytes(b"1234") hdr = Hdr(p, "myconverter") assert hdr.source_path == p cn = tuple(hdr.for_cache_name()) assert len(cn) == 2 assert cn[1] == b"1234" try: json.dumps({k: str(v) for k, v in dc_asdict(hdr).items()}) except Exception: assert False def test_name_by_path(tmp_path): @dataclass(frozen=True) class Hdr( flexcache.NameByPath, flexcache.InvalidateByPathMTime, flexcache.BaseHeader ): pass p = tmp_path / "source.txt" p.write_bytes(b"1234") hdr = Hdr(p, "myconverter") assert hdr.source_path == p cn = tuple(hdr.for_cache_name()) assert len(cn) == 2 assert cn[1] == bytes(p.resolve()) try: json.dumps({k: str(v) for k, v in dc_asdict(hdr).items()}) except Exception: assert False def test_name_by_paths(tmp_path): @dataclass(frozen=True) class Hdr( flexcache.NameByMultiPaths, flexcache.InvalidateByMultiPathsMtime, flexcache.BaseHeader, ): pass p0 = tmp_path / "source0.txt" p0.touch() time.sleep(FS_SLEEP) p1 = tmp_path / "source1.txt" p2 = tmp_path / "source2.txt" p1.write_bytes(b"1234") p2.write_bytes(b"1234") hdr = Hdr((p1, p2), "myconverter") time.sleep(FS_SLEEP) p3 = tmp_path / "source3.txt" p3.touch() cn = tuple(hdr.for_cache_name()) assert len(cn) == 3 assert cn[1] == bytes(p1.resolve()) assert cn[2] == bytes(p2.resolve()) try: json.dumps({k: str(v) for k, v in dc_asdict(hdr).items()}) except Exception: assert False assert not hdr.is_valid(tmp_path / "not.txt") assert not hdr.is_valid(p0) assert hdr.is_valid(p3) hdr = Hdr.from_strings((str(p1), str(p2)), "myconverter") assert hdr.source_paths == (p1, p2) def test_name_by_obj(tmp_path): @dataclass(frozen=True) class Hdr(flexcache.InvalidateByExist, flexcache.NameByObj, flexcache.BaseHeader): pass hdr = Hdr((1, 2, 3), "myconverter") cn = tuple(hdr.for_cache_name()) assert len(cn) == 2 assert hdr.pickle_protocol == pickle.HIGHEST_PROTOCOL assert cn[1] == pickle.dumps((1, 2, 3), hdr.pickle_protocol) try: json.dumps({k: str(v) for k, v in dc_asdict(hdr).items()}) except Exception: assert False def test_name_by_hash(tmp_path): @dataclass(frozen=True) class Hdr( flexcache.InvalidateByExist, flexcache.NameByHashIter, flexcache.BaseHeader ): pass hdr = Hdr(("b", "a", "c"), "myconverter") cn = tuple(hdr.for_cache_name()) assert len(cn) == 4 assert cn[1] == b"a" assert cn[2] == b"b" assert cn[3] == b"c" try: json.dumps({k: str(v) for k, v in dc_asdict(hdr).items()}) except Exception: assert False def test_predefined_headers(tmp_path): fn = "source.txt" hdr0 = flexcache.DiskCacheByMTime.Header.from_string(fn, "123") assert isinstance(hdr0.source_path, pathlib.Path) assert str(hdr0.source_path) == fn hdr1 = flexcache.DiskCacheByHash.Header.from_string(fn, "123") assert isinstance(hdr1.source_path, pathlib.Path) assert str(hdr1.source_path) == fn def test_wrong_type(): @dataclass(frozen=True) class Hdr(flexcache.NameByPath, flexcache.InvalidateByPathMTime): pass with pytest.raises(TypeError): Hdr("testing", "my_converter") flexcache-0.3/pyproject.toml000066400000000000000000000036571457275255700162500ustar00rootroot00000000000000[project] name = "flexcache" authors = [ {name="Hernan E. Grecco", email="hernan.grecco@gmail.com"} ] license = {text = "BSD"} description = "Saves and loads to the cache a transformed versions of a source object." readme = "README.rst" maintainers = [ {name="Hernan E. Grecco", email="hernan.grecco@gmail.com"}, ] keywords = ["cache", "optimization", "storage", "disk"] classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "License :: OSI Approved :: BSD License", "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", "Operating System :: POSIX", "Programming Language :: Python", "Topic :: System :: Filesystems", "Topic :: Software Development :: Libraries", "Topic :: Utilities", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11" ] requires-python = ">=3.9" dynamic = ["version"] # Version is taken from git tags using setuptools_scm dependencies = [ "typing_extensions" ] [tool.setuptools.package-data] flexcache = ["py.typed"] [project.optional-dependencies] test = [ "pytest", "pytest-mpl", "pytest-cov", "pytest-subtests" ] [project.urls] Homepage = "https://github.com/hgrecco/flexcache" [tool.setuptools] packages = ["flexcache"] [build-system] requires = ["setuptools>=61", "wheel", "setuptools_scm[toml]>=3.4.3"] build-backend = "setuptools.build_meta" [tool.setuptools_scm] [tool.ruff.isort] required-imports = ["from __future__ import annotations"] known-first-party= ["pint"] [tool.ruff] ignore = [ # whitespace before ':' - doesn't work well with black # "E203", "E402", # line too long - let black worry about that "E501", # do not assign a lambda expression, use a def "E731", # line break before binary operator # "W503" ] extend-exclude = ["build"] line-length=88 flexcache-0.3/setup.py000066400000000000000000000001341457275255700150310ustar00rootroot00000000000000#!/usr/bin/env python3 from setuptools import setup if __name__ == "__main__": setup()