cachecontrol-0.14.0/.github/workflows/release.yml

on:
  release:
    types:
      - published

name: release

jobs:
  pypi:
    name: upload release to PyPI
    runs-on: ubuntu-latest
    permissions:
      # Used to authenticate to PyPI via OIDC.
      # Used to sign the release's artifacts with sigstore-python.
      id-token: write

      # Used to attach signing artifacts to the published release.
      contents: write

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v4
        with:
          python-version: "3.x"

      - name: deps
        run: python -m pip install -U build

      - name: build
        run: python -m build

      - name: publish
        uses: pypa/gh-action-pypi-publish@release/v1

      - name: sign
        uses: sigstore/gh-action-sigstore-python@v2.0.1
        with:
          inputs: ./dist/*.tar.gz ./dist/*.whl
          release-signing-artifacts: true

cachecontrol-0.14.0/.github/workflows/tests.yml

---
name: CI

on:
  push:
    branches: ["master"]
  pull_request:
    branches: ["master"]
  workflow_dispatch:

jobs:
  tests:
    name: "Python ${{ matrix.python-version }} ${{ matrix.os }}"
    runs-on: "${{ matrix.os }}"
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
        os: ["macos-latest", "windows-latest", "ubuntu-latest"]

    steps:
      - uses: "actions/checkout@v4"
      - uses: "actions/setup-python@v4"
        with:
          python-version: "${{ matrix.python-version }}"
          allow-prereleases: true
      - name: "Install dependencies"
        run: |
          python -VV
          python -m site
          python -m pip install --upgrade pip setuptools wheel
          python -m pip install --upgrade virtualenv tox tox-gh-actions
      - name: "Run tox targets for ${{ matrix.python-version }}"
        run: "python -m tox"

cachecontrol-0.14.0/.gitignore

# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

.DS_Store
*.pyc
*.pyo
*.egg-info/*
dist
bin
lib
lib64
include
.Python
docs/_build
build/
.tox
.venv
web_cache

cachecontrol-0.14.0/CONTRIBUTING.md

# Contributing to CacheControl

Thank you for your interest in contributing to `CacheControl`!

The information below will help you set up a local development environment
and perform common development tasks.

## Requirements

`CacheControl`'s only external development requirement is Python 3.7 or newer.

## Development steps

First, clone this repository:

```bash
git clone https://github.com/psf/cachecontrol
cd cachecontrol
```

Then, bootstrap your local development environment:

```bash
make bootstrap

# OPTIONAL: enter the new environment, if you'd like to run things directly
source .venv/bin/activate
```

Once you've run `make bootstrap`, you can run the other `make` targets to
perform particular tasks.

Any changes you make to the `cachecontrol` source tree will take effect
immediately in the development environment.
### Linting

You can run the current formatters with:

```bash
make format
```

### Testing

You can run the unit tests locally with:

```bash
# run the test suite in the current environment
make test

# OPTIONAL: use `tox` to fan out across multiple interpreters
make test-all
```

### Documentation

You can build the Sphinx-based documentation with:

```bash
# puts the generated HTML in docs/_build/html/
make doc
```

### Releasing

**NOTE**: If you're a non-maintaining contributor, you don't need the steps
here! They're documented for completeness and for onboarding future
maintainers.

Releases of `CacheControl` are managed by GitHub Actions.

To perform a release:

1. Update `CacheControl`'s `__version__` attribute. It can be found under
   `cachecontrol/__init__.py`.

1. Create a new tag corresponding to your new version, with a `v` prefix. For
   example:

   ```bash
   # IMPORTANT: don't forget the `v` prefix!
   git tag v1.2.3
   ```

1. Push your changes to `master` and to the new remote tag.

1. Create, save, and publish a GitHub release for your new tag, including any
   `CHANGELOG` entries.

cachecontrol-0.14.0/CONTRIBUTORS.rst

..
  SPDX-FileCopyrightText: 2015 Eric Larson

  SPDX-License-Identifier: Apache-2.0

==============
 Contributors
==============

Huge thanks to all those folks who have helped improve CacheControl!

- Toby White
- Ian Cordasco
- Cory Benfield
- Javier de la Rosa
- Donald Stufft
- Joseph Walton

cachecontrol-0.14.0/LICENSE.txt

Copyright 2012-2021 Eric Larson

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

cachecontrol-0.14.0/Makefile

# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

VENV=.venv
VENV_CMD=python3 -m venv
ACTIVATE = $(VENV)/bin/activate

$(VENV)/bin/pip:
	$(VENV_CMD) $(VENV)

bootstrap: $(VENV)/bin/pip
	$(VENV)/bin/pip install -e .[dev]

format:
	$(VENV)/bin/black .

doc: $(VENV)/bin/sphinx-build
	. $(ACTIVATE); cd docs && make html

clean: clean-build clean-pyc clean-test

clean-build:
	rm -fr build/
	rm -fr dist/
	rm -fr *.egg-info

clean-pyc:
	find . -name '*.pyc' -exec rm -f {} +
	find . -name '*.pyo' -exec rm -f {} +
	find . -name '*~' -exec rm -f {} +
	find . -name '__pycache__' -exec rm -fr {} +

clean-test:
	rm -fr .tox/
	rm -f .coverage
	rm -fr htmlcov/

test-all:
	$(VENV)/bin/tox

test:
	$(VENV)/bin/py.test

coverage:
	$(VENV)/bin/py.test --cov cachecontrol

dist: clean
	$(VENV)/bin/python -m build
	ls -l dist

cachecontrol-0.14.0/README.rst

..
  SPDX-FileCopyrightText: 2015 Eric Larson

  SPDX-License-Identifier: Apache-2.0

==============
 CacheControl
==============

.. image:: https://img.shields.io/pypi/v/cachecontrol.svg
    :target: https://pypi.python.org/pypi/cachecontrol
    :alt: Latest Version
.. image:: https://github.com/psf/cachecontrol/actions/workflows/tests.yml/badge.svg
    :target: https://github.com/psf/cachecontrol/actions/workflows/tests.yml

CacheControl is a port of the caching algorithms in httplib2_ for use with
the requests_ session object.

It was written because httplib2's better support for caching is often
offset by its lack of thread safety. The same is true of requests in
terms of caching.

Quickstart
==========

.. code-block:: python

  import requests
  from cachecontrol import CacheControl


  sess = requests.session()
  cached_sess = CacheControl(sess)

  response = cached_sess.get('https://google.com')

If the response includes any caching headers, the result will be cached in
a simple dictionary.

For more info, check out the docs_

.. _docs: http://cachecontrol.readthedocs.org/en/latest/
.. _httplib2: https://github.com/httplib2/httplib2
.. _requests: http://docs.python-requests.org/

cachecontrol-0.14.0/cachecontrol/__init__.py

# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

"""CacheControl import Interface.

Make it easy to import from cachecontrol without long namespaces.
"""

__author__ = "Eric Larson"
__email__ = "eric@ionrock.org"
__version__ = "0.14.0"

from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.controller import CacheController
from cachecontrol.wrapper import CacheControl

__all__ = [
    "__author__",
    "__email__",
    "__version__",
    "CacheControlAdapter",
    "CacheController",
    "CacheControl",
]

import logging

logging.getLogger(__name__).addHandler(logging.NullHandler())

cachecontrol-0.14.0/cachecontrol/_cmd.py

# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import logging
from argparse import ArgumentParser
from typing import TYPE_CHECKING

import requests

from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.cache import DictCache
from cachecontrol.controller import logger

if TYPE_CHECKING:
    from argparse import Namespace

    from cachecontrol.controller import CacheController


def setup_logging() -> None:
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    logger.addHandler(handler)


def get_session() -> requests.Session:
    adapter = CacheControlAdapter(
        DictCache(), cache_etags=True, serializer=None, heuristic=None
    )
    sess = requests.Session()
    sess.mount("http://", adapter)
    sess.mount("https://", adapter)

    sess.cache_controller = adapter.controller  # type: ignore[attr-defined]

    return sess


def get_args() -> Namespace:
    parser = ArgumentParser()
    parser.add_argument("url", help="The URL to try and cache")
    return parser.parse_args()


def main() -> None:
    args = get_args()
    sess = get_session()

    # Make a request to get a response
    resp = sess.get(args.url)

    # Turn on logging
    setup_logging()

    # try setting the cache
    cache_controller: CacheController = (
        sess.cache_controller  # type: ignore[attr-defined]
    )
    cache_controller.cache_response(resp.request, resp.raw)

    # Now try to get it
    if cache_controller.cached_request(resp.request):
        print("Cached!")
    else:
        print("Not cached :(")


if __name__ == "__main__":
    main()

cachecontrol-0.14.0/cachecontrol/adapter.py

# SPDX-FileCopyrightText: 2015 Eric Larson
#
SPDX-License-Identifier: Apache-2.0 from __future__ import annotations import functools import types import zlib from typing import TYPE_CHECKING, Any, Collection, Mapping from requests.adapters import HTTPAdapter from cachecontrol.cache import DictCache from cachecontrol.controller import PERMANENT_REDIRECT_STATUSES, CacheController from cachecontrol.filewrapper import CallbackFileWrapper if TYPE_CHECKING: from requests import PreparedRequest, Response from urllib3 import HTTPResponse from cachecontrol.cache import BaseCache from cachecontrol.heuristics import BaseHeuristic from cachecontrol.serialize import Serializer class CacheControlAdapter(HTTPAdapter): invalidating_methods = {"PUT", "PATCH", "DELETE"} def __init__( self, cache: BaseCache | None = None, cache_etags: bool = True, controller_class: type[CacheController] | None = None, serializer: Serializer | None = None, heuristic: BaseHeuristic | None = None, cacheable_methods: Collection[str] | None = None, *args: Any, **kw: Any, ) -> None: super().__init__(*args, **kw) self.cache = DictCache() if cache is None else cache self.heuristic = heuristic self.cacheable_methods = cacheable_methods or ("GET",) controller_factory = controller_class or CacheController self.controller = controller_factory( self.cache, cache_etags=cache_etags, serializer=serializer ) def send( self, request: PreparedRequest, stream: bool = False, timeout: None | float | tuple[float, float] | tuple[float, None] = None, verify: bool | str = True, cert: (None | bytes | str | tuple[bytes | str, bytes | str]) = None, proxies: Mapping[str, str] | None = None, cacheable_methods: Collection[str] | None = None, ) -> Response: """ Send a request. Use the request information to see if it exists in the cache and cache the response if we need to and can. """ cacheable = cacheable_methods or self.cacheable_methods if request.method in cacheable: try: cached_response = self.controller.cached_request(request) except zlib.error: cached_response = None if cached_response: return self.build_response(request, cached_response, from_cache=True) # check for etags and add headers if appropriate request.headers.update(self.controller.conditional_headers(request)) resp = super().send(request, stream, timeout, verify, cert, proxies) return resp def build_response( self, request: PreparedRequest, response: HTTPResponse, from_cache: bool = False, cacheable_methods: Collection[str] | None = None, ) -> Response: """ Build a response by making a request or using the cache. This will end up calling send and returning a potentially cached response """ cacheable = cacheable_methods or self.cacheable_methods if not from_cache and request.method in cacheable: # Check for any heuristics that might update headers # before trying to cache. if self.heuristic: response = self.heuristic.apply(response) # apply any expiration heuristics if response.status == 304: # We must have sent an ETag request. This could mean # that we've been expired already or that we simply # have an etag. In either case, we want to try and # update the cache if that is the case. cached_response = self.controller.update_cached_response( request, response ) if cached_response is not response: from_cache = True # We are done with the server response, read a # possible response body (compliant servers will # not return one, but we cannot be 100% sure) and # release the connection back to the pool. 
response.read(decode_content=False) response.release_conn() response = cached_response # We always cache the 301 responses elif int(response.status) in PERMANENT_REDIRECT_STATUSES: self.controller.cache_response(request, response) else: # Wrap the response file with a wrapper that will cache the # response when the stream has been consumed. response._fp = CallbackFileWrapper( # type: ignore[assignment] response._fp, # type: ignore[arg-type] functools.partial( self.controller.cache_response, request, response ), ) if response.chunked: super_update_chunk_length = response._update_chunk_length def _update_chunk_length(self: HTTPResponse) -> None: super_update_chunk_length() if self.chunk_left == 0: self._fp._close() # type: ignore[union-attr] response._update_chunk_length = types.MethodType( # type: ignore[method-assign] _update_chunk_length, response ) resp: Response = super().build_response(request, response) # type: ignore[no-untyped-call] # See if we should invalidate the cache. if request.method in self.invalidating_methods and resp.ok: assert request.url is not None cache_url = self.controller.cache_url(request.url) self.cache.delete(cache_url) # Give the request a from_cache attr to let people use it resp.from_cache = from_cache # type: ignore[attr-defined] return resp def close(self) -> None: self.cache.close() super().close() # type: ignore[no-untyped-call] cachecontrol-0.14.0/cachecontrol/cache.py000066400000000000000000000036401455705413400203340ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 """ The cache object API for implementing caches. The default is a thread safe in-memory dictionary. """ from __future__ import annotations from threading import Lock from typing import IO, TYPE_CHECKING, MutableMapping if TYPE_CHECKING: from datetime import datetime class BaseCache: def get(self, key: str) -> bytes | None: raise NotImplementedError() def set( self, key: str, value: bytes, expires: int | datetime | None = None ) -> None: raise NotImplementedError() def delete(self, key: str) -> None: raise NotImplementedError() def close(self) -> None: pass class DictCache(BaseCache): def __init__(self, init_dict: MutableMapping[str, bytes] | None = None) -> None: self.lock = Lock() self.data = init_dict or {} def get(self, key: str) -> bytes | None: return self.data.get(key, None) def set( self, key: str, value: bytes, expires: int | datetime | None = None ) -> None: with self.lock: self.data.update({key: value}) def delete(self, key: str) -> None: with self.lock: if key in self.data: self.data.pop(key) class SeparateBodyBaseCache(BaseCache): """ In this variant, the body is not stored mixed in with the metadata, but is passed in (as a bytes-like object) in a separate call to ``set_body()``. That is, the expected interaction pattern is:: cache.set(key, serialized_metadata) cache.set_body(key) Similarly, the body should be loaded separately via ``get_body()``. """ def set_body(self, key: str, body: bytes) -> None: raise NotImplementedError() def get_body(self, key: str) -> IO[bytes] | None: """ Return the body as file-like object. 
""" raise NotImplementedError() cachecontrol-0.14.0/cachecontrol/caches/000077500000000000000000000000001455705413400201425ustar00rootroot00000000000000cachecontrol-0.14.0/cachecontrol/caches/__init__.py000066400000000000000000000004271455705413400222560ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 from cachecontrol.caches.file_cache import FileCache, SeparateBodyFileCache from cachecontrol.caches.redis_cache import RedisCache __all__ = ["FileCache", "SeparateBodyFileCache", "RedisCache"] cachecontrol-0.14.0/cachecontrol/caches/file_cache.py000066400000000000000000000124061455705413400225610ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations import hashlib import os from textwrap import dedent from typing import IO, TYPE_CHECKING, Union from pathlib import Path from cachecontrol.cache import BaseCache, SeparateBodyBaseCache from cachecontrol.controller import CacheController if TYPE_CHECKING: from datetime import datetime from filelock import BaseFileLock def _secure_open_write(filename: str, fmode: int) -> IO[bytes]: # We only want to write to this file, so open it in write only mode flags = os.O_WRONLY # os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only # will open *new* files. # We specify this because we want to ensure that the mode we pass is the # mode of the file. flags |= os.O_CREAT | os.O_EXCL # Do not follow symlinks to prevent someone from making a symlink that # we follow and insecurely open a cache file. if hasattr(os, "O_NOFOLLOW"): flags |= os.O_NOFOLLOW # On Windows we'll mark this file as binary if hasattr(os, "O_BINARY"): flags |= os.O_BINARY # Before we open our file, we want to delete any existing file that is # there try: os.remove(filename) except OSError: # The file must not exist already, so we can just skip ahead to opening pass # Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a # race condition happens between the os.remove and this line, that an # error will be raised. Because we utilize a lockfile this should only # happen if someone is attempting to attack us. fd = os.open(filename, flags, fmode) try: return os.fdopen(fd, "wb") except: # An error occurred wrapping our FD in a file object os.close(fd) raise class _FileCacheMixin: """Shared implementation for both FileCache variants.""" def __init__( self, directory: str | Path, forever: bool = False, filemode: int = 0o0600, dirmode: int = 0o0700, lock_class: type[BaseFileLock] | None = None, ) -> None: try: if lock_class is None: from filelock import FileLock lock_class = FileLock except ImportError: notice = dedent( """ NOTE: In order to use the FileCache you must have filelock installed. You can install it via pip: pip install cachecontrol[filecache] """ ) raise ImportError(notice) self.directory = directory self.forever = forever self.filemode = filemode self.dirmode = dirmode self.lock_class = lock_class @staticmethod def encode(x: str) -> str: return hashlib.sha224(x.encode()).hexdigest() def _fn(self, name: str) -> str: # NOTE: This method should not change as some may depend on it. 
# See: https://github.com/ionrock/cachecontrol/issues/63 hashed = self.encode(name) parts = list(hashed[:5]) + [hashed] return os.path.join(self.directory, *parts) def get(self, key: str) -> bytes | None: name = self._fn(key) try: with open(name, "rb") as fh: return fh.read() except FileNotFoundError: return None def set( self, key: str, value: bytes, expires: int | datetime | None = None ) -> None: name = self._fn(key) self._write(name, value) def _write(self, path: str, data: bytes) -> None: """ Safely write the data to the given path. """ # Make sure the directory exists try: os.makedirs(os.path.dirname(path), self.dirmode) except OSError: pass with self.lock_class(path + ".lock"): # Write our actual file with _secure_open_write(path, self.filemode) as fh: fh.write(data) def _delete(self, key: str, suffix: str) -> None: name = self._fn(key) + suffix if not self.forever: try: os.remove(name) except FileNotFoundError: pass class FileCache(_FileCacheMixin, BaseCache): """ Traditional FileCache: body is stored in memory, so not suitable for large downloads. """ def delete(self, key: str) -> None: self._delete(key, "") class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache): """ Memory-efficient FileCache: body is stored in a separate file, reducing peak memory usage. """ def get_body(self, key: str) -> IO[bytes] | None: name = self._fn(key) + ".body" try: return open(name, "rb") except FileNotFoundError: return None def set_body(self, key: str, body: bytes) -> None: name = self._fn(key) + ".body" self._write(name, body) def delete(self, key: str) -> None: self._delete(key, "") self._delete(key, ".body") def url_to_file_path(url: str, filecache: FileCache) -> str: """Return the file cache path based on the URL. This does not ensure the file exists! """ key = CacheController.cache_url(url) return filecache._fn(key) cachecontrol-0.14.0/cachecontrol/caches/redis_cache.py000066400000000000000000000025361455705413400227530ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations from datetime import datetime, timezone from typing import TYPE_CHECKING from cachecontrol.cache import BaseCache if TYPE_CHECKING: from redis import Redis class RedisCache(BaseCache): def __init__(self, conn: Redis[bytes]) -> None: self.conn = conn def get(self, key: str) -> bytes | None: return self.conn.get(key) def set( self, key: str, value: bytes, expires: int | datetime | None = None ) -> None: if not expires: self.conn.set(key, value) elif isinstance(expires, datetime): now_utc = datetime.now(timezone.utc) if expires.tzinfo is None: now_utc = now_utc.replace(tzinfo=None) delta = expires - now_utc self.conn.setex(key, int(delta.total_seconds()), value) else: self.conn.setex(key, expires, value) def delete(self, key: str) -> None: self.conn.delete(key) def clear(self) -> None: """Helper for clearing all the keys in a database. Use with caution!""" for key in self.conn.keys(): self.conn.delete(key) def close(self) -> None: """Redis uses connection pooling, no need to close the connection.""" pass cachecontrol-0.14.0/cachecontrol/controller.py000066400000000000000000000441071455705413400214570ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 """ The httplib2 algorithms ported for use with requests. 
""" from __future__ import annotations import calendar import logging import re import time from email.utils import parsedate_tz from typing import TYPE_CHECKING, Collection, Mapping from requests.structures import CaseInsensitiveDict from cachecontrol.cache import DictCache, SeparateBodyBaseCache from cachecontrol.serialize import Serializer if TYPE_CHECKING: from typing import Literal from requests import PreparedRequest from urllib3 import HTTPResponse from cachecontrol.cache import BaseCache logger = logging.getLogger(__name__) URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") PERMANENT_REDIRECT_STATUSES = (301, 308) def parse_uri(uri: str) -> tuple[str, str, str, str, str]: """Parses a URI using the regex given in Appendix B of RFC 3986. (scheme, authority, path, query, fragment) = parse_uri(uri) """ match = URI.match(uri) assert match is not None groups = match.groups() return (groups[1], groups[3], groups[4], groups[6], groups[8]) class CacheController: """An interface to see if request should cached or not.""" def __init__( self, cache: BaseCache | None = None, cache_etags: bool = True, serializer: Serializer | None = None, status_codes: Collection[int] | None = None, ): self.cache = DictCache() if cache is None else cache self.cache_etags = cache_etags self.serializer = serializer or Serializer() self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308) @classmethod def _urlnorm(cls, uri: str) -> str: """Normalize the URL to create a safe key for the cache""" (scheme, authority, path, query, fragment) = parse_uri(uri) if not scheme or not authority: raise Exception("Only absolute URIs are allowed. uri = %s" % uri) scheme = scheme.lower() authority = authority.lower() if not path: path = "/" # Could do syntax based normalization of the URI before # computing the digest. See Section 6.2.2 of Std 66. request_uri = query and "?".join([path, query]) or path defrag_uri = scheme + "://" + authority + request_uri return defrag_uri @classmethod def cache_url(cls, uri: str) -> str: return cls._urlnorm(uri) def parse_cache_control(self, headers: Mapping[str, str]) -> dict[str, int | None]: known_directives = { # https://tools.ietf.org/html/rfc7234#section-5.2 "max-age": (int, True), "max-stale": (int, False), "min-fresh": (int, True), "no-cache": (None, False), "no-store": (None, False), "no-transform": (None, False), "only-if-cached": (None, False), "must-revalidate": (None, False), "public": (None, False), "private": (None, False), "proxy-revalidate": (None, False), "s-maxage": (int, True), } cc_headers = headers.get("cache-control", headers.get("Cache-Control", "")) retval: dict[str, int | None] = {} for cc_directive in cc_headers.split(","): if not cc_directive.strip(): continue parts = cc_directive.split("=", 1) directive = parts[0].strip() try: typ, required = known_directives[directive] except KeyError: logger.debug("Ignoring unknown cache-control directive: %s", directive) continue if not typ or not required: retval[directive] = None if typ: try: retval[directive] = typ(parts[1].strip()) except IndexError: if required: logger.debug( "Missing value for cache-control " "directive: %s", directive, ) except ValueError: logger.debug( "Invalid value for cache-control directive " "%s, must be %s", directive, typ.__name__, ) return retval def _load_from_cache(self, request: PreparedRequest) -> HTTPResponse | None: """ Load a cached response, or return None if it's not available. 
""" # We do not support caching of partial content: so if the request contains a # Range header then we don't want to load anything from the cache. if "Range" in request.headers: return None cache_url = request.url assert cache_url is not None cache_data = self.cache.get(cache_url) if cache_data is None: logger.debug("No cache entry available") return None if isinstance(self.cache, SeparateBodyBaseCache): body_file = self.cache.get_body(cache_url) else: body_file = None result = self.serializer.loads(request, cache_data, body_file) if result is None: logger.warning("Cache entry deserialization failed, entry ignored") return result def cached_request(self, request: PreparedRequest) -> HTTPResponse | Literal[False]: """ Return a cached response if it exists in the cache, otherwise return False. """ assert request.url is not None cache_url = self.cache_url(request.url) logger.debug('Looking up "%s" in the cache', cache_url) cc = self.parse_cache_control(request.headers) # Bail out if the request insists on fresh data if "no-cache" in cc: logger.debug('Request header has "no-cache", cache bypassed') return False if "max-age" in cc and cc["max-age"] == 0: logger.debug('Request header has "max_age" as 0, cache bypassed') return False # Check whether we can load the response from the cache: resp = self._load_from_cache(request) if not resp: return False # If we have a cached permanent redirect, return it immediately. We # don't need to test our response for other headers b/c it is # intrinsically "cacheable" as it is Permanent. # # See: # https://tools.ietf.org/html/rfc7231#section-6.4.2 # # Client can try to refresh the value by repeating the request # with cache busting headers as usual (ie no-cache). if int(resp.status) in PERMANENT_REDIRECT_STATUSES: msg = ( "Returning cached permanent redirect response " "(ignoring date and etag information)" ) logger.debug(msg) return resp headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers) if not headers or "date" not in headers: if "etag" not in headers: # Without date or etag, the cached response can never be used # and should be deleted. logger.debug("Purging cached response: no date or etag") self.cache.delete(cache_url) logger.debug("Ignoring cached response: no date") return False now = time.time() time_tuple = parsedate_tz(headers["date"]) assert time_tuple is not None date = calendar.timegm(time_tuple[:6]) current_age = max(0, now - date) logger.debug("Current age based on date: %i", current_age) # TODO: There is an assumption that the result will be a # urllib3 response object. This may not be best since we # could probably avoid instantiating or constructing the # response until we know we need it. resp_cc = self.parse_cache_control(headers) # determine freshness freshness_lifetime = 0 # Check the max-age pragma in the cache control header max_age = resp_cc.get("max-age") if max_age is not None: freshness_lifetime = max_age logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime) # If there isn't a max-age, check for an expires header elif "expires" in headers: expires = parsedate_tz(headers["expires"]) if expires is not None: expire_time = calendar.timegm(expires[:6]) - date freshness_lifetime = max(0, expire_time) logger.debug("Freshness lifetime from expires: %i", freshness_lifetime) # Determine if we are setting freshness limit in the # request. Note, this overrides what was in the response. 
max_age = cc.get("max-age") if max_age is not None: freshness_lifetime = max_age logger.debug( "Freshness lifetime from request max-age: %i", freshness_lifetime ) min_fresh = cc.get("min-fresh") if min_fresh is not None: # adjust our current age by our min fresh current_age += min_fresh logger.debug("Adjusted current age from min-fresh: %i", current_age) # Return entry if it is fresh enough if freshness_lifetime > current_age: logger.debug('The response is "fresh", returning cached response') logger.debug("%i > %i", freshness_lifetime, current_age) return resp # we're not fresh. If we don't have an Etag, clear it out if "etag" not in headers: logger.debug('The cached response is "stale" with no etag, purging') self.cache.delete(cache_url) # return the original handler return False def conditional_headers(self, request: PreparedRequest) -> dict[str, str]: resp = self._load_from_cache(request) new_headers = {} if resp: headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers) if "etag" in headers: new_headers["If-None-Match"] = headers["ETag"] if "last-modified" in headers: new_headers["If-Modified-Since"] = headers["Last-Modified"] return new_headers def _cache_set( self, cache_url: str, request: PreparedRequest, response: HTTPResponse, body: bytes | None = None, expires_time: int | None = None, ) -> None: """ Store the data in the cache. """ if isinstance(self.cache, SeparateBodyBaseCache): # We pass in the body separately; just put a placeholder empty # string in the metadata. self.cache.set( cache_url, self.serializer.dumps(request, response, b""), expires=expires_time, ) # body is None can happen when, for example, we're only updating # headers, as is the case in update_cached_response(). if body is not None: self.cache.set_body(cache_url, body) else: self.cache.set( cache_url, self.serializer.dumps(request, response, body), expires=expires_time, ) def cache_response( self, request: PreparedRequest, response: HTTPResponse, body: bytes | None = None, status_codes: Collection[int] | None = None, ) -> None: """ Algorithm for caching requests. This assumes a requests Response object. """ # From httplib2: Don't cache 206's since we aren't going to # handle byte range requests cacheable_status_codes = status_codes or self.cacheable_status_codes if response.status not in cacheable_status_codes: logger.debug( "Status code %s not in %s", response.status, cacheable_status_codes ) return response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict( response.headers ) if "date" in response_headers: time_tuple = parsedate_tz(response_headers["date"]) assert time_tuple is not None date = calendar.timegm(time_tuple[:6]) else: date = 0 # If we've been given a body, our response has a Content-Length, that # Content-Length is valid then we can check to see if the body we've # been given matches the expected size, and if it doesn't we'll just # skip trying to cache it. 
if ( body is not None and "content-length" in response_headers and response_headers["content-length"].isdigit() and int(response_headers["content-length"]) != len(body) ): return cc_req = self.parse_cache_control(request.headers) cc = self.parse_cache_control(response_headers) assert request.url is not None cache_url = self.cache_url(request.url) logger.debug('Updating cache with response from "%s"', cache_url) # Delete it from the cache if we happen to have it stored there no_store = False if "no-store" in cc: no_store = True logger.debug('Response header has "no-store"') if "no-store" in cc_req: no_store = True logger.debug('Request header has "no-store"') if no_store and self.cache.get(cache_url): logger.debug('Purging existing cache entry to honor "no-store"') self.cache.delete(cache_url) if no_store: return # https://tools.ietf.org/html/rfc7234#section-4.1: # A Vary header field-value of "*" always fails to match. # Storing such a response leads to a deserialization warning # during cache lookup and is not allowed to ever be served, # so storing it can be avoided. if "*" in response_headers.get("vary", ""): logger.debug('Response header has "Vary: *"') return # If we've been given an etag, then keep the response if self.cache_etags and "etag" in response_headers: expires_time = 0 if response_headers.get("expires"): expires = parsedate_tz(response_headers["expires"]) if expires is not None: expires_time = calendar.timegm(expires[:6]) - date expires_time = max(expires_time, 14 * 86400) logger.debug(f"etag object cached for {expires_time} seconds") logger.debug("Caching due to etag") self._cache_set(cache_url, request, response, body, expires_time) # Add to the cache any permanent redirects. We do this before looking # that the Date headers. elif int(response.status) in PERMANENT_REDIRECT_STATUSES: logger.debug("Caching permanent redirect") self._cache_set(cache_url, request, response, b"") # Add to the cache if the response headers demand it. If there # is no date header then we can't do anything about expiring # the cache. elif "date" in response_headers: time_tuple = parsedate_tz(response_headers["date"]) assert time_tuple is not None date = calendar.timegm(time_tuple[:6]) # cache when there is a max-age > 0 max_age = cc.get("max-age") if max_age is not None and max_age > 0: logger.debug("Caching b/c date exists and max-age > 0") expires_time = max_age self._cache_set( cache_url, request, response, body, expires_time, ) # If the request can expire, it means we should cache it # in the meantime. elif "expires" in response_headers: if response_headers["expires"]: expires = parsedate_tz(response_headers["expires"]) if expires is not None: expires_time = calendar.timegm(expires[:6]) - date else: expires_time = None logger.debug( "Caching b/c of expires header. expires in {} seconds".format( expires_time ) ) self._cache_set( cache_url, request, response, body, expires_time, ) def update_cached_response( self, request: PreparedRequest, response: HTTPResponse ) -> HTTPResponse: """On a 304 we will get a new set of headers that we want to update our cached value with, assuming we have one. This should only ever be called when we've sent an ETag and gotten a 304 as the response. 
""" assert request.url is not None cache_url = self.cache_url(request.url) cached_response = self._load_from_cache(request) if not cached_response: # we didn't have a cached response return response # Lets update our headers with the headers from the new request: # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1 # # The server isn't supposed to send headers that would make # the cached body invalid. But... just in case, we'll be sure # to strip out ones we know that might be problmatic due to # typical assumptions. excluded_headers = ["content-length"] cached_response.headers.update( { k: v for k, v in response.headers.items() if k.lower() not in excluded_headers } ) # we want a 200 b/c we have content via the cache cached_response.status = 200 # update our cache self._cache_set(cache_url, request, cached_response) return cached_response cachecontrol-0.14.0/cachecontrol/filewrapper.py000066400000000000000000000103041455705413400216040ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations import mmap from tempfile import NamedTemporaryFile from typing import TYPE_CHECKING, Any, Callable if TYPE_CHECKING: from http.client import HTTPResponse class CallbackFileWrapper: """ Small wrapper around a fp object which will tee everything read into a buffer, and when that file is closed it will execute a callback with the contents of that buffer. All attributes are proxied to the underlying file object. This class uses members with a double underscore (__) leading prefix so as not to accidentally shadow an attribute. The data is stored in a temporary file until it is all available. As long as the temporary files directory is disk-based (sometimes it's a memory-backed-``tmpfs`` on Linux), data will be unloaded to disk if memory pressure is high. For small files the disk usually won't be used at all, it'll all be in the filesystem memory cache, so there should be no performance impact. """ def __init__( self, fp: HTTPResponse, callback: Callable[[bytes], None] | None ) -> None: self.__buf = NamedTemporaryFile("rb+", delete=True) self.__fp = fp self.__callback = callback def __getattr__(self, name: str) -> Any: # The vaguaries of garbage collection means that self.__fp is # not always set. By using __getattribute__ and the private # name[0] allows looking up the attribute value and raising an # AttributeError when it doesn't exist. This stop thigns from # infinitely recursing calls to getattr in the case where # self.__fp hasn't been set. # # [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers fp = self.__getattribute__("_CallbackFileWrapper__fp") return getattr(fp, name) def __is_fp_closed(self) -> bool: try: return self.__fp.fp is None except AttributeError: pass try: closed: bool = self.__fp.closed return closed except AttributeError: pass # We just don't cache it then. # TODO: Add some logging here... return False def _close(self) -> None: if self.__callback: if self.__buf.tell() == 0: # Empty file: result = b"" else: # Return the data without actually loading it into memory, # relying on Python's buffer API and mmap(). mmap() just gives # a view directly into the filesystem's memory cache, so it # doesn't result in duplicate memory use. 
                self.__buf.seek(0, 0)
                result = memoryview(
                    mmap.mmap(self.__buf.fileno(), 0, access=mmap.ACCESS_READ)
                )

            self.__callback(result)

        # We assign this to None here, because otherwise we can get into
        # really tricky problems where the CPython interpreter deadlocks
        # because the callback is holding a reference to something which
        # has a __del__ method. Setting this to None breaks the cycle
        # and allows the garbage collector to do its thing normally.
        self.__callback = None

        # Closing the temporary file releases memory and frees disk space.
        # Important when caching big files.
        self.__buf.close()

    def read(self, amt: int | None = None) -> bytes:
        data: bytes = self.__fp.read(amt)
        if data:
            # We may be dealing with b'', a sign that things are over:
            # it's passed e.g. after we've already closed self.__buf.
            self.__buf.write(data)
        if self.__is_fp_closed():
            self._close()

        return data

    def _safe_read(self, amt: int) -> bytes:
        data: bytes = self.__fp._safe_read(amt)  # type: ignore[attr-defined]
        if amt == 2 and data == b"\r\n":
            # urllib executes this read to toss the CRLF at the end
            # of the chunk.
            return data

        self.__buf.write(data)
        if self.__is_fp_closed():
            self._close()

        return data

cachecontrol-0.14.0/cachecontrol/heuristics.py

# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import calendar
import time
from datetime import datetime, timedelta, timezone
from email.utils import formatdate, parsedate, parsedate_tz
from typing import TYPE_CHECKING, Any, Mapping

if TYPE_CHECKING:
    from urllib3 import HTTPResponse

TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"


def expire_after(delta: timedelta, date: datetime | None = None) -> datetime:
    date = date or datetime.now(timezone.utc)
    return date + delta


def datetime_to_header(dt: datetime) -> str:
    return formatdate(calendar.timegm(dt.timetuple()))


class BaseHeuristic:
    def warning(self, response: HTTPResponse) -> str | None:
        """
        Return a valid 1xx warning header value describing the cache
        adjustments.

        The response is provided to allow warnings like 113
        http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
        to explicitly say the response is over 24 hours old.
        """
        return '110 - "Response is Stale"'

    def update_headers(self, response: HTTPResponse) -> dict[str, str]:
        """Update the response headers with any new headers.

        NOTE: This SHOULD always include some Warning header to
              signify that the response was cached by the client, not
              by way of the provided headers.
        """
        return {}

    def apply(self, response: HTTPResponse) -> HTTPResponse:
        updated_headers = self.update_headers(response)

        if updated_headers:
            response.headers.update(updated_headers)
            warning_header_value = self.warning(response)
            if warning_header_value is not None:
                response.headers.update({"Warning": warning_header_value})

        return response


class OneDayCache(BaseHeuristic):
    """
    Cache the response by providing an expires 1 day in the future.
    """

    def update_headers(self, response: HTTPResponse) -> dict[str, str]:
        headers = {}

        if "expires" not in response.headers:
            date = parsedate(response.headers["date"])
            expires = expire_after(timedelta(days=1), date=datetime(*date[:6], tzinfo=timezone.utc))  # type: ignore[index,misc]
            headers["expires"] = datetime_to_header(expires)
            headers["cache-control"] = "public"
        return headers


class ExpiresAfter(BaseHeuristic):
    """
    Cache **all** requests for a defined time period.
""" def __init__(self, **kw: Any) -> None: self.delta = timedelta(**kw) def update_headers(self, response: HTTPResponse) -> dict[str, str]: expires = expire_after(self.delta) return {"expires": datetime_to_header(expires), "cache-control": "public"} def warning(self, response: HTTPResponse) -> str | None: tmpl = "110 - Automatically cached for %s. Response might be stale" return tmpl % self.delta class LastModified(BaseHeuristic): """ If there is no Expires header already, fall back on Last-Modified using the heuristic from http://tools.ietf.org/html/rfc7234#section-4.2.2 to calculate a reasonable value. Firefox also does something like this per https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397 Unlike mozilla we limit this to 24-hr. """ cacheable_by_default_statuses = { 200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501, } def update_headers(self, resp: HTTPResponse) -> dict[str, str]: headers: Mapping[str, str] = resp.headers if "expires" in headers: return {} if "cache-control" in headers and headers["cache-control"] != "public": return {} if resp.status not in self.cacheable_by_default_statuses: return {} if "date" not in headers or "last-modified" not in headers: return {} time_tuple = parsedate_tz(headers["date"]) assert time_tuple is not None date = calendar.timegm(time_tuple[:6]) last_modified = parsedate(headers["last-modified"]) if last_modified is None: return {} now = time.time() current_age = max(0, now - date) delta = date - calendar.timegm(last_modified) freshness_lifetime = max(0, min(delta / 10, 24 * 3600)) if freshness_lifetime <= current_age: return {} expires = date + freshness_lifetime return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))} def warning(self, resp: HTTPResponse) -> str | None: return None cachecontrol-0.14.0/cachecontrol/py.typed000066400000000000000000000000001455705413400204010ustar00rootroot00000000000000cachecontrol-0.14.0/cachecontrol/serialize.py000066400000000000000000000117661455705413400212700ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations import io from typing import IO, TYPE_CHECKING, Any, Mapping, cast import msgpack from requests.structures import CaseInsensitiveDict from urllib3 import HTTPResponse if TYPE_CHECKING: from requests import PreparedRequest class Serializer: serde_version = "4" def dumps( self, request: PreparedRequest, response: HTTPResponse, body: bytes | None = None, ) -> bytes: response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict( response.headers ) if body is None: # When a body isn't passed in, we'll read the response. We # also update the response with a new file handler to be # sure it acts as though it was never read. 
body = response.read(decode_content=False) response._fp = io.BytesIO(body) # type: ignore[assignment] response.length_remaining = len(body) data = { "response": { "body": body, # Empty bytestring if body is stored separately "headers": {str(k): str(v) for k, v in response.headers.items()}, "status": response.status, "version": response.version, "reason": str(response.reason), "decode_content": response.decode_content, } } # Construct our vary headers data["vary"] = {} if "vary" in response_headers: varied_headers = response_headers["vary"].split(",") for header in varied_headers: header = str(header).strip() header_value = request.headers.get(header, None) if header_value is not None: header_value = str(header_value) data["vary"][header] = header_value return b",".join([f"cc={self.serde_version}".encode(), self.serialize(data)]) def serialize(self, data: dict[str, Any]) -> bytes: return cast(bytes, msgpack.dumps(data, use_bin_type=True)) def loads( self, request: PreparedRequest, data: bytes, body_file: IO[bytes] | None = None, ) -> HTTPResponse | None: # Short circuit if we've been given an empty set of data if not data: return None # Previous versions of this library supported other serialization # formats, but these have all been removed. if not data.startswith(f"cc={self.serde_version},".encode()): return None data = data[5:] return self._loads_v4(request, data, body_file) def prepare_response( self, request: PreparedRequest, cached: Mapping[str, Any], body_file: IO[bytes] | None = None, ) -> HTTPResponse | None: """Verify our vary headers match and construct a real urllib3 HTTPResponse object. """ # Special case the '*' Vary value as it means we cannot actually # determine if the cached response is suitable for this request. # This case is also handled in the controller code when creating # a cache entry, but is left here for backwards compatibility. if "*" in cached.get("vary", {}): return None # Ensure that the Vary headers for the cached response match our # request for header, value in cached.get("vary", {}).items(): if request.headers.get(header, None) != value: return None body_raw = cached["response"].pop("body") headers: CaseInsensitiveDict[str] = CaseInsensitiveDict( data=cached["response"]["headers"] ) if headers.get("transfer-encoding", "") == "chunked": headers.pop("transfer-encoding") cached["response"]["headers"] = headers try: body: IO[bytes] if body_file is None: body = io.BytesIO(body_raw) else: body = body_file except TypeError: # This can happen if cachecontrol serialized to v1 format (pickle) # using Python 2. A Python 2 str(byte string) will be unpickled as # a Python 3 str (unicode string), which will cause the above to # fail with: # # TypeError: 'str' does not support the buffer interface body = io.BytesIO(body_raw.encode("utf8")) # Discard any `strict` parameter serialized by older version of cachecontrol. 
cached["response"].pop("strict", None) return HTTPResponse(body=body, preload_content=False, **cached["response"]) def _loads_v4( self, request: PreparedRequest, data: bytes, body_file: IO[bytes] | None = None, ) -> HTTPResponse | None: try: cached = msgpack.loads(data, raw=False) except ValueError: return None return self.prepare_response(request, cached, body_file) cachecontrol-0.14.0/cachecontrol/wrapper.py000066400000000000000000000024601455705413400207500ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations from typing import TYPE_CHECKING, Collection from cachecontrol.adapter import CacheControlAdapter from cachecontrol.cache import DictCache if TYPE_CHECKING: import requests from cachecontrol.cache import BaseCache from cachecontrol.controller import CacheController from cachecontrol.heuristics import BaseHeuristic from cachecontrol.serialize import Serializer def CacheControl( sess: requests.Session, cache: BaseCache | None = None, cache_etags: bool = True, serializer: Serializer | None = None, heuristic: BaseHeuristic | None = None, controller_class: type[CacheController] | None = None, adapter_class: type[CacheControlAdapter] | None = None, cacheable_methods: Collection[str] | None = None, ) -> requests.Session: cache = DictCache() if cache is None else cache adapter_class = adapter_class or CacheControlAdapter adapter = adapter_class( cache, cache_etags=cache_etags, serializer=serializer, heuristic=heuristic, controller_class=controller_class, cacheable_methods=cacheable_methods, ) sess.mount("http://", adapter) sess.mount("https://", adapter) return sess cachecontrol-0.14.0/docs/000077500000000000000000000000001455705413400152005ustar00rootroot00000000000000cachecontrol-0.14.0/docs/Makefile000066400000000000000000000130501455705413400166370ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 # Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/CacheControl.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/CacheControl.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/CacheControl" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/CacheControl" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." 
man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." cachecontrol-0.14.0/docs/conf.py000066400000000000000000000176661455705413400165170ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 # CacheControl documentation build configuration file, created by # sphinx-quickstart on Mon Nov 4 15:01:23 2013. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. from cachecontrol import __version__ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [ "sphinx.ext.autodoc", "sphinx.ext.todo", "sphinx.ext.viewcode", "sphinx_copybutton", ] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix of source filenames. source_suffix = ".rst" # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. master_doc = "index" # General information about the project. project = "CacheControl" copyright = "2013, Eric Larson" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = __version__ # The full version, including alpha/beta/rc tags. release = __version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all documents. # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. # pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = "furo" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". # html_title = None # A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. # html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. # html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. # html_additional_pages = {} # If false, no module index is generated. # html_domain_indices = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. # html_split_index = False # If true, links to the reST sources are added to the pages. # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. 
# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = "CacheControldoc" # -- Options for LaTeX output -------------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # 'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ( "index", "CacheControl.tex", "CacheControl Documentation", "Eric Larson", "manual", ) ] # The name of an image file (relative to this directory) to place at the top of # the title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # latex_use_parts = False # If true, show page references after internal links. # latex_show_pagerefs = False # If true, show URL addresses after external links. # latex_show_urls = False # Documents to append as an appendix to all manuals. # latex_appendices = [] # If false, no module index is generated. # latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ("index", "cachecontrol", "CacheControl Documentation", ["Eric Larson"], 1) ] # If true, show URL addresses after external links. # man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( "index", "CacheControl", "CacheControl Documentation", "Eric Larson", "CacheControl", "One line description of project.", "Miscellaneous", ) ] # Documents to append as an appendix to all manuals. # texinfo_appendices = [] # If false, no module index is generated. # texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. # texinfo_show_urls = 'footnote' cachecontrol-0.14.0/docs/custom_heuristics.rst000066400000000000000000000126261455705413400215150ustar00rootroot00000000000000.. SPDX-FileCopyrightText: SPDX-FileCopyrightText: 2015 Eric Larson SPDX-License-Identifier: Apache-2.0 =========================== Custom Caching Strategies =========================== There are times when a server provides responses that are logically cacheable, but they lack the headers necessary to cause CacheControl to cache the response. `The HTTP Caching Spec `_ does allow for caching systems to cache requests that lack caching headers. In these situations, the caching system can use heuristics to determine an appropriate amount of time to cache a response. By default, in CacheControl the decision to cache must be explicit by default via the caching headers. When there is a need to cache responses that wouldn't normally be cached, a user can provide a heuristic to adjust the response in order to make it cacheable. For example when running a test suite against a service, caching all responses might be helpful speeding things up while still making real calls to the API. 
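As a rough sketch of that test-suite use case (the fixture name here is illustrative, and it leans on the bundled `ExpiresAfter` heuristic described below), a pytest fixture could hand each test a session that caches every response for a fixed period: ::

    import pytest
    import requests

    from cachecontrol import CacheControl
    from cachecontrol.heuristics import ExpiresAfter

    @pytest.fixture()
    def cached_session():
        # Treat every response as cacheable for an hour, regardless of
        # its headers, so repeated test runs only hit the real API once
        # per URL.
        sess = CacheControl(requests.Session(),
                            heuristic=ExpiresAfter(hours=1))
        yield sess
        sess.close()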
Caching Heuristics
==================

A cache heuristic allows specifying a caching strategy by adjusting response headers before the response is considered for caching. For example, if we wanted to implement a caching strategy where every request should be cached for a week, we can implement that strategy by subclassing `cachecontrol.heuristics.BaseHeuristic`. ::

    import calendar
    from cachecontrol.heuristics import BaseHeuristic
    from datetime import datetime, timedelta
    from email.utils import parsedate, formatdate

    class OneWeekHeuristic(BaseHeuristic):

        def update_headers(self, response):
            date = parsedate(response.headers['date'])
            expires = datetime(*date[:6]) + timedelta(weeks=1)
            return {
                'expires': formatdate(calendar.timegm(expires.timetuple())),
                'cache-control': 'public',
            }

        def warning(self, response):
            msg = 'Automatically cached! Response is Stale.'
            return '110 - "%s"' % msg

When a response is received and we are testing whether it is cacheable, the heuristic is applied before checking its headers. We also set a `warning header`_ to communicate why the response might be stale. The original response is passed into the warning header in order to use its values. For example, if the response has been stale for more than 24 hours, a `Warning 113`_ should be used.

In order to use this heuristic, we pass it to our `CacheControl` constructor. ::

    from requests import Session
    from cachecontrol import CacheControl

    sess = CacheControl(Session(), heuristic=OneWeekHeuristic())
    sess.get('http://google.com')
    r = sess.get('http://google.com')
    assert r.from_cache

The Google homepage specifically uses a negative expires header and a private cache-control header to avoid caches. We've managed to work around that and cache the response using our heuristic.


Best Practices
==============

Cache heuristics are still a new feature, which means the support is somewhat rudimentary. There are likely best practices and common heuristics that can meet the needs of many use cases. For example, in the heuristic above it is important to change both the `expires` and `cache-control` headers in order to make the response cacheable.

If you do find a helpful best practice or create a helpful heuristic, please consider sending a pull request or opening an issue.


Expires After
-------------

CacheControl bundles an `ExpiresAfter` heuristic that is aimed at making it relatively easy to automatically cache responses for a period of time. Here is an example:

.. code-block:: python

    import requests
    from cachecontrol import CacheControlAdapter
    from cachecontrol.heuristics import ExpiresAfter

    adapter = CacheControlAdapter(heuristic=ExpiresAfter(days=1))

    sess = requests.Session()
    sess.mount('http://', adapter)

The arguments are the same as those of the `datetime.timedelta` object. `ExpiresAfter` will override or add the `Expires` header and override or set the `Cache-Control` header to `public`.


Last Modified
-------------

CacheControl bundles a `LastModified` heuristic that emulates the behavior of Firefox, following RFC 7234. Roughly stated, this sets the expiration on a page to 10% of the difference between the request timestamp and the last-modified timestamp. This is capped at 24 hours.
.. code-block:: python

    import requests
    from cachecontrol import CacheControlAdapter
    from cachecontrol.heuristics import LastModified

    adapter = CacheControlAdapter(heuristic=LastModified())

    sess = requests.Session()
    sess.mount('http://', adapter)


Site Specific Heuristics
------------------------

If you have a specific domain that you want to apply a specific heuristic to, use a separate adapter. ::

    import requests
    from cachecontrol import CacheControlAdapter
    from mypkg import MyHeuristic

    sess = requests.Session()
    sess.mount(
        'http://my.specific-domain.com',
        CacheControlAdapter(heuristic=MyHeuristic())
    )

In this way you can limit your heuristic to a specific site.


Warning!
========

Caching is hard, and while HTTP does a reasonable job defining rules for freshness, overriding those rules should be done with caution. Many have been frustrated by overly aggressive caches, so please carefully consider your use case before utilizing a more aggressive heuristic.
cachecontrol-0.14.0/docs/etags.rst000066400000000000000000000121611455705413400170360ustar00rootroot00000000000000..
  SPDX-FileCopyrightText: SPDX-FileCopyrightText: 2015 Eric Larson

  SPDX-License-Identifier: Apache-2.0

==============
 ETag Support
==============

CacheControl's support of ETags is slightly different from httplib2's. In httplib2, an ETag is considered only when a cached response has become stale. When a cached response is expired and it has an ETag header, httplib2 issues the next request with the appropriate `If-None-Match` header. We'll call this behavior a **Time Priority** cache, as the ETag support only takes effect when the time has expired.

In CacheControl the default behavior when an ETag is sent by the server is to cache the response. We'll refer to this pattern as an **Equal Priority** cache, as the decision to cache is either time based or due to the presence of an ETag.

The spec is not explicit about what takes priority when caching with both ETags and time based headers. Therefore, CacheControl supports the different mechanisms via configuration where possible.


Turning Off Equal Priority Caching
==================================

The danger in Equal Priority Caching is that a server that returns ETag headers for every request may fill up your cache. You can disable Equal Priority Caching and utilize a Time Priority algorithm like httplib2. ::

    import requests
    from cachecontrol import CacheControl

    sess = CacheControl(requests.Session(), cache_etags=False)

This will only utilize ETags when they exist within the context of time based caching headers. If a response has time based caching headers that are valid along with an ETag, we will still attempt to handle a 304 Not Modified even though the cached value has expired. Here is a simple example. ::

    # Server response
    GET /foo.html
    Date: Tue, 26 Nov 2013 00:50:49 GMT
    Cache-Control: max-age=3000
    ETag: JAsUYM8K

On a subsequent request, if the cache has expired, the next request will still include the `If-None-Match` header. The cached response will remain in the cache awaiting the response. ::

    # Client request
    GET /foo.html
    If-None-Match: JAsUYM8K

If the server returns a `304 Not Modified`, it will use the stale cached value, updating the headers from the most recent request. ::

    # Server response
    GET /foo.html
    Date: Tue, 26 Nov 2013 01:30:19 GMT
    Cache-Control: max-age=3000
    ETag: JAsUYM8K

If the server returns a `200 OK`, the cache will be updated accordingly.
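Putting that exchange into code, a minimal sketch (assuming `http://example.com/foo.html` stands in for a server that sends both `max-age` and `ETag` headers) looks like this: ::

    import requests
    from cachecontrol import CacheControl

    # Time Priority caching: the ETag is only consulted once the
    # time based headers say the cached response is stale.
    sess = CacheControl(requests.Session(), cache_etags=False)

    r1 = sess.get('http://example.com/foo.html')  # cached via max-age
    r2 = sess.get('http://example.com/foo.html')  # served from cache
    assert r2.from_cache

    # Once max-age has lapsed, the next GET carries If-None-Match; a
    # 304 refreshes the stored response, while a 200 replaces it.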
Equal Priority Caching Benefits
===============================

The benefit of equal priority caching is that you have two orthogonal means of introducing a cache.

The time based cache provides an effective way to reduce the load on requests that can be eventually consistent. Static resources are a great example of when time based caching is effective.

The ETag based cache is effective for working with documents that are larger and/or need to be correct immediately after changes. For example, if you exported some data from a large database, the file could be 10 GB. Being able to send an ETag with this sort of request and know that the version you have locally is still valid saves a great deal of bandwidth and time. Likewise, if you have a resource that you want to update, you can be confident there will not be a `lost update`_ even when the version you have locally has gone stale.


Endpoint Specific Caching
=========================

It should be pointed out that there are times when an endpoint is specifically tailored for different caching techniques. If you have a RESTful service, there might be endpoints that are specifically meant to be cached via time based caching techniques, whereas other endpoints should focus on using ETags. In this situation it is recommended that you use the `CacheControlAdapter` directly. ::

    import requests

    from cachecontrol import CacheControlAdapter
    from cachecontrol.caches import RedisCache

    # using django for an idea on where you might get a
    # username/password.
    from django.conf import settings

    # a function to return a redis connection all the instances of the
    # app may use. this allows updates to the API (ie PUT) to invalidate
    # the cache for other users.
    from myapp.db import redis_connection

    # create our session (note: requests.Session takes no constructor
    # arguments, so the credentials are set as an attribute)
    client = requests.Session()
    client.auth = (settings.user, settings.password)

    # we have a gettext-like endpoint. this doesn't get updated very
    # often, so a time based cache is a helpful way to reduce many small
    # requests.
    client.mount('http://myapi.foo.com/gettext/',
                 CacheControlAdapter(cache_etags=False))

    # here we have a user profile endpoint that lets us update
    # information about users. we need this to be consistent immediately
    # after a user updates some information because another node might
    # handle the request. It uses the global redis cache to coordinate
    # the cache and uses equal priority caching to be sure etags are
    # used by default.
    redis_cache = RedisCache(redis_connection())
    client.mount('http://myapi.foo.com/user_profiles/',
                 CacheControlAdapter(cache=redis_cache))

Hopefully this more in-depth example shows how to configure a `requests.Session` to better utilize ETag based caching vs. Time Priority Caching.

.. _lost update: http://www.w3.org/1999/04/Editing/
cachecontrol-0.14.0/docs/index.rst000066400000000000000000000044761455705413400170520ustar00rootroot00000000000000..
  SPDX-FileCopyrightText: SPDX-FileCopyrightText: 2015 Eric Larson

  SPDX-License-Identifier: Apache-2.0

.. CacheControl documentation master file, created by
   sphinx-quickstart on Mon Nov 4 15:01:23 2013.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

Welcome to CacheControl's documentation!
========================================

CacheControl is a port of the caching algorithms in httplib2_ for use with the requests_ session object.

It was written because httplib2's better support for caching is often mitigated by its lack of thread-safety. The same is true of requests in terms of caching.


Install
=======

CacheControl is available from PyPI_.
You can install it with pip_ ::

    $ pip install CacheControl

Some of the included cache storage classes have external requirements. See :doc:`storage` for more info.


Quick Start
===========

For the impatient, here is how to get started using CacheControl:

.. code-block:: python

    import requests
    from cachecontrol import CacheControl

    sess = requests.session()
    cached_sess = CacheControl(sess)

    response = cached_sess.get('http://google.com')

This uses a thread-safe in-memory dictionary for storage.


Tests
=====

The tests are all in ``tests/`` and are runnable by ``py.test``.


Disclaimers
===========

CacheControl is relatively new and might have bugs. I have made an effort to faithfully port the tests from httplib2 to CacheControl, but there is a decent chance that I've missed something. Please file bugs if you find any issues!

With that in mind, CacheControl has been used successfully in production environments, replacing httplib2's usage. If you give it a try, please let me know of any issues.

.. _httplib2: https://github.com/httplib2/httplib2
.. _requests: https://requests.readthedocs.io/en/latest/
.. _PyPI: https://pypi.org/project/CacheControl/
.. _pip: https://pip.pypa.io/en/stable/


Contents
========

.. toctree::
   :maxdepth: 2

   usage
   storage
   etags
   custom_heuristics
   tips

.. toctree::
   :hidden:
   :caption: Development
   :maxdepth: 2

   release_notes
   GitHub <https://github.com/psf/cachecontrol>
   PyPI <https://pypi.org/project/CacheControl/>


Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
cachecontrol-0.14.0/docs/release_notes.rst000066400000000000000000000116271455705413400205670ustar00rootroot00000000000000..
  SPDX-FileCopyrightText: SPDX-FileCopyrightText: 2015 Eric Larson

  SPDX-License-Identifier: Apache-2.0

===============
 Release Notes
===============

0.14.0
======

* Explicitly allow ``pathlib.Path`` as a type for ``FileCache.directory``.
* Drop support for Python 3.7. Python 3.8 is now the minimum version.
* Don't use the cache to return a full response if a request has a Range header.

0.13.1
======

* Support for old serialization formats has been removed.
* Move the serialization implementation into its own method.
* Drop support for Python older than 3.7.

0.13.0
======

**YANKED**

The project has been moved to the `PSF`_ organization.

* Discard the ``strict`` attribute when serializing and deserializing responses.
* Fix the ``IncompleteRead`` error thrown by ``urllib3 2.0``.
* Remove usage of ``utcnow`` in favor of timezone-aware datetimes.
* Remove the ``compat`` module.
* Use Python's ``unittest.mock`` library instead of ``mock``.
* Add type annotations.
* Exclude the ``tests`` directory from the wheel.

0.12.14
=======

* Revert the change "switch lockfile to filelock" to fix the compatibility issue.

0.12.13
=======

* Discard the ``strict`` attribute when serializing and deserializing responses.
* Fix the IncompleteRead error thrown by ``urllib3`` 2.0.
* Exclude the tests directory from the wheel.

0.12.11
=======

* Added new variant of ``FileCache``, ``SeparateBodyFileCache``, which uses less memory by storing the body in a separate file from the metadata, and streaming data in and out directly to/from that file. Implemented by [Itamar Turner-Trauring](https://pythonspeed.com), work sponsored by [G-Research](https://www.gresearch.co.uk/technology-innovation-and-open-source/).

0.12.7
======

* Dropped support for Python 2.7, 3.4, 3.5.
* Reduced memory usage when caching large files.

0.12.0
======

Rather than using compressed JSON for caching values, we are now using MessagePack (http://msgpack.org/).
MessagePack has the advantage that serialization and deserialization are faster, especially for caching large binary payloads.

0.11.2
======

This release introduces the `cachecontrol.heuristics.LastModified` heuristic. This uses the same behaviour as many browsers to base expiry on the `Last-Modified` header when no explicit expiry is provided.

0.11.0
======

The biggest change is the introduction of using compressed JSON rather than pickle for storing cached values. This allows Python 3.4 and Python 2.7 to use the same cache store. Previously, if a cache was created on 3.4, a 2.7 client would fail loading it, causing an invalid cache miss.

Using JSON also avoids the exec call used in pickle, making the cache more secure by avoiding a potential code injection point. Finally, the compressed JSON is a smaller payload, saving a bit of space.

In order to support arbitrary binary data in the JSON format, base64 encoding is used to turn the data into strings. It has to do some encoding dances to make sure that the bytes/str types are correct, so **please** open a new issue if you notice any issues.

This release also introduces the `cachecontrol.heuristics.ExpiresAfter` heuristic. This allows passing in arguments like a `datetime.timedelta` in order to configure that all responses are cached for the specific period of time.

0.10.0
======

This is an important release as it changes what is actually cached. Rather than caching requests' Response objects, we are now caching the underlying urllib3 response object. Also, the response will not be cached unless the response is actually consumed by the user.

These changes allowed the reintroduction of .raw support.

Huge thanks goes out to @dstufft for these excellent patches and putting so much work into CacheControl to allow cached responses to behave exactly as a normal response.

- FileCache Updates (via `@dstufft`_)

  - files are now hashed via sha-2
  - files are stored in a namespaced directory to avoid hitting os limits on the number of files in a directory.

- use the io.BytesIO when reading / writing (via `@alex`_)
- `#19`_ Allow for a custom controller via `@cournape`_
- `#17`_ use highest protocol version for pickling via `@farwayer`_
- `#16`_ FileCache: raw field serialization via `@farwayer`_

0.9.3
=====

- `#16`_: All cached responses get None for a raw attribute.
- `#13`_ Switched to md5 encoded keys in file cache (via `@mxjeff`_)
- `#11`_ Fix timezones in tests (via `@kaliko`_)
cachecontrol-0.14.0/docs/requirements.txt000066400000000000000000000000071455705413400204610ustar00rootroot00000000000000.[dev]
cachecontrol-0.14.0/docs/storage.rst000066400000000000000000000103671455705413400174050ustar00rootroot00000000000000..
  SPDX-FileCopyrightText: SPDX-FileCopyrightText: 2015 Eric Larson

  SPDX-License-Identifier: Apache-2.0

====================
 Storing Cache Data
====================

CacheControl comes with a few storage backends for storing your cached objects.


DictCache
=========

The `DictCache` is the default cache used when no other is provided. It is a simple thread-safe dictionary. It doesn't try to do anything smart about deadlocks or forcing a busted cache, but it should be reasonably safe to use.

Also, the `DictCache` does not transform the request or response objects in any way. Therefore it is unlikely you could persist the entire cache to disk. The upside is that it should be very fast.


FileCache
=========

The `FileCache` is similar to the caching mechanism provided by httplib2_.
It requires `filelock`_ to be installed, as it prevents multiple threads from writing to the same file at the same time.

.. note::

   Note that you can install this dependency automatically with pip by requesting the *filecache* extra: ::

       pip install cachecontrol[filecache]

Here is an example using the `FileCache`: ::

    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache

    sess = CacheControl(requests.Session(),
                        cache=FileCache('.web_cache'))

The `FileCache` supports a `forever` flag that disables deleting from the cache. This can be helpful in debugging applications that make many web requests that you don't want to repeat. It also can be helpful in testing. Here is an example of how to use it: ::

    forever_cache = FileCache('.web_cache', forever=True)
    sess = CacheControl(requests.Session(), forever_cache)


SeparateBodyFileCache
=====================

This is similar to ``FileCache``, but far more memory efficient, and therefore recommended if you expect to be caching large downloads. ``FileCache`` results in memory usage that can be 2× or 3× the size of the downloaded file, whereas ``SeparateBodyFileCache`` should have fixed memory usage.

The body of the response is stored in a separate file from the metadata, and streamed in and out.

It requires `filelock`_ to be installed, as it prevents multiple threads from writing to the same file at the same time.

.. note::

   You can install this dependency automatically with pip by requesting the *filecache* extra: ::

       pip install cachecontrol[filecache]

Here is an example of using the cache::

    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches import SeparateBodyFileCache

    sess = CacheControl(requests.Session(),
                        cache=SeparateBodyFileCache('.web_cache'))

``SeparateBodyFileCache`` supports the same options as ``FileCache``.


RedisCache
==========

The `RedisCache` uses a Redis database to store values. The values are stored as strings in redis, which means the get, set and delete actions are used.

It requires the `redis`_ library to be installed.

.. note::

   Note that you can install this dependency automatically with pip by requesting the *redis* extra: ::

       pip install cachecontrol[redis]

The `RedisCache` also provides a clear method to delete all keys in a database. Obviously, this should be used with caution, as it is naive and works iteratively, looping over each key and deleting it.

Here is an example using a `RedisCache`: ::

    import redis
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.redis_cache import RedisCache

    pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
    r = redis.Redis(connection_pool=pool)
    sess = CacheControl(requests.Session(), RedisCache(r))

This is primarily a proof of concept, so please file bugs if there is a better method for utilizing redis as a cache.


Third-Party Cache Providers
===========================

* cachecontrol-django_ uses Django's caching mechanism.
* cachecontrol-uwsgi_ uses uWSGI's caching framework.


.. _httplib2: https://github.com/httplib2/httplib2
.. _filelock: https://github.com/tox-dev/py-filelock
.. _requests 2.1: http://docs.python-requests.org/en/latest/community/updates/#id2
.. _redis: https://github.com/andymccurdy/redis-py
.. _cachecontrol-django: https://github.com/glassesdirect/cachecontrol-django
.. _cachecontrol-uwsgi: https://github.com/etene/cachecontrol-uwsgi
cachecontrol-0.14.0/docs/tips.rst000066400000000000000000000045201455705413400167120ustar00rootroot00000000000000..
  SPDX-FileCopyrightText: SPDX-FileCopyrightText: 2015 Eric Larson

  SPDX-License-Identifier: Apache-2.0

=========================
 Tips and Best Practices
=========================

Caching is hard! It is considered one of the great challenges of computer science. Fortunately, the HTTP spec helps to navigate some pitfalls of invalidation using stale responses. Below are some suggestions and best practices to help avoid the more subtle issues that can crop up using CacheControl and HTTP caching. If you have a suggestion, please create a new issue in `github`_ and let folks know what you ran into and how you fixed it.


Timezones
=========

It is important to remember that the times reported by a server may or may not be timezone aware. If you are using CacheControl with a service you control, make sure any timestamps are used consistently, especially if requests might cross timezones.


Cached Responses
================

We've done our best to make sure cached responses act like a normal response, but there are aspects that are different for somewhat obvious reasons.

- Cached responses are never streaming
- Cached responses have `None` for the `raw` attribute

Obviously, when you cache a response, you have downloaded the entire body. Therefore, there is never a use case for streaming a cached response.

With that in mind, you should be aware that if you try to cache a very large response on a network store, you still might have some latency transferring the data from the network store to your application.

Another consideration is storing large responses in a `FileCache`. If you are caching using ETags and the server is extremely specific as to what constitutes an equivalent request, it could provide many different responses for essentially the same data within the context of your application.


Query String Params
===================

If you are caching requests that use a large number of query string parameters, consider sorting them to ensure that the request is properly cached. Requests supports passing both dictionaries and lists of tuples as the params argument in a request. For example: ::

    requests.get(url, params=sorted([('foo', 'one'), ('bar', 'two')]))

By ordering your params, you can be sure the cache key will be consistent across requests and you are caching effectively.
cachecontrol-0.14.0/docs/usage.rst000066400000000000000000000034721455705413400170440ustar00rootroot00000000000000..
  SPDX-FileCopyrightText: SPDX-FileCopyrightText: 2015 Eric Larson

  SPDX-License-Identifier: Apache-2.0

====================
 Using CacheControl
====================

CacheControl assumes you are using a `requests.Session` for your requests. If you are making ad-hoc requests using `requests.get`, then you probably are not terribly concerned about caching.

There are two ways to use CacheControl: via the wrapper and via the adapter.


Wrapper
=======

The easiest way to use CacheControl is to utilize the basic wrapper. Here is an example: ::

    import requests
    import cachecontrol

    sess = cachecontrol.CacheControl(requests.Session())
    resp = sess.get('http://google.com')

This uses the default cache store, a thread-safe in-memory dictionary.


Adapter
=======

The other way to use CacheControl is via a requests `Transport Adapter`_. Here is how the adapter works: ::

    import requests
    import cachecontrol

    sess = requests.Session()
    sess.mount('http://', cachecontrol.CacheControlAdapter())
    resp = sess.get('http://google.com')

Under the hood, the wrapper method of using CacheControl mentioned above works the same as this example.
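One detail worth noting: an adapter only applies to URLs that match the prefix it is mounted on. If you also make HTTPS requests, mount the adapter for both schemes. Here is a small sketch: ::

    import requests
    import cachecontrol

    sess = requests.Session()
    adapter = cachecontrol.CacheControlAdapter()

    # Mount the same adapter (and therefore the same cache) for both
    # schemes; the CacheControl wrapper does this for you.
    sess.mount('http://', adapter)
    sess.mount('https://', adapter)

    resp = sess.get('https://google.com')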
Use a Different Cache Store =========================== Both the wrapper and adapter classes allow providing a custom cache store object for storing your cached data. Here is an example using the provided `FileCache` from CacheControl: :: import requests from cachecontrol import CacheControl # NOTE: This requires filelock be installed from cachecontrol.caches import FileCache sess = CacheControl(requests.Session(), cache=FileCache('.webcache')) The `FileCache` will create a directory called `.webcache` and store a file for each cached request. .. _Transport Adapter: http://docs.python-requests.org/en/latest/user/advanced/#transport-adapters cachecontrol-0.14.0/examples/000077500000000000000000000000001455705413400160665ustar00rootroot00000000000000cachecontrol-0.14.0/examples/benchmark.py000066400000000000000000000026461455705413400204020ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 import sys import requests import argparse from multiprocessing import Process from datetime import datetime from wsgiref.simple_server import make_server from cachecontrol import CacheControl HOST = "localhost" PORT = 8050 URL = f"http://{HOST}:{PORT}/" class Server: def __call__(self, env, sr): body = "Hello World!" status = "200 OK" headers = [ ("Cache-Control", "max-age=%i" % (60 * 10)), ("Content-Type", "text/plain"), ] sr(status, headers) return body def start_server(): httpd = make_server(HOST, PORT, Server()) httpd.serve_forever() def run_benchmark(sess): proc = Process(target=start_server) proc.start() start = datetime.now() for i in range(0, 1000): sess.get(URL) sys.stdout.write(".") end = datetime.now() print() total = end - start print("Total time for 1000 requests: %s" % total) proc.terminate() def run(): parser = argparse.ArgumentParser() parser.add_argument( "-n", "--no-cache", default=False, action="store_true", help="Do not use cachecontrol", ) args = parser.parse_args() sess = requests.Session() if not args.no_cache: sess = CacheControl(sess) run_benchmark(sess) if __name__ == "__main__": run() cachecontrol-0.14.0/pyproject.toml000066400000000000000000000040061455705413400171640ustar00rootroot00000000000000[build-system] requires = ["flit_core >=3.2,<4"] build-backend = "flit_core.buildapi" [tool.flit.module] name = "cachecontrol" [tool.flit.sdist] include = ["tests/"] [project] name = "CacheControl" dynamic = ["version"] description = "httplib2 caching for requests" readme = "README.rst" license = { file = "LICENSE.txt" } authors = [ { name = "Eric Larson", email = "ericlarson@ionrock.com" }, { name = "Frost Ming", email = "me@frostming.com" }, { name = "William Woodruff", email = "william@yossarian.net" }, ] classifiers = [ "Development Status :: 4 - Beta", "Environment :: Web Environment", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Topic :: Internet :: WWW/HTTP", ] keywords = ["requests", "http", "caching", "web"] dependencies = ["requests >= 2.16.0", "msgpack >= 0.5.2, < 2.0.0"] requires-python = ">=3.7" [project.urls] Homepage = "https://pypi.org/project/CacheControl/" Issues = "https://github.com/psf/cachecontrol/issues" Source = "https://github.com/psf/cachecontrol" [project.optional-dependencies] # End-user extras. 
filecache = ["filelock >= 3.8.0"] redis = ["redis>=2.10.5"] # Development extras. dev = [ "CacheControl[filecache,redis]", "build", "mypy", "tox", "pytest-cov", "pytest", "cherrypy", "sphinx", "furo", "sphinx-copybutton", "black", "types-redis", "types-requests", ] [project.scripts] doesitcache = "cachecontrol._cmd:main" [tool.mypy] show_error_codes = true strict = true enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] [[tool.mypy.overrides]] module = "msgpack" ignore_missing_imports = true [tool.pytest.ini_options] norecursedirs = ["bin", "lib", "include", "build"] cachecontrol-0.14.0/tests/000077500000000000000000000000001455705413400154125ustar00rootroot00000000000000cachecontrol-0.14.0/tests/__init__.py000066400000000000000000000000361455705413400175220ustar00rootroot00000000000000"""Tests for CacheControl.""" cachecontrol-0.14.0/tests/conftest.py000066400000000000000000000111651455705413400176150ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 from pprint import pformat import os import socket import pytest import cherrypy class SimpleApp: def __init__(self): self.etag_count = 0 self.update_etag_string() def dispatch(self, env): path = env["PATH_INFO"][1:].split("/") segment = path.pop(0) if segment and hasattr(self, segment): return getattr(self, segment) return None def optional_cacheable_request(self, env, start_response): """A request with no hints as to whether it should be cached. Yet, we might still choose to cache it via a heuristic.""" headers = [ ("server", "nginx/1.2.6 (Ubuntu)"), ("last-modified", "Mon, 21 Jul 2014 17:45:39 GMT"), ("content-type", "text/html"), ] start_response("200 OK", headers) return [pformat(env).encode("utf8")] def vary_accept(self, env, start_response): response = pformat(env).encode("utf8") headers = [ ("Cache-Control", "max-age=5000"), ("Content-Type", "text/plain"), ("Vary", "Accept-Encoding, Accept"), ] start_response("200 OK", headers) return [response] def update_etag_string(self): self.etag_count += 1 self.etag_string = f'"ETAG-{self.etag_count}"' def update_etag(self, env, start_response): self.update_etag_string() headers = [("Cache-Control", "max-age=5000"), ("Content-Type", "text/plain")] start_response("200 OK", headers) return [pformat(env).encode("utf8")] def conditional_get(self, env, start_response): return start_response("304 Not Modified", []) def etag(self, env, start_response): headers = [("Etag", self.etag_string)] if env.get("HTTP_IF_NONE_MATCH") == self.etag_string: start_response("304 Not Modified", headers) return [] else: start_response("200 OK", headers) return [pformat(env).encode("utf8")] def cache_60(self, env, start_response): headers = [("Cache-Control", "public, max-age=60")] start_response("200 OK", headers) return [pformat(env).encode("utf8")] def no_cache(self, env, start_response): headers = [("Cache-Control", "no-cache")] start_response("200 OK", headers) return [pformat(env).encode("utf8")] def permanent_redirect(self, env, start_response): headers = [("Location", "/permalink")] start_response("301 Moved Permanently", headers) return [b"See: /permalink"] def permalink(self, env, start_response): start_response("200 OK", [("Content-Type", "text/plain")]) return [b"The permanent resource"] def multiple_choices(self, env, start_response): headers = [("Link", "/permalink")] start_response("300 Multiple Choices", headers) return [b"See: /permalink"] def stream(self, env, start_response): headers = [("Content-Type", 
"text/plain"), ("Cache-Control", "max-age=5000")] start_response("200 OK", headers) for i in range(10): yield pformat(i).encode("utf8") def fixed_length(self, env, start_response): body = b"0123456789" headers = [ ("Content-Type", "text/plain"), ("Cache-Control", "max-age=5000"), ("Content-Length", str(len(body))), ] start_response("200 OK", headers) return [body] def __call__(self, env, start_response): func = self.dispatch(env) if func: return func(env, start_response) headers = [("Cache-Control", "max-age=5000"), ("Content-Type", "text/plain")] start_response("200 OK", headers) return [pformat(env).encode("utf8")] @pytest.fixture(scope="session") def server(): return cherrypy.server @pytest.fixture() def url(server): return "http://%s:%s/" % server.bind_addr def get_free_port(): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.bind(("", 0)) ip, port = s.getsockname() s.close() ip = os.environ.get("WEBTEST_SERVER_BIND", "127.0.0.1") return ip, port def pytest_configure(config): cherrypy.tree.graft(SimpleApp(), "/") ip, port = get_free_port() cherrypy.config.update({"server.socket_host": ip, "server.socket_port": port}) # turn off logging logger = cherrypy.log.access_log logger.removeHandler(logger.handlers[0]) cherrypy.server.start() def pytest_unconfigure(config): try: cherrypy.server.stop() except: pass cachecontrol-0.14.0/tests/issue_263.py000066400000000000000000000021661455705413400175130ustar00rootroot00000000000000#!/usr/bin/env python3 import sys import cachecontrol import requests from cachecontrol.cache import DictCache from cachecontrol.heuristics import BaseHeuristic import logging clogger = logging.getLogger("cachecontrol") clogger.addHandler(logging.StreamHandler()) clogger.setLevel(logging.DEBUG) from pprint import pprint class NoAgeHeuristic(BaseHeuristic): def update_headers(self, response): if "cache-control" in response.headers: del response.headers["cache-control"] cache_adapter = cachecontrol.CacheControlAdapter( DictCache(), cache_etags=True, heuristic=NoAgeHeuristic() ) session = requests.Session() session.mount("https://", cache_adapter) def log_resp(resp): return print(f"{resp.status_code} {resp.request.method}") for k, v in response.headers.items(): print(f"{k}: {v}") for i in range(2): response = session.get( "https://api.github.com/repos/sigmavirus24/github3.py/pulls/1033" ) log_resp(response) print(f"Content length: {len(response.content)}") print(response.from_cache) if len(response.content) == 0: sys.exit(1) cachecontrol-0.14.0/tests/test_adapter.py000066400000000000000000000032141455705413400204430ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 from unittest import mock import pytest from requests import Session from cachecontrol.adapter import CacheControlAdapter from cachecontrol.cache import DictCache from cachecontrol.wrapper import CacheControl def use_wrapper(): print("Using helper") sess = CacheControl(Session()) return sess def use_adapter(): print("Using adapter") sess = Session() sess.mount("http://", CacheControlAdapter()) return sess @pytest.fixture(params=[use_adapter, use_wrapper]) def sess(url, request): sess = request.param() sess.get(url) yield sess # closing session object sess.close() class TestSessionActions: def test_get_caches(self, url, sess): r2 = sess.get(url) assert r2.from_cache is True def test_get_with_no_cache_does_not_cache(self, url, sess): r2 = sess.get(url, headers={"Cache-Control": "no-cache"}) assert not r2.from_cache def 
test_put_invalidates_cache(self, url, sess): r2 = sess.put(url, data={"foo": "bar"}) sess.get(url) assert not r2.from_cache def test_patch_invalidates_cache(self, url, sess): r2 = sess.patch(url, data={"foo": "bar"}) sess.get(url) assert not r2.from_cache def test_delete_invalidates_cache(self, url, sess): r2 = sess.delete(url) sess.get(url) assert not r2.from_cache def test_close(self): cache = mock.Mock(spec=DictCache) sess = Session() sess.mount("http://", CacheControlAdapter(cache)) sess.close() assert cache.close.called cachecontrol-0.14.0/tests/test_cache_control.py000066400000000000000000000252041455705413400216310ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 """ Unit tests that verify our caching methods work correctly. """ import time from tempfile import mkdtemp from unittest.mock import ANY, Mock import pytest from cachecontrol import CacheController from cachecontrol.cache import DictCache from cachecontrol.caches import SeparateBodyFileCache from .utils import DummyRequest, DummyResponse, NullSerializer TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT" class TestCacheControllerResponse: url = "http://url.com/" def req(self, headers=None): headers = headers or {} return Mock(full_url=self.url, url=self.url, headers=headers) # < 1.x support def resp(self, headers=None): headers = headers or {} return Mock( status=200, headers=headers, request=self.req(), read=lambda **k: b"testing" ) @pytest.fixture() def cc(self): # Cache controller fixture return CacheController(Mock(), serializer=Mock()) def test_no_cache_non_20x_response(self, cc): # No caching without some extra headers, so we add them now = time.strftime(TIME_FMT, time.gmtime()) resp = self.resp({"cache-control": "max-age=3600", "date": now}) no_cache_codes = [201, 400, 500] for code in no_cache_codes: resp.status = code cc.cache_response(Mock(), resp) assert not cc.cache.set.called # this should work b/c the resp is 20x resp.status = 203 cc.cache_response(self.req(), resp) assert cc.serializer.dumps.called assert cc.cache.set.called def test_no_cache_with_no_date(self, cc): # No date header which makes our max-age pointless resp = self.resp({"cache-control": "max-age=3600"}) cc.cache_response(self.req(), resp) assert not cc.cache.set.called def test_no_cache_with_wrong_sized_body(self, cc): # When the body is the wrong size, then we don't want to cache it # because it is obviously broken. 
resp = self.resp({"cache-control": "max-age=3600", "Content-Length": "5"}) cc.cache_response(self.req(), resp, b"0" * 10) assert not cc.cache.set.called def test_cache_response_no_cache_control(self, cc): resp = self.resp() cc.cache_response(self.req(), resp) assert not cc.cache.set.called def test_cache_response_cache_max_age(self, cc): now = time.strftime(TIME_FMT, time.gmtime()) resp = self.resp({"cache-control": "max-age=3600", "date": now}) req = self.req() cc.cache_response(req, resp) cc.serializer.dumps.assert_called_with(req, resp, None) cc.cache.set.assert_called_with(self.url, ANY, expires=3600) def test_cache_response_cache_max_age_with_invalid_value_not_cached(self, cc): now = time.strftime(TIME_FMT, time.gmtime()) # Not a valid header; this would be from a misconfigured server resp = self.resp({"cache-control": "max-age=3600; public", "date": now}) cc.cache_response(self.req(), resp) assert not cc.cache.set.called def test_cache_response_no_store(self): resp = Mock() cache = DictCache({self.url: resp}) cc = CacheController(cache) cache_url = cc.cache_url(self.url) resp = self.resp({"cache-control": "no-store"}) assert cc.cache.get(cache_url) cc.cache_response(self.req(), resp) assert not cc.cache.get(cache_url) def test_cache_response_no_store_with_etag(self, cc): resp = self.resp({"cache-control": "no-store", "ETag": "jfd9094r808"}) cc.cache_response(self.req(), resp) assert not cc.cache.set.called def test_no_cache_with_vary_star(self, cc): # Vary: * indicates that the response can never be served # from the cache, so storing it can be avoided. resp = self.resp({"vary": "*"}) cc.cache_response(self.req(), resp) assert not cc.cache.set.called def test_update_cached_response_no_local_cache(self): """ If the local cache doesn't have the given URL, just reuse the response passed to ``update_cached_response()`` """ cache = DictCache({}) cc = CacheController(cache) req = DummyRequest(url="http://localhost/", headers={"if-match": "xyz"}) resp = DummyResponse( status=304, headers={ "ETag": "xyz", "x-value": "b", "Date": time.strftime(TIME_FMT, time.gmtime()), "Cache-Control": "max-age=60", "Content-Length": "200", }, ) # First, ensure the response from update_cached_response() matches the # cached one: result = cc.update_cached_response(req, resp) assert result is resp def test_update_cached_response_with_valid_headers_separate_body(self): """ If the local cache has the given URL ``update_cached_response()`` will: 1. Load the body from the cache. 2. Update the stored headers to match the returned response. This is the version for a cache that stores a separate body. """ cache = SeparateBodyFileCache(mkdtemp()) self.update_cached_response_with_valid_headers_test(cache) def test_update_cached_response_with_valid_headers(self): """ If the local cache has the given URL ``update_cached_response()`` will: 1. Load the body from the cache. 2. Update the stored headers to match the returned response. This is the version for non-separate body. """ cache = DictCache({}) self.update_cached_response_with_valid_headers_test(cache) def update_cached_response_with_valid_headers_test(self, cache): """ If the local cache has the given URL ``update_cached_response()`` will: 1. Load the body from the cache. 2. Update the stored headers to match the returned response. This is the shared utility for any cache object. 
""" # Cache starts out prepopulated wih an entry: etag = "jfd9094r808" cc = CacheController(cache) url = "http://localhost:123/x" req = DummyRequest(url=url, headers={}) cached_resp = DummyResponse( status=200, headers={ "ETag": etag, "x-value:": "a", "Content-Length": "100", "Cache-Control": "max-age=60", "Date": time.strftime(TIME_FMT, time.gmtime()), }, ) cc._cache_set(url, req, cached_resp, b"my body") # Now we get another request, and it's a 304, with new value for # `x-value` header. # Set our content length to 200. That would be a mistake in # the server, but we'll handle it gracefully... for now. req = DummyRequest(url=url, headers={"if-match": etag}) resp = DummyResponse( status=304, headers={ "ETag": etag, "x-value": "b", "Date": time.strftime(TIME_FMT, time.gmtime()), "Cache-Control": "max-age=60", "Content-Length": "200", }, ) # First, ensure the response from update_cached_response() matches the # cached one: result = cc.update_cached_response(req, resp) # Second, ensure that the cache was updated: result2 = cc.cached_request(req) for r in [result, result2]: assert r.headers["ETag"] == etag assert r.headers["x-value"] == "b" assert r.headers["Content-Length"] == "100" assert r.read() == b"my body" class TestCacheControlRequest: url = "http://foo.com/bar" def setup_method(self): self.c = CacheController(DictCache(), serializer=NullSerializer()) def req(self, headers): mock_request = Mock(url=self.url, headers=headers) return self.c.cached_request(mock_request) def test_cache_request_no_headers(self): cached_resp = Mock( headers={"ETag": "jfd9094r808", "Content-Length": 100}, status=200 ) self.c.cache = DictCache({self.url: cached_resp}) resp = self.req({}) assert not resp def test_cache_request_no_cache(self): resp = self.req({"cache-control": "no-cache"}) assert not resp def test_cache_request_pragma_no_cache(self): resp = self.req({"pragma": "no-cache"}) assert not resp def test_cache_request_no_store(self): resp = self.req({"cache-control": "no-store"}) assert not resp def test_cache_request_max_age_0(self): resp = self.req({"cache-control": "max-age=0"}) assert not resp def test_cache_request_not_in_cache(self): resp = self.req({}) assert not resp def test_cache_request_fresh_max_age(self): now = time.strftime(TIME_FMT, time.gmtime()) resp = Mock(headers={"cache-control": "max-age=3600", "date": now}, status=200) cache = DictCache({self.url: resp}) self.c.cache = cache r = self.req({}) assert r == resp def test_cache_request_unfresh_max_age(self): earlier = time.time() - 3700 # epoch - 1h01m40s now = time.strftime(TIME_FMT, time.gmtime(earlier)) resp = Mock(headers={"cache-control": "max-age=3600", "date": now}, status=200) self.c.cache = DictCache({self.url: resp}) r = self.req({}) assert not r def test_cache_request_fresh_expires(self): later = time.time() + 86400 # GMT + 1 day expires = time.strftime(TIME_FMT, time.gmtime(later)) now = time.strftime(TIME_FMT, time.gmtime()) resp = Mock(headers={"expires": expires, "date": now}, status=200) cache = DictCache({self.url: resp}) self.c.cache = cache r = self.req({}) assert r == resp def test_cache_request_unfresh_expires(self): sooner = time.time() - 86400 # GMT - 1 day expires = time.strftime(TIME_FMT, time.gmtime(sooner)) now = time.strftime(TIME_FMT, time.gmtime()) resp = Mock(headers={"expires": expires, "date": now}, status=200) cache = DictCache({self.url: resp}) self.c.cache = cache r = self.req({}) assert not r def test_cached_request_with_bad_max_age_headers_not_returned(self): now = time.strftime(TIME_FMT, 
time.gmtime()) # Not a valid header; this would be from a misconfigured server resp = Mock(headers={"cache-control": "max-age=xxx", "date": now}, status=200) self.c.cache = DictCache({self.url: resp}) assert not self.req({}) cachecontrol-0.14.0/tests/test_chunked_response.py000066400000000000000000000026611455705413400223670ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 """ Test for supporting streamed responses (Transfer-Encoding: chunked) """ import pytest import requests from cachecontrol import CacheControl @pytest.fixture() def sess(): sess = CacheControl(requests.Session()) yield sess # closing session object sess.close() class TestChunkedResponses: def test_cache_chunked_response(self, url, sess): """ Verify that an otherwise cacheable response is cached when the response is chunked. """ url = url + "stream" r = sess.get(url) from pprint import pprint pprint(dict(r.headers)) pprint(dict(r.request.headers)) print(r.content) assert r.headers.get("transfer-encoding") == "chunked" r = sess.get(url, headers={"Cache-Control": "max-age=3600"}) assert r.from_cache is True def test_stream_is_cached(self, url, sess): resp_1 = sess.get(url + "stream") content_1 = resp_1.content resp_2 = sess.get(url + "stream") content_2 = resp_1.content assert not resp_1.from_cache assert resp_2.from_cache assert content_1 == content_2 def test_stream_is_not_cached_when_content_is_not_read(self, url, sess): sess.get(url + "stream", stream=True) resp = sess.get(url + "stream", stream=True) assert not resp.from_cache cachecontrol-0.14.0/tests/test_etag.py000066400000000000000000000147021455705413400177470ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 from contextlib import ExitStack, suppress from unittest.mock import Mock, patch from urllib.parse import urljoin import pytest import requests from cachecontrol import CacheControl from cachecontrol.cache import DictCache from tests.utils import NullSerializer class TestETag: """Test our equal priority caching with ETags Equal Priority Caching is a term I've defined to describe when ETags are cached orthgonally from Time Based Caching. """ @pytest.fixture() def sess(self, url): self.etag_url = urljoin(url, "/etag") self.update_etag_url = urljoin(url, "/update_etag") self.cache = DictCache() sess = CacheControl( requests.Session(), cache=self.cache, serializer=NullSerializer() ) yield sess # closing session object sess.close() def test_etags_get_example(self, sess, server): """RFC 2616 14.26 The If-None-Match request-header field is used with a method to make it conditional. A client that has one or more entities previously obtained from the resource can verify that none of those entities is current by including a list of their associated entity tags in the If-None-Match header field. The purpose of this feature is to allow efficient updates of cached information with a minimum amount of transaction overhead If any of the entity tags match the entity tag of the entity that would have been returned in the response to a similar GET request (without the If-None-Match header) on that resource, [...] then the server MUST NOT perform the requested method, [...]. Instead, if the request method was GET or HEAD, the server SHOULD respond with a 304 (Not Modified) response, including the cache-related header fields (particularly ETag) of one of the entities that matched. (Paraphrased) A server may provide an ETag header on a response. 
On subsequent queries, the client may reference the value of this Etag header in an If-None-Match header; on receiving such a header, the server can check whether the entity at that URL has changed from the clients last version, and if not, it can return a 304 to indicate the client can use it's current representation. """ r = sess.get(self.etag_url) # make sure we cached it assert self.cache.get(self.etag_url) == r.raw # make the same request resp = sess.get(self.etag_url) assert resp.raw == r.raw assert resp.from_cache # tell the server to change the etags of the response sess.get(self.update_etag_url) resp = sess.get(self.etag_url) assert resp != r assert not resp.from_cache # Make sure we updated our cache with the new etag'd response. assert self.cache.get(self.etag_url) == resp.raw def test_etags_get_no_cache(self, sess, server): """A 'Cache-Control: no-cache' header stops us from using the cache directly, but not from using the 'If-None-Match' header on the request.""" # get our response r = sess.get(self.etag_url) assert "if-none-match" not in r.request.headers r = sess.get(self.etag_url, headers={"Cache-Control": "no-cache"}) assert "if-none-match" in r.request.headers assert r.status_code == 200 # This response does come from the cache, but only after the 304 response from # the server told us that was fine. assert r.from_cache def test_etags_get_with_range(self, sess, server): """A 'Range' header stops us from using the cache altogether.""" # get our response r = sess.get(self.etag_url) r = sess.get(self.etag_url, headers={"Range": "0-10"}) assert "if-none-match" not in r.request.headers assert r.status_code == 200 assert not r.from_cache class TestDisabledETags: """Test our use of ETags when the response is stale and the response has an ETag. """ @pytest.fixture() def sess(self, server, url): self.etag_url = urljoin(url, "/etag") self.update_etag_url = urljoin(url, "/update_etag") self.cache = DictCache() sess = CacheControl( requests.Session(), cache=self.cache, cache_etags=False, serializer=NullSerializer(), ) return sess def test_expired_etags_if_none_match_response(self, sess): """Make sure an expired response that contains an ETag uses the If-None-Match header. """ # get our response r = sess.get(self.etag_url) # expire our request by changing the date. Our test endpoint # doesn't provide time base caching headers, so we add them # here in order to expire the request. r.headers["Date"] = "Tue, 26 Nov 2012 00:50:49 GMT" self.cache.set(self.etag_url, r.raw) r = sess.get(self.etag_url) assert r.from_cache assert "if-none-match" in r.request.headers assert r.status_code == 200 class TestReleaseConnection: """ On 304s we still make a request using our connection pool, yet we do not call the parent adapter, which releases the connection back to the pool. This test ensures that when the parent `get` method is not called we consume the response (which should be empty according to the HTTP spec) and release the connection. """ def test_not_modified_releases_connection(self, server, url): sess = CacheControl(requests.Session()) etag_url = urljoin(url, "/etag") sess.get(etag_url) resp = Mock(status=304, headers={}) # These are various ways the the urllib3 response can created # in requests.adapters. Which one is actually used depends # on which version if `requests` is in use, as well as perhaps # other parameters. 
response_mods = [ "requests.adapters.HTTPResponse.from_httplib", "urllib3.HTTPConnectionPool.urlopen", ] with ExitStack() as stack: for mod in response_mods: with suppress(ImportError, AttributeError): stack.enter_context(patch(mod, Mock(return_value=resp))) sess.get(etag_url) assert resp.read.called assert resp.release_conn.called cachecontrol-0.14.0/tests/test_expires_heuristics.py000066400000000000000000000150171455705413400227500ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 import calendar import time from datetime import datetime, timezone from email.utils import formatdate, parsedate from pprint import pprint from unittest.mock import Mock from requests import Session, get from cachecontrol import CacheControl from cachecontrol.heuristics import ( TIME_FMT, BaseHeuristic, ExpiresAfter, LastModified, OneDayCache, ) from .utils import DummyResponse class TestHeuristicWithoutWarning: def setup_method(self): class NoopHeuristic(BaseHeuristic): warning = Mock() def update_headers(self, resp): return {} self.heuristic = NoopHeuristic() self.sess = CacheControl(Session(), heuristic=self.heuristic) def test_no_header_change_means_no_warning_header(self, url): the_url = url + "optional_cacheable_request" self.sess.get(the_url) assert not self.heuristic.warning.called class TestHeuristicWith3xxResponse: def setup_method(self): class DummyHeuristic(BaseHeuristic): def update_headers(self, resp): return {"x-dummy-header": "foobar"} self.sess = CacheControl(Session(), heuristic=DummyHeuristic()) def test_heuristic_applies_to_301(self, url): the_url = url + "permanent_redirect" resp = self.sess.get(the_url) assert "x-dummy-header" in resp.headers def test_heuristic_applies_to_304(self, url): the_url = url + "conditional_get" resp = self.sess.get(the_url) assert "x-dummy-header" in resp.headers class TestUseExpiresHeuristic: def test_expires_heuristic_arg(self): sess = Session() cached_sess = CacheControl(sess, heuristic=Mock()) assert cached_sess class TestOneDayCache: def setup_method(self): self.sess = Session() self.cached_sess = CacheControl(self.sess, heuristic=OneDayCache()) def test_cache_for_one_day(self, url): the_url = url + "optional_cacheable_request" r = self.sess.get(the_url) assert "expires" in r.headers assert "warning" in r.headers pprint(dict(r.headers)) r = self.sess.get(the_url) pprint(dict(r.headers)) assert r.from_cache class TestExpiresAfter: def setup_method(self): self.sess = Session() self.cache_sess = CacheControl(self.sess, heuristic=ExpiresAfter(days=1)) def test_expires_after_one_day(self, url): the_url = url + "no_cache" resp = get(the_url) assert resp.headers["cache-control"] == "no-cache" r = self.sess.get(the_url) assert "expires" in r.headers assert "warning" in r.headers assert r.headers["cache-control"] == "public" r = self.sess.get(the_url) assert r.from_cache class TestLastModified: def setup_method(self): self.sess = Session() self.cached_sess = CacheControl(self.sess, heuristic=LastModified()) def test_last_modified(self, url): the_url = url + "optional_cacheable_request" r = self.sess.get(the_url) assert "expires" in r.headers assert "warning" not in r.headers pprint(dict(r.headers)) r = self.sess.get(the_url) pprint(dict(r.headers)) assert r.from_cache def datetime_to_header(dt): return formatdate(calendar.timegm(dt.timetuple())) class TestModifiedUnitTests: def last_modified(self, period): return time.strftime(TIME_FMT, time.gmtime(self.time_now - period)) def setup_method(self): 
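        """Precompute formatted dates at fixed offsets from now for the cases below."""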
self.heuristic = LastModified() self.time_now = time.time() day_in_seconds = 86400 self.year_ago = self.last_modified(day_in_seconds * 365) self.week_ago = self.last_modified(day_in_seconds * 7) self.day_ago = self.last_modified(day_in_seconds) self.now = self.last_modified(0) # NOTE: We pass in a negative to get a positive... Probably # should refactor. self.day_ahead = self.last_modified(-day_in_seconds) def test_no_expiry_is_inferred_when_no_last_modified_is_present(self): assert self.heuristic.update_headers(DummyResponse(200, {})) == {} def test_expires_is_not_replaced_when_present(self): resp = DummyResponse(200, {"Expires": self.day_ahead}) assert self.heuristic.update_headers(resp) == {} def test_last_modified_is_used(self): resp = DummyResponse(200, {"Date": self.now, "Last-Modified": self.week_ago}) modified = self.heuristic.update_headers(resp) assert ["expires"] == list(modified.keys()) expected = datetime(*parsedate(modified["expires"])[:6], tzinfo=timezone.utc) assert expected > datetime.now(timezone.utc) def test_last_modified_is_not_used_when_cache_control_present(self): resp = DummyResponse( 200, { "Date": self.now, "Last-Modified": self.week_ago, "Cache-Control": "private", }, ) assert self.heuristic.update_headers(resp) == {} def test_last_modified_is_not_used_when_status_is_unknown(self): resp = DummyResponse(299, {"Date": self.now, "Last-Modified": self.week_ago}) assert self.heuristic.update_headers(resp) == {} def test_last_modified_is_used_when_cache_control_public(self): resp = DummyResponse( 200, { "Date": self.now, "Last-Modified": self.week_ago, "Cache-Control": "public", }, ) modified = self.heuristic.update_headers(resp) assert ["expires"] == list(modified.keys()) expected = datetime(*parsedate(modified["expires"])[:6], tzinfo=timezone.utc) assert expected > datetime.now(timezone.utc) def test_warning_not_added_when_response_more_recent_than_24_hours(self): resp = DummyResponse(200, {"Date": self.now, "Last-Modified": self.week_ago}) assert self.heuristic.warning(resp) is None def test_warning_is_not_added_when_heuristic_was_not_used(self): resp = DummyResponse(200, {"Date": self.now, "Expires": self.day_ahead}) assert self.heuristic.warning(resp) is None def test_expiry_is_no_more_that_twenty_four_hours(self): resp = DummyResponse(200, {"Date": self.now, "Last-Modified": self.year_ago}) modified = self.heuristic.update_headers(resp) assert ["expires"] == list(modified.keys()) assert self.day_ahead == modified["expires"] cachecontrol-0.14.0/tests/test_max_age.py000066400000000000000000000033361455705413400204310ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 import pytest from requests import Session from cachecontrol.adapter import CacheControlAdapter from cachecontrol.cache import DictCache from .utils import NullSerializer class TestMaxAge: @pytest.fixture() def sess(self, url): self.url = url self.cache = DictCache() sess = Session() sess.mount( "http://", CacheControlAdapter(self.cache, serializer=NullSerializer()) ) return sess def test_client_max_age_0(self, sess): """ Making sure when the client uses max-age=0 we don't get a cached copy even though we're still fresh. 
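
        (In HTTP terms: a request "max-age=0" directive says the client
        will not accept a stored response older than zero seconds, so
        the cache must revalidate even a perfectly fresh copy.)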
""" print("first request") r = sess.get(self.url) assert self.cache.get(self.url) == r.raw print("second request") r = sess.get(self.url, headers={"Cache-Control": "max-age=0"}) # don't remove from the cache assert self.cache.get(self.url) assert not r.from_cache def test_client_max_age_3600(self, sess): """ Verify we get a cached value when the client has a reasonable max-age value. """ r = sess.get(self.url) assert self.cache.get(self.url) == r.raw # request that we don't want a new one unless r = sess.get(self.url, headers={"Cache-Control": "max-age=3600"}) assert r.from_cache is True # now lets grab one that forces a new request b/c the cache # has expired. To do that we'll inject a new time value. resp = self.cache.get(self.url) resp.headers["date"] = "Tue, 15 Nov 1994 08:12:31 GMT" r = sess.get(self.url) assert not r.from_cache cachecontrol-0.14.0/tests/test_redirects.py000066400000000000000000000030601455705413400210060ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 """ Test for supporting redirect caches as needed. """ import requests from cachecontrol import CacheControl class TestPermanentRedirects: def setup_method(self): self.sess = CacheControl(requests.Session()) def test_redirect_response_is_cached(self, url): self.sess.get(url + "permanent_redirect", allow_redirects=False) resp = self.sess.get(url + "permanent_redirect", allow_redirects=False) assert resp.from_cache def test_bust_cache_on_redirect(self, url): self.sess.get(url + "permanent_redirect", allow_redirects=False) resp = self.sess.get( url + "permanent_redirect", headers={"cache-control": "no-cache"}, allow_redirects=False, ) assert not resp.from_cache class TestMultipleChoicesRedirects: def setup_method(self): self.sess = CacheControl(requests.Session()) def test_multiple_choices_is_cacheable(self, url): self.sess.get(url + "multiple_choices_redirect", allow_redirects=False) resp = self.sess.get(url + "multiple_choices_redirect", allow_redirects=False) assert resp.from_cache def test_bust_cache_on_redirect(self, url): self.sess.get(url + "multiple_choices_redirect", allow_redirects=False) resp = self.sess.get( url + "multiple_choices_redirect", headers={"cache-control": "no-cache"}, allow_redirects=False, ) assert not resp.from_cache cachecontrol-0.14.0/tests/test_regressions.py000066400000000000000000000015011455705413400213630ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 import pytest from cachecontrol import CacheControl from cachecontrol.caches import FileCache from cachecontrol.filewrapper import CallbackFileWrapper from requests import Session class Test39: def test_file_cache_recognizes_consumed_file_handle(self, url): s = CacheControl(Session(), FileCache("web_cache")) the_url = url + "cache_60" s.get(the_url) r = s.get(the_url) assert r.from_cache s.close() def test_getattr_during_gc(): s = CallbackFileWrapper(None, None) # normal behavior: with pytest.raises(AttributeError): s.x # this previously had caused an infinite recursion vars(s).clear() # gc does this. 
    with pytest.raises(AttributeError):
        s.x
cachecontrol-0.14.0/tests/test_serialization.py000066400000000000000000000070441455705413400217050ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

import pickle
from unittest.mock import Mock

import msgpack
import requests

from cachecontrol.serialize import Serializer


class TestSerializer:
    def setup_method(self):
        self.serializer = Serializer()
        self.response_data = {
            "response": {
                # Encode the body as bytes b/c it will eventually be
                # converted back into a BytesIO object.
                "body": b"Hello World",
                "headers": {
                    "Content-Type": "text/plain",
                    "Expires": "87654",
                    "Cache-Control": "public",
                },
                "status": 200,
                "version": 11,
                "reason": "",
                "strict": True,
                "decode_content": True,
            }
        }

    def test_load_by_version_v0(self):
        data = b"cc=0,somedata"
        req = Mock()
        resp = self.serializer.loads(req, data)
        assert resp is None

    def test_load_by_version_v1(self):
        data = b"cc=1,somedata"
        req = Mock()
        resp = self.serializer.loads(req, data)
        assert resp is None

    def test_load_by_version_v2(self):
        data = b"cc=2,somedata"
        req = Mock()
        resp = self.serializer.loads(req, data)
        assert resp is None

    def test_load_by_version_v3(self):
        data = b"cc=3,somedata"
        req = Mock()
        resp = self.serializer.loads(req, data)
        assert resp is None

    def test_read_version_v4(self):
        req = Mock()
        resp = self.serializer._loads_v4(req, msgpack.dumps(self.response_data))
        # The body round-trips as bytes on the deserialized urllib3
        # response.
        assert resp.data == b"Hello World"

    def test_read_latest_version_streamable(self, url):
        original_resp = requests.get(url, stream=True)
        req = original_resp.request

        resp = self.serializer.loads(
            req, self.serializer.dumps(req, original_resp.raw, original_resp.content)
        )

        assert resp.read()

    def test_read_latest_version(self, url):
        original_resp = requests.get(url)
        data = original_resp.content
        req = original_resp.request

        resp = self.serializer.loads(
            req, self.serializer.dumps(req, original_resp.raw, data)
        )

        assert resp.read() == data

    def test_no_vary_header(self, url):
        original_resp = requests.get(url)
        data = original_resp.content
        req = original_resp.request

        # We make sure our response has a Vary header and that the
        # request doesn't have the header.
        original_resp.raw.headers["vary"] = "Foo"

        assert self.serializer.loads(
            req, self.serializer.dumps(req, original_resp.raw, data)
        )

    def test_no_body_creates_response_file_handle_on_dumps(self, url):
        original_resp = requests.get(url, stream=True)
        data = None
        req = original_resp.request

        assert self.serializer.loads(
            req, self.serializer.dumps(req, original_resp.raw, data)
        )

        # By passing in data=None we force a read of the file handle.
        # Reading it again proves we're resetting the internal file
        # handle with a buffer.
        assert original_resp.raw.read()

    def test_no_incomplete_read_on_dumps(self, url):
        resp = requests.get(url + "fixed_length", stream=True)
        self.serializer.dumps(resp.request, resp.raw)

        assert resp.content == b"0123456789"
cachecontrol-0.14.0/tests/test_server_http_version.py000066400000000000000000000003621455705413400231360ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

import requests


def test_http11(url):
    resp = requests.get(url)

    # Making sure our test server speaks HTTP/1.1
    assert resp.raw._fp.version == 11
cachecontrol-0.14.0/tests/test_storage_filecache.py000066400000000000000000000127701455705413400224600ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

"""
Unit tests that verify FileCache storage works correctly.
"""
import os
import string
from random import randint, sample

import pytest
import requests
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache, SeparateBodyFileCache
from filelock import FileLock


def randomdata():
    """Plain random HTTP data generator."""
    key = "".join(sample(string.ascii_lowercase, randint(2, 4)))
    val = "".join(sample(string.ascii_lowercase + string.digits, randint(2, 10)))
    return f"&{key}={val}"


class FileCacheTestsMixin:
    FileCacheClass = None  # Either FileCache or SeparateBodyFileCache

    @pytest.fixture()
    def sess(self, url, tmpdir):
        self.url = url
        self.cache = self.FileCacheClass(str(tmpdir))
        sess = CacheControl(requests.Session(), cache=self.cache)
        yield sess

        # closing session object
        sess.close()

    def test_filecache_from_cache(self, sess):
        response = sess.get(self.url)
        assert not response.from_cache
        response = sess.get(self.url)
        assert response.from_cache

    def test_filecache_directory_not_exists(self, tmpdir, sess):
        url = self.url + "".join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir doesn't exist
        tmp_cache = tmpdir.join("missing", "folder", "name").strpath
        assert not os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        # Now our cache dir does exist
        assert os.path.exists(tmp_cache)

    def test_filecache_directory_already_exists(self, tmpdir, sess):
        """
        Assert no errors are raised when using a cache directory
        that already exists on the filesystem.
        """
        url = self.url + "".join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir DOES exist
        tmp_cache = tmpdir.join("missing", "folder", "name").strpath
        os.makedirs(tmp_cache, self.cache.dirmode)
        assert os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        assert True  # b/c no exceptions were raised

    def test_key_length(self, sess):
        """
        Hash table keys: most file systems impose a 255-character limit
        on each path component.

        * Make sure the hash method does not produce overly long keys
        * Ideally the hash method generates fixed-length keys
        """
        url0 = url1 = "http://example.org/res?a=1"
        while len(url0) < 255:
            url0 += randomdata()
            url1 += randomdata()
        assert len(self.cache.encode(url0)) < 200
        assert len(self.cache.encode(url0)) == len(self.cache.encode(url1))

    def test_simple_lockfile_arg(self, tmpdir):
        cache = self.FileCacheClass(str(tmpdir))
        assert issubclass(cache.lock_class, FileLock)
        cache.close()

    def test_lock_class(self, tmpdir):
        lock_class = object()
        cache = self.FileCacheClass(str(tmpdir), lock_class=lock_class)
        assert cache.lock_class is lock_class
        cache.close()

    def test_filecache_with_delete_request(self, tmpdir, sess):
        # verifies issue #155
        url = self.url + "".join(sample(string.ascii_lowercase, randint(2, 4)))
        sess.delete(url)
        assert True  # test verifies no exceptions were raised

    def test_filecache_with_put_request(self, tmpdir, sess):
        # verifies issue #155
        url = self.url + "".join(sample(string.ascii_lowercase, randint(2, 4)))
        sess.put(url)
        assert True  # test verifies no exceptions were raised


class TestFileCache(FileCacheTestsMixin):
    """
    Tests for ``FileCache``.
    """

    FileCacheClass = FileCache

    def test_body_stored_inline(self, sess):
        """The body is stored together with the metadata."""
        url = self.url + "cache_60"
        response = sess.get(url)
        body = response.content
        response2 = sess.get(url)
        assert response2.from_cache
        assert response2.content == body

        # OK now let's violate some abstraction boundaries to make sure
        # the body was stored in the metadata file.
        with open(self.cache._fn(url), "rb") as f:
            assert body in f.read()
        assert not os.path.exists(self.cache._fn(url) + ".body")


class TestSeparateBodyFileCache(FileCacheTestsMixin):
    """
    Tests for ``SeparateBodyFileCache``
    """

    FileCacheClass = SeparateBodyFileCache

    def test_body_actually_stored_separately(self, sess):
        """
        Body is stored and can be retrieved from the
        SeparateBodyFileCache, with assurances it's actually being
        loaded from a separate file from the metadata.
        """
        url = self.url + "cache_60"
        response = sess.get(url)
        body = response.content
        response2 = sess.get(url)
        assert response2.from_cache
        assert response2.content == body

        # OK now let's violate some abstraction boundaries to make sure
        # the body actually came from a separate file.
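        # (The cache names its metadata file via self.cache._fn(url);
        # SeparateBodyFileCache writes the body to that same path plus a
        # ".body" suffix, which is what the checks below rely on.)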
        with open(self.cache._fn(url), "rb") as f:
            assert body not in f.read()
        with open(self.cache._fn(url) + ".body", "rb") as f:
            assert body == f.read()

        with open(self.cache._fn(url) + ".body", "wb") as f:
            f.write(b"CORRUPTED")
        response2 = sess.get(url)
        assert response2.from_cache
        assert response2.content == b"CORRUPTED"
cachecontrol-0.14.0/tests/test_storage_redis.py000066400000000000000000000014101455705413400216510ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

from datetime import datetime, timezone
from unittest.mock import Mock

from cachecontrol.caches import RedisCache


class TestRedisCache:
    def setup_method(self):
        self.conn = Mock()
        self.cache = RedisCache(self.conn)

    def test_set_expiration_datetime(self):
        self.cache.set("foo", "bar", expires=datetime(2014, 2, 2))
        assert self.conn.setex.called

    def test_set_expiration_datetime_aware(self):
        self.cache.set("foo", "bar", expires=datetime(2014, 2, 2, tzinfo=timezone.utc))
        assert self.conn.setex.called

    def test_set_expiration_int(self):
        self.cache.set("foo", "bar", expires=600)
        assert self.conn.setex.called
cachecontrol-0.14.0/tests/test_vary.py000066400000000000000000000056041455705413400200110ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

from pprint import pprint
from urllib.parse import urljoin

import pytest
import requests

from cachecontrol import CacheControl
from cachecontrol.cache import DictCache


class TestVary:
    @pytest.fixture()
    def sess(self, url):
        self.url = urljoin(url, "/vary_accept")
        self.cache = DictCache()
        sess = CacheControl(requests.Session(), cache=self.cache)
        return sess

    def cached_equal(self, cached, resp):
        # remove any transfer-encoding headers as they don't apply to
        # a cached value
        if "chunked" in resp.raw.headers.get("transfer-encoding", ""):
            resp.raw.headers.pop("transfer-encoding")

        checks = [
            cached._fp.getvalue() == resp.content,
            cached.headers == resp.raw.headers,
            cached.status == resp.raw.status,
            cached.version == resp.raw.version,
            cached.reason == resp.raw.reason,
            cached.decode_content == resp.raw.decode_content,
        ]

        print(checks)
        pprint(dict(cached.headers))
        pprint(dict(resp.raw.headers))
        return all(checks)

    def test_vary_example(self, sess):
        """RFC 2616 13.6

        When the cache receives a subsequent request whose Request-URI
        specifies one or more cache entries including a Vary header
        field, the cache MUST NOT use such a cache entry to construct a
        response to the new request unless all of the selecting
        request-headers present in the new request match the
        corresponding stored request-headers in the original request.

        Or, in simpler terms: when a response defines a Vary header,
        the cached value is not reused unless every header named in
        Vary carries the same value on the new request as it did on
        the original one.
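
        A minimal sketch of the rule (the Accept values here are
        illustrative; /vary_accept includes Accept in its Vary list):

            r1 = sess.get(self.url)                                   # primes the cache
            r2 = sess.get(self.url)                                   # same headers: hit
            r3 = sess.get(self.url, headers={"Accept": "text/plain"})
            assert not r3.from_cache                                  # Accept changed: miss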
""" s = sess.adapters["http://"].controller.serializer r = sess.get(self.url) c = s.loads(r.request, self.cache.get(self.url)) # make sure we cached it assert self.cached_equal(c, r) # make the same request resp = sess.get(self.url) assert self.cached_equal(c, resp) assert resp.from_cache # make a similar request, changing the accept header resp = sess.get(self.url, headers={"Accept": "text/plain, text/html"}) assert not self.cached_equal(c, resp) assert not resp.from_cache # Just confirming two things here: # # 1) The server used the vary header # 2) We have more than one header we vary on # # The reason for this is that when we don't specify the header # in the request, it is considered the same in terms of # whether or not to use the cached value. assert "vary" in r.headers assert len(r.headers["vary"].replace(" ", "").split(",")) == 2 cachecontrol-0.14.0/tests/utils.py000066400000000000000000000015231455705413400171250ustar00rootroot00000000000000""" Shared utility classes. """ from requests.structures import CaseInsensitiveDict from cachecontrol.serialize import Serializer class NullSerializer(Serializer): def dumps(self, request, response, body=None): return response def loads(self, request, data, body_file=None): if data and getattr(data, "chunked", False): data.chunked = False return data class DummyResponse: """Match a ``urllib3.response.HTTPResponse``.""" version = "1.1" reason = b"Because" strict = 0 decode_content = False def __init__(self, status, headers): self.status = status self.headers = CaseInsensitiveDict(headers) class DummyRequest: """Match a Request.""" def __init__(self, url, headers): self.url = url self.headers = CaseInsensitiveDict(headers) cachecontrol-0.14.0/tox.ini000066400000000000000000000010311455705413400155560ustar00rootroot00000000000000; SPDX-FileCopyrightText: 2015 Eric Larson ; ; SPDX-License-Identifier: Apache-2.0 [tox] isolated_build = True envlist = py{37,38,39,310,311,312}, mypy [gh-actions] python = 3.7: py37 3.8: py38 3.9: py39 3.10: py310 3.11: py311 3.12: py312, mypy [testenv] deps = pytest cherrypy redis>=2.10.5 filelock>=3.8.0 commands = py.test {posargs:tests/} [testenv:mypy] deps = {[testenv]deps} mypy types-redis types-requests commands = mypy {posargs:cachecontrol}