CacheControl-0.11.7/0000755000076500000240000000000012765672030014221 5ustar ericstaff00000000000000CacheControl-0.11.7/cachecontrol/0000755000076500000240000000000012765672030016665 5ustar ericstaff00000000000000CacheControl-0.11.7/cachecontrol/__init__.py0000644000076500000240000000045612765672004021004 0ustar ericstaff00000000000000"""CacheControl import Interface. Make it easy to import from cachecontrol without long namespaces. """ __author__ = 'Eric Larson' __email__ = 'eric@ionrock.org' __version__ = '0.11.7' from .wrapper import CacheControl from .adapter import CacheControlAdapter from .controller import CacheController CacheControl-0.11.7/cachecontrol/_cmd.py0000644000076500000240000000236312737301617020144 0ustar ericstaff00000000000000import logging import requests from cachecontrol.adapter import CacheControlAdapter from cachecontrol.cache import DictCache from cachecontrol.controller import logger from argparse import ArgumentParser def setup_logging(): logger.setLevel(logging.DEBUG) handler = logging.StreamHandler() logger.addHandler(handler) def get_session(): adapter = CacheControlAdapter( DictCache(), cache_etags=True, serializer=None, heuristic=None, ) sess = requests.Session() sess.mount('http://', adapter) sess.mount('https://', adapter) sess.cache_controller = adapter.controller return sess def get_args(): parser = ArgumentParser() parser.add_argument('url', help='The URL to try and cache') return parser.parse_args() def main(args=None): args = get_args() sess = get_session() # Make a request to get a response resp = sess.get(args.url) # Turn on logging setup_logging() # try setting the cache sess.cache_controller.cache_response(resp.request, resp.raw) # Now try to get it if sess.cache_controller.cached_request(resp.request): print('Cached!') else: print('Not cached :(') if __name__ == '__main__': main() CacheControl-0.11.7/cachecontrol/adapter.py0000644000076500000240000001076412765671617020701 0ustar ericstaff00000000000000import types import functools from requests.adapters import HTTPAdapter from .controller import CacheController from .cache import DictCache from .filewrapper import CallbackFileWrapper class CacheControlAdapter(HTTPAdapter): invalidating_methods = set(['PUT', 'DELETE']) def __init__(self, cache=None, cache_etags=True, controller_class=None, serializer=None, heuristic=None, *args, **kw): super(CacheControlAdapter, self).__init__(*args, **kw) self.cache = cache or DictCache() self.heuristic = heuristic controller_factory = controller_class or CacheController self.controller = controller_factory( self.cache, cache_etags=cache_etags, serializer=serializer, ) def send(self, request, **kw): """ Send a request. Use the request information to see if it exists in the cache and cache the response if we need to and can. """ if request.method == 'GET': cached_response = self.controller.cached_request(request) if cached_response: return self.build_response(request, cached_response, from_cache=True) # check for etags and add headers if appropriate request.headers.update( self.controller.conditional_headers(request) ) resp = super(CacheControlAdapter, self).send(request, **kw) return resp def build_response(self, request, response, from_cache=False): """ Build a response by making a request or using the cache. This will end up calling send and returning a potentially cached response """ if not from_cache and request.method == 'GET': # Check for any heuristics that might update headers # before trying to cache. 
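            # A heuristic (see cachecontrol.heuristics.BaseHeuristic)
            # may rewrite headers, e.g. by injecting an Expires header,
            # and should also add a Warning header so clients can tell
            # that the freshness was applied locally.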
            if self.heuristic:
                response = self.heuristic.apply(response)

            # apply any expiration heuristics
            if response.status == 304:
                # We must have sent an ETag request. This could mean
                # that we've been expired already or that we simply
                # have an etag. In either case, we want to try and
                # update the cache if that is the case.
                cached_response = self.controller.update_cached_response(
                    request, response
                )

                if cached_response is not response:
                    from_cache = True

                # We are done with the server response, read a
                # possible response body (compliant servers will
                # not return one, but we cannot be 100% sure) and
                # release the connection back to the pool.
                response.read(decode_content=False)
                response.release_conn()

                response = cached_response

            # We always cache 301 responses
            elif response.status == 301:
                self.controller.cache_response(request, response)
            else:
                # Wrap the response file with a wrapper that will cache the
                # response when the stream has been consumed.
                response._fp = CallbackFileWrapper(
                    response._fp,
                    functools.partial(
                        self.controller.cache_response,
                        request,
                        response,
                    )
                )
                if response.chunked:
                    super_update_chunk_length = response._update_chunk_length

                    def _update_chunk_length(self):
                        super_update_chunk_length()
                        if self.chunk_left == 0:
                            self._fp._close()
                    response._update_chunk_length = types.MethodType(
                        _update_chunk_length, response
                    )

        resp = super(CacheControlAdapter, self).build_response(
            request, response
        )

        # See if we should invalidate the cache.
        if request.method in self.invalidating_methods and resp.ok:
            cache_url = self.controller.cache_url(request.url)
            self.cache.delete(cache_url)

        # Give the response a from_cache attr so callers can tell
        # whether it was served from the cache.
        resp.from_cache = from_cache

        return resp

    def close(self):
        self.cache.close()
        super(CacheControlAdapter, self).close()
CacheControl-0.11.7/cachecontrol/cache.py0000644000076500000240000000142612633360076020303 0ustar ericstaff00000000000000
"""
The cache object API for implementing caches. The default is a thread
safe in-memory dictionary.
"""
from threading import Lock


class BaseCache(object):

    def get(self, key):
        raise NotImplementedError()

    def set(self, key, value):
        raise NotImplementedError()

    def delete(self, key):
        raise NotImplementedError()

    def close(self):
        pass


class DictCache(BaseCache):

    def __init__(self, init_dict=None):
        self.lock = Lock()
        self.data = init_dict or {}

    def get(self, key):
        return self.data.get(key, None)

    def set(self, key, value):
        with self.lock:
            self.data.update({key: value})

    def delete(self, key):
        with self.lock:
            if key in self.data:
                self.data.pop(key)
CacheControl-0.11.7/cachecontrol/caches/0000755000076500000240000000000012765672030020113 5ustar ericstaff00000000000000CacheControl-0.11.7/cachecontrol/caches/__init__.py0000644000076500000240000000056112633360076022224 0ustar ericstaff00000000000000
from textwrap import dedent

try:
    from .file_cache import FileCache
except ImportError:
    notice = dedent('''
    NOTE: In order to use the FileCache you must have
    lockfile installed.
You can install it via pip: pip install lockfile ''') print(notice) try: import redis from .redis_cache import RedisCache except ImportError: pass CacheControl-0.11.7/cachecontrol/caches/file_cache.py0000644000076500000240000000666412633360076022541 0ustar ericstaff00000000000000import hashlib import os from lockfile import LockFile from lockfile.mkdirlockfile import MkdirLockFile from ..cache import BaseCache from ..controller import CacheController def _secure_open_write(filename, fmode): # We only want to write to this file, so open it in write only mode flags = os.O_WRONLY # os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only # will open *new* files. # We specify this because we want to ensure that the mode we pass is the # mode of the file. flags |= os.O_CREAT | os.O_EXCL # Do not follow symlinks to prevent someone from making a symlink that # we follow and insecurely open a cache file. if hasattr(os, "O_NOFOLLOW"): flags |= os.O_NOFOLLOW # On Windows we'll mark this file as binary if hasattr(os, "O_BINARY"): flags |= os.O_BINARY # Before we open our file, we want to delete any existing file that is # there try: os.remove(filename) except (IOError, OSError): # The file must not exist already, so we can just skip ahead to opening pass # Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a # race condition happens between the os.remove and this line, that an # error will be raised. Because we utilize a lockfile this should only # happen if someone is attempting to attack us. fd = os.open(filename, flags, fmode) try: return os.fdopen(fd, "wb") except: # An error occurred wrapping our FD in a file object os.close(fd) raise class FileCache(BaseCache): def __init__(self, directory, forever=False, filemode=0o0600, dirmode=0o0700, use_dir_lock=None, lock_class=None): if use_dir_lock is not None and lock_class is not None: raise ValueError("Cannot use use_dir_lock and lock_class together") if use_dir_lock: lock_class = MkdirLockFile if lock_class is None: lock_class = LockFile self.directory = directory self.forever = forever self.filemode = filemode self.dirmode = dirmode self.lock_class = lock_class @staticmethod def encode(x): return hashlib.sha224(x.encode()).hexdigest() def _fn(self, name): # NOTE: This method should not change as some may depend on it. # See: https://github.com/ionrock/cachecontrol/issues/63 hashed = self.encode(name) parts = list(hashed[:5]) + [hashed] return os.path.join(self.directory, *parts) def get(self, key): name = self._fn(key) if not os.path.exists(name): return None with open(name, 'rb') as fh: return fh.read() def set(self, key, value): name = self._fn(key) # Make sure the directory exists try: os.makedirs(os.path.dirname(name), self.dirmode) except (IOError, OSError): pass with self.lock_class(name) as lock: # Write our actual file with _secure_open_write(lock.path, self.filemode) as fh: fh.write(value) def delete(self, key): name = self._fn(key) if not self.forever: os.remove(name) def url_to_file_path(url, filecache): """Return the file cache path based on the URL. This does not ensure the file exists! 
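
    As an illustration (hypothetical digest shown), the key is hashed
    with sha224 and the first five hex characters become nested
    directories:

        <directory>/8/f/3/2/a/8f32a... (the full sha224 hex digest)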
""" key = CacheController.cache_url(url) return filecache._fn(key) CacheControl-0.11.7/cachecontrol/caches/redis_cache.py0000644000076500000240000000171512633360076022720 0ustar ericstaff00000000000000from __future__ import division from datetime import datetime def total_seconds(td): """Python 2.6 compatability""" if hasattr(td, 'total_seconds'): return td.total_seconds() ms = td.microseconds secs = (td.seconds + td.days * 24 * 3600) return (ms + secs * 10**6) / 10**6 class RedisCache(object): def __init__(self, conn): self.conn = conn def get(self, key): return self.conn.get(key) def set(self, key, value, expires=None): if not expires: self.conn.set(key, value) else: expires = expires - datetime.now() self.conn.setex(key, total_seconds(expires), value) def delete(self, key): self.conn.delete(key) def clear(self): """Helper for clearing all the keys in a database. Use with caution!""" for key in self.conn.keys(): self.conn.delete(key) def close(self): self.conn.disconnect() CacheControl-0.11.7/cachecontrol/compat.py0000644000076500000240000000121512633360076020517 0ustar ericstaff00000000000000try: from urllib.parse import urljoin except ImportError: from urlparse import urljoin try: import cPickle as pickle except ImportError: import pickle # Handle the case where the requests module has been patched to not have # urllib3 bundled as part of its source. try: from requests.packages.urllib3.response import HTTPResponse except ImportError: from urllib3.response import HTTPResponse try: from requests.packages.urllib3.util import is_fp_closed except ImportError: from urllib3.util import is_fp_closed # Replicate some six behaviour try: text_type = (unicode,) except NameError: text_type = (str,) CacheControl-0.11.7/cachecontrol/controller.py0000644000076500000240000003132412674775674021446 0ustar ericstaff00000000000000""" The httplib2 algorithms ported for use with requests. """ import logging import re import calendar import time from email.utils import parsedate_tz from requests.structures import CaseInsensitiveDict from .cache import DictCache from .serialize import Serializer logger = logging.getLogger(__name__) URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") def parse_uri(uri): """Parses a URI using the regex given in Appendix B of RFC 3986. (scheme, authority, path, query, fragment) = parse_uri(uri) """ groups = URI.match(uri).groups() return (groups[1], groups[3], groups[4], groups[6], groups[8]) class CacheController(object): """An interface to see if request should cached or not. """ def __init__(self, cache=None, cache_etags=True, serializer=None): self.cache = cache or DictCache() self.cache_etags = cache_etags self.serializer = serializer or Serializer() @classmethod def _urlnorm(cls, uri): """Normalize the URL to create a safe key for the cache""" (scheme, authority, path, query, fragment) = parse_uri(uri) if not scheme or not authority: raise Exception("Only absolute URIs are allowed. uri = %s" % uri) scheme = scheme.lower() authority = authority.lower() if not path: path = "/" # Could do syntax based normalization of the URI before # computing the digest. See Section 6.2.2 of Std 66. request_uri = query and "?".join([path, query]) or path defrag_uri = scheme + "://" + authority + request_uri return defrag_uri @classmethod def cache_url(cls, uri): return cls._urlnorm(uri) def parse_cache_control(self, headers): """ Parse the cache control headers returning a dictionary with values for the different directives. 
""" retval = {} cc_header = 'cache-control' if 'Cache-Control' in headers: cc_header = 'Cache-Control' if cc_header in headers: parts = headers[cc_header].split(',') parts_with_args = [ tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=") ] parts_wo_args = [ (name.strip().lower(), 1) for name in parts if -1 == name.find("=") ] retval = dict(parts_with_args + parts_wo_args) return retval def cached_request(self, request): """ Return a cached response if it exists in the cache, otherwise return False. """ cache_url = self.cache_url(request.url) logger.debug('Looking up "%s" in the cache', cache_url) cc = self.parse_cache_control(request.headers) # Bail out if the request insists on fresh data if 'no-cache' in cc: logger.debug('Request header has "no-cache", cache bypassed') return False if 'max-age' in cc and cc['max-age'] == 0: logger.debug('Request header has "max_age" as 0, cache bypassed') return False # Request allows serving from the cache, let's see if we find something cache_data = self.cache.get(cache_url) if cache_data is None: logger.debug('No cache entry available') return False # Check whether it can be deserialized resp = self.serializer.loads(request, cache_data) if not resp: logger.warning('Cache entry deserialization failed, entry ignored') return False # If we have a cached 301, return it immediately. We don't # need to test our response for other headers b/c it is # intrinsically "cacheable" as it is Permanent. # See: # https://tools.ietf.org/html/rfc7231#section-6.4.2 # # Client can try to refresh the value by repeating the request # with cache busting headers as usual (ie no-cache). if resp.status == 301: msg = ('Returning cached "301 Moved Permanently" response ' '(ignoring date and etag information)') logger.debug(msg) return resp headers = CaseInsensitiveDict(resp.headers) if not headers or 'date' not in headers: if 'etag' not in headers: # Without date or etag, the cached response can never be used # and should be deleted. logger.debug('Purging cached response: no date or etag') self.cache.delete(cache_url) logger.debug('Ignoring cached response: no date') return False now = time.time() date = calendar.timegm( parsedate_tz(headers['date']) ) current_age = max(0, now - date) logger.debug('Current age based on date: %i', current_age) # TODO: There is an assumption that the result will be a # urllib3 response object. This may not be best since we # could probably avoid instantiating or constructing the # response until we know we need it. resp_cc = self.parse_cache_control(headers) # determine freshness freshness_lifetime = 0 # Check the max-age pragma in the cache control header if 'max-age' in resp_cc and resp_cc['max-age'].isdigit(): freshness_lifetime = int(resp_cc['max-age']) logger.debug('Freshness lifetime from max-age: %i', freshness_lifetime) # If there isn't a max-age, check for an expires header elif 'expires' in headers: expires = parsedate_tz(headers['expires']) if expires is not None: expire_time = calendar.timegm(expires) - date freshness_lifetime = max(0, expire_time) logger.debug("Freshness lifetime from expires: %i", freshness_lifetime) # Determine if we are setting freshness limit in the # request. Note, this overrides what was in the response. 
        if 'max-age' in cc:
            try:
                freshness_lifetime = int(cc['max-age'])
                logger.debug('Freshness lifetime from request max-age: %i',
                             freshness_lifetime)
            except ValueError:
                freshness_lifetime = 0

        if 'min-fresh' in cc:
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            # adjust our current age by our min fresh
            current_age += min_fresh
            logger.debug('Adjusted current age from min-fresh: %i',
                         current_age)

        # Return entry if it is fresh enough
        if freshness_lifetime > current_age:
            logger.debug('The response is "fresh", returning cached response')
            logger.debug('%i > %i', freshness_lifetime, current_age)
            return resp

        # we're not fresh. If we don't have an Etag, clear it out
        if 'etag' not in headers:
            logger.debug(
                'The cached response is "stale" with no etag, purging'
            )
            self.cache.delete(cache_url)

        # Signal a cache miss; the caller will make a real request.
        return False

    def conditional_headers(self, request):
        cache_url = self.cache_url(request.url)
        resp = self.serializer.loads(request, self.cache.get(cache_url))
        new_headers = {}

        if resp:
            headers = CaseInsensitiveDict(resp.headers)

            if 'etag' in headers:
                new_headers['If-None-Match'] = headers['ETag']

            if 'last-modified' in headers:
                new_headers['If-Modified-Since'] = headers['Last-Modified']

        return new_headers

    def cache_response(self, request, response, body=None):
        """
        Algorithm for caching responses. Note that the response argument
        is expected to be a urllib3 response object, not a requests
        Response.
        """
        # From httplib2: Don't cache 206's since we aren't going to
        # handle byte range requests
        cacheable_status_codes = [200, 203, 300, 301]
        if response.status not in cacheable_status_codes:
            logger.debug(
                'Status code %s not in %s',
                response.status,
                cacheable_status_codes
            )
            return

        response_headers = CaseInsensitiveDict(response.headers)

        # If we've been given a body, our response has a Content-Length,
        # and that Content-Length is valid, then we can check whether the
        # body we've been given matches the expected size; if it doesn't,
        # we'll just skip trying to cache it.
        if (body is not None and
                "content-length" in response_headers and
                response_headers["content-length"].isdigit() and
                int(response_headers["content-length"]) != len(body)):
            return

        cc_req = self.parse_cache_control(request.headers)
        cc = self.parse_cache_control(response_headers)

        cache_url = self.cache_url(request.url)
        logger.debug('Updating cache with response from "%s"', cache_url)

        # Delete it from the cache if we happen to have it stored there
        no_store = False
        if cc.get('no-store'):
            no_store = True
            logger.debug('Response header has "no-store"')
        if cc_req.get('no-store'):
            no_store = True
            logger.debug('Request header has "no-store"')
        if no_store and self.cache.get(cache_url):
            logger.debug('Purging existing cache entry to honor "no-store"')
            self.cache.delete(cache_url)

        # If we've been given an etag, then keep the response
        if self.cache_etags and 'etag' in response_headers:
            logger.debug('Caching due to etag')
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response, body=body),
            )

        # Add to the cache any 301s. We do this before looking at the
        # Date headers.
        elif response.status == 301:
            logger.debug('Caching permanent redirect')
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response)
            )

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
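        # e.g. a Date header plus "Cache-Control: max-age=3600" is enough
        # to cache below, but max-age with no Date header is not, because
        # the entry's age is computed relative to Date on retrieval.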
        elif 'date' in response_headers:
            # cache when there is a max-age > 0
            if cc and cc.get('max-age'):
                if cc['max-age'].isdigit() and int(cc['max-age']) > 0:
                    logger.debug('Caching b/c date exists and max-age > 0')
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

            # If the request can expire, it means we should cache it
            # in the meantime.
            elif 'expires' in response_headers:
                if response_headers['expires']:
                    logger.debug('Caching b/c of expires header')
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

    def update_cached_response(self, request, response):
        """On a 304 we will get a new set of headers that we want to
        update our cached value with, assuming we have one.

        This should only ever be called when we've sent an ETag and
        gotten a 304 as the response.
        """
        cache_url = self.cache_url(request.url)

        cached_response = self.serializer.loads(
            request,
            self.cache.get(cache_url)
        )

        if not cached_response:
            # we didn't have a cached response
            return response

        # Let's update our headers with the headers from the new response:
        # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
        #
        # The server isn't supposed to send headers that would make
        # the cached body invalid. But... just in case, we'll be sure
        # to strip out ones we know that might be problematic due to
        # typical assumptions.
        excluded_headers = [
            "content-length",
        ]

        cached_response.headers.update(
            dict((k, v) for k, v in response.headers.items()
                 if k.lower() not in excluded_headers)
        )

        # we want a 200 b/c we have content via the cache
        cached_response.status = 200

        # update our cache
        self.cache.set(
            cache_url,
            self.serializer.dumps(request, cached_response),
        )

        return cached_response
CacheControl-0.11.7/cachecontrol/filewrapper.py0000644000076500000240000000474312674572714021570 0ustar ericstaff00000000000000
from io import BytesIO


class CallbackFileWrapper(object):
    """
    Small wrapper around a fp object which will tee everything read into a
    buffer, and when that file is closed it will execute a callback with the
    contents of that buffer.

    All attributes are proxied to the underlying file object.

    This class uses members with a double underscore (__) leading prefix so as
    not to accidentally shadow an attribute.
    """
    def __init__(self, fp, callback):
        self.__buf = BytesIO()
        self.__fp = fp
        self.__callback = callback

    def __getattr__(self, name):
        # The vagaries of garbage collection mean that self.__fp is
        # not always set. Looking the attribute up via
        # __getattribute__ with the mangled name [0] raises an
        # AttributeError when it doesn't exist, which stops things
        # from infinitely recursing into calls to getattr in the case
        # where self.__fp hasn't been set.
        #
        # [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
        fp = self.__getattribute__('_CallbackFileWrapper__fp')
        return getattr(fp, name)

    def __is_fp_closed(self):
        try:
            return self.__fp.fp is None
        except AttributeError:
            pass

        try:
            return self.__fp.closed
        except AttributeError:
            pass

        # We just don't cache it then.
        # TODO: Add some logging here...
        return False

    def _close(self):
        if self.__callback:
            self.__callback(self.__buf.getvalue())

        # We assign this to None here, because otherwise we can get into
        # really tricky problems where the CPython interpreter deadlocks
        # because the callback is holding a reference to something which
        # has a __del__ method. Setting this to None breaks the cycle
        # and allows the garbage collector to do its thing normally.
        self.__callback = None

    def read(self, amt=None):
        data = self.__fp.read(amt)
        self.__buf.write(data)
        if self.__is_fp_closed():
            self._close()

        return data

    def _safe_read(self, amt):
        data = self.__fp._safe_read(amt)

        if amt == 2 and data == b'\r\n':
            # urllib executes this read to toss the CRLF at the end
            # of the chunk.
            return data

        self.__buf.write(data)
        if self.__is_fp_closed():
            self._close()

        return data
CacheControl-0.11.7/cachecontrol/heuristics.py0000644000076500000240000001005512674571316021426 0ustar ericstaff00000000000000
import calendar
import time

from email.utils import formatdate, parsedate, parsedate_tz

from datetime import datetime, timedelta


TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"


def expire_after(delta, date=None):
    date = date or datetime.now()
    return date + delta


def datetime_to_header(dt):
    return formatdate(calendar.timegm(dt.timetuple()))


class BaseHeuristic(object):

    def warning(self, response):
        """
        Return a valid 1xx warning header value describing the cache
        adjustments.

        The response is provided to allow warnings like 113
        http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
        to explicitly say the response is over 24 hours old.
        """
        return '110 - "Response is Stale"'

    def update_headers(self, response):
        """Update the response headers with any new headers.

        NOTE: This SHOULD always include some Warning header to
              signify that the response was cached by the client, not
              by way of the provided headers.
        """
        return {}

    def apply(self, response):
        updated_headers = self.update_headers(response)

        if updated_headers:
            response.headers.update(updated_headers)
            warning_header_value = self.warning(response)
            if warning_header_value is not None:
                response.headers.update({'Warning': warning_header_value})

        return response


class OneDayCache(BaseHeuristic):
    """
    Cache the response by providing an Expires header 1 day in the
    future.
    """
    def update_headers(self, response):
        headers = {}

        if 'expires' not in response.headers:
            date = parsedate(response.headers['date'])
            expires = expire_after(timedelta(days=1),
                                   date=datetime(*date[:6]))
            headers['expires'] = datetime_to_header(expires)
            headers['cache-control'] = 'public'
        return headers


class ExpiresAfter(BaseHeuristic):
    """
    Cache **all** requests for a defined time period.
    """
    def __init__(self, **kw):
        self.delta = timedelta(**kw)

    def update_headers(self, response):
        expires = expire_after(self.delta)
        return {
            'expires': datetime_to_header(expires),
            'cache-control': 'public',
        }

    def warning(self, response):
        tmpl = '110 - Automatically cached for %s. Response might be stale'
        return tmpl % self.delta


class LastModified(BaseHeuristic):
    """
    If there is no Expires header already, fall back on Last-Modified
    using the heuristic from
    http://tools.ietf.org/html/rfc7234#section-4.2.2
    to calculate a reasonable value.

    Firefox also does something like this per
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
    http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
    Unlike Mozilla, we limit this to 24 hours.
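
    Concretely, freshness_lifetime = min((date - last_modified) / 10,
    24 hours), and it is only applied to status codes that are
    cacheable by default and carry no explicit freshness information.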
""" cacheable_by_default_statuses = set([ 200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501 ]) def update_headers(self, resp): headers = resp.headers if 'expires' in headers: return {} if 'cache-control' in headers and headers['cache-control'] != 'public': return {} if resp.status not in self.cacheable_by_default_statuses: return {} if 'date' not in headers or 'last-modified' not in headers: return {} date = calendar.timegm(parsedate_tz(headers['date'])) last_modified = parsedate(headers['last-modified']) if date is None or last_modified is None: return {} now = time.time() current_age = max(0, now - date) delta = date - calendar.timegm(last_modified) freshness_lifetime = max(0, min(delta / 10, 24 * 3600)) if freshness_lifetime <= current_age: return {} expires = date + freshness_lifetime return {'expires': time.strftime(TIME_FMT, time.gmtime(expires))} def warning(self, resp): return None CacheControl-0.11.7/cachecontrol/serialize.py0000644000076500000240000001457412674572714021250 0ustar ericstaff00000000000000import base64 import io import json import zlib from requests.structures import CaseInsensitiveDict from .compat import HTTPResponse, pickle, text_type def _b64_encode_bytes(b): return base64.b64encode(b).decode("ascii") def _b64_encode_str(s): return _b64_encode_bytes(s.encode("utf8")) def _b64_encode(s): if isinstance(s, text_type): return _b64_encode_str(s) return _b64_encode_bytes(s) def _b64_decode_bytes(b): return base64.b64decode(b.encode("ascii")) def _b64_decode_str(s): return _b64_decode_bytes(s).decode("utf8") class Serializer(object): def dumps(self, request, response, body=None): response_headers = CaseInsensitiveDict(response.headers) if body is None: body = response.read(decode_content=False) # NOTE: 99% sure this is dead code. I'm only leaving it # here b/c I don't have a test yet to prove # it. Basically, before using # `cachecontrol.filewrapper.CallbackFileWrapper`, # this made an effort to reset the file handle. The # `CallbackFileWrapper` short circuits this code by # setting the body as the content is consumed, the # result being a `body` argument is *always* passed # into cache_response, and in turn, # `Serializer.dump`. response._fp = io.BytesIO(body) data = { "response": { "body": _b64_encode_bytes(body), "headers": dict( (_b64_encode(k), _b64_encode(v)) for k, v in response.headers.items() ), "status": response.status, "version": response.version, "reason": _b64_encode_str(response.reason), "strict": response.strict, "decode_content": response.decode_content, }, } # Construct our vary headers data["vary"] = {} if "vary" in response_headers: varied_headers = response_headers['vary'].split(',') for header in varied_headers: header = header.strip() data["vary"][header] = request.headers.get(header, None) # Encode our Vary headers to ensure they can be serialized as JSON data["vary"] = dict( (_b64_encode(k), _b64_encode(v) if v is not None else v) for k, v in data["vary"].items() ) return b",".join([ b"cc=2", zlib.compress( json.dumps( data, separators=(",", ":"), sort_keys=True, ).encode("utf8"), ), ]) def loads(self, request, data): # Short circuit if we've been given an empty set of data if not data: return # Determine what version of the serializer the data was serialized # with try: ver, data = data.split(b",", 1) except ValueError: ver = b"cc=0" # Make sure that our "ver" is actually a version and isn't a false # positive from a , being in the data stream. 
if ver[:3] != b"cc=": data = ver + data ver = b"cc=0" # Get the version number out of the cc=N ver = ver.split(b"=", 1)[-1].decode("ascii") # Dispatch to the actual load method for the given version try: return getattr(self, "_loads_v{0}".format(ver))(request, data) except AttributeError: # This is a version we don't have a loads function for, so we'll # just treat it as a miss and return None return def prepare_response(self, request, cached): """Verify our vary headers match and construct a real urllib3 HTTPResponse object. """ # Special case the '*' Vary value as it means we cannot actually # determine if the cached response is suitable for this request. if "*" in cached.get("vary", {}): return # Ensure that the Vary headers for the cached response match our # request for header, value in cached.get("vary", {}).items(): if request.headers.get(header, None) != value: return body_raw = cached["response"].pop("body") headers = CaseInsensitiveDict(data=cached['response']['headers']) if headers.get('transfer-encoding', '') == 'chunked': headers.pop('transfer-encoding') cached['response']['headers'] = headers try: body = io.BytesIO(body_raw) except TypeError: # This can happen if cachecontrol serialized to v1 format (pickle) # using Python 2. A Python 2 str(byte string) will be unpickled as # a Python 3 str (unicode string), which will cause the above to # fail with: # # TypeError: 'str' does not support the buffer interface body = io.BytesIO(body_raw.encode('utf8')) return HTTPResponse( body=body, preload_content=False, **cached["response"] ) def _loads_v0(self, request, data): # The original legacy cache data. This doesn't contain enough # information to construct everything we need, so we'll treat this as # a miss. return def _loads_v1(self, request, data): try: cached = pickle.loads(data) except ValueError: return return self.prepare_response(request, cached) def _loads_v2(self, request, data): try: cached = json.loads(zlib.decompress(data).decode("utf8")) except ValueError: return # We need to decode the items that we've base64 encoded cached["response"]["body"] = _b64_decode_bytes( cached["response"]["body"] ) cached["response"]["headers"] = dict( (_b64_decode_str(k), _b64_decode_str(v)) for k, v in cached["response"]["headers"].items() ) cached["response"]["reason"] = _b64_decode_str( cached["response"]["reason"], ) cached["vary"] = dict( (_b64_decode_str(k), _b64_decode_str(v) if v is not None else v) for k, v in cached["vary"].items() ) return self.prepare_response(request, cached) CacheControl-0.11.7/cachecontrol/wrapper.py0000644000076500000240000000076212633360076020722 0ustar ericstaff00000000000000from .adapter import CacheControlAdapter from .cache import DictCache def CacheControl(sess, cache=None, cache_etags=True, serializer=None, heuristic=None): cache = cache or DictCache() adapter = CacheControlAdapter( cache, cache_etags=cache_etags, serializer=serializer, heuristic=heuristic, ) sess.mount('http://', adapter) sess.mount('https://', adapter) return sess CacheControl-0.11.7/CacheControl.egg-info/0000755000076500000240000000000012765672030020257 5ustar ericstaff00000000000000CacheControl-0.11.7/CacheControl.egg-info/dependency_links.txt0000644000076500000240000000000112765672030024325 0ustar ericstaff00000000000000 CacheControl-0.11.7/CacheControl.egg-info/entry_points.txt0000644000076500000240000000007012765672030023552 0ustar ericstaff00000000000000[console_scripts] doesitcache = cachecontrol._cmd:main 
CacheControl-0.11.7/CacheControl.egg-info/PKG-INFO0000644000076500000240000000430212765672030021353 0ustar ericstaff00000000000000Metadata-Version: 1.1 Name: CacheControl Version: 0.11.7 Summary: httplib2 caching for requests Home-page: https://github.com/ionrock/cachecontrol Author: Eric Larson Author-email: eric@ionrock.org License: UNKNOWN Description: ============== CacheControl ============== .. image:: https://img.shields.io/pypi/v/cachecontrol.svg :target: https://pypi.python.org/pypi/cachecontrol :alt: Latest Version .. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master :target: https://travis-ci.org/ionrock/cachecontrol CacheControl is a port of the caching algorithms in httplib2_ for use with requests_ session object. It was written because httplib2's better support for caching is often mitigated by its lack of threadsafety. The same is true of requests in terms of caching. Quickstart ========== .. code-block:: python import requests from cachecontrol import CacheControl sess = requests.session() cached_sess = CacheControl(sess) response = cached_sess.get('http://google.com') If the URL contains any caching based headers, it will cache the result in a simple dictionary. For more info, check out the docs_ .. _docs: http://cachecontrol.readthedocs.org/en/latest/ .. _httplib2: https://github.com/jcgregorio/httplib2 .. _requests: http://docs.python-requests.org/ Keywords: requests http caching web Platform: UNKNOWN Classifier: Development Status :: 4 - Beta Classifier: Environment :: Web Environment Classifier: License :: OSI Approved :: Apache Software License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python :: 2.6 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.2 Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 Classifier: Topic :: Internet :: WWW/HTTP CacheControl-0.11.7/CacheControl.egg-info/requires.txt0000644000076500000240000000004412765672030022655 0ustar ericstaff00000000000000requests [filecache] lockfile>=0.9 CacheControl-0.11.7/CacheControl.egg-info/SOURCES.txt0000644000076500000240000000115312765672030022143 0ustar ericstaff00000000000000LICENSE.txt MANIFEST.in README.rst setup.cfg setup.py CacheControl.egg-info/PKG-INFO CacheControl.egg-info/SOURCES.txt CacheControl.egg-info/dependency_links.txt CacheControl.egg-info/entry_points.txt CacheControl.egg-info/requires.txt CacheControl.egg-info/top_level.txt cachecontrol/__init__.py cachecontrol/_cmd.py cachecontrol/adapter.py cachecontrol/cache.py cachecontrol/compat.py cachecontrol/controller.py cachecontrol/filewrapper.py cachecontrol/heuristics.py cachecontrol/serialize.py cachecontrol/wrapper.py cachecontrol/caches/__init__.py cachecontrol/caches/file_cache.py cachecontrol/caches/redis_cache.pyCacheControl-0.11.7/CacheControl.egg-info/top_level.txt0000644000076500000240000000001512765672030023005 0ustar ericstaff00000000000000cachecontrol CacheControl-0.11.7/LICENSE.txt0000644000076500000240000000105112633360076016037 0ustar ericstaff00000000000000Copyright 2015 Eric Larson Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. CacheControl-0.11.7/MANIFEST.in0000644000076500000240000000002312633360076015750 0ustar ericstaff00000000000000include LICENSE.txtCacheControl-0.11.7/PKG-INFO0000644000076500000240000000430212765672030015315 0ustar ericstaff00000000000000Metadata-Version: 1.1 Name: CacheControl Version: 0.11.7 Summary: httplib2 caching for requests Home-page: https://github.com/ionrock/cachecontrol Author: Eric Larson Author-email: eric@ionrock.org License: UNKNOWN Description: ============== CacheControl ============== .. image:: https://img.shields.io/pypi/v/cachecontrol.svg :target: https://pypi.python.org/pypi/cachecontrol :alt: Latest Version .. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master :target: https://travis-ci.org/ionrock/cachecontrol CacheControl is a port of the caching algorithms in httplib2_ for use with requests_ session object. It was written because httplib2's better support for caching is often mitigated by its lack of threadsafety. The same is true of requests in terms of caching. Quickstart ========== .. code-block:: python import requests from cachecontrol import CacheControl sess = requests.session() cached_sess = CacheControl(sess) response = cached_sess.get('http://google.com') If the URL contains any caching based headers, it will cache the result in a simple dictionary. For more info, check out the docs_ .. _docs: http://cachecontrol.readthedocs.org/en/latest/ .. _httplib2: https://github.com/jcgregorio/httplib2 .. _requests: http://docs.python-requests.org/ Keywords: requests http caching web Platform: UNKNOWN Classifier: Development Status :: 4 - Beta Classifier: Environment :: Web Environment Classifier: License :: OSI Approved :: Apache Software License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python :: 2.6 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.2 Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 Classifier: Topic :: Internet :: WWW/HTTP CacheControl-0.11.7/README.rst0000644000076500000240000000207712700547771015720 0ustar ericstaff00000000000000============== CacheControl ============== .. image:: https://img.shields.io/pypi/v/cachecontrol.svg :target: https://pypi.python.org/pypi/cachecontrol :alt: Latest Version .. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master :target: https://travis-ci.org/ionrock/cachecontrol CacheControl is a port of the caching algorithms in httplib2_ for use with requests_ session object. It was written because httplib2's better support for caching is often mitigated by its lack of threadsafety. The same is true of requests in terms of caching. Quickstart ========== .. code-block:: python import requests from cachecontrol import CacheControl sess = requests.session() cached_sess = CacheControl(sess) response = cached_sess.get('http://google.com') If the URL contains any caching based headers, it will cache the result in a simple dictionary. For more info, check out the docs_ .. 
_docs: http://cachecontrol.readthedocs.org/en/latest/ .. _httplib2: https://github.com/jcgregorio/httplib2 .. _requests: http://docs.python-requests.org/ CacheControl-0.11.7/setup.cfg0000644000076500000240000000016012765672030016037 0ustar ericstaff00000000000000[tool:pytest] norecursedirs = bin lib include build [egg_info] tag_build = tag_date = 0 tag_svn_revision = 0 CacheControl-0.11.7/setup.py0000644000076500000240000000260312765672004015735 0ustar ericstaff00000000000000import setuptools long_description = open('README.rst').read() VERSION = '0.11.7' setup_params = dict( name='CacheControl', version=VERSION, author='Eric Larson', author_email='eric@ionrock.org', url='https://github.com/ionrock/cachecontrol', keywords='requests http caching web', packages=setuptools.find_packages(), package_data={'': ['LICENSE.txt']}, package_dir={'cachecontrol': 'cachecontrol'}, include_package_data=True, description='httplib2 caching for requests', long_description=long_description, install_requires=[ 'requests', ], extras_require={ 'filecache': ['lockfile>=0.9'], }, entry_points={ 'console_scripts': [ 'doesitcache = cachecontrol._cmd:main', ] }, classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: Web Environment', 'License :: OSI Approved :: Apache Software License', 'Operating System :: OS Independent', 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Topic :: Internet :: WWW/HTTP', ], ) if __name__ == '__main__': setuptools.setup(**setup_params)