park-1.0.0/0000755000076600007660000000000012022115413012730 5ustar peterpeter00000000000000park-1.0.0/LICENSE0000644000076600007660000000203612021762067013752 0ustar peterpeter00000000000000Copyright (c) 2012 litl, LLC. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. park-1.0.0/MANIFEST.in0000644000076600007660000000005012021762067014475 0ustar peterpeter00000000000000include README.rst LICENSE test_park.py park-1.0.0/park.egg-info/0000755000076600007660000000000012022115413015357 5ustar peterpeter00000000000000park-1.0.0/park.egg-info/dependency_links.txt0000644000076600007660000000000112022115413021425 0ustar peterpeter00000000000000 park-1.0.0/park.egg-info/PKG-INFO0000644000076600007660000000075712022115413016465 0ustar peterpeter00000000000000Metadata-Version: 1.0 Name: park Version: 1.0.0 Summary: A key-value store with ordered traversal of keys Home-page: https://github.com/litl/park Author: Peter Teichman Author-email: pteichman@litl.com License: MIT Description: UNKNOWN Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: MIT License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python park-1.0.0/park.egg-info/SOURCES.txt0000644000076600007660000000025412022115413017244 0ustar peterpeter00000000000000LICENSE MANIFEST.in README.rst park.py setup.py test_park.py park.egg-info/PKG-INFO park.egg-info/SOURCES.txt park.egg-info/dependency_links.txt park.egg-info/top_level.txtpark-1.0.0/park.egg-info/top_level.txt0000644000076600007660000000000512022115413020104 0ustar peterpeter00000000000000park park-1.0.0/park.py0000644000076600007660000002456012022115175014253 0ustar peterpeter00000000000000# coding: utf-8 # Copyright 2012 litl, LLC. All Rights Reserved. __version__ = "1.0.0" import abc import itertools import logging import os import sqlite3 logger = logging.getLogger(__name__) __all__ = ["SQLiteStore", "KVStore"] class KVStore(object): """An abstract key-value interface with support for range iteration.""" __metaclass__ = abc.ABCMeta # Implement the Python context manager protocol. def __enter__(self): return self def __exit__(self, *exc_info): self.close() def close(self): # pragma: no cover """Release any resources associated with a KVStore. This is used to support the Python context manager protocol with semantics similar to ``contextlib.closing()``. That means you can use any concrete implementation of KVStore like: :: with park.SQLiteStore("/path/to/db") as kv: kv.put("my_key", "my_value") """ # Typically overridden by subclasses, this default # implementation does nothing. pass def contains(self, key): """True if the store contains key.""" return self.get(key, default=None) is not None @abc.abstractmethod def get(self, key, default=None): # pragma: no cover """Get the value associated with a key. :param key: The key to retrieve. :type key: bytes :param default: A default value to return if the key is not present in the store. :returns: The value associated with ``key``. """ pass @abc.abstractmethod def put(self, key, value): # pragma: no cover """Put a key-value pair into the store. If the key is already present, this replaces its value. Both the key and value are binary safe. :param key: The key to set. :type key: bytes :param value: The value to set the key to. :type value: bytes """ pass @abc.abstractmethod def put_many(self, items): # pragma: no cover """Put many key-value pairs. This method may take advantage of performance or atomicity features of the underlying store. It does not guarantee that all items will be set in the same transaction, only that transactions may be used for performance. :param items: An iterable producing (key, value) tuples. """ for key, value in items: self.put(key, value) @abc.abstractmethod def delete(self, key): # pragma: no cover """Remove a key from the store. :param key: The key to remove. :type key: bytes """ pass @abc.abstractmethod def delete_many(self, keys): # pragma: no cover """Remove many keys from the store. :param keys: An iterable producing keys to remove. """ for key in keys: self.delete(key) @abc.abstractmethod def keys(self, key_from=None, key_to=None): # pragma: no cover """Get a lexically sorted range of keys. :param key_from: Lower bound (inclusive), or None for unbounded. :type key_from: bytes :param key_to: Upper bound (inclusive), or None for unbounded. :type key_to: bytes :yields: All keys from the store where ``key_from <= key <= key_to``. """ pass @abc.abstractmethod def items(self, key_from=None, key_to=None): # pragma: no cover """Get a lexically sorted range of (key, value) tuples. :param key_from: Lower bound (inclusive), or None for unbounded. :type key_from: bytes :param key_to: Upper bound (inclusive), or None for unbounded. :type key_to: bytes :yields: All (key, value) pairs from the store where ``key_from <= key <= key_to``. """ pass def prefix_items(self, prefix, strip_prefix=False): """Get all (key, value) pairs with keys that begin with ``prefix``. :param prefix: Lexical prefix for keys to search. :type prefix: bytes :param strip_prefix: True to strip the prefix from yielded items. :type strip_prefix: bool :yields: All (key, value) pairs in the store where the keys begin with the ``prefix``. """ items = self.items(key_from=prefix) start = 0 if strip_prefix: start = len(prefix) for key, value in items: if not key.startswith(prefix): break yield key[start:], value def prefix_keys(self, prefix, strip_prefix=False): """Get all keys that begin with ``prefix``. :param prefix: Lexical prefix for keys to search. :type prefix: bytes :param strip_prefix: True to strip the prefix from yielded items. :type strip_prefix: bool :yields: All keys in the store that begin with ``prefix``. """ keys = self.keys(key_from=prefix) start = 0 if strip_prefix: start = len(prefix) for key in keys: if not key.startswith(prefix): break yield key[start:] def ibatch(iterable, size): """Yield a series of batches from iterable, each size elements long.""" source = iter(iterable) while True: batch = itertools.islice(source, size) yield itertools.chain([next(batch)], batch) class SQLiteStore(KVStore): """A KVStore in an SQLite database. This is what you want to use. :param path: The filesystem path for the database, which will be created if it doesn't exist. :type path: str See :py:class:`park.KVStore` for what you can do with it. SQLiteStore uses an embarrassingly simple SQL schema: .. code-block:: sql CREATE TABLE kv ( key BLOB NOT NULL PRIMARY KEY, value BLOB NOT NULL) There are a few implications of this schema you might need to be aware of. 1. Declaring ``key`` as PRIMARY KEY automatically indexes it, which gives constant time ordered traversal of keys and O(log n) lookup. However, SQLite 3 indexes the keys separately from the table data, which means your keys are effectively stored twice in the database. A primary key also means the index can't be dropped during bulk inserts. 2. Using BLOBs for both columns keeps them binary safe, but it means everything going in must be type ``bytes``. Python ``str`` strings are converted automatically, but if you're dealing with Unicode data you'll need to encode it to bytes first. UTF-8 is a fine option: :: >>> kv.put("key", value.encode("utf-8")) >>> kv.get("key").decode("utf-8") """ def __init__(self, path): need_schema = not os.path.exists(path) self.conn = sqlite3.connect(path) # Don't create unicode objects for retrieved values self.conn.text_factory = buffer # Disable the SQLite cache. Its pages tend to get swapped # out, even if the database file is in buffer cache. c = self.conn.cursor() c.execute("PRAGMA cache_size=0") c.execute("PRAGMA page_size=4096") # Use write-ahead logging if it's available, otherwise truncate journal_mode, = c.execute("PRAGMA journal_mode=WAL").fetchone() if journal_mode != "wal": c.execute("PRAGMA journal_mode=truncate") # Speed-for-reliability tradeoffs c.execute("PRAGMA temp_store=memory") c.execute("PRAGMA synchronous=OFF") if need_schema: self._create_db(self.conn) def close(self): self.conn.commit() self.conn.close() del self.conn def _create_db(self, conn): logger.debug("Creating SQLiteStore schema") c = conn.cursor() c.execute(""" CREATE TABLE kv ( key BLOB NOT NULL PRIMARY KEY, value BLOB NOT NULL)""") conn.commit() def get(self, key, default=None): q = "SELECT value FROM kv WHERE key = ?" c = self.conn.cursor() row = c.execute(q, (sqlite3.Binary(key),)).fetchone() if not row: return default return bytes(row[0]) def put(self, key, value): q = "INSERT OR REPLACE INTO kv (key, value) VALUES (?, ?)" self.conn.execute(q, (sqlite3.Binary(key), sqlite3.Binary(value))) self.conn.commit() def put_many(self, items): q = "INSERT OR REPLACE INTO kv (key, value) VALUES (?, ?)" c = self.conn.cursor() blob = sqlite3.Binary for batch in ibatch(items, 30000): items = ((blob(key), blob(value)) for key, value in batch) c.executemany(q, items) self.conn.commit() def delete(self, key): q = "DELETE FROM kv WHERE key = ?" self.conn.execute(q, (sqlite3.Binary(key),)) self.conn.commit() def delete_many(self, keys): q = "DELETE FROM kv WHERE key = ?" c = self.conn.cursor() blob = sqlite3.Binary for batch in ibatch(keys, 30000): items = ((blob(key),) for key in batch) c.executemany(q, items) self.conn.commit() def _range_where(self, key_from=None, key_to=None): if key_from is not None and key_to is None: return "WHERE key >= :key_from" if key_from is None and key_to is not None: return "WHERE key <= :key_to" if key_from is not None and key_to is not None: return "WHERE key BETWEEN :key_from AND :key_to" return "" def items(self, key_from=None, key_to=None): q = "SELECT key, value FROM kv %s ORDER BY key " \ % self._range_where(key_from, key_to) if key_from is not None: key_from = sqlite3.Binary(key_from) if key_to is not None: key_to = sqlite3.Binary(key_to) c = self.conn.cursor() for key, value in c.execute(q, dict(key_from=key_from, key_to=key_to)): yield bytes(key), bytes(value) def keys(self, key_from=None, key_to=None): q = "SELECT key FROM kv %s ORDER BY key " \ % self._range_where(key_from, key_to) if key_from is not None: key_from = sqlite3.Binary(key_from) if key_to is not None: key_to = sqlite3.Binary(key_to) c = self.conn.cursor() for key, in c.execute(q, dict(key_from=key_from, key_to=key_to)): yield bytes(key) park-1.0.0/PKG-INFO0000644000076600007660000000075712022115413014036 0ustar peterpeter00000000000000Metadata-Version: 1.0 Name: park Version: 1.0.0 Summary: A key-value store with ordered traversal of keys Home-page: https://github.com/litl/park Author: Peter Teichman Author-email: pteichman@litl.com License: MIT Description: UNKNOWN Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: MIT License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python park-1.0.0/README.rst0000644000076600007660000000455012021762067014437 0ustar peterpeter00000000000000Park is a persistent key-value API for Python with ordered traversal of keys. Both keys and values are binary safe. It's similar in use to LevelDB, but has no dependencies outside the Python standard library. It is meant to be extremely easy to use and can scale to a few gigabytes of data. It allows you to be lazy until it doesn't meet your needs. Use it until then. It supports simple getting and setting of byte data: :: >>> kv = park.SQLiteStore("numbers.park") >>> kv.put("1", "one") >>> kv.put("2", "two") >>> kv.put("3", "three") >>> kv.put("4", "four") >>> kv.get("2") 'two' Batched setting of data from an iterable: :: >>> kv.put_many([("1", "one"), ("2", "two"), ("3", "three")]) >>> kv.get("3") 'three' Lexically ordered traversal of keys and items, with start and end sentinels (inclusive): :: >>> kv.put("1", "one") >>> kv.put("2", "two") >>> kv.put("3", "three") >>> kv.put("11", "eleven") >>> kv.put("12", "twelve") >>> list(kv.keys()) ['1', '11', '12', '2', '3'] >>> list(kv.keys(key_from="12")) ['12', '2', '3'] >>> list(kv.keys(key_from="12", key_to="2")) ['12', '2'] >>> list(kv.items(key_from="12")) [('12', 'twelve'), ('2', 'two'), ('3', 'three')] Iteration over all keys or items with a given prefix: :: >>> kv.put("pet/dog", "Canis lupus familiaris") >>> kv.put("pet/cat", "Felis catus") >>> kv.put("pet/wolf", "Canis lupus") >>> list(kv.prefix_keys("pet/")) ['pet/cat', 'pet/dog', 'pet/wolf'] >>> list(kv.prefix_keys("pet/", strip_prefix=True)) ['cat', 'dog', 'wolf'] >>> list(kv.prefix_items("pet/", strip_prefix=True)) [('cat', 'Felis catus'), ('dog', 'Canis lupus familiaris'), ('wolf', 'Canis lupus')] It plays well with generators, so you can e.g. park all the counting numbers (this will take a while): :: def numbers(): for num in itertools.count(1): key = value = str(num) yield key, value kv.put_many(numbers()) Or recursively park a directory's contents (keyed by relative paths) from the local filesystem: :: def file_item(filename): with open(filename, "r") as fd: return filename, fd.read() kv.put_many(file_item(os.path.join(root, name)) for root, dirs, files in os.walk(directory) for name in files) park-1.0.0/setup.cfg0000644000076600007660000000007312022115413014551 0ustar peterpeter00000000000000[egg_info] tag_build = tag_date = 0 tag_svn_revision = 0 park-1.0.0/setup.py0000755000076600007660000000327712021762067014472 0ustar peterpeter00000000000000#!/usr/bin/env python # coding: utf-8 import park import sys # Require setuptools. See http://pypi.python.org/pypi/setuptools for # installation instructions, or run the ez_setup script found at # http://peak.telecommunity.com/dist/ez_setup.py from setuptools import setup, find_packages, Command class CheckCommand(Command): description = "Run tests." user_options = [] def initialize_options(self): pass def finalize_options(self): pass def run(self): import subprocess print "Running pep8..." if subprocess.call(["pep8", "park.py", "test_park.py"]): sys.exit("ERROR: failed pep8 checks") print "Running pyflakes..." if subprocess.call(["pyflakes", "park.py", "test_park.py"]): sys.exit("ERROR: failed pyflakes checks") print "Running tests..." if subprocess.call(["coverage", "run", "--source=park,test_park", "./setup.py", "test"]): sys.exit("ERROR: failed unit tests") subprocess.call(['coverage', 'report', '-m']) setup( name="park", version=park.__version__, author="Peter Teichman", author_email="pteichman@litl.com", license="MIT", url = "https://github.com/litl/park", description="A key-value store with ordered traversal of keys", py_modules=["park"], test_suite="test_park", cmdclass = { "check": CheckCommand }, classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python" ] ) park-1.0.0/test_park.py0000644000076600007660000002113512022115146015303 0ustar peterpeter00000000000000# coding: utf-8 # Copyright 2012 litl, LLC. All Rights Reserved. import operator import os import unittest2 as unittest import park class KVStoreBase(object): """Base tests for KV Stores""" def test_get_default(self): self.assertIsNone(self.store.get("missing")) def test_put_get(self): key, value = "test_key", "test_value" self.assertIsNone(self.store.get(key, None)) self.store.put(key, value) self.assertTrue(self.store.contains(key)) self.assertEqual(value, self.store.get(key)) def test_delete(self): self.store.put("test_key1", "test_value1") self.store.put("test_key2", "test_value2") self.store.delete("test_key1") self.assertEqual(["test_key2"], list(self.store.keys())) def test_delete_many(self): self.store.put("test_key1", "test_value1") self.store.put("test_key2", "test_value2") self.store.delete_many(["test_key1", "test_key2"]) self.assertEqual([], list(self.store.keys())) def test_null_key(self): key, value = "\x00", "test_value" self.assertIsNone(self.store.get(key, None)) self.store.put(key, value) self.assertEqual(value, self.store.get(key)) self.assertEqual([key], list(self.store.keys())) self.assertEqual([(key, value)], list(self.store.items())) def test_null_value(self): key, value = "test_key", "\x00" self.assertIsNone(self.store.get(key, None)) self.store.put(key, value) self.assertEqual(value, self.store.get(key)) self.assertEqual([key], list(self.store.keys())) self.assertEqual([(key, value)], list(self.store.items())) def test_replace(self): key = "foo" self.assertIsNone(self.store.get(key, None)) self.store.put(key, "bar") self.assertEqual("bar", self.store.get(key)) self.store.put(key, "baz") self.assertEqual("baz", self.store.get(key)) def test_put_many(self): items = [ ("one", "value1"), ("two", "value2"), ("three", "value3"), ("four", "value4"), ("five", "value5"), ("six", "value6"), ("seven", "value7"), ("eight", "value8"), ("nine", "value9") ] self.store.put_many(items) for key, value in items: self.assertEqual(value, self.store.get(key)) def test_no_keys(self): self.assertEqual([], list(self.store.keys())) self.assertEqual([], list(self.store.keys(key_from="foo"))) self.assertEqual([], list(self.store.keys(key_to="bar"))) self.assertEqual([], list(self.store.keys(key_from="foo", key_to="bar"))) def test_no_items(self): self.assertEqual([], list(self.store.items())) self.assertEqual([], list(self.store.items(key_from="foo"))) self.assertEqual([], list(self.store.items(key_to="bar"))) self.assertEqual([], list(self.store.items(key_from="foo", key_to="bar"))) def test_keys(self): items = [ ("one", "value1"), ("two", "value2"), ("three", "value3"), ("four", "value4"), ("five", "value5"), ("six", "value6"), ("seven", "value7"), ("eight", "value8"), ("nine", "value9") ] for key, value in items: self.store.put(key, value) # Sorted order is: eight five four nine one seven six three two keys = list(self.store.keys()) expected = "eight five four nine one seven six three two".split() self.assertEqual(expected, keys) # Test key_from on keys that are present and missing in the db keys = list(self.store.keys(key_from="four")) expected = "four nine one seven six three two".split() self.assertEqual(expected, keys) keys = list(self.store.keys(key_from="fo")) expected = "four nine one seven six three two".split() self.assertEqual(expected, keys) # Test key_to keys = list(self.store.keys(key_to="six")) expected = "eight five four nine one seven six".split() self.assertEqual(expected, keys) keys = list(self.store.keys(key_to="si")) expected = "eight five four nine one seven".split() self.assertEqual(expected, keys) # And test them both together keys = list(self.store.keys(key_from="five", key_to="three")) expected = "five four nine one seven six three".split() self.assertEqual(expected, keys) def test_prefix_keys(self): # Fake some interesting keys and values to make sure the # prefix iterators are working store = self.store store.put("a/", "a") store.put("a/b", "b") store.put("a/c", "c") store.put("a/d", "d") store.put("a/e", "e") store.put("a/f", "f") store.put("b/", "b") store.put("c/", "c") store.put("d/", "d") a_list = list(store.prefix_keys("a/")) self.assertEqual("a/ a/b a/c a/d a/e a/f".split(), a_list) a_list = list(store.prefix_keys("a/", strip_prefix=True)) self.assertEqual(["", "b", "c", "d", "e", "f"], a_list) self.assertEqual(["b/"], list(store.prefix_keys("b/"))) self.assertEqual(["c/"], list(store.prefix_keys("c/"))) self.assertEqual(["d/"], list(store.prefix_keys("d/"))) def test_items(self): put_items = dict([ ("one", "value1"), ("two", "value2"), ("three", "value3"), ("four", "value4"), ("five", "value5"), ("six", "value6"), ("seven", "value7"), ("eight", "value8"), ("nine", "value9") ]) for key, value in put_items.items(): self.store.put(key, value) # Sorted order is: eight five four nine one seven six three two keys = list(self.store.items()) expected = sorted(put_items.items(), key=operator.itemgetter(0)) self.assertEqual(expected, keys) # Test key_from on keys that are present and missing in the db keys = list(self.store.items(key_from="four")) self.assertEqual(expected[2:], keys) keys = list(self.store.items(key_from="fo")) self.assertEqual(expected[2:], keys) # Test key_to keys = list(self.store.items(key_to="six")) self.assertEqual(expected[:7], keys) keys = list(self.store.items(key_to="si")) self.assertEqual(expected[:6], keys) # And test them both together keys = list(self.store.items(key_from="five", key_to="three")) self.assertEqual(expected[1:8], keys) def test_prefix_items(self): # Fake some interesting keys and values to make sure the # prefix iterators are working store = self.store store.put("a/", "a") store.put("a/b", "b") store.put("a/c", "c") store.put("a/d", "d") store.put("a/e", "e") store.put("a/f", "f") store.put("b/", "b") store.put("c/", "c") store.put("d/", "d") expected = [("a/", "a"), ("a/b", "b"), ("a/c", "c"), ("a/d", "d"), ("a/e", "e"), ("a/f", "f")] a_list = list(store.prefix_items("a/")) self.assertEqual(expected, a_list) expected = [("", "a"), ("b", "b"), ("c", "c"), ("d", "d"), ("e", "e"), ("f", "f")] a_list = list(store.prefix_items("a/", strip_prefix=True)) self.assertEqual(expected, a_list) def test_context_manager(self): with self.store as kv: kv.put("foo", "bar") kv.put("baz", "quux") self.assertEqual("bar", kv.get("foo")) class TestIbatch(unittest.TestCase): def test_ibatch(self): items = range(10) batches = park.ibatch(items, 3) self.assertEqual([0, 1, 2], list(next(batches))) self.assertEqual([3, 4, 5], list(next(batches))) self.assertEqual([6, 7, 8], list(next(batches))) self.assertEqual([9], list(next(batches))) class TestSQLiteStore(unittest.TestCase, KVStoreBase): DB = "tests.test_sqlite_store" def setUp(self): self.store = park.SQLiteStore(self.DB) def cleanup(): if os.path.exists(self.DB): os.unlink(self.DB) self.addCleanup(cleanup)