pax_global_header00006660000000000000000000000064133525046460014522gustar00rootroot0000000000000052 comment=f7950a9a359774c0190abde8da729b1810bdf3f4 pickleshare-0.7.5/000077500000000000000000000000001335250464600140255ustar00rootroot00000000000000pickleshare-0.7.5/.gitignore000066400000000000000000000000751335250464600160170ustar00rootroot00000000000000*.pyc __pycache__/ /build/ /dist/ .pytest_cache/ *.egg-info/ pickleshare-0.7.5/.travis.yml000066400000000000000000000004431335250464600161370ustar00rootroot00000000000000language: python install: pip install -e . script: py.test python: - "pypy" - "pypy3" - "2.7" - "3.3" - "3.4" - "3.5" - "3.6" matrix: include: - python: "3.7" dist: xenial sudo: true - python: "3.8-dev" dist: xenial sudo: true pickleshare-0.7.5/LICENSE000066400000000000000000000020671335250464600150370ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2016 Ville Vainio Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. pickleshare-0.7.5/MANIFEST.in000066400000000000000000000000541335250464600155620ustar00rootroot00000000000000include LICENSE include test_pickleshare.py pickleshare-0.7.5/README.md000066400000000000000000000020251335250464600153030ustar00rootroot00000000000000PickleShare - a small 'shelve' like datastore with concurrency support Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike shelve, many processes can access the database simultaneously. Changing a value in database is immediately visible to other processes accessing the same database. Concurrency is possible because the values are stored in separate files. Hence the "database" is a directory where *all* files are governed by PickleShare. Both python2 and python3 are supported. Example usage: ```python from pickleshare import * db = PickleShareDB('~/testpickleshare') db.clear() print("Should be empty:", db.items()) db['hello'] = 15 db['aku ankka'] = [1,2,313] db['paths/are/ok/key'] = [1,(5,46)] print(db.keys()) ``` This module is certainly not ZODB, but can be used for low-load (non-mission-critical) situations where tiny code size trumps the advanced features of a "real" object database. Installation guide: ```sh pip install pickleshare ``` Or, if installing from source ```sh pip install . ``` pickleshare-0.7.5/pickleshare.py000066400000000000000000000233261335250464600166770ustar00rootroot00000000000000#!/usr/bin/env python """ PickleShare - a small 'shelve' like datastore with concurrency support Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike shelve, many processes can access the database simultaneously. Changing a value in database is immediately visible to other processes accessing the same database. Concurrency is possible because the values are stored in separate files. Hence the "database" is a directory where *all* files are governed by PickleShare. Example usage:: from pickleshare import * db = PickleShareDB('~/testpickleshare') db.clear() print "Should be empty:",db.items() db['hello'] = 15 db['aku ankka'] = [1,2,313] db['paths/are/ok/key'] = [1,(5,46)] print db.keys() del db['aku ankka'] This module is certainly not ZODB, but can be used for low-load (non-mission-critical) situations where tiny code size trumps the advanced features of a "real" object database. Installation guide: pip install pickleshare Author: Ville Vainio License: MIT open source license. """ from __future__ import print_function __version__ = "0.7.5" try: from pathlib import Path except ImportError: # Python 2 backport from pathlib2 import Path import os,stat,time try: import collections.abc as collections_abc except ImportError: import collections as collections_abc try: import cPickle as pickle except ImportError: import pickle import errno import sys if sys.version_info[0] >= 3: string_types = (str,) else: string_types = (str, unicode) def gethashfile(key): return ("%02x" % abs(hash(key) % 256))[-2:] _sentinel = object() class PickleShareDB(collections_abc.MutableMapping): """ The main 'connection' object for PickleShare database """ def __init__(self,root): """ Return a db object that will manage the specied directory""" if not isinstance(root, string_types): root = str(root) root = os.path.abspath(os.path.expanduser(root)) self.root = Path(root) if not self.root.is_dir(): # catching the exception is necessary if multiple processes are concurrently trying to create a folder # exists_ok keyword argument of mkdir does the same but only from Python 3.5 try: self.root.mkdir(parents=True) except OSError as e: if e.errno != errno.EEXIST: raise # cache has { 'key' : (obj, orig_mod_time) } self.cache = {} def __getitem__(self,key): """ db['key'] reading """ fil = self.root / key try: mtime = (fil.stat()[stat.ST_MTIME]) except OSError: raise KeyError(key) if fil in self.cache and mtime == self.cache[fil][1]: return self.cache[fil][0] try: # The cached item has expired, need to read with fil.open("rb") as f: obj = pickle.loads(f.read()) except: raise KeyError(key) self.cache[fil] = (obj,mtime) return obj def __setitem__(self,key,value): """ db['key'] = 5 """ fil = self.root / key parent = fil.parent if parent and not parent.is_dir(): parent.mkdir(parents=True) # We specify protocol 2, so that we can mostly go between Python 2 # and Python 3. We can upgrade to protocol 3 when Python 2 is obsolete. with fil.open('wb') as f: pickle.dump(value, f, protocol=2) try: self.cache[fil] = (value, fil.stat().st_mtime) except OSError as e: if e.errno != errno.ENOENT: raise def hset(self, hashroot, key, value): """ hashed set """ hroot = self.root / hashroot if not hroot.is_dir(): hroot.mkdir() hfile = hroot / gethashfile(key) d = self.get(hfile, {}) d.update( {key : value}) self[hfile] = d def hget(self, hashroot, key, default = _sentinel, fast_only = True): """ hashed get """ hroot = self.root / hashroot hfile = hroot / gethashfile(key) d = self.get(hfile, _sentinel ) #print "got dict",d,"from",hfile if d is _sentinel: if fast_only: if default is _sentinel: raise KeyError(key) return default # slow mode ok, works even after hcompress() d = self.hdict(hashroot) return d.get(key, default) def hdict(self, hashroot): """ Get all data contained in hashed category 'hashroot' as dict """ hfiles = self.keys(hashroot + "/*") hfiles.sort() last = len(hfiles) and hfiles[-1] or '' if last.endswith('xx'): # print "using xx" hfiles = [last] + hfiles[:-1] all = {} for f in hfiles: # print "using",f try: all.update(self[f]) except KeyError: print("Corrupt",f,"deleted - hset is not threadsafe!") del self[f] self.uncache(f) return all def hcompress(self, hashroot): """ Compress category 'hashroot', so hset is fast again hget will fail if fast_only is True for compressed items (that were hset before hcompress). """ hfiles = self.keys(hashroot + "/*") all = {} for f in hfiles: # print "using",f all.update(self[f]) self.uncache(f) self[hashroot + '/xx'] = all for f in hfiles: p = self.root / f if p.name == 'xx': continue p.unlink() def __delitem__(self,key): """ del db["key"] """ fil = self.root / key self.cache.pop(fil,None) try: fil.unlink() except OSError: # notfound and permission denied are ok - we # lost, the other process wins the conflict pass def _normalized(self, p): """ Make a key suitable for user's eyes """ return str(p.relative_to(self.root)).replace('\\','/') def keys(self, globpat = None): """ All keys in DB, or all keys matching a glob""" if globpat is None: files = self.root.rglob('*') else: files = self.root.glob(globpat) return [self._normalized(p) for p in files if p.is_file()] def __iter__(self): return iter(self.keys()) def __len__(self): return len(self.keys()) def uncache(self,*items): """ Removes all, or specified items from cache Use this after reading a large amount of large objects to free up memory, when you won't be needing the objects for a while. """ if not items: self.cache = {} for it in items: self.cache.pop(it,None) def waitget(self,key, maxwaittime = 60 ): """ Wait (poll) for a key to get a value Will wait for `maxwaittime` seconds before raising a KeyError. The call exits normally if the `key` field in db gets a value within the timeout period. Use this for synchronizing different processes or for ensuring that an unfortunately timed "db['key'] = newvalue" operation in another process (which causes all 'get' operation to cause a KeyError for the duration of pickling) won't screw up your program logic. """ wtimes = [0.2] * 3 + [0.5] * 2 + [1] tries = 0 waited = 0 while 1: try: val = self[key] return val except KeyError: pass if waited > maxwaittime: raise KeyError(key) time.sleep(wtimes[tries]) waited+=wtimes[tries] if tries < len(wtimes) -1: tries+=1 def getlink(self,folder): """ Get a convenient link for accessing items """ return PickleShareLink(self, folder) def __repr__(self): return "PickleShareDB('%s')" % self.root class PickleShareLink: """ A shortdand for accessing nested PickleShare data conveniently. Created through PickleShareDB.getlink(), example:: lnk = db.getlink('myobjects/test') lnk.foo = 2 lnk.bar = lnk.foo + 5 """ def __init__(self, db, keydir ): self.__dict__.update(locals()) def __getattr__(self,key): return self.__dict__['db'][self.__dict__['keydir']+'/' + key] def __setattr__(self,key,val): self.db[self.keydir+'/' + key] = val def __repr__(self): db = self.__dict__['db'] keys = db.keys( self.__dict__['keydir'] +"/*") return "" % ( self.__dict__['keydir'], ";".join([Path(k).basename() for k in keys])) def main(): import textwrap usage = textwrap.dedent("""\ pickleshare - manage PickleShare databases Usage: pickleshare dump /path/to/db > dump.txt pickleshare load /path/to/db < dump.txt pickleshare test /path/to/db """) DB = PickleShareDB import sys if len(sys.argv) < 2: print(usage) return cmd = sys.argv[1] args = sys.argv[2:] if cmd == 'dump': if not args: args= ['.'] db = DB(args[0]) import pprint pprint.pprint(db.items()) elif cmd == 'load': cont = sys.stdin.read() db = DB(args[0]) data = eval(cont) db.clear() for k,v in db.items(): db[k] = v elif cmd == 'testwait': db = DB(args[0]) db.clear() print(db.waitget('250')) elif cmd == 'test': test() stress() if __name__== "__main__": main() pickleshare-0.7.5/setup.cfg000066400000000000000000000000321335250464600156410ustar00rootroot00000000000000[bdist_wheel] universal=1 pickleshare-0.7.5/setup.py000066400000000000000000000037021335250464600155410ustar00rootroot00000000000000from setuptools import setup # extract version from pickleshare.py # can't import because pickleshare depends on path.py with open('pickleshare.py') as f: for line in f: if line.startswith('__version__'): version = eval(line.split('=', 1)[1]) break setup( name="pickleshare", version=version, py_modules=['pickleshare'], author="Ville Vainio", author_email="vivainio@gmail.com", description="Tiny 'shelve'-like database with concurrency support", license="MIT", extras_require = { # Ugly, but we can't do < comparison here ':python_version in "2.6 2.7 3.2 3.3"': ['pathlib2'], }, url="https://github.com/pickleshare/pickleshare", keywords="database persistence pickle ipc shelve", long_description="""\ PickleShare - a small 'shelve' like datastore with concurrency support Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike shelve, many processes can access the database simultaneously. Changing a value in database is immediately visible to other processes accessing the same database. Concurrency is possible because the values are stored in separate files. Hence the "database" is a directory where *all* files are governed by PickleShare. Example usage:: from pickleshare import * db = PickleShareDB('~/testpickleshare') db.clear() print("Should be empty:",db.items()) db['hello'] = 15 db['aku ankka'] = [1,2,313] db['paths/are/ok/key'] = [1,(5,46)] print(db.keys()) This module is certainly not ZODB, but can be used for low-load (non-mission-critical) situations where tiny code size trumps the advanced features of a "real" object database. Installation guide: pip install pickleshare """, classifiers=[ 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', ] ) pickleshare-0.7.5/test_pickleshare.py000066400000000000000000000027531335250464600177370ustar00rootroot00000000000000from __future__ import print_function import os from pickleshare import PickleShareDB def test_pickleshare(tmpdir): db = PickleShareDB(tmpdir) db.clear() print("Should be empty:",db.items()) assert len(db) == 0 db['hello'] = 15 assert db['hello'] == 15 db['aku ankka'] = [1,2,313] assert db['aku ankka'] == [1,2,313] db['paths/nest/ok/keyname'] = [1,(5,46)] assert db['paths/nest/ok/keyname'] == [1,(5,46)] db.hset('hash', 'aku', 12) db.hset('hash', 'ankka', 313) assert db.hget('hash', 'aku') == 12 assert db.hget('hash', 'ankka') == 313 print("all hashed",db.hdict('hash')) print(db.keys()) print(db.keys('paths/nest/ok/k*')) print(dict(db)) # snapsot of whole db db.uncache() # frees memory, causes re-reads later # shorthand for accessing deeply nested files lnk = db.getlink('myobjects/test') lnk.foo = 2 lnk.bar = lnk.foo + 5 assert lnk.bar == 7 def test_stress(tmpdir): db = PickleShareDB(tmpdir) import time,sys for i in range(100): for j in range(500): if i % 15 == 0 and i < 70: if str(j) in db: del db[str(j)] continue if j%33 == 0: time.sleep(0.02) db[str(j)] = db.get(str(j), []) + [(i,j,"proc %d" % os.getpid())] db.hset('hash',j, db.hget('hash',j,15) + 1 ) print(i, end=' ') sys.stdout.flush() if i % 10 == 0: db.uncache()