bd2k-python-lib-1.14a1.dev43/0000775000175100017510000000000013173354616016653 5ustar jenkinsjenkins00000000000000bd2k-python-lib-1.14a1.dev43/setup.cfg0000664000175100017510000000014613173354616020475 0ustar jenkinsjenkins00000000000000[pytest] python_files = *.py addopts = --doctest-modules [egg_info] tag_build = .dev43 tag_date = 0 bd2k-python-lib-1.14a1.dev43/setup.py0000664000175100017510000000246213173354566020375 0ustar jenkinsjenkins00000000000000import sys assert sys.version_info >= (2, 6) from setuptools import setup, find_packages kwargs = dict( name="bd2k-python-lib", version="1.14a1", author='Hannes Schmidt', author_email='hannes@ucsc.edu', url='https://github.com/BD2KGenomics/bd2k-python-lib', description='The BD2K Python module kitchen sink', package_dir={ '': 'src' }, packages=find_packages( 'src' ), install_requires=[ 'future' ], tests_require=[ 'pytest==2.7.2', 'mock==1.0.1', 'lockfile==0.11.0', 'boto==2.38.0'], namespace_packages=[ 'bd2k' ] ) from setuptools.command.test import test as TestCommand class PyTest( TestCommand ): user_options = [ ('pytest-args=', 'a', "Arguments to pass to py.test") ] def initialize_options( self ): TestCommand.initialize_options( self ) self.pytest_args = [ ] def finalize_options( self ): TestCommand.finalize_options( self ) self.test_args = [ ] self.test_suite = True def run_tests( self ): import pytest # Sanitize command line arguments to avoid confusing Toil code attempting to parse them sys.argv[ 1: ] = [ ] errno = pytest.main( self.pytest_args ) sys.exit( errno ) kwargs[ 'cmdclass' ] = { 'test': PyTest } setup( **kwargs ) bd2k-python-lib-1.14a1.dev43/src/0000775000175100017510000000000013173354616017442 5ustar jenkinsjenkins00000000000000bd2k-python-lib-1.14a1.dev43/src/bd2k_python_lib.egg-info/0000775000175100017510000000000013173354616024205 5ustar jenkinsjenkins00000000000000bd2k-python-lib-1.14a1.dev43/src/bd2k_python_lib.egg-info/SOURCES.txt0000664000175100017510000000232713173354616026075 0ustar 
jenkinsjenkins00000000000000setup.cfg setup.py src/bd2k/__init__.py src/bd2k/util/__init__.py src/bd2k/util/collections.py src/bd2k/util/d32.py src/bd2k/util/d64.py src/bd2k/util/exceptions.py src/bd2k/util/expando.py src/bd2k/util/files.py src/bd2k/util/fnmatch.py src/bd2k/util/hashes.py src/bd2k/util/humanize.py src/bd2k/util/iterables.py src/bd2k/util/lockfile.py src/bd2k/util/logging.py src/bd2k/util/objects.py src/bd2k/util/processes.py src/bd2k/util/retry.py src/bd2k/util/shell.py src/bd2k/util/strings.py src/bd2k/util/threading.py src/bd2k/util/throttle.py src/bd2k/util/ec2/__init__.py src/bd2k/util/ec2/credentials.py src/bd2k/util/ec2/test/__init__.py src/bd2k/util/ec2/test/test_credentials.py src/bd2k/util/test/__init__.py src/bd2k/util/test/test_d32.py src/bd2k/util/test/test_d64.py src/bd2k/util/test/test_files.py src/bd2k/util/test/test_panic.py src/bd2k/util/test/test_strings.py src/bd2k/util/xml/__init__.py src/bd2k/util/xml/builder.py src/bd2k_python_lib.egg-info/PKG-INFO src/bd2k_python_lib.egg-info/SOURCES.txt src/bd2k_python_lib.egg-info/dependency_links.txt src/bd2k_python_lib.egg-info/namespace_packages.txt src/bd2k_python_lib.egg-info/pbr.json src/bd2k_python_lib.egg-info/requires.txt src/bd2k_python_lib.egg-info/top_level.txtbd2k-python-lib-1.14a1.dev43/src/bd2k_python_lib.egg-info/pbr.json0000664000175100017510000000005713173354602025660 0ustar jenkinsjenkins00000000000000{"is_release": false, "git_version": "ea5b46e"}bd2k-python-lib-1.14a1.dev43/src/bd2k_python_lib.egg-info/PKG-INFO0000664000175100017510000000047113173354616025304 0ustar jenkinsjenkins00000000000000Metadata-Version: 1.0 Name: bd2k-python-lib Version: 1.14a1.dev43 Summary: The BD2K Python module kitchen sink Home-page: https://github.com/BD2KGenomics/bd2k-python-lib Author: Hannes Schmidt Author-email: hannes@ucsc.edu License: UNKNOWN Description-Content-Type: UNKNOWN Description: UNKNOWN Platform: UNKNOWN 
bd2k-python-lib-1.14a1.dev43/src/bd2k_python_lib.egg-info/namespace_packages.txt0000664000175100017510000000000513173354616030533 0ustar jenkinsjenkins00000000000000bd2k bd2k-python-lib-1.14a1.dev43/src/bd2k_python_lib.egg-info/top_level.txt0000664000175100017510000000000513173354616026732 0ustar jenkinsjenkins00000000000000bd2k bd2k-python-lib-1.14a1.dev43/src/bd2k_python_lib.egg-info/dependency_links.txt0000664000175100017510000000000113173354616030253 0ustar jenkinsjenkins00000000000000 bd2k-python-lib-1.14a1.dev43/src/bd2k_python_lib.egg-info/requires.txt0000664000175100017510000000000713173354616026602 0ustar jenkinsjenkins00000000000000future bd2k-python-lib-1.14a1.dev43/src/bd2k/0000775000175100017510000000000013173354616020264 5ustar jenkinsjenkins00000000000000bd2k-python-lib-1.14a1.dev43/src/bd2k/__init__.py0000664000175100017510000000007313173354566022401 0ustar jenkinsjenkins00000000000000__import__( 'pkg_resources' ).declare_namespace( __name__ )bd2k-python-lib-1.14a1.dev43/src/bd2k/util/0000775000175100017510000000000013173354616021241 5ustar jenkinsjenkins00000000000000bd2k-python-lib-1.14a1.dev43/src/bd2k/util/humanize.py0000664000175100017510000000727313173354566023450 0ustar jenkinsjenkins00000000000000# http://code.activestate.com/recipes/578019-bytes-to-human-human-to-bytes-converter/ """ Bytes-to-human / human-to-bytes converter. Based on: http://goo.gl/kTQMs Working with Python 2.x and 3.x. 
Author: Giampaolo Rodola' License: MIT """ from __future__ import division # see: http://goo.gl/kTQMs from past.utils import old_div SYMBOLS = { 'customary' : ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'), 'customary_ext' : ('byte', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zetta', 'iotta'), 'iec' : ('Bi', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'), 'iec_ext' : ('byte', 'kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi', 'zebi', 'yobi'), } def bytes2human(n, fmt='%(value).1f %(symbol)s', symbols='customary'): """ Convert n bytes into a human readable string based on format. symbols can be either "customary", "customary_ext", "iec" or "iec_ext", see: http://goo.gl/kTQMs >>> bytes2human(0) '0.0 ' >>> bytes2human(0.9) '0.0 ' >>> bytes2human(1) '1.0 ' >>> bytes2human(1.9) '1.0 ' >>> bytes2human(1024) '1.0 K' >>> bytes2human(1048576) '1.0 M' >>> bytes2human(1099511627776127398123789121) '909.5 Y' >>> bytes2human(9856, symbols="customary") '9.6 K' >>> bytes2human(9856, symbols="customary_ext") '9.6 kilo' >>> bytes2human(9856, symbols="iec") '9.6 Ki' >>> bytes2human(9856, symbols="iec_ext") '9.6 kibi' >>> bytes2human(10000, "%(value).1f %(symbol)s/sec") '9.8 K/sec' >>> # precision can be adjusted by playing with %f operator >>> bytes2human(10000, fmt="%(value).5f %(symbol)s") '9.76562 K' """ n = int(n) if n < 0: raise ValueError("n < 0") symbols = SYMBOLS[symbols] prefix = {} for i, s in enumerate(symbols[1:]): prefix[s] = 1 << (i+1)*10 for symbol in reversed(symbols[1:]): if n >= prefix[symbol]: value = old_div(float(n), prefix[symbol]) return fmt % locals() return fmt % dict(symbol=symbols[0], value=n) def human2bytes(s): """ Attempts to guess the string format based on default symbols set and return the corresponding bytes as an integer. When unable to recognize the format ValueError is raised. 
>>> human2bytes('0 ') 0 >>> human2bytes('1 K') 1024 >>> human2bytes('1K') 1024 >>> human2bytes('1.1K') == 1024 + 102 True >>> human2bytes('1 M') 1048576 >>> human2bytes('1 Gi') 1073741824 >>> human2bytes('1 tera') 1099511627776 >>> human2bytes('0.5kilo') 512 >>> human2bytes('0.1 byte') 0 >>> human2bytes('1 byte') 1 >>> human2bytes('1 k') # k is an alias for K 1024 >>> human2bytes('12 foo') Traceback (most recent call last): ... ValueError: can't interpret '12 foo' """ init = s num = "" while s and s[0:1].isdigit() or s[0:1] == '.': num += s[0] s = s[1:] num = float(num) letter = s.strip() for name, sset in list(SYMBOLS.items()): if letter in sset: break else: if letter == 'k': # treat 'k' as an alias for 'K' as per: http://goo.gl/kTQMs sset = SYMBOLS['customary'] letter = letter.upper() else: raise ValueError("can't interpret %r" % init) prefix = {sset[0]:1} for i, s in enumerate(sset[1:]): prefix[s] = 1 << (i+1)*10 return int(num * prefix[letter]) if __name__ == "__main__": import doctest doctest.testmod() bd2k-python-lib-1.14a1.dev43/src/bd2k/util/threading.py0000664000175100017510000000430413173354566023565 0ustar jenkinsjenkins00000000000000from __future__ import absolute_import from builtins import range import sys import threading class BoundedEmptySemaphore( threading._BoundedSemaphore ): """ A bounded semaphore that is initially empty. """ def __init__( self, value=1, verbose=None ): super( BoundedEmptySemaphore, self ).__init__( value, verbose ) for i in range( value ): assert self.acquire( blocking=False ) class ExceptionalThread( threading.Thread ): """ A thread whose join() method re-raises exceptions raised during run(). While join() is idempotent, the exception is only during the first invocation of join() that succesfully joined the thread. If join() times out, no exception will be re reraised even though an exception might already have occured in run(). When subclassing this thread, override tryRun() instead of run(). >>> def f(): ... 
assert 0 >>> t = ExceptionalThread(target=f) >>> t.start() >>> t.join() Traceback (most recent call last): ... AssertionError >>> class MyThread(ExceptionalThread): ... def tryRun( self ): ... assert 0 >>> t = MyThread() >>> t.start() >>> t.join() Traceback (most recent call last): ... AssertionError """ exc_info = None def run( self ): try: self.tryRun( ) except: self.exc_info = sys.exc_info( ) raise def tryRun( self ): super( ExceptionalThread, self ).run( ) def join( self, *args, **kwargs ): super( ExceptionalThread, self ).join( *args, **kwargs ) if not self.is_alive( ) and self.exc_info is not None: type, value, traceback = self.exc_info self.exc_info = None raise type, value, traceback # noinspection PyPep8Naming class defaultlocal( threading.local ): """ Thread local storage with default values for each field in each thread >>> l = defaultlocal( foo=42 ) >>> def f(): print l.foo >>> t = threading.Thread(target=f) >>> t.start() ; t.join() 42 """ def __init__( self, **kwargs ): super( defaultlocal, self ).__init__( ) self.__dict__.update( kwargs ) bd2k-python-lib-1.14a1.dev43/src/bd2k/util/expando.py0000664000175100017510000000435413173354566023263 0ustar jenkinsjenkins00000000000000class Expando(dict): """ Pass inital attributes to the constructor: >>> o = Expando(foo=42) >>> o.foo 42 Dynamically create new attributes: >>> o.bar = 'hi' >>> o.bar 'hi' Expando is a dictionary: >>> isinstance(o,dict) True >>> o['foo'] 42 Works great with JSON: >>> import json >>> s='{"foo":42}' >>> o = json.loads(s,object_hook=Expando) >>> o {u'foo': 42} >>> o.foo 42 >>> o.bar = 'hi' >>> o {u'foo': 42, 'bar': 'hi'} And since Expando is a dict, it serializes back to JSON just fine: >>> json.dumps(o) '{"foo": 42, "bar": "hi"}' Attributes can be deleted, too: >>> o = Expando(foo=42) >>> o.foo 42 >>> del o.foo >>> o.foo Traceback (most recent call last): ... AttributeError: 'Expando' object has no attribute 'foo' >>> o['foo'] Traceback (most recent call last): ... 
KeyError: 'foo' >>> del o.foo Traceback (most recent call last): ... AttributeError: foo And copied: >>> o = Expando(foo=42) >>> p = o.copy() >>> isinstance(p,Expando) True >>> o == p True >>> o is p False Same with MagicExpando ... >>> o = MagicExpando() >>> o.foo.bar = 42 >>> p = o.copy() >>> isinstance(p,MagicExpando) True >>> o == p True >>> o is p False ... but the copy is shallow: >>> o.foo is p.foo True """ def __init__( self, *args, **kwargs ): super( Expando, self ).__init__( *args, **kwargs ) self.__slots__ = None self.__dict__ = self def copy(self): return type(self)(self) class MagicExpando(Expando): """ Use MagicExpando for chained attribute access. The first time a missing attribute is accessed, it will be set to a new child MagicExpando. >>> o=MagicExpando() >>> o.foo = 42 >>> o {'foo': 42} >>> o.bar.hello = 'hi' >>> o {'foo': 42, 'bar': {'hello': 'hi'}} """ def __getattribute__( self, name ): try: return super( Expando, self ).__getattribute__( name ) except AttributeError: child = self.__class__( ) self[name] = child return child bd2k-python-lib-1.14a1.dev43/src/bd2k/util/shell.py0000664000175100017510000000107113173354566022725 0ustar jenkinsjenkins00000000000000from builtins import range import re def quote(s, level=1): for i in range( 0, level ): s = _quote( s ) return s _find_unsafe = re.compile( r'[^\w@%+=:,./-]' ).search def _quote(s): """ Return a shell-escaped version of the string *s*. 
Stolen from Python 3's shlex module """ if not s: return "''" if _find_unsafe( s ) is None: return s # use single quotes, and put single quotes into double quotes # the string $'b is then quoted as '$'"'"'b' return "'" + s.replace( "'", "'\"'\"'" ) + "'" bd2k-python-lib-1.14a1.dev43/src/bd2k/util/lockfile.py0000664000175100017510000000217013173354566023407 0ustar jenkinsjenkins00000000000000from __future__ import absolute_import import errno import logging as log import os try: from lockfile.pidlockfile import PIDLockFile except: pass else: class SmartPIDLockFile( PIDLockFile ): """ A PID lock file that breaks the lock if the owning process doesn't exist """ def process_alive( self, pid ): try: os.kill( pid, 0 ) # now we know the process exists return True except OSError as e: if e.errno == errno.ESRCH: # now we know the process doesn't exist return False else: # now we're not sure return None def acquire( self, timeout=None ): owner = self.read_pid( ) if owner is not None and owner != os.getpid( ) and self.process_alive( owner ) is False: log.warn( "Breaking lock '%s' since owning process %i is dead." % (self.lock_file, owner) ) self.break_lock( ) PIDLockFile.acquire( self, timeout ) bd2k-python-lib-1.14a1.dev43/src/bd2k/util/__init__.py0000664000175100017510000001420013173354566023353 0ustar jenkinsjenkins00000000000000from __future__ import absolute_import import datetime import grp import pwd from functools import wraps from threading import Lock import re def uid_to_name( uid ): return pwd.getpwuid( uid ).pw_name def gid_to_name( gid ): return grp.getgrgid( gid ).gr_name def name_to_uid( name ): return pwd.getpwnam( name ).pw_uid def name_to_gid( name ): return grp.getgrnam( name ).gr_gid def memoize( f ): """ A decorator that memoizes a function result based on its parameters. For example, this can be used in place of lazy initialization. 
If the decorating function is invoked by multiple threads, the decorated function may be called more than once with the same arguments. """ # TODO: Recommend that f's arguments be immutable memory = { } @wraps( f ) def new_f( *args ): try: return memory[ args ] except KeyError: r = f( *args ) memory[ args ] = r return r return new_f def sync_memoize( f ): """ Like memoize, but guarantees that decorated function is only called once, even when multiple threads are calling the decorating function with multiple parameters. """ # TODO: Think about an f that is recursive memory = { } lock = Lock( ) @wraps( f ) def new_f( *args ): try: return memory[ args ] except KeyError: # on cache misses, retry with lock held with lock: try: return memory[ args ] except KeyError: r = f( *args ) memory[ args ] = r return r return new_f def properties( obj ): """ Returns a dictionary with one entry per attribute of the given object. The key being the attribute name and the value being the attribute value. Attributes starting in two underscores will be ignored. This function is an alternative to vars() which only returns instance variables, not properties. Note that methods are returned as well but the value in the dictionary is the method, not the return value of the method. >>> class Foo(): ... def __init__(self): ... self.var = 1 ... @property ... def prop(self): ... return self.var + 1 ... def meth(self): ... return self.var + 2 >>> foo = Foo() >>> properties( foo ) == { 'var':1, 'prop':2, 'meth':foo.meth } True Note how the entry for prop is not a bound method (i.e. the getter) but a the return value of that getter. 
""" return dict( (attr, getattr( obj, attr )) for attr in dir( obj ) if not attr.startswith( '__' ) ) def ilen( it ): """ Return the number of elements in an iterable >>> ilen(xrange(0,100)) 100 """ return sum( 1 for _ in it ) def rfc3339_datetime_re( anchor=True ): """ Returns a regular expression for syntactic validation of ISO date-times, RFC-3339 date-times to be precise. >>> bool( rfc3339_datetime_re().match('2013-11-06T15:56:39Z') ) True >>> bool( rfc3339_datetime_re().match('2013-11-06T15:56:39.123Z') ) True >>> bool( rfc3339_datetime_re().match('2013-11-06T15:56:39-08:00') ) True >>> bool( rfc3339_datetime_re().match('2013-11-06T15:56:39.123+11:00') ) True It anchors the matching to the beginning and end of a string by default ... >>> bool( rfc3339_datetime_re().search('bla 2013-11-06T15:56:39Z bla') ) False ... but that can be changed: >>> bool( rfc3339_datetime_re( anchor=False ).search('bla 2013-11-06T15:56:39Z bla') ) True >>> bool( rfc3339_datetime_re( anchor=False ).match('2013-11-06T15:56:39Z bla') ) True Keep in mind that re.match() always anchors at the beginning: >>> bool( rfc3339_datetime_re( anchor=False ).match('bla 2013-11-06T15:56:39Z') ) False It does not check whether the actual value is a semantically valid datetime: >>> bool( rfc3339_datetime_re().match('9999-99-99T99:99:99.9-99:99') ) True If the regular expression matches, each component of the matching value will be exposed as a captured group in the match object. 
>>> rfc3339_datetime_re().match('2013-11-06T15:56:39Z').groups() ('2013', '11', '06', '15', '56', '39', None, 'Z') >>> rfc3339_datetime_re().match('2013-11-06T15:56:39.123Z').groups() ('2013', '11', '06', '15', '56', '39', '123', 'Z') >>> rfc3339_datetime_re().match('2013-11-06T15:56:39.123-08:30').groups() ('2013', '11', '06', '15', '56', '39', '123', '-08:30') """ return re.compile( ('^' if anchor else '') + '(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?(Z|[+-]\d{2}:\d{2})' + ('$' if anchor else '') ) _rfc3339_datetime_re = rfc3339_datetime_re( ) def parse_iso_utc( s ): """ Parses an ISO time with a hard-coded Z for zulu-time (UTC) at the end. Other timezones are not supported. :param str s: the ISO-formatted time :rtype: datetime.datetime :return: an timezone-naive datetime object >>> parse_iso_utc('2016-04-27T00:28:04.000Z') datetime.datetime(2016, 4, 27, 0, 28, 4) >>> parse_iso_utc('2016-04-27T00:28:04Z') datetime.datetime(2016, 4, 27, 0, 28, 4) >>> parse_iso_utc('2016-04-27T00:28:04X') Traceback (most recent call last): ... ValueError: Not a valid ISO datetime in UTC: 2016-04-27T00:28:04X """ m = _rfc3339_datetime_re.match( s ) if not m: raise ValueError( 'Not a valid ISO datetime in UTC: ' + s ) else: fmt = '%Y-%m-%dT%H:%M:%S' + ('.%f' if m.group( 7 ) else '') + 'Z' return datetime.datetime.strptime( s, fmt ) def strict_bool( s ): """ Variant of bool() that only accepts two possible string values. """ if s == 'True': return True elif s == 'False': return False else: raise ValueError( s ) def less_strict_bool( x ): """ Idempotent and None-safe version of strict_bool. 
""" if x is None: return False elif x is True or x is False: return x else: return strict_bool( x ) bd2k-python-lib-1.14a1.dev43/src/bd2k/util/test/0000775000175100017510000000000013173354616022220 5ustar jenkinsjenkins00000000000000bd2k-python-lib-1.14a1.dev43/src/bd2k/util/test/test_d64.py0000664000175100017510000000303013173354566024226 0ustar jenkinsjenkins00000000000000# Copyright (c) 2014 Dominic Tarr # Copyright (c) 2015 Hannes Schmidt # # Permission is hereby granted, free of charge, to any person obtaining a copy of this software # and associated documentation files (the "Software"), to deal in the Software without # restriction, including without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all copies or # substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# Ported from JS found at https://github.com/dominictarr/d64 from __future__ import absolute_import from builtins import map from builtins import range from unittest import TestCase from bd2k.util.d64 import standard as d64 import os class TestD64( TestCase ): def test( self ): l = [ os.urandom( i ) for i in range( 1000 ) ] self.assertEqual( list(map( d64.decode, sorted( map( d64.encode, l ) ) )), sorted( l ) ) bd2k-python-lib-1.14a1.dev43/src/bd2k/util/test/test_strings.py0000664000175100017510000000046213173354566025330 0ustar jenkinsjenkins00000000000000import unittest from bd2k.util.strings import interpolate from bd2k.util.strings import to_english foo = 4 bar = 1 class TestStrings( unittest.TestCase ): def test_interpolate( self ): bar = 2 # should override the global foo self.assertEquals( interpolate( "{foo}{bar}" ), "42" ) bd2k-python-lib-1.14a1.dev43/src/bd2k/util/test/__init__.py0000664000175100017510000000000013173354566024323 0ustar jenkinsjenkins00000000000000bd2k-python-lib-1.14a1.dev43/src/bd2k/util/test/test_panic.py0000664000175100017510000000474113173354566024735 0ustar jenkinsjenkins00000000000000import inspect import logging import unittest import sys from bd2k.util.exceptions import panic log = logging.getLogger( __name__ ) logging.basicConfig( ) class TestPanic( unittest.TestCase ): def test_panic_by_hand( self ): try: self.try_and_panic_by_hand( ) except: self.__assert_raised_exception_is_primary( ) def test_panic( self ): try: self.try_and_panic( ) except: self.__assert_raised_exception_is_primary( ) def test_panic_with_secondary( self ): try: self.try_and_panic_with_secondary( ) except: self.__assert_raised_exception_is_primary( ) def test_nested_panic( self ): try: self.try_and_nested_panic_with_secondary( ) except: self.__assert_raised_exception_is_primary( ) def try_and_panic_by_hand( self ): try: self.line_of_primary_exc = inspect.currentframe( ).f_lineno + 1 raise ValueError( "primary" ) except Exception: exc_type, exc_value, 
exc_traceback = sys.exc_info( ) try: raise RuntimeError( "secondary" ) except Exception: pass raise exc_type, exc_value, exc_traceback def try_and_panic( self ): try: self.line_of_primary_exc = inspect.currentframe( ).f_lineno + 1 raise ValueError( "primary" ) except: with panic( log ): pass def try_and_panic_with_secondary( self ): try: self.line_of_primary_exc = inspect.currentframe( ).f_lineno + 1 raise ValueError( "primary" ) except: with panic( log ): raise RuntimeError( "secondary" ) def try_and_nested_panic_with_secondary( self ): try: self.line_of_primary_exc = inspect.currentframe( ).f_lineno + 1 raise ValueError( "primary" ) except: with panic( log ): with panic( log ): raise RuntimeError( "secondary" ) def __assert_raised_exception_is_primary( self ): exc_type, exc_value, exc_traceback = sys.exc_info( ) self.assertEquals( exc_type, ValueError ) self.assertEquals( exc_value.message, "primary" ) while exc_traceback.tb_next is not None: exc_traceback = exc_traceback.tb_next self.assertEquals( exc_traceback.tb_lineno, self.line_of_primary_exc ) bd2k-python-lib-1.14a1.dev43/src/bd2k/util/test/test_files.py0000664000175100017510000000270613173354566024744 0ustar jenkinsjenkins00000000000000from builtins import range from unittest import TestCase from mock import MagicMock, call class TestFiles( TestCase ): if False: from bd2k.util.files import gread, gwrite # See comment in module under test def test_gread( self ): for n in range( 0, 4 ): f = MagicMock( ) # The mock file contains "12". Each read() invocation shall return one byte from that, # followed by the empty string for EOF. f.read.side_effect = [ '1', '2', '' ] # Read n bytes greedily # noinspection PyTypeChecker self.assertEqual( self.gread( f, n ), "12"[ :n ] ) # First call to read() should request n bytes and then one less on each subsequent call. 
self.assertEqual( f.mock_calls, [ call.read( i ) for i in range( n, 0, -1 ) ] ) def test_gwrite( self ): for n in range( 0, 3 ): f = MagicMock( ) # Each write invocation shall write a single byte. f.write.side_effect = [ 1 ] * n s = "12"[ :n ] # noinspection PyTypeChecker self.gwrite( f, s ) # The first call to write() should be passed the entire string, minus one byte off # the front for each subsequent call. self.assertEqual( f.mock_calls, [ call.write( s[ i: ] ) for i in range( 0, n ) ] ) bd2k-python-lib-1.14a1.dev43/src/bd2k/util/test/test_d32.py0000664000175100017510000000304513173354566024227 0ustar jenkinsjenkins00000000000000# Copyright (c) 2014 Dominic Tarr # Copyright (c) 2015 Hannes Schmidt # # Permission is hereby granted, free of charge, to any person obtaining a copy of this software # and associated documentation files (the "Software"), to deal in the Software without # restriction, including without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all copies or # substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# Inspired by JavaScript code found at https://github.com/dominictarr/d64 from __future__ import absolute_import from builtins import map from builtins import range from unittest import TestCase from bd2k.util.d32 import standard as d32 import os class TestD32( TestCase ): def test( self ): l = [ os.urandom( i ) for i in range( 1000 ) ] self.assertEqual( list(map( d32.decode, sorted( map( d32.encode, l ) ) )), sorted( l ) ) bd2k-python-lib-1.14a1.dev43/src/bd2k/util/xml/0000775000175100017510000000000013173354616022041 5ustar jenkinsjenkins00000000000000bd2k-python-lib-1.14a1.dev43/src/bd2k/util/xml/__init__.py0000664000175100017510000000002613173354566024154 0ustar jenkinsjenkins00000000000000__author__ = 'hannes' bd2k-python-lib-1.14a1.dev43/src/bd2k/util/xml/builder.py0000664000175100017510000001730013173354566024046 0ustar jenkinsjenkins00000000000000# This is a port from lxml.builder.E which itself was inspired by an idea by the creator of # ElementTree (http://effbot.org/zone/element-builder.htm). Support for namespaces was removed. # # -- Hannes # # Element generator factory by Fredrik Lundh. 
# # Source: # http://online.effbot.org/2006_11_01_archive.htm#et-builder # http://effbot.python-hosting.com/file/stuff/sandbox/elementlib/builder.py # # -------------------------------------------------------------------- # The ElementTree toolkit is # # Copyright (c) 1999-2004 by Fredrik Lundh # # By obtaining, using, and/or copying this software and/or its # associated documentation, you agree that you have read, understood, # and will comply with the following terms and conditions: # # Permission to use, copy, modify, and distribute this software and # its associated documentation for any purpose and without fee is # hereby granted, provided that the above copyright notice appears in # all copies, and that both that copyright notice and this permission # notice appear in supporting documentation, and that the name of # Secret Labs AB or the author not be used in advertising or publicity # pertaining to distribution of the software without specific, written # prior permission. # # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE # OF THIS SOFTWARE. # -------------------------------------------------------------------- from __future__ import absolute_import from past.builtins import basestring from builtins import object """ The ``E`` Element factory for generating XML documents. 
""" import xml.etree.ElementTree as ET try: from functools import partial except ImportError: # fake it for pre-2.5 releases def partial(func, tag): return lambda *args, **kwargs: func(tag, *args, **kwargs) try: callable except NameError: # Python 3 def callable(f): return hasattr(f, '__call__') try: basestring except NameError: basestring = str try: unicode except NameError: unicode = str class ElementMaker(object): """Element generator factory. Unlike the ordinary Element factory, the E factory allows you to pass in more than just a tag and some optional attributes; you can also pass in text and other elements. The text is added as either text or tail attributes, and elements are inserted at the right spot. Some small examples:: >>> import xml.etree.ElementTree as ET >>> ET.tostring(E("tag")) '' >>> ET.tostring(E("tag", "text")) 'text' >>> ET.tostring(E("tag", "text", key="value")) 'text' >>> ET.tostring(E("tag", E("subtag", "text"), "tail")) 'texttail' For simple tags, the factory also allows you to write ``E.tag(...)`` instead of ``E('tag', ...)``:: >>> ET.tostring(E.tag()) '' >>> ET.tostring(E.tag("text")) 'text' >>> ET.tostring(E.tag(E.subtag("text"), "tail")) 'texttail' Here's a somewhat larger example; this shows how to generate HTML documents, using a mix of prepared factory functions for inline elements, nested ``E.tag`` calls, and embedded XHTML fragments:: # some common inline elements A = E.a I = E.i B = E.b def CLASS(v): # helper function, 'class' is a reserved word return {'class': v} page = ( E.html( E.head( E.title("This is a sample document") ), E.body( E.h1("Hello!", CLASS("title")), E.p("This is a paragraph with ", B("bold"), " text in it!"), E.p("This is another paragraph, with a ", A("link", href="http://www.python.org"), "."), E.p("Here are some reservered characters: ."), ET.XML("

And finally, here is an embedded XHTML fragment.

"), ) ) ) print ET.tostring(page) Here's a prettyprinted version of the output from the above script:: This is a sample document

Hello!

This is a paragraph with bold text in it!

This is another paragraph, with link.

Here are some reservered characters: <spam&egg>.

And finally, here is an embedded XHTML fragment.

""" def __init__(self, typemap=None, namespace=None, makeelement=None): if namespace is not None: self._namespace = '{' + namespace + '}' else: self._namespace = None if makeelement is not None: assert callable(makeelement) self._makeelement = makeelement else: self._makeelement = ET.Element # initialize type map for this element factory if typemap: typemap = typemap.copy() else: typemap = {} def add_text(elem, item): try: elem[-1].tail = (elem[-1].tail or "") + item except IndexError: elem.text = (elem.text or "") + item def add_cdata(elem, cdata): if elem.text: raise ValueError("Can't add a CDATA section. Element already has some text: %r" % elem.text) elem.text = cdata if str not in typemap: typemap[str] = add_text if unicode not in typemap: typemap[unicode] = add_text # if ET.CDATA not in typemap: # typemap[ET.CDATA] = add_cdata def add_dict(elem, item): attrib = elem.attrib for k, v in list(item.items()): if isinstance(v, basestring): attrib[k] = v else: attrib[k] = typemap[type(v)](None, v) if dict not in typemap: typemap[dict] = add_dict self._typemap = typemap def __call__(self, tag, *children, **attrib): get = self._typemap.get if self._namespace is not None and tag[0] != '{': tag = self._namespace + tag elem = self._makeelement(tag) if attrib: get(dict)(elem, attrib) for item in children: if callable(item): item = item() t = get(type(item)) if t is None: if ET.iselement(item): elem.append(item) continue for basetype in type(item).__mro__: # See if the typemap knows of any of this type's bases. 
t = get(basetype) if t is not None: break else: raise TypeError("bad argument type: %s(%r)" % (type(item).__name__, item)) v = t(elem, item) if v: get(type(v))(elem, v) return elem def __getattr__(self, tag): return partial(self, tag) # create factory object E = ElementMaker() bd2k-python-lib-1.14a1.dev43/src/bd2k/util/retry.py0000664000175100017510000000777413173354566023003 0ustar jenkinsjenkins00000000000000from __future__ import absolute_import from future import standard_library standard_library.install_aliases() from builtins import next import time import urllib.request, urllib.error, urllib.parse from contextlib import contextmanager import logging log = logging.getLogger( __name__ ) # noinspection PyUnusedLocal def never( exception ): return False def retry( delays=(0, 1, 1, 4, 16, 64), timeout=300, predicate=never ): """ Retry an operation while the failure matches a given predicate and until a given timeout expires, waiting a given amount of time in between attempts. This function is a generator that yields contextmanagers. See doctests below for example usage. :param Iterable[float] delays: an interable yielding the time in seconds to wait before each retried attempt, the last element of the iterable will be repeated. :param float timeout: a overall timeout that should not be exceeded for all attempts together. This is a best-effort mechanism only and it won't abort an ongoing attempt, even if the timeout expires during that attempt. :param Callable[[Exception],bool] predicate: a unary callable returning True if another attempt should be made to recover from the given exception. The default value for this parameter will prevent any retries! :return: a generator yielding context managers, one per attempt :rtype: Iterator Retry for a limited amount of time: >>> true = lambda _:True >>> false = lambda _:False >>> i = 0 >>> for attempt in retry( delays=[0], timeout=.1, predicate=true ): ... with attempt: ... i += 1 ... 
raise RuntimeError('foo') Traceback (most recent call last): ... RuntimeError: foo >>> i > 1 True If timeout is 0, do exactly one attempt: >>> i = 0 >>> for attempt in retry( timeout=0 ): ... with attempt: ... i += 1 ... raise RuntimeError( 'foo' ) Traceback (most recent call last): ... RuntimeError: foo >>> i 1 Don't retry on success: >>> i = 0 >>> for attempt in retry( delays=[0], timeout=.1, predicate=true ): ... with attempt: ... i += 1 >>> i 1 Don't retry on unless predicate returns True: >>> i = 0 >>> for attempt in retry( delays=[0], timeout=.1, predicate=false): ... with attempt: ... i += 1 ... raise RuntimeError( 'foo' ) Traceback (most recent call last): ... RuntimeError: foo >>> i 1 """ if timeout > 0: go = [ None ] @contextmanager def repeated_attempt( delay ): try: yield except Exception as e: if time.time( ) + delay < expiration and predicate( e ): log.info( 'Got %s, trying again in %is.', e, delay ) time.sleep( delay ) else: raise else: go.pop( ) delays = iter( delays ) expiration = time.time( ) + timeout delay = next( delays ) while go: yield repeated_attempt( delay ) delay = next( delays, delay ) else: @contextmanager def single_attempt( ): yield yield single_attempt( ) default_delays = (0, 1, 1, 4, 16, 64) default_timeout = 300 def retryable_http_error( e ): return isinstance( e, urllib.error.HTTPError ) and e.code in ('503', '408', '500') def retry_http( delays=default_delays, timeout=default_timeout, predicate=retryable_http_error ): """ >>> i = 0 >>> for attempt in retry_http(timeout=5): ... with attempt: ... i += 1 ... raise urllib.error.HTTPError('http://www.test.com', '408', 'some message', {}, None) Traceback (most recent call last): ... 
HTTPError: HTTP Error 408: some message >>> i > 1 True """ return retry( delays=delays, timeout=timeout, predicate=predicate ) bd2k-python-lib-1.14a1.dev43/src/bd2k/util/strings.py0000664000175100017510000000737413173354566023323 0ustar jenkinsjenkins00000000000000# coding=utf-8 from builtins import str from builtins import next from builtins import range import inspect def to_english( iterable, separator=", ", conjunction=' and ', empty='empty', wrapper=None, pair_conjunction=None): """ Convert list to a string containing an enumeration in plain English. :param iterable: an iterable of strings or objects that can be cast to a string :param separator: the text to insert between elements :param conjunction: the text used to connect the final element :param empty: the text to be used to represent an empty iterable :param wrapper: the text to surround the elements :param pair_conjunction: the conjunction to use between elements if there are exactly two of them, defaults to conjunction >>> to_english( [], empty='nada' ) 'nada' >>> to_english( [ 1 ] ) '1' >>> to_english( [ 1, 2 ], conjunction=' or ' ) '1 or 2' >>> to_english( [ 1, 2, 3 ], conjunction=' or ') '1, 2 or 3' >>> to_english( [ 1, 2, 3 ], separator='; ', conjunction=' or ') '1; 2 or 3' >>> to_english( [ 1, 2, 3 ], conjunction=', and ', pair_conjunction=' and ' ) '1, 2, and 3' >>> to_english( [ 1, 2 ], conjunction=', and ', pair_conjunction=' and ' ) '1 and 2' >>> to_english( [ 1 ], conjunction=', and ', pair_conjunction=' and ' ) '1' """ i = iter( iterable ) try: x = next(i) except StopIteration: return empty r = [ ] while True: x = str( x ) if wrapper is not None: x = wrapper + x + wrapper try: n = next(i) except StopIteration: if len(r) > 2: r.append( conjunction ) elif len(r) > 0: r.append( conjunction if pair_conjunction is None else pair_conjunction ) r.append( x ) break else: if r: r.append( separator ) r.append( x ) x = n return str(''.join( r )) def interpolate( template, skip_frames=0, **kwargs ): """ 
Interpolate {…} placeholders in the given template string with the given values or the local variables in the calling scope. The syntax of the format string is the same as for the built-in string format function. Explicitly passed keyword arguments take precedence over local variables which take precedence over global variables. Unlike with Python scoping rules, only the variables in a single frame are examined. Example usage: >>> x = 1 >>> interpolate( "{x}" ) '1' >>> interpolate( "{x}", x=2 ) '2' >>> interpolate( "{x} {y}", y=2 ) '1 2' Use from bd2k.util.strings import interpolate as fmt to import this function under a shortened alias. """ return __interpolate( template, skip_frames, kwargs ) def interpolate_dict( template, dictionary, skip_frames=0 ): """ Equivalent to interpolate( template, skip_frames, **dictionary ) Example usage: >>> x = 1 >>> interpolate_dict( "{x}", {} ) '1' >>> interpolate_dict( "{x}", dict(x=2) ) '2' >>> interpolate_dict( "{x} {y}", dict(y=2) ) '1 2' """ return __interpolate( template, skip_frames, dictionary ) # This is a separate function such that the depth to the client stack frame is the same for # interpolate() and interpolate_dict() def __interpolate( template, skip_frames, dictionary ): frame = inspect.currentframe( ) for i in range( skip_frames + 2 ): prev_frame = frame frame = frame.f_back del prev_frame try: env = frame.f_globals.copy( ) env.update( frame.f_locals ) env.update( dictionary ) finally: del frame return template.format( **env ) bd2k-python-lib-1.14a1.dev43/src/bd2k/util/d32.py0000664000175100017510000001122113173354566022204 0ustar jenkinsjenkins00000000000000from __future__ import division # Copyright (c) 2015 Hannes Schmidt # # Permission is hereby granted, free of charge, to any person obtaining a copy of this software # and associated documentation files (the "Software"), to deal in the Software without # restriction, including without limitation the rights to use, copy, modify, merge, publish, # distribute, 
# Copyright (c) 2015 Hannes Schmidt
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software
# and associated documentation files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

# Inspired by Dominic Tarr's JavaScript at https://github.com/dominictarr/d64

from builtins import str
from builtins import range
from builtins import object


class D32( object ):
    """
    Base32 encoding and decoding without padding, and using an arbitrary alphabet.

    encode() takes a byte string (bytes or bytearray, or str on Python 2) and returns text;
    decode() takes that text and returns bytes.
    """

    def __init__( self, alphabet ):
        """
        :param alphabet: the 32 symbols to encode with, as text or bytes
        """
        super( D32, self ).__init__( )
        if isinstance( alphabet, str ):
            # Accept a text alphabet on Python 3 where bytearray(str) would fail.
            alphabet = alphabet.encode( 'ascii' )
        self.alphabet = bytearray( alphabet )
        # Reverse lookup from alphabet byte to its 5-bit value. Sized 256 (the
        # original used 255) so an alphabet containing byte 0xFF can't overflow.
        self.lookup = bytearray( 256 )
        for i in range( 32 ):
            self.lookup[ self.alphabet[ i ] ] = i

    def encode( self, d ):
        """
        Encode a byte string into text using this codec's alphabet.

        >>> encode = standard.encode
        >>> encode(b'') == ''
        True
        >>> encode(b'\\0') == '22'
        True
        >>> encode(b'\\xff') == 'zw'
        True
        >>> encode(b'\\0\\1\\2\\3\\4') == '222k62s6'
        True
        >>> encode(b'\\0\\1\\2\\3\\4\\5') == '222k62s62o'
        True
        """
        m = len( d )
        # Number of output symbols: ceil( m * 8 / 5 ), 5 bits per symbol.
        n = (m * 8 + 4) // 5
        padding = 8 - n % 8
        e = bytearray( n + padding )
        i, j = 0, 0
        a = self.alphabet
        while i < m:
            if m - i < 5:
                # Zero-pad the final group to a full 5 input bytes.
                g = bytearray( d[ i: ] ) + bytearray( 5 - (m - i) )
            else:
                g = bytearray( d[ i:i + 5 ] )
            # bit                       1          2          3
            # bit   01234567 89012345 67890123 45678901 23456789
            # byte  00000000 11111111 22222222 33333333 44444444
            # group 00000111 11222223 33334444 45555566 66677777
            e[ j + 0 ] = a[ g[ 0 ] >> 3 ]
            e[ j + 1 ] = a[ g[ 0 ] << 2 & 31 | g[ 1 ] >> 6 ]
            e[ j + 2 ] = a[ g[ 1 ] >> 1 & 31 ]
            e[ j + 3 ] = a[ g[ 1 ] << 4 & 31 | g[ 2 ] >> 4 ]
            e[ j + 4 ] = a[ g[ 2 ] << 1 & 31 | g[ 3 ] >> 7 ]
            e[ j + 5 ] = a[ g[ 3 ] >> 2 & 31 ]
            e[ j + 6 ] = a[ g[ 3 ] << 3 & 31 | g[ 4 ] >> 5 ]
            e[ j + 7 ] = a[ g[ 4 ] & 31 ]
            j += 8
            i += 5
        # Strip the padding and return text. The original's str( e[ :-padding ] )
        # returned the repr of a bytearray on Python 3 instead of the encoded text.
        return e[ :-padding ].decode( 'ascii' )

    def decode( self, e ):
        """
        Decode text produced by encode(), returning bytes.

        >>> decode = standard.decode
        >>> decode('zw') == b'\\xff'
        True
        >>> decode('222k62s6') == b'\\x00\\x01\\x02\\x03\\x04'
        True
        """
        n = len( e )
        m = n * 5 // 8
        padding = 5 - m % 5
        d = bytearray( m + padding )
        i, j = 0, 0
        l = self.lookup
        while j < n:
            if n - j < 8:
                # Zero-pad the final group to a full 8 input symbols.
                g = [ l[ ord( x ) ] for x in e[ j: ] ] + [ 0 ] * (8 - (n - j))
            else:
                g = [ l[ ord( x ) ] for x in e[ j:j + 8 ] ]
            # bit                       1          2          3
            # bit   01234567 89012345 67890123 45678901 23456789
            # byte  00000000 11111111 22222222 33333333 44444444
            # group 00000111 11222223 33334444 45555566 66677777
            d[ i + 0 ] = g[ 0 ] << 3 & 255 | g[ 1 ] >> 2
            d[ i + 1 ] = g[ 1 ] << 6 & 255 | g[ 2 ] << 1 & 255 | g[ 3 ] >> 4
            d[ i + 2 ] = g[ 3 ] << 4 & 255 | g[ 4 ] >> 1
            d[ i + 3 ] = g[ 4 ] << 7 & 255 | g[ 5 ] << 2 & 255 | g[ 6 ] >> 3
            d[ i + 4 ] = g[ 6 ] << 5 & 255 | g[ 7 ]
            j += 8
            i += 5
        return bytes( d[ :-padding ] )


# A variant of Base32 that maintains the lexicographical ordering such that for any given list
# of strings l, map( decode, sorted( map( standard.encode, l ) ) ) == sorted( l )
standard = D32( '234567abcdefghijklmnopqrstuvwxyz' )

# A reimplementation of base64.b32encode and base64.b32decode, but faster and without padding:
base32 = D32( 'abcdefghijklmnopqrstuvwxyz234567' )
                # (continuation of CredentialsTest.cleanUp) Remove the cache file,
                # tolerating its absence.
                os.unlink( self.cache_path )
            except OSError as e:
                if e.errno == errno.ENOENT:
                    pass
                else:
                    raise

    def tearDown( self ):
        super( CredentialsTest, self ).tearDown( )
        self.cleanUp( )

    def test_metadata_credential_caching( self ):
        """
        Brute forces many concurrent requests for getting temporary credentials. If you
        comment out the calls to enable_metadata_credential_caching, you should see some
        failures due to requests timing out. The test will also take much longer in that case.
        """
        num_tests = 1000
        num_processes = 32
        # Get key without caching
        access_key = get_access_key( )
        self.assertFalse( os.path.exists( self.cache_path ) )
        enable_metadata_credential_caching( )
        # Again for idempotence
        enable_metadata_credential_caching( )
        try:
            futures = [ ]
            from multiprocessing import Pool
            pool = Pool( num_processes )
            try:
                for i in range( num_tests ):
                    futures.append( pool.apply_async( get_access_key ) )
            except:
                pool.close( )
                pool.terminate( )
                raise
            else:
                pool.close( )
                pool.join( )
        finally:
            disable_metadata_credential_caching( )
            # Again for idempotence
            disable_metadata_credential_caching( )
        # The cache file only exists if temporary (IAM role) credentials were in play.
        self.assertEquals( access_key is not None, os.path.exists( self.cache_path ) )
        self.assertEquals( len( futures ), num_tests )
        access_keys = [ f.get( ) for f in futures ]
        self.assertEquals( len( access_keys ), num_tests )
        # Every concurrent request must have observed the same, single access key.
        access_keys = set( access_keys )
        self.assertEquals( len( access_keys ), 1 )
        self.assertEquals( access_keys.pop( ), access_key )


import errno
import logging
import threading
import time
from datetime import datetime

import os

from bd2k.util.files import mkdir_p

log = logging.getLogger( __name__ )

# On-disk location of the credential cache shared between processes.
cache_path = '~/.cache/aws/cached_temporary_credentials'

datetime_format = "%Y-%m-%dT%H:%M:%SZ"  # incidentally the same as the format used by AWS


def datetime_to_str( dt ):
    """
    Convert a naive (implicitly UTC) datetime object into a string, explicitly UTC.

    >>> datetime_to_str( datetime( 1970, 1, 1, 0, 0, 0 ) )
    '1970-01-01T00:00:00Z'
    """
    return dt.strftime( datetime_format )


def str_to_datetime( s ):
    """
    Convert a string, explicitly UTC into a naive (implicitly UTC) datetime object.

    >>> str_to_datetime( '1970-01-01T00:00:00Z' )
    datetime.datetime(1970, 1, 1, 0, 0)

    Just to show that the constructor args for seconds and microseconds are optional:
    >>> datetime(1970, 1, 1, 0, 0, 0)
    datetime.datetime(1970, 1, 1, 0, 0)
    """
    return datetime.strptime( s, datetime_format )


# Guards the monkey-patching of boto's Provider so enable/disable are idempotent
# and safe to call from multiple threads.
monkey_patch_lock = threading.RLock( )

# Holds boto's original method while the patch is active, None otherwise.
_populate_keys_from_metadata_server_orig = None


def enable_metadata_credential_caching( ):
    """
    Monkey-patches Boto to allow multiple processes using it to share one set of cached,
    temporary IAM role credentials. This helps avoid hitting request limits imposed on the
    metadata service when too many processes concurrently request those credentials. Function
    is idempotent. This function should be called before any AWS connections attempts are
    made with Boto.
    """
    global _populate_keys_from_metadata_server_orig
    with monkey_patch_lock:
        if _populate_keys_from_metadata_server_orig is None:
            from boto.provider import Provider
            _populate_keys_from_metadata_server_orig = Provider._populate_keys_from_metadata_server
            Provider._populate_keys_from_metadata_server = _populate_keys_from_metadata_server


def disable_metadata_credential_caching( ):
    """
    Reverse the effect of enable_metadata_credential_caching()
    """
    global _populate_keys_from_metadata_server_orig
    with monkey_patch_lock:
        if _populate_keys_from_metadata_server_orig is not None:
            from boto.provider import Provider
            Provider._populate_keys_from_metadata_server = _populate_keys_from_metadata_server_orig
            _populate_keys_from_metadata_server_orig = None


def _populate_keys_from_metadata_server( self ):
    # Replacement for boto's Provider._populate_keys_from_metadata_server: reads the
    # credentials from the on-disk cache if fresh, otherwise races other processes to
    # refresh the cache, with exactly one winner talking to the metadata service.
    global _populate_keys_from_metadata_server_orig
    path = os.path.expanduser( cache_path )
    tmp_path = path + '.tmp'
    while True:
        log.debug( 'Attempting to read cached credentials from %s.', path )
        try:
            with open( path, 'r' ) as f:
                content = f.read( )
                if content:
                    # Cache format: four lines - access key, secret key, token, expiry.
                    record = content.split( '\n' )
                    assert len( record ) == 4
                    self._access_key = record[ 0 ]
                    self._secret_key = record[ 1 ]
                    self._security_token = record[ 2 ]
                    self._credential_expiry_time = str_to_datetime( record[ 3 ] )
                else:
                    # An empty cache file records that credentials are permanent.
                    log.debug( '%s is empty. Credentials are not temporary.', path )
                    return
        except IOError as e:
            if e.errno == errno.ENOENT:
                log.debug( 'Cached credentials are missing.' )
                dir_path = os.path.dirname( path )
                if not os.path.exists( dir_path ):
                    log.debug( 'Creating parent directory %s', dir_path )
                    # A race would be ok at this point
                    mkdir_p( dir_path )
            else:
                raise
        else:
            if self._credentials_need_refresh( ):
                log.debug( 'Cached credentials are expired.' )
            else:
                log.debug( 'Cached credentials exist and are still fresh.' )
                return
        # We get here if credentials are missing or expired
        log.debug( 'Racing to create %s.', tmp_path )
        # Only one process, the winner, will succeed
        try:
            fd = os.open( tmp_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600 )
        except OSError as e:
            if e.errno == errno.EEXIST:
                log.debug( 'Lost the race to create %s. Waiting on winner to remove it.',
                           tmp_path )
                while os.path.exists( tmp_path ):
                    time.sleep( .1 )
                log.debug( 'Winner removed %s. Trying from the top.', tmp_path )
            else:
                raise
        else:
            try:
                log.debug( 'Won the race to create %s. '
                           'Requesting credentials from metadata service.', tmp_path )
                _populate_keys_from_metadata_server_orig( self )
            except:
                os.close( fd )
                fd = None
                log.debug( 'Failed to obtain credentials, removing %s.', tmp_path )
                # This unblocks the losers.
                os.unlink( tmp_path )
                # Bail out. It's too likely to happen repeatedly
                raise
            else:
                if self._credential_expiry_time is None:
                    os.close( fd )
                    fd = None
                    log.debug( 'Credentials are not temporary. '
                               'Leaving %s empty and renaming it to %s.', tmp_path, path )
                else:
                    log.debug( 'Writing credentials to %s.', tmp_path )
                    with os.fdopen( fd, 'w' ) as fh:
                        fd = None
                        fh.write( '\n'.join( [ self._access_key,
                                               self._secret_key,
                                               self._security_token,
                                               datetime_to_str(
                                                   self._credential_expiry_time ) ] ) )
                    log.debug( 'Wrote credentials to %s. '
                               'Renaming it to %s.', tmp_path, path )
                # Atomic publish of the finished cache file.
                os.rename( tmp_path, path )
                return
            finally:
                if fd is not None:
                    os.close( fd )


import errno
import os


def mkdir_p( path ):
    """
    The equivalent of mkdir -p: create the directory and any missing parents,
    ignoring the error raised when the directory already exists.
    """
    try:
        os.makedirs( path )
    except OSError as exc:
        if exc.errno == errno.EEXIST and os.path.isdir( path ):
            pass
        else:
            raise
""" try: os.remove( path ) except OSError as e: if e.errno == errno.ENOENT: pass else: raise def copyfileobj( src, dst, limit=None, bufsize=1024 * 1024 ): """ Copy the contents of one file object to another file object. If limit is given, stop after at most limit bytes were copied. The copying will begin at the current file pointer of each file object. :param src: the file object to copy from :param dst: the file object to copy to :param limit: the maximum number of bytes to copy or None if all remaining bytes in src should be copied :param bufsize: the size of the intermediate copy buffer. No more than that many bytes will ever be read from src or written to dst at any one time. :return: None if limit is None, otherwise the difference between limit and the number of bytes actually copied. This will be > 0 if and only if the source file hit EOF before limit number of bytes could be read. >>> import tempfile >>> with open('/dev/urandom') as f1: ... with tempfile.TemporaryFile() as f2: ... copyfileobj(f1,f2,limit=100) ... f2.seek(60) ... with tempfile.TemporaryFile() as f3: ... copyfileobj(f2,f3), f2.tell(), f3.tell() (None, 100, 40) """ while limit is None or limit > 0: buf = src.read( bufsize if limit is None or bufsize < limit else limit ) if buf: if limit is not None: limit -= len( buf ) assert limit >= 0 dst.write( buf ) else: return limit if False: # These are not needed for Python 2.7 as Python's builtin file object's read() and write() # method are greedy. For Python 3.x these may be useful. def gread( readable, n ): """ Greedy read. Read until readable is exhausted, and error occurs or the given number of bytes have been read. If it returns fewer than the requested number bytes if and only if the end of file has been reached. :type readable: io.FileIO """ bufs = [ ] i = 0 while i < n: buf = readable.read( n - i ) m = len( buf ) if m == 0: break bufs.append( buf ) i += m return ''.join( bufs ) def gwrite( writable, buf ): """ Greedy write. 
Write until the entire buffer has been written to or an error occurs. :type writable: io.FileIO[str|bytearray] :type buf: str|bytearray """ n = len( buf ) i = 0 while i < n: i += writable.write( buf[ i: ] ) bd2k-python-lib-1.14a1.dev43/src/bd2k/util/collections.py0000664000175100017510000001001613173354566024133 0ustar jenkinsjenkins00000000000000from __future__ import absolute_import from builtins import next import collections from itertools import dropwhile class OrderedSet( collections.MutableSet ): """ An ordered set from http://code.activestate.com/recipes/576694/ Note: Maybe leaky, may have O(N) lookup by index TODO: Consider https://github.com/LuminosoInsight/ordered-set which uses a native Python list instead of a linked list >>> s = OrderedSet( 'abracadaba' ) >>> s OrderedSet(['a', 'b', 'r', 'c', 'd']) >>> t = OrderedSet( 'simsalabim' ) >>> t OrderedSet(['s', 'i', 'm', 'a', 'l', 'b']) >>> s | t OrderedSet(['a', 'b', 'r', 'c', 'd', 's', 'i', 'm', 'l']) >>> s & t OrderedSet(['a', 'b']) >>> s - t OrderedSet(['r', 'c', 'd']) >>> t - s OrderedSet(['s', 'i', 'm', 'l']) >>> OrderedSet( reversed( s ) ) OrderedSet(['d', 'c', 'r', 'b', 'a']) >>> s.pop() 'd' >>> s OrderedSet(['a', 'b', 'r', 'c']) >>> s.discard('b') >>> s OrderedSet(['a', 'r', 'c']) >>> s.pop( last=False ) 'a' >>> s OrderedSet(['r', 'c']) >>> s.union( t ) >>> s OrderedSet(['r', 'c', 's', 'i', 'm', 'a', 'l', 'b']) >>> s = OrderedSet() >>> s OrderedSet() >>> s.pop() Traceback (most recent call last): .... 
# collections.MutableSet moved to collections.abc in Python 3.3 and the old alias
# was removed in 3.10; fall back to the old location on Python 2.
try:
    from collections.abc import MutableSet
except ImportError:
    from collections import MutableSet
from itertools import dropwhile


class OrderedSet( MutableSet ):
    """
    An ordered set from http://code.activestate.com/recipes/576694/

    Note: Maybe leaky, may have O(N) lookup by index

    TODO: Consider https://github.com/LuminosoInsight/ordered-set which uses a native Python
    list instead of a linked list

    >>> s = OrderedSet( 'abracadaba' )
    >>> s
    OrderedSet(['a', 'b', 'r', 'c', 'd'])
    >>> t = OrderedSet( 'simsalabim' )
    >>> t
    OrderedSet(['s', 'i', 'm', 'a', 'l', 'b'])
    >>> s | t
    OrderedSet(['a', 'b', 'r', 'c', 'd', 's', 'i', 'm', 'l'])
    >>> s & t
    OrderedSet(['a', 'b'])
    >>> s - t
    OrderedSet(['r', 'c', 'd'])
    >>> t - s
    OrderedSet(['s', 'i', 'm', 'l'])
    >>> OrderedSet( reversed( s ) )
    OrderedSet(['d', 'c', 'r', 'b', 'a'])
    >>> s.pop()
    'd'
    >>> s
    OrderedSet(['a', 'b', 'r', 'c'])
    >>> s.discard('b')
    >>> s
    OrderedSet(['a', 'r', 'c'])
    >>> s.pop( last=False )
    'a'
    >>> s
    OrderedSet(['r', 'c'])
    >>> s.union( t )
    >>> s
    OrderedSet(['r', 'c', 's', 'i', 'm', 'a', 'l', 'b'])
    >>> s = OrderedSet()
    >>> s
    OrderedSet()
    >>> s.pop()
    Traceback (most recent call last):
    ....
    KeyError: 'set is empty'
    >>> OrderedSet( "aba" ) == OrderedSet( "ab" )
    True
    >>> OrderedSet( "aba" ) == OrderedSet( "abc" )
    False
    >>> OrderedSet( "aba" ) == OrderedSet( "ba" )
    False
    >>> OrderedSet( "aba" ) == set( "ba" )
    True
    """

    def __init__( self, iterable=None ):
        # Doubly linked list threaded through a dict: each map value is
        # [key, prev, next], with 'end' as the sentinel node.
        self.end = end = [ ]
        end += [ None, end, end ]  # sentinel node for doubly linked list
        self.map = { }  # key --> [key, prev, next]
        if iterable is not None:
            self |= iterable

    def __len__( self ):
        return len( self.map )

    def __contains__( self, key ):
        return key in self.map

    def add( self, key ):
        if key not in self.map:
            # Splice the new node in just before the sentinel (i.e. at the end).
            end = self.end
            curr = end[ 1 ]
            curr[ 2 ] = end[ 1 ] = self.map[ key ] = [ key, curr, end ]

    def discard( self, key ):
        if key in self.map:
            # Unlink the node from the list.
            key, prev, next = self.map.pop( key )
            prev[ 2 ] = next
            next[ 1 ] = prev

    def __iter__( self ):
        end = self.end
        curr = end[ 2 ]
        while curr is not end:
            yield curr[ 0 ]
            curr = curr[ 2 ]

    def __reversed__( self ):
        end = self.end
        curr = end[ 1 ]
        while curr is not end:
            yield curr[ 0 ]
            curr = curr[ 1 ]

    def pop( self, last=True ):
        """
        Remove and return the last element, or the first if last is False.
        Raises KeyError if the set is empty.
        """
        if not self:
            raise KeyError( 'set is empty' )
        key = self.end[ 1 ][ 0 ] if last else self.end[ 2 ][ 0 ]
        self.discard( key )
        return key

    def __repr__( self ):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list( self ))

    def __eq__( self, other ):
        # Order matters only when comparing against another OrderedSet.
        if isinstance( other, OrderedSet ):
            return len( self ) == len( other ) and list( self ) == list( other )
        return set( self ) == set( other )

    def union( self, other ):
        self |= other


def rindex( l, v ):
    """
    Like l.index(v) but finds last occurrence of value v in sequence l.

    :type l: anything

    >>> rindex( [0], 0 )
    0
    >>> rindex( [0,0], 0 )
    1
    >>> rindex( [0,1], 0 )
    0
    >>> rindex( [0,1,0,1], 0 )
    2
    >>> rindex( [0,1,0,1], 1 )
    3
    >>> rindex( [0], 1 )
    Traceback (most recent call last):
    ...
    ValueError: 1
    >>> rindex( [None], None )
    0
    >>> rindex( [], None )
    Traceback (most recent call last):
    ...
    ValueError: None
    >>> rindex( "0101", '0')
    2
    >>> rindex( (0,1,0,1), 0 )
    2
    >>> rindex( range(3), 2 )
    2
    """
    try:
        # Walk the sequence backwards (1-based) until the value matches, then
        # convert the reverse position back into a forward index.
        n = next( dropwhile( lambda i_x: v != i_x[ 1 ], enumerate( reversed( l ), 1 ) ) )[ 0 ]
    except StopIteration:
        raise ValueError( v )
    else:
        return len( l ) - n


from builtins import object
from contextlib import contextmanager
import sys


class panic( object ):
    """
    The Python idiom for reraising a primary exception fails when the except block raises a
    secondary exception, e.g. while trying to cleanup. In that case the original exception is
    lost and the secondary exception is reraised. The solution seems to be to save the
    primary exception info as returned from sys.exc_info() and then reraise that. This is a
    contextmanager that should be used like this

    try:
         # do something that can fail
    except:
        with panic( log ):
            # do cleanup that can also fail

    If a logging logger is passed to panic(), any secondary Exception raised within the with
    block will be logged. Otherwise those exceptions are swallowed. At the end of the with
    block the primary exception will be reraised.
    """

    def __init__( self, log=None ):
        super( panic, self ).__init__( )
        self.log = log
        self.exc_info = None

    def __enter__( self ):
        # Capture the primary exception currently being handled.
        self.exc_info = sys.exc_info( )

    def __exit__( self, *exc_info ):
        if self.log is not None and exc_info and exc_info[ 0 ]:
            self.log.warn( "Exception during panic", exc_info=exc_info )
        exc_type, exc_value, traceback = self.exc_info
        # The original used the Python 2-only 'raise type, value, tb' statement, which is a
        # syntax error on Python 3; re-raise via the Python 3 API instead.
        if exc_value is None:
            exc_value = exc_type( )
        raise exc_value.with_traceback( traceback )


class RequirementError( Exception ):
    """
    The exception raised by require(). Where AssertionError is raised when there is likely an
    internal problem within the code base, i.e. a bug, an instance of this class is raised
    when the cause lies outside the code base, e.g. with the user or caller.
    """
    pass


def require( value, message, *message_args ):
    """
    Raise RequirementError with the given message if the given value is considered false. See
    https://docs.python.org/2/library/stdtypes.html#truth-value-testing for a definition of
    which values are false. This function is commonly used for validating user input. It is
    meant to be complementary to assert. See RequirementError for more on that.

    :param Any value: the value to be tested
    :param message: the message for the exception
    :param message_args: optional values for % formatting the given message
    :return: None

    >>> require(1 + 1 == 2, 'You made a terrible mistake')

    >>> require(1 + 1 == 3, 'You made a terrible mistake')  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    RequirementError: You made a terrible mistake

    >>> require(1 + 1 == 3, 'You made a terrible mistake, %s',
    ...         'you fool')  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    RequirementError: You made a terrible mistake, you fool

    >>> require(1 + 1 == 3, 'You made a terrible mistake, %s %s', 'your',
    ...         'majesty')  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    RequirementError: You made a terrible mistake, your majesty
    """
    if not value:
        if message_args:
            message = message % message_args
        raise RequirementError( message )
    ...         print ('Initializing with %s' % n)

    >>> d = DemoConcrete(5)    # Succeeds by calling a concrete __init__()
    Initializing with 5

    >>> d = DemoConcrete.from_int(5)  # Succeeds by calling a concrete from_int()
    Initializing with 10

    >>> DemoABC()              # Fails because from_int() is abstract
    Traceback (most recent call last):
    ...
    TypeError: Can't instantiate abstract class DemoABC with abstract methods from_int

    >>> DemoABC.from_int(5)    # Fails because from_int() is not implemented
    Traceback (most recent call last):
    ...
    TypeError: Can't instantiate abstract class DemoABC with abstract methods from_int
    """

    __isabstractmethod__ = True

    def __init__(self, callable):
        # Mark the wrapped callable abstract before delegating to classmethod.
        callable.__isabstractmethod__ = True
        super(abstractclassmethod, self).__init__(callable)


class abstractstaticmethod( staticmethod ):
    """
    This class defines a decorator that allows the decorated class to be both an abstract
    method and a static method.

    Based on code found at

    http://stackoverflow.com/questions/11217878/python-2-7-combine-abc-abstractmethod-and-classmethod

    >>> from abc import ABCMeta

    >>> class DemoABC:
    ...     __metaclass__ = ABCMeta
    ...
    ...     @abstractstaticmethod
    ...     def f(n):
    ...         raise NotImplementedError()

    >>> class DemoConcrete(DemoABC):
    ...     @staticmethod
    ...     def f(n):
    ...         return (2*n)

    >>> d = DemoABC.f(5)       # Fails because f() is not implemented
    Traceback (most recent call last):
    ...
    NotImplementedError

    >>> DemoConcrete.f(5)      # Succeeds by calling a concrete f()
    10
    """

    __isabstractmethod__ = True

    def __init__(self, callable):
        # Mark the wrapped callable abstract before delegating to staticmethod.
        callable.__isabstractmethod__ = True
        super(abstractstaticmethod, self).__init__(callable)


class InnerClass( object ):
    """
    Note that this is EXPERIMENTAL code.

    A nested class (the inner class) decorated with this will have an additional attribute
    called 'outer' referencing the instance of the nesting class (the outer class) that was
    used to create the inner class. The outer instance does not need to be passed to the
    inner class's constructor, it will be set magically. Shamelessly stolen from

    http://stackoverflow.com/questions/2278426/inner-classes-how-can-i-get-the-outer-class-object-at-construction-time#answer-2278595.

    with names made more descriptive (I hope) and added caching of the BoundInner classes.

    Caveat: Within the inner class, self.__class__ will not be the inner class but a
    dynamically created subclass thereof. Its name will be the same as that of the inner
    class, but its __module__ will be different. There will be one such dynamic subclass per
    inner class and instance of outer class, if that outer class instance created any
    instances of the inner class.

    >>> class Outer(object):
    ...     def new_inner(self):
    ...         # self is an instance of the outer class
    ...         inner = self.Inner()
    ...         # the inner instance's 'outer' attribute is set to the outer instance
    ...         assert inner.outer is self
    ...         return inner
    ...     @InnerClass
    ...     class Inner(object):
    ...         def get_outer(self):
    ...             return self.outer
    ...         @classmethod
    ...         def new_inner(cls):
    ...             return cls()
    >>> o = Outer()
    >>> i = o.new_inner()
    >>> i # doctest: +ELLIPSIS
    <...> bound to <...Outer object at ...>

    >>> i.get_outer() # doctest: +ELLIPSIS
    <...Outer object at ...>

    Now with inheritance for both inner and outer:

    >>> class DerivedOuter(Outer):
    ...     def new_inner(self):
    ...         return self.DerivedInner()
    ...     @InnerClass
    ...     class DerivedInner(Outer.Inner):
    ...         def get_outer(self):
    ...             assert super( DerivedOuter.DerivedInner, self ).get_outer() == self.outer
    ...             return self.outer
    >>> derived_outer = DerivedOuter()
    >>> derived_inner = derived_outer.new_inner()
    >>> derived_inner # doctest: +ELLIPSIS
    <...> bound to <...DerivedOuter object at ...>

    >>> derived_inner.get_outer() # doctest: +ELLIPSIS
    <...DerivedOuter object at ...>

    Test static references:

    >>> Outer.Inner # doctest: +ELLIPSIS
    <class '...Inner'>
    >>> DerivedOuter.Inner # doctest: +ELLIPSIS
    <class '...Inner'>
    >>> DerivedOuter.DerivedInner #doctest: +ELLIPSIS
    <class '...DerivedInner'>

    Can't decorate top-level classes. Unfortunately, this is detected when the instance is
    created, not when the class is defined.

    >>> @InnerClass
    ... class Foo(object):
    ...     pass
    >>> Foo()
    Traceback (most recent call last):
    ...
    RuntimeError: Inner classes must be nested in another class.

    All inner instances should refer to a single outer instance:

    >>> o = Outer()
    >>> o.new_inner().outer == o == o.new_inner().outer
    True

    All inner instances should be of the same class ...

    >>> o.new_inner().__class__ == o.new_inner().__class__
    True

    ... but that class isn't the inner class ...

    >>> o.new_inner().__class__ != Outer.Inner
    True

    ... but a subclass of the inner class.

    >>> isinstance( o.new_inner(), Outer.Inner )
    True

    Static and class methods, e.g. should work, too

    >>> o.Inner.new_inner().outer == o
    True
    """

    def __init__( self, inner_class ):
        super( InnerClass, self ).__init__( )
        # The undecorated inner class; returned as-is for static references.
        self.inner_class = inner_class

    # noinspection PyUnusedLocal
    def __get__( self, instance, owner ):
        # No need to wrap a static reference, i.e. one that is made via 'Outer.' rather
        # than 'self.'
        if instance is None:
            return self.inner_class
        else:
            return self._bind( instance )

    @sync_memoize
    def _bind( self, _outer ):
        # Create (and cache, via sync_memoize) one dynamic subclass of the inner class
        # per outer instance, carrying the outer instance in its 'outer' attribute.
        class BoundInner( self.inner_class ):
            outer = _outer

            def __repr__( self ):
                return "%s bound to %s" % (super( BoundInner, self ).__repr__( ),
                                           repr( _outer ))

        BoundInner.__name__ = self.inner_class.__name__
        BoundInner.__module__ = self.inner_class.__module__
        return BoundInner

    def __call__( *args, **kwargs ):
        # Reached when the decorated class was never accessed through an outer instance,
        # i.e. it was declared at module level rather than nested in a class.
        raise RuntimeError( "Inner classes must be nested in another class." )


from __future__ import absolute_import
import codecs
import types

import logging


class Utf8SyslogFormatter( logging.Formatter ):
    """
    Works around http://bugs.python.org/issue14452
    """

    def format( self, record ):
        origGetMessage = record.getMessage

        def getMessage( _self ):
            msg = origGetMessage( )
            if isinstance( msg, str ):
                try:
                    # First check if we can represent the message as ASCII without losing
                    # information. That way we can avoid writing the BOM unless absolutely
                    # necessary.
msg = msg.encode( 'ascii' ) except UnicodeEncodeError: msg = codecs.BOM + msg.encode( 'utf8' ) return msg types.MethodType( getMessage, record, logging.LogRecord ) record.getMessage = types.MethodType( getMessage, record, logging.LogRecord ) return logging.Formatter.format( self, record ) bd2k-python-lib-1.14a1.dev43/src/bd2k/util/throttle.py0000664000175100017510000001472013173354566023470 0ustar jenkinsjenkins00000000000000from __future__ import absolute_import from builtins import object import time import threading from bd2k.util.threading import BoundedEmptySemaphore class GlobalThrottle(object): """ A thread-safe rate limiter that throttles all threads globally. This should be used to regulate access to a global resource. It can be used as a function/method decorator or as a simple object, using the throttle() method. The token generation starts with the first call to throttle() or the decorated function. Each subsequent call to throttle() will then acquire a token, possibly having to wait until one becomes available. The number of unused tokens will not exceed a limit given at construction time. This is a very basic mechanism to prevent the resource from becoming swamped after longer pauses. """ def __init__( self, min_interval, max_unused ): self.min_interval = min_interval self.semaphore = BoundedEmptySemaphore( max_unused ) self.thread_start_lock = threading.Lock( ) self.thread_started = False self.thread = threading.Thread( target=self.generator ) self.thread.daemon = True def generator( self ): while True: try: self.semaphore.release( ) except ValueError: pass time.sleep( self.min_interval ) def throttle( self, wait=True ): """ If the wait parameter is True, this method returns True after suspending the current thread as necessary to ensure that no less than the configured minimum interval passed since the most recent time an invocation of this method returned True in any thread. 
If the wait parameter is False, this method immediatly returns True if at least the configured minimum interval has passed since the most recent time this method returned True in any thread, or False otherwise. """ # I think there is a race in Thread.start(), hence the lock with self.thread_start_lock: if not self.thread_started: self.thread.start( ) self.thread_started = True return self.semaphore.acquire( blocking=wait ) def __call__( self, function ): def wrapper( *args, **kwargs ): self.throttle( ) return function( *args, **kwargs ) return wrapper class LocalThrottle(object): """ A thread-safe rate limiter that throttles each thread independently. Can be used as a function or method decorator or as a simple object, via its .throttle() method. The use as a decorator is deprecated in favor of throttle(). """ def __init__( self, min_interval ): """ Initialize this local throttle. :param min_interval: The minimum interval in seconds between invocations of the throttle method or, if this throttle is used as a decorator, invocations of the decorated method. """ self.min_interval = min_interval self.per_thread = threading.local( ) self.per_thread.last_invocation = None def throttle( self, wait=True ): """ If the wait parameter is True, this method returns True after suspending the current thread as necessary to ensure that no less than the configured minimum interval has passed since the last invocation of this method in the current thread returned True. If the wait parameter is False, this method immediatly returns True (if at least the configured minimum interval has passed since the last time this method returned True in the current thread) or False otherwise. 
""" now = time.time( ) last_invocation = self.per_thread.last_invocation if last_invocation is not None: interval = now - last_invocation if interval < self.min_interval: if wait: remainder = self.min_interval - interval time.sleep( remainder ) else: return False self.per_thread.last_invocation = now return True def __call__( self, function ): def wrapper( *args, **kwargs ): self.throttle( ) return function( *args, **kwargs ) return wrapper class throttle( object ): """ A context manager for ensuring that the execution of its body takes at least a given amount of time, sleeping if necessary. It is a simpler version of LocalThrottle if used as a decorator. Ensures that body takes at least the given amount of time. >>> start = time.time() >>> with throttle(1): ... pass >>> 1 <= time.time() - start <= 1.1 True Ditto when used as a decorator. >>> @throttle(1) ... def f(): ... pass >>> start = time.time() >>> f() >>> 1 <= time.time() - start <= 1.1 True If the body takes longer by itself, don't throttle. >>> start = time.time() >>> with throttle(1): ... time.sleep(2) >>> 2 <= time.time() - start <= 2.1 True Ditto when used as a decorator. >>> @throttle(1) ... def f(): ... time.sleep(2) >>> start = time.time() >>> f() >>> 2 <= time.time() - start <= 2.1 True If an exception occurs, don't throttle. >>> start = time.time() >>> try: ... with throttle(1): ... raise ValueError('foo') ... except ValueError: ... end = time.time() ... raise Traceback (most recent call last): ... ValueError: foo >>> 0 <= end - start <= 0.1 True Ditto when used as a decorator. >>> @throttle(1) ... def f(): ... raise ValueError('foo') >>> start = time.time() >>> try: ... f() ... except ValueError: ... end = time.time() ... raise Traceback (most recent call last): ... 
ValueError: foo >>> 0 <= end - start <= 0.1 True """ def __init__( self, min_interval ): self.min_interval = min_interval def __enter__( self ): self.start = time.time( ) def __exit__( self, exc_type, exc_val, exc_tb ): if exc_type is None: duration = time.time( ) - self.start remainder = self.min_interval - duration if remainder > 0: time.sleep( remainder ) def __call__( self, function ): def wrapper( *args, **kwargs ): with self: return function( *args, **kwargs ) return wrapper bd2k-python-lib-1.14a1.dev43/src/bd2k/util/hashes.py0000664000175100017510000001021513173354566023071 0ustar jenkinsjenkins00000000000000from builtins import str from builtins import next from past.builtins import basestring def hash_json( hash_obj, value ): """ Compute the hash of a parsed JSON value using the given hash object. This function does not hash the JSON value, it hashes the object tree that is the result of parsing a string in JSON format. Hashables (JSON objects) are hashed entry by entry in order of the lexicographical ordering on the keys. Iterables are hashed in their inherent order. If value or any of its children is an iterable with non-deterministic ordering of its elements, e.g. a set, this method will yield non-deterministic results. :param hash_obj: one of the Hash objects in hashlib, or any other object that has an update(s) method accepting a single string. 
    :type value: int|str|float|Iterable[type(obj)]|Hashable[str,type(obj)]
    :param value: The value to be hashed

    >>> import hashlib
    >>> def actual(x): h = hashlib.md5(); hash_json(h,x); return h.hexdigest()
    >>> def expect(s): h = hashlib.md5(); h.update(s); return h.hexdigest()
    >>> actual(0) == expect('0')
    True
    >>> actual(0.0) == expect('0.0')
    True
    >>> actual(0.1) == expect('0.1')
    True
    >>> actual(True) == expect('true')
    True
    >>> actual(False) == expect('false')
    True
    >>> actual("") == expect('""')
    True
    >>> actual([]) == expect('[]')
    True
    >>> actual([0]) == expect('[0]')
    True
    >>> actual([0,1]) == expect('[0,1]')
    True
    >>> actual({}) == expect('{}')
    True
    >>> actual({'':0}) == expect('{:0}')
    True
    >>> actual({'0':0}) == expect('{0:0}')
    True
    >>> actual({'0':0,'1':1}) == expect('{0:0,1:1}')
    True
    >>> actual({'':[]}) == expect('{:[]}')
    True
    >>> actual([{}]) == expect('[{}]')
    True
    >>> actual({0:0}) # doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    ValueError: Dictionary keys must be strings, not <... 'int'>
    >>> actual(object()) # doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    ValueError: Type <... 'object'> is not supported
    """
    # Dispatch on duck type: mapping -> iterable -> scalar. Each branch delegates to a
    # private helper that feeds a JSON-like token stream into hash_obj.update().
    # NOTE(review): the helpers pass native strings to update(); this assumes a
    # Python-2-style hash object accepting str, not bytes -- confirm before porting.
    try:
        items = iter(value.items( ))
    except AttributeError:
        # Must check for string before testing iterability since strings are iterable
        if isinstance( value, basestring ):
            _hash_string( hash_obj, value )
        else:
            try:
                iterator = iter( value )
            except TypeError:
                # We must check for bool first since it is subclass of int (wrongly, IMHO)
                if isinstance( value, bool ):
                    _hash_bool( hash_obj, value )
                elif isinstance( value, (int, float) ):
                    _hash_number( hash_obj, value )
                else:
                    raise ValueError( 'Type %s is not supported' % type( value ) )
            else:
                _hash_iterable( hash_obj, iterator )
    else:
        _hash_hashable( hash_obj, items )


def _hash_number( hash_obj, n ):
    # Numbers hash as their str() form, matching the doctests above.
    hash_obj.update( str( n ) )


def _hash_bool( hash_obj, b ):
    # JSON spelling of booleans, not Python's.
    hash_obj.update( 'true' if b else 'false' )


def _hash_string( hash_obj, s ):
    hash_obj.update( '"' )
    hash_obj.update( s )
    hash_obj.update( '"' )


def _hash_iterable( hash_obj, items ):
    # Emits '[', comma-separated recursive hashes, ']'.
    hash_obj.update( '[' )
    try:
        item = next( items )
        hash_json( hash_obj, item )
        while True:
            item = next( items )
            hash_obj.update( ',' )
            hash_json( hash_obj, item )
    except StopIteration:
        pass
    hash_obj.update( ']' )


def _hash_hashable( hash_obj, items ):
    # Sort entries by key so equal mappings hash equally regardless of insertion order.
    items = iter( sorted( items ) )
    hash_obj.update( '{' )
    try:
        item = next( items )
        _hash_hashable_item( hash_obj, item )
        while True:
            item = next( items )
            hash_obj.update( ',' )
            _hash_hashable_item( hash_obj, item )
    except StopIteration:
        pass
    hash_obj.update( '}' )


def _hash_hashable_item( hash_obj, k_v ):
    (k, v) = k_v
    if isinstance( k, basestring ):
        # Keys are emitted bare (unquoted), values recursively.
        hash_obj.update( k )
        hash_obj.update( ':' )
        hash_json( hash_obj, v )
    else:
        raise ValueError( 'Dictionary keys must be strings, not %s' % type( k ) )
bd2k-python-lib-1.14a1.dev43/src/bd2k/util/processes.py0000664000175100017510000000224413173354566023627 0ustar  jenkinsjenkins00000000000000import os


def which( name, path=None ):
    """
    Look for an executable file of the given name in the given list of directories,
    or the directories listed in the PATH variable of the
current environment. Roughly the equivalent of the `which` program. Does not work on Windows. :type name: str :param name: the name of the program :type path: Iterable :param path: the directory paths to consider or None if the directories referenced in the PATH environment variable should be used instead :returns: an iterator yielding the full path to every occurrance of an executable file of the given name in a directory on the given path or the PATH environment variable if no path was passed >>> next( which('ls') ) '/bin/ls' >>> list( which('asdalskhvxjvkjhsdasdnbmfiewwewe') ) [] >>> list( which('ls', path=()) ) [] """ if path is None: path = os.environ.get( 'PATH' ) if path is None: return path = path.split( os.pathsep ) for bin_dir in path: executable_path = os.path.join( bin_dir, name ) if os.access( executable_path, os.X_OK ): yield executable_path bd2k-python-lib-1.14a1.dev43/src/bd2k/util/fnmatch.py0000664000175100017510000000751013173354566023242 0ustar jenkinsjenkins00000000000000# Same as Python's fnmatch with the following diferences: # - '/' doesn't match '*' # - added '**' to match anything # - added some unit tests """Filename matching with shell patterns. fnmatch(FILENAME, PATTERN) matches according to the local convention. fnmatchcase(FILENAME, PATTERN) always takes case in account. The functions operate by translating the pattern into a regular expression. They cache the compiled regular expressions for speed. The function translate(PATTERN) returns a regular expression corresponding to PATTERN. (It does not compile it.) """ import re __all__ = [ "filter", "fnmatch", "fnmatchcase", "translate" ] _cache = { } _MAXCACHE = 100 def _purge( ): """Clear the pattern cache""" _cache.clear( ) def fnmatch( name, pat ): """Test whether FILENAME matches PATTERN. Patterns are Unix shell style: * matches everything ? matches any single character [seq] matches any character in seq [!seq] matches any char not in seq An initial period in FILENAME is not special. 
Both FILENAME and PATTERN are first case-normalized if the operating system requires it. If you don't want this, use fnmatchcase(FILENAME, PATTERN). >>> fnmatch('bar', '*' ) True >>> fnmatch('foo/bar', '*' ) False >>> fnmatch('foo/bar', '**' ) True >>> fnmatch('foo/bar', '*/*' ) True >>> fnmatch('foo/bar', '**/*' ) True >>> fnmatch('/bar', '**/*' ) True >>> fnmatch('/', '**' ) True >>> fnmatch('/', '*' ) False """ import os name = os.path.normcase( name ) pat = os.path.normcase( pat ) return fnmatchcase( name, pat ) def filter( names, pat ): """Return the subset of the list NAMES that match PAT""" import os, posixpath result = [ ] pat = os.path.normcase( pat ) if not pat in _cache: res = translate( pat ) if len( _cache ) >= _MAXCACHE: _cache.clear( ) _cache[ pat ] = re.compile( res ) match = _cache[ pat ].match if os.path is posixpath: # normcase on posix is NOP. Optimize it away from the loop. for name in names: if match( name ): result.append( name ) else: for name in names: if match( os.path.normcase( name ) ): result.append( name ) return result def fnmatchcase( name, pat ): """Test whether FILENAME matches PATTERN, including case. This is a version of fnmatch() which doesn't case-normalize its arguments. """ if not pat in _cache: res = translate( pat ) if len( _cache ) >= _MAXCACHE: _cache.clear( ) _cache[ pat ] = re.compile( res ) return _cache[ pat ].match( name ) is not None def translate( pat ): """Translate a shell PATTERN to a regular expression. There is no way to quote meta-characters. """ i, n = 0, len( pat ) res = '' while i < n: c = pat[ i ] i += 1 if c == '*': if i < len(pat) and pat[i] == '*': i += 1 res += '.*' else: res += '[^/]*' elif c == '?': res += '.' 
elif c == '[': j = i if j < n and pat[ j ] == '!': j += 1 if j < n and pat[ j ] == ']': j += 1 while j < n and pat[ j ] != ']': j += 1 if j >= n: res += '\\[' else: stuff = pat[ i:j ].replace( '\\', '\\\\' ) i = j + 1 if stuff[ 0 ] == '!': stuff = '^' + stuff[ 1: ] elif stuff[ 0 ] == '^': stuff = '\\' + stuff res = '%s[%s]' % (res, stuff) else: res = res + re.escape( c ) return res + '\Z(?ms)' bd2k-python-lib-1.14a1.dev43/src/bd2k/util/d64.py0000664000175100017510000000745413173354566022226 0ustar jenkinsjenkins00000000000000from __future__ import division # Copyright (c) 2014 Dominic Tarr # Copyright (c) 2015 Hannes Schmidt # # Permission is hereby granted, free of charge, to any person obtaining a copy of this software # and associated documentation files (the "Software"), to deal in the Software without # restriction, including without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all copies or # substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# Ported from JS found at https://github.com/dominictarr/d64 from builtins import str from builtins import range from builtins import object from past.utils import old_div class D64( object ): def __init__( self, special_chars ): super( D64, self ).__init__( ) alphabet = 'PYFGCRLAOEUIDHTNSQJKXBMWVZpyfgcrlaoeuidhtnsqjkxbmwvz1234567890' self.alphabet = bytearray( sorted( alphabet + special_chars ) ) self.lookup = bytearray( 255 ) for i in range( 64 ): code = self.alphabet[ i ] self.lookup[ code ] = i def encode( self, data ): """ >>> encode = standard.encode >>> encode('') '' >>> encode('\\x00') '..' >>> encode('\\x00\\x01') '..3' >>> encode('\\x00\\x01\\x02') '..31' >>> encode('\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07') '..31.kF40VR' """ l = len( data ) s = bytearray( old_div((l * 4 + 2), 3) ) hang = 0 j = 0 a = self.alphabet for i in range( l ): v = ord( data[ i ] ) r = i % 3 if r == 0: s[ j ] = a[ v >> 2 ] j += 1 hang = (v & 3) << 4 elif r == 1: s[ j ] = a[ hang | v >> 4 ] j += 1 hang = (v & 0xf) << 2 elif r == 2: s[ j ] = a[ hang | v >> 6 ] j += 1 s[ j ] = a[ v & 0x3f ] j += 1 hang = 0 else: assert False if l % 3: s[ j ] = a[ hang ] return str( s ) def decode( self, e ): """ >>> decode = standard.decode >>> decode('') '' >>> decode('..') '\\x00' >>> decode('..3') '\\x00\\x01' >>> decode('..31') '\\x00\\x01\\x02' >>> decode('..31.kF40VR') '\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07' """ n = len( e ) j = 0 b = bytearray( old_div(n * 3, 4) ) hang = 0 l = self.lookup for i in range( n ): v = l[ ord( e[ i ] ) ] r = i % 4 if r == 0: hang = v << 2 elif r == 1: b[ j ] = hang | v >> 4 j += 1 hang = (v << 4) & 0xFF elif r == 2: b[ j ] = hang | v >> 2 j += 1 hang = (v << 6) & 0xFF elif r == 3: b[ j ] = hang | v j += 1 else: assert False return bytes( b ) standard = D64( '._' ) bd2k-python-lib-1.14a1.dev43/src/bd2k/util/iterables.py0000664000175100017510000001121013173354566023564 0ustar jenkinsjenkins00000000000000from builtins import map from builtins import zip from 
builtins import object from itertools import takewhile, dropwhile, chain try: from itertools import zip_longest as zip_longest except: from itertools import izip_longest as zip_longest def common_prefix( xs, ys ): """ >>> list( common_prefix('','') ) [] >>> list( common_prefix('A','') ) [] >>> list( common_prefix('','A') ) [] >>> list( common_prefix('A','A') ) ['A'] >>> list( common_prefix('AB','A') ) ['A'] >>> list( common_prefix('A','AB') ) ['A'] >>> list( common_prefix('A','B') ) [] """ return map( lambda x_y: x_y[0], takewhile( lambda a_b: a_b[0] == a_b[1], zip( xs, ys ) ) ) def disparate_suffix( xs, ys ): """ >>> list( disparate_suffix('','') ) [] >>> list( disparate_suffix('A','') ) [('A', None)] >>> list( disparate_suffix('','A') ) [(None, 'A')] >>> list( disparate_suffix('A','A') ) [] >>> list( disparate_suffix('AB','A') ) [('B', None)] >>> list( disparate_suffix('A','AB') ) [(None, 'B')] >>> list( disparate_suffix('A','B') ) [('A', 'B')] """ return dropwhile( lambda a_b1: a_b1[0] == a_b1[1], zip_longest( xs, ys ) ) def flatten( iterables ): return chain.from_iterable( iterables ) # noinspection PyPep8Naming class concat( object ): """ A literal iterable that lets you combine sequence literals (lists, set) with generators or list comprehensions. Instead of >>> [ -1 ] + [ x * 2 for x in range( 3 ) ] + [ -1 ] [-1, 0, 2, 4, -1] you can write >>> list( concat( -1, ( x * 2 for x in range( 3 ) ), -1 ) ) [-1, 0, 2, 4, -1] This is slightly shorter (not counting the list constructor) and does not involve array construction or concatenation. Note that concat() flattens (or chains) all iterable arguments into a single result iterable: >>> list( concat( 1, xrange( 2, 4 ), 4 ) ) [1, 2, 3, 4] It only does so one level deep. If you need to recursively flatten a data structure, check out crush(). If you want to prevent that flattening for an iterable argument, wrap it in concat(): >>> list( concat( 1, concat( xrange( 2, 4 ) ), 4 ) ) [1, xrange(2, 4), 4] Some more example. 
>>> list( concat() ) # empty concat [] >>> list( concat( 1 ) ) # non-iterable [1] >>> list( concat( concat() ) ) # empty iterable [] >>> list( concat( concat( 1 ) ) ) # singleton iterable [1] >>> list( concat( 1, concat( 2 ), 3 ) ) # flattened iterable [1, 2, 3] >>> list( concat( 1, [2], 3 ) ) # flattened iterable [1, 2, 3] >>> list( concat( 1, concat( [2] ), 3 ) ) # protecting an iterable from being flattened [1, [2], 3] >>> list( concat( 1, concat( [2], 3 ), 4 ) ) # protection only works with a single argument [1, 2, 3, 4] >>> list( concat( 1, 2, concat( 3, 4 ), 5, 6 ) ) [1, 2, 3, 4, 5, 6] >>> list( concat( 1, 2, concat( [ 3, 4 ] ), 5, 6 ) ) [1, 2, [3, 4], 5, 6] Note that while strings are technically iterable, concat() does not flatten them. >>> list( concat( 'ab' ) ) ['ab'] >>> list( concat( concat( 'ab' ) ) ) ['ab'] """ def __init__( self, *args ): super( concat, self ).__init__( ) self.args = args def __iter__( self ): def expand( x ): if isinstance( x, concat ) and len( x.args ) == 1: i = x.args else: try: i = x.__iter__( ) except AttributeError: i = x, return i return flatten( map( expand, self.args ) ) # noinspection PyPep8Naming class crush( object ): """ >>> list(crush([])) [] >>> list(crush([[]])) [] >>> list(crush([1])) [1] >>> list(crush([[1]])) [1] >>> list(crush([[[]]])) [] >>> list(crush([1,(),['two'],([3, 4],),{5}])) [1, 'two', 3, 4, 5] >>> list(crush(1)) Traceback (most recent call last): ... 
TypeError: 'int' object is not iterable >>> list(crush('123')) ['1', '2', '3'] The above is a bit of an anomaly since strings occurring inside iterables are not broken up: >>> list(crush(['123'])) ['123'] """ def __init__( self, iterables ): super( crush, self ).__init__( ) self.iterables = iterables def __iter__( self ): def expand( x ): try: # Using __iter__() instead of iter() prevents breaking up of strings return crush( x.__iter__( ) ) except AttributeError: return x, return flatten( map( expand, self.iterables ) ) bd2k-python-lib-1.14a1.dev43/PKG-INFO0000664000175100017510000000047113173354616017752 0ustar jenkinsjenkins00000000000000Metadata-Version: 1.0 Name: bd2k-python-lib Version: 1.14a1.dev43 Summary: The BD2K Python module kitchen sink Home-page: https://github.com/BD2KGenomics/bd2k-python-lib Author: Hannes Schmidt Author-email: hannes@ucsc.edu License: UNKNOWN Description-Content-Type: UNKNOWN Description: UNKNOWN Platform: UNKNOWN