async-0.6.1/graph.py

"""Simplistic implementation of a graph"""

__all__ = ('Node', 'Graph')


class Node(object):
	"""A Node in the graph. Nodes know their neighbours and have an id which
	should resolve into a string"""
	__slots__ = ('in_nodes', 'out_nodes', 'id')

	def __init__(self, id=None):
		self.id = id
		self.in_nodes = list()
		self.out_nodes = list()

	def __str__(self):
		return str(self.id)

	def __repr__(self):
		return "%s(%s)" % (type(self).__name__, self.id)


class Graph(object):
	"""A simple graph implementation, keeping nodes and providing basic access
	and editing functions. The performance is only suitable for small graphs
	of no more than 10 nodes!"""
	__slots__ = "nodes"

	def __init__(self):
		self.nodes = list()

	def __del__(self):
		"""Deletes bidirectional dependencies"""
		for node in self.nodes:
			node.in_nodes = None
			node.out_nodes = None
		# END cleanup nodes
		# otherwise the nodes would keep floating around

	def add_node(self, node):
		"""Add a new node to the graph
		:return: the newly added node"""
		self.nodes.append(node)
		return node

	def remove_node(self, node):
		"""Delete a node from the graph
		:return: self"""
		try:
			del(self.nodes[self.nodes.index(node)])
		except ValueError:
			return self
		# END ignore if it doesn't exist

		# clear connections
		for outn in node.out_nodes:
			del(outn.in_nodes[outn.in_nodes.index(node)])
		for inn in node.in_nodes:
			del(inn.out_nodes[inn.out_nodes.index(node)])
		node.out_nodes = list()
		node.in_nodes = list()
		return self

	def add_edge(self, u, v):
		"""Add a directed edge from u to v.
		:return: self
		:raise ValueError: If the new edge would create a cycle"""
		if u is v:
			raise ValueError("Cannot connect a node with itself")

		# are they already connected ?
		if u in v.in_nodes and v in u.out_nodes or \
			v in u.in_nodes and u in v.out_nodes:
			return self
		# END handle connection exists

		# cycle check - if we can reach any of the two by following either one's
		# history, it's a cycle
		for start, end in ((u, v), (v, u)):
			if not start.in_nodes:
				continue
			nodes = start.in_nodes[:]
			seen = set()
			# depth first search - it's faster
			while nodes:
				n = nodes.pop()
				if n in seen:
					continue
				seen.add(n)
				if n is end:
					raise ValueError("Connecting u with v would create a cycle")
				nodes.extend(n.in_nodes)
			# END while we are searching
		# END for each direction to look

		# connection is valid, set it up
		u.out_nodes.append(v)
		v.in_nodes.append(u)
		return self

	def input_inclusive_dfirst_reversed(self, node):
		"""Return all input nodes of the given node, depth first. The given
		node itself is returned last, as the pool requires that ordering"""
		stack = [node]
		seen = set()

		# depth first
		out = list()
		while stack:
			n = stack.pop()
			if n in seen:
				continue
			seen.add(n)
			out.append(n)

			# only proceed in that direction if visitor is fine with it
			stack.extend(n.in_nodes)
			# END call visitor
		# END while walking
		out.reverse()
		return out

async-0.6.1/PKG-INFO

Metadata-Version: 1.0
Name: async
Version: 0.6.1
Summary: Async Framework
Home-page: http://gitorious.org/git-python/async
Author: Sebastian Thiel
Author-email: byronimo@gmail.com
License: BSD License
Description: Async is a framework to process interdependent tasks in a pool of workers
Platform: UNKNOWN

async-0.6.1/README

async
=====
Async aims to make writing asynchronous processing easier.
It provides a task-graph with interdependent tasks that communicate using blocking channels, allowing to delay actual computations until items are requested. Tasks will automatically be distributed among 0 or more threads for the actual computation. Even though the GIL effectively prevents true concurrency, operations which block, such as file IO, can be sped up with it already. In conjuction with custom c extensions which release the GIL, true concurrency can be obtained as well. REQUIREMENTS ============ * Python Nose - for running the tests SOURCE ====== The source is available in a git repository at gitorious and github: git://gitorious.org/git-python/async.git git://github.com/Byron/async.git Run the tests with cd async nosetests MAILING LIST ============ http://groups.google.com/group/git-python ISSUE TRACKER ============= http://byronimo.lighthouseapp.com/projects/51787-gitpython LICENSE ======= New BSD License async-0.6.1/AUTHORS0000644000175100017510000000003111466226142012747 0ustar byronbyronCreator: Sebastian Thiel async-0.6.1/util.py0000644000175100017510000001553411466226142013244 0ustar byronbyron"""Module with utilities related to async operations""" from threading import ( Lock, _allocate_lock, _Condition, _sleep, _time, ) from Queue import ( Empty, ) from collections import deque import sys import os #{ Routines def cpu_count(): """:return:number of CPUs in the system :note: inspired by multiprocessing""" num = 0 try: if sys.platform == 'win32': num = int(os.environ['NUMBER_OF_PROCESSORS']) elif 'bsd' in sys.platform or sys.platform == 'darwin': num = int(os.popen('sysctl -n hw.ncpu').read()) else: num = os.sysconf('SC_NPROCESSORS_ONLN') except (ValueError, KeyError, OSError, AttributeError): pass # END exception handling if num == 0: raise NotImplementedError('cannot determine number of cpus') return num #} END routines class DummyLock(object): """An object providing a do-nothing lock interface for use in sync mode""" __slots__ = tuple() def acquire(self): pass def release(self): pass class SyncQueue(deque): """Adapter to allow using a deque like a queue, without locking""" def get(self, block=True, timeout=None): try: return self.popleft() except IndexError: raise Empty # END raise empty def empty(self): return len(self) == 0 def set_writable(self, state): pass def writable(self): return True def put(self, item, block=True, timeout=None): self.append(item) class HSCondition(deque): """Cleaned up code of the original condition object in order to make it run and respond faster.""" __slots__ = ("_lock") delay = 0.0002 # reduces wait times, but increases overhead def __init__(self, lock=None): if lock is None: lock = Lock() self._lock = lock def release(self): self._lock.release() def acquire(self, block=None): if block is None: self._lock.acquire() else: self._lock.acquire(block) def wait(self, timeout=None): waiter = _allocate_lock() waiter.acquire() # get it the first time, no blocking self.append(waiter) try: # restore state no matter what (e.g., KeyboardInterrupt) # now we block, as we hold the lock already # in the momemnt we release our lock, someone else might actually resume self._lock.release() if timeout is None: waiter.acquire() else: # Balancing act: We can't afford a pure busy loop, because of the # GIL, so we have to sleep # We try to sleep only tiny amounts of time though to be very responsive # NOTE: this branch is not used by the async system anyway, but # will be hit when the user reads with timeout endtime = _time() + timeout delay = self.delay acquire = 
waiter.acquire while True: gotit = acquire(0) if gotit: break remaining = endtime - _time() if remaining <= 0: break # this makes 4 threads working as good as two, but of course # it causes more frequent micro-sleeping #delay = min(delay * 2, remaining, .05) _sleep(delay) # END endless loop if not gotit: try: self.remove(waiter) except AttributeError: # handle python 2.4 - actually this should be made thread-safe # but lets see ... try: # lets hope we pop the right one - we don't loop over it # yet-we just keep minimal compatability with py 2.4 item = self.pop() if item != waiter: self.append(item) except IndexError: pass except ValueError: pass # END didn't ever get it finally: # reacquire the lock self._lock.acquire() # END assure release lock def notify(self, n=1): """Its vital that this method is threadsafe - we absolutely have to get a lock at the beginning of this method to be sure we get the correct amount of waiters back. If we bail out, although a waiter is about to be added, it will miss its wakeup notification, and block forever (possibly)""" self._lock.acquire() try: if not self: # len(self) == 0, but this should be faster return if n == 1: try: self.popleft().release() except IndexError: pass else: for i in range(min(n, len(self))): self.popleft().release() # END for each waiter to resume # END handle n = 1 case faster finally: self._lock.release() # END assure lock is released def notify_all(self): self.notify(len(self)) class ReadOnly(Exception): """Thrown when trying to write to a read-only queue""" class AsyncQueue(deque): """A queue using different condition objects to gain multithreading performance. Additionally it has a threadsafe writable flag, which will alert all readers that there is nothing more to get here. All default-queue code was cleaned up for performance.""" __slots__ = ('mutex', 'not_empty', '_writable') def __init__(self, maxsize=0): self.mutex = Lock() self.not_empty = HSCondition(self.mutex) self._writable = True def qsize(self): self.mutex.acquire() try: return len(self) finally: self.mutex.release() def writable(self): self.mutex.acquire() try: return self._writable finally: self.mutex.release() def set_writable(self, state): """Set the writable flag of this queue to True or False :return: The previous state""" self.mutex.acquire() try: old = self._writable self._writable = state return old finally: self.mutex.release() # if we won't receive anymore items, inform the getters if not state: self.not_empty.notify_all() # END tell everyone # END handle locking def empty(self): self.mutex.acquire() try: return not len(self) finally: self.mutex.release() def put(self, item, block=True, timeout=None): self.mutex.acquire() # NOTE: we explicitly do NOT check for our writable state # Its just used as a notification signal, and we need to be able # to continue writing to prevent threads ( easily ) from failing # to write their computed results, which we want in fact # NO: we want them to fail and stop processing, as the one who caused # the channel to close had a reason and wants the threads to # stop on the task as soon as possible if not self._writable: self.mutex.release() raise ReadOnly # END handle read-only self.append(item) self.mutex.release() self.not_empty.notify() def get(self, block=True, timeout=None): self.mutex.acquire() try: if block: if timeout is None: while not len(self) and self._writable: self.not_empty.wait() else: endtime = _time() + timeout while not len(self) and self._writable: remaining = endtime - _time() if remaining <= 0.0: raise 
Empty self.not_empty.wait(remaining) # END handle timeout mode # END handle block # can throw if we woke up because we are not writable anymore try: return self.popleft() except IndexError: raise Empty # END handle unblocking reason finally: self.mutex.release() # END assure lock is released #} END utilities async-0.6.1/channel.py0000644000175100017510000002553311466226142013677 0ustar byronbyron"""Contains a queue based channel implementation""" from Queue import ( Empty, Full ) from util import ( AsyncQueue, SyncQueue, ReadOnly ) from time import time import threading import sys __all__ = ('Channel', 'SerialChannel', 'Writer', 'ChannelWriter', 'CallbackChannelWriter', 'Reader', 'ChannelReader', 'CallbackChannelReader', 'mkchannel', 'ReadOnly', 'IteratorReader', 'CallbackReaderMixin', 'CallbackWriterMixin') #{ Classes class Channel(object): """A channel is similar to a file like object. It has a write end as well as one or more read ends. If Data is in the channel, it can be read, if not the read operation will block until data becomes available. If the channel is closed, any read operation will result in an exception This base class is not instantiated directly, but instead serves as constructor for Rwriter pairs. Create a new channel """ __slots__ = 'queue' # The queue to use to store the actual data QueueCls = AsyncQueue def __init__(self): """initialize this instance with a queue holding the channel contents""" self.queue = self.QueueCls() class SerialChannel(Channel): """A slightly faster version of a Channel, which sacrificed thead-safety for performance""" QueueCls = SyncQueue class Writer(object): """A writer is an object providing write access to a possibly blocking reading device""" __slots__ = tuple() #{ Interface def __init__(self, device): """Initialize the instance with the device to write to""" def write(self, item, block=True, timeout=None): """Write the given item into the device :param block: True if the device may block until space for the item is available :param timeout: The time in seconds to wait for the device to become ready in blocking mode""" raise NotImplementedError() def size(self): """:return: number of items already in the device, they could be read with a reader""" raise NotImplementedError() def close(self): """Close the channel. Multiple close calls on a closed channel are no an error""" raise NotImplementedError() def closed(self): """:return: True if the channel was closed""" raise NotImplementedError() #} END interface class ChannelWriter(Writer): """The write end of a channel, a file-like interface for a channel""" __slots__ = ('channel', '_put') def __init__(self, channel): """Initialize the writer to use the given channel""" self.channel = channel self._put = self.channel.queue.put #{ Interface def write(self, item, block=False, timeout=None): return self._put(item, block, timeout) def size(self): return self.channel.queue.qsize() def close(self): """Close the channel. 
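Closing marks the underlying queue read-only and wakes all blocked readers,
		which then drain any items still queued and stop blocking.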
Multiple close calls on a closed channel are no an error""" self.channel.queue.set_writable(False) def closed(self): """:return: True if the channel was closed""" return not self.channel.queue.writable() #} END interface class CallbackWriterMixin(object): """The write end of a channel which allows you to setup a callback to be called after an item was written to the channel""" # slots don't work with mixin's :( # __slots__ = ('_pre_cb') def __init__(self, *args): super(CallbackWriterMixin, self).__init__(*args) self._pre_cb = None def set_pre_cb(self, fun = lambda item: item): """ Install a callback to be called before the given item is written. It returns a possibly altered item which will be written to the channel instead, making it useful for pre-write item conversions. Providing None uninstalls the current method. :return: the previously installed function or None :note: Must be thread-safe if the channel is used in multiple threads""" prev = self._pre_cb self._pre_cb = fun return prev def write(self, item, block=True, timeout=None): if self._pre_cb: item = self._pre_cb(item) super(CallbackWriterMixin, self).write(item, block, timeout) class CallbackChannelWriter(CallbackWriterMixin, ChannelWriter): """Implements a channel writer with callback functionality""" pass class Reader(object): """Allows reading from a device""" __slots__ = tuple() #{ Interface def __init__(self, device): """Initialize the instance with the device to read from""" #{ Iterator protocol def __iter__(self): return self def next(self): """Implements the iterator protocol, iterating individual items""" items = self.read(1) if items: return items[0] raise StopIteration #} END iterator protocol #{ Interface def read(self, count=0, block=True, timeout=None): """ read a list of items read from the device. The list, as a sequence of items, is similar to the string of characters returned when reading from file like objects. :param count: given amount of items to read. If < 1, all items will be read :param block: if True, the call will block until an item is available :param timeout: if positive and block is True, it will block only for the given amount of seconds, returning the items it received so far. The timeout is applied to each read item, not for the whole operation. :return: single item in a list if count is 1, or a list of count items. If the device was empty and count was 1, an empty list will be returned. If count was greater 1, a list with less than count items will be returned. If count was < 1, a list with all items that could be read will be returned.""" raise NotImplementedError() #} END interface class ChannelReader(Reader): """Allows reading from a channel. 
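Every item put into the channel is handed to exactly one reader, so several
	readers on the same channel simply share the stream of items between them.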
The reader is thread-safe if the channel is as well""" __slots__ = 'channel' def __init__(self, channel): """Initialize this instance from its parent write channel""" self.channel = channel #{ Interface def read(self, count=0, block=True, timeout=None): # if the channel is closed for writing, we never block # NOTE: is handled by the queue # We don't check for a closed state here has it costs time - most of # the time, it will not be closed, and will bail out automatically once # it gets closed # in non-blocking mode, its all not a problem out = list() queue = self.channel.queue if not block: # be as fast as possible in non-blocking mode, hence # its a bit 'unrolled' try: if count == 1: out.append(queue.get(False)) elif count < 1: while True: out.append(queue.get(False)) # END for each item else: for i in xrange(count): out.append(queue.get(False)) # END for each item # END handle count except Empty: pass # END handle exceptions else: # to get everything into one loop, we set the count accordingly if count == 0: count = sys.maxint # END handle count i = 0 while i < count: try: out.append(queue.get(block, timeout)) i += 1 except Empty: # here we are only if # someone woke us up to inform us about the queue that changed # its writable state # The following branch checks for closed channels, and pulls # as many items as we need and as possible, before # leaving the loop. if not queue.writable(): try: while i < count: out.append(queue.get(False, None)) i += 1 # END count loop except Empty: break # out of count loop # END handle absolutely empty queue # END handle closed channel # if we are here, we woke up and the channel is not closed # Either the queue became writable again, which currently shouldn't # be able to happen in the channel, or someone read with a timeout # that actually timed out. # As it timed out, which is the only reason we are here, # we have to abort break # END ignore empty # END for each item # END handle blocking return out #} END interface class CallbackReaderMixin(object): """A channel which sends a callback before items are read from the channel""" # unfortunately, slots can only use direct inheritance, have to turn it off :( # __slots__ = "_pre_cb" def __init__(self, *args): super(CallbackReaderMixin, self).__init__(*args) self._pre_cb = None self._post_cb = None def set_pre_cb(self, fun = lambda count: None): """ Install a callback to call with the item count to be read before any item is actually read from the channel. Exceptions will be propagated. If a function is not provided, the call is effectively uninstalled. :return: the previously installed callback or None :note: The callback must be threadsafe if the channel is used by multiple threads.""" prev = self._pre_cb self._pre_cb = fun return prev def set_post_cb(self, fun = lambda items: items): """ Install a callback to call after items have been read, but before they are returned to the caller. The callback may adjust the items and/or the list. 
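For instance, set_post_cb(lambda items: [i for i in items if i is not None])
		would drop None results before the caller ever sees them.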
If no function is provided, the callback is uninstalled :return: the previously installed function""" prev = self._post_cb self._post_cb = fun return prev def read(self, count=0, block=True, timeout=None): if self._pre_cb: self._pre_cb(count) items = super(CallbackReaderMixin, self).read(count, block, timeout) if self._post_cb: items = self._post_cb(items) return items class CallbackChannelReader(CallbackReaderMixin, ChannelReader): """Implements a channel reader with callback functionality""" pass class IteratorReader(Reader): """A Reader allowing to read items from an iterator, instead of a channel. Reads will never block. Its thread-safe""" __slots__ = ("_empty", '_iter', '_lock') # the type of the lock to use when reading from the iterator lock_type = threading.Lock def __init__(self, iterator): self._empty = False if not hasattr(iterator, 'next'): raise ValueError("Iterator %r needs a next() function" % iterator) self._iter = iterator self._lock = self.lock_type() def read(self, count=0, block=True, timeout=None): """Non-Blocking implementation of read""" # not threadsafe, but worst thing that could happen is that # we try to get items one more time if self._empty: return list() # END early abort self._lock.acquire() try: if count == 0: self._empty = True return list(self._iter) else: out = list() it = self._iter for i in xrange(count): try: out.append(it.next()) except StopIteration: self._empty = True break # END handle empty iterator # END for each item to take return out # END handle count finally: self._lock.release() # END handle locking #} END classes #{ Constructors def mkchannel(ctype = Channel, wtype = ChannelWriter, rtype = ChannelReader): """ Create a channel, with a reader and a writer :return: tuple(reader, writer) :param ctype: Channel to instantiate :param wctype: The type of the write channel to instantiate :param rctype: The type of the read channel to instantiate""" c = ctype() wc = wtype(c) rc = rtype(c) return wc, rc #} END constructors async-0.6.1/task.py0000644000175100017510000002004111466226142013216 0ustar byronbyronfrom graph import Node from util import ReadOnly from channel import IteratorReader import threading import weakref import sys import new __all__ = ('Task', 'ThreadTaskBase', 'IteratorTaskBase', 'IteratorThreadTask', 'ChannelThreadTask') class Task(Node): """ Abstracts a named task, which contains additional information on how the task should be queued and processed. Results of the item processing are sent to a writer, which is to be set by the creator using the ``set_writer`` method. Items are read using the internal ``_read`` callable, subclasses are meant to set this to a callable that supports the Reader interface's read function. * **min_count** assures that not less than min_count items will be processed per call. * **max_chunksize** assures that multi-threading is happening in smaller chunks. If someone wants all items to be processed, using read(0), the whole task would go to one worker, as well as dependent tasks. If you want finer granularity , you can specify this here, causing chunks to be no larger than max_chunksize * **apply_single** if True, default True, individual items will be given to the worker function. If False, a list of possibly multiple items will be passed instead. 
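
	A minimal sketch of driving a task by hand - mkchannel comes from the
	channel module of this package, IteratorTaskBase is defined below, and
	the doubling function is made up:

		from channel import mkchannel
		wc, rc = mkchannel()
		task = IteratorTaskBase(iter(range(4)), 'double', lambda i: i * 2, writer=wc)
		task.process(0)              # read all items, apply fun, write results
		assert rc.read() == [0, 2, 4, 6]

	Normally a pool owns the task and invokes process() from its worker
	threads instead.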
""" __slots__ = ( '_read', # method to yield items to process '_out_writer', # output write channel '_exc', # exception caught '_done', # True if we are done '_num_writers', # number of concurrent writers '_wlock', # lock for the above 'fun', # function to call with items read 'min_count', # minimum amount of items to produce, None means no override 'max_chunksize', # maximium amount of items to process per process call 'apply_single' # apply single items even if multiple where read ) def __init__(self, id, fun, apply_single=True, min_count=None, max_chunksize=0, writer=None): Node.__init__(self, id) self._read = None # to be set by subclasss self._out_writer = writer self._exc = None self._done = False self._num_writers = 0 self._wlock = threading.Lock() self.fun = fun self.min_count = None self.max_chunksize = 0 # not set self.apply_single = apply_single def is_done(self): """:return: True if we are finished processing""" return self._done def set_done(self): """Set ourselves to being done, has we have completed the processing""" self._done = True def set_writer(self, writer): """Set the write channel to the given one""" self._out_writer = writer def writer(self): """ :return: a proxy to our write channel or None if non is set :note: you must not hold a reference to our write channel when the task is being processed. This would cause the write channel never to be closed as the task will think there is still another instance being processed which can close the channel once it is done. In the worst case, this will block your reads.""" if self._out_writer is None: return None return self._out_writer def close(self): """A closed task will close its channel to assure the readers will wake up :note: its safe to call this method multiple times""" self._out_writer.close() def is_closed(self): """:return: True if the task's write channel is closed""" return self._out_writer.closed() def error(self): """:return: Exception caught during last processing or None""" return self._exc def process(self, count=0): """Process count items and send the result individually to the output channel""" # first thing: increment the writer count - other tasks must be able # to respond properly ( even if it turns out we don't need it later ) self._wlock.acquire() self._num_writers += 1 self._wlock.release() items = self._read(count) try: try: if items: write = self._out_writer.write if self.apply_single: for item in items: rval = self.fun(item) write(rval) # END for each item else: # shouldn't apply single be the default anyway ? # The task designers should chunk them up in advance rvals = self.fun(items) for rval in rvals: write(rval) # END handle single apply # END if there is anything to do finally: self._wlock.acquire() self._num_writers -= 1 self._wlock.release() # END handle writer count except Exception, e: # be sure our task is not scheduled again self.set_done() # PROBLEM: We have failed to create at least one item, hence its not # garantueed that enough items will be produced for a possibly blocking # client on the other end. This is why we have no other choice but # to close the channel, preventing the possibility of blocking. # This implies that dependent tasks will go down with us, but that is # just the right thing to do of course - one loose link in the chain ... 
# Other chunks of our kind currently being processed will then # fail to write to the channel and fail as well self.close() # If some other chunk of our Task had an error, the channel will be closed # This is not an issue, just be sure we don't overwrite the original # exception with the ReadOnly error that would be emitted in that case. # We imply that ReadOnly is exclusive to us, as it won't be an error # if the user emits it if not isinstance(e, ReadOnly): self._exc = e # END set error flag # END exception handling # if we didn't get all demanded items, which is also the case if count is 0 # we have depleted the input channel and are done # We could check our output channel for how many items we have and put that # into the equation, but whats important is that we were asked to produce # count items. if not items or len(items) != count: self.set_done() # END handle done state # If we appear to be the only one left with our output channel, and are # done ( this could have been set in another thread as well ), make # sure to close the output channel. # Waiting with this to be the last one helps to keep the # write-channel writable longer # The count is: 1 = wc itself, 2 = first reader channel, + x for every # thread having its copy on the stack # + 1 for the instance we provide to refcount # Soft close, so others can continue writing their results if self.is_done(): self._wlock.acquire() try: if self._num_writers == 0: self.close() # END handle writers finally: self._wlock.release() # END assure lock release # END handle channel closure #{ Configuration class ThreadTaskBase(object): """Describes tasks which can be used with theaded pools""" pass class IteratorTaskBase(Task): """Implements a task which processes items from an iterable in a multi-processing safe manner""" __slots__ = tuple() def __init__(self, iterator, *args, **kwargs): Task.__init__(self, *args, **kwargs) self._read = IteratorReader(iterator).read # defaults to returning our items unchanged if self.fun is None: self.fun = lambda item: item class IteratorThreadTask(IteratorTaskBase, ThreadTaskBase): """An input iterator for threaded pools""" lock_type = threading.Lock class ChannelThreadTask(Task, ThreadTaskBase): """Uses an input channel as source for reading items For instantiation, it takes all arguments of its base, the first one needs to be the input channel to read from though.""" __slots__ = "_pool_ref" def __init__(self, in_reader, *args, **kwargs): Task.__init__(self, *args, **kwargs) self._read = in_reader.read self._pool_ref = None #{ Internal Interface def reader(self): """:return: input channel from which we read""" # the instance is bound in its instance method - lets use this to keep # the refcount at one ( per consumer ) return self._read.im_self def set_read(self, read): """Adjust the read method to the given one""" self._read = read def set_pool(self, pool): self._pool_ref = weakref.ref(pool) def pool(self): """:return: pool we are attached to, or None""" if self._pool_ref is None: return None return self._pool_ref() #} END intenral interface async-0.6.1/mod/0000755000175100017510000000000011472205557012470 5ustar byronbyronasync-0.6.1/mod/zlibmodule.c0000644000175100017510000007511511466226142015007 0ustar byronbyron/* zlibmodule.c -- gzip-compatible data compression */ /* See http://www.gzip.org/zlib/ */ /* Windows users: read Python's PCbuild\readme.txt */ #include "Python.h" #include "zlib.h" #ifdef WITH_THREAD #include "pythread.h" /* #defs ripped off from _tkinter.c, even though the situation here 
is much simpler, because we don't have to worry about waiting for Tcl events! And, since zlib itself is threadsafe, we don't need to worry about re-entering zlib functions. N.B. Since ENTER_ZLIB and LEAVE_ZLIB only need to be called on functions that modify the components of preexisting de/compress objects, it could prove to be a performance gain on multiprocessor machines if there was an de/compress object-specific lock. However, for the moment the ENTER_ZLIB and LEAVE_ZLIB calls are global for ALL de/compress objects. S.T. And this is exactly what we do, have one lock per object. This should allow multi-threaded compression and decompression */ #define ENTER_ZLIB \ Py_BEGIN_ALLOW_THREADS \ PyThread_acquire_lock(self->zlib_lock, 1); \ Py_END_ALLOW_THREADS #define LEAVE_ZLIB \ PyThread_release_lock(self->zlib_lock); #else #define ENTER_ZLIB #define LEAVE_ZLIB #endif /* WITH THREAD */ /* The following parameters are copied from zutil.h, version 0.95 */ #define DEFLATED 8 #if MAX_MEM_LEVEL >= 8 # define DEF_MEM_LEVEL 8 #else # define DEF_MEM_LEVEL MAX_MEM_LEVEL #endif #define DEF_WBITS MAX_WBITS /* The output buffer will be increased in chunks of DEFAULTALLOC bytes. */ #define DEFAULTALLOC (16*1024) #define PyInit_zlib initzlib static PyTypeObject Comptype; static PyTypeObject Decomptype; static PyObject *ZlibError; typedef struct { PyObject_HEAD z_stream zst; PyObject *unused_data; PyObject *unconsumed_tail; PyObject *status; int is_initialised; #ifdef WITH_THREAD PyThread_type_lock zlib_lock; #endif } compobject; static void zlib_error(z_stream zst, int err, char *msg) { if (zst.msg == Z_NULL) PyErr_Format(ZlibError, "Error %d %s", err, msg); else PyErr_Format(ZlibError, "Error %d %s: %.200s", err, msg, zst.msg); } PyDoc_STRVAR(compressobj__doc__, "compressobj([level]) -- Return a compressor object.\n" "\n" "Optional arg level is the compression level, in 1-9."); PyDoc_STRVAR(decompressobj__doc__, "decompressobj([wbits]) -- Return a decompressor object.\n" "\n" "Optional arg wbits is the window buffer size."); static compobject * newcompobject(PyTypeObject *type) { compobject *self; self = PyObject_New(compobject, type); if (self == NULL) return NULL; self->is_initialised = 0; self->unused_data = PyString_FromString(""); if (self->unused_data == NULL) { Py_DECREF(self); return NULL; } self->unconsumed_tail = PyString_FromString(""); if (self->unconsumed_tail == NULL) { Py_DECREF(self); return NULL; } self->status = PyLong_FromLong(~0); #ifdef WITH_THREAD self->zlib_lock = PyThread_allocate_lock(); #endif /* WITH_THREAD */ return self; } PyDoc_STRVAR(compress__doc__, "compress(string[, level]) -- Returned compressed string.\n" "\n" "Optional arg level is the compression level, in 1-9."); static PyObject * PyZlib_compress(PyObject *self, PyObject *args) { PyObject *ReturnVal = NULL; Byte *input, *output; int length, level=Z_DEFAULT_COMPRESSION, err; z_stream zst; /* require Python string object, optional 'level' arg */ if (!PyArg_ParseTuple(args, "s#|i:compress", &input, &length, &level)) return NULL; zst.avail_out = length + length/1000 + 12 + 1; output = (Byte*)malloc(zst.avail_out); if (output == NULL) { PyErr_SetString(PyExc_MemoryError, "Can't allocate memory to compress data"); return NULL; } /* Past the point of no return. From here on out, we need to make sure we clean up mallocs & INCREFs. 
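   Every failure path from here on jumps to the error label, which frees the
   scratch output buffer; the success path reaches the same cleanup by
   falling through.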
*/ zst.zalloc = (alloc_func)NULL; zst.zfree = (free_func)Z_NULL; zst.next_out = (Byte *)output; zst.next_in = (Byte *)input; zst.avail_in = length; err = deflateInit(&zst, level); switch(err) { case(Z_OK): break; case(Z_MEM_ERROR): PyErr_SetString(PyExc_MemoryError, "Out of memory while compressing data"); goto error; case(Z_STREAM_ERROR): PyErr_SetString(ZlibError, "Bad compression level"); goto error; default: deflateEnd(&zst); zlib_error(zst, err, "while compressing data"); goto error; } Py_BEGIN_ALLOW_THREADS; err = deflate(&zst, Z_FINISH); Py_END_ALLOW_THREADS; if (err != Z_STREAM_END) { zlib_error(zst, err, "while compressing data"); deflateEnd(&zst); goto error; } err=deflateEnd(&zst); if (err == Z_OK) ReturnVal = PyString_FromStringAndSize((char *)output, zst.total_out); else zlib_error(zst, err, "while finishing compression"); error: free(output); return ReturnVal; } PyDoc_STRVAR(decompress__doc__, "decompress(string[, wbits[, bufsize]]) -- Return decompressed string.\n" "\n" "Optional arg wbits is the window buffer size. Optional arg bufsize is\n" "the initial output buffer size."); static PyObject * PyZlib_decompress(PyObject *self, PyObject *args) { PyObject *result_str; Byte *input; int length, err; int wsize=DEF_WBITS; Py_ssize_t r_strlen=DEFAULTALLOC; z_stream zst; if (!PyArg_ParseTuple(args, "s#|in:decompress", &input, &length, &wsize, &r_strlen)) return NULL; if (r_strlen <= 0) r_strlen = 1; zst.avail_in = length; zst.avail_out = r_strlen; if (!(result_str = PyString_FromStringAndSize(NULL, r_strlen))) return NULL; zst.zalloc = (alloc_func)NULL; zst.zfree = (free_func)Z_NULL; zst.next_out = (Byte *)PyString_AS_STRING(result_str); zst.next_in = (Byte *)input; err = inflateInit2(&zst, wsize); switch(err) { case(Z_OK): break; case(Z_MEM_ERROR): PyErr_SetString(PyExc_MemoryError, "Out of memory while decompressing data"); goto error; default: inflateEnd(&zst); zlib_error(zst, err, "while preparing to decompress data"); goto error; } do { Py_BEGIN_ALLOW_THREADS err=inflate(&zst, Z_FINISH); Py_END_ALLOW_THREADS switch(err) { case(Z_STREAM_END): break; case(Z_BUF_ERROR): /* * If there is at least 1 byte of room according to zst.avail_out * and we get this error, assume that it means zlib cannot * process the inflate call() due to an error in the data. 
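     With free space still available, a well-formed stream would have made
     progress instead of stalling, so this is treated as corrupt input
     rather than a short buffer.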
*/ if (zst.avail_out > 0) { PyErr_Format(ZlibError, "Error %i while decompressing data", err); inflateEnd(&zst); goto error; } /* fall through */ case(Z_OK): /* need more memory */ if (_PyString_Resize(&result_str, r_strlen << 1) < 0) { inflateEnd(&zst); goto error; } zst.next_out = (unsigned char *)PyString_AS_STRING(result_str) \ + r_strlen; zst.avail_out = r_strlen; r_strlen = r_strlen << 1; break; default: inflateEnd(&zst); zlib_error(zst, err, "while decompressing data"); goto error; } } while (err != Z_STREAM_END); err = inflateEnd(&zst); if (err != Z_OK) { zlib_error(zst, err, "while finishing data decompression"); goto error; } _PyString_Resize(&result_str, zst.total_out); return result_str; error: Py_XDECREF(result_str); return NULL; } static PyObject * PyZlib_compressobj(PyObject *selfptr, PyObject *args) { compobject *self; int level=Z_DEFAULT_COMPRESSION, method=DEFLATED; int wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL, strategy=0, err; if (!PyArg_ParseTuple(args, "|iiiii:compressobj", &level, &method, &wbits, &memLevel, &strategy)) return NULL; self = newcompobject(&Comptype); if (self==NULL) return(NULL); self->zst.zalloc = (alloc_func)NULL; self->zst.zfree = (free_func)Z_NULL; self->zst.next_in = NULL; self->zst.avail_in = 0; err = deflateInit2(&self->zst, level, method, wbits, memLevel, strategy); switch(err) { case (Z_OK): self->is_initialised = 1; return (PyObject*)self; case (Z_MEM_ERROR): Py_DECREF(self); PyErr_SetString(PyExc_MemoryError, "Can't allocate memory for compression object"); return NULL; case(Z_STREAM_ERROR): Py_DECREF(self); PyErr_SetString(PyExc_ValueError, "Invalid initialization option"); return NULL; default: zlib_error(self->zst, err, "while creating compression object"); Py_DECREF(self); return NULL; } } static PyObject * PyZlib_decompressobj(PyObject *selfptr, PyObject *args) { int wbits=DEF_WBITS, err; compobject *self; if (!PyArg_ParseTuple(args, "|i:decompressobj", &wbits)) return NULL; self = newcompobject(&Decomptype); if (self == NULL) return(NULL); self->zst.zalloc = (alloc_func)NULL; self->zst.zfree = (free_func)Z_NULL; self->zst.next_in = NULL; self->zst.avail_in = 0; err = inflateInit2(&self->zst, wbits); switch(err) { case (Z_OK): self->is_initialised = 1; return (PyObject*)self; case(Z_STREAM_ERROR): Py_DECREF(self); PyErr_SetString(PyExc_ValueError, "Invalid initialization option"); return NULL; case (Z_MEM_ERROR): Py_DECREF(self); PyErr_SetString(PyExc_MemoryError, "Can't allocate memory for decompression object"); return NULL; default: zlib_error(self->zst, err, "while creating decompression object"); Py_DECREF(self); return NULL; } } static void Comp_dealloc(compobject *self) { if (self->is_initialised) deflateEnd(&self->zst); Py_XDECREF(self->unused_data); Py_XDECREF(self->unconsumed_tail); Py_XDECREF(self->status); #ifdef WITH_THREAD PyThread_free_lock(self->zlib_lock); #endif /* WITH_THREAD */ PyObject_Del(self); } static void Decomp_dealloc(compobject *self) { if (self->is_initialised) inflateEnd(&self->zst); Py_XDECREF(self->unused_data); Py_XDECREF(self->unconsumed_tail); Py_XDECREF(self->status); #ifdef WITH_THREAD PyThread_free_lock(self->zlib_lock); #endif /* WITH_THREAD */ PyObject_Del(self); } PyDoc_STRVAR(comp_compress__doc__, "compress(data) -- Return a string containing data compressed.\n" "\n" "After calling this function, some of the input data may still\n" "be stored in internal buffers for later processing.\n" "Call the flush() method to clear these buffers."); static PyObject * PyZlib_objcompress(compobject *self, 
PyObject *args) { int err, inplen, length = DEFAULTALLOC; PyObject *RetVal; Byte *input; unsigned long start_total_out; if (!PyArg_ParseTuple(args, "s#:compress", &input, &inplen)) return NULL; if (!(RetVal = PyString_FromStringAndSize(NULL, length))) return NULL; ENTER_ZLIB start_total_out = self->zst.total_out; self->zst.avail_in = inplen; self->zst.next_in = input; self->zst.avail_out = length; self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal); Py_BEGIN_ALLOW_THREADS err = deflate(&(self->zst), Z_NO_FLUSH); Py_END_ALLOW_THREADS /* while Z_OK and the output buffer is full, there might be more output, so extend the output buffer and try again */ while (err == Z_OK && self->zst.avail_out == 0) { if (_PyString_Resize(&RetVal, length << 1) < 0) goto error; self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal) \ + length; self->zst.avail_out = length; length = length << 1; Py_BEGIN_ALLOW_THREADS err = deflate(&(self->zst), Z_NO_FLUSH); Py_END_ALLOW_THREADS } // Set status Py_DECREF(self->status); self->status = PyLong_FromLong(err); /* We will only get Z_BUF_ERROR if the output buffer was full but there wasn't more output when we tried again, so it is not an error condition. */ if (err != Z_OK && err != Z_BUF_ERROR) { zlib_error(self->zst, err, "while compressing"); Py_DECREF(RetVal); RetVal = NULL; goto error; } _PyString_Resize(&RetVal, self->zst.total_out - start_total_out); error: LEAVE_ZLIB return RetVal; } PyDoc_STRVAR(decomp_decompress__doc__, "decompress(data, max_length) -- Return a string containing the decompressed\n" "version of the data.\n" "\n" "After calling this function, some of the input data may still be stored in\n" "internal buffers for later processing.\n" "Call the flush() method to clear these buffers.\n" "If the max_length parameter is specified then the return value will be\n" "no longer than max_length. Unconsumed input data will be stored in\n" "the unconsumed_tail attribute."); static PyObject * PyZlib_objdecompress(compobject *self, PyObject *args) { int err, inplen, old_length, length = DEFAULTALLOC; int max_length = 0; PyObject *RetVal; Byte *input; unsigned long start_total_out; if (!PyArg_ParseTuple(args, "s#|i:decompress", &input, &inplen, &max_length)) return NULL; if (max_length < 0) { PyErr_SetString(PyExc_ValueError, "max_length must be greater than zero"); return NULL; } /* limit amount of data allocated to max_length */ if (max_length && length > max_length) length = max_length; if (!(RetVal = PyString_FromStringAndSize(NULL, length))) return NULL; ENTER_ZLIB start_total_out = self->zst.total_out; self->zst.avail_in = inplen; self->zst.next_in = input; self->zst.avail_out = length; self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal); Py_BEGIN_ALLOW_THREADS err = inflate(&(self->zst), Z_SYNC_FLUSH); Py_END_ALLOW_THREADS /* While Z_OK and the output buffer is full, there might be more output. So extend the output buffer and try again. */ while (err == Z_OK && self->zst.avail_out == 0) { /* If max_length set, don't continue decompressing if we've already reached the limit. */ if (max_length && length >= max_length) break; /* otherwise, ... 
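       grow the output buffer geometrically, clamp the new size to
       max_length when one was given, and retry the inflate call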
*/ old_length = length; length = length << 1; if (max_length && length > max_length) length = max_length; if (_PyString_Resize(&RetVal, length) < 0) goto error; self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal) \ + old_length; self->zst.avail_out = length - old_length; Py_BEGIN_ALLOW_THREADS err = inflate(&(self->zst), Z_SYNC_FLUSH); Py_END_ALLOW_THREADS } // Set status Py_DECREF(self->status); self->status = PyLong_FromLong(err); /* Not all of the compressed data could be accommodated in the output buffer of specified size. Return the unconsumed tail in an attribute.*/ if(max_length) { Py_DECREF(self->unconsumed_tail); self->unconsumed_tail = PyString_FromStringAndSize((char *)self->zst.next_in, self->zst.avail_in); if(!self->unconsumed_tail) { Py_DECREF(RetVal); RetVal = NULL; goto error; } } /* The end of the compressed data has been reached, so set the unused_data attribute to a string containing the remainder of the data in the string. Note that this is also a logical place to call inflateEnd, but the old behaviour of only calling it on flush() is preserved. */ if (err == Z_STREAM_END) { Py_XDECREF(self->unused_data); /* Free original empty string */ self->unused_data = PyString_FromStringAndSize( (char *)self->zst.next_in, self->zst.avail_in); if (self->unused_data == NULL) { Py_DECREF(RetVal); goto error; } /* We will only get Z_BUF_ERROR if the output buffer was full but there wasn't more output when we tried again, so it is not an error condition. */ } else if (err != Z_OK && err != Z_BUF_ERROR) { zlib_error(self->zst, err, "while decompressing"); Py_DECREF(RetVal); RetVal = NULL; goto error; } _PyString_Resize(&RetVal, self->zst.total_out - start_total_out); error: LEAVE_ZLIB return RetVal; } PyDoc_STRVAR(comp_flush__doc__, "flush( [mode] ) -- Return a string containing any remaining compressed data.\n" "\n" "mode can be one of the constants Z_SYNC_FLUSH, Z_FULL_FLUSH, Z_FINISH; the\n" "default value used when mode is not specified is Z_FINISH.\n" "If mode == Z_FINISH, the compressor object can no longer be used after\n" "calling the flush() method. Otherwise, more data can still be compressed."); static PyObject * PyZlib_flush(compobject *self, PyObject *args) { int err, length = DEFAULTALLOC; PyObject *RetVal; int flushmode = Z_FINISH; unsigned long start_total_out; if (!PyArg_ParseTuple(args, "|i:flush", &flushmode)) return NULL; /* Flushing with Z_NO_FLUSH is a no-op, so there's no point in doing any work at all; just return an empty string. */ if (flushmode == Z_NO_FLUSH) { return PyString_FromStringAndSize(NULL, 0); } if (!(RetVal = PyString_FromStringAndSize(NULL, length))) return NULL; ENTER_ZLIB start_total_out = self->zst.total_out; self->zst.avail_in = 0; self->zst.avail_out = length; self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal); Py_BEGIN_ALLOW_THREADS err = deflate(&(self->zst), flushmode); Py_END_ALLOW_THREADS /* while Z_OK and the output buffer is full, there might be more output, so extend the output buffer and try again */ while (err == Z_OK && self->zst.avail_out == 0) { if (_PyString_Resize(&RetVal, length << 1) < 0) goto error; self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal) \ + length; self->zst.avail_out = length; length = length << 1; Py_BEGIN_ALLOW_THREADS err = deflate(&(self->zst), flushmode); Py_END_ALLOW_THREADS } // update final status Py_DECREF(self->status); self->status = PyLong_FromLong(err); /* If flushmode is Z_FINISH, we also have to call deflateEnd() to free various data structures. 
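       (A successful deflateEnd() also clears is_initialised, so the
       destructor will not run it a second time.)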
Note we should only get Z_STREAM_END when flushmode is Z_FINISH, but checking both for safety*/ if (err == Z_STREAM_END && flushmode == Z_FINISH) { err = deflateEnd(&(self->zst)); if (err != Z_OK) { zlib_error(self->zst, err, "from deflateEnd()"); Py_DECREF(RetVal); RetVal = NULL; goto error; } else self->is_initialised = 0; /* We will only get Z_BUF_ERROR if the output buffer was full but there wasn't more output when we tried again, so it is not an error condition. */ } else if (err!=Z_OK && err!=Z_BUF_ERROR) { zlib_error(self->zst, err, "while flushing"); Py_DECREF(RetVal); RetVal = NULL; goto error; } _PyString_Resize(&RetVal, self->zst.total_out - start_total_out); error: LEAVE_ZLIB return RetVal; } #ifdef HAVE_ZLIB_COPY PyDoc_STRVAR(comp_copy__doc__, "copy() -- Return a copy of the compression object."); static PyObject * PyZlib_copy(compobject *self) { compobject *retval = NULL; int err; retval = newcompobject(&Comptype); if (!retval) return NULL; /* Copy the zstream state * We use ENTER_ZLIB / LEAVE_ZLIB to make this thread-safe */ ENTER_ZLIB err = deflateCopy(&retval->zst, &self->zst); switch(err) { case(Z_OK): break; case(Z_STREAM_ERROR): PyErr_SetString(PyExc_ValueError, "Inconsistent stream state"); goto error; case(Z_MEM_ERROR): PyErr_SetString(PyExc_MemoryError, "Can't allocate memory for compression object"); goto error; default: zlib_error(self->zst, err, "while copying compression object"); goto error; } Py_INCREF(self->unused_data); Py_INCREF(self->unconsumed_tail); Py_INCREF(self->status); Py_XDECREF(retval->unused_data); Py_XDECREF(retval->unconsumed_tail); Py_XDECREF(retval->status); retval->unused_data = self->unused_data; retval->unconsumed_tail = self->unconsumed_tail; retval->status = self->status; /* Mark it as being initialized */ retval->is_initialised = 1; LEAVE_ZLIB return (PyObject *)retval; error: LEAVE_ZLIB Py_XDECREF(retval); return NULL; } PyDoc_STRVAR(decomp_copy__doc__, "copy() -- Return a copy of the decompression object."); static PyObject * PyZlib_uncopy(compobject *self) { compobject *retval = NULL; int err; retval = newcompobject(&Decomptype); if (!retval) return NULL; /* Copy the zstream state * We use ENTER_ZLIB / LEAVE_ZLIB to make this thread-safe */ ENTER_ZLIB err = inflateCopy(&retval->zst, &self->zst); switch(err) { case(Z_OK): break; case(Z_STREAM_ERROR): PyErr_SetString(PyExc_ValueError, "Inconsistent stream state"); goto error; case(Z_MEM_ERROR): PyErr_SetString(PyExc_MemoryError, "Can't allocate memory for decompression object"); goto error; default: zlib_error(self->zst, err, "while copying decompression object"); goto error; } Py_INCREF(self->unused_data); Py_INCREF(self->unconsumed_tail); Py_INCREF(self->status); Py_XDECREF(retval->unused_data); Py_XDECREF(retval->unconsumed_tail); Py_XDECREF(retval->status); retval->unused_data = self->unused_data; retval->unconsumed_tail = self->unconsumed_tail; retval->status = self->status; /* Mark it as being initialized */ retval->is_initialised = 1; LEAVE_ZLIB return (PyObject *)retval; error: LEAVE_ZLIB Py_XDECREF(retval); return NULL; } #endif PyDoc_STRVAR(decomp_flush__doc__, "flush( [length] ) -- Return a string containing any remaining\n" "decompressed data. 
length, if given, is the initial size of the\n" "output buffer.\n" "\n" "The decompressor object can no longer be used after this call."); static PyObject * PyZlib_unflush(compobject *self, PyObject *args) { int err, length = DEFAULTALLOC; PyObject * retval = NULL; unsigned long start_total_out; if (!PyArg_ParseTuple(args, "|i:flush", &length)) return NULL; if (length <= 0) { PyErr_SetString(PyExc_ValueError, "length must be greater than zero"); return NULL; } if (!(retval = PyString_FromStringAndSize(NULL, length))) return NULL; ENTER_ZLIB start_total_out = self->zst.total_out; self->zst.avail_out = length; self->zst.next_out = (Byte *)PyString_AS_STRING(retval); Py_BEGIN_ALLOW_THREADS err = inflate(&(self->zst), Z_FINISH); Py_END_ALLOW_THREADS /* while Z_OK and the output buffer is full, there might be more output, so extend the output buffer and try again */ while ((err == Z_OK || err == Z_BUF_ERROR) && self->zst.avail_out == 0) { if (_PyString_Resize(&retval, length << 1) < 0) goto error; self->zst.next_out = (Byte *)PyString_AS_STRING(retval) + length; self->zst.avail_out = length; length = length << 1; Py_BEGIN_ALLOW_THREADS err = inflate(&(self->zst), Z_FINISH); Py_END_ALLOW_THREADS } /* If flushmode is Z_FINISH, we also have to call deflateEnd() to free various data structures. Note we should only get Z_STREAM_END when flushmode is Z_FINISH */ if (err == Z_STREAM_END) { err = inflateEnd(&(self->zst)); self->is_initialised = 0; if (err != Z_OK) { zlib_error(self->zst, err, "from inflateEnd()"); Py_DECREF(retval); retval = NULL; goto error; } } _PyString_Resize(&retval, self->zst.total_out - start_total_out); error: LEAVE_ZLIB return retval; } static PyMethodDef comp_methods[] = { {"compress", (binaryfunc)PyZlib_objcompress, METH_VARARGS, comp_compress__doc__}, {"flush", (binaryfunc)PyZlib_flush, METH_VARARGS, comp_flush__doc__}, #ifdef HAVE_ZLIB_COPY {"copy", (PyCFunction)PyZlib_copy, METH_NOARGS, comp_copy__doc__}, #endif {NULL, NULL} }; static PyMethodDef Decomp_methods[] = { {"decompress", (binaryfunc)PyZlib_objdecompress, METH_VARARGS, decomp_decompress__doc__}, {"flush", (binaryfunc)PyZlib_unflush, METH_VARARGS, decomp_flush__doc__}, #ifdef HAVE_ZLIB_COPY {"copy", (PyCFunction)PyZlib_uncopy, METH_NOARGS, decomp_copy__doc__}, #endif {NULL, NULL} }; static PyObject * Comp_getattr(compobject *self, char *name) { PyObject * retval; ENTER_ZLIB if (strcmp(name, "status") == 0) { Py_INCREF(self->status); retval = self->status; } else { retval = Py_FindMethod(comp_methods, (PyObject *)self, name); } LEAVE_ZLIB return retval; } static PyObject * Decomp_getattr(compobject *self, char *name) { PyObject * retval; ENTER_ZLIB if (strcmp(name, "unused_data") == 0) { Py_INCREF(self->unused_data); retval = self->unused_data; } else if (strcmp(name, "unconsumed_tail") == 0) { Py_INCREF(self->unconsumed_tail); retval = self->unconsumed_tail; } else if (strcmp(name, "status") == 0) { Py_INCREF(self->status); retval = self->status; } else { retval = Py_FindMethod(Decomp_methods, (PyObject *)self, name); } LEAVE_ZLIB return retval; } PyDoc_STRVAR(adler32__doc__, "adler32(string[, start]) -- Compute an Adler-32 checksum of string.\n" "\n" "An optional starting value can be specified. 
The returned checksum is\n" "a signed integer."); static PyObject * PyZlib_adler32(PyObject *self, PyObject *args) { unsigned int adler32val = 1; /* adler32(0L, Z_NULL, 0) */ Byte *buf; int len, signed_val; if (!PyArg_ParseTuple(args, "s#|I:adler32", &buf, &len, &adler32val)) return NULL; /* In Python 2.x we return a signed integer regardless of native platform * long size (the 32bit unsigned long is treated as 32-bit signed and sign * extended into a 64-bit long inside the integer object). 3.0 does the * right thing and returns unsigned. http://bugs.python.org/issue1202 */ signed_val = adler32(adler32val, buf, len); return PyInt_FromLong(signed_val); } PyDoc_STRVAR(crc32__doc__, "crc32(string[, start]) -- Compute a CRC-32 checksum of string.\n" "\n" "An optional starting value can be specified. The returned checksum is\n" "a signed integer."); static PyObject * PyZlib_crc32(PyObject *self, PyObject *args) { unsigned int crc32val = 0; /* crc32(0L, Z_NULL, 0) */ Byte *buf; int len, signed_val; if (!PyArg_ParseTuple(args, "s#|I:crc32", &buf, &len, &crc32val)) return NULL; /* In Python 2.x we return a signed integer regardless of native platform * long size (the 32bit unsigned long is treated as 32-bit signed and sign * extended into a 64-bit long inside the integer object). 3.0 does the * right thing and returns unsigned. http://bugs.python.org/issue1202 */ signed_val = crc32(crc32val, buf, len); return PyInt_FromLong(signed_val); } static PyMethodDef zlib_methods[] = { {"adler32", (PyCFunction)PyZlib_adler32, METH_VARARGS, adler32__doc__}, {"compress", (PyCFunction)PyZlib_compress, METH_VARARGS, compress__doc__}, {"compressobj", (PyCFunction)PyZlib_compressobj, METH_VARARGS, compressobj__doc__}, {"crc32", (PyCFunction)PyZlib_crc32, METH_VARARGS, crc32__doc__}, {"decompress", (PyCFunction)PyZlib_decompress, METH_VARARGS, decompress__doc__}, {"decompressobj", (PyCFunction)PyZlib_decompressobj, METH_VARARGS, decompressobj__doc__}, {NULL, NULL} }; static PyTypeObject Comptype = { PyVarObject_HEAD_INIT(0, 0) "zlib.Compress", sizeof(compobject), 0, (destructor)Comp_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ (getattrfunc)Comp_getattr, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ }; static PyTypeObject Decomptype = { PyVarObject_HEAD_INIT(0, 0) "zlib.Decompress", sizeof(compobject), 0, (destructor)Decomp_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ (getattrfunc)Decomp_getattr, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ }; PyDoc_STRVAR(zlib_module_documentation, "The functions in this module allow compression and decompression using the\n" "zlib library, which is based on GNU zip.\n" "\n" "adler32(string[, start]) -- Compute an Adler-32 checksum.\n" "compress(string[, level]) -- Compress string, with compression level in 1-9.\n" "compressobj([level]) -- Return a compressor object.\n" "crc32(string[, start]) -- Compute a CRC-32 checksum.\n" "decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n" "decompressobj([wbits]) -- Return a decompressor object.\n" "\n" "'wbits' is window buffer size.\n" "Compressor objects support compress() and flush() methods; decompressor\n" "objects support decompress() and flush()."); PyMODINIT_FUNC PyInit_zlib(void) { PyObject *m, *ver; Py_TYPE(&Comptype) = &PyType_Type; Py_TYPE(&Decomptype) = &PyType_Type; m = Py_InitModule4("zlib", zlib_methods, zlib_module_documentation, 
(PyObject*)NULL,PYTHON_API_VERSION); if (m == NULL) return; ZlibError = PyErr_NewException("zlib.error", NULL, NULL); if (ZlibError != NULL) { Py_INCREF(ZlibError); PyModule_AddObject(m, "error", ZlibError); } PyModule_AddIntConstant(m, "MAX_WBITS", MAX_WBITS); PyModule_AddIntConstant(m, "DEFLATED", DEFLATED); PyModule_AddIntConstant(m, "DEF_MEM_LEVEL", DEF_MEM_LEVEL); PyModule_AddIntConstant(m, "Z_BEST_SPEED", Z_BEST_SPEED); PyModule_AddIntConstant(m, "Z_BEST_COMPRESSION", Z_BEST_COMPRESSION); PyModule_AddIntConstant(m, "Z_DEFAULT_COMPRESSION", Z_DEFAULT_COMPRESSION); PyModule_AddIntConstant(m, "Z_FILTERED", Z_FILTERED); PyModule_AddIntConstant(m, "Z_HUFFMAN_ONLY", Z_HUFFMAN_ONLY); PyModule_AddIntConstant(m, "Z_DEFAULT_STRATEGY", Z_DEFAULT_STRATEGY); PyModule_AddIntConstant(m, "Z_FINISH", Z_FINISH); PyModule_AddIntConstant(m, "Z_NO_FLUSH", Z_NO_FLUSH); PyModule_AddIntConstant(m, "Z_SYNC_FLUSH", Z_SYNC_FLUSH); PyModule_AddIntConstant(m, "Z_FULL_FLUSH", Z_FULL_FLUSH); // error codes PyModule_AddIntConstant(m, "Z_STATUS_UNSET", ~0); PyModule_AddIntConstant(m, "Z_OK", Z_OK); PyModule_AddIntConstant(m, "Z_STREAM_END", Z_STREAM_END); PyModule_AddIntConstant(m, "Z_NEED_DICT", Z_NEED_DICT); PyModule_AddIntConstant(m, "Z_ERRNO", Z_ERRNO); PyModule_AddIntConstant(m, "Z_STREAM_ERROR", Z_STREAM_ERROR); PyModule_AddIntConstant(m, "Z_DATA_ERROR", Z_DATA_ERROR); PyModule_AddIntConstant(m, "Z_MEM_ERROR", Z_MEM_ERROR); PyModule_AddIntConstant(m, "Z_BUF_ERROR", Z_BUF_ERROR); PyModule_AddIntConstant(m, "Z_VERSION_ERROR", Z_VERSION_ERROR); ver = PyString_FromString(ZLIB_VERSION); if (ver != NULL) PyModule_AddObject(m, "ZLIB_VERSION", ver); PyModule_AddStringConstant(m, "__version__", "1.0"); } async-0.6.1/mod/__init__.py0000644000175100017510000000000011466226142014563 0ustar byronbyronasync-0.6.1/pool.py0000644000175100017510000004257511466226142013245 0ustar byronbyron"""Implementation of a thread-pool working with channels""" from thread import ( WorkerThread, StopProcessing, ) from threading import Lock from util import ( AsyncQueue, DummyLock ) from Queue import ( Queue, Empty ) from graph import Graph from channel import ( mkchannel, ChannelWriter, Channel, SerialChannel, CallbackChannelReader ) import sys import weakref from time import sleep import new __all__ = ('PoolReader', 'Pool', 'ThreadPool') class PoolReader(CallbackChannelReader): """A reader designed to read from channels which take part in pools It acts like a handle to the underlying task in the pool.""" __slots__ = ('_task_ref', '_pool_ref') def __init__(self, channel, task, pool): CallbackChannelReader.__init__(self, channel) self._task_ref = weakref.ref(task) self._pool_ref = weakref.ref(pool) def __del__(self): """Assures that our task will be deleted if we were the last reader""" task = self._task_ref() if task is None: return pool = self._pool_ref() if pool is None: return # if this is the last reader to the wc we just handled, there # is no way anyone will ever read from the task again. If so, # delete the task in question, it will take care of itself and orphans # it might leave # 1 is ourselves, + 1 for the call + 1, and 3 magical ones which # I can't explain, but appears to be normal in the destructor # On the caller side, getrefcount returns 2, as expected # When just calling remove_task, # it has no way of knowing that the write channel is about to diminsh. 
# which is why we pass the info as a private kwarg - not nice, but # okay for now if sys.getrefcount(self) < 6: pool.remove_task(task, _from_destructor_ = True) # END handle refcount based removal of task #{ Internal def _read(self, count=0, block=True, timeout=None): return CallbackChannelReader.read(self, count, block, timeout) def pool_ref(self): """:return: reference to the pool we belong to""" return self._pool_ref def task_ref(self): """:return: reference to the task producing our items""" return self._task_ref #} END internal #{ Interface def task(self): """:return: task we read from :raise ValueError: If the instance is not attached to a task""" task = self._task_ref() if task is None: raise ValueError("PoolReader is not associated with a task anymore") return task def pool(self): """:return: pool our task belongs to :raise ValueError: if the instance does not belong to a pool""" pool = self._pool_ref() if pool is None: raise ValueError("PoolReader is not associated with a pool anymore") return pool #} END interface def read(self, count=0, block=True, timeout=None): """Read items that were processed by one of our threads :note: Triggers task dependency handling needed to provide the necessary input""" # NOTE: we always queue the operation that would give us count items # as tracking the scheduled items or testing the channel's size # is inherently unsafe depending on the design of the task network # If we put tasks onto the queue for every request, we are sure # to always produce enough items, even if the task.min_count actually # provided enough - it's better to have some possibly empty task runs # than an empty queue that blocks. # if the user tries to use us to read from a done task, we will never # compute as all produced items are already in the channel task = self._task_ref() if task is None: return list() # END abort if task was deleted skip_compute = task.is_done() or task.error() ########## prepare ############################## if not skip_compute: self._pool_ref()._prepare_channel_read(task, count) # END prepare pool scheduling ####### read data ######## ########################## # read actual items, tasks were set up to put their output into our channel ( as well ) items = CallbackChannelReader.read(self, count, block, timeout) ########################## return items class Pool(object): """A thread pool maintains a set of one or more worker threads, but supports a fully serial mode in which case the amount of threads is zero. Work is distributed via Channels, which form a dependency graph. The evaluation is lazy, as work will only be done once an output is requested. The thread pool's inherent issue is the global interpreter lock that it will hit, which gets worse considering that a few c extensions specifically lock their part globally as well. The only way this will improve is if custom c extensions are written which do some bulk work, but release the GIL once they have acquired their resources. Due to the nature of having multiple objects in git, it's easy to distribute that work cleanly among threads.
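A minimal usage sketch (illustrative only - `IteratorThreadTask` is the task type used by the bundled tests and examples, and with more than zero workers the order of returned items is not guaranteed):

    pool = ThreadPool(2)                    # two worker threads
    task = IteratorThreadTask(iter(range(4)), 'square', lambda i: i * i)
    reader = pool.add_task(task)            # lazy - nothing runs yet
    items = reader.read()                   # triggers the computation
    assert sorted(items) == [0, 1, 4, 9]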
:note: the current implementation returns channels which are meant to be used only from the main thread, hence you cannot consume their results from multiple threads unless you use a task for it.""" __slots__ = ( '_tasks', # a graph of tasks '_num_workers', # list of workers '_queue', # master queue for tasks '_taskorder_cache', # map task id -> ordered dependent tasks '_taskgraph_lock', # lock for accessing the task graph ) # CONFIGURATION # The type of worker to create - it's expected to provide the Thread interface, # taking the taskqueue as its only init argument # as well as a method called stop_and_join() to terminate it WorkerCls = None # The type of lock to use to protect critical sections, providing the # threading.Lock interface LockCls = None # the type of the task queue to use - it must provide the Queue interface TaskQueueCls = None def __init__(self, size=0): self._tasks = Graph() self._num_workers = 0 self._queue = self.TaskQueueCls() self._taskgraph_lock = self.LockCls() self._taskorder_cache = dict() self.set_size(size) def __del__(self): self.set_size(0) #{ Internal def _prepare_channel_read(self, task, count): """Process the tasks which depend on the given one to be sure the input channels are filled with data once we process the actual task Tasks have two important states: either they are done, or they are done and have an error, in which case they are likely not to have finished all their work. Either way, we will put them onto a list of tasks to delete them, providing information about the failed ones. Tasks which are not done will be put onto the queue for processing, which is fine as we walked them depth-first.""" # for the walk, we must make sure the ordering does not change. Even # when accessing the cache, as it is related to graph changes self._taskgraph_lock.acquire() try: try: dfirst_tasks = self._taskorder_cache[id(task)] except KeyError: # have to retrieve the list from the graph dfirst_tasks = self._tasks.input_inclusive_dfirst_reversed(task) self._taskorder_cache[id(task)] = dfirst_tasks # END handle cached order retrieval finally: self._taskgraph_lock.release() # END handle locking # check the min count on all involved tasks, and be sure that we don't # have any task which produces less than the maximum min-count of all tasks # The actual_count is used when chunking tasks up for the queue, whereas # the count is used to determine whether we still have enough output # on the queue, checking qsize ( ->revise ) # ABSTRACT: If T depends on T-1, and the client wants 1 item, T produces # at least 10, T-1 goes with 1, then T will block after 1 item, which # is read by the client. On the next read of 1 item, we would find T's # queue empty and put in another 10, which could put another thread into # blocking state. T-1 produces one more item, which is consumed right away # by the two threads running T. Although this works in the end, it leaves # many threads blocking and waiting for input, which is not desired. # Setting the min-count to the max of the min-counts of all tasks assures # we have enough items for all. # Addition: in serial mode, we would enter a deadlock if one task would # ever wait for items !
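# Worked example (illustrative numbers): for a chain of three tasks whose
# min_counts are (None, 10, 4) and a client request of count=1, the
# generator below yields (1, 10, 4), the reduce(max, ...) evaluates to 10,
# and since 0 < 1 < 10 we schedule 10 items - enough to keep every task
# in the chain supplied with input.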
actual_count = count min_counts = (((t.min_count is not None and t.min_count) or count) for t in dfirst_tasks) min_count = reduce(lambda m1, m2: max(m1, m2), min_counts) if 0 < count < min_count: actual_count = min_count # END set actual count # the list includes our tasks - the first one to evaluate first, the # requested one last for task in dfirst_tasks: # if task.error() or task.is_done(): # in theory, there should never be a consumed task in the pool, right? # They delete themselves once they are done. But as we run asynchronously, # it can be that someone reads, while a task realizes it's done, and # we get here to prepare the read although it already is done. # It's not a problem though, the task will not do anything. # Hence we don't waste our time with checking for it # raise AssertionError("Shouldn't have consumed tasks on the pool, they delete themselves, what happened?") # END skip processing # but use the actual count to produce the output, we may produce # more than requested numchunks = 1 chunksize = actual_count remainder = 0 # we need the count set for this - can't chunk up unlimited items # In serial mode we could do this by checking for empty input channels, # but in dispatch mode it's impossible ( == not easily possible ) # Only try it if we have enough demand if task.max_chunksize and actual_count > task.max_chunksize: numchunks = actual_count / task.max_chunksize chunksize = task.max_chunksize remainder = actual_count - (numchunks * chunksize) # END handle chunking # the following loops are kind of unrolled - code duplication # should make things execute faster. Putting the if statements # into the loop would be less code, but ... slower if self._num_workers: # respect the chunk size, and split the task up if we want # to process too much. This can be defined per task qput = self._queue.put if numchunks > 1: for i in xrange(numchunks): qput((task.process, chunksize)) # END for each chunk to put else: qput((task.process, chunksize)) # END try efficient looping if remainder: qput((task.process, remainder)) # END handle chunksize else: # no workers, so we have to do the work ourselves if numchunks > 1: for i in xrange(numchunks): task.process(chunksize) # END for each chunk to put else: task.process(chunksize) # END try efficient looping if remainder: task.process(remainder) # END handle chunksize # END handle serial mode # END for each task to process def _remove_task_if_orphaned(self, task, from_destructor): """Check the task, and delete it if it is orphaned""" # 1 for writer on task, 1 for the getrefcount call + 1 for each other writer/reader # If we are getting here from the destructor of an RPool channel, # it's totally valid to virtually decrement the refcount by 1 as # we can expect it to drop once the destructor completes, which is when # we finish all recursive calls max_ref_count = 3 + from_destructor if sys.getrefcount(task.writer().channel) < max_ref_count: self.remove_task(task, from_destructor) #} END internal #{ Interface def size(self): """:return: amount of workers in the pool :note: method is not threadsafe !""" return self._num_workers def set_size(self, size=0): """Set the amount of workers to use in this pool. When reducing the size, threads will continue with their work until they are done before effectively being removed. :return: self :param size: if 0, the pool will do all work itself in the calling thread, otherwise the work will be distributed among the given amount of threads.
If the size is 0, newly added tasks will use channels which are NOT threadsafe to optimize item throughput. :note: currently NOT threadsafe !""" assert size > -1, "Size cannot be negative" # either start new threads, or kill existing ones. # If we end up with no threads, we process the remaining chunks on the queue # ourselves cur_count = self._num_workers if cur_count < size: # we can safely increase the size, even from serial mode, as we would # only be able to do this if the serial ( sync ) mode finished processing. # Just adding more workers is not a problem at all. add_count = size - cur_count for i in range(add_count): self.WorkerCls(self._queue).start() # END for each new worker to create self._num_workers += add_count elif cur_count > size: # We don't care which thread exactly gets hit by our stop request # On their way, they will consume remaining tasks, but new ones # could be added as we speak. del_count = cur_count - size for i in range(del_count): self._queue.put((self.WorkerCls.stop, True)) # arg doesn't matter # END for each thread to stop self._num_workers -= del_count # END handle count if size == 0: # NOTE: we do not process any tasks still on the queue, as we will # naturally do that once we read the next time, only on the tasks # that are actually required. The queue will keep the tasks, # and once we are deleted, they will vanish without additional # time spent on them, if there shouldn't be any consumers anyway. # If we should re-enable some workers again, they will continue on the # remaining tasks, probably with nothing to do. # We can't clear the task queue if we have removed workers # as they will receive the termination signal through it, and if # we had added workers, we wouldn't be here ;). pass # END process queue return self def num_tasks(self): """:return: amount of tasks""" self._taskgraph_lock.acquire() try: return len(self._tasks.nodes) finally: self._taskgraph_lock.release() def remove_task(self, task, _from_destructor_ = False): """ Delete the task. Additionally we will remove orphaned tasks, which can be identified if their output channel is only held by themselves, so no one will ever consume their items. This method blocks until all tasks to be removed have been processed, if they are currently being processed. :return: self""" self._taskgraph_lock.acquire() try: # it can be that the task is already deleted, but its chunk was on the # queue until now, so it's marked consumed again if task not in self._tasks.nodes: return self # END early abort # the task we are currently deleting could also be processed by # a thread right now. We don't care about it as it's taking care of # its write channel itself, and sends everything it can to it. # For it, it doesn't matter that it's not part of our task graph anymore. # now delete our actual node - be sure it's done to prevent further # processing in case there are still client reads on their way. task.set_done() # keep its input nodes as we check whether they were orphaned in_tasks = task.in_nodes self._tasks.remove_node(task) self._taskorder_cache.clear() finally: self._taskgraph_lock.release() # END locked deletion for t in in_tasks: self._remove_task_if_orphaned(t, _from_destructor_) # END handle orphans recursively return self def add_task(self, task): """Add a new task to be processed. :return: a read channel to retrieve processed items.
If that handle is lost, the task will be considered orphaned and will be deleted on the next occasion.""" # create a write channel for it ctype = Channel # adjust the task with our pool ref, if it has the slot and is empty # For now, we don't allow tasks to be used in multiple pools, except # for by their channels if hasattr(task, 'pool'): their_pool = task.pool() if their_pool is None: task.set_pool(self) elif their_pool is not self: raise ValueError("Task %r is already registered to another pool" % task.id) # END handle pool exclusivity # END handle pool aware tasks self._taskgraph_lock.acquire() try: self._taskorder_cache.clear() self._tasks.add_node(task) # Use a non-threadsafe queue # This brings about 15% more performance, but sacrifices thread-safety if self.size() == 0: ctype = SerialChannel # END improve locks # setup the tasks channel - respect the task creators choice though # if it is set. wc = task.writer() ch = None if wc is None: ch = ctype() wc = ChannelWriter(ch) task.set_writer(wc) else: ch = wc.channel # END create write channel ifunset rc = PoolReader(ch, task, self) finally: self._taskgraph_lock.release() # END sync task addition # If the input channel is one of our read channels, we add the relation if hasattr(task, 'reader'): ic = task.reader() if hasattr(ic, 'pool_ref') and ic.pool_ref()() is self: self._taskgraph_lock.acquire() try: self._tasks.add_edge(ic._task_ref(), task) # additionally, bypass ourselves when reading from the # task, if possible if hasattr(ic, '_read'): task.set_read(ic._read) # END handle read bypass finally: self._taskgraph_lock.release() # END handle edge-adding # END add task relation # END handle input channels for connections return rc #} END interface class ThreadPool(Pool): """A pool using threads as worker""" WorkerCls = WorkerThread LockCls = Lock TaskQueueCls = AsyncQueue async-0.6.1/test/0000755000175100017510000000000011472205557012670 5ustar byronbyronasync-0.6.1/test/lib.py0000644000175100017510000000020111466226142013775 0ustar byronbyron"""Module with shared tools for testing""" import unittest class TestBase(unittest.TestCase): """Common base for all tests""" async-0.6.1/test/test_task.py0000644000175100017510000000034311466226142015237 0ustar byronbyron"""Channel testing""" from lib import * from async.util import * from async.task import * import time class TestTask(TestBase): max_threads = cpu_count() def test_iterator_task(self): # tested via test_pool pass async-0.6.1/test/test_thread.py0000644000175100017510000000172211466226142015546 0ustar byronbyron# -*- coding: utf-8 -*- """ Test thead classes and functions""" from lib import * from async.thread import * from Queue import Queue import time class TestWorker(WorkerThread): def __init__(self, *args, **kwargs): super(TestWorker, self).__init__(*args, **kwargs) self.reset() def fun(self, arg): self.called = True self.arg = arg return True def make_assertion(self): assert self.called assert self.arg self.reset() def reset(self): self.called = False self.arg = None class TestThreads(TestBase): @terminate_threads def test_worker_thread(self): worker = TestWorker() assert isinstance(worker.start(), WorkerThread) # test different method types standalone_func = lambda *args, **kwargs: worker.fun(*args, **kwargs) for function in (TestWorker.fun, worker.fun, standalone_func): worker.inq.put((function, 1)) time.sleep(0.01) worker.make_assertion() # END for each function type worker.stop_and_join() async-0.6.1/test/test_example.py0000644000175100017510000000207311466226142015732 
0ustar byronbyron"""Module containing examples from the documentaiton""" from lib import * from async.pool import * from async.task import * from async.thread import terminate_threads class TestExamples(TestBase): @terminate_threads def test_usage(self): p = ThreadPool() # default size is 0, synchronous mode assert p.size() == 0 # now tasks would be processed asynchronously p.set_size(1) assert p.size() == 1 # A task performing processing on items from an iterator t = IteratorThreadTask(iter(range(10)), "power", lambda i: i*i) reader = p.add_task(t) # read all items - they where procesed by worker 1 items = reader.read() assert len(items) == 10 and items[0] == 0 and items[-1] == 81 # chaining t = IteratorThreadTask(iter(range(10)), "power", lambda i: i*i) reader = p.add_task(t) # chain both by linking their readers tmult = ChannelThreadTask(reader, "mult", lambda i: i*2) result_reader = p.add_task(tmult) # read all items = result_reader.read() assert len(items) == 10 and items[0] == 0 and items[-1] == 162 async-0.6.1/test/test_performance.py0000644000175100017510000000300211466226142016571 0ustar byronbyron"""Channel testing""" from lib import * from task import * from async.pool import * from async.thread import terminate_threads from async.util import cpu_count import time import sys class TestThreadPoolPerformance(TestBase): max_threads = cpu_count() def test_base(self): # create a dependency network, and see how the performance changes # when adjusting the amount of threads pool = ThreadPool(0) ni = 1000 # number of items to process print self.max_threads for num_threads in range(self.max_threads*2 + 1): pool.set_size(num_threads) for num_transformers in (1, 5, 10): for read_mode in range(2): ts, rcs = add_task_chain(pool, ni, count=num_transformers, feedercls=IteratorThreadTask, transformercls=TestPerformanceThreadTask, include_verifier=False) mode_info = "read(0)" if read_mode == 1: mode_info = "read(1) * %i" % ni # END mode info fmt = "Threadcount=%%i: Produced %%i items using %s in %%i transformations in %%f s (%%f items / s)" % mode_info reader = rcs[-1] st = time.time() if read_mode == 1: for i in xrange(ni): assert len(reader.read(1)) == 1 # END for each item to read else: assert len(reader.read(0)) == ni # END handle read mode elapsed = time.time() - st print >> sys.stderr, fmt % (num_threads, ni, num_transformers, elapsed, ni / elapsed) # END for each read-mode # END for each amount of processors # END for each thread count async-0.6.1/test/test_pool.py0000644000175100017510000003646711466226142015266 0ustar byronbyron"""Pool testing""" from lib import * from task import * from async.pool import * from async.thread import terminate_threads from async.util import cpu_count import threading import weakref import time import sys class TestThreadPool(TestBase): max_threads = cpu_count() def _assert_single_task(self, p, async=False): """Performs testing in a synchronized environment""" print >> sys.stderr, "Threadpool: Starting single task (async = %i) with %i threads" % (async, p.size()) null_tasks = p.num_tasks() # in case we had some before # add a simple task # it iterates n items ni = 1000 assert ni % 2 == 0, "ni needs to be dividable by 2" assert ni % 4 == 0, "ni needs to be dividable by 4" make_task = lambda *args, **kwargs: make_iterator_task(ni, *args, **kwargs) task = make_task() assert p.num_tasks() == null_tasks rc = p.add_task(task) assert p.num_tasks() == 1 + null_tasks assert isinstance(rc, PoolReader) assert task._out_writer is not None # pull the result completely 
- we should get one task, which calls its # function once. In sync mode, the order matches print "read(0)" items = rc.read() assert len(items) == ni task._assert(1, ni) if not async: assert items[0] == 0 and items[-1] == ni-1 # as the task is done, it should have been removed - we have read everything assert task.is_done() del(rc) assert p.num_tasks() == null_tasks task = make_task() # pull individual items rc = p.add_task(task) assert p.num_tasks() == 1 + null_tasks st = time.time() print "read(1) * %i" % ni for i in range(ni): items = rc.read(1) assert len(items) == 1 # can't assert order in async mode if not async: assert i == items[0] # END for each item elapsed = time.time() - st print >> sys.stderr, "Threadpool: processed %i individual items, with %i threads, one at a time, in %f s ( %f items / s )" % (ni, p.size(), elapsed, ni / elapsed) # it couldn't yet notice that the input is depleted as we pulled exaclty # ni items - the next one would remove it. Instead, we delete our channel # which triggers orphan handling assert not task.is_done() assert p.num_tasks() == 1 + null_tasks del(rc) assert p.num_tasks() == null_tasks # test min count # if we query 1 item, it will prepare ni / 2 task = make_task() task.min_count = ni / 2 rc = p.add_task(task) print "read(1)" items = rc.read(1) assert len(items) == 1 and items[0] == 0 # processes ni / 2 print "read(1)" items = rc.read(1) assert len(items) == 1 and items[0] == 1 # processes nothing # rest - it has ni/2 - 2 on the queue, and pulls ni-2 # It wants too much, so the task realizes its done. The task # doesn't care about the items in its output channel nri = ni-2 print "read(%i)" % nri items = rc.read(nri) assert len(items) == nri p.remove_task(task) assert p.num_tasks() == null_tasks task._assert(2, ni) # two chunks, ni calls # its already done, gives us no more, its still okay to use it though # as a task doesn't have to be in the graph to allow reading its produced # items print "read(0) on closed" # it can happen that a thread closes the channel just a tiny fraction of time # after we check this, so the test fails, although it is nearly closed. # When we start reading, we should wake up once it sends its signal # assert task.is_closed() assert len(rc.read()) == 0 # test chunking # we always want 4 chunks, these could go to individual nodes task = make_task() task.min_count = ni / 2 # restore previous value task.max_chunksize = ni / 4 # 4 chunks rc = p.add_task(task) # must read a specific item count # count is still at ni / 2 - here we want more than that # 2 steps with n / 4 items, + 1 step with n/4 items to get + 2 nri = ni / 2 + 2 print "read(%i) chunksize set" % nri items = rc.read(nri) assert len(items) == nri # have n / 4 - 2 items on queue, want n / 4 in first chunk, cause 1 processing # ( 4 in total ). Still want n / 4 - 2 in second chunk, causing another processing nri = ni / 2 - 2 print "read(%i) chunksize set" % nri items = rc.read(nri) assert len(items) == nri task._assert( 5, ni) # delete the handle first, causing the task to be removed and to be set # done. We check for the set-done state later. Depending on the timing, # The task is not yet set done when we are checking it because we were # scheduled in before the flag could be set. 
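# (What happens below: del(rc) drops the last PoolReader handle; its
# destructor notices the low refcount and calls remove_task on the pool,
# which marks the task done - see PoolReader.__del__ in pool.py.)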
del(rc) assert task.is_done() assert p.num_tasks() == null_tasks # depleted # but this only hits if we want too many items, if we want less, it could # still do too much - hence we set the min_count to the same number to enforce # at least ni / 4 items to be preocessed, no matter what we request task = make_task() task.min_count = None task.max_chunksize = ni / 4 # match previous setup rc = p.add_task(task) st = time.time() print "read(1) * %i, chunksize set" % ni for i in range(ni): if async: assert len(rc.read(1)) == 1 else: assert rc.read(1)[0] == i # END handle async mode # END pull individual items # too many processing counts ;) elapsed = time.time() - st print >> sys.stderr, "Threadpool: processed %i individual items in chunks of %i, with %i threads, one at a time, in %f s ( %f items / s )" % (ni, ni/4, p.size(), elapsed, ni / elapsed) task._assert(ni, ni) assert p.num_tasks() == 1 + null_tasks assert p.remove_task(task) is p # del manually this time assert p.num_tasks() == null_tasks # now with we set the minimum count to reduce the number of processing counts task = make_task() task.min_count = ni / 4 task.max_chunksize = ni / 4 # match previous setup rc = p.add_task(task) print "read(1) * %i, min_count%i + chunksize" % (ni, task.min_count) for i in range(ni): items = rc.read(1) assert len(items) == 1 if not async: assert items[0] == i # END for each item task._assert(ni / task.min_count, ni) del(rc) assert p.num_tasks() == null_tasks # test failure # on failure, the processing stops and the task is finished, keeping # his error for later task = make_task() task.should_fail = True rc = p.add_task(task) print "read(0) with failure" assert len(rc.read()) == 0 # failure on first item assert isinstance(task.error(), AssertionError) assert task.is_done() # on error, its marked done as well del(rc) assert p.num_tasks() == null_tasks # test failure after ni / 2 items # This makes sure it correctly closes the channel on failure to prevent blocking nri = ni/2 task = make_task(TestFailureThreadTask, fail_after=ni/2) rc = p.add_task(task) assert len(rc.read()) == nri assert task.is_done() assert isinstance(task.error(), AssertionError) print >> sys.stderr, "done with everything" def _assert_async_dependent_tasks(self, pool): # includes failure in center task, 'recursive' orphan cleanup # This will also verify that the channel-close mechanism works # t1 -> t2 -> t3 print >> sys.stderr, "Threadpool: starting async dependency test in %i threads" % pool.size() null_tasks = pool.num_tasks() ni = 1000 count = 3 aic = count + 2 make_task = lambda *args, **kwargs: add_task_chain(pool, ni, count, *args, **kwargs) ts, rcs = make_task() assert len(ts) == aic assert len(rcs) == aic assert pool.num_tasks() == null_tasks + len(ts) # read(0) ######### st = time.time() items = rcs[-1].read() elapsed = time.time() - st print len(items), ni assert len(items) == ni del(rcs) assert pool.num_tasks() == 0 # tasks depleted, all done, no handles # wait a tiny moment - there could still be something unprocessed on the # queue, increasing the refcount time.sleep(0.15) assert sys.getrefcount(ts[-1]) == 2 # ts + call assert sys.getrefcount(ts[0]) == 2 # ts + call print >> sys.stderr, "Dependent Tasks: evaluated %i items of %i dependent in %f s ( %i items / s )" % (ni, aic, elapsed, ni / elapsed) # read(1) ######### ts, rcs = make_task() st = time.time() for i in xrange(ni): items = rcs[-1].read(1) assert len(items) == 1 # END for each item to pull elapsed_single = time.time() - st # another read yields nothing, its 
empty assert len(rcs[-1].read()) == 0 print >> sys.stderr, "Dependent Tasks: evaluated %i items with read(1) of %i dependent in %f s ( %i items / s )" % (ni, aic, elapsed_single, ni / elapsed_single) # read with min-count size ########################### # must be faster, as it will read ni / 4 chunks # Its enough to set one task, as it will force all others in the chain # to min_size as well. ts, rcs = make_task() assert pool.num_tasks() == len(ts) nri = ni / 4 ts[-1].min_count = nri st = time.time() for i in xrange(ni): items = rcs[-1].read(1) assert len(items) == 1 # END for each item to read elapsed_minsize = time.time() - st # its empty assert len(rcs[-1].read()) == 0 print >> sys.stderr, "Dependent Tasks: evaluated %i items with read(1), min_size=%i, of %i dependent in %f s ( %i items / s )" % (ni, nri, aic, elapsed_minsize, ni / elapsed_minsize) # it should have been a bit faster at least, and most of the time it is # Sometimes, its not, mainly because: # * The test tasks lock a lot, hence they slow down the system # * Each read will still trigger the pool to evaluate, causing some overhead # even though there are enough items on the queue in that case. Keeping # track of the scheduled items helped there, but it caused further inacceptable # slowdown # assert elapsed_minsize < elapsed_single # read with failure ################### # it should recover and give at least fail_after items # t1 -> x -> t3 fail_after = ni/2 ts, rcs = make_task(fail_setup=[(0, fail_after)]) items = rcs[-1].read() assert len(items) == fail_after # MULTI-POOL # If two pools are connected, this shold work as well. # The second one has just one more thread ts, rcs = make_task() # connect verifier channel as feeder of the second pool p2 = ThreadPool(0) # don't spawn new threads, they have the tendency not to wake up on mutexes assert p2.size() == 0 p2ts, p2rcs = add_task_chain(p2, ni, count, feeder_channel=rcs[-1], id_offset=count) assert p2ts[0] is None # we have no feeder task assert rcs[-1].pool_ref()() is pool # it didnt change the pool assert rcs[-1] is p2ts[1].reader() assert p2.num_tasks() == len(p2ts)-1 # first is None # reading from the last one will evaluate all pools correctly print "read(0) multi-pool" st = time.time() items = p2rcs[-1].read() elapsed = time.time() - st assert len(items) == ni print >> sys.stderr, "Dependent Tasks: evaluated 2 connected pools and %i items with read(0), of %i dependent tasks in %f s ( %i items / s )" % (ni, aic + aic-1, elapsed, ni / elapsed) # loose the handles of the second pool to allow others to go as well del(p2rcs); del(p2ts) assert p2.num_tasks() == 0 # now we lost our old handles as well, and the tasks go away ts, rcs = make_task() assert pool.num_tasks() == len(ts) p2ts, p2rcs = add_task_chain(p2, ni, count, feeder_channel=rcs[-1], id_offset=count) assert p2.num_tasks() == len(p2ts) - 1 # Test multi-read(1) print "read(1) * %i" % ni reader = rcs[-1] st = time.time() for i in xrange(ni): items = reader.read(1) assert len(items) == 1 # END for each item to get elapsed = time.time() - st del(reader) # decrement refcount print >> sys.stderr, "Dependent Tasks: evaluated 2 connected pools and %i items with read(1), of %i dependent tasks in %f s ( %i items / s )" % (ni, aic + aic-1, elapsed, ni / elapsed) # another read is empty assert len(rcs[-1].read()) == 0 # now that both are connected, I can drop my handle to the reader # without affecting the task-count, but whats more important: # They remove their tasks correctly once we drop our references in the # right 
order del(p2ts) assert p2rcs[0] is rcs[-1] del(p2rcs) assert p2.num_tasks() == 0 del(p2) assert pool.num_tasks() == null_tasks + len(ts) del(ts) del(rcs) assert pool.num_tasks() == null_tasks # ASSERTION: We already tested that one pool behaves correctly when an error # occours - if two pools handle their ref-counts correctly, which they # do if we are here, then they should handle errors happening during # the task processing as expected as well. Hence we can safe this here @terminate_threads def test_base(self): max_wait_attempts = 3 sleep_time = 0.1 for mc in range(max_wait_attempts): # wait for threads to die if len(threading.enumerate()) != 1: time.sleep(sleep_time) # END for each attempt assert len(threading.enumerate()) == 1, "Waited %f s for threads to die, its still alive" % (max_wait_attempts, sleep_time) p = ThreadPool() # default pools have no workers assert p.size() == 0 # increase and decrease the size num_threads = len(threading.enumerate()) for i in range(self.max_threads): p.set_size(i) assert p.size() == i assert len(threading.enumerate()) == num_threads + i for i in range(self.max_threads, -1, -1): p.set_size(i) assert p.size() == i assert p.size() == 0 # threads should be killed already, but we let them a tiny amount of time # just to be sure time.sleep(0.05) assert len(threading.enumerate()) == num_threads # SINGLE TASK SERIAL SYNC MODE ############################## # put a few unrelated tasks that we forget about - check ref counts and cleanup t1, t2 = TestThreadTask(iter(list()), "nothing1", None), TestThreadTask(iter(list()), "nothing2", None) urc1 = p.add_task(t1) urc2 = p.add_task(t2) assert p.num_tasks() == 2 # test pool reader assert urc1.pool_ref()() is p assert urc1.task_ref()() is t1 assert urc1.pool() == p assert urc1.task() == t1 ## SINGLE TASK ################# self._assert_single_task(p, False) assert p.num_tasks() == 2 del(urc1) assert p.num_tasks() == 1 p.remove_task(t2) assert p.num_tasks() == 0 assert sys.getrefcount(t2) == 2 t3 = TestChannelThreadTask(urc2, "channel", None) urc3 = p.add_task(t3) assert p.num_tasks() == 1 del(urc3) assert p.num_tasks() == 0 assert sys.getrefcount(t3) == 2 # DEPENDENT TASKS SYNC MODE ########################### self._assert_async_dependent_tasks(p) # SINGLE TASK THREADED ASYNC MODE ( 1 thread ) ############################################## # step one gear up - just one thread for now. 
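# (set_size(1) spawns a single WorkerThread that blocks on the pool's task
# queue; from this point on, reads are served asynchronously.)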
p.set_size(1) assert p.size() == 1 assert len(threading.enumerate()) == num_threads + 1 # deleting the pool stops its threads - just to be sure ;) # Its not synchronized, hence we wait a moment del(p) time.sleep(0.05) assert len(threading.enumerate()) == num_threads p = ThreadPool(1) assert len(threading.enumerate()) == num_threads + 1 # here we go self._assert_single_task(p, True) # SINGLE TASK ASYNC MODE ( 2 threads ) ###################################### # two threads to compete for a single task p.set_size(2) self._assert_single_task(p, True) # real stress test- should be native on every dual-core cpu with 2 hardware # threads per core p.set_size(4) self._assert_single_task(p, True) # DEPENDENT TASK ASYNC MODE ########################### self._assert_async_dependent_tasks(p) print >> sys.stderr, "Done with everything" async-0.6.1/test/task.py0000644000175100017510000001400311466226142014176 0ustar byronbyron"""Module containing task implementations useful for testing them""" from async.task import * import threading import weakref class _TestTaskBase(object): """Note: causes great slowdown due to the required locking of task variables""" def __init__(self, *args, **kwargs): super(_TestTaskBase, self).__init__(*args, **kwargs) self.should_fail = False self.lock = threading.Lock() # yes, can't safely do x = x + 1 :) self.plock = threading.Lock() self.item_count = 0 self.process_count = 0 def do_fun(self, item): self.lock.acquire() self.item_count += 1 self.lock.release() if self.should_fail: raise AssertionError("I am failing just for the fun of it") return item def process(self, count=1): # must do it first, otherwise we might read and check results before # the thread gets here :). Its a lesson ! self.plock.acquire() self.process_count += 1 self.plock.release() super(_TestTaskBase, self).process(count) def _assert(self, pc, fc, check_scheduled=False): """Assert for num process counts (pc) and num function counts (fc) :return: self""" self.lock.acquire() if self.item_count != fc: print self.item_count, fc assert self.item_count == fc self.lock.release() # NOTE: asserting num-writers fails every now and then, implying a thread is # still processing (an empty chunk) when we are checking it. This can # only be prevented by checking the scheduled items, which requires locking # and causes slowdows, so we don't do that. 
If the num_writers # counter wouldn't be maintained properly, more tests would fail, so # we can safely refrain from checking this here # self._wlock.acquire() # assert self._num_writers == 0 # self._wlock.release() return self class TestThreadTask(_TestTaskBase, IteratorThreadTask): pass class TestFailureThreadTask(TestThreadTask): """Fails after X items""" def __init__(self, *args, **kwargs): self.fail_after = kwargs.pop('fail_after') super(TestFailureThreadTask, self).__init__(*args, **kwargs) def do_fun(self, item): item = TestThreadTask.do_fun(self, item) self.lock.acquire() try: if self.item_count > self.fail_after: raise AssertionError("Simulated failure after processing %i items" % self.fail_after) finally: self.lock.release() # END handle fail after return item class TestChannelThreadTask(_TestTaskBase, ChannelThreadTask): """Apply a transformation on items read from an input channel""" def __init__(self, *args, **kwargs): self.fail_after = kwargs.pop('fail_after', 0) super(TestChannelThreadTask, self).__init__(*args, **kwargs) def do_fun(self, item): """return tuple(i, i*2)""" item = super(TestChannelThreadTask, self).do_fun(item) # fail after support if self.fail_after: self.lock.acquire() try: if self.item_count > self.fail_after: raise AssertionError("Simulated failure after processing %i items" % self.fail_after) finally: self.lock.release() # END handle fail-after if isinstance(item, tuple): i = item[0] return item + (i * self.id, ) else: return (item, item * self.id) # END handle tuple class TestPerformanceThreadTask(ChannelThreadTask): """Applies no operation to the item, and does not lock, measuring the actual throughput of the system""" def do_fun(self, item): return item class TestVerifyChannelThreadTask(_TestTaskBase, ChannelThreadTask): """An input channel task, which verifies the result of its input channels, should be last in the chain. Id must be int""" def do_fun(self, item): """return tuple(i, i*2)""" item = super(TestVerifyChannelThreadTask, self).do_fun(item) # make sure the computation order matches assert isinstance(item, tuple), "input was no tuple: %s" % item base = item[0] for id, num in enumerate(item[1:]): assert num == base * id, "%i != %i, orig = %s" % (num, base * id, str(item)) # END verify order return item #{ Utilities def make_proxy_method(t): """required to prevent binding self into the method we call""" wt = weakref.proxy(t) return lambda item: wt.do_fun(item) def add_task_chain(p, ni, count=1, fail_setup=list(), feeder_channel=None, id_offset=0, feedercls=TestThreadTask, transformercls=TestChannelThreadTask, include_verifier=True): """Create a task chain of feeder, count transformers and order verifcator to the pool p, like t1 -> t2 -> t3 :param fail_setup: a list of pairs, task_id, fail_after, i.e. [(2, 20)] would make the third transformer fail after 20 items :param feeder_channel: if set to a channel, it will be used as input of the first transformation task. The respective first task in the return value will be None. 
:param id_offset: defines the id of the first transformation task, all subsequent ones will add one :return: tuple(list(task1, taskN, ...), list(rc1, rcN, ...))""" nt = p.num_tasks() feeder = None frc = feeder_channel if feeder_channel is None: feeder = make_iterator_task(ni, taskcls=feedercls) frc = p.add_task(feeder) # END handle specific feeder rcs = [frc] tasks = [feeder] inrc = frc for tc in xrange(count): t = transformercls(inrc, tc+id_offset, None) t.fun = make_proxy_method(t) #t.fun = t.do_fun inrc = p.add_task(t) tasks.append(t) rcs.append(inrc) # END create count transformers # setup failure for id, fail_after in fail_setup: tasks[1+id].fail_after = fail_after # END setup failure if include_verifier: verifier = TestVerifyChannelThreadTask(inrc, 'verifier', None) #verifier.fun = verifier.do_fun verifier.fun = make_proxy_method(verifier) vrc = p.add_task(verifier) tasks.append(verifier) rcs.append(vrc) # END handle include verifier return tasks, rcs def make_iterator_task(ni, taskcls=TestThreadTask, **kwargs): """:return: task which yields ni items :param taskcls: the actual iterator type to use :param kwargs: additional kwargs to be passed to the task""" t = taskcls(iter(range(ni)), 'iterator', None, **kwargs) if isinstance(t, _TestTaskBase): t.fun = make_proxy_method(t) return t #} END utilities async-0.6.1/test/mod/0000755000175100017510000000000011472205557013447 5ustar byronbyronasync-0.6.1/test/mod/test_zlib.py0000644000175100017510000000354311466226142016021 0ustar byronbyron"""ZLib module testing""" from async.test.lib import * import async.mod.zlib as zlib import sys import struct class TestZLib(TestBase): def test_constants(self): # check constants assert zlib.Z_STATUS_UNSET == ~0 assert hasattr(zlib, "Z_OK") assert hasattr(zlib, "Z_STREAM_END") assert hasattr(zlib, "Z_NEED_DICT") assert hasattr(zlib, "Z_ERRNO") assert hasattr(zlib, "Z_STREAM_ERROR") assert hasattr(zlib, "Z_DATA_ERROR") assert hasattr(zlib, "Z_MEM_ERROR") assert hasattr(zlib, "Z_BUF_ERROR") assert hasattr(zlib, "Z_VERSION_ERROR") def test_status(self): # test the newly introduced status code data = struct.pack(">L", (1<<31) + (1<<15) + (1<<2)) assert len(data) == 4 # compress cobj = zlib.compressobj(zlib.Z_BEST_SPEED) assert cobj.status == zlib.Z_STATUS_UNSET cchunk = '' for c in data: cchunk += cobj.compress(c) assert cobj.status == zlib.Z_OK # END for each databyte # its not yet done, but soon it will cchunk += cobj.flush() assert cobj.status == zlib.Z_STREAM_END # zip should have added a few bytes of info assert len(cchunk) > len(data) # decompress - need status to determine decompession finished dcobj = zlib.decompressobj() idata = '' # inflated data for i, c in enumerate(cchunk): idata += dcobj.decompress(c) assert dcobj.status == zlib.Z_OK # break if we have it if len(idata) == len(data): break # END for each character assert idata == data # we should still have some bytes left assert i < len(cchunk) - 1 # feed the remaining data, we don't expect to decompress anything, but # want to see the status change while dcobj.status == zlib.Z_OK: i += 1 assert len(dcobj.decompress(cchunk[i])) == 0 # END deplete compressed stream # now we are done assert dcobj.status == zlib.Z_STREAM_END assert i == len(cchunk) - 1 async-0.6.1/test/mod/__init__.py0000644000175100017510000000000011466226142015542 0ustar byronbyronasync-0.6.1/test/test_channel.py0000644000175100017510000000470111466226142015707 0ustar byronbyron"""Channel testing""" from lib import * from async.channel import * import time class 
TestChannels(TestBase): def test_base(self): # creating channel yields a write and a read channal wc, rc = mkchannel() assert isinstance(wc, ChannelWriter) # default args assert isinstance(rc, ChannelReader) # TEST UNLIMITED SIZE CHANNEL - writing+reading is FIFO item = 1 item2 = 2 wc.write(item) wc.write(item2) # read all - it blocks as its still open for writing to = 0.2 st = time.time() assert rc.read(timeout=to) == [item, item2] assert time.time() - st >= to # next read blocks. it waits a second st = time.time() assert len(rc.read(1, True, to)) == 0 assert time.time() - st >= to # writing to a closed channel raises assert not wc.closed() wc.close() assert wc.closed() wc.close() # fine assert wc.closed() self.failUnlessRaises(ReadOnly, wc.write, 1) # reading from a closed channel never blocks assert len(rc.read()) == 0 assert len(rc.read(5)) == 0 assert len(rc.read(1)) == 0 # test callback channels wc, rc = mkchannel(wtype = CallbackChannelWriter, rtype = CallbackChannelReader) cb = [0, 0, 0] # set slots to one if called def pre_write(item): cb[0] = 1 return item + 1 def pre_read(count): cb[1] = 1 def post_read(items): assert isinstance(items, list) cb[2] = 1 return [ i+1 for i in items] # set, verify it returns previous one assert wc.set_pre_cb(pre_write) is None assert rc.set_pre_cb(pre_read) is None assert rc.set_post_cb(post_read) is None assert wc.set_pre_cb(pre_write) is pre_write assert rc.set_pre_cb(pre_read) is pre_read assert rc.set_post_cb(post_read) is post_read # writer transforms input val = 5 wc.write(val) assert cb[0] == 1 and cb[1] == 0 rval = rc.read(1)[0] # read one item, must not block assert cb[0] == 1 and cb[1] == 1 and cb[2] == 1 assert rval == val + 1 + 1 # ITERATOR READER reader = IteratorReader(iter(range(10))) assert len(reader.read(2)) == 2 assert len(reader.read(0)) == 8 # its empty now assert len(reader.read(0)) == 0 assert len(reader.read(5)) == 0 # doesn't work if item is not an iterator self.failUnlessRaises(ValueError, IteratorReader, list()) # test general read-iteration - its supported by all readers reader = IteratorReader(iter(range(10))) assert len(list(reader)) == 10 # NOTE: its thread-safety is tested by the pool async-0.6.1/test/__init__.py0000644000175100017510000000000011466226142014763 0ustar byronbyronasync-0.6.1/test/test_graph.py0000644000175100017510000000423511466226142015402 0ustar byronbyron"""Channel testing""" from lib import * from async.graph import * import time import sys class TestGraph(TestBase): def test_base(self): g = Graph() nn = 10 assert nn > 2, "need at least 3 nodes" # add unconnected nodes for i in range(nn): assert isinstance(g.add_node(Node()), Node) # END add nodes assert len(g.nodes) == nn # delete unconnected nodes for n in g.nodes[:]: g.remove_node(n) # END del nodes # add a chain of connected nodes last = None for i in range(nn): n = g.add_node(Node(i)) if last: assert not last.out_nodes assert not n.in_nodes assert g.add_edge(last, n) is g assert last.out_nodes[0] is n assert n.in_nodes[0] is last last = n # END for each node to connect # try to connect a node with itself self.failUnlessRaises(ValueError, g.add_edge, last, last) # try to create a cycle self.failUnlessRaises(ValueError, g.add_edge, g.nodes[0], g.nodes[-1]) self.failUnlessRaises(ValueError, g.add_edge, g.nodes[-1], g.nodes[0]) # we have undirected edges, readding the same edge, but the other way # around does not change anything n1, n2, n3 = g.nodes[0], g.nodes[1], g.nodes[2] g.add_edge(n1, n2) # already connected g.add_edge(n2, n1) # same thing 
assert len(n1.out_nodes) == 1 assert len(n1.in_nodes) == 0 assert len(n2.in_nodes) == 1 assert len(n2.out_nodes) == 1 # deleting a connected node clears its neighbour connections assert n3.in_nodes[0] is n2 assert g.remove_node(n2) is g assert g.remove_node(n2) is g # multi-deletion okay assert len(g.nodes) == nn - 1 assert len(n3.in_nodes) == 0 assert len(n1.out_nodes) == 0 # check the history from the last node end = g.nodes[-1] dfirst_nodes = g.input_inclusive_dfirst_reversed(end) num_nodes_seen = nn - 2 # deleted second, which leaves first one disconnected assert len(dfirst_nodes) == num_nodes_seen assert dfirst_nodes[-1] == end and dfirst_nodes[-2].id == end.id-1 # test cleanup # its at least kept by its graph assert sys.getrefcount(end) > 3 del(g) del(n1); del(n2); del(n3) del(dfirst_nodes) del(last) del(n) assert sys.getrefcount(end) == 2 async-0.6.1/setup.py0000755000175100017510000000553211472205243013423 0ustar byronbyron#!/usr/bin/env python from distutils.core import setup, Extension from distutils.command.build_py import build_py from distutils.command.build_ext import build_ext import os, sys # wow, this is a mixed bag ... I am pretty upset about all of this ... setuptools_build_py_module = None try: # don't pull it in if we don't have to if 'setuptools' in sys.modules: import setuptools.command.build_py as setuptools_build_py_module from setuptools.command.build_ext import build_ext except ImportError: pass class build_ext_nofail(build_ext): """Doesn't fail when build our optional extensions""" def run(self): try: build_ext.run(self) except Exception: print "Ignored failure when building extensions, pure python modules will be used instead" # END ignore errors def get_data_files(self): """Can you feel the pain ? So, in python2.5 and python2.4 coming with maya, the line dealing with the ``plen`` has a bug which causes it to truncate too much. It is fixed in the system interpreters as they receive patches, and shows how bad it is if something doesn't have proper unittests. The code here is a plain copy of the python2.6 version which works for all. Generate list of '(package,src_dir,build_dir,filenames)' tuples""" data = [] if not self.packages: return data # this one is just for the setup tools ! They don't iniitlialize this variable # when they should, but do it on demand using this method.Its crazy if hasattr(self, 'analyze_manifest'): self.analyze_manifest() # END handle setuptools ... 
for package in self.packages: # Locate package source directory src_dir = self.get_package_dir(package) # Compute package build directory build_dir = os.path.join(*([self.build_lib] + package.split('.'))) # Length of path to strip from found files plen = 0 if src_dir: plen = len(src_dir)+1 # Strip directory from globbed filenames filenames = [ file[plen:] for file in self.find_data_files(package, src_dir) ] data.append((package, src_dir, build_dir, filenames)) return data build_py.get_data_files = get_data_files if setuptools_build_py_module: setuptools_build_py_module.build_py._get_data_files = get_data_files # END apply setuptools patch too setup(cmdclass={'build_ext':build_ext_nofail}, name = "async", version = "0.6.1", description = "Async Framework", author = "Sebastian Thiel", author_email = "byronimo@gmail.com", url = "http://gitorious.org/git-python/async", packages = ('async', 'async.mod', 'async.test', 'async.test.mod'), package_data={'async' : ['AUTHORS', 'README']}, package_dir = {'async':''}, ext_modules=[Extension('async.mod.zlib', ['mod/zlibmodule.c'])], license = "BSD License", zip_safe=False, long_description = """Async is a framework to process interdependent tasks in a pool of workers""" ) async-0.6.1/thread.py0000644000175100017510000001412511467275121013533 0ustar byronbyron# -*- coding: utf-8 -*- """Module with threading utilities""" __docformat__ = "restructuredtext" import threading import inspect import Queue import sys __all__ = ('do_terminate_threads', 'terminate_threads', 'TerminatableThread', 'WorkerThread') #{ Decorators def do_terminate_threads(whitelist=list()): """Simple function which terminates all of our threads :param whitelist: If whitelist is given, only the given threads will be terminated""" for t in threading.enumerate(): if not isinstance(t, TerminatableThread): continue if whitelist and t not in whitelist: continue t.schedule_termination() t.stop_and_join() # END for each thread def terminate_threads( func ): """Kills all worker threads the method has created by sending the quit signal. This takes over in case of an error in the main function""" def wrapper(*args, **kwargs): cur_threads = set(threading.enumerate()) try: return func(*args, **kwargs) finally: do_terminate_threads(set(threading.enumerate()) - cur_threads) # END finally shutdown threads # END wrapper wrapper.__name__ = func.__name__ return wrapper #} END decorators #{ Classes class TerminatableThread(threading.Thread): """A simple thread able to terminate itself on behalf of the user. Terminate a thread as follows: t.stop_and_join() Derived classes call _should_terminate() to determine whether they should abort gracefully """ __slots__ = '_terminate' def __init__(self): super(TerminatableThread, self).__init__() self._terminate = False #{ Subclass Interface def _should_terminate(self): """:return: True if this thread should terminate its operation immediately""" return self._terminate def _terminated(self): """Called once the thread terminated. Its called in the main thread and may perform cleanup operations""" pass def start(self): """Start the thread and return self""" super(TerminatableThread, self).start() return self #} END subclass interface #{ Interface def schedule_termination(self): """Schedule this thread to be terminated as soon as possible. 
:note: this method does not block.""" self._terminate = True def stop_and_join(self): """Ask the thread to stop its operation and wait for it to terminate :note: Depending on the implementation, this might block a moment""" self._terminate = True self.join() self._terminated() #} END interface class StopProcessing(Exception): """If thrown in a function processed by a WorkerThread, it will terminate""" class WorkerThread(TerminatableThread): """ This base allows one to call functions on class instances natively. As it is meant to work with a pool, the result of the call must be handled by the callee. The thread runs forever unless it receives the terminate signal using its task queue. Tasks are (routine, arg) pairs read from the input queue, where the routine may be a class method, function or instance method, and arg is the single argument to pass to it. For instance (assuming a subclass defines a fun(arg) method): inq = Queue() w = WorkerThread(inq) w.start() inq.put((w.fun, arg)) # calls w.fun(arg) on the worker thread finally we ask it to terminate as soon as possible: inq.put((WorkerThread.stop, None)) """ __slots__ = ('inq') # define how often we should check for a shutdown request in case our # taskqueue is empty shutdown_check_time_s = 0.5 def __init__(self, inq = None): super(WorkerThread, self).__init__() self.inq = inq if inq is None: self.inq = Queue.Queue() @classmethod def stop(cls, *args): """If sent via the inq of the thread, it will stop once it processed the function""" raise StopProcessing def run(self): """Process input tasks until we receive the quit signal""" gettask = self.inq.get while True: if self._should_terminate(): break # END check for stop request # note: during shutdown, this may return None in the middle of waiting # for an item to be put onto the queue - we can't do anything about it - # even if we catch everything and break gracefully, the parent # call will think we failed with an empty exception. # Hence we just don't do anything about it. Alternatively # we could override the start method to get our own bootstrapping, # which would mean repeating plenty of code from the threading module. tasktuple = gettask() # needing exactly one function, and one arg routine, arg = tasktuple try: try: rval = None if inspect.ismethod(routine): if routine.im_self is None: rval = routine(self, arg) else: rval = routine(arg) elif inspect.isroutine(routine): rval = routine(arg) else: # ignore unknown items sys.stderr.write("%s: task %s was not understood - terminating\n" % (self.getName(), str(tasktuple))) break # END make routine call finally: # make sure we delete the routine to release the reference as soon # as possible.
Otherwise objects might not be destroyed # while we are waiting del(routine) del(tasktuple) except StopProcessing: break except Exception,e: sys.stderr.write("%s: Task %s raised unhandled exception: %s - this really shouldn't happen !\n" % (self.getName(), str(tasktuple), str(e))) continue # just continue # END routine exception handling # END handle routine release # END endless loop def stop_and_join(self): """Send stop message to ourselves - we don't block, the thread will terminate once it has finished processing its input queue to receive our termination event""" # DONT call superclass as it will try to join - join's don't work for # some reason, as python apparently doesn't switch threads (so often) # while waiting ... I don't know, but the threads respond properly, # but only if dear python switches to them self.inq.put((self.stop, None)) #} END classes async-0.6.1/__init__.py0000644000175100017510000000203511467301141014010 0ustar byronbyron"""Initialize the multi-processing package""" #{ Initialization def _init_atexit(): """Setup an at-exit job to be sure our workers are shutdown correctly before the interpreter quits""" import atexit import thread atexit.register(thread.do_terminate_threads) def _init_signals(): """Assure we shutdown our threads correctly when being interrupted""" import signal import thread import sys prev_handler = signal.getsignal(signal.SIGINT) def thread_interrupt_handler(signum, frame): thread.do_terminate_threads() if callable(prev_handler): prev_handler(signum, frame) raise KeyboardInterrupt() # END call previous handler # END signal handler try: signal.signal(signal.SIGINT, thread_interrupt_handler) except ValueError: # happens if we don't try it from the main thread print >> sys.stderr, "Failed to setup thread-interrupt handler. This is usually not critical" # END exception handling #} END init _init_atexit() _init_signals() # initial imports from task import * from pool import * from channel import *
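# A hedged smoke test of the public API re-exported above. Only names from
# this package are used (ThreadPool, IteratorThreadTask, mkchannel); the
# expected values follow the bundled tests. Guarded so that a plain
# 'import async' stays free of side effects.
if __name__ == '__main__':
    pool = ThreadPool(0)            # size 0: work is done in the calling thread
    task = IteratorThreadTask(iter(range(5)), 'demo', lambda i: i + 1)
    reader = pool.add_task(task)    # lazy - nothing is computed until we read
    assert reader.read() == [1, 2, 3, 4, 5]     # serial mode keeps item order
    wc, rc = mkchannel()            # writer / reader ends of a channel
    wc.write(42)
    assert rc.read(1) == [42]       # an available item is returned promptly
    wc.close()
    assert len(rc.read()) == 0      # reading from a closed channel never blocks
    print 'async smoke test passed'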