queuelib-1.5.0/0000755000372000037200000000000013251473650014176 5ustar travistravis00000000000000queuelib-1.5.0/queuelib/0000755000372000037200000000000013251473650016011 5ustar travistravis00000000000000queuelib-1.5.0/queuelib/tests/0000755000372000037200000000000013251473650017153 5ustar travistravis00000000000000queuelib-1.5.0/queuelib/tests/__init__.py0000644000372000037200000000147713251473560021275 0ustar travistravis00000000000000import unittest, tempfile, shutil class QueuelibTestCase(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp(prefix="queuelib-tests-") self.qpath = self.mktemp() self.qdir = self.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def mktemp(self): return tempfile.mktemp(dir=self.tmpdir) def mkdtemp(self): return tempfile.mkdtemp(dir=self.tmpdir) def track_closed(cls): """Wraps a queue class to track down if close() method was called""" class TrackingClosed(cls): def __init__(self, *a, **kw): super(TrackingClosed, self).__init__(*a, **kw) self.closed = False def close(self): super(TrackingClosed, self).close() self.closed = True return TrackingClosed queuelib-1.5.0/queuelib/tests/test_pqueue.py0000644000372000037200000001327213251473560022075 0ustar travistravis00000000000000import os from queuelib.pqueue import PriorityQueue from queuelib.queue import ( FifoMemoryQueue, LifoMemoryQueue, FifoDiskQueue, LifoDiskQueue, FifoSQLiteQueue, LifoSQLiteQueue, ) from queuelib.tests import (QueuelibTestCase, track_closed) # hack to prevent py.test from discovering base test class class base: class PQueueTestBase(QueuelibTestCase): def setUp(self): QueuelibTestCase.setUp(self) self.q = PriorityQueue(self.qfactory) def qfactory(self, prio): raise NotImplementedError def test_len_nonzero(self): assert not self.q self.assertEqual(len(self.q), 0) self.q.push(b'a', 3) assert self.q self.q.push(b'b', 1) self.q.push(b'c', 2) self.q.push(b'd', 1) self.assertEqual(len(self.q), 4) self.q.pop() self.q.pop() self.q.pop() self.q.pop() assert not self.q self.assertEqual(len(self.q), 0) def test_close(self): self.q.push(b'a', 3) self.q.push(b'b', 1) self.q.push(b'c', 2) self.q.push(b'd', 1) iqueues = self.q.queues.values() self.assertEqual(sorted(self.q.close()), [1, 2, 3]) assert all(q.closed for q in iqueues) def test_close_return_active(self): self.q.push(b'b', 1) self.q.push(b'c', 2) self.q.push(b'a', 3) self.q.pop() self.assertEqual(sorted(self.q.close()), [2, 3]) def test_popped_internal_queues_closed(self): self.q.push(b'a', 3) self.q.push(b'b', 1) self.q.push(b'c', 2) p1queue = self.q.queues[1] self.assertEqual(self.q.pop(), b'b') self.q.close() assert p1queue.closed class FifoTestMixin(object): def test_push_pop_noprio(self): self.q.push(b'a') self.q.push(b'b') self.q.push(b'c') self.assertEqual(self.q.pop(), b'a') self.assertEqual(self.q.pop(), b'b') self.assertEqual(self.q.pop(), b'c') self.assertEqual(self.q.pop(), None) def test_push_pop_prio(self): self.q.push(b'a', 3) self.q.push(b'b', 1) self.q.push(b'c', 2) self.q.push(b'd', 1) self.assertEqual(self.q.pop(), b'b') self.assertEqual(self.q.pop(), b'd') self.assertEqual(self.q.pop(), b'c') self.assertEqual(self.q.pop(), b'a') self.assertEqual(self.q.pop(), None) class LifoTestMixin(object): def test_push_pop_noprio(self): self.q.push(b'a') self.q.push(b'b') self.q.push(b'c') self.assertEqual(self.q.pop(), b'c') self.assertEqual(self.q.pop(), b'b') self.assertEqual(self.q.pop(), b'a') self.assertEqual(self.q.pop(), None) def test_push_pop_prio(self): self.q.push(b'a', 3) self.q.push(b'b', 1) self.q.push(b'c', 2) self.q.push(b'd', 1) self.assertEqual(self.q.pop(), b'd') self.assertEqual(self.q.pop(), b'b') self.assertEqual(self.q.pop(), b'c') self.assertEqual(self.q.pop(), b'a') self.assertEqual(self.q.pop(), None) class FifoMemoryPriorityQueueTest(FifoTestMixin, base.PQueueTestBase): def qfactory(self, prio): return track_closed(FifoMemoryQueue)() class LifoMemoryPriorityQueueTest(LifoTestMixin, base.PQueueTestBase): def qfactory(self, prio): return track_closed(LifoMemoryQueue)() class DiskTestMixin(object): def test_nonserializable_object_one(self): self.assertRaises(TypeError, self.q.push, lambda x: x, 0) self.assertEqual(self.q.close(), []) def test_nonserializable_object_many_close(self): self.q.push(b'a', 3) self.q.push(b'b', 1) self.assertRaises(TypeError, self.q.push, lambda x: x, 0) self.q.push(b'c', 2) self.assertEqual(self.q.pop(), b'b') self.assertEqual(sorted(self.q.close()), [2, 3]) def test_nonserializable_object_many_pop(self): self.q.push(b'a', 3) self.q.push(b'b', 1) self.assertRaises(TypeError, self.q.push, lambda x: x, 0) self.q.push(b'c', 2) self.assertEqual(self.q.pop(), b'b') self.assertEqual(self.q.pop(), b'c') self.assertEqual(self.q.pop(), b'a') self.assertEqual(self.q.pop(), None) self.assertEqual(self.q.close(), []) def test_reopen_with_prio(self): q1 = PriorityQueue(self.qfactory) q1.push(b'a', 3) q1.push(b'b', 1) q1.push(b'c', 2) active = q1.close() q2 = PriorityQueue(self.qfactory, startprios=active) self.assertEqual(q2.pop(), b'b') self.assertEqual(q2.pop(), b'c') self.assertEqual(q2.pop(), b'a') self.assertEqual(q2.close(), []) class FifoDiskPriorityQueueTest(FifoTestMixin, DiskTestMixin, base.PQueueTestBase): def qfactory(self, prio): path = os.path.join(self.qdir, str(prio)) return track_closed(FifoDiskQueue)(path) class LifoDiskPriorityQueueTest(LifoTestMixin, DiskTestMixin, base.PQueueTestBase): def qfactory(self, prio): path = os.path.join(self.qdir, str(prio)) return track_closed(LifoDiskQueue)(path) class FifoSQLitePriorityQueueTest(FifoTestMixin, DiskTestMixin, base.PQueueTestBase): def qfactory(self, prio): path = os.path.join(self.qdir, str(prio)) return track_closed(FifoSQLiteQueue)(path) class LifoSQLitePriorityQueueTest(LifoTestMixin, DiskTestMixin, base.PQueueTestBase): def qfactory(self, prio): path = os.path.join(self.qdir, str(prio)) return track_closed(LifoSQLiteQueue)(path) queuelib-1.5.0/queuelib/tests/test_queue.py0000644000372000037200000001430213251473560021710 0ustar travistravis00000000000000import os import glob import pytest from queuelib.queue import ( FifoMemoryQueue, LifoMemoryQueue, FifoDiskQueue, LifoDiskQueue, FifoSQLiteQueue, LifoSQLiteQueue, ) from queuelib.tests import QueuelibTestCase class BaseQueueTest(object): def queue(self): return NotImplementedError() def test_empty(self): """Empty queue test""" q = self.queue() assert q.pop() is None def test_single_pushpop(self): q = self.queue() q.push(b'a') assert q.pop() == b'a' def test_binary_element(self): elem = ( b'\x80\x02}q\x01(U\x04bodyq\x02U\x00U\t_encodingq\x03U\x05utf-' b'8q\x04U\x07cookiesq\x05}q\x06U\x04metaq\x07}q\x08U\x07header' b'sq\t}U\x03urlq\nX\x15\x00\x00\x00file:///tmp/tmphDJYsgU\x0bd' b'ont_filterq\x0b\x89U\x08priorityq\x0cK\x00U\x08callbackq\rNU' b'\x06methodq\x0eU\x03GETq\x0fU\x07errbackq\x10Nu.' ) q = self.queue() q.push(elem) assert q.pop() == elem def test_len(self): q = self.queue() self.assertEqual(len(q), 0) q.push(b'a') self.assertEqual(len(q), 1) q.push(b'b') q.push(b'c') self.assertEqual(len(q), 3) q.pop() q.pop() q.pop() self.assertEqual(len(q), 0) class FifoTestMixin(BaseQueueTest): def test_push_pop1(self): """Basic push/pop test""" q = self.queue() q.push(b'a') q.push(b'b') q.push(b'c') self.assertEqual(q.pop(), b'a') self.assertEqual(q.pop(), b'b') self.assertEqual(q.pop(), b'c') self.assertEqual(q.pop(), None) def test_push_pop2(self): """Test interleaved push and pops""" q = self.queue() q.push(b'a') q.push(b'b') q.push(b'c') q.push(b'd') self.assertEqual(q.pop(), b'a') self.assertEqual(q.pop(), b'b') q.push(b'e') self.assertEqual(q.pop(), b'c') self.assertEqual(q.pop(), b'd') self.assertEqual(q.pop(), b'e') class LifoTestMixin(BaseQueueTest): def test_push_pop1(self): """Basic push/pop test""" q = self.queue() q.push(b'a') q.push(b'b') q.push(b'c') self.assertEqual(q.pop(), b'c') self.assertEqual(q.pop(), b'b') self.assertEqual(q.pop(), b'a') self.assertEqual(q.pop(), None) def test_push_pop2(self): """Test interleaved push and pops""" q = self.queue() q.push(b'a') q.push(b'b') q.push(b'c') q.push(b'd') self.assertEqual(q.pop(), b'd') self.assertEqual(q.pop(), b'c') q.push(b'e') self.assertEqual(q.pop(), b'e') self.assertEqual(q.pop(), b'b') self.assertEqual(q.pop(), b'a') class PersistentTestMixin(object): chunksize = 100000 @pytest.mark.xfail(reason="Reenable once Scrapy.squeues stop" " extending from queuelib testsuite") def test_non_bytes_raises_typeerror(self): q = self.queue() self.assertRaises(TypeError, q.push, 0) self.assertRaises(TypeError, q.push, u'') self.assertRaises(TypeError, q.push, None) self.assertRaises(TypeError, q.push, lambda x: x) def test_text_in_windows(self): e1 = b'\r\n' q = self.queue() q.push(e1) q.close() q = self.queue() e2 = q.pop() self.assertEqual(e1, e2) def test_close_open(self): """Test closing and re-opening keeps state""" q = self.queue() q.push(b'a') q.push(b'b') q.push(b'c') q.push(b'd') q.pop() q.pop() q.close() del q q = self.queue() self.assertEqual(len(q), 2) q.push(b'e') q.pop() q.pop() q.close() del q q = self.queue() assert q.pop() is not None self.assertEqual(len(q), 0) def test_cleanup(self): """Test queue dir is removed if queue is empty""" q = self.queue() values = [b'0', b'1', b'2', b'3', b'4'] assert os.path.exists(self.qpath) for x in values: q.push(x) for x in values: q.pop() q.close() assert not os.path.exists(self.qpath) class FifoMemoryQueueTest(FifoTestMixin, QueuelibTestCase): def queue(self): return FifoMemoryQueue() class LifoMemoryQueueTest(LifoTestMixin, QueuelibTestCase): def queue(self): return LifoMemoryQueue() class FifoDiskQueueTest(FifoTestMixin, PersistentTestMixin, QueuelibTestCase): def queue(self): return FifoDiskQueue(self.qpath, chunksize=self.chunksize) def test_chunks(self): """Test chunks are created and removed""" values = [b'0', b'1', b'2', b'3', b'4'] q = self.queue() for x in values: q.push(x) chunks = glob.glob(os.path.join(self.qpath, 'q*')) self.assertEqual(len(chunks), 5 // self.chunksize + 1) for x in values: q.pop() chunks = glob.glob(os.path.join(self.qpath, 'q*')) self.assertEqual(len(chunks), 1) class ChunkSize1FifoDiskQueueTest(FifoDiskQueueTest): chunksize = 1 class ChunkSize2FifoDiskQueueTest(FifoDiskQueueTest): chunksize = 2 class ChunkSize3FifoDiskQueueTest(FifoDiskQueueTest): chunksize = 3 class ChunkSize4FifoDiskQueueTest(FifoDiskQueueTest): chunksize = 4 class LifoDiskQueueTest(LifoTestMixin, PersistentTestMixin, QueuelibTestCase): def queue(self): return LifoDiskQueue(self.qpath) def test_file_size_shrinks(self): """Test size of queue file shrinks when popping items""" q = self.queue() q.push(b'a') q.push(b'b') q.close() size = os.path.getsize(self.qpath) q = self.queue() q.pop() q.close() assert os.path.getsize(self.qpath), size class FifoSQLiteQueueTest(FifoTestMixin, PersistentTestMixin, QueuelibTestCase): def queue(self): return FifoSQLiteQueue(self.qpath) class LifoSQLiteQueueTest(LifoTestMixin, PersistentTestMixin, QueuelibTestCase): def queue(self): return LifoSQLiteQueue(self.qpath) queuelib-1.5.0/queuelib/tests/test_rrqueue.py0000644000372000037200000001070513251473560022257 0ustar travistravis00000000000000import os from queuelib.rrqueue import RoundRobinQueue from queuelib.queue import ( FifoMemoryQueue, LifoMemoryQueue, FifoDiskQueue, LifoDiskQueue, FifoSQLiteQueue, LifoSQLiteQueue, ) from queuelib.tests import (QueuelibTestCase, track_closed) # hack to prevent py.test from discovering base test class class base: class RRQueueTestBase(QueuelibTestCase): def setUp(self): QueuelibTestCase.setUp(self) self.q = RoundRobinQueue(self.qfactory) def qfactory(self, key): raise NotImplementedError def test_len_nonzero(self): assert not self.q self.assertEqual(len(self.q), 0) self.q.push(b'a', '3') assert self.q self.q.push(b'b', '1') self.q.push(b'c', '2') self.q.push(b'd', '1') self.assertEqual(len(self.q), 4) self.q.pop() self.q.pop() self.q.pop() self.q.pop() assert not self.q self.assertEqual(len(self.q), 0) def test_close(self): self.q.push(b'a', '3') self.q.push(b'b', '1') self.q.push(b'c', '2') self.q.push(b'd', '1') iqueues = self.q.queues.values() self.assertEqual(sorted(self.q.close()), ['1', '2', '3']) assert all(q.closed for q in iqueues) def test_close_return_active(self): self.q.push(b'b', '1') self.q.push(b'c', '2') self.q.push(b'a', '3') self.q.pop() self.assertEqual(sorted(self.q.close()), ['2', '3']) class FifoTestMixin(object): def test_push_pop_key(self): self.q.push(b'a', '1') self.q.push(b'b', '1') self.q.push(b'c', '2') self.q.push(b'd', '2') self.assertEqual(self.q.pop(), b'a') self.assertEqual(self.q.pop(), b'c') self.assertEqual(self.q.pop(), b'b') self.assertEqual(self.q.pop(), b'd') self.assertEqual(self.q.pop(), None) class LifoTestMixin(object): def test_push_pop_key(self): self.q.push(b'a', '1') self.q.push(b'b', '1') self.q.push(b'c', '2') self.q.push(b'd', '2') self.assertEqual(self.q.pop(), b'b') self.assertEqual(self.q.pop(), b'd') self.assertEqual(self.q.pop(), b'a') self.assertEqual(self.q.pop(), b'c') self.assertEqual(self.q.pop(), None) class FifoMemoryRRQueueTest(FifoTestMixin, base.RRQueueTestBase): def qfactory(self, key): return track_closed(FifoMemoryQueue)() class LifoMemoryRRQueueTest(LifoTestMixin, base.RRQueueTestBase): def qfactory(self, key): return track_closed(LifoMemoryQueue)() class DiskTestMixin(object): def test_nonserializable_object_one(self): self.assertRaises(TypeError, self.q.push, lambda x: x, '0') self.assertEqual(self.q.close(), []) def test_nonserializable_object_many_close(self): self.q.push(b'a', '3') self.q.push(b'b', '1') self.assertRaises(TypeError, self.q.push, lambda x: x, '0') self.q.push(b'c', '2') self.assertEqual(self.q.pop(), b'a') self.assertEqual(sorted(self.q.close()), ['1', '2']) def test_nonserializable_object_many_pop(self): self.q.push(b'a', '3') self.q.push(b'b', '1') self.assertRaises(TypeError, self.q.push, lambda x: x, '0') self.q.push(b'c', '2') self.assertEqual(self.q.pop(), b'a') self.assertEqual(self.q.pop(), b'b') self.assertEqual(self.q.pop(), b'c') self.assertEqual(self.q.pop(), None) self.assertEqual(self.q.close(), []) class FifoDiskRRQueueTest(FifoTestMixin, DiskTestMixin, base.RRQueueTestBase): def qfactory(self, key): path = os.path.join(self.qdir, str(key)) return track_closed(FifoDiskQueue)(path) class LifoDiskRRQueueTest(LifoTestMixin, DiskTestMixin, base.RRQueueTestBase): def qfactory(self, key): path = os.path.join(self.qdir, str(key)) return track_closed(LifoDiskQueue)(path) class FifoSQLiteRRQueueTest(FifoTestMixin, DiskTestMixin, base.RRQueueTestBase): def qfactory(self, key): path = os.path.join(self.qdir, str(key)) return track_closed(FifoSQLiteQueue)(path) class LifoSQLiteRRQueueTest(LifoTestMixin, DiskTestMixin, base.RRQueueTestBase): def qfactory(self, key): path = os.path.join(self.qdir, str(key)) return track_closed(LifoSQLiteQueue)(path) queuelib-1.5.0/queuelib/__init__.py0000644000372000037200000000021613251473560020121 0ustar travistravis00000000000000from queuelib.queue import FifoDiskQueue, LifoDiskQueue from queuelib.pqueue import PriorityQueue from queuelib.rrqueue import RoundRobinQueuequeuelib-1.5.0/queuelib/pqueue.py0000644000372000037200000000373613251473560017700 0ustar travistravis00000000000000class PriorityQueue(object): """A priority queue implemented using multiple internal queues (typically, FIFO queues). The internal queue must implement the following methods: * push(obj) * pop() * close() * __len__() The constructor receives a qfactory argument, which is a callable used to instantiate a new (internal) queue when a new priority is allocated. The qfactory function is called with the priority number as first and only argument. Only integer priorities should be used. Lower numbers are higher priorities. startprios is a sequence of priorities to start with. If the queue was previously closed leaving some priority buckets non-empty, those priorities should be passed in startprios. """ def __init__(self, qfactory, startprios=()): self.queues = {} self.qfactory = qfactory for p in startprios: self.queues[p] = self.qfactory(p) self.curprio = min(startprios) if startprios else None def push(self, obj, priority=0): if priority not in self.queues: self.queues[priority] = self.qfactory(priority) q = self.queues[priority] q.push(obj) # this may fail (eg. serialization error) if self.curprio is None or priority < self.curprio: self.curprio = priority def pop(self): if self.curprio is None: return q = self.queues[self.curprio] m = q.pop() if len(q) == 0: del self.queues[self.curprio] q.close() prios = [p for p, q in self.queues.items() if len(q) > 0] self.curprio = min(prios) if prios else None return m def close(self): active = [] for p, q in self.queues.items(): if len(q): active.append(p) q.close() return active def __len__(self): return sum(len(x) for x in self.queues.values()) if self.queues else 0 queuelib-1.5.0/queuelib/queue.py0000644000372000037200000001472213251473560017515 0ustar travistravis00000000000000import os import glob import json import struct import sqlite3 from collections import deque class FifoMemoryQueue(object): """In-memory FIFO queue, API compliant with FifoDiskQueue.""" def __init__(self): self.q = deque() self.push = self.q.append def pop(self): q = self.q return q.popleft() if q else None def close(self): pass def __len__(self): return len(self.q) class LifoMemoryQueue(FifoMemoryQueue): """In-memory LIFO queue, API compliant with LifoDiskQueue.""" def pop(self): q = self.q return q.pop() if q else None class FifoDiskQueue(object): """Persistent FIFO queue.""" szhdr_format = ">L" szhdr_size = struct.calcsize(szhdr_format) def __init__(self, path, chunksize=100000): self.path = path if not os.path.exists(path): os.makedirs(path) self.info = self._loadinfo(chunksize) self.chunksize = self.info['chunksize'] self.headf = self._openchunk(self.info['head'][0], 'ab+') self.tailf = self._openchunk(self.info['tail'][0]) os.lseek(self.tailf.fileno(), self.info['tail'][2], os.SEEK_SET) def push(self, string): if not isinstance(string, bytes): raise TypeError('Unsupported type: {}'.format(type(string).__name__)) hnum, hpos = self.info['head'] hpos += 1 szhdr = struct.pack(self.szhdr_format, len(string)) os.write(self.headf.fileno(), szhdr + string) if hpos == self.chunksize: hpos = 0 hnum += 1 self.headf.close() self.headf = self._openchunk(hnum, 'ab+') self.info['size'] += 1 self.info['head'] = [hnum, hpos] def _openchunk(self, number, mode='rb'): return open(os.path.join(self.path, 'q%05d' % number), mode) def pop(self): tnum, tcnt, toffset = self.info['tail'] if [tnum, tcnt] >= self.info['head']: return tfd = self.tailf.fileno() szhdr = os.read(tfd, self.szhdr_size) if not szhdr: return size, = struct.unpack(self.szhdr_format, szhdr) data = os.read(tfd, size) tcnt += 1 toffset += self.szhdr_size + size if tcnt == self.chunksize and tnum <= self.info['head'][0]: tcnt = toffset = 0 tnum += 1 self.tailf.close() os.remove(self.tailf.name) self.tailf = self._openchunk(tnum) self.info['size'] -= 1 self.info['tail'] = [tnum, tcnt, toffset] return data def close(self): self.headf.close() self.tailf.close() self._saveinfo(self.info) if len(self) == 0: self._cleanup() def __len__(self): return self.info['size'] def _loadinfo(self, chunksize): infopath = self._infopath() if os.path.exists(infopath): with open(infopath) as f: info = json.load(f) else: info = { 'chunksize': chunksize, 'size': 0, 'tail': [0, 0, 0], 'head': [0, 0], } return info def _saveinfo(self, info): with open(self._infopath(), 'w') as f: json.dump(info, f) def _infopath(self): return os.path.join(self.path, 'info.json') def _cleanup(self): for x in glob.glob(os.path.join(self.path, 'q*')): os.remove(x) os.remove(os.path.join(self.path, 'info.json')) if not os.listdir(self.path): os.rmdir(self.path) class LifoDiskQueue(object): """Persistent LIFO queue.""" SIZE_FORMAT = ">L" SIZE_SIZE = struct.calcsize(SIZE_FORMAT) def __init__(self, path): self.path = path if os.path.exists(path): self.f = open(path, 'rb+') qsize = self.f.read(self.SIZE_SIZE) self.size, = struct.unpack(self.SIZE_FORMAT, qsize) self.f.seek(0, os.SEEK_END) else: self.f = open(path, 'wb+') self.f.write(struct.pack(self.SIZE_FORMAT, 0)) self.size = 0 def push(self, string): if not isinstance(string, bytes): raise TypeError('Unsupported type: {}'.format(type(string).__name__)) self.f.write(string) ssize = struct.pack(self.SIZE_FORMAT, len(string)) self.f.write(ssize) self.size += 1 def pop(self): if not self.size: return self.f.seek(-self.SIZE_SIZE, os.SEEK_END) size, = struct.unpack(self.SIZE_FORMAT, self.f.read()) self.f.seek(-size-self.SIZE_SIZE, os.SEEK_END) data = self.f.read(size) self.f.seek(-size, os.SEEK_CUR) self.f.truncate() self.size -= 1 return data def close(self): if self.size: self.f.seek(0) self.f.write(struct.pack(self.SIZE_FORMAT, self.size)) self.f.close() if not self.size: os.remove(self.path) def __len__(self): return self.size class FifoSQLiteQueue(object): _sql_create = ( 'CREATE TABLE IF NOT EXISTS queue ' '(id INTEGER PRIMARY KEY AUTOINCREMENT, item BLOB)' ) _sql_size = 'SELECT COUNT(*) FROM queue' _sql_push = 'INSERT INTO queue (item) VALUES (?)' _sql_pop = 'SELECT id, item FROM queue ORDER BY id LIMIT 1' _sql_del = 'DELETE FROM queue WHERE id = ?' def __init__(self, path): self._path = os.path.abspath(path) self._db = sqlite3.Connection(self._path, timeout=60) self._db.text_factory = bytes with self._db as conn: conn.execute(self._sql_create) def push(self, item): if not isinstance(item, bytes): raise TypeError('Unsupported type: {}'.format(type(item).__name__)) with self._db as conn: conn.execute(self._sql_push, (item,)) def pop(self): with self._db as conn: for id_, item in conn.execute(self._sql_pop): conn.execute(self._sql_del, (id_,)) return item def close(self): size = len(self) self._db.close() if not size: os.remove(self._path) def __len__(self): with self._db as conn: return next(conn.execute(self._sql_size))[0] class LifoSQLiteQueue(FifoSQLiteQueue): _sql_pop = 'SELECT id, item FROM queue ORDER BY id DESC LIMIT 1' #FifoDiskQueue = FifoSQLiteQueue # noqa #LifoDiskQueue = LifoSQLiteQueue # noqa queuelib-1.5.0/queuelib/rrqueue.py0000644000372000037200000000424513251473560020060 0ustar travistravis00000000000000from collections import deque class RoundRobinQueue(object): """A round robin queue implemented using multiple internal queues (typically, FIFO queues). The internal queue must implement the following methods: * push(obj) * pop() * close() * __len__() The constructor receives a qfactory argument, which is a callable used to instantiate a new (internal) queue when a new key is allocated. The qfactory function is called with the key number as first and only argument. start_keys is a sequence of domains to start with. If the queue was previously closed leaving some domain buckets non-empty, those domains should be passed in start_keys. The queue maintains a fifo queue of keys. The key that went last is poped first and the next queue for that key is then poped. This allows for a round robin """ def __init__(self, qfactory, start_domains=()): self.queues = {} self.qfactory = qfactory for key in start_domains: self.queues[key] = self.qfactory(key) self.key_queue = deque(start_domains) def push(self, obj, key): if key not in self.key_queue: self.queues[key] = self.qfactory(key) self.key_queue.appendleft(key) # it's new, might as well pop first q = self.queues[key] q.push(obj) # this may fail (eg. serialization error) def pop(self): m = None # pop until we find a valid object, closing necessary queues while m is None: try: key = self.key_queue.pop() except IndexError: return q = self.queues[key] m = q.pop() if len(q) == 0: del self.queues[key] q.close() else: self.key_queue.appendleft(key) if m: return m def close(self): active = [] for k, q in self.queues.items(): if len(q): active.append(k) q.close() return active def __len__(self): return sum(len(x) for x in self.queues.values()) if self.queues else 0 queuelib-1.5.0/queuelib.egg-info/0000755000372000037200000000000013251473650017503 5ustar travistravis00000000000000queuelib-1.5.0/queuelib.egg-info/PKG-INFO0000644000372000037200000001456013251473650020606 0ustar travistravis00000000000000Metadata-Version: 1.1 Name: queuelib Version: 1.5.0 Summary: Collection of persistent (disk-based) queues Home-page: https://github.com/scrapy/queuelib Author: Scrapy project Author-email: info@scrapy.org License: BSD Description-Content-Type: UNKNOWN Description: ======== queuelib ======== .. image:: https://secure.travis-ci.org/scrapy/queuelib.png?branch=master :target: http://travis-ci.org/scrapy/queuelib .. image:: https://img.shields.io/codecov/c/github/scrapy/queuelib/master.svg :target: http://codecov.io/github/scrapy/queuelib?branch=master :alt: Coverage report Queuelib is a collection of persistent (disk-based) queues for Python. Queuelib goals are speed and simplicity. It was originally part of the `Scrapy framework`_ and stripped out on its own library. Note: Queuelib isn't thread-safe. Requirements ============ * Python 2.7 or Python 3.3 * no external library requirements Installation ============ You can install Queuelib either via the Python Package Index (PyPI) or from source. To install using pip:: $ pip install queuelib To install using easy_install:: $ easy_install queuelib If you have downloaded a source tarball you can install it by running the following (as root):: # python setup.py install FIFO/LIFO disk queues ===================== Queuelib provides FIFO and LIFO queue implementations. Here is an example usage of the FIFO queue:: >>> from queuelib import FifoDiskQueue >>> q = FifoDiskQueue("queuefile") >>> q.push(b'a') >>> q.push(b'b') >>> q.push(b'c') >>> q.pop() b'a' >>> q.close() >>> q = FifoDiskQueue("queuefile") >>> q.pop() b'b' >>> q.pop() b'c' >>> q.pop() >>> The LIFO queue is identical (API-wise), but importing ``LifoDiskQueue`` instead. PriorityQueue ============= A discrete-priority queue implemented by combining multiple FIFO/LIFO queues (one per priority). First, select the type of queue to be used per priority (FIFO or LIFO):: >>> from queuelib import FifoDiskQueue >>> qfactory = lambda priority: FifoDiskQueue('queue-dir-%s' % priority) Then instantiate the Priority Queue with it:: >>> from queuelib import PriorityQueue >>> pq = PriorityQueue(qfactory) And use it:: >>> pq.push(b'a', 3) >>> pq.push(b'b', 1) >>> pq.push(b'c', 2) >>> pq.push(b'd', 2) >>> pq.pop() b'b' >>> pq.pop() b'c' >>> pq.pop() b'd' >>> pq.pop() b'a' RoundRobinQueue =============== Has nearly the same interface and implementation as a Priority Queue except that each element must be pushed with a (mandatory) key. Popping from the queue cycles through the keys "round robin". Instantiate the Round Robin Queue similarly to the Priority Queue:: >>> from queuelib import RoundRobinQueue >>> rr = RoundRobinQueue(qfactory) And use it:: >>> rr.push(b'a', '1') >>> rr.push(b'b', '1') >>> rr.push(b'c', '2') >>> rr.push(b'd', '2') >>> rr.pop() b'a' >>> rr.pop() b'c' >>> rr.pop() b'b' >>> rr.pop() b'd' Mailing list ============ Use the `scrapy-users`_ mailing list for questions about Queuelib. Bug tracker =========== If you have any suggestions, bug reports or annoyances please report them to our issue tracker at: http://github.com/scrapy/queuelib/issues/ Contributing ============ Development of Queuelib happens at GitHub: http://github.com/scrapy/queuelib You are highly encouraged to participate in the development. If you don't like GitHub (for some reason) you're welcome to send regular patches. All changes require tests to be merged. Tests ===== Tests are located in `queuelib/tests` directory. They can be run using `nosetests`_ with the following command:: nosetests The output should be something like the following:: $ nosetests ............................................................................. ---------------------------------------------------------------------- Ran 77 tests in 0.145s OK License ======= This software is licensed under the BSD License. See the LICENSE file in the top distribution directory for the full license text. Versioning ========== This software follows `Semantic Versioning`_ .. _Scrapy framework: http://scrapy.org .. _scrapy-users: http://groups.google.com/group/scrapy-users .. _Semantic Versioning: http://semver.org/ .. _nosetests: https://nose.readthedocs.org/en/latest/ Platform: Any Classifier: Development Status :: 5 - Production/Stable Classifier: License :: OSI Approved :: BSD License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy queuelib-1.5.0/queuelib.egg-info/SOURCES.txt0000644000372000037200000000057113251473650021372 0ustar travistravis00000000000000LICENSE MANIFEST.in NEWS README.rst setup.cfg setup.py queuelib/__init__.py queuelib/pqueue.py queuelib/queue.py queuelib/rrqueue.py queuelib.egg-info/PKG-INFO queuelib.egg-info/SOURCES.txt queuelib.egg-info/dependency_links.txt queuelib.egg-info/top_level.txt queuelib/tests/__init__.py queuelib/tests/test_pqueue.py queuelib/tests/test_queue.py queuelib/tests/test_rrqueue.pyqueuelib-1.5.0/queuelib.egg-info/dependency_links.txt0000644000372000037200000000000113251473650023551 0ustar travistravis00000000000000 queuelib-1.5.0/queuelib.egg-info/top_level.txt0000644000372000037200000000001113251473650022225 0ustar travistravis00000000000000queuelib queuelib-1.5.0/LICENSE0000644000372000037200000000277313251473560015214 0ustar travistravis00000000000000Copyright (c) w3lib and Scrapy developers. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of Scrapy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. queuelib-1.5.0/MANIFEST.in0000644000372000037200000000006013251473560015730 0ustar travistravis00000000000000include README.rst include LICENSE include NEWS queuelib-1.5.0/NEWS0000644000372000037200000000020213251473560014667 0ustar travistravis00000000000000Queuelib release notes ====================== Version 1.0 ----------- (released on April 23rd, 2013) First release of Queuelib. queuelib-1.5.0/README.rst0000644000372000037200000001014013251473560015661 0ustar travistravis00000000000000======== queuelib ======== .. image:: https://secure.travis-ci.org/scrapy/queuelib.png?branch=master :target: http://travis-ci.org/scrapy/queuelib .. image:: https://img.shields.io/codecov/c/github/scrapy/queuelib/master.svg :target: http://codecov.io/github/scrapy/queuelib?branch=master :alt: Coverage report Queuelib is a collection of persistent (disk-based) queues for Python. Queuelib goals are speed and simplicity. It was originally part of the `Scrapy framework`_ and stripped out on its own library. Note: Queuelib isn't thread-safe. Requirements ============ * Python 2.7 or Python 3.3 * no external library requirements Installation ============ You can install Queuelib either via the Python Package Index (PyPI) or from source. To install using pip:: $ pip install queuelib To install using easy_install:: $ easy_install queuelib If you have downloaded a source tarball you can install it by running the following (as root):: # python setup.py install FIFO/LIFO disk queues ===================== Queuelib provides FIFO and LIFO queue implementations. Here is an example usage of the FIFO queue:: >>> from queuelib import FifoDiskQueue >>> q = FifoDiskQueue("queuefile") >>> q.push(b'a') >>> q.push(b'b') >>> q.push(b'c') >>> q.pop() b'a' >>> q.close() >>> q = FifoDiskQueue("queuefile") >>> q.pop() b'b' >>> q.pop() b'c' >>> q.pop() >>> The LIFO queue is identical (API-wise), but importing ``LifoDiskQueue`` instead. PriorityQueue ============= A discrete-priority queue implemented by combining multiple FIFO/LIFO queues (one per priority). First, select the type of queue to be used per priority (FIFO or LIFO):: >>> from queuelib import FifoDiskQueue >>> qfactory = lambda priority: FifoDiskQueue('queue-dir-%s' % priority) Then instantiate the Priority Queue with it:: >>> from queuelib import PriorityQueue >>> pq = PriorityQueue(qfactory) And use it:: >>> pq.push(b'a', 3) >>> pq.push(b'b', 1) >>> pq.push(b'c', 2) >>> pq.push(b'd', 2) >>> pq.pop() b'b' >>> pq.pop() b'c' >>> pq.pop() b'd' >>> pq.pop() b'a' RoundRobinQueue =============== Has nearly the same interface and implementation as a Priority Queue except that each element must be pushed with a (mandatory) key. Popping from the queue cycles through the keys "round robin". Instantiate the Round Robin Queue similarly to the Priority Queue:: >>> from queuelib import RoundRobinQueue >>> rr = RoundRobinQueue(qfactory) And use it:: >>> rr.push(b'a', '1') >>> rr.push(b'b', '1') >>> rr.push(b'c', '2') >>> rr.push(b'd', '2') >>> rr.pop() b'a' >>> rr.pop() b'c' >>> rr.pop() b'b' >>> rr.pop() b'd' Mailing list ============ Use the `scrapy-users`_ mailing list for questions about Queuelib. Bug tracker =========== If you have any suggestions, bug reports or annoyances please report them to our issue tracker at: http://github.com/scrapy/queuelib/issues/ Contributing ============ Development of Queuelib happens at GitHub: http://github.com/scrapy/queuelib You are highly encouraged to participate in the development. If you don't like GitHub (for some reason) you're welcome to send regular patches. All changes require tests to be merged. Tests ===== Tests are located in `queuelib/tests` directory. They can be run using `nosetests`_ with the following command:: nosetests The output should be something like the following:: $ nosetests ............................................................................. ---------------------------------------------------------------------- Ran 77 tests in 0.145s OK License ======= This software is licensed under the BSD License. See the LICENSE file in the top distribution directory for the full license text. Versioning ========== This software follows `Semantic Versioning`_ .. _Scrapy framework: http://scrapy.org .. _scrapy-users: http://groups.google.com/group/scrapy-users .. _Semantic Versioning: http://semver.org/ .. _nosetests: https://nose.readthedocs.org/en/latest/ queuelib-1.5.0/setup.cfg0000644000372000037200000000010313251473650016011 0ustar travistravis00000000000000[bdist_wheel] universal = 1 [egg_info] tag_build = tag_date = 0 queuelib-1.5.0/setup.py0000644000372000037200000000173413251473560015715 0ustar travistravis00000000000000from setuptools import setup, find_packages setup( name='queuelib', version='1.5.0', license='BSD', description='Collection of persistent (disk-based) queues', long_description=open('README.rst').read(), author='Scrapy project', author_email='info@scrapy.org', url='https://github.com/scrapy/queuelib', packages=find_packages(), platforms=['Any'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', ] ) queuelib-1.5.0/PKG-INFO0000644000372000037200000001456013251473650015301 0ustar travistravis00000000000000Metadata-Version: 1.1 Name: queuelib Version: 1.5.0 Summary: Collection of persistent (disk-based) queues Home-page: https://github.com/scrapy/queuelib Author: Scrapy project Author-email: info@scrapy.org License: BSD Description-Content-Type: UNKNOWN Description: ======== queuelib ======== .. image:: https://secure.travis-ci.org/scrapy/queuelib.png?branch=master :target: http://travis-ci.org/scrapy/queuelib .. image:: https://img.shields.io/codecov/c/github/scrapy/queuelib/master.svg :target: http://codecov.io/github/scrapy/queuelib?branch=master :alt: Coverage report Queuelib is a collection of persistent (disk-based) queues for Python. Queuelib goals are speed and simplicity. It was originally part of the `Scrapy framework`_ and stripped out on its own library. Note: Queuelib isn't thread-safe. Requirements ============ * Python 2.7 or Python 3.3 * no external library requirements Installation ============ You can install Queuelib either via the Python Package Index (PyPI) or from source. To install using pip:: $ pip install queuelib To install using easy_install:: $ easy_install queuelib If you have downloaded a source tarball you can install it by running the following (as root):: # python setup.py install FIFO/LIFO disk queues ===================== Queuelib provides FIFO and LIFO queue implementations. Here is an example usage of the FIFO queue:: >>> from queuelib import FifoDiskQueue >>> q = FifoDiskQueue("queuefile") >>> q.push(b'a') >>> q.push(b'b') >>> q.push(b'c') >>> q.pop() b'a' >>> q.close() >>> q = FifoDiskQueue("queuefile") >>> q.pop() b'b' >>> q.pop() b'c' >>> q.pop() >>> The LIFO queue is identical (API-wise), but importing ``LifoDiskQueue`` instead. PriorityQueue ============= A discrete-priority queue implemented by combining multiple FIFO/LIFO queues (one per priority). First, select the type of queue to be used per priority (FIFO or LIFO):: >>> from queuelib import FifoDiskQueue >>> qfactory = lambda priority: FifoDiskQueue('queue-dir-%s' % priority) Then instantiate the Priority Queue with it:: >>> from queuelib import PriorityQueue >>> pq = PriorityQueue(qfactory) And use it:: >>> pq.push(b'a', 3) >>> pq.push(b'b', 1) >>> pq.push(b'c', 2) >>> pq.push(b'd', 2) >>> pq.pop() b'b' >>> pq.pop() b'c' >>> pq.pop() b'd' >>> pq.pop() b'a' RoundRobinQueue =============== Has nearly the same interface and implementation as a Priority Queue except that each element must be pushed with a (mandatory) key. Popping from the queue cycles through the keys "round robin". Instantiate the Round Robin Queue similarly to the Priority Queue:: >>> from queuelib import RoundRobinQueue >>> rr = RoundRobinQueue(qfactory) And use it:: >>> rr.push(b'a', '1') >>> rr.push(b'b', '1') >>> rr.push(b'c', '2') >>> rr.push(b'd', '2') >>> rr.pop() b'a' >>> rr.pop() b'c' >>> rr.pop() b'b' >>> rr.pop() b'd' Mailing list ============ Use the `scrapy-users`_ mailing list for questions about Queuelib. Bug tracker =========== If you have any suggestions, bug reports or annoyances please report them to our issue tracker at: http://github.com/scrapy/queuelib/issues/ Contributing ============ Development of Queuelib happens at GitHub: http://github.com/scrapy/queuelib You are highly encouraged to participate in the development. If you don't like GitHub (for some reason) you're welcome to send regular patches. All changes require tests to be merged. Tests ===== Tests are located in `queuelib/tests` directory. They can be run using `nosetests`_ with the following command:: nosetests The output should be something like the following:: $ nosetests ............................................................................. ---------------------------------------------------------------------- Ran 77 tests in 0.145s OK License ======= This software is licensed under the BSD License. See the LICENSE file in the top distribution directory for the full license text. Versioning ========== This software follows `Semantic Versioning`_ .. _Scrapy framework: http://scrapy.org .. _scrapy-users: http://groups.google.com/group/scrapy-users .. _Semantic Versioning: http://semver.org/ .. _nosetests: https://nose.readthedocs.org/en/latest/ Platform: Any Classifier: Development Status :: 5 - Production/Stable Classifier: License :: OSI Approved :: BSD License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy