pax_global_header00006660000000000000000000000064137521524210014514gustar00rootroot0000000000000052 comment=5ce10afcfcd81afd46a9eae32ba085d8ae900240 persist-queue-0.5.1/000077500000000000000000000000001375215242100143325ustar00rootroot00000000000000persist-queue-0.5.1/.circleci/000077500000000000000000000000001375215242100161655ustar00rootroot00000000000000persist-queue-0.5.1/.circleci/config.yml000066400000000000000000000040121375215242100201520ustar00rootroot00000000000000version: 2 jobs: py27: docker: # Primary container image where all steps run. - image: circleci/python:2.7.15 environment: - TOXENV: py27 steps: &common_steps - checkout - run: command: | sudo pip install tox - run: command: | # tell the operating system to remove the file size limit on core dump files ulimit -c unlimited tox - run: bash <(curl -s https://codecov.io/bash) -cF python - run: command: | mkdir -p /tmp/core_dumps cp core.* /tmp/core_dumps when: on_fail - store_artifacts: # collect core dumps path: /tmp/core_dumps - store_artifacts: path: .coverage - store_artifacts: path: coverage.xml - store_artifacts: path: htmlcov py34: docker: # Primary container image where all steps run. - image: circleci/python:3.4.7 environment: - TOXENV: py34 steps: *common_steps py35: docker: # Primary container image where all steps run. - image: circleci/python:3.5.5 environment: - TOXENV: py35 steps: *common_steps py36: docker: # Primary container image where all steps run. - image: circleci/python:3.6.5 environment: - TOXENV: py36 steps: *common_steps py37: docker: # Primary container image where all steps run. - image: circleci/python:3.7.0 environment: - TOXENV: py37 steps: *common_steps pep8: docker: # Primary container image where all steps run. - image: circleci/python:3.5.4 environment: - TOXENV: pep8 steps: *common_steps cover: docker: # Primary container image where all steps run. - image: circleci/python:3.5.4 environment: - TOXENV: cover steps: *common_steps workflows: version: 2 build: jobs: - pep8 - py27 - py34 - py35 - py36 - py37 - cover persist-queue-0.5.1/.coveragerc000066400000000000000000000001711375215242100164520ustar00rootroot00000000000000[run] branch = True source = persistqueue/* omit = ./tests/* ./.tox/* ./setup.py [xml] output = coverage.xml persist-queue-0.5.1/.gitignore000066400000000000000000000021041375215242100163170ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg .testrepository/ # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # IPython Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # dotenv .env # virtualenv venv/ ENV/ # Spyder project settings .spyderproject # Rope project settings .ropeproject # IDE specific folders .idea/ persist-queue-0.5.1/LICENSE000066400000000000000000000030171375215242100153400ustar00rootroot00000000000000Copyright (c) G. B. Versiani. Copyright (c) Peter Wang. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of python-pqueue nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. persist-queue-0.5.1/MANIFEST.in000066400000000000000000000000611375215242100160650ustar00rootroot00000000000000include LICENSE include README.rst include *.txt persist-queue-0.5.1/README.rst000066400000000000000000000366051375215242100160330ustar00rootroot00000000000000persist-queue - A thread-safe, disk-based queue for Python ========================================================== .. image:: https://img.shields.io/circleci/project/github/peter-wangxu/persist-queue/master.svg?label=Linux%20%26%20Mac :target: https://circleci.com/gh/peter-wangxu/persist-queue .. image:: https://img.shields.io/appveyor/ci/peter-wangxu/persist-queue/master.svg?label=Windows :target: https://ci.appveyor.com/project/peter-wangxu/persist-queue .. image:: https://img.shields.io/codecov/c/github/peter-wangxu/persist-queue/master.svg :target: https://codecov.io/gh/peter-wangxu/persist-queue .. image:: https://img.shields.io/pypi/v/persist-queue.svg :target: https://pypi.python.org/pypi/persist-queue ``persist-queue`` implements a file-based queue and a serial of sqlite3-based queues. The goals is to achieve following requirements: * Disk-based: each queued item should be stored in disk in case of any crash. * Thread-safe: can be used by multi-threaded producers and multi-threaded consumers. 
* Recoverable: Items can be read after process restart.
* Green-compatible: can be used in a ``greenlet`` or ``eventlet`` environment.

Neither *queuelib* nor *python-pqueue* fulfils all of the above. After some attempts, I found it hard to achieve these goals on top of their current implementations without huge code changes; this is the motivation for starting this project.

By default, *persist-queue* uses the *pickle* object serialization module to support object instances. Most built-in types, like `int`, `dict` and `list`, can be persisted by `persist-queue` directly. To support customized objects, please refer to `Pickling and unpickling extension types(Python2) `_ and `Pickling Class Instances(Python3) `_

This project is based on the achievements of `python-pqueue `_ and `queuelib `_

Slack channels
^^^^^^^^^^^^^^

Join the `persist-queue `_ channel

Requirements
------------

* Python 2.7 or Python 3.x.
* Full support for Linux.
* Windows support (with `Caution`_ if ``persistqueue.Queue`` is used).

Features
--------

- Multiple platform support: Linux, macOS, Windows
- Pure python
- Both file based queues and sqlite3 based queues are supported
- File based queue: multiple serialization protocol support: pickle(default), msgpack, json

Installation
------------

from pypi
^^^^^^^^^

.. code-block:: console

    pip install persist-queue
    # for msgpack support, use the following command
    pip install persist-queue[extra]

from source code
^^^^^^^^^^^^^^^^

.. code-block:: console

    git clone https://github.com/peter-wangxu/persist-queue
    cd persist-queue
    # for msgpack support, run 'pip install -r extra-requirements.txt' first
    python setup.py install

Benchmark
---------

Here is the time spent (in seconds) for writing/reading **1000** items to the disk, comparing the sqlite3 and file queues.

- Windows
    - OS: Windows 10
    - Disk: SATA3 SSD
    - RAM: 16 GiB

+---------------+---------+-------------------------+----------------------------+
|               | Write   | Write/Read(1 task_done) | Write/Read(many task_done) |
+---------------+---------+-------------------------+----------------------------+
| SQLite3 Queue | 1.8880  | 2.0290                  | 3.5940                     |
+---------------+---------+-------------------------+----------------------------+
| File Queue    | 4.9520  | 5.0560                  | 8.4900                     |
+---------------+---------+-------------------------+----------------------------+

**windows note**
Performance of the Windows File Queue has improved dramatically since `v0.4.1` due to the atomic renaming support (3-4X faster).

- Linux
    - OS: Ubuntu 16.04 (VM)
    - Disk: SATA3 SSD
    - RAM: 4 GiB

+---------------+--------+-------------------------+----------------------------+
|               | Write  | Write/Read(1 task_done) | Write/Read(many task_done) |
+---------------+--------+-------------------------+----------------------------+
| SQLite3 Queue | 1.8282 | 1.8075                  | 2.8639                     |
+---------------+--------+-------------------------+----------------------------+
| File Queue    | 0.9123 | 1.0411                  | 2.5104                     |
+---------------+--------+-------------------------+----------------------------+

- Mac OS
    - OS: 10.14 (macOS Mojave)
    - Disk: PCIe SSD
    - RAM: 16 GiB

+---------------+--------+-------------------------+----------------------------+
|               | Write  | Write/Read(1 task_done) | Write/Read(many task_done) |
+---------------+--------+-------------------------+----------------------------+
| SQLite3 Queue | 0.1879 | 0.2115                  | 0.3147                     |
+---------------+--------+-------------------------+----------------------------+
| File Queue    | 0.5158 | 0.5357                  | 1.0446                     |
+---------------+--------+-------------------------+----------------------------+
**note**

- The values above are in seconds for reading/writing *1000* items; the lower the better.
- The above results were obtained by running:

.. code-block:: console

    python benchmark/run_benchmark.py 1000

To see the real performance on your host, run the script under ``benchmark/run_benchmark.py``:

.. code-block:: console

    python benchmark/run_benchmark.py

Examples
--------

Example usage with a SQLite3 based queue
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. code-block:: python

    >>> import persistqueue
    >>> q = persistqueue.SQLiteQueue('mypath', auto_commit=True)
    >>> q.put('str1')
    >>> q.put('str2')
    >>> q.put('str3')
    >>> q.get()
    'str1'
    >>> del q

Close the console, and then recreate the queue:

.. code-block:: python

    >>> import persistqueue
    >>> q = persistqueue.SQLiteQueue('mypath', auto_commit=True)
    >>> q.get()
    'str2'
    >>>

Example usage of SQLite3 based ``UniqueQ``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

This queue does not allow duplicate items.

.. code-block:: python

    >>> import persistqueue
    >>> q = persistqueue.UniqueQ('mypath')
    >>> q.put('str1')
    >>> q.put('str1')
    >>> q.size
    1
    >>> q.put('str2')
    >>> q.size
    2
    >>>

Example usage of SQLite3 based ``SQLiteAckQueue``/``UniqueAckQ``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The core functions:

- ``get``: get an item from the queue and mark it as unack
- ``ack``: mark an item as acked
- ``nack``: there might be something wrong with the current consumer, so mark the item as ready again so that a new consumer will get it
- ``ack_failed``: there might be something wrong during processing, so just mark the item as failed.
- ``clear_acked_data``: performs a SQL delete against sqlite to clean up acked items, keeping only the most recent 1000 whose status is ``AckStatus.acked`` (note: this does not shrink the file size on disk)
- ``shrink_disk_usage``: performs a ``VACUUM`` against the sqlite database and rebuilds the database file; this usually takes a long time and frees a lot of disk space after ``clear_acked_data``

.. code-block:: python

    >>> import persistqueue
    >>> ackq = persistqueue.SQLiteAckQueue('path')
    >>> ackq.put('str1')
    >>> item = ackq.get()
    >>> # Do something with the item
    >>> ackq.ack(item)  # If done with the item
    >>> ackq.nack(item)  # Else mark item as `nack` so that it can be processed again by any worker
    >>> ackq.ack_failed(item)  # Or else mark item as `ack_failed` to discard this item

Note:

1. The SQLiteAckQueue always uses "auto_commit=True".
2. The queue can be used in non-blocking style, e.g. "SQLiteAckQueue.get(block=False, timeout=5)".
3. ``UniqueAckQ`` only allows unique items.

Example usage with a file based queue
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. code-block:: python

    >>> from persistqueue import Queue
    >>> q = Queue("mypath")
    >>> q.put('a')
    >>> q.put('b')
    >>> q.put('c')
    >>> q.get()
    'a'
    >>> q.task_done()

Close the Python console, and then restart the queue from the same path:

.. code-block:: python

    >>> from persistqueue import Queue
    >>> q = Queue('mypath')
    >>> q.get()
    'b'
    >>> q.task_done()

Example usage with an auto-saving file based queue
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

*Available since: v0.5.0*

By default, items added to the queue are persisted during the ``put()`` call, and items removed from a queue are only persisted when ``task_done()`` is called.

.. code-block:: python

    >>> from persistqueue import Queue
    >>> q = Queue("mypath")
    >>> q.put('a')
    >>> q.put('b')
    >>> q.get()
    'a'
    >>> q.get()
    'b'

After exiting and restarting the queue from the same path, we see the items remain in the queue, because ``task_done()`` wasn't called before.
.. code-block:: python

    >>> from persistqueue import Queue
    >>> q = Queue('mypath')
    >>> q.get()
    'a'
    >>> q.get()
    'b'

This can be advantageous. For example, if your program crashes before finishing processing an item, it will remain in the queue after restarting. You can also spread out the ``task_done()`` calls for performance reasons to avoid lots of individual writes.

Using ``autosave=True`` on a file based queue will automatically save on every call to ``get()``. Calling ``task_done()`` is not necessary, but may still be used to ``join()`` against the queue.

.. code-block:: python

    >>> from persistqueue import Queue
    >>> q = Queue("mypath", autosave=True)
    >>> q.put('a')
    >>> q.put('b')
    >>> q.get()
    'a'

After exiting and restarting the queue from the same path, only the second item remains:

.. code-block:: python

    >>> from persistqueue import Queue
    >>> q = Queue('mypath', autosave=True)
    >>> q.get()
    'b'

Example usage with a SQLite3 based dict
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. code-block:: python

    >>> from persistqueue import PDict
    >>> q = PDict("testpath", "testname")
    >>> q['key1'] = 123
    >>> q['key2'] = 321
    >>> q['key1']
    123
    >>> len(q)
    2
    >>> del q['key1']
    >>> q['key1']
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "persistqueue\pdict.py", line 58, in __getitem__
        raise KeyError('Key: {} not exists.'.format(item))
    KeyError: 'Key: key1 not exists.'

Close the console and restart the PDict:

.. code-block:: python

    >>> from persistqueue import PDict
    >>> q = PDict("testpath", "testname")
    >>> q['key2']
    321

Multi-thread usage for **SQLite3** based queue
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. code-block:: python

    from threading import Thread

    from persistqueue import FIFOSQLiteQueue

    q = FIFOSQLiteQueue(path="./test", multithreading=True)

    def worker():
        while True:
            item = q.get()
            do_work(item)

    for i in range(num_worker_threads):
        t = Thread(target=worker)
        t.daemon = True
        t.start()

    for item in source():
        q.put(item)

Multi-thread usage for **Queue**
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. code-block:: python

    from threading import Thread

    from persistqueue import Queue

    q = Queue()

    def worker():
        while True:
            item = q.get()
            do_work(item)
            q.task_done()

    for i in range(num_worker_threads):
        t = Thread(target=worker)
        t.daemon = True
        t.start()

    for item in source():
        q.put(item)

    q.join()  # block until all tasks are done

**note** Due to the limitation of the file queue described in issue `#89 `_, ``task_done`` in one thread may acknowledge items belonging to other threads, which it should not. Consider the ``SQLiteAckQueue`` if you have such a requirement.

Serialization via msgpack/json
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

- v0.4.1: Currently only available for the file based Queue
- v0.4.2: Also available for SQLite3 based Queues

.. code-block:: python

    >>> import persistqueue
    >>> q = persistqueue.Queue('mypath', serializer=persistqueue.serializers.msgpack)
    >>> # via json
    >>> # q = persistqueue.Queue('mypath', serializer=persistqueue.serializers.json)
    >>> q.get()
    'b'
    >>> q.task_done()

Explicit resource reclaim
^^^^^^^^^^^^^^^^^^^^^^^^^

In some cases, an application may require explicit reclamation of file handles or SQL connections before the end of execution. In these cases, the user can simply call:

.. code-block:: python

    q = Queue()  # or q = persistqueue.SQLiteQueue('mypath', auto_commit=True)
    del q

to reclaim the related file handles or SQL connections.

Tips
----

``task_done`` is required for both the file based queue and the SQLite3 based queue (when ``auto_commit=False``) to persist the cursor of the next ``get`` to the disk.
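As a minimal sketch of the tip above (the path is just a placeholder), a SQLite3 queue created with ``auto_commit=False`` keeps an item until ``task_done`` persists its removal:

.. code-block:: python

    >>> import persistqueue
    >>> q = persistqueue.SQLiteQueue('mypath', auto_commit=False)
    >>> q.put('item1')
    >>> q.get()
    'item1'
    >>> q.task_done()  # persists the removal; without it, 'item1' is returned again after a restart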
Performance impact
------------------

- **WAL**

  Starting with v0.3.2, ``persistqueue`` leverages the sqlite3 builtin feature `WAL `_, which can improve performance significantly; general testing indicates that ``persistqueue`` is 2-4 times faster than the previous version.

- **auto_commit=False**

  Since persistqueue v0.3.0, a new parameter ``auto_commit`` is introduced to tweak the performance for sqlite3 based queues as needed. When specifying ``auto_commit=False``, the user needs to perform ``queue.task_done()`` to persist the changes made to the disk since the last ``task_done`` invocation.

- **pickle protocol selection**

  From v0.3.6, ``persistqueue`` selects ``Protocol version 2`` for python2 and ``Protocol version 4`` for python3, respectively. This selection only happens when the directory is not present when initializing the queue.

Tests
-----

*persist-queue* uses ``tox`` to run tests.

- Unit test

  .. code-block:: console

      tox -e <env>

  Available ``<env>``: ``py27``, ``py34``, ``py35``, ``py36``, ``py37``

- PEP8 check

  .. code-block:: console

      tox -e pep8

`pyenv `_ is usually a helpful tool to manage multiple versions of Python.

Caution
-------

Currently, the atomic operation is supported on Windows but is still experimental. That is to say, the data in ``persistqueue.Queue`` could be left in an unreadable state when an incidental failure occurs during ``Queue.task_done``.
**DO NOT put any critical data on persistqueue.queue on Windows**.

Contribution
------------

Simply fork this repo and send a PR for your code change (also tests to cover your change); remember to give a title and description for your PR. I am willing to enhance this project with you :).

License
-------

`BSD `_

Contributors
------------

`Contributors `_

FAQ
---

* ``sqlite3.OperationalError: database is locked`` is raised.

  persistqueue opens 2 connections for the db if ``multithreading=True``; the SQLite database is locked until that transaction is committed. The ``timeout`` parameter specifies how long the connection should wait for the lock to go away before raising an exception. The default timeout is **10** seconds; increase ``timeout`` when creating the queue if the above error occurs.

* sqlite3 based queues are not thread-safe.

  The sqlite3 queues are heavily tested in multi-threading environments; if you find they are not thread-safe, please make sure you set ``multithreading=True`` when initializing the queue before submitting a new issue :).

persist-queue-0.5.1/appveyor.yml

environment:
  matrix:
    # For Python versions available on Appveyor, see
    # http://www.appveyor.com/docs/installed-software#python
    # The list here is complete (excluding Python 2.6, which
    # isn't covered by this document) at the time of writing.
    - TOXENV: "pep8"
      PYTHON: "C:\\Python27-x64"
      DISTUTILS_USE_SDK: "1"
    - TOXENV: "py27"
      PYTHON: "C:\\Python27-x64"
      DISTUTILS_USE_SDK: "1"
    - TOXENV: "py34"
      PYTHON: "C:\\Python34-x64"
      DISTUTILS_USE_SDK: "1"
    - TOXENV: "py35"
      PYTHON: "C:\\Python35-x64"
      DISTUTILS_USE_SDK: "1"
    - TOXENV: "py36"
      PYTHON: "C:\\Python36-x64"
      DISTUTILS_USE_SDK: "1"
    - TOXENV: "py37"
      PYTHON: "C:\\Python37-x64"
      DISTUTILS_USE_SDK: "1"
    - TOXENV: "cover"
      PYTHON: "C:\\Python36-x64"
      DISTUTILS_USE_SDK: "1"

install:
  # We need wheel installed to build wheels
  - "%PYTHON%\\python.exe -m pip install tox"

build: off

test_script:
  # Put your test command here.
# If you don't need to build C extensions on 64-bit Python 3.3 or 3.4, # you can remove "build.cmd" from the front of the command, as it's # only needed to support those cases. # Note that you must use the environment variable %PYTHON% to refer to # the interpreter you're using - Appveyor does not do anything special # to put the Python evrsion you want to use on PATH. - "%PYTHON%\\Scripts\\tox.exe" #on_success: # You can use this step to upload your artifacts to a public website. # See Appveyor's documentation for more details. Or you can simply # access your wheels from the Appveyor "artifacts" tab for your build. persist-queue-0.5.1/benchmark/000077500000000000000000000000001375215242100162645ustar00rootroot00000000000000persist-queue-0.5.1/benchmark/run_benchmark.py000066400000000000000000000102701375215242100214540ustar00rootroot00000000000000"""This file provides tests to benchmark performance sqlite/file queue on specific hardware. User can easily evaluate the performance by running this file directly via `python run_benchmark.py` """ from persistqueue import SQLiteQueue from persistqueue import Queue import tempfile import time BENCHMARK_COUNT = 100 def time_it(func): def _exec(*args, **kwargs): start = time.time() func(*args, **kwargs) end = time.time() print( "\t{} => time used: {:.4f} seconds.".format( func.__doc__, (end - start))) return _exec class FileQueueBench(object): """Benchmark File queue performance.""" def __init__(self, prefix=None): self.path = prefix @time_it def benchmark_file_write(self): """Writing items.""" self.path = tempfile.mkdtemp('b_file_10000') q = Queue(self.path) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) assert q.qsize() == BENCHMARK_COUNT @time_it def benchmark_file_read_write_false(self): """Writing and reading items(1 task_done).""" self.path = tempfile.mkdtemp('b_file_10000') q = Queue(self.path) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) for i in range(BENCHMARK_COUNT): q.get() q.task_done() assert q.qsize() == 0 @time_it def benchmark_file_read_write_autosave(self): """Writing and reading items(autosave).""" self.path = tempfile.mkdtemp('b_file_10000') q = Queue(self.path, autosave=True) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) for i in range(BENCHMARK_COUNT): q.get() assert q.qsize() == 0 @time_it def benchmark_file_read_write_true(self): """Writing and reading items(many task_done).""" self.path = tempfile.mkdtemp('b_file_10000') q = Queue(self.path) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) for i in range(BENCHMARK_COUNT): q.get() q.task_done() assert q.qsize() == 0 @classmethod def run(cls): print(cls.__doc__) ins = cls() for name in sorted(cls.__dict__): if name.startswith('benchmark'): func = getattr(ins, name) func() class Sqlite3QueueBench(object): """Benchmark Sqlite3 queue performance.""" @time_it def benchmark_sqlite_write(self): """Writing items.""" self.path = tempfile.mkdtemp('b_sql_10000') q = SQLiteQueue(self.path, auto_commit=False) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) assert q.qsize() == BENCHMARK_COUNT @time_it def benchmark_sqlite_read_write_false(self): """Writing and reading items(1 task_done).""" self.path = tempfile.mkdtemp('b_sql_10000') q = SQLiteQueue(self.path, auto_commit=False) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) for i in range(BENCHMARK_COUNT): q.get() q.task_done() assert q.qsize() == 0 @time_it def benchmark_sqlite_read_write_true(self): """Writing and reading items(many task_done).""" self.path = tempfile.mkdtemp('b_sql_10000') q = 
SQLiteQueue(self.path, auto_commit=True) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) for i in range(BENCHMARK_COUNT): q.get() q.task_done() assert q.qsize() == 0 @classmethod def run(cls): print(cls.__doc__) ins = cls() for name in sorted(cls.__dict__): if name.startswith('benchmark'): func = getattr(ins, name) func() if __name__ == '__main__': import sys if len(sys.argv) > 1: BENCHMARK_COUNT = int(sys.argv[1]) print(" = {}".format(BENCHMARK_COUNT)) file_bench = FileQueueBench() file_bench.run() sql_bench = Sqlite3QueueBench() sql_bench.run() persist-queue-0.5.1/extra-requirements.txt000066400000000000000000000000161375215242100207340ustar00rootroot00000000000000msgpack>=0.5.6persist-queue-0.5.1/persistqueue/000077500000000000000000000000001375215242100170705ustar00rootroot00000000000000persist-queue-0.5.1/persistqueue/__init__.py000066400000000000000000000013261375215242100212030ustar00rootroot00000000000000# coding=utf-8 __author__ = 'Peter Wang' __license__ = 'BSD' __version__ = '0.5.1' from .exceptions import Empty, Full # noqa from .queue import Queue # noqa try: from .pdict import PDict # noqa from .sqlqueue import SQLiteQueue, FIFOSQLiteQueue, FILOSQLiteQueue, \ UniqueQ # noqa from .sqlackqueue import SQLiteAckQueue, UniqueAckQ except ImportError: import logging log = logging.getLogger(__name__) log.info("No sqlite3 module found, sqlite3 based queues are not available") __all__ = ["Queue", "SQLiteQueue", "FIFOSQLiteQueue", "FILOSQLiteQueue", "UniqueQ", "PDict", "SQLiteAckQueue", "UniqueAckQ", "Empty", "Full", "__author__", "__license__", "__version__"] persist-queue-0.5.1/persistqueue/common.py000066400000000000000000000005011375215242100207260ustar00rootroot00000000000000#! coding = utf-8 import logging import pickle log = logging.getLogger(__name__) def select_pickle_protocol(): if pickle.HIGHEST_PROTOCOL <= 2: r = 2 # python2 use fixed 2 else: r = 4 # python3 use fixed 4 log.info("Selected pickle protocol: '{}'".format(r)) return r persist-queue-0.5.1/persistqueue/exceptions.py000066400000000000000000000001271375215242100216230ustar00rootroot00000000000000#! coding = utf-8 class Empty(Exception): pass class Full(Exception): pass persist-queue-0.5.1/persistqueue/pdict.py000066400000000000000000000037611375215242100205540ustar00rootroot00000000000000#! coding = utf-8 import logging import sqlite3 from persistqueue import sqlbase log = logging.getLogger(__name__) class PDict(sqlbase.SQLiteBase, dict): _TABLE_NAME = 'dict' _KEY_COLUMN = 'key' _SQL_CREATE = ('CREATE TABLE IF NOT EXISTS {table_name} (' '{key_column} TEXT PRIMARY KEY, data BLOB)') _SQL_INSERT = 'INSERT INTO {table_name} (key, data) VALUES (?, ?)' _SQL_SELECT = ('SELECT {key_column}, data FROM {table_name} ' 'WHERE {key_column} = ?') _SQL_UPDATE = 'UPDATE {table_name} SET data = ? WHERE {key_column} = ?' 
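    # The class-level SQL templates above use {table_name}/{key_column}
    # placeholders; __setitem__ below first tries the INSERT statement and
    # falls back to the UPDATE statement when the key already exists
    # (sqlite3.IntegrityError).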
def __init__(self, path, name, multithreading=False): # PDict is always auto_commit=True super(PDict, self).__init__(path, name=name, multithreading=multithreading, auto_commit=True) def __iter__(self): raise NotImplementedError('Not supported.') def keys(self): raise NotImplementedError('Not supported.') def iterkeys(self): raise NotImplementedError('Not supported.') def values(self): raise NotImplementedError('Not supported.') def itervalues(self): raise NotImplementedError('Not supported.') def iteritems(self): raise NotImplementedError('Not supported.') def items(self): raise NotImplementedError('Not supported.') def __contains__(self, item): row = self._select(item) return row is not None def __setitem__(self, key, value): obj = self._serializer.dumps(value) try: self._insert_into(key, obj) except sqlite3.IntegrityError: self._update(key, obj) def __getitem__(self, item): row = self._select(item) if row: return self._serializer.loads(row[1]) else: raise KeyError('Key: {} not exists.'.format(item)) def __delitem__(self, key): self._delete(key) def __len__(self): return self._count() persist-queue-0.5.1/persistqueue/queue.py000066400000000000000000000252401375215242100205710ustar00rootroot00000000000000# coding=utf-8 """A thread-safe disk based persistent queue in Python.""" import logging import os import tempfile import threading from time import time as _time import persistqueue.serializers.pickle from persistqueue.exceptions import Empty, Full log = logging.getLogger(__name__) def _truncate(fn, length): with open(fn, 'ab+') as f: f.truncate(length) def atomic_rename(src, dst): try: os.replace(src, dst) except AttributeError: # python < 3.3 import sys if sys.platform == 'win32': import ctypes if sys.version_info[0] == 2: _str = unicode # noqa _bytes = str else: _str = str _bytes = bytes if isinstance(src, _str) and isinstance(dst, _str): MoveFileEx = ctypes.windll.kernel32.MoveFileExW elif isinstance(src, _bytes) and isinstance(dst, _bytes): MoveFileEx = ctypes.windll.kernel32.MoveFileExA else: raise ValueError("Both args must be bytes or unicode.") MOVEFILE_REPLACE_EXISTING = 0x1 if not MoveFileEx(src, dst, MOVEFILE_REPLACE_EXISTING): errno = ctypes.GetLastError() strerror = os.strerror(errno) raise WindowsError(errno, strerror) else: os.rename(src, dst) class Queue(object): def __init__(self, path, maxsize=0, chunksize=100, tempdir=None, serializer=persistqueue.serializers.pickle, autosave=False): """Create a persistent queue object on a given path. The argument path indicates a directory where enqueued data should be persisted. If the directory doesn't exist, one will be created. If maxsize is <= 0, the queue size is infinite. The optional argument chunksize indicates how many entries should exist in each chunk file on disk. The tempdir parameter indicates where temporary files should be stored. The tempdir has to be located on the same disk as the enqueued data in order to obtain atomic operations. The serializer parameter controls how enqueued data is serialized. It must have methods dump(value, fp) and load(fp). The dump method must serialize value and write it to fp, and may be called for multiple values with the same fp. The load method must deserialize and return one value from fp, and may be called multiple times with the same fp to read multiple values. The autosave parameter controls when data removed from the queue is persisted. By default (disabled), the change is only persisted when task_done() is called. 
If autosave is enabled, data is persisted immediately when get() is called. Adding data to the queue with put() will always persist immediately regardless of this setting. """ log.debug('Initializing File based Queue with path {}'.format(path)) self.path = path self.chunksize = chunksize self.tempdir = tempdir self.maxsize = maxsize self.serializer = serializer self.autosave = autosave self._init(maxsize) if self.tempdir: if os.stat(self.path).st_dev != os.stat(self.tempdir).st_dev: raise ValueError("tempdir has to be located " "on same path filesystem") else: _, tempdir = tempfile.mkstemp() if os.stat(self.path).st_dev != os.stat(tempdir).st_dev: self.tempdir = self.path log.warning("Default tempdir '%(dft_dir)s' is not on the " "same filesystem with queue path '%(queue_path)s'" ",defaulting to '%(new_path)s'." % { "dft_dir": tempdir, "queue_path": self.path, "new_path": self.tempdir}) self.info = self._loadinfo() # truncate head case it contains garbage hnum, hcnt, hoffset = self.info['head'] headfn = self._qfile(hnum) if os.path.exists(headfn): if hoffset < os.path.getsize(headfn): _truncate(headfn, hoffset) # let the head file open self.headf = self._openchunk(hnum, 'ab+') # let the tail file open tnum, _, toffset = self.info['tail'] self.tailf = self._openchunk(tnum) self.tailf.seek(toffset) # update unfinished tasks with the current number of enqueued tasks self.unfinished_tasks = self.info['size'] # optimize info file updates self.update_info = True def _init(self, maxsize): self.mutex = threading.Lock() self.not_empty = threading.Condition(self.mutex) self.not_full = threading.Condition(self.mutex) self.all_tasks_done = threading.Condition(self.mutex) if not os.path.exists(self.path): os.makedirs(self.path) def join(self): with self.all_tasks_done: while self.unfinished_tasks: self.all_tasks_done.wait() def qsize(self): n = None with self.mutex: n = self._qsize() return n def _qsize(self): return self.info['size'] def empty(self): return self.qsize() == 0 def put(self, item, block=True, timeout=None): "Interface for putting item in disk-based queue." 
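        # Flow of the body below: acquire the not_full condition; when
        # maxsize > 0, wait for free space according to the block/timeout
        # arguments (raising Full when the queue stays full, ValueError for a
        # negative timeout); then _put() the item, bump unfinished_tasks and
        # wake one waiting consumer via not_empty.notify().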
self.not_full.acquire() try: if self.maxsize > 0: if not block: if self._qsize() == self.maxsize: raise Full elif timeout is None: while self._qsize() == self.maxsize: self.not_full.wait() elif timeout < 0: raise ValueError("'timeout' must be a non-negative number") else: endtime = _time() + timeout while self._qsize() == self.maxsize: remaining = endtime - _time() if remaining <= 0.0: raise Full self.not_full.wait(remaining) self._put(item) self.unfinished_tasks += 1 self.not_empty.notify() finally: self.not_full.release() def _put(self, item): self.serializer.dump(item, self.headf) self.headf.flush() hnum, hpos, _ = self.info['head'] hpos += 1 if hpos == self.info['chunksize']: hpos = 0 hnum += 1 # make sure data is written to disk whatever # its underlying file system os.fsync(self.headf.fileno()) self.headf.close() self.headf = self._openchunk(hnum, 'ab+') self.info['size'] += 1 self.info['head'] = [hnum, hpos, self.headf.tell()] self._saveinfo() def put_nowait(self, item): return self.put(item, False) def get(self, block=True, timeout=None): self.not_empty.acquire() try: if not block: if not self._qsize(): raise Empty elif timeout is None: while not self._qsize(): self.not_empty.wait() elif timeout < 0: raise ValueError("'timeout' must be a non-negative number") else: endtime = _time() + timeout while not self._qsize(): remaining = endtime - _time() if remaining <= 0.0: raise Empty self.not_empty.wait(remaining) item = self._get() self.not_full.notify() return item finally: self.not_empty.release() def get_nowait(self): return self.get(False) def _get(self): tnum, tcnt, toffset = self.info['tail'] hnum, hcnt, _ = self.info['head'] if [tnum, tcnt] >= [hnum, hcnt]: return None data = self.serializer.load(self.tailf) toffset = self.tailf.tell() tcnt += 1 if tcnt == self.info['chunksize'] and tnum <= hnum: tcnt = toffset = 0 tnum += 1 self.tailf.close() self.tailf = self._openchunk(tnum) self.info['size'] -= 1 self.info['tail'] = [tnum, tcnt, toffset] if self.autosave: self._saveinfo() self.update_info = False else: self.update_info = True return data def task_done(self): with self.all_tasks_done: unfinished = self.unfinished_tasks - 1 if unfinished <= 0: if unfinished < 0: raise ValueError("task_done() called too many times.") self.all_tasks_done.notify_all() self.unfinished_tasks = unfinished self._task_done() def _task_done(self): if self.autosave: return if self.update_info: self._saveinfo() self.update_info = False def _openchunk(self, number, mode='rb'): return open(self._qfile(number), mode) def _loadinfo(self): infopath = self._infopath() if os.path.exists(infopath): with open(infopath, 'rb') as f: info = self.serializer.load(f) else: info = { 'chunksize': self.chunksize, 'size': 0, 'tail': [0, 0, 0], 'head': [0, 0, 0], } return info def _gettempfile(self): if self.tempdir: return tempfile.mkstemp(dir=self.tempdir) else: return tempfile.mkstemp() def _saveinfo(self): tmpfd, tmpfn = self._gettempfile() with os.fdopen(tmpfd, "wb") as tmpfo: self.serializer.dump(self.info, tmpfo) atomic_rename(tmpfn, self._infopath()) self._clear_tail_file() def _clear_tail_file(self): """Remove the tail files whose items were already get.""" tnum, _, _ = self.info['tail'] while tnum >= 1: tnum -= 1 path = self._qfile(tnum) if os.path.exists(path): os.remove(path) else: break def _qfile(self, number): return os.path.join(self.path, 'q%05d' % number) def _infopath(self): return os.path.join(self.path, 'info') def __del__(self): """Handles the removal of queue.""" for to_close in [self.headf, 
self.tailf]: if to_close and not to_close.closed: to_close.close() persist-queue-0.5.1/persistqueue/serializers/000077500000000000000000000000001375215242100214245ustar00rootroot00000000000000persist-queue-0.5.1/persistqueue/serializers/__init__.py000066400000000000000000000000221375215242100235270ustar00rootroot00000000000000#! coding = utf-8 persist-queue-0.5.1/persistqueue/serializers/json.py000066400000000000000000000013731375215242100227530ustar00rootroot00000000000000#! coding = utf-8 """ A serializer that extends json to use bytes and uses newlines to store multiple objects per file. """ from __future__ import absolute_import import json def dump(value, fp, sort_keys=False): "Serialize value as json line to a byte-mode file object" fp.write(json.dumps(value, sort_keys=sort_keys).encode()) fp.write(b"\n") def dumps(value, sort_keys=False): "Serialize value as json to bytes" return json.dumps(value, sort_keys=sort_keys).encode() def load(fp): "Deserialize one json line from a byte-mode file object" return json.loads(fp.readline().decode()) def loads(bytes_value): "Deserialize one json value from bytes" return json.loads(bytes_value.decode()) persist-queue-0.5.1/persistqueue/serializers/msgpack.py000066400000000000000000000022451375215242100234260ustar00rootroot00000000000000#! coding = utf-8 """ A serializer that extends msgpack to specify recommended parameters and adds a 4 byte length prefix to store multiple objects per file. """ from __future__ import absolute_import import msgpack import struct def dump(value, fp, sort_keys=False): "Serialize value as msgpack to a byte-mode file object" if sort_keys and isinstance(value, dict): value = {key: value[key] for key in sorted(value)} packed = msgpack.packb(value, use_bin_type=True) length = struct.pack(" 0: q.get_nowait() q.task_done() n -= 1 else: with self.assertRaises(Empty): q.get_nowait() else: q.put('var%d' % random.getrandbits(16)) n += 1 @params(*serializer_params) def test_multi_threaded(self, serializer): """Create consumer and producer threads, check parallelism""" q = Queue(self.path, **serializer_params[serializer]) def producer(): for i in range(1000): q.put('var%d' % i) def consumer(): for i in range(1000): q.get() q.task_done() c = Thread(target=consumer) c.start() p = Thread(target=producer) p.start() c.join() p.join() q.join() with self.assertRaises(Empty): q.get_nowait() @params(*serializer_params) def test_garbage_on_head(self, serializer): """Adds garbage to the queue head and let the internal integrity checks fix it""" q = Queue(self.path, **serializer_params[serializer]) q.put('var1') del q with open(os.path.join(self.path, 'q00000'), 'ab') as f: f.write(b'garbage') q = Queue(self.path, **serializer_params[serializer]) q.put('var2') self.assertEqual(2, q.qsize()) self.assertEqual('var1', q.get()) q.task_done() @params(*serializer_params) def test_task_done_too_many_times(self, serializer): """Test too many task_done called.""" q = Queue(self.path, **serializer_params[serializer]) q.put('var1') q.get() q.task_done() with self.assertRaises(ValueError): q.task_done() @params(*serializer_params) def test_get_timeout_negative(self, serializer): q = Queue(self.path, **serializer_params[serializer]) q.put('var1') with self.assertRaises(ValueError): q.get(timeout=-1) @params(*serializer_params) def test_get_timeout(self, serializer): """Test when get failed within timeout.""" q = Queue(self.path, **serializer_params[serializer]) q.put('var1') q.get() with self.assertRaises(Empty): q.get(timeout=1) 
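    # The tests below cover the non-blocking and capacity-limited paths:
    # put_nowait, Full when maxsize is reached, put timeouts and blocking puts.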
@params(*serializer_params) def test_put_nowait(self, serializer): """Tests the put_nowait interface.""" q = Queue(self.path, **serializer_params[serializer]) q.put_nowait('var1') self.assertEqual('var1', q.get()) q.task_done() @params(*serializer_params) def test_put_maxsize_reached(self, serializer): """Test that maxsize reached.""" q = Queue(self.path, maxsize=10, **serializer_params[serializer]) for x in range(10): q.put(x) with self.assertRaises(Full): q.put('full_now', block=False) @params(*serializer_params) def test_put_timeout_reached(self, serializer): """Test put with block and timeout.""" q = Queue(self.path, maxsize=2, **serializer_params[serializer]) for x in range(2): q.put(x) with self.assertRaises(Full): q.put('full_and_timeout', block=True, timeout=1) @params(*serializer_params) def test_put_timeout_negative(self, serializer): """Test and put with timeout < 0""" q = Queue(self.path, maxsize=1, **serializer_params[serializer]) with self.assertRaises(ValueError): q.put('var1', timeout=-1) @params(*serializer_params) def test_put_block_and_wait(self, serializer): """Test block until queue is not full.""" q = Queue(self.path, maxsize=10, **serializer_params[serializer]) def consumer(): for i in range(5): q.get() q.task_done() def producer(): for j in range(16): q.put('var%d' % j) p = Thread(target=producer) p.start() c = Thread(target=consumer) c.start() c.join() val = q.get_nowait() p.join() self.assertEqual('var5', val) @params(*serializer_params) def test_clear_tail_file(self, serializer): """Test that only remove tail file when calling task_done.""" q = Queue(self.path, chunksize=10, **serializer_params[serializer]) for i in range(35): q.put('var%d' % i) for _ in range(15): q.get() q = Queue(self.path, chunksize=10, **serializer_params[serializer]) self.assertEqual(q.qsize(), 35) for _ in range(15): q.get() # the first tail file gets removed after task_done q.task_done() for _ in range(16): q.get() # the second and third files get removed after task_done q.task_done() self.assertEqual(q.qsize(), 4) def test_protocol(self): # test that protocol is set properly expect_protocol = 2 if sys.version_info[0] == 2 else 4 self.assertEqual( serializers_pickle.protocol, expect_protocol, ) # test that protocol is used properly serializer = namedtuple("Serializer", ["dump", "load"])( serializers_pickle.dump, lambda fp: fp.read()) q = Queue(path=self.path, serializer=serializer) q.put(b'a') self.assertEqual(q.get(), pickle.dumps(b'a', protocol=expect_protocol)) @params(*serializer_params) def test_del(self, serializer): """test that __del__ can be called successfully""" q = Queue(self.path, **serializer_params[serializer]) q.__del__() self.assertTrue(q.headf.closed) self.assertTrue(q.tailf.closed) @params(*serializer_params) def test_autosave_get(self, serializer): """test the autosave feature saves on get()""" q = Queue(self.path, autosave=True, **serializer_params[serializer]) q.put('var1') q.put('var2') self.assertEqual('var1', q.get()) del q # queue should save on get(), only one item should remain q = Queue(self.path, autosave=True, **serializer_params[serializer]) self.assertEqual(1, q.qsize()) self.assertEqual('var2', q.get()) del q @params(*serializer_params) def test_autosave_join(self, serializer): """Enabling autosave should still allow task_done/join behavior""" q = Queue(self.path, autosave=True, **serializer_params[serializer]) for i in range(10): q.put('var%d' % i) def consumer(): for i in range(10): q.get() # this should still 'count down' properly and allow q.join() # 
to finish q.task_done() c = Thread(target=consumer) c.start() q.join() with self.assertRaises(Empty): q.get_nowait() persist-queue-0.5.1/persistqueue/tests/test_sqlackqueue.py000066400000000000000000000261211375215242100241700ustar00rootroot00000000000000# coding=utf-8 import random import shutil import sys import tempfile import unittest from threading import Thread from persistqueue.sqlackqueue import ( SQLiteAckQueue, FILOSQLiteAckQueue, UniqueAckQ) from persistqueue import Empty class SQLite3AckQueueTest(unittest.TestCase): def setUp(self): self.path = tempfile.mkdtemp(suffix='sqlackqueue') self.auto_commit = True def tearDown(self): shutil.rmtree(self.path, ignore_errors=True) def test_raise_empty(self): q = SQLiteAckQueue(self.path, auto_commit=self.auto_commit) q.put('first') d = q.get() self.assertEqual('first', d) self.assertRaises(Empty, q.get, block=False) # assert with timeout self.assertRaises(Empty, q.get, block=True, timeout=1.0) # assert with negative timeout self.assertRaises(ValueError, q.get, block=True, timeout=-1.0) def test_empty(self): q = SQLiteAckQueue(self.path, auto_commit=self.auto_commit) self.assertEqual(q.empty(), True) q.put('first') self.assertEqual(q.empty(), False) q.get() self.assertEqual(q.empty(), True) def test_open_close_single(self): """Write 1 item, close, reopen checking if same item is there""" q = SQLiteAckQueue(self.path, auto_commit=self.auto_commit) q.put(b'var1') del q q = SQLiteAckQueue(self.path) self.assertEqual(1, q.qsize()) self.assertEqual(b'var1', q.get()) def test_open_close_1000(self): """Write 1000 items, close, reopen checking if all items are there""" q = SQLiteAckQueue(self.path, auto_commit=self.auto_commit) for i in range(1000): q.put('var%d' % i) self.assertEqual(1000, q.qsize()) del q q = SQLiteAckQueue(self.path) self.assertEqual(1000, q.qsize()) for i in range(1000): data = q.get() self.assertEqual('var%d' % i, data) # assert adding another one still works q.put('foobar') data = q.get() self.assertEqual('foobar', data) def test_random_read_write(self): """Test random read/write""" q = SQLiteAckQueue(self.path, auto_commit=self.auto_commit) n = 0 for _ in range(1000): if random.random() < 0.5: if n > 0: q.get() n -= 1 else: self.assertRaises(Empty, q.get, block=False) else: q.put('var%d' % random.getrandbits(16)) n += 1 def test_multi_threaded_parallel(self): """Create consumer and producer threads, check parallelism""" # self.skipTest("Not supported multi-thread.") m_queue = SQLiteAckQueue( path=self.path, multithreading=True, auto_commit=self.auto_commit ) def producer(): for i in range(1000): m_queue.put('var%d' % i) def consumer(): for i in range(1000): x = m_queue.get(block=True) self.assertEqual('var%d' % i, x) c = Thread(target=consumer) c.start() p = Thread(target=producer) p.start() p.join() c.join() self.assertEqual(0, m_queue.size) self.assertEqual(0, len(m_queue)) self.assertRaises(Empty, m_queue.get, block=False) def test_multi_threaded_multi_producer(self): """Test sqlqueue can be used by multiple producers.""" queue = SQLiteAckQueue( path=self.path, multithreading=True, auto_commit=self.auto_commit ) def producer(seq): for i in range(10): queue.put('var%d' % (i + (seq * 10))) def consumer(): for _ in range(100): data = queue.get(block=True) self.assertTrue('var' in data) c = Thread(target=consumer) c.start() producers = [] for seq in range(10): t = Thread(target=producer, args=(seq,)) t.start() producers.append(t) for t in producers: t.join() c.join() def test_multiple_consumers(self): """Test sqlqueue can be 
used by multiple consumers.""" queue = SQLiteAckQueue( path=self.path, multithreading=True, auto_commit=self.auto_commit ) def producer(): for x in range(1000): queue.put('var%d' % x) counter = [] # Set all to 0 for _ in range(1000): counter.append(0) def consumer(index): for i in range(200): data = queue.get(block=True) self.assertTrue('var' in data) counter[index * 200 + i] = data p = Thread(target=producer) p.start() consumers = [] for index in range(5): t = Thread(target=consumer, args=(index,)) t.start() consumers.append(t) p.join() for t in consumers: t.join() self.assertEqual(0, queue.qsize()) for x in range(1000): self.assertNotEqual(0, counter[x], "not 0 for counter's index %s" % x) def test_protocol_1(self): shutil.rmtree(self.path, ignore_errors=True) q = SQLiteAckQueue(path=self.path) self.assertEqual(q._serializer.protocol, 2 if sys.version_info[0] == 2 else 4) def test_protocol_2(self): q = SQLiteAckQueue(path=self.path) self.assertEqual(q._serializer.protocol, 2 if sys.version_info[0] == 2 else 4) def test_ack_and_clear(self): q = SQLiteAckQueue(path=self.path) q._MAX_ACKED_LENGTH = 10 ret_list = [] for _ in range(100): q.put("val%s" % _) for _ in range(100): ret_list.append(q.get()) for ret in ret_list: q.ack(ret) self.assertEqual(q.acked_count(), 100) q.clear_acked_data() self.assertEqual(q.acked_count(), 10) q.shrink_disk_usage() def test_ack_unknown_item(self): q = SQLiteAckQueue(path=self.path) q.put("val1") val1 = q.get() q.ack("val2") q.nack("val3") q.ack_failed("val4") self.assertEqual(q.qsize(), 0) self.assertEqual(q.unack_count(), 1) q.ack(val1) self.assertEqual(q.unack_count(), 0) def test_resume_unack(self): q = SQLiteAckQueue(path=self.path) q.put("val1") val1 = q.get() self.assertEqual(q.qsize(), 0) self.assertEqual(q.unack_count(), 1) self.assertEqual(q.ready_count(), 0) del q q = SQLiteAckQueue(path=self.path, auto_resume=False) self.assertEqual(q.qsize(), 0) self.assertEqual(q.unack_count(), 1) self.assertEqual(q.ready_count(), 0) q.resume_unack_tasks() self.assertEqual(q.qsize(), 0) self.assertEqual(q.unack_count(), 0) self.assertEqual(q.ready_count(), 1) self.assertEqual(val1, q.get()) del q q = SQLiteAckQueue(path=self.path, auto_resume=True) self.assertEqual(q.qsize(), 0) self.assertEqual(q.unack_count(), 0) self.assertEqual(q.ready_count(), 1) self.assertEqual(val1, q.get()) def test_ack_unack_ack_failed(self): q = SQLiteAckQueue(path=self.path) q.put("val1") q.put("val2") q.put("val3") val1 = q.get() val2 = q.get() val3 = q.get() # qsize should be zero when all item is getted from q self.assertEqual(q.qsize(), 0) self.assertEqual(q.unack_count(), 3) # nack will let the item requeued as ready status q.nack(val1) self.assertEqual(q.qsize(), 1) self.assertEqual(q.ready_count(), 1) # ack failed is just mark item as ack failed q.ack_failed(val3) self.assertEqual(q.ack_failed_count(), 1) # ack should not effect qsize q.ack(val2) self.assertEqual(q.acked_count(), 1) self.assertEqual(q.qsize(), 1) # all ack* related action will reduce unack count self.assertEqual(q.unack_count(), 0) # reget the nacked item ready_val = q.get() self.assertEqual(ready_val, val1) q.ack(ready_val) self.assertEqual(q.qsize(), 0) self.assertEqual(q.acked_count(), 2) self.assertEqual(q.ready_count(), 0) def test_put_0(self): q = SQLiteAckQueue(path=self.path) q.put(0) d = q.get(block=False) self.assertIsNotNone(d) class SQLite3QueueInMemory(SQLite3AckQueueTest): def setUp(self): self.path = ":memory:" self.auto_commit = True def test_open_close_1000(self): self.skipTest('Memory based 
sqlite is not persistent.') def test_open_close_single(self): self.skipTest('Memory based sqlite is not persistent.') def test_multiple_consumers(self): self.skipTest('Skipped due to occasional crash during ' 'multithreading mode.') def test_multi_threaded_multi_producer(self): self.skipTest('Skipped due to occasional crash during ' 'multithreading mode.') def test_multi_threaded_parallel(self): self.skipTest('Skipped due to occasional crash during ' 'multithreading mode.') def test_task_done_with_restart(self): self.skipTest('Skipped due to not persistent.') def test_protocol_2(self): self.skipTest('In memory queue is always new.') def test_resume_unack(self): self.skipTest('Memory based sqlite is not persistent.') class FILOSQLite3AckQueueTest(unittest.TestCase): def setUp(self): self.path = tempfile.mkdtemp(suffix='filo_sqlackqueue') self.auto_commit = True def tearDown(self): shutil.rmtree(self.path, ignore_errors=True) def test_open_close_1000(self): """Write 1000 items, close, reopen checking if all items are there""" q = FILOSQLiteAckQueue(self.path, auto_commit=self.auto_commit) for i in range(1000): q.put('var%d' % i) self.assertEqual(1000, q.qsize()) del q q = FILOSQLiteAckQueue(self.path) self.assertEqual(1000, q.qsize()) for i in range(1000): data = q.get() self.assertEqual('var%d' % (999 - i), data) # assert adding another one still works q.put('foobar') data = q.get() self.assertEqual('foobar', data) class SQLite3UniqueAckQueueTest(unittest.TestCase): def setUp(self): self.path = tempfile.mkdtemp(suffix='sqlackqueue') self.auto_commit = True def test_add_duplicate_item(self): q = UniqueAckQ(self.path) q.put(1111) self.assertEqual(1, q.size) # put duplicate item q.put(1111) self.assertEqual(1, q.size) q.put(2222) self.assertEqual(2, q.size) del q q = UniqueAckQ(self.path) self.assertEqual(2, q.size) persist-queue-0.5.1/persistqueue/tests/test_sqlqueue.py000066400000000000000000000313271375215242100235150ustar00rootroot00000000000000# coding=utf-8 import random import shutil import sys import tempfile import unittest from threading import Thread from persistqueue import SQLiteQueue, FILOSQLiteQueue, UniqueQ from persistqueue import Empty from persistqueue.serializers import json as serializers_json from persistqueue.serializers import pickle as serializers_pickle from persistqueue.serializers import msgpack as serializers_msgpack class SQLite3QueueTest(unittest.TestCase): def setUp(self): self.path = tempfile.mkdtemp(suffix='sqlqueue') self.auto_commit = True def tearDown(self): shutil.rmtree(self.path, ignore_errors=True) def test_raise_empty(self): q = SQLiteQueue(self.path, auto_commit=self.auto_commit) q.put('first') d = q.get() self.assertEqual('first', d) self.assertRaises(Empty, q.get, block=False) # assert with timeout self.assertRaises(Empty, q.get, block=True, timeout=1.0) # assert with negative timeout self.assertRaises(ValueError, q.get, block=True, timeout=-1.0) del q def test_empty(self): q = SQLiteQueue(self.path, auto_commit=self.auto_commit) self.assertEqual(q.empty(), True) q.put('first') self.assertEqual(q.empty(), False) q.get() self.assertEqual(q.empty(), True) def test_open_close_single(self): """Write 1 item, close, reopen checking if same item is there""" q = SQLiteQueue(self.path, auto_commit=self.auto_commit) q.put(b'var1') del q q = SQLiteQueue(self.path) self.assertEqual(1, q.qsize()) self.assertEqual(b'var1', q.get()) def test_open_close_1000(self): """Write 1000 items, close, reopen checking if all items are there""" q = SQLiteQueue(self.path, 
auto_commit=self.auto_commit) for i in range(1000): q.put('var%d' % i) self.assertEqual(1000, q.qsize()) del q q = SQLiteQueue(self.path) self.assertEqual(1000, q.qsize()) for i in range(1000): data = q.get() self.assertEqual('var%d' % i, data) # assert adding another one still works q.put('foobar') data = q.get() self.assertEqual('foobar', data) def test_random_read_write(self): """Test random read/write""" q = SQLiteQueue(self.path, auto_commit=self.auto_commit) n = 0 for _ in range(1000): if random.random() < 0.5: if n > 0: q.get() n -= 1 else: self.assertRaises(Empty, q.get, block=False) else: q.put('var%d' % random.getrandbits(16)) n += 1 def test_multi_threaded_parallel(self): """Create consumer and producer threads, check parallelism""" # self.skipTest("Not supported multi-thread.") m_queue = SQLiteQueue(path=self.path, multithreading=True, auto_commit=self.auto_commit) def producer(): for i in range(1000): m_queue.put('var%d' % i) def consumer(): for i in range(1000): x = m_queue.get(block=True) self.assertEqual('var%d' % i, x) c = Thread(target=consumer) c.start() p = Thread(target=producer) p.start() p.join() c.join() self.assertEqual(0, m_queue.size) self.assertEqual(0, len(m_queue)) self.assertRaises(Empty, m_queue.get, block=False) def test_multi_threaded_multi_producer(self): """Test sqlqueue can be used by multiple producers.""" queue = SQLiteQueue(path=self.path, multithreading=True, auto_commit=self.auto_commit) def producer(seq): for i in range(10): queue.put('var%d' % (i + (seq * 10))) def consumer(): for _ in range(100): data = queue.get(block=True) self.assertTrue('var' in data) c = Thread(target=consumer) c.start() producers = [] for seq in range(10): t = Thread(target=producer, args=(seq,)) t.start() producers.append(t) for t in producers: t.join() c.join() def test_multiple_consumers(self): """Test sqlqueue can be used by multiple consumers.""" queue = SQLiteQueue(path=self.path, multithreading=True, auto_commit=self.auto_commit) def producer(): for x in range(1000): queue.put('var%d' % x) counter = [] # Set all to 0 for _ in range(1000): counter.append(0) def consumer(index): for i in range(200): data = queue.get(block=True) self.assertTrue('var' in data) counter[index * 200 + i] = data p = Thread(target=producer) p.start() consumers = [] for index in range(5): t = Thread(target=consumer, args=(index,)) t.start() consumers.append(t) p.join() for t in consumers: t.join() self.assertEqual(0, queue.qsize()) for x in range(1000): self.assertNotEqual(0, counter[x], "not 0 for counter's index %s" % x) self.assertEqual(len(set(counter)), len(counter)) def test_task_done_with_restart(self): """Test that items are not deleted before task_done.""" q = SQLiteQueue(path=self.path, auto_commit=False) for i in range(1, 11): q.put(i) self.assertEqual(1, q.get()) self.assertEqual(2, q.get()) # size is correct before task_done self.assertEqual(8, q.qsize()) q.task_done() # make sure the size still correct self.assertEqual(8, q.qsize()) self.assertEqual(3, q.get()) # without task done del q q = SQLiteQueue(path=self.path, auto_commit=False) # After restart, the qsize and head item are the same self.assertEqual(8, q.qsize()) # After restart, the queue still works self.assertEqual(3, q.get()) self.assertEqual(7, q.qsize()) def test_protocol_1(self): shutil.rmtree(self.path, ignore_errors=True) q = SQLiteQueue(path=self.path) self.assertEqual(q._serializer.protocol, 2 if sys.version_info[0] == 2 else 4) def test_protocol_2(self): q = SQLiteQueue(path=self.path) 
class SQLite3QueueNoAutoCommitTest(SQLite3QueueTest):
    def setUp(self):
        self.path = tempfile.mkdtemp(suffix='sqlqueue_auto_commit')
        self.auto_commit = False

    def test_multiple_consumers(self):
        """
        FAIL: test_multiple_consumers (
        -tests.test_sqlqueue.SQLite3QueueNoAutoCommitTest)
        Test sqlqueue can be used by multiple consumers.
        ----------------------------------------------------------------------
        Traceback (most recent call last):
        File "persist-queue\tests\test_sqlqueue.py", line 183,
        -in test_multiple_consumers
        self.assertEqual(0, queue.qsize())
        AssertionError: 0 != 72
        :return:
        """
        self.skipTest('Skipped due to a known bug above.')


class SQLite3QueueInMemory(SQLite3QueueTest):
    def setUp(self):
        self.path = ":memory:"
        self.auto_commit = True

    def test_open_close_1000(self):
        self.skipTest('Memory based sqlite is not persistent.')

    def test_open_close_single(self):
        self.skipTest('Memory based sqlite is not persistent.')

    def test_multiple_consumers(self):
        self.skipTest('Skipped due to occasional crash during '
                      'multithreading mode.')

    def test_multi_threaded_multi_producer(self):
        self.skipTest('Skipped due to occasional crash during '
                      'multithreading mode.')

    def test_multi_threaded_parallel(self):
        self.skipTest('Skipped due to occasional crash during '
                      'multithreading mode.')

    def test_task_done_with_restart(self):
        self.skipTest('Skipped due to not persistent.')

    def test_protocol_2(self):
        self.skipTest('In memory queue is always new.')


class FILOSQLite3QueueTest(unittest.TestCase):
    def setUp(self):
        self.path = tempfile.mkdtemp(suffix='filo_sqlqueue')
        self.auto_commit = True

    def tearDown(self):
        shutil.rmtree(self.path, ignore_errors=True)

    def test_open_close_1000(self):
        """Write 1000 items, close, reopen checking if all items are there"""

        q = FILOSQLiteQueue(self.path, auto_commit=self.auto_commit)
        for i in range(1000):
            q.put('var%d' % i)
        self.assertEqual(1000, q.qsize())
        del q
        q = FILOSQLiteQueue(self.path)
        self.assertEqual(1000, q.qsize())
        for i in range(1000):
            data = q.get()
            self.assertEqual('var%d' % (999 - i), data)
        # assert adding another one still works
        q.put('foobar')
        data = q.get()
        self.assertEqual('foobar', data)


class FILOSQLite3QueueNoAutoCommitTest(FILOSQLite3QueueTest):
    def setUp(self):
        self.path = tempfile.mkdtemp(suffix='filo_sqlqueue_auto_commit')
        self.auto_commit = False
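# A short sketch contrasting FIFO and FILO ordering, as checked by the two
# test_open_close_1000 variants above. Illustrative only; the directory
# suffixes are made up.
def _filo_queue_usage_sketch():
    fifo = SQLiteQueue(tempfile.mkdtemp(suffix='fifo_demo'))
    filo = FILOSQLiteQueue(tempfile.mkdtemp(suffix='filo_demo'))
    for i in range(3):
        fifo.put('var%d' % i)
        filo.put('var%d' % i)

    assert fifo.get() == 'var0'  # oldest item first
    assert filo.get() == 'var2'  # newest item first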
class SQLite3UniqueQueueTest(unittest.TestCase):
    def setUp(self):
        self.path = tempfile.mkdtemp(suffix='sqlqueue')
        self.auto_commit = True

    def test_add_duplicate_item(self):
        q = UniqueQ(self.path)
        q.put(1111)
        self.assertEqual(1, q.size)
        # put duplicate item
        q.put(1111)
        self.assertEqual(1, q.size)

        q.put(2222)
        self.assertEqual(2, q.size)

        del q
        q = UniqueQ(self.path)
        self.assertEqual(2, q.size)

    def test_multiple_consumers(self):
        """Test UniqueQ can be used by multiple consumers."""

        queue = UniqueQ(path=self.path, multithreading=True,
                        auto_commit=self.auto_commit)

        def producer():
            for x in range(1000):
                queue.put('var%d' % x)

        counter = []
        # Set all to 0
        for _ in range(1000):
            counter.append(0)

        def consumer(index):
            for i in range(200):
                data = queue.get(block=True)
                self.assertTrue('var' in data)
                counter[index * 200 + i] = data

        p = Thread(target=producer)
        p.start()
        consumers = []
        for index in range(5):
            t = Thread(target=consumer, args=(index,))
            t.start()
            consumers.append(t)

        p.join()
        for t in consumers:
            t.join()

        self.assertEqual(0, queue.qsize())
        for x in range(1000):
            self.assertNotEqual(0, counter[x],
                                "not 0 for counter's index %s" % x)

        self.assertEqual(len(set(counter)), len(counter))

    def test_unique_dictionary_serialization_pickle(self):
        queue = UniqueQ(
            path=self.path,
            multithreading=True,
            auto_commit=self.auto_commit,
            serializer=serializers_pickle,
        )
        queue.put({"foo": 1, "bar": 2})
        self.assertEqual(queue.total, 1)
        queue.put({"bar": 2, "foo": 1})
        self.assertEqual(queue.total, 1)

    def test_unique_dictionary_serialization_msgpack(self):
        queue = UniqueQ(
            path=self.path,
            multithreading=True,
            auto_commit=self.auto_commit,
            serializer=serializers_msgpack
        )
        queue.put({"foo": 1, "bar": 2})
        self.assertEqual(queue.total, 1)
        queue.put({"bar": 2, "foo": 1})
        self.assertEqual(queue.total, 1)

    def test_unique_dictionary_serialization_json(self):
        queue = UniqueQ(
            path=self.path,
            multithreading=True,
            auto_commit=self.auto_commit,
            serializer=serializers_json
        )
        queue.put({"foo": 1, "bar": 2})
        self.assertEqual(queue.total, 1)
        queue.put({"bar": 2, "foo": 1})
        self.assertEqual(queue.total, 1)
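# A brief sketch of swapping the on-disk serializer, as the three
# test_unique_dictionary_serialization_* cases above do. Illustrative only;
# msgpack is an optional dependency (see extra-requirements.txt in setup.py's
# get_extras), so the msgpack serializer is only usable when that package is
# installed.
def _unique_queue_serializer_sketch():
    q = UniqueQ(
        path=tempfile.mkdtemp(suffix='uniq_demo'),
        # serializers_pickle and serializers_msgpack work the same way
        serializer=serializers_json,
    )
    q.put({"foo": 1, "bar": 2})
    q.put({"bar": 2, "foo": 1})  # same content, different key order: deduplicated
    return q.total               # 1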
persist-queue-0.5.1/requirements.txt000066400000000000000000000000001375215242100176040ustar00rootroot00000000000000
persist-queue-0.5.1/scripts/000077500000000000000000000000001375215242100160215ustar00rootroot00000000000000
persist-queue-0.5.1/scripts/publish.sh000066400000000000000000000005141375215242100200230ustar00rootroot00000000000000
#!/usr/bin/env bash
set -e

BASE_DIR=`pwd`
NAME=$(basename $BASE_DIR)
if [[ "$NAME" != "persist-queue" ]];then
    echo "must run this in project root"
    exit 1
fi

python setup.py build sdist
python setup.py build bdist_wheel

twine check ${BASE_DIR}/dist/*.tar.gz
twine check ${BASE_DIR}/dist/*.whl
twine upload ${BASE_DIR}/dist/*
persist-queue-0.5.1/setup.cfg000066400000000000000000000000341375215242100161500ustar00rootroot00000000000000
[bdist_wheel]
universal = 1
persist-queue-0.5.1/setup.py000066400000000000000000000027361375215242100160520ustar00rootroot00000000000000
#!/usr/bin/env python
# coding=utf-8

from setuptools import setup, find_packages


def get_extras():
    return {
        "extra": open("extra-requirements.txt").read().splitlines()
    }


setup(
    name='persist-queue',
    version=__import__('persistqueue').__version__,
    description=(
        'A thread-safe disk based persistent queue in Python.'
    ),
    long_description=open('README.rst').read(),
    author=__import__('persistqueue').__author__,
    author_email='wangxu198709@gmail.com',
    maintainer=__import__('persistqueue').__author__,
    maintainer_email='wangxu198709@gmail.com',
    license=__import__('persistqueue').__license__,
    packages=find_packages(),
    extras_require=get_extras(),
    platforms=["all"],
    url='http://github.com/peter-wangxu/persist-queue',
    classifiers=[
        'Development Status :: 4 - Beta',
        'Operating System :: OS Independent',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: BSD License',
        'Programming Language :: Python',
        'Programming Language :: Python :: Implementation',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Topic :: Software Development :: Libraries'
    ],
)
persist-queue-0.5.1/test-requirements.txt000066400000000000000000000001701375215242100205710ustar00rootroot00000000000000
mock>=2.0.0
flake8>=3.2.1
eventlet>=0.19.0
msgpack>=0.5.6
nose2>=0.6.5
coverage!=4.5
cov_core>=1.15.0
virtualenv>=15.1.0
persist-queue-0.5.1/tox.ini000066400000000000000000000007721375215242100156530ustar00rootroot00000000000000
[tox]
minversion = 2.0
skipsdist = True
envlist = py27, py34, py35, py36, py37, pep8, cover
deps = -r{toxinidir}/test-requirements.txt

[testenv]
setenv = VIRTUAL_ENV={envdir}
usedevelop = True
deps = -r{toxinidir}/test-requirements.txt
whitelist_externals = bash
                      find
commands = nose2 {posargs}

[testenv:pep8]
commands = flake8 ./persistqueue ./tests {posargs}

[testenv:cover]
commands = nose2 --with-coverage --coverage-report xml --coverage-report html --coverage-report term {posargs}
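# Illustrative only: the [testenv] and [testenv:cover] sections above run the
# suite through nose2 with the packages pinned in test-requirements.txt; the
# same discovery can be started from Python, assuming nose2 is installed and
# its documented discover() entry point is used.
if __name__ == '__main__':
    import nose2

    nose2.discover()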