pax_global_header00006660000000000000000000000064146407732510014523gustar00rootroot0000000000000052 comment=b194f07553379a8319ce1bd0adbb0ad4c610eba9 persist-queue-1.0.0/000077500000000000000000000000001464077325100143345ustar00rootroot00000000000000persist-queue-1.0.0/.circleci/000077500000000000000000000000001464077325100161675ustar00rootroot00000000000000persist-queue-1.0.0/.circleci/config.yml000066400000000000000000000106101464077325100201550ustar00rootroot00000000000000version: 2.1 jobs: py27: docker: # Primary container image where all steps run. - image: circleci/python:2.7.17 environment: TOXENV: py27 # MySQL env for mysql queue tests - image: circleci/mysql:8.0 environment: MYSQL_ROOT_PASSWORD: rootpw MYSQL_DATABASE: testqueue MYSQL_USER: user MYSQL_PASSWORD: passw0rd MYSQL_HOST: '%' steps: &common_steps - checkout - run: # Our primary container isn't MYSQL so run a sleep command until it's ready. name: Waiting for MySQL to be ready command: | for i in `seq 1 10`; do nc -z 127.0.0.1 3306 && echo Success && exit 0 echo -n . sleep 5 done echo Failed waiting for MySQL && exit 1 - run: command: | pip install tox - run: command: | # tell the operating system to remove the file size limit on core dump files tox - run: bash <(curl -s https://codecov.io/bash) -cF python - run: command: | mkdir -p /tmp/core_dumps ls core.* && cp core.* /tmp/core_dumps when: on_fail - store_artifacts: # collect core dumps path: /tmp/core_dumps - store_artifacts: path: .coverage - store_artifacts: path: coverage.xml - store_artifacts: path: htmlcov py38: docker: # Primary container image where all steps run. - image: cimg/python:3.8 environment: TOXENV: py38 # MySQL env for mysql queue tests - image: circleci/mysql:8.0 environment: MYSQL_ROOT_PASSWORD: 123456 MYSQL_DATABASE: testqueue MYSQL_USER: user MYSQL_PASSWORD: passw0rd steps: *common_steps py39: docker: # Primary container image where all steps run. - image: cimg/python:3.9 environment: TOXENV: py39 # MySQL env for mysql queue tests - image: circleci/mysql:8.0 environment: MYSQL_ROOT_PASSWORD: 123456 MYSQL_DATABASE: testqueue MYSQL_USER: user MYSQL_PASSWORD: passw0rd steps: *common_steps py310: docker: # Primary container image where all steps run. - image: cimg/python:3.10 environment: TOXENV: py310 # MySQL env for mysql queue tests - image: circleci/mysql:8.0 environment: MYSQL_ROOT_PASSWORD: 123456 MYSQL_DATABASE: testqueue MYSQL_USER: user MYSQL_PASSWORD: passw0rd steps: *common_steps py311: docker: # Primary container image where all steps run. - image: cimg/python:3.11 environment: TOXENV: py311 # MySQL env for mysql queue tests - image: circleci/mysql:8.0 environment: MYSQL_ROOT_PASSWORD: 123456 MYSQL_DATABASE: testqueue MYSQL_USER: user MYSQL_PASSWORD: passw0rd steps: *common_steps py312: docker: # Primary container image where all steps run. - image: cimg/python:3.12 environment: TOXENV: py312 # MySQL env for mysql queue tests - image: circleci/mysql:8.0 environment: MYSQL_ROOT_PASSWORD: 123456 MYSQL_DATABASE: testqueue MYSQL_USER: user MYSQL_PASSWORD: passw0rd steps: *common_steps pep8: docker: # Primary container image where all steps run. - image: cimg/python:3.8 environment: TOXENV: pep8 # MySQL env for mysql queue tests - image: circleci/mysql:8.0 environment: MYSQL_ROOT_PASSWORD: rootpw MYSQL_DATABASE: testqueue MYSQL_USER: user MYSQL_PASSWORD: passw0rd steps: *common_steps cover: docker: # Primary container image where all steps run. 
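# Note: every job in this config reuses the same step list via the YAML anchor
# (&common_steps, defined once in the py27 job) and referenced below as *common_steps.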
- image: cimg/python:3.8 environment: TOXENV: cover # MySQL env for mysql queue tests - image: circleci/mysql:8.0 environment: MYSQL_ROOT_PASSWORD: 123456 MYSQL_DATABASE: testqueue MYSQL_USER: user MYSQL_PASSWORD: passw0rd steps: *common_steps workflows: version: 2 build: jobs: - pep8 - py38 - py39 - py310 - py311 - py312 - cover persist-queue-1.0.0/.coveragerc000066400000000000000000000001711464077325100164540ustar00rootroot00000000000000[run] branch = True source = persistqueue/* omit = ./tests/* ./.tox/* ./setup.py [xml] output = coverage.xml persist-queue-1.0.0/.gitignore000066400000000000000000000021411464077325100163220ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg .testrepository/ # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # IPython Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # dotenv .env # virtualenv venv/ ENV/ # Spyder project settings .spyderproject # Rope project settings .ropeproject # IDE specific folders .idea/ .vscode/ # MacOS .DS_Store persist-queue-1.0.0/LICENSE000066400000000000000000000030171464077325100153420ustar00rootroot00000000000000Copyright (c) G. B. Versiani. Copyright (c) Peter Wang. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of python-pqueue nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
persist-queue-1.0.0/MANIFEST.in000066400000000000000000000000611464077325100160670ustar00rootroot00000000000000include LICENSE include README.rst include *.txt persist-queue-1.0.0/README.rst000066400000000000000000000504371464077325100160320ustar00rootroot00000000000000persist-queue - A thread-safe, disk-based queue for Python ========================================================== .. image:: https://img.shields.io/circleci/project/github/peter-wangxu/persist-queue/master.svg?label=Linux%20%26%20Mac :target: https://circleci.com/gh/peter-wangxu/persist-queue .. image:: https://img.shields.io/appveyor/ci/peter-wangxu/persist-queue/master.svg?label=Windows :target: https://ci.appveyor.com/project/peter-wangxu/persist-queue .. image:: https://img.shields.io/codecov/c/github/peter-wangxu/persist-queue/master.svg :target: https://codecov.io/gh/peter-wangxu/persist-queue .. image:: https://img.shields.io/pypi/v/persist-queue.svg :target: https://pypi.python.org/pypi/persist-queue .. image:: https://img.shields.io/pypi/pyversions/persist-queue :alt: PyPI - Python Version ``persist-queue`` implements a file-based queue and a series of sqlite3-based queues. The goal is to achieve the following requirements: * Disk-based: each queued item should be stored on disk in case of any crash. * Thread-safe: can be used by multi-threaded producers and multi-threaded consumers. * Recoverable: Items can be read after process restart. * Green-compatible: can be used in a ``greenlet`` or ``eventlet`` environment. Neither *queuelib* nor *python-pqueue* fulfils all of the above. After some experimentation, I found it hard to achieve these goals on top of their current implementations without huge code changes; this is the motivation for starting this project. By default, *persist-queue* uses the *pickle* object serialization module to support object instances. Most built-in types, like `int`, `dict` and `list`, can be persisted by `persist-queue` directly; to support customized objects, please refer to `Pickling and unpickling extension types(Python2) `_ and `Pickling Class Instances(Python3) `_ This project is based on the achievements of `python-pqueue `_ and `queuelib `_ Slack channels ^^^^^^^^^^^^^^ Join the `persist-queue `_ channel Requirements ------------ * Python 3.5 or newer versions (refer to `Deprecation`_ for older Python versions) * Full support for Linux and MacOS. * Windows support (with `Caution`_ if ``persistqueue.Queue`` is used). Features -------- - Multiple platform support: Linux, macOS, Windows - Pure python - Both file based queues and sqlite3 based queues are supported - File based queue: multiple serialization protocols supported: pickle(default), msgpack, cbor, json Deprecation ----------- - `persist-queue` drops Python 2 support since version `1.0.0`; no new features will be developed under Python 2 as `Python 2 was sunset on January 1, 2020 `_. - `Python 3.4 release has reached end of life `_ and `DBUtils `_ ceased support for `Python 3.4`; `persist-queue` drops the MySQL based queue for Python 3.4 since version 0.8.0. Other queue implementations such as the file based queue and the sqlite3 based queue still work. Installation ------------ from pypi ^^^^^^^^^ .. code-block:: console pip install persist-queue # for msgpack, cbor and mysql support, use the following command pip install "persist-queue[extra]" from source code ^^^^^^^^^^^^^^^^ ..
code-block:: console git clone https://github.com/peter-wangxu/persist-queue cd persist-queue # for msgpack and cbor support, run 'pip install -r extra-requirements.txt' first python setup.py install Benchmark --------- Here are the time spent(in seconds) for writing/reading **1000** items to the disk comparing the sqlite3 and file queue. - Windows - OS: Windows 10 - Disk: SATA3 SSD - RAM: 16 GiB +---------------+---------+-------------------------+----------------------------+ | | Write | Write/Read(1 task_done) | Write/Read(many task_done) | +---------------+---------+-------------------------+----------------------------+ | SQLite3 Queue | 1.8880 | 2.0290 | 3.5940 | +---------------+---------+-------------------------+----------------------------+ | File Queue | 4.9520 | 5.0560 | 8.4900 | +---------------+---------+-------------------------+----------------------------+ **windows note** Performance of Windows File Queue has dramatic improvement since `v0.4.1` due to the atomic renaming support(3-4X faster) - Linux - OS: Ubuntu 16.04 (VM) - Disk: SATA3 SSD - RAM: 4 GiB +---------------+--------+-------------------------+----------------------------+ | | Write | Write/Read(1 task_done) | Write/Read(many task_done) | +---------------+--------+-------------------------+----------------------------+ | SQLite3 Queue | 1.8282 | 1.8075 | 2.8639 | +---------------+--------+-------------------------+----------------------------+ | File Queue | 0.9123 | 1.0411 | 2.5104 | +---------------+--------+-------------------------+----------------------------+ - Mac OS - OS: 10.14 (macOS Mojave) - Disk: PCIe SSD - RAM: 16 GiB +---------------+--------+-------------------------+----------------------------+ | | Write | Write/Read(1 task_done) | Write/Read(many task_done) | +---------------+--------+-------------------------+----------------------------+ | SQLite3 Queue | 0.1879 | 0.2115 | 0.3147 | +---------------+--------+-------------------------+----------------------------+ | File Queue | 0.5158 | 0.5357 | 1.0446 | +---------------+--------+-------------------------+----------------------------+ **note** - The value above is in seconds for reading/writing *1000* items, the less the better - Above result was got from: .. code-block:: console python benchmark/run_benchmark.py 1000 To see the real performance on your host, run the script under ``benchmark/run_benchmark.py``: .. code-block:: console python benchmark/run_benchmark.py Examples -------- Example usage with a SQLite3 based queue ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code-block:: python >>> import persistqueue >>> q = persistqueue.SQLiteQueue('mypath', auto_commit=True) >>> q.put('str1') >>> q.put('str2') >>> q.put('str3') >>> q.get() 'str1' >>> del q Close the console, and then recreate the queue: .. code-block:: python >>> import persistqueue >>> q = persistqueue.SQLiteQueue('mypath', auto_commit=True) >>> q.get() 'str2' >>> New functions: *Available since v0.8.0* - ``shrink_disk_usage`` perform a ``VACUUM`` against the sqlite, and rebuild the database file, this usually takes long time and frees a lot of disk space after ``get()`` Example usage of SQLite3 based ``UniqueQ`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This queue does not allow duplicate items. .. 
code-block:: python >>> import persistqueue >>> q = persistqueue.UniqueQ('mypath') >>> q.put('str1') >>> q.put('str1') >>> q.size 1 >>> q.put('str2') >>> q.size 2 >>> Example usage of SQLite3 based ``SQLiteAckQueue``/``UniqueAckQ`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The core functions: - ``put``: add an item to the queue. Returns ``id`` - ``get``: get an item from the queue and mark it as unack. Returns ``item``, Optional parameters (``block``, ``timeout``, ``id``, ``next_in_order``, ``raw``) - ``update``: update an item. Returns ``id``, Parameters (``item``), Optional parameter if item not in raw format (``id``) - ``ack``: mark an item as acked. Returns ``id``, Parameters (``item`` or ``id``) - ``nack``: there might be something wrong with the current consumer, so mark the item as ready so that a new consumer will get it. Returns ``id``, Parameters (``item`` or ``id``) - ``ack_failed``: there might be something wrong during processing, so just mark the item as failed. Returns ``id``, Parameters (``item`` or ``id``) - ``clear_acked_data``: performs a SQL delete against sqlite. It removes 1000 items, while keeping 1000 of the most recent, whose status is ``AckStatus.acked`` (note: this does not shrink the file size on disk) Optional parameters (``max_delete``, ``keep_latest``, ``clear_ack_failed``) - ``shrink_disk_usage``: performs a ``VACUUM`` against the sqlite database and rebuilds the database file; this usually takes a long time and frees a lot of disk space after ``clear_acked_data`` - ``queue``: returns the database contents as a Python List[Dict] - ``active_size``: The active size changes when an item is added (put) and completed (ack/ack_failed), unlike ``qsize`` which changes when an item is pulled (get) or returned (nack). .. code-block:: python >>> import persistqueue >>> ackq = persistqueue.SQLiteAckQueue('path') >>> ackq.put('str1') >>> item = ackq.get() >>> # Do something with the item >>> ackq.ack(item) # If done with the item >>> ackq.nack(item) # Else mark the item as `nack` so that it can be processed again by any worker >>> ackq.ack_failed(item) # Or else mark the item as `ack_failed` to discard this item Parameters: - ``clear_acked_data`` - ``max_delete`` (defaults to 1000): This is the LIMIT. How many items to delete. - ``keep_latest`` (defaults to 1000): This is the OFFSET. How many recent items to keep. - ``clear_ack_failed`` (defaults to False): Clears the ``AckStatus.ack_failed`` in addition to the ``AckStatus.acked``. - ``get`` - ``raw`` (defaults to False): Returns the metadata along with the record, which includes the id (``pqid``) and timestamp. On the SQLiteAckQueue, the raw results can be ack, nack, ack_failed similar to the normal return. - ``id`` (defaults to None): Accepts an `id` or a raw item containing ``pqid``. Will select the item based on the row id. - ``next_in_order`` (defaults to False): Requires the ``id`` attribute. This option tells the SQLiteAckQueue/UniqueAckQ to get the next item based on ``id``, not the first available. This allows the user to get, nack, get, nack and progress down the queue, instead of continuing to get the same nack'd item over again. ``raw`` example: .. code-block:: python >>> q.put('val1') >>> d = q.get(raw=True) >>> print(d) >>> {'pqid': 1, 'data': 'val1', 'timestamp': 1616719225.012912} >>> q.ack(d) ``next_in_order`` example: .. code-block:: python >>> q.put("val1") >>> q.put("val2") >>> q.put("val3") >>> item = q.get() >>> id = q.nack(item) >>> item = q.get(id=id, next_in_order=True) >>> print(item) >>> val2
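Putting the core functions together, a minimal consumer sketch might look like the following (``process_item`` is a hypothetical user-defined function, not part of the library):

.. code-block:: python

    >>> import persistqueue
    >>> ackq = persistqueue.SQLiteAckQueue('path')
    >>> item = ackq.get()
    >>> try:
    ...     process_item(item)   # hypothetical application logic
    ...     ackq.ack(item)       # success: mark the item as acked
    ... except Exception:
    ...     ackq.nack(item)      # failure: mark it ready again so another consumer can get it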
Note: 1. The SQLiteAckQueue always uses "auto_commit=True". 2. The queue can be used in a non-blocking style, e.g. "SQLiteAckQueue.get(block=False, timeout=5)". 3. ``UniqueAckQ`` only allows unique items. Example usage with a file based queue ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Parameters: - ``path``: specifies the directory where enqueued data is persisted. - ``maxsize``: indicates the maximum size of the queue; if maxsize <= 0 the queue is unlimited. - ``chunksize``: indicates how many entries should exist in each chunk file on disk. When all entries in a chunk file have been dequeued by get(), the file is removed from the filesystem. - ``tempdir``: indicates where temporary files should be stored. The tempdir has to be located on the same disk as the enqueued data in order to obtain atomic operations. - ``serializer``: controls how enqueued data is serialized. - ``autosave``: `True` or `False`. By default, the change is only persisted when task_done() is called. If autosave is enabled, data is persisted immediately when get() is called. Adding data to the queue with put() will always persist immediately regardless of this setting. .. code-block:: python >>> from persistqueue import Queue >>> q = Queue("mypath") >>> q.put('a') >>> q.put('b') >>> q.put('c') >>> q.get() 'a' >>> q.task_done() Close the Python console, and then restart the queue from the same path: .. code-block:: python >>> from persistqueue import Queue >>> q = Queue('mypath') >>> q.get() 'b' >>> q.task_done() Example usage with an auto-saving file based queue ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ *Available since: v0.5.0* By default, items added to the queue are persisted during the ``put()`` call, and items removed from a queue are only persisted when ``task_done()`` is called. .. code-block:: python >>> from persistqueue import Queue >>> q = Queue("mypath") >>> q.put('a') >>> q.put('b') >>> q.get() 'a' >>> q.get() 'b' After exiting and restarting the queue from the same path, we see the items remain in the queue, because ``task_done()`` wasn't called before. .. code-block:: python >>> from persistqueue import Queue >>> q = Queue('mypath') >>> q.get() 'a' >>> q.get() 'b' This can be advantageous. For example, if your program crashes before finishing processing an item, it will remain in the queue after restarting. You can also spread out the ``task_done()`` calls for performance reasons to avoid lots of individual writes. Using ``autosave=True`` on a file based queue will automatically save on every call to ``get()``. Calling ``task_done()`` is not necessary, but may still be used to ``join()`` against the queue. .. code-block:: python >>> from persistqueue import Queue >>> q = Queue("mypath", autosave=True) >>> q.put('a') >>> q.put('b') >>> q.get() 'a' After exiting and restarting the queue from the same path, only the second item remains: .. code-block:: python >>> from persistqueue import Queue >>> q = Queue('mypath', autosave=True) >>> q.get() 'b' Example usage with a SQLite3 based dict ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code-block:: python >>> from persistqueue import PDict >>> q = PDict("testpath", "testname") >>> q['key1'] = 123 >>> q['key2'] = 321 >>> q['key1'] 123 >>> len(q) 2 >>> del q['key1'] >>> q['key1'] Traceback (most recent call last): File "<stdin>", line 1, in <module> File "persistqueue\pdict.py", line 58, in __getitem__ raise KeyError('Key: {} not exists.'.format(item)) KeyError: 'Key: key1 not exists.'
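``PDict`` also provides a dict-style ``get()`` that returns a default value instead of raising ``KeyError`` (see ``persistqueue/pdict.py`` later in this archive), for example:

.. code-block:: python

    >>> q.get('key1', 'missing')   # 'key1' was deleted above, so the default is returned
    'missing'
    >>> q.get('key2')
    321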
Close the console and restart the PDict: .. code-block:: python >>> from persistqueue import PDict >>> q = PDict("testpath", "testname") >>> q['key2'] 321 Multi-thread usage for **SQLite3** based queue ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code-block:: python from persistqueue import FIFOSQLiteQueue q = FIFOSQLiteQueue(path="./test", multithreading=True) def worker(): while True: item = q.get() do_work(item) for i in range(num_worker_threads): t = Thread(target=worker) t.daemon = True t.start() for item in source(): q.put(item) multi-thread usage for **Queue** ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code-block:: python from persistqueue import Queue q = Queue() def worker(): while True: item = q.get() do_work(item) q.task_done() for i in range(num_worker_threads): t = Thread(target=worker) t.daemon = True t.start() for item in source(): q.put(item) q.join() # block until all tasks are done Example usage with a MySQL based queue ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ *Available since: v0.8.0* .. code-block:: python >>> import persistqueue >>> db_conf = { >>> "host": "127.0.0.1", >>> "user": "user", >>> "passwd": "passw0rd", >>> "db_name": "testqueue", >>> # "name": "", >>> "port": 3306 >>> } >>> q = persistqueue.MySQLQueue(name="testtable", **db_conf) >>> q.put('str1') >>> q.put('str2') >>> q.put('str3') >>> q.get() 'str1' >>> del q Close the console, and then recreate the queue: .. code-block:: python >>> import persistqueue >>> q = persistqueue.MySQLQueue(name="testtable", **db_conf) >>> q.get() 'str2' >>> **note** Due to the limitation of the file queue described in issue `#89 `_, `task_done` in one thread may acknowledge items in other threads which should not be acknowledged. Consider the `SQLiteAckQueue` if you have such a requirement. Serialization via msgpack/cbor/json ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - v0.4.1: Currently only available for the file based Queue - v0.4.2: Also available for SQLite3 based Queues .. code-block:: python >>> import persistqueue >>> q = persistqueue.Queue('mypath', serializer=persistqueue.serializers.msgpack) >>> # via cbor2 >>> # q = persistqueue.Queue('mypath', serializer=persistqueue.serializers.cbor2) >>> # via json >>> # q = Queue('mypath', serializer=persistqueue.serializers.json) >>> q.get() 'b' >>> q.task_done() Explicit resource reclaim ^^^^^^^^^^^^^^^^^^^^^^^^^ In some cases, an application may require explicit reclamation of file handles or SQL connections before the end of execution. In these cases, the user can simply call: .. code-block:: python q = Queue() # or q = persistqueue.SQLiteQueue('mypath', auto_commit=True) del q to reclaim related file handles or SQL connections. Tips ---- ``task_done`` is required for both the file based queue and the SQLite3 based queue (when ``auto_commit=False``) to persist the cursor of the next ``get`` to the disk. Performance impact ------------------ - **WAL** Starting with v0.3.2, ``persistqueue`` leverages the sqlite3 built-in feature `WAL `_, which can improve performance significantly; general testing indicates that ``persistqueue`` is 2-4 times faster than the previous version. - **auto_commit=False** Since persistqueue v0.3.0, a new parameter ``auto_commit`` is introduced to tweak the performance for sqlite3 based queues as needed. When specifying ``auto_commit=False``, the user needs to call ``queue.task_done()`` to persist the changes made to the disk since the last ``task_done`` invocation. - **pickle protocol selection** From v0.3.6, ``persistqueue`` selects ``Protocol version 2`` for Python 2 and ``Protocol version 4`` for Python 3 respectively.
This selection only happens when the directory is not present when initializing the queue. Tests ----- *persist-queue* use ``tox`` to trigger tests. - Unit test .. code-block:: console tox -e Available ````: ``py27``, ``py34``, ``py35``, ``py36``, ``py37`` - PEP8 check .. code-block:: console tox -e pep8 `pyenv `_ is usually a helpful tool to manage multiple versions of Python. Caution ------- Currently, the atomic operation is supported on Windows while still in experimental, That's saying, the data in ``persistqueue.Queue`` could be in unreadable state when an incidental failure occurs during ``Queue.task_done``. **DO NOT put any critical data on persistqueue.queue on Windows**. Contribution ------------ Simply fork this repo and send PR for your code change(also tests to cover your change), remember to give a title and description of your PR. I am willing to enhance this project with you :). License ------- `BSD `_ Contributors ------------ `Contributors `_ FAQ --- * ``sqlite3.OperationalError: database is locked`` is raised. persistqueue open 2 connections for the db if ``multithreading=True``, the SQLite database is locked until that transaction is committed. The ``timeout`` parameter specifies how long the connection should wait for the lock to go away until raising an exception. Default time is **10**, increase ``timeout`` when creating the queue if above error occurs. * sqlite3 based queues are not thread-safe. The sqlite3 queues are heavily tested under multi-threading environment, if you find it's not thread-safe, please make sure you set the ``multithreading=True`` when initializing the queue before submitting new issue:). persist-queue-1.0.0/RELEASENOTE.md000066400000000000000000000005421464077325100164250ustar00rootroot00000000000000# 1.0.0-alpha 1. Only Python3.x series are offically supported by persistqueue, since Python 2 was no longer under maintenance since 2020 2. `persistqueue.Queue` using `serializer=persistqueue.serializers.pickle` created under python2 was no longer able to be read by persist queue after 1.0.0 as the default pickle version changed from `2` to `4` persist-queue-1.0.0/appveyor.yml000066400000000000000000000065231464077325100167320ustar00rootroot00000000000000# image: # - Visual Studio 2017 # contains python2.7 ~ python3.8 and mysql 5.7 # - Visual Studio 2022 # contains python3.7 ~ python3.12 and mysql 8.0 # services: # mysql is now MySQL5.7 now for MySQL8.0 # - mysql init: - ps: | if ($env:APPVEYOR_BUILD_WORKER_IMAGE -eq "Visual Studio 2022") { Start-Service MySQL80 } else { Start-Service MySQL57 } environment: matrix: # For Python versions available on Appveyor, see # http://www.appveyor.com/docs/installed-software#python # The list here is complete (excluding Python 2.6, which # isn't covered by this document) at the time of writing. 
- TOXENV: "pep8" PYTHON: "C:\\Python38-x64" DISTUTILS_USE_SDK: "1" APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 # - TOXENV: "py27" # PYTHON: "C:\\Python27-x64" # DISTUTILS_USE_SDK: "1" # - TOXENV: "py35" # PYTHON: "C:\\Python35-x64" # DISTUTILS_USE_SDK: "1" # APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 # - TOXENV: "py36" # PYTHON: "C:\\Python36-x64" # DISTUTILS_USE_SDK: "1" # APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 # - TOXENV: "py37" # PYTHON: "C:\\Python37-x64" # DISTUTILS_USE_SDK: "1" # APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - TOXENV: "py38" PYTHON: "C:\\Python38-x64" DISTUTILS_USE_SDK: "1" APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - TOXENV: "py39" PYTHON: "C:\\Python39-x64" DISTUTILS_USE_SDK: "1" APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2022 - TOXENV: "py310" PYTHON: "C:\\Python310-x64" DISTUTILS_USE_SDK: "1" APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2022 - TOXENV: "py311" PYTHON: "C:\\Python311-x64" DISTUTILS_USE_SDK: "1" APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2022 - TOXENV: "py312" PYTHON: "C:\\Python312-x64" APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2022 DISTUTILS_USE_SDK: "1" - TOXENV: "cover" PYTHON: "C:\\Python38-x64" DISTUTILS_USE_SDK: "1" APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 install: # We need wheel installed to build wheels - "%PYTHON%\\python.exe -m pip install tox" build: false test_script: # Put your test command here. # If you don't need to build C extensions on 64-bit Python 3.3 or 3.4, # you can remove "build.cmd" from the front of the command, as it's # only needed to support those cases. # Note that you must use the environment variable %PYTHON% to refer to # the interpreter you're using - Appveyor does not do anything special # to put the Python evrsion you want to use on PATH. - ps: | if ($env:APPVEYOR_BUILD_WORKER_IMAGE -eq "Visual Studio 2017") { $env:MYSQL_PWD="Password12!" $cmd = '"C:\Program Files\MySQL\MySQL Server 5.7\bin\mysql" -e "create database testqueue;" --user=root' iex "& $cmd" } else { $env:MYSQL_PWD="Password12!" $cmd = '"C:\Program Files\MySQL\MySQL Server 8.0\bin\mysql" -e "create database testqueue;" --user=root' iex "& $cmd" } - | echo image: %APPVEYOR_BUILD_WORKER_IMAGE%, tox:%TOXENV% "%PYTHON%\\Scripts\\tox.exe" #on_success: # You can use this step to upload your artifacts to a public website. # See Appveyor's documentation for more details. Or you can simply # access your wheels from the Appveyor "artifacts" tab for your build. persist-queue-1.0.0/benchmark/000077500000000000000000000000001464077325100162665ustar00rootroot00000000000000persist-queue-1.0.0/benchmark/run_benchmark.py000066400000000000000000000102711464077325100214570ustar00rootroot00000000000000"""This file provides tests to benchmark performance sqlite/file queue on specific hardware. 
User can easily evaluate the performance by running this file directly via `python run_benchmark.py` """ from persistqueue import SQLiteQueue from persistqueue import Queue import tempfile import time BENCHMARK_COUNT = 100 def time_it(func): def _exec(*args, **kwargs): start = time.time() func(*args, **kwargs) end = time.time() print( "\t{} => time used: {:.4f} seconds.".format( func.__doc__, (end - start))) return _exec class FileQueueBench(object): """Benchmark File queue performance.""" def __init__(self, prefix=None): self.path = prefix @time_it def benchmark_file_write(self): """Writing items.""" self.path = tempfile.mkdtemp('b_file_10000') q = Queue(self.path) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) assert q.qsize() == BENCHMARK_COUNT @time_it def benchmark_file_read_write_false(self): """Writing and reading items(1 task_done).""" self.path = tempfile.mkdtemp('b_file_10000') q = Queue(self.path) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) for i in range(BENCHMARK_COUNT): q.get() q.task_done() assert q.qsize() == 0 @time_it def benchmark_file_read_write_autosave(self): """Writing and reading items(autosave).""" self.path = tempfile.mkdtemp('b_file_10000') q = Queue(self.path, autosave=True) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) for i in range(BENCHMARK_COUNT): q.get() assert q.qsize() == 0 @time_it def benchmark_file_read_write_true(self): """Writing and reading items(many task_done).""" self.path = tempfile.mkdtemp('b_file_10000') q = Queue(self.path) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) for i in range(BENCHMARK_COUNT): q.get() q.task_done() assert q.qsize() == 0 @classmethod def run(cls): print(cls.__doc__) ins = cls() for name in sorted(cls.__dict__): if name.startswith('benchmark'): func = getattr(ins, name) func() class Sqlite3QueueBench(object): """Benchmark Sqlite3 queue performance.""" @time_it def benchmark_sqlite_write(self): """Writing items.""" self.path = tempfile.mkdtemp('b_sql_10000') q = SQLiteQueue(self.path, auto_commit=False) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) assert q.qsize() == BENCHMARK_COUNT @time_it def benchmark_sqlite_read_write_false(self): """Writing and reading items(1 task_done).""" self.path = tempfile.mkdtemp('b_sql_10000') q = SQLiteQueue(self.path, auto_commit=False) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) for i in range(BENCHMARK_COUNT): q.get() q.task_done() assert q.qsize() == 0 @time_it def benchmark_sqlite_read_write_true(self): """Writing and reading items(many task_done).""" self.path = tempfile.mkdtemp('b_sql_10000') q = SQLiteQueue(self.path, auto_commit=True) for i in range(BENCHMARK_COUNT): q.put('bench%d' % i) for i in range(BENCHMARK_COUNT): q.get() q.task_done() assert q.qsize() == 0 @classmethod def run(cls): print(cls.__doc__) ins = cls() for name in sorted(cls.__dict__): if name.startswith('benchmark'): func = getattr(ins, name) func() if __name__ == '__main__': import sys if len(sys.argv) > 1: BENCHMARK_COUNT = int(sys.argv[1]) print(" = {}".format(BENCHMARK_COUNT)) file_bench = FileQueueBench() file_bench.run() sql_bench = Sqlite3QueueBench() sql_bench.run() persist-queue-1.0.0/extra-requirements.txt000066400000000000000000000001351464077325100207400ustar00rootroot00000000000000msgpack>=0.5.6 cbor2>=5.2.0 PyMySQL DBUtils<3.0.0 # since 3.0.0 no longer supports Python2.x 
persist-queue-1.0.0/persistqueue/000077500000000000000000000000001464077325100170725ustar00rootroot00000000000000persist-queue-1.0.0/persistqueue/__init__.py000066400000000000000000000026721464077325100212120ustar00rootroot00000000000000__author__ = 'Peter Wang' __license__ = 'BSD' __version__ = '1.0.0' # Relative imports assuming the current package structure from .exceptions import Empty, Full # noqa: F401 from .queue import Queue # noqa: F401 import logging log = logging.getLogger(__name__) # Attempt to import optional components, logging if not found. try: from .pdict import PDict # noqa: F401 from .sqlqueue import ( # noqa: F401 SQLiteQueue, FIFOSQLiteQueue, FILOSQLiteQueue, UniqueQ ) from .sqlackqueue import ( # noqa: F401 SQLiteAckQueue, FIFOSQLiteAckQueue, FILOSQLiteAckQueue, UniqueAckQ, AckStatus ) except ImportError: # If sqlite3 is not available, log a message. log.info("No sqlite3 module found, sqlite3 based queues are not available") try: from .mysqlqueue import MySQLQueue # noqa: F401 except ImportError: # failed due to DBUtils not installed via extra-requirements.txt log.info("DBUtils may not be installed, install " "via 'pip install persist-queue[extra]'") # Define what symbols are exported by the module. __all__ = [ "Queue", "SQLiteQueue", "FIFOSQLiteQueue", "FILOSQLiteQueue", "UniqueQ", "PDict", "SQLiteAckQueue", "FIFOSQLiteAckQueue", "FILOSQLiteAckQueue", "UniqueAckQ", "AckStatus", "MySQLQueue", "Empty", "Full", "__author__", "__license__", "__version__" ] persist-queue-1.0.0/persistqueue/exceptions.py000066400000000000000000000003561464077325100216310ustar00rootroot00000000000000class Empty(Exception): """Exception raised when an operation is attempted on an empty queue.""" pass class Full(Exception): """Exception raised when an attempt is made to add an item to a full container.""" pass persist-queue-1.0.0/persistqueue/mysqlqueue.py000066400000000000000000000120621464077325100216570ustar00rootroot00000000000000from dbutils.pooled_db import PooledDB import threading import time as _time import persistqueue from .sqlbase import SQLBase from typing import Any, Optional class MySQLQueue(SQLBase): """Mysql(or future standard dbms) based FIFO queue.""" _TABLE_NAME = 'queue' _KEY_COLUMN = '_id' # the name of the key column, used in DB CRUD # SQL to create a table _SQL_CREATE = ( 'CREATE TABLE IF NOT EXISTS {table_name} (' '{key_column} INTEGER PRIMARY KEY AUTO_INCREMENT, ' 'data BLOB, timestamp FLOAT)') # SQL to insert a record _SQL_INSERT = 'INSERT INTO {table_name} (data, timestamp) VALUES (%s, %s)' # SQL to select a record _SQL_SELECT_ID = ( 'SELECT {key_column}, data, timestamp FROM {table_name} WHERE' ' {key_column} = {rowid}' ) _SQL_SELECT = ( 'SELECT {key_column}, data, timestamp FROM {table_name} ' 'ORDER BY {key_column} ASC LIMIT 1' ) _SQL_SELECT_WHERE = ( 'SELECT {key_column}, data, timestamp FROM {table_name} WHERE' ' {column} {op} %s ORDER BY {key_column} ASC LIMIT 1 ' ) _SQL_UPDATE = 'UPDATE {table_name} SET data = %s WHERE {key_column} = %s' _SQL_DELETE = 'DELETE FROM {table_name} WHERE {key_column} {op} %s' def __init__( self, host: str, user: str, passwd: str, db_name: str, name: Optional[str] = None, port: int = 3306, charset: str = 'utf8mb4', auto_commit: bool = True, serializer: Any = persistqueue.serializers.pickle, ) -> None: super(MySQLQueue, self).__init__() self.name = name if name else "sql" self.host = host self.user = user self.passwd = passwd self.db_name = db_name self.port = port self.charset = charset self._serializer = serializer 
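        # The serializer set above controls how items are converted to bytes for the
        # BLOB column (see put()); persistqueue.serializers.pickle is used unless overridden.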
self.auto_commit = auto_commit self.tran_lock = threading.Lock() self.put_event = threading.Event() self.action_lock = threading.Lock() self._connection_pool = None self._getter = None self._putter = None self._new_db_connection() self._init() def _new_db_connection(self) -> None: try: import pymysql except ImportError: print("Please install mysql library via 'pip install PyMySQL'") raise db_pool = PooledDB(pymysql, 2, 10, 5, 10, True, host=self.host, port=self.port, user=self.user, passwd=self.passwd, database=self.db_name, charset=self.charset) self._connection_pool = db_pool conn = db_pool.connection() cursor = conn.cursor() cursor.execute("SELECT VERSION()") _ = cursor.fetchone() cursor.execute(self._sql_create) conn.commit() cursor.execute("use %s" % self.db_name) self._putter = MySQLConn(queue=self) self._getter = self._putter def put(self, item: Any, block: bool = True) -> int: # block kwarg is noop and only here to align with python's queue obj = self._serializer.dumps(item) _id = self._insert_into(obj, _time.time()) self.total += 1 self.put_event.set() return _id def put_nowait(self, item: Any) -> int: return self.put(item, block=False) def _init(self) -> None: self.action_lock = threading.Lock() if not self.auto_commit: head = self._select() if head: self.cursor = head[0] - 1 else: self.cursor = 0 self.total = self._count() def get_pooled_conn(self) -> Any: return self._connection_pool.connection() class MySQLConn: """MySqlConn defines a common structure for both mysql and sqlite3 connections. used to mitigate the interface differences between drivers/db """ def __init__(self, queue: Optional[MySQLQueue] = None, conn: Optional[Any] = None) -> None: self._queue = queue if queue is not None: self._conn = queue.get_pooled_conn() else: self._conn = conn self._cursor = None self.closed = False def __enter__(self) -> Any: self._cursor = self._conn.cursor() return self._conn def __exit__(self, exc_type: Optional[type], exc_val: Optional[BaseException], exc_tb: Optional[Any]) -> None: # do not commit() but to close() , keep same behavior # with dbutils self._cursor.close() def execute(self, *args: Any, **kwargs: Any) -> Any: if self._queue is not None: conn = self._queue.get_pooled_conn() else: conn = self._conn cursor = conn.cursor() cursor.execute(*args, **kwargs) return cursor def close(self) -> None: if not self.closed: self._conn.close() self.closed = True def commit(self) -> None: if not self.closed: self._conn.commit() persist-queue-1.0.0/persistqueue/pdict.py000066400000000000000000000046351464077325100205570ustar00rootroot00000000000000import logging import sqlite3 from persistqueue import sqlbase from typing import Any, Iterator log = logging.getLogger(__name__) class PDict(sqlbase.SQLiteBase, dict): _TABLE_NAME = 'dict' _KEY_COLUMN = 'key' _SQL_CREATE = ('CREATE TABLE IF NOT EXISTS {table_name} (' '{key_column} TEXT PRIMARY KEY, data BLOB)') _SQL_INSERT = 'INSERT INTO {table_name} (key, data) VALUES (?, ?)' _SQL_SELECT = ('SELECT {key_column}, data FROM {table_name} ' 'WHERE {key_column} = ?') _SQL_UPDATE = 'UPDATE {table_name} SET data = ? WHERE {key_column} = ?' _SQL_DELETE = 'DELETE FROM {table_name} WHERE {key_column} {op} ?' 
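    # The {table_name}, {key_column} and {op} placeholders in the SQL templates above
    # are presumably substituted by the shared SQLiteBase machinery in sqlbase when the
    # concrete statements are built.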
def __init__(self, path: str, name: str, multithreading: bool = False) -> None: # PDict is always auto_commit=True super().__init__(path, name=name, multithreading=multithreading, auto_commit=True) def __iter__(self) -> Iterator: raise NotImplementedError('Not supported.') def keys(self) -> Iterator: raise NotImplementedError('Not supported.') def iterkeys(self) -> Iterator: raise NotImplementedError('Not supported.') def values(self) -> Iterator: raise NotImplementedError('Not supported.') def itervalues(self) -> Iterator: raise NotImplementedError('Not supported.') def iteritems(self) -> Iterator: raise NotImplementedError('Not supported.') def items(self) -> Iterator: raise NotImplementedError('Not supported.') def __contains__(self, item: Any) -> bool: row = self._select(item) return row is not None def __setitem__(self, key: Any, value: Any) -> None: obj = self._serializer.dumps(value) try: self._insert_into(key, obj) except sqlite3.IntegrityError: self._update(key, obj) def __getitem__(self, item: Any) -> Any: row = self._select(item) if row: return self._serializer.loads(row[1]) else: raise KeyError('Key: {} not exists.'.format(item)) def get(self, key: Any, default: Any = None) -> Any: try: return self[key] except KeyError: return default def __delitem__(self, key: Any) -> None: self._delete(key) def __len__(self) -> int: return self._count() persist-queue-1.0.0/persistqueue/py.typed000066400000000000000000000000011464077325100205600ustar00rootroot00000000000000 persist-queue-1.0.0/persistqueue/queue.py000066400000000000000000000241751464077325100206010ustar00rootroot00000000000000"""A thread-safe disk based persistent queue in Python.""" import logging import os import tempfile import threading from time import time as _time import persistqueue.serializers.pickle from persistqueue.exceptions import Empty, Full from typing import Any, Optional, Tuple, BinaryIO log = logging.getLogger(__name__) def _truncate(fn: str, length: int) -> None: """Truncate the file to a specified length.""" with open(fn, 'ab+') as f: f.truncate(length) def atomic_rename(src: str, dst: str) -> None: """Atomically rename a file from src to dst.""" os.replace(src, dst) class Queue: """Thread-safe, persistent queue.""" def __init__( self, path: str, maxsize: int = 0, chunksize: int = 100, tempdir: Optional[str] = None, serializer: Any = persistqueue.serializers.pickle, autosave: bool = False ) -> None: """Create a persistent queue object on a given path. The argument path indicates a directory where enqueued data should be persisted. If the directory doesn't exist, one will be created. If maxsize is <= 0, the queue size is infinite. The optional argument chunksize indicates how many entries should exist in each chunk file on disk. The tempdir parameter indicates where temporary files should be stored. The tempdir has to be located on the same disk as the enqueued data in order to obtain atomic operations. The serializer parameter controls how enqueued data is serialized. It must have methods dump(value, fp) and load(fp). The dump method must serialize value and write it to fp, and may be called for multiple values with the same fp. The load method must deserialize and return one value from fp, and may be called multiple times with the same fp to read multiple values. The autosave parameter controls when data removed from the queue is persisted. By default, (disabled), the change is only persisted when task_done() is called. If autosave is enabled, data is persisted immediately when get() is called. 
Adding data to the queue with put() will always persist immediately regardless of this setting. """ log.debug('Initializing File based Queue with path {}'.format(path)) self.path = path self.chunksize = chunksize self.tempdir = tempdir self.maxsize = maxsize self.serializer = serializer self.autosave = autosave self._init(maxsize) if self.tempdir: if os.stat(self.path).st_dev != os.stat(self.tempdir).st_dev: raise ValueError( "tempdir has to be located on same path filesystem") else: fd, tempdir = tempfile.mkstemp() if os.stat(self.path).st_dev != os.stat(tempdir).st_dev: self.tempdir = self.path log.warning("Default tempdir '%(dft_dir)s' is not on the " "same filesystem with queue path '%(queue_path)s'" ",defaulting to '%(new_path)s'." % { "dft_dir": tempdir, "queue_path": self.path, "new_path": self.tempdir}) os.close(fd) os.remove(tempdir) self.info = self._loadinfo() # truncate head in case it contains garbage hnum, hcnt, hoffset = self.info['head'] headfn = self._qfile(hnum) if os.path.exists(headfn): if hoffset < os.path.getsize(headfn): _truncate(headfn, hoffset) # let the head file open self.headf = self._openchunk(hnum, 'ab+') tnum, _, toffset = self.info['tail'] self.tailf = self._openchunk(tnum) self.tailf.seek(toffset) # update unfinished tasks with the current number of enqueued tasks self.unfinished_tasks = self.info['size'] self.update_info = True def _init(self, maxsize: int) -> None: self.mutex = threading.Lock() self.not_empty = threading.Condition(self.mutex) self.not_full = threading.Condition(self.mutex) self.all_tasks_done = threading.Condition( self.mutex) if not os.path.exists(self.path): os.makedirs(self.path) def join(self) -> None: with self.all_tasks_done: while self.unfinished_tasks: self.all_tasks_done.wait() def qsize(self) -> int: with self.mutex: return self._qsize() def _qsize(self) -> int: return self.info['size'] def empty(self) -> bool: return self.qsize() == 0 def full(self) -> bool: return self.qsize() == self.maxsize def put(self, item: Any, block: bool = True, timeout: Optional[float] = None) -> None: self.not_full.acquire() try: if self.maxsize > 0: if not block: if self._qsize() == self.maxsize: raise Full elif timeout is None: while self._qsize() == self.maxsize: self.not_full.wait() elif timeout < 0: raise ValueError("'timeout' must be a non-negative number") else: endtime = _time() + timeout while self._qsize() == self.maxsize: remaining = endtime - _time() if remaining <= 0.0: raise Full self.not_full.wait(remaining) self._put(item) self.unfinished_tasks += 1 self.not_empty.notify() finally: self.not_full.release() def _put(self, item: Any) -> None: self.serializer.dump(item, self.headf) self.headf.flush() hnum, hpos, _ = self.info['head'] hpos += 1 if hpos == self.info['chunksize']: hpos = 0 hnum += 1 os.fsync(self.headf.fileno()) self.headf.close() self.headf = self._openchunk(hnum, 'ab+') self.info['size'] += 1 self.info['head'] = [hnum, hpos, self.headf.tell()] self._saveinfo() def put_nowait(self, item: Any) -> None: self.put(item, False) def get(self, block: bool = True, timeout: Optional[float] = None) -> Any: self.not_empty.acquire() try: if not block: if not self._qsize(): raise Empty elif timeout is None: while not self._qsize(): self.not_empty.wait() elif timeout < 0: raise ValueError("'timeout' must be a non-negative number") else: endtime = _time() + timeout while not self._qsize(): remaining = endtime - _time() if remaining <= 0.0: raise Empty self.not_empty.wait(remaining) item = self._get() self.not_full.notify() return item 
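            # The finally block below always runs, so not_empty is released even when
            # Empty is raised or the wait times out.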
finally: self.not_empty.release() def get_nowait(self) -> Any: return self.get(False) def _get(self) -> Any: tnum, tcnt, toffset = self.info['tail'] hnum, hcnt, _ = self.info['head'] if [tnum, tcnt] >= [hnum, hcnt]: return None data = self.serializer.load(self.tailf) toffset = self.tailf.tell() tcnt += 1 if tcnt == self.info['chunksize'] and tnum <= hnum: tcnt = toffset = 0 tnum += 1 self.tailf.close() self.tailf = self._openchunk(tnum) self.info['size'] -= 1 self.info['tail'] = [tnum, tcnt, toffset] if self.autosave: self._saveinfo() self.update_info = False else: self.update_info = True return data def task_done(self) -> None: with self.all_tasks_done: unfinished = self.unfinished_tasks - 1 if unfinished <= 0: if unfinished < 0: raise ValueError("task_done() called too many times.") self.all_tasks_done.notify_all() self.unfinished_tasks = unfinished self._task_done() def _task_done(self) -> None: if self.autosave: return if self.update_info: self._saveinfo() self.update_info = False def _openchunk(self, number: int, mode: str = 'rb') -> BinaryIO: return open(self._qfile(number), mode) def _loadinfo(self) -> dict: infopath = self._infopath() if os.path.exists(infopath): with open(infopath, 'rb') as f: info = self.serializer.load(f) else: info = { 'chunksize': self.chunksize, 'size': 0, 'tail': [0, 0, 0], 'head': [0, 0, 0], } return info def _gettempfile(self) -> Tuple[int, str]: if self.tempdir: return tempfile.mkstemp(dir=self.tempdir) else: return tempfile.mkstemp() def _saveinfo(self) -> None: tmpfd, tmpfn = self._gettempfile() with os.fdopen(tmpfd, "wb") as tmpfo: self.serializer.dump(self.info, tmpfo) atomic_rename(tmpfn, self._infopath()) self._clear_tail_file() def _clear_tail_file(self) -> None: """Remove the tail files whose items were already get.""" tnum, _, _ = self.info['tail'] while tnum >= 1: tnum -= 1 path = self._qfile(tnum) if os.path.exists(path): os.remove(path) else: break def _qfile(self, number: int) -> str: return os.path.join(self.path, 'q%05d' % number) def _infopath(self) -> str: return os.path.join(self.path, 'info') def __del__(self) -> None: """Handles the removal of queue.""" for to_close in self.headf, self.tailf: if to_close and not to_close.closed: to_close.close() persist-queue-1.0.0/persistqueue/serializers/000077500000000000000000000000001464077325100214265ustar00rootroot00000000000000persist-queue-1.0.0/persistqueue/serializers/__init__.py000066400000000000000000000000001464077325100235250ustar00rootroot00000000000000persist-queue-1.0.0/persistqueue/serializers/cbor2.py000066400000000000000000000046641464077325100230210ustar00rootroot00000000000000""" A serializer that extends cbor2 to specify recommended parameters and adds a 4 byte length prefix to store multiple objects per file. """ import cbor2 from struct import Struct from typing import Any, BinaryIO # Define the Struct for prefixing serialized objects with their byte length length_struct = Struct(" None: """ Serialize value as cbor2 to a byte-mode file object with a length prefix. Args: value: The Python object to serialize. fp: A file-like object supporting binary write operations. sort_keys: If True, the output of dictionaries will be sorted by key. 
Returns: None """ # If sorting is required and the value is a dictionary, sort it by keys if sort_keys and isinstance(value, dict): value = {key: value[key] for key in sorted(value)} packed = cbor2.dumps(value) length = length_struct.pack(len(packed)) fp.write(length) fp.write(packed) def dumps(value: Any, sort_keys: bool = False) -> bytes: """ Serialize value as cbor2 to bytes without length prefix. Args: value: The Python object to serialize. sort_keys: If True, the output of dictionaries will be sorted by key. Returns: A bytes object containing the serialized representation of the value. """ # If sorting is required and the value is a dictionary, sort it by keys if sort_keys and isinstance(value, dict): value = {key: value[key] for key in sorted(value)} return cbor2.dumps(value) def load(fp: BinaryIO) -> Any: """ Deserialize one cbor2 value from a byte-mode file object using length prefix. Args: fp: A file-like object supporting binary read operations. Returns: The deserialized Python object. """ # Read the 4-byte length prefix and determine the length of the # serialized object length = length_struct.unpack(fp.read(4))[0] # Read the serialized object using the determined length and # deserialize it return cbor2.loads(fp.read(length)) def loads(bytes_value: bytes) -> Any: """ Deserialize one cbor2 value from bytes. Args: bytes_value: The bytes object containing the serialized representation. Returns: The deserialized Python object. """ return cbor2.loads(bytes_value) persist-queue-1.0.0/persistqueue/serializers/json.py000066400000000000000000000031071464077325100227520ustar00rootroot00000000000000""" A serializer that extends json to use bytes and uses newlines to store multiple objects per file. """ import json from typing import Any, BinaryIO def dump(value: Any, fp: BinaryIO, sort_keys: bool = False) -> None: """Serialize value as json line to a byte-mode file object. Args: value: The Python object to serialize. fp: A file-like object supporting .write() in binary mode. sort_keys: If True, the output of dictionaries will be sorted by key. Returns: None """ fp.write(json.dumps(value, sort_keys=sort_keys).encode('utf-8')) fp.write(b"\n") def dumps(value: Any, sort_keys: bool = False) -> bytes: """Serialize value as json to bytes. Args: value: The Python object to serialize. sort_keys: If True, the output of dictionaries will be sorted by key. Returns: A json-encoded string converted to bytes. """ return json.dumps(value, sort_keys=sort_keys).encode('utf-8') def load(fp: BinaryIO) -> Any: """Deserialize one json line from a byte-mode file object. Args: fp: A file-like object supporting .readline() in binary mode. Returns: The deserialized Python object. """ return json.loads(fp.readline().decode('utf-8')) def loads(bytes_value: bytes) -> Any: """Deserialize one json value from bytes. Args: bytes_value: The json-encoded bytes to deserialize. Returns: The deserialized Python object. """ return json.loads(bytes_value.decode('utf-8')) persist-queue-1.0.0/persistqueue/serializers/msgpack.py000066400000000000000000000041051464077325100234250ustar00rootroot00000000000000""" A serializer that extends msgpack to specify recommended parameters and adds a 4 byte length prefix to store multiple objects per file. """ import msgpack import struct from typing import Any, BinaryIO, Dict def dump(value: Any, fp: BinaryIO, sort_keys: bool = False) -> None: """ Serialize value as msgpack to a byte-mode file object with a length prefix. Args: value: The Python object to serialize. 
fp: A file-like object supporting binary write operations. sort_keys: If True, the output of dictionaries will be sorted by key. Returns: None """ if sort_keys and isinstance(value, Dict): value = {key: value[key] for key in sorted(value)} packed = msgpack.packb(value, use_bin_type=True) length = struct.pack(" bytes: """ Serialize value as msgpack to bytes. Args: value: The Python object to serialize. sort_keys: If True, the output of dictionaries will be sorted by key. Returns: A bytes object containing the serialized representation of value. """ if sort_keys and isinstance(value, Dict): value = {key: value[key] for key in sorted(value)} return msgpack.packb(value, use_bin_type=True) def load(fp: BinaryIO) -> Any: """ Deserialize one msgpack value from a byte-mode file object using length prefix. Args: fp: A file-like object supporting binary read operations. Returns: The deserialized Python object. """ length = struct.unpack(" Any: """ Deserialize one msgpack value from bytes. Args: bytes_value: A bytes object containing the serialized msgpack data. Returns: The deserialized Python object. """ return msgpack.unpackb(bytes_value, use_list=False, raw=False) persist-queue-1.0.0/persistqueue/serializers/pickle.py000066400000000000000000000042541464077325100232540ustar00rootroot00000000000000"""A serializer that extends pickle to change the default protocol.""" from typing import Any, BinaryIO, Dict import pickle import logging log = logging.getLogger(__name__) # Retrieve the selected pickle protocol from a common utility module protocol = 4 # Python 3 uses protocol version 4 or higher log.info("Selected pickle protocol: '{}'".format(protocol)) def dump(value: Any, fp: BinaryIO, sort_keys: bool = False) -> None: """ Serialize value as pickle to a byte-mode file object. Args: value: The Python object to serialize. fp: A file-like object supporting binary write operations. sort_keys: If True and if the value is a dictionary, the keys will be sorted before serialization. Returns: None """ if sort_keys and isinstance(value, Dict): # Sort the dictionary by keys if sort_keys is True value = {key: value[key] for key in sorted(value)} pickle.dump(value, fp, protocol=protocol) def dumps(value: Any, sort_keys: bool = False) -> bytes: """ Serialize value as pickle to bytes. Args: value: The Python object to serialize. sort_keys: If True and if the value is a dictionary, the keys will be sorted before serialization. Returns: A bytes object containing the serialized representation of value. """ if sort_keys and isinstance(value, Dict): # Sort the dictionary by keys if sort_keys is True value = {key: value[key] for key in sorted(value)} return pickle.dumps(value, protocol=protocol) def load(fp: BinaryIO) -> Any: """ Deserialize one pickle value from a byte-mode file object. Args: fp: A file-like object supporting binary read operations. Returns: The deserialized Python object. """ return pickle.load(fp) def loads(bytes_value: bytes) -> Any: """ Deserialize one pickle value from bytes. Args: bytes_value: A bytes object containing the serialized pickle data. Returns: The deserialized Python object. """ return pickle.loads(bytes_value) persist-queue-1.0.0/persistqueue/sqlackqueue.py000066400000000000000000000320131464077325100217660ustar00rootroot00000000000000import logging import sqlite3 import time as _time import threading import warnings from typing import Any, Dict, Optional, Tuple from . 
import sqlbase from .exceptions import Empty sqlite3.enable_callback_tracebacks(True) log = logging.getLogger(__name__) # 10 seconds interval for `wait` of event TICK_FOR_WAIT = 10 class AckStatus: inited = '0' ready = '1' unack = '2' acked = '5' ack_failed = '9' class SQLiteAckQueue(sqlbase.SQLiteBase): """SQLite3 based FIFO queue with ack support.""" _TABLE_NAME = 'ack_queue' _KEY_COLUMN = '_id' # the name of the key column, used in DB CRUD _MAX_ACKED_LENGTH = 1000 # deprecated # SQL to create a table _SQL_CREATE = ( 'CREATE TABLE IF NOT EXISTS {table_name} (' '{key_column} INTEGER PRIMARY KEY AUTOINCREMENT, ' 'data BLOB, timestamp FLOAT, status INTEGER)' ) # SQL to insert a record _SQL_INSERT = ( 'INSERT INTO {table_name} (data, timestamp, status)' ' VALUES (?, ?, %s)' % AckStatus.inited ) # SQL to select a record _SQL_SELECT_ID = ( 'SELECT {key_column}, data, timestamp, status FROM {table_name} WHERE' ' {key_column} = {rowid}' ) _SQL_SELECT = ( 'SELECT {key_column}, data, timestamp, status FROM {table_name} ' 'WHERE {key_column} > {rowid} AND status < %s ' 'ORDER BY {key_column} ASC LIMIT 1' % AckStatus.unack ) _SQL_MARK_ACK_UPDATE = ( 'UPDATE {table_name} SET status = ? WHERE {key_column} = ?' ) _SQL_SELECT_WHERE = ( 'SELECT {key_column}, data, timestamp FROM {table_name}' ' WHERE {key_column} > {rowid} AND status < %s AND' ' {column} {op} ? ORDER BY {key_column} ASC' ' LIMIT 1 ' % AckStatus.unack ) _SQL_UPDATE = 'UPDATE {table_name} SET data = ? WHERE {key_column} = ?' def __init__(self, path: str, auto_resume: bool = True, **kwargs): super(SQLiteAckQueue, self).__init__(path, **kwargs) if not self.auto_commit: warnings.warn("disable auto commit is not supported in ack queue") self.auto_commit = True self._unack_cache = {} if auto_resume: self.resume_unack_tasks() def resume_unack_tasks(self) -> None: unack_count = self.unack_count() if unack_count: log.info("resume %d unack tasks", unack_count) sql = 'UPDATE {} set status = ? 
WHERE status = ?'.format( self._table_name) with self.tran_lock: with self._putter as tran: tran.execute(sql, (AckStatus.ready, AckStatus.unack,)) self.total = self._count() def put(self, item: Any) -> Optional[int]: obj = self._serializer.dumps(item) _id = self._insert_into(obj, _time.time()) self.total += 1 self.put_event.set() return _id def _init(self) -> None: super(SQLiteAckQueue, self)._init() self.action_lock = threading.Lock() self.total = self._count() def _count(self) -> int: sql = 'SELECT COUNT({}) FROM {} WHERE status < ?'.format( self._key_column, self._table_name ) row = self._getter.execute(sql, (AckStatus.unack,)).fetchone() return row[0] if row else 0 def _ack_count_via_status(self, status: str) -> int: sql = 'SELECT COUNT({}) FROM {} WHERE status = ?'.format( self._key_column, self._table_name ) row = self._getter.execute(sql, (status,)).fetchone() return row[0] if row else 0 def unack_count(self) -> int: return self._ack_count_via_status(AckStatus.unack) def acked_count(self) -> int: return self._ack_count_via_status(AckStatus.acked) def ready_count(self) -> int: return self._ack_count_via_status(AckStatus.ready) def ack_failed_count(self) -> int: return self._ack_count_via_status(AckStatus.ack_failed) @sqlbase.with_conditional_transaction def _mark_ack_status(self, key: int, status: str) -> None: return self._sql_mark_ack_status, (status, key,) @sqlbase.with_conditional_transaction def clear_acked_data( self, max_delete: int = 1000, keep_latest: int = 1000, clear_ack_failed: bool = False ) -> None: acked_clear_all = '' acked_to_delete = '' acked_to_keep = '' if self._MAX_ACKED_LENGTH != 1000 and not max_delete: # Added for backward compatibility for # those that set the _MAX_ACKED_LENGTH print( "_MAX_ACKED_LENGTH has been deprecated. " "Use clear_acked_data(keep_latest=1000, max_delete=1000)" ) keep_latest = self._MAX_ACKED_LENGTH if clear_ack_failed: acked_clear_all = 'OR status = %s' % AckStatus.ack_failed if max_delete and max_delete > 0: acked_to_delete = 'LIMIT %d' % max_delete if keep_latest and keep_latest > 0: acked_to_keep = 'OFFSET %d' % keep_latest sql = """DELETE FROM {table_name} WHERE {key_column} IN ( SELECT _id FROM {table_name} WHERE status = ? 
{clear_ack_failed} ORDER BY {key_column} DESC {acked_to_delete} {acked_to_keep} )""".format( table_name=self._table_name, key_column=self._key_column, acked_to_delete=acked_to_delete, acked_to_keep=acked_to_keep, clear_ack_failed=acked_clear_all, ) return sql, AckStatus.acked @property def _sql_mark_ack_status(self) -> str: return self._SQL_MARK_ACK_UPDATE.format( table_name=self._table_name, key_column=self._key_column ) def _pop(self, rowid: Optional[int] = None, next_in_order: bool = False, raw: bool = False) -> Optional[Dict[str, Any]]: with self.action_lock: row = self._select(next_in_order=next_in_order, rowid=rowid) if row and row[0] is not None: self._mark_ack_status(row[0], AckStatus.unack) serialized_data = row[1] item = self._serializer.loads(serialized_data) self._unack_cache[row[0]] = item self.total -= 1 if raw: return {'pqid': row[0], 'data': item, 'timestamp': row[2]} else: return item return None def _find_item_id(self, item: Any, search: bool = True) -> Optional[int]: if item is None: return None elif isinstance(item, dict) and "pqid" in item: return item.get("pqid") elif search: for key, value in self._unack_cache.items(): if value is item: return key elif isinstance(item, int) or ( isinstance(item, str) and item.isnumeric() ): return int(item) log.warning("Item id not Interger and can't find item in unack cache.") return None def _check_id(self, item: Any, id: Optional[int]) -> Tuple[Any, bool]: if id is not None and item is not None: raise ValueError("Specify an id or an item, not both.") elif id is None and item is None: raise ValueError("Specify an id or an item.") elif id is not None: search = False item = id else: search = True return item, search def ack(self, item: Any = None, id: Optional[int] = None) -> Optional[int]: item, search = self._check_id(item, id) with self.action_lock: _id = self._find_item_id(item, search) if _id is None: return None self._mark_ack_status(_id, AckStatus.acked) if _id in self._unack_cache: self._unack_cache.pop(_id) return _id def ack_failed(self, item: Any = None, id: Optional[int] = None) -> Optional[int]: item, search = self._check_id(item, id) with self.action_lock: _id = self._find_item_id(item, search) if _id is None: return None self._mark_ack_status(_id, AckStatus.ack_failed) if _id in self._unack_cache: self._unack_cache.pop(_id) return _id def nack(self, item: Any = None, id: Optional[int] = None) -> Optional[int]: item, search = self._check_id(item, id) with self.action_lock: _id = self._find_item_id(item, search) if _id is None: return None self._mark_ack_status(_id, AckStatus.ready) if _id in self._unack_cache: self._unack_cache.pop(_id) self.total += 1 return _id def update(self, item: Any, id: Optional[int] = None) -> Optional[int]: _id = None if isinstance(item, dict) and "pqid" in item: _id = item.get("pqid") item = item.get("data") if id is not None: _id = id if _id is None: raise ValueError("Provide an id or raw item") obj = self._serializer.dumps(item) self._update(_id, obj) return _id def get( self, block: bool = True, timeout: Optional[float] = None, id: Optional[int] = None, next_in_order: bool = False, raw: bool = False) -> Any: rowid = self._find_item_id(id, search=False) if rowid is None and next_in_order: raise ValueError( "'next_in_order' requires the preceding 'id' be specified." 
) if next_in_order and not isinstance(next_in_order, bool): raise ValueError("'next_in_order' must be a boolean (True/False)") if not block: serialized = self._pop( next_in_order=next_in_order, raw=raw, rowid=rowid ) if serialized is None: raise Empty elif timeout is None: # block until a put event. serialized = self._pop( next_in_order=next_in_order, raw=raw, rowid=rowid ) while serialized is None: self.put_event.clear() self.put_event.wait(TICK_FOR_WAIT) serialized = self._pop( next_in_order=next_in_order, raw=raw, rowid=rowid ) elif timeout < 0: raise ValueError("'timeout' must be a non-negative number") else: # block until the timeout reached endtime = _time.time() + timeout serialized = self._pop( next_in_order=next_in_order, raw=raw, rowid=rowid ) while serialized is None: self.put_event.clear() remaining = endtime - _time.time() if remaining <= 0.0: raise Empty self.put_event.wait( TICK_FOR_WAIT if TICK_FOR_WAIT < remaining else remaining ) serialized = self._pop( next_in_order=next_in_order, raw=raw, rowid=rowid ) return serialized def task_done(self) -> None: """Persist the current state if auto_commit=False.""" if not self.auto_commit: self._task_done() def queue(self) -> Any: rows = self._sql_queue() datarows = [] for row in rows: item = { 'id': row[0], 'data': self._serializer.loads(row[1]), 'timestamp': row[2], 'status': row[3], } datarows.append(item) return datarows @property def size(self) -> int: return self.total def qsize(self) -> int: return max(0, self.size) def active_size(self) -> int: return max(0, self.size + len(self._unack_cache)) def empty(self) -> bool: return self.size == 0 def full(self) -> bool: return False def __len__(self) -> int: return self.size FIFOSQLiteAckQueue = SQLiteAckQueue class FILOSQLiteAckQueue(SQLiteAckQueue): """SQLite3 based FILO queue with ack support.""" _TABLE_NAME = 'ack_filo_queue' # SQL to select a record _SQL_SELECT = ( 'SELECT {key_column}, data, timestamp, status FROM {table_name} ' 'WHERE {key_column} < {rowid} and status < %s ' 'ORDER BY {key_column} DESC LIMIT 1' % AckStatus.unack ) class UniqueAckQ(SQLiteAckQueue): _TABLE_NAME = 'ack_unique_queue' _SQL_CREATE = ( 'CREATE TABLE IF NOT EXISTS {table_name} (' '{key_column} INTEGER PRIMARY KEY AUTOINCREMENT, ' 'data BLOB, timestamp FLOAT, status INTEGER, UNIQUE (data))' ) def put(self, item: Any) -> Optional[int]: obj = self._serializer.dumps(item, sort_keys=True) _id = None try: _id = self._insert_into(obj, _time.time()) except sqlite3.IntegrityError: pass else: self.total += 1 self.put_event.set() return _id persist-queue-1.0.0/persistqueue/sqlbase.py000066400000000000000000000400411464077325100210750ustar00rootroot00000000000000import logging import os import time as _time import sqlite3 import threading from typing import Any, Callable, Tuple, Optional from persistqueue.exceptions import Empty import persistqueue.serializers.pickle sqlite3.enable_callback_tracebacks(True) log = logging.getLogger(__name__) # 10 seconds interval for `wait` of event TICK_FOR_WAIT = 10 def with_conditional_transaction(func: Callable) -> Callable: def _execute(obj: 'SQLBase', *args: Any, **kwargs: Any) -> Any: # for MySQL, connection pool should be used since db connection is # basically not thread-safe _putter = obj._putter if str(type(obj)).find("MySQLQueue") > 0: # use fresh connection from pool not the shared one _putter = obj.get_pooled_conn() with obj.tran_lock: with _putter as tran: # For sqlite3, commit() is called automatically afterwards # but for other db API, this is not TRUE! 
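# In short: a method decorated with this wrapper only builds and returns a (sql, params) pair; # the wrapper executes it here while holding tran_lock, relies on the connection context # manager or an explicit commit() to persist the change, and hands the cursor's lastrowid # back to the caller.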
stat, param = func(obj, *args, **kwargs) s = str(type(tran)) if s.find("Cursor") > 0: cur = tran cur.execute(stat, param) else: cur = tran.cursor() cur.execute(stat, param) cur.close() tran.commit() return cur.lastrowid return _execute def commit_ignore_error(conn: sqlite3.Connection) -> None: """Ignore the error of no transaction is active. The transaction may be already committed by user's task_done call. It's safe to ignore all errors of this kind. """ try: conn.commit() except sqlite3.OperationalError as ex: if 'no transaction is active' in str(ex): log.debug( 'Not able to commit the transaction, ' 'may already be committed.' ) else: raise class SQLBase(object): """SQL base class.""" """SQL base class.""" _TABLE_NAME = 'base' # DB table name _KEY_COLUMN = '' # the name of the key column, used in DB CRUD _SQL_CREATE = '' # SQL to create a table _SQL_UPDATE = '' # SQL to update a record _SQL_INSERT = '' # SQL to insert a record _SQL_SELECT = '' # SQL to select a record _SQL_SELECT_ID = '' # SQL to select a record with criteria _SQL_SELECT_WHERE = '' # SQL to select a record with criteria _SQL_DELETE = '' # SQL to delete a record def __init__(self) -> None: self._serializer = persistqueue.serializers.pickle self.auto_commit = True # Transaction commit behavior # SQL transaction lock self.tran_lock = threading.Lock() # Event signaling new data self.put_event = threading.Event() # Lock for atomic actions self.action_lock = threading.Lock() self.total = 0 # Total tasks self.cursor = 0 # Cursor for task processing # Connection for getting tasks self._getter = None # Connection for putting tasks self._putter = None @with_conditional_transaction def _insert_into(self, *record: Any) -> Tuple[str, Tuple[Any, ...]]: return self._sql_insert, record @with_conditional_transaction def _update(self, key: Any, *args: Any) -> Tuple[str, Tuple[Any, ...]]: args = list(args) + [key] return self._sql_update, args @with_conditional_transaction def _delete(self, key: Any, op: str = '=') -> Tuple[str, Tuple[Any, ...]]: sql = self._SQL_DELETE.format( table_name=self._table_name, key_column=self._key_column, op=op) return sql, (key,) def _pop(self, rowid: Optional[int] = None, raw: bool = False ) -> Optional[Any]: with self.action_lock: if self.auto_commit: row = self._select(rowid=rowid) # Perhaps a sqlite3 bug, sometimes (None, None) is returned # by select, below can avoid these invalid records. 
if row and row[0] is not None: self._delete(row[0]) self.total -= 1 item = self._serializer.loads(row[1]) if raw: return { 'pqid': row[0], 'data': item, 'timestamp': row[2], } else: return item else: row = self._select( self.cursor, op=">", column=self._KEY_COLUMN, rowid=rowid ) if row and row[0] is not None: self.cursor = row[0] self.total -= 1 item = self._serializer.loads(row[1]) if raw: return { 'pqid': row[0], 'data': item, 'timestamp': row[2], } else: return item return None def update(self, item: Any, id: Optional[int] = None) -> int: if isinstance(item, dict) and "pqid" in item: _id = item.get("pqid") item = item.get("data") if id is not None: _id = id if _id is None: raise ValueError("Provide an id or raw item") obj = self._serializer.dumps(item) self._update(_id, obj) return _id def get(self, block: bool = True, timeout: Optional[float] = None, id: Optional[int] = None, raw: bool = False ) -> Any: if isinstance(id, dict) and "pqid" in id: rowid = id.get("pqid") elif isinstance(id, int): rowid = id else: rowid = None if not block: serialized = self._pop(raw=raw, rowid=rowid) if serialized is None: raise Empty elif timeout is None: # block until a put event. serialized = self._pop(raw=raw, rowid=rowid) while serialized is None: self.put_event.clear() self.put_event.wait(TICK_FOR_WAIT) serialized = self._pop(raw=raw, rowid=rowid) elif timeout < 0: raise ValueError("'timeout' must be a non-negative number") else: # block until the timeout reached endtime = _time.time() + timeout serialized = self._pop(raw=raw, rowid=rowid) while serialized is None: self.put_event.clear() remaining = endtime - _time.time() if remaining <= 0.0: raise Empty self.put_event.wait( TICK_FOR_WAIT if TICK_FOR_WAIT < remaining else remaining ) serialized = self._pop(raw=raw, rowid=rowid) return serialized def get_nowait(self, id: Optional[int] = None, raw: bool = False) -> Any: return self.get(block=False, id=id, raw=raw) def task_done(self) -> None: """Persist the current state if auto_commit=False.""" if not self.auto_commit: self._delete(self.cursor, op='<=') self._task_done() def queue(self) -> Any: rows = self._sql_queue().fetchall() datarows = [] for row in rows: item = { 'id': row[0], 'data': self._serializer.loads(row[1]), 'timestamp': row[2], } datarows.append(item) return datarows @with_conditional_transaction def shrink_disk_usage(self) -> Tuple[str, Tuple[()]]: sql = """VACUUM""" return sql, () @property def size(self) -> int: return self.total def qsize(self) -> int: return max(0, self.size) def empty(self) -> bool: return self.size == 0 def full(self) -> bool: return False def __len__(self) -> int: return self.size def _select(self, *args, **kwargs) -> Any: start_key = self._start_key() op = kwargs.get('op', None) column = kwargs.get('column', None) next_in_order = kwargs.get('next_in_order', False) rowid = kwargs.get('rowid') if kwargs.get('rowid', None) else start_key if not next_in_order and rowid != start_key: # Get the record by the id result = self._getter.execute( self._sql_select_id(rowid), args ).fetchone() elif op and column: # Get the next record with criteria rowid = rowid if next_in_order else start_key result = self._getter.execute( self._sql_select_where(rowid, op, column), args ).fetchone() else: # Get the next record rowid = rowid if next_in_order else start_key result = self._getter.execute( self._sql_select(rowid), args ).fetchone() if ( next_in_order and rowid != start_key and (not result or len(result) == 0) ): # sqlackqueue: if we're at the end, start over kwargs['rowid'] = 
start_key result = self._select(*args, **kwargs) return result def _count(self) -> int: sql = 'SELECT COUNT({}) FROM {}'.format( self._key_column, self._table_name ) row = self._getter.execute(sql).fetchone() return row[0] if row else 0 def _start_key(self) -> int: if self._TABLE_NAME == 'ack_filo_queue': return 9223372036854775807 # maxsize else: return 0 def _task_done(self) -> None: """Only required if auto-commit is set as False.""" commit_ignore_error(self._putter) def _sql_queue(self) -> Any: sql = 'SELECT * FROM {}'.format(self._table_name) return self._getter.execute(sql) @property def _table_name(self) -> str: return '`{}_{}`'.format(self._TABLE_NAME, self.name) @property def _key_column(self) -> str: return self._KEY_COLUMN @property def _sql_create(self) -> str: return self._SQL_CREATE.format( table_name=self._table_name, key_column=self._key_column ) @property def _sql_insert(self) -> str: return self._SQL_INSERT.format( table_name=self._table_name, key_column=self._key_column ) @property def _sql_update(self) -> str: return self._SQL_UPDATE.format( table_name=self._table_name, key_column=self._key_column ) def _sql_select_id(self, rowid) -> str: return self._SQL_SELECT_ID.format( table_name=self._table_name, key_column=self._key_column, rowid=rowid, ) def _sql_select(self, rowid) -> str: return self._SQL_SELECT.format( table_name=self._table_name, key_column=self._key_column, rowid=rowid, ) def _sql_select_where(self, rowid, op, column) -> str: return self._SQL_SELECT_WHERE.format( table_name=self._table_name, key_column=self._key_column, rowid=rowid, op=op, column=column, ) def __del__(self) -> None: """Handles sqlite connection when queue was deleted""" if self._getter: self._getter.close() if self._putter: self._putter.close() class SQLiteBase(SQLBase): """SQLite3 base class.""" _TABLE_NAME = 'base' # DB table name _KEY_COLUMN = '' # the name of the key column, used in DB CRUD _SQL_CREATE = '' # SQL to create a table _SQL_UPDATE = '' # SQL to update a record _SQL_INSERT = '' # SQL to insert a record _SQL_SELECT = '' # SQL to select a record _SQL_SELECT_ID = '' # SQL to select a record with criteria _SQL_SELECT_WHERE = '' # SQL to select a record with criteria _SQL_DELETE = '' # SQL to delete a record _MEMORY = ':memory:' # flag indicating store DB in memory def __init__(self, path: str, name: str = 'default', multithreading: bool = False, timeout: float = 10.0, auto_commit: bool = True, serializer: Any = persistqueue.serializers.pickle, db_file_name: Optional[str] = None) -> None: """Initiate a queue in sqlite3 or memory. :param path: path for storing DB file. :param name: the suffix for the table name, table name would be ${_TABLE_NAME}_${name} :param multithreading: if set to True, two db connections will be, one for **put** and one for **get**. :param timeout: timeout in second waiting for the database lock. :param auto_commit: Set to True, if commit is required on every INSERT/UPDATE action, otherwise False, whereas a **task_done** is required to persist changes after **put**. :param serializer: The serializer parameter controls how enqueued data is serialized. It must have methods dump(value, fp) and load(fp). The dump method must serialize the value and write it to fp, and may be called for multiple values with the same fp. The load method must deserialize and return one value from fp, and may be called multiple times with the same fp to read multiple values. 
:param db_file_name: set the db file name of the queue data, otherwise default to `data.db` """ super(SQLiteBase, self).__init__() self.memory_sql = False self.path = path self.name = name self.timeout = timeout self.multithreading = multithreading self.auto_commit = auto_commit self._serializer = serializer self.db_file_name = "data.db" if db_file_name: self.db_file_name = db_file_name self._init() def _init(self) -> None: """Initialize the tables in DB.""" if self.path == self._MEMORY: self.memory_sql = True log.debug("Initializing Sqlite3 Queue in memory.") elif not os.path.exists(self.path): os.makedirs(self.path) log.debug( 'Initializing Sqlite3 Queue with path {}'.format(self.path) ) self._conn = self._new_db_connection( self.path, self.multithreading, self.timeout ) self._getter = self._conn self._putter = self._conn self._conn.execute(self._sql_create) self._conn.commit() # Setup another session only for disk-based queue. if self.multithreading: if not self.memory_sql: self._putter = self._new_db_connection( self.path, self.multithreading, self.timeout ) self._conn.text_factory = str self._putter.text_factory = str # SQLite3 transaction lock self.tran_lock = threading.Lock() self.put_event = threading.Event() def _new_db_connection(self, path, multithreading, timeout ) -> sqlite3.Connection: conn = None if path == self._MEMORY: conn = sqlite3.connect(path, check_same_thread=not multithreading) else: conn = sqlite3.connect( '{}/{}'.format(path, self.db_file_name), timeout=timeout, check_same_thread=not multithreading, ) conn.execute('PRAGMA journal_mode=WAL;') return conn def close(self) -> None: """Closes sqlite connections""" if self._getter is not None: self._getter.close() if self._putter is not None: self._putter.close() def __del__(self) -> None: """Handles sqlite connection when queue was deleted""" self.close() persist-queue-1.0.0/persistqueue/sqlqueue.py000066400000000000000000000056061464077325100213170ustar00rootroot00000000000000"""A thread-safe sqlite3 based persistent queue in Python.""" import logging import sqlite3 import time as _time import threading from typing import Any from persistqueue import sqlbase sqlite3.enable_callback_tracebacks(True) log = logging.getLogger(__name__) class SQLiteQueue(sqlbase.SQLiteBase): """SQLite3 based FIFO queue.""" _TABLE_NAME = 'queue' _KEY_COLUMN = '_id' # the name of the key column, used in DB CRUD # SQL to create a table _SQL_CREATE = ( 'CREATE TABLE IF NOT EXISTS {table_name} (' '{key_column} INTEGER PRIMARY KEY AUTOINCREMENT, ' 'data BLOB, timestamp FLOAT)' ) # SQL to insert a record _SQL_INSERT = 'INSERT INTO {table_name} (data, timestamp) VALUES (?, ?)' # SQL to select a record _SQL_SELECT_ID = ( 'SELECT {key_column}, data, timestamp FROM {table_name} WHERE' ' {key_column} = {rowid}' ) _SQL_SELECT = ( 'SELECT {key_column}, data, timestamp FROM {table_name} ' 'ORDER BY {key_column} ASC LIMIT 1' ) _SQL_SELECT_WHERE = ( 'SELECT {key_column}, data, timestamp FROM {table_name} WHERE' ' {column} {op} ? ORDER BY {key_column} ASC LIMIT 1 ' ) _SQL_UPDATE = 'UPDATE {table_name} SET data = ? WHERE {key_column} = ?' _SQL_DELETE = 'DELETE FROM {table_name} WHERE {key_column} {op} ?' 
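# The _SQL_* templates above are plain format strings: SQLBase fills in table_name and # key_column (plus rowid/op where applicable) through its _sql_* helpers, while the '?' # placeholders are bound to real parameters only at execution time.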
def put(self, item: Any, block: bool = True) -> int: # block kwarg is noop and only here to align with python's queue obj = self._serializer.dumps(item) _id = self._insert_into(obj, _time.time()) self.total += 1 self.put_event.set() return _id def put_nowait(self, item: Any) -> int: return self.put(item, block=False) def _init(self) -> None: super(SQLiteQueue, self)._init() self.action_lock = threading.Lock() if not self.auto_commit: head = self._select() if head: self.cursor = head[0] - 1 else: self.cursor = 0 self.total = self._count() FIFOSQLiteQueue = SQLiteQueue class FILOSQLiteQueue(SQLiteQueue): """SQLite3 based FILO queue.""" _TABLE_NAME = 'filo_queue' # SQL to select a record _SQL_SELECT = ( 'SELECT {key_column}, data FROM {table_name} ' 'ORDER BY {key_column} DESC LIMIT 1' ) class UniqueQ(SQLiteQueue): _TABLE_NAME = 'unique_queue' _SQL_CREATE = ( 'CREATE TABLE IF NOT EXISTS {table_name} (' '{key_column} INTEGER PRIMARY KEY AUTOINCREMENT, ' 'data BLOB, timestamp FLOAT, UNIQUE (data))' ) def put(self, item: Any) -> Any: obj = self._serializer.dumps(item, sort_keys=True) _id = None try: _id = self._insert_into(obj, _time.time()) except sqlite3.IntegrityError: pass else: self.total += 1 self.put_event.set() return _id persist-queue-1.0.0/persistqueue/tests/000077500000000000000000000000001464077325100202345ustar00rootroot00000000000000persist-queue-1.0.0/persistqueue/tests/__init__.py000066400000000000000000000000001464077325100223330ustar00rootroot00000000000000persist-queue-1.0.0/persistqueue/tests/test_mysqlqueue.py000066400000000000000000000220241464077325100240570ustar00rootroot00000000000000# coding=utf-8 import unittest import random from threading import Thread import time import sys from persistqueue.mysqlqueue import MySQLQueue from persistqueue import Empty # db config aligned with .circleci/config.yml db_conf = { "host": "127.0.0.1", "user": "user", "passwd": "passw0rd", "db_name": "testqueue", # "name": "", "port": 3306 } # for appveyor (windows ci), not able to config use the default # https://www.appveyor.com/docs/services-databases/#mysql if sys.platform.startswith('win32'): db_conf = { "host": "127.0.0.1", "user": "root", "passwd": "Password12!", "db_name": "testqueue", # "name": "", "port": 3306 } class MySQLQueueTest(unittest.TestCase): """tests that focus on feature specific to mysql""" def setUp(self): _name = self.id().split(".")[-1:] _name.append(str(time.time())) self._table_name = ".".join(_name) self.queue_class = MySQLQueue self.mysql_queue = MySQLQueue(name=self._table_name, **db_conf) self.queue = self.mysql_queue def tearDown(self): pass tmp_conn = self.mysql_queue.get_pooled_conn() tmp_conn.cursor().execute( "drop table if exists %s" % self.mysql_queue._table_name) tmp_conn.commit() def test_raise_empty(self): q = self.queue q.put('first') d = q.get() self.assertEqual('first', d) self.assertRaises(Empty, q.get, block=False) self.assertRaises(Empty, q.get_nowait) # assert with timeout self.assertRaises(Empty, q.get, block=True, timeout=1.0) # assert with negative timeout self.assertRaises(ValueError, q.get, block=True, timeout=-1.0) del q def test_empty(self): q = self.queue self.assertEqual(q.empty(), True) q.put('first') self.assertEqual(q.empty(), False) q.get() self.assertEqual(q.empty(), True) def test_full(self): # SQL queue `full()` always returns `False` !! 
q = self.queue self.assertEqual(q.full(), False) q.put('first') self.assertEqual(q.full(), False) q.get() self.assertEqual(q.full(), False) def test_open_close_single(self): """Write 1 item, close, reopen checking if same item is there""" q = self.queue q.put(b'var1') del q q = MySQLQueue(name=self._table_name, **db_conf) self.assertEqual(1, q.qsize()) self.assertEqual(b'var1', q.get()) def test_open_close_1000(self): """Write 1000 items, close, reopen checking if all items are there""" q = self.queue for i in range(1000): q.put('var%d' % i) self.assertEqual(1000, q.qsize()) del q q = MySQLQueue(name=self._table_name, **db_conf) self.assertEqual(1000, q.qsize()) for i in range(1000): data = q.get() self.assertEqual('var%d' % i, data) # assert adding another one still works q.put('foobar') data = q.get() self.assertEqual('foobar', data) def test_random_read_write(self): """Test random read/write""" q = self.queue n = 0 for _ in range(1000): if random.random() < 0.5: if n > 0: q.get() n -= 1 else: self.assertRaises(Empty, q.get, block=False) else: q.put('var%d' % random.getrandbits(16)) n += 1 def test_multi_threaded_parallel(self): """Create consumer and producer threads, check parallelism""" m_queue = self.queue def producer(): for i in range(1000): m_queue.put('var%d' % i) def consumer(): for i in range(1000): x = m_queue.get(block=True) self.assertEqual('var%d' % i, x) c = Thread(target=consumer) c.start() p = Thread(target=producer) p.start() p.join() c.join() self.assertEqual(0, m_queue.size) self.assertEqual(0, len(m_queue)) self.assertRaises(Empty, m_queue.get, block=False) def test_multi_threaded_multi_producer(self): """Test mysqlqueue can be used by multiple producers.""" queue = self.queue def producer(seq): for i in range(10): queue.put('var%d' % (i + (seq * 10))) def consumer(): for _ in range(100): data = queue.get(block=True) self.assertTrue('var' in data) c = Thread(target=consumer) c.start() producers = [] for seq in range(10): t = Thread(target=producer, args=(seq,)) t.start() producers.append(t) for t in producers: t.join() c.join() def test_multiple_consumers(self): """Test mysqlqueue can be used by multiple consumers.""" queue = self.queue def producer(): for x in range(1000): queue.put('var%d' % x) counter = [] # Set all to 0 for _ in range(1000): counter.append(0) def consumer(t_index): for i in range(200): data = queue.get(block=True) self.assertTrue('var' in data) counter[t_index * 200 + i] = data p = Thread(target=producer) p.start() consumers = [] for index in range(5): t = Thread(target=consumer, args=(index,)) t.start() consumers.append(t) p.join() for t in consumers: t.join() self.assertEqual(0, queue.qsize()) for x in range(1000): self.assertNotEqual(0, counter[x], "not 0 for counter's index %s" % x) self.assertEqual(len(set(counter)), len(counter)) def test_task_done_with_restart(self): """Test that items are not deleted before task_done.""" q = self.queue for i in range(1, 11): q.put(i) self.assertEqual(1, q.get()) self.assertEqual(2, q.get()) # size is correct before task_done self.assertEqual(8, q.qsize()) q.task_done() # make sure the size still correct self.assertEqual(8, q.qsize()) self.assertEqual(3, q.get()) # without task done del q q = MySQLQueue(name=self._table_name, **db_conf) # After restart, the qsize and head item are the same self.assertEqual(7, q.qsize()) # After restart, the queue still works self.assertEqual(4, q.get()) self.assertEqual(6, q.qsize()) # auto_commit=False del q q = MySQLQueue(name=self._table_name, auto_commit=False, 
**db_conf) self.assertEqual(6, q.qsize()) # After restart, the queue still works self.assertEqual(5, q.get()) self.assertEqual(5, q.qsize()) del q q = MySQLQueue(name=self._table_name, auto_commit=False, **db_conf) # After restart, the queue still works self.assertEqual(5, q.get()) self.assertEqual(5, q.qsize()) def test_protocol_1(self): q = self.queue self.assertEqual(q._serializer.protocol, 2 if sys.version_info[0] == 2 else 4) def test_protocol_2(self): q = self.queue self.assertEqual(q._serializer.protocol, 2 if sys.version_info[0] == 2 else 4) def test_json_serializer(self): q = self.queue x = dict( a=1, b=2, c=dict( d=list(range(5)), e=[1] )) q.put(x) self.assertEqual(q.get(), x) def test_put_0(self): q = self.queue q.put(0) d = q.get(block=False) self.assertIsNotNone(d) def test_get_id(self): q = self.queue q.put("val1") val2_id = q.put("val2") q.put("val3") item = q.get(id=val2_id) # item id should be 2 self.assertEqual(val2_id, 2) # item should get val2 self.assertEqual(item, 'val2') def test_get_raw(self): q = self.queue q.put("val1") item = q.get(raw=True) # item should get val2 self.assertEqual(True, "pqid" in item) self.assertEqual(item.get("data"), 'val1') def test_queue(self): q = self.queue q.put("val1") q.put("val2") q.put("val3") # queue should get the three items d = q.queue() self.assertEqual(len(d), 3) self.assertEqual(d[1].get("data"), "val2") def test_update(self): q = self.queue qid = q.put("val1") q.update(item="val2", id=qid) item = q.get(id=qid) self.assertEqual(item, "val2") persist-queue-1.0.0/persistqueue/tests/test_pdict.py000066400000000000000000000052371464077325100227570ustar00rootroot00000000000000 import shutil import tempfile import unittest from persistqueue import pdict class PDictTest(unittest.TestCase): def setUp(self): self.path = tempfile.mkdtemp(suffix='pdict') def tearDown(self): shutil.rmtree(self.path, ignore_errors=True) def test_unsupported(self): pd = pdict.PDict(self.path, 'pd') pd['key_a'] = 'value_a' self.assertRaises(NotImplementedError, pd.keys) self.assertRaises(NotImplementedError, pd.iterkeys) self.assertRaises(NotImplementedError, pd.values) self.assertRaises(NotImplementedError, pd.itervalues) self.assertRaises(NotImplementedError, pd.items) self.assertRaises(NotImplementedError, pd.iteritems) def _for(): for _ in pd: pass self.assertRaises(NotImplementedError, _for) def test_add(self): pd = pdict.PDict(self.path, 'pd') pd['key_a'] = 'value_a' self.assertEqual(pd['key_a'], 'value_a') self.assertTrue('key_a' in pd) self.assertFalse('key_b' in pd) self.assertEqual(pd.get('key_a'), 'value_a') self.assertEqual(pd.get('key_b'), None) self.assertEqual(pd.get('key_b', 'absent'), 'absent') self.assertRaises(KeyError, lambda: pd['key_b']) pd['key_b'] = 'value_b' self.assertEqual(pd['key_a'], 'value_a') self.assertEqual(pd['key_b'], 'value_b') def test_set(self): pd = pdict.PDict(self.path, 'pd') pd['key_a'] = 'value_a' pd['key_b'] = 'value_b' self.assertEqual(pd['key_a'], 'value_a') self.assertEqual(pd['key_b'], 'value_b') self.assertEqual(pd.get('key_a'), 'value_a') self.assertEqual(pd.get('key_b', 'absent'), 'value_b') pd['key_a'] = 'value_aaaaaa' self.assertEqual(pd['key_a'], 'value_aaaaaa') self.assertEqual(pd['key_b'], 'value_b') def test_delete(self): pd = pdict.PDict(self.path, 'pd') pd['key_a'] = 'value_a' pd['key_b'] = 'value_b' self.assertEqual(pd['key_a'], 'value_a') self.assertEqual(pd['key_b'], 'value_b') del pd['key_a'] self.assertFalse('key_a' in pd) self.assertRaises(KeyError, lambda: pd['key_a']) 
self.assertEqual(pd['key_b'], 'value_b') def test_two_dicts(self): pd_1 = pdict.PDict(self.path, '1') pd_2 = pdict.PDict(self.path, '2') pd_1['key_a'] = 'value_a' pd_2['key_b'] = 'value_b' self.assertEqual(pd_1['key_a'], 'value_a') self.assertEqual(pd_2['key_b'], 'value_b') self.assertRaises(KeyError, lambda: pd_1['key_b']) self.assertRaises(KeyError, lambda: pd_2['key_a']) persist-queue-1.0.0/persistqueue/tests/test_queue.py000066400000000000000000000250411464077325100227730ustar00rootroot00000000000000# coding=utf-8 import os import pickle import random import shutil import sys import tempfile import unittest from collections import namedtuple from nose2.tools import params from threading import Thread from persistqueue.serializers import json as serializers_json from persistqueue.serializers import pickle as serializers_pickle from persistqueue.serializers import msgpack as serializers_msgpack from persistqueue.serializers import cbor2 as serializers_cbor2 from persistqueue import Queue, Empty, Full # map keys as params for readable errors from nose serializer_params = { "serializer=default": {}, "serializer=json": {"serializer": serializers_json}, "serializer=msgpack": {"serializer": serializers_msgpack}, "serializer=cbor2": {"serializer": serializers_cbor2}, "serializer=pickle": {"serializer": serializers_pickle}, } class PersistTest(unittest.TestCase): def setUp(self): self.path = tempfile.mkdtemp(suffix='queue') def tearDown(self): shutil.rmtree(self.path, ignore_errors=True) @params(*serializer_params) def test_open_close_single(self, serializer): """Write 1 item, close, reopen checking if same item is there""" q = Queue(self.path, **serializer_params[serializer]) q.put('var1') del q q = Queue(self.path, **serializer_params[serializer]) self.assertEqual(1, q.qsize()) self.assertEqual('var1', q.get()) q.task_done() del q def test_empty(self): q = Queue(self.path) self.assertEqual(q.empty(), True) q.put('var1') self.assertEqual(q.empty(), False) q.get() self.assertEqual(q.empty(), True) def test_full(self): q = Queue(self.path, maxsize=3) for i in range(1, q.maxsize): q.put('var{}'.format(i)) self.assertEqual(q.full(), False) q.put('var{}'.format(q.maxsize)) self.assertEqual(q.full(), True) q.get() self.assertEqual(q.full(), False) @params(*serializer_params) def test_open_close_1000(self, serializer): """Write 1000 items, close, reopen checking if all items are there""" q = Queue(self.path, **serializer_params[serializer]) for i in range(1000): q.put('var%d' % i) self.assertEqual(1000, q.qsize()) del q q = Queue(self.path, **serializer_params[serializer]) self.assertEqual(1000, q.qsize()) for i in range(1000): data = q.get() self.assertEqual('var%d' % i, data) q.task_done() with self.assertRaises(Empty): q.get_nowait() # assert adding another one still works q.put('foobar') data = q.get() @params(*serializer_params) def test_partial_write(self, serializer): """Test recovery from previous crash w/ partial write""" q = Queue(self.path, **serializer_params[serializer]) for i in range(100): q.put('var%d' % i) del q with open(os.path.join(self.path, 'q00000'), 'ab') as f: pickle.dump('文字化け', f) q = Queue(self.path, **serializer_params[serializer]) self.assertEqual(100, q.qsize()) for i in range(100): self.assertEqual('var%d' % i, q.get()) q.task_done() with self.assertRaises(Empty): q.get_nowait() @params(*serializer_params) def test_random_read_write(self, serializer): """Test random read/write""" q = Queue(self.path, **serializer_params[serializer]) n = 0 for i in range(1000): if 
random.random() < 0.5: if n > 0: q.get_nowait() q.task_done() n -= 1 else: with self.assertRaises(Empty): q.get_nowait() else: q.put('var%d' % random.getrandbits(16)) n += 1 @params(*serializer_params) def test_multi_threaded(self, serializer): """Create consumer and producer threads, check parallelism""" q = Queue(self.path, **serializer_params[serializer]) def producer(): for i in range(1000): q.put('var%d' % i) def consumer(): for i in range(1000): q.get() q.task_done() c = Thread(target=consumer) c.start() p = Thread(target=producer) p.start() c.join() p.join() q.join() with self.assertRaises(Empty): q.get_nowait() @params(*serializer_params) def test_garbage_on_head(self, serializer): """Adds garbage to the queue head and let the internal integrity checks fix it""" q = Queue(self.path, **serializer_params[serializer]) q.put('var1') del q with open(os.path.join(self.path, 'q00000'), 'ab') as f: f.write(b'garbage') q = Queue(self.path, **serializer_params[serializer]) q.put('var2') self.assertEqual(2, q.qsize()) self.assertEqual('var1', q.get()) q.task_done() @params(*serializer_params) def test_task_done_too_many_times(self, serializer): """Test too many task_done called.""" q = Queue(self.path, **serializer_params[serializer]) q.put('var1') q.get() q.task_done() with self.assertRaises(ValueError): q.task_done() @params(*serializer_params) def test_get_timeout_negative(self, serializer): q = Queue(self.path, **serializer_params[serializer]) q.put('var1') with self.assertRaises(ValueError): q.get(timeout=-1) @params(*serializer_params) def test_get_timeout(self, serializer): """Test when get failed within timeout.""" q = Queue(self.path, **serializer_params[serializer]) q.put('var1') q.get() with self.assertRaises(Empty): q.get(timeout=1) @params(*serializer_params) def test_put_nowait(self, serializer): """Tests the put_nowait interface.""" q = Queue(self.path, **serializer_params[serializer]) q.put_nowait('var1') self.assertEqual('var1', q.get()) q.task_done() @params(*serializer_params) def test_put_maxsize_reached(self, serializer): """Test that maxsize reached.""" q = Queue(self.path, maxsize=10, **serializer_params[serializer]) for x in range(10): q.put(x) with self.assertRaises(Full): q.put('full_now', block=False) @params(*serializer_params) def test_put_timeout_reached(self, serializer): """Test put with block and timeout.""" q = Queue(self.path, maxsize=2, **serializer_params[serializer]) for x in range(2): q.put(x) with self.assertRaises(Full): q.put('full_and_timeout', block=True, timeout=1) @params(*serializer_params) def test_put_timeout_negative(self, serializer): """Test and put with timeout < 0""" q = Queue(self.path, maxsize=1, **serializer_params[serializer]) with self.assertRaises(ValueError): q.put('var1', timeout=-1) @params(*serializer_params) def test_put_block_and_wait(self, serializer): """Test block until queue is not full.""" q = Queue(self.path, maxsize=10, **serializer_params[serializer]) def consumer(): for i in range(5): q.get() q.task_done() def producer(): for j in range(16): q.put('var%d' % j) p = Thread(target=producer) p.start() c = Thread(target=consumer) c.start() c.join() val = q.get_nowait() p.join() self.assertEqual('var5', val) @params(*serializer_params) def test_clear_tail_file(self, serializer): """Test that only remove tail file when calling task_done.""" q = Queue(self.path, chunksize=10, **serializer_params[serializer]) for i in range(35): q.put('var%d' % i) for _ in range(15): q.get() q = Queue(self.path, chunksize=10, 
**serializer_params[serializer]) self.assertEqual(q.qsize(), 35) for _ in range(15): q.get() # the first tail file gets removed after task_done q.task_done() for _ in range(16): q.get() # the second and third files get removed after task_done q.task_done() self.assertEqual(q.qsize(), 4) def test_protocol(self): # test that protocol is set properly expect_protocol = 2 if sys.version_info[0] == 2 else 4 self.assertEqual( serializers_pickle.protocol, expect_protocol, ) # test that protocol is used properly serializer = namedtuple("Serializer", ["dump", "load"])( serializers_pickle.dump, lambda fp: fp.read()) q = Queue(path=self.path, serializer=serializer) q.put(b'a') self.assertEqual(q.get(), pickle.dumps(b'a', protocol=expect_protocol)) @params(*serializer_params) def test_del(self, serializer): """test that __del__ can be called successfully""" q = Queue(self.path, **serializer_params[serializer]) q.__del__() self.assertTrue(q.headf.closed) self.assertTrue(q.tailf.closed) @params(*serializer_params) def test_autosave_get(self, serializer): """test the autosave feature saves on get()""" q = Queue(self.path, autosave=True, **serializer_params[serializer]) q.put('var1') q.put('var2') self.assertEqual('var1', q.get()) del q # queue should save on get(), only one item should remain q = Queue(self.path, autosave=True, **serializer_params[serializer]) self.assertEqual(1, q.qsize()) self.assertEqual('var2', q.get()) del q @params(*serializer_params) def test_autosave_join(self, serializer): """Enabling autosave should still allow task_done/join behavior""" q = Queue(self.path, autosave=True, **serializer_params[serializer]) for i in range(10): q.put('var%d' % i) def consumer(): for i in range(10): q.get() # this should still 'count down' properly and allow q.join() # to finish q.task_done() c = Thread(target=consumer) c.start() q.join() with self.assertRaises(Empty): q.get_nowait() persist-queue-1.0.0/persistqueue/tests/test_sqlackqueue.py000066400000000000000000000374221464077325100242000ustar00rootroot00000000000000# coding=utf-8 import random import shutil import sys import tempfile import unittest from threading import Thread import uuid from persistqueue.sqlackqueue import ( SQLiteAckQueue, FILOSQLiteAckQueue, UniqueAckQ, ) from persistqueue import Empty class SQLite3AckQueueTest(unittest.TestCase): def setUp(self): self.path = tempfile.mkdtemp(suffix='sqlackqueue') self.auto_commit = True self.queue_class = SQLiteAckQueue def tearDown(self): shutil.rmtree(self.path, ignore_errors=True) def test_raise_empty(self): q = self.queue_class(self.path, auto_commit=self.auto_commit) q.put('first') d = q.get() self.assertEqual('first', d) self.assertRaises(Empty, q.get, block=False) # assert with timeout self.assertRaises(Empty, q.get, block=True, timeout=1.0) # assert with negative timeout self.assertRaises(ValueError, q.get, block=True, timeout=-1.0) def test_empty(self): q = self.queue_class(self.path, auto_commit=self.auto_commit) self.assertEqual(q.empty(), True) q.put('first') self.assertEqual(q.empty(), False) q.get() self.assertEqual(q.empty(), True) def test_full(self): # SQL queue `full()` always returns `False` !! 
q = self.queue_class(self.path, auto_commit=self.auto_commit) self.assertEqual(q.full(), False) q.put('first') self.assertEqual(q.full(), False) q.get() self.assertEqual(q.full(), False) def test_open_close_single(self): """Write 1 item, close, reopen checking if same item is there""" q = self.queue_class(self.path, auto_commit=self.auto_commit) q.put(b'var1') del q q = self.queue_class(self.path) self.assertEqual(1, q.qsize()) self.assertEqual(b'var1', q.get()) def test_open_close_1000(self): """Write 1000 items, close, reopen checking if all items are there""" q = self.queue_class(self.path, auto_commit=self.auto_commit) for i in range(1000): q.put('var%d' % i) self.assertEqual(1000, q.qsize()) del q q = self.queue_class(self.path) self.assertEqual(1000, q.qsize()) for i in range(1000): data = q.get() self.assertEqual('var%d' % i, data) # assert adding another one still works q.put('foobar') data = q.get() q.shrink_disk_usage() self.assertEqual('foobar', data) def test_random_read_write(self): """Test random read/write""" q = self.queue_class(self.path, auto_commit=self.auto_commit) n = 0 for _ in range(1000): if random.random() < 0.5: if n > 0: q.get() n -= 1 else: self.assertRaises(Empty, q.get, block=False) else: # UniqueQueue will block at get() if this is not unique # uuid.uuid4() should be unique q.put('var%s' % uuid.uuid4()) n += 1 def test_multi_threaded_parallel(self): """Create consumer and producer threads, check parallelism""" # self.skipTest("Not supported multi-thread.") m_queue = self.queue_class( path=self.path, multithreading=True, auto_commit=self.auto_commit ) def producer(): for i in range(1000): m_queue.put('var%d' % i) def consumer(): for i in range(1000): x = m_queue.get(block=True) self.assertEqual('var%d' % i, x) c = Thread(target=consumer) c.start() p = Thread(target=producer) p.start() p.join() c.join() self.assertEqual(0, m_queue.size) self.assertEqual(0, len(m_queue)) self.assertRaises(Empty, m_queue.get, block=False) def test_multi_threaded_multi_producer(self): """Test sqlqueue can be used by multiple producers.""" queue = self.queue_class( path=self.path, multithreading=True, auto_commit=self.auto_commit ) def producer(seq): for i in range(10): queue.put('var%d' % (i + (seq * 10))) def consumer(): for _ in range(100): data = queue.get(block=True) self.assertTrue('var' in data) c = Thread(target=consumer) c.start() producers = [] for seq in range(10): t = Thread(target=producer, args=(seq,)) t.start() producers.append(t) for t in producers: t.join() c.join() def test_multiple_consumers(self): """Test sqlqueue can be used by multiple consumers.""" queue = self.queue_class( path=self.path, multithreading=True, auto_commit=self.auto_commit ) def producer(): for x in range(1000): queue.put('var%d' % x) counter = [] # Set all to 0 for _ in range(1000): counter.append(0) def consumer(index): for i in range(200): data = queue.get(block=True) self.assertTrue('var' in data) counter[index * 200 + i] = data p = Thread(target=producer) p.start() consumers = [] for index in range(5): t = Thread(target=consumer, args=(index,)) t.start() consumers.append(t) p.join() for t in consumers: t.join() self.assertEqual(0, queue.qsize()) for x in range(1000): self.assertNotEqual( 0, counter[x], "not 0 for counter's index %s" % x ) def test_protocol_1(self): shutil.rmtree(self.path, ignore_errors=True) q = self.queue_class(path=self.path) self.assertEqual( q._serializer.protocol, 2 if sys.version_info[0] == 2 else 4 ) def test_protocol_2(self): q = self.queue_class(path=self.path) 
self.assertEqual( q._serializer.protocol, 2 if sys.version_info[0] == 2 else 4 ) def test_ack_and_clear(self): q = self.queue_class(path=self.path) ret_list = [] for _ in range(100): q.put("val%s" % _) for _ in range(100): ret_list.append(q.get()) for ret in ret_list: q.ack(ret) self.assertEqual(q.acked_count(), 100) q.clear_acked_data(keep_latest=10) self.assertEqual(q.acked_count(), 10) q.shrink_disk_usage() def test_ack_unknown_item(self): q = self.queue_class(path=self.path) q.put("val1") val1 = q.get() q.ack("val2") q.nack("val3") q.ack_failed("val4") self.assertEqual(q.qsize(), 0) self.assertEqual(q.unack_count(), 1) q.ack(val1) self.assertEqual(q.unack_count(), 0) def test_resume_unack(self): q = self.queue_class(path=self.path) q.put("val1") val1 = q.get() self.assertEqual(q.empty(), True) self.assertEqual(q.qsize(), 0) self.assertEqual(q.unack_count(), 1) self.assertEqual(q.ready_count(), 0) del q q = self.queue_class(path=self.path, auto_resume=False) self.assertEqual(q.empty(), True) self.assertEqual(q.qsize(), 0) self.assertEqual(q.unack_count(), 1) self.assertEqual(q.ready_count(), 0) q.resume_unack_tasks() self.assertEqual(q.empty(), False) self.assertEqual(q.qsize(), 1) self.assertEqual(q.unack_count(), 0) self.assertEqual(q.ready_count(), 1) self.assertEqual(val1, q.get()) del q q = self.queue_class(path=self.path, auto_resume=True) self.assertEqual(q.empty(), False) self.assertEqual(q.qsize(), 1) self.assertEqual(q.unack_count(), 0) self.assertEqual(q.ready_count(), 1) self.assertEqual(val1, q.get()) def test_ack_unack_ack_failed(self): q = self.queue_class(path=self.path) q.put("val1") q.put("val2") q.put("val3") val1 = q.get() val2 = q.get() val3 = q.get() # qsize should be zero when all item is getted from q self.assertEqual(q.qsize(), 0) self.assertEqual(q.unack_count(), 3) # active size should be equal to qsize + unack_count self.assertEqual(q.active_size(), 3) # nack will let the item requeued as ready status q.nack(val1) self.assertEqual(q.qsize(), 1) self.assertEqual(q.ready_count(), 1) # ack failed is just mark item as ack failed q.ack_failed(val3) self.assertEqual(q.ack_failed_count(), 1) # ack should not effect qsize q.ack(val2) self.assertEqual(q.acked_count(), 1) self.assertEqual(q.qsize(), 1) # all ack* related action will reduce unack count self.assertEqual(q.unack_count(), 0) # reget the nacked item ready_val = q.get() self.assertEqual(ready_val, val1) q.ack(ready_val) self.assertEqual(q.qsize(), 0) self.assertEqual(q.acked_count(), 2) self.assertEqual(q.ready_count(), 0) def test_put_0(self): q = self.queue_class(path=self.path) q.put(0) d = q.get(block=False) self.assertIsNotNone(d) def test_get_id(self): q = self.queue_class(path=self.path) q.put("val1") val2_id = q.put("val2") q.put("val3") item = q.get(id=val2_id) # item id should be 2 self.assertEqual(val2_id, 2) # item should get val2 self.assertEqual(item, 'val2') def test_get_next_in_order(self): q = self.queue_class(path=self.path) val1_id = q.put("val1") q.put("val2") q.put("val3") item = q.get(id=val1_id, next_in_order=True) # item id should be 1 self.assertEqual(val1_id, 1) # item should get val2 self.assertEqual(item, 'val2') q.nack(item) # queue should roll over to begining if next > end item = q.get(id=3, next_in_order=True, raw=True) q.nack(item) self.assertEqual(item.get("pqid"), 1) def test_get_raw(self): q = self.queue_class(path=self.path) q.put("val1") item = q.get(raw=True) q.nack(item) # item should get val2 self.assertEqual(True, "pqid" in item) self.assertEqual(item.get("data"), 
'val1') def test_nack_raw(self): q = self.queue_class(path=self.path) q.put("val1") item = q.get(raw=True) # nack a raw return q.nack(item) # size should be 1 after nack self.assertEqual(q.qsize(), 1) def test_ack_active_size(self): q = self.queue_class(path=self.path) q.put("val1") item = q.get(raw=True) # active_size should be 1 as it hasn't been acked self.assertEqual(q.active_size(), 1) q.ack(item) # active_size should be 0 after ack self.assertEqual(q.active_size(), 0) def test_queue(self): q = self.queue_class(path=self.path) q.put("val1") q.put("val2") q.put("val3") # queue should get the three items d = q.queue() self.assertEqual(len(d), 3) self.assertEqual(d[1].get("data"), "val2") def test_update(self): q = self.queue_class(path=self.path) qid = q.put("val1") q.update(id=qid, item="val2") item = q.get(id=qid) q.nack(item) self.assertEqual(item, "val2") class SQLite3QueueInMemory(SQLite3AckQueueTest): def setUp(self): self.path = ":memory:" self.auto_commit = True self.queue_class = SQLiteAckQueue def test_open_close_1000(self): self.skipTest('Memory based sqlite is not persistent.') def test_open_close_single(self): self.skipTest('Memory based sqlite is not persistent.') def test_multiple_consumers(self): self.skipTest( 'Skipped due to occasional crash during multithreading mode.' ) def test_multi_threaded_multi_producer(self): self.skipTest( 'Skipped due to occasional crash during multithreading mode.' ) def test_multi_threaded_parallel(self): self.skipTest( 'Skipped due to occasional crash during multithreading mode.' ) def test_task_done_with_restart(self): self.skipTest('Skipped due to not persistent.') def test_protocol_2(self): self.skipTest('In memory queue is always new.') def test_resume_unack(self): self.skipTest('Memory based sqlite is not persistent.') class FILOSQLite3AckQueueTest(SQLite3AckQueueTest): def setUp(self): self.path = tempfile.mkdtemp(suffix='filo_sqlackqueue') self.auto_commit = True self.queue_class = FILOSQLiteAckQueue def tearDown(self): shutil.rmtree(self.path, ignore_errors=True) def test_open_close_1000(self): """Write 1000 items, close, reopen checking if all items are there""" q = self.queue_class(self.path, auto_commit=self.auto_commit) for i in range(1000): q.put('var%d' % i) self.assertEqual(1000, q.qsize()) del q q = self.queue_class(self.path) self.assertEqual(1000, q.qsize()) for i in range(1000): data = q.get() self.assertEqual('var%d' % (999 - i), data) # assert adding another one still works q.put('foobar') data = q.get() q.nack(data) self.assertEqual('foobar', data) def test_multi_threaded_parallel(self): """Create consumer and producer threads, check parallelism""" # self.skipTest("Not supported multi-thread.") m_queue = self.queue_class( path=self.path, multithreading=True, auto_commit=self.auto_commit ) def producer(): for i in range(1000): m_queue.put('var%d' % i) def consumer(): # We cannot quarantee what next number will be like in FIFO for _ in range(1000): x = m_queue.get(block=True) self.assertTrue('var' in x) c = Thread(target=consumer) c.start() p = Thread(target=producer) p.start() p.join() c.join() self.assertEqual(0, m_queue.size) self.assertEqual(0, len(m_queue)) self.assertRaises(Empty, m_queue.get, block=False) def test_get_next_in_order(self): q = self.queue_class(path=self.path) val1_id = q.put("val1") q.put("val2") q.put("val3") item = q.get(id=val1_id, next_in_order=True) q.nack(item) # item id should be 1 self.assertEqual(val1_id, 1) # item should get val2 self.assertEqual(item, 'val3') # queue should roll over to 
end if next < begining item = q.get(id=1, next_in_order=True, raw=True) q.nack(item) self.assertEqual(item.get("pqid"), 3) # Note # We have to be carefull to avoid test cases from SQLite3AckQueueTest having # duplicate values in their q.put()'s. This could block the test indefinitely class SQLite3UniqueAckQueueTest(SQLite3AckQueueTest): def setUp(self): self.path = tempfile.mkdtemp(suffix='sqlackqueue') self.auto_commit = True self.queue_class = UniqueAckQ def test_add_duplicate_item(self): q = self.queue_class(self.path) q.put(1111) self.assertEqual(1, q.size) # put duplicate item q.put(1111) self.assertEqual(1, q.size) q.put(2222) self.assertEqual(2, q.size) del q q = self.queue_class(self.path) self.assertEqual(2, q.size) persist-queue-1.0.0/persistqueue/tests/test_sqlqueue.py000066400000000000000000000354001464077325100235130ustar00rootroot00000000000000# coding=utf-8 import random import shutil import sys import tempfile import unittest from threading import Thread from persistqueue import Empty from persistqueue import SQLiteQueue, FILOSQLiteQueue, UniqueQ from persistqueue.serializers import json as serializers_json from persistqueue.serializers import pickle as serializers_pickle from persistqueue.serializers import msgpack as serializers_msgpack from persistqueue.serializers import cbor2 as serializers_cbor2 class SQLite3QueueTest(unittest.TestCase): def setUp(self): self.path = tempfile.mkdtemp(suffix='sqlqueue') self.auto_commit = True self.queue_class = SQLiteQueue def tearDown(self): shutil.rmtree(self.path, ignore_errors=True) def test_raise_empty(self): q = self.queue_class(self.path, auto_commit=self.auto_commit) q.put('first') d = q.get() self.assertEqual('first', d) self.assertRaises(Empty, q.get, block=False) self.assertRaises(Empty, q.get_nowait) # assert with timeout self.assertRaises(Empty, q.get, block=True, timeout=1.0) # assert with negative timeout self.assertRaises(ValueError, q.get, block=True, timeout=-1.0) del q def test_empty(self): q = self.queue_class(self.path, auto_commit=self.auto_commit) self.assertEqual(q.empty(), True) q.put('first') self.assertEqual(q.empty(), False) q.get() self.assertEqual(q.empty(), True) def test_full(self): # SQL queue `full()` always returns `False` !! 

persist-queue-1.0.0/persistqueue/tests/test_sqlqueue.py
# coding=utf-8

import random
import shutil
import sys
import tempfile
import unittest
from threading import Thread

from persistqueue import Empty
from persistqueue import SQLiteQueue, FILOSQLiteQueue, UniqueQ
from persistqueue.serializers import json as serializers_json
from persistqueue.serializers import pickle as serializers_pickle
from persistqueue.serializers import msgpack as serializers_msgpack
from persistqueue.serializers import cbor2 as serializers_cbor2


class SQLite3QueueTest(unittest.TestCase):
    def setUp(self):
        self.path = tempfile.mkdtemp(suffix='sqlqueue')
        self.auto_commit = True
        self.queue_class = SQLiteQueue

    def tearDown(self):
        shutil.rmtree(self.path, ignore_errors=True)

    def test_raise_empty(self):
        q = self.queue_class(self.path, auto_commit=self.auto_commit)
        q.put('first')
        d = q.get()
        self.assertEqual('first', d)
        self.assertRaises(Empty, q.get, block=False)
        self.assertRaises(Empty, q.get_nowait)
        # assert with timeout
        self.assertRaises(Empty, q.get, block=True, timeout=1.0)
        # assert with negative timeout
        self.assertRaises(ValueError, q.get, block=True, timeout=-1.0)
        del q

    def test_empty(self):
        q = self.queue_class(self.path, auto_commit=self.auto_commit)
        self.assertEqual(q.empty(), True)
        q.put('first')
        self.assertEqual(q.empty(), False)
        q.get()
        self.assertEqual(q.empty(), True)

    def test_full(self):
        # SQL queue `full()` always returns `False` !!
        q = self.queue_class(self.path, auto_commit=self.auto_commit)
        self.assertEqual(q.full(), False)
        q.put('first')
        self.assertEqual(q.full(), False)
        q.get()
        self.assertEqual(q.full(), False)

    def test_open_close_single(self):
        """Write 1 item, close, reopen checking if same item is there"""
        q = self.queue_class(self.path, auto_commit=self.auto_commit)
        q.put(b'var1')
        del q
        q = SQLiteQueue(self.path)
        self.assertEqual(1, q.qsize())
        self.assertEqual(b'var1', q.get())

    def test_open_close_1000(self):
        """Write 1000 items, close, reopen checking if all items are there"""
        q = self.queue_class(self.path, auto_commit=self.auto_commit)
        for i in range(1000):
            q.put('var%d' % i)
        self.assertEqual(1000, q.qsize())
        del q
        q = SQLiteQueue(self.path)
        self.assertEqual(1000, q.qsize())
        for i in range(1000):
            data = q.get()
            self.assertEqual('var%d' % i, data)
        # assert adding another one still works
        q.put('foobar')
        data = q.get()
        q.shrink_disk_usage()
        self.assertEqual('foobar', data)

    def test_random_read_write(self):
        """Test random read/write"""
        q = self.queue_class(self.path, auto_commit=self.auto_commit)
        n = 0
        for _ in range(1000):
            if random.random() < 0.5:
                if n > 0:
                    q.get()
                    n -= 1
                else:
                    self.assertRaises(Empty, q.get, block=False)
            else:
                q.put('var%d' % random.getrandbits(16))
                n += 1

    def test_multi_threaded_parallel(self):
        """Create consumer and producer threads, check parallelism"""
        # self.skipTest("Not supported multi-thread.")
        m_queue = SQLiteQueue(path=self.path, multithreading=True,
                              auto_commit=self.auto_commit)

        def producer():
            for i in range(1000):
                m_queue.put('var%d' % i)

        def consumer():
            for i in range(1000):
                x = m_queue.get(block=True)
                self.assertEqual('var%d' % i, x)

        c = Thread(target=consumer)
        c.start()
        p = Thread(target=producer)
        p.start()
        p.join()
        c.join()
        self.assertEqual(0, m_queue.size)
        self.assertEqual(0, len(m_queue))
        self.assertRaises(Empty, m_queue.get, block=False)

    def test_multi_threaded_multi_producer(self):
        """Test sqlqueue can be used by multiple producers."""
        queue = self.queue_class(path=self.path, multithreading=True,
                                 auto_commit=self.auto_commit)

        def producer(seq):
            for i in range(10):
                queue.put('var%d' % (i + (seq * 10)))

        def consumer():
            for _ in range(100):
                data = queue.get(block=True)
                self.assertTrue('var' in data)

        c = Thread(target=consumer)
        c.start()
        producers = []
        for seq in range(10):
            t = Thread(target=producer, args=(seq,))
            t.start()
            producers.append(t)
        for t in producers:
            t.join()
        c.join()

    def test_multiple_consumers(self):
        """Test sqlqueue can be used by multiple consumers."""
        queue = self.queue_class(path=self.path, multithreading=True,
                                 auto_commit=self.auto_commit)

        def producer():
            for x in range(1000):
                queue.put('var%d' % x)

        counter = []
        # Set all to 0
        for _ in range(1000):
            counter.append(0)

        def consumer(index):
            for i in range(200):
                data = queue.get(block=True)
                self.assertTrue('var' in data)
                counter[index * 200 + i] = data

        p = Thread(target=producer)
        p.start()
        consumers = []
        for index in range(5):
            t = Thread(target=consumer, args=(index,))
            t.start()
            consumers.append(t)

        p.join()
        for t in consumers:
            t.join()

        self.assertEqual(0, queue.qsize())
        for x in range(1000):
            self.assertNotEqual(0, counter[x],
                                "not 0 for counter's index %s" % x)
        self.assertEqual(len(set(counter)), len(counter))

    def test_task_done_with_restart(self):
        """Test that items are not deleted before task_done."""
        q = self.queue_class(path=self.path, auto_commit=False)
        for i in range(1, 11):
            q.put(i)
        self.assertEqual(1, q.get())
        self.assertEqual(2, q.get())
        # size is correct before task_done
        self.assertEqual(8, q.qsize())
        q.task_done()
        # make sure the size is still correct
        self.assertEqual(8, q.qsize())
        self.assertEqual(3, q.get())
        # without task_done
        del q
        q = SQLiteQueue(path=self.path, auto_commit=False)
        # After restart, the qsize and head item are the same
        self.assertEqual(8, q.qsize())
        # After restart, the queue still works
        self.assertEqual(3, q.get())
        self.assertEqual(7, q.qsize())

    def test_protocol_1(self):
        shutil.rmtree(self.path, ignore_errors=True)
        q = self.queue_class(path=self.path)
        self.assertEqual(q._serializer.protocol,
                         2 if sys.version_info[0] == 2 else 4)

    def test_protocol_2(self):
        q = self.queue_class(path=self.path)
        self.assertEqual(q._serializer.protocol,
                         2 if sys.version_info[0] == 2 else 4)

    def test_json_serializer(self):
        q = self.queue_class(
            path=self.path,
            serializer=serializers_json)
        x = dict(
            a=1,
            b=2,
            c=dict(
                d=list(range(5)),
                e=[1]
            ))
        q.put(x)
        self.assertEqual(q.get(), x)

    def test_put_0(self):
        q = self.queue_class(path=self.path)
        q.put(0)
        d = q.get(block=False)
        self.assertIsNotNone(d)

    def test_get_id(self):
        q = self.queue_class(path=self.path)
        q.put("val1")
        val2_id = q.put("val2")
        q.put("val3")
        item = q.get(id=val2_id)
        # item id should be 2
        self.assertEqual(val2_id, 2)
        # item should be val2
        self.assertEqual(item, 'val2')

    def test_get_raw(self):
        q = self.queue_class(path=self.path)
        q.put("val1")
        item = q.get(raw=True)
        # raw item should carry a pqid and the data val1
        self.assertEqual(True, "pqid" in item)
        self.assertEqual(item.get("data"), 'val1')

    def test_queue(self):
        q = self.queue_class(path=self.path)
        q.put("val1")
        q.put("val2")
        q.put("val3")
        # queue() should return the three items
        d = q.queue()
        self.assertEqual(len(d), 3)
        self.assertEqual(d[1].get("data"), "val2")

    def test_update(self):
        q = self.queue_class(path=self.path)
        qid = q.put("val1")
        q.update(item="val2", id=qid)
        item = q.get(id=qid)
        self.assertEqual(item, "val2")


class SQLite3QueueNoAutoCommitTest(SQLite3QueueTest):
    def setUp(self):
        self.path = tempfile.mkdtemp(suffix='sqlqueue_auto_commit')
        self.auto_commit = False
        self.queue_class = SQLiteQueue

    def test_multiple_consumers(self):
        """Skipped due to a known bug with auto_commit=False:

        FAIL: test_multiple_consumers
        (tests.test_sqlqueue.SQLite3QueueNoAutoCommitTest)
        Test sqlqueue can be used by multiple consumers.
        ----------------------------------------------------------------------
        Traceback (most recent call last):
          File "persist-queue\tests\test_sqlqueue.py", line 183,
          in test_multiple_consumers
            self.assertEqual(0, queue.qsize())
        AssertionError: 0 != 72
        """
        self.skipTest('Skipped due to a known bug above.')
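
# Illustrative sketch, not part of the test suite: with auto_commit=False a
# consumed item is only removed once task_done() is called, so a consumer
# that dies before task_done() sees the item again after reopening the
# queue (this is what test_task_done_with_restart checks).  `path` is a
# hypothetical directory.
def _example_task_done_recovery(path):
    from persistqueue import SQLiteQueue
    q = SQLiteQueue(path, auto_commit=False)
    q.put('payload')
    assert q.get() == 'payload'
    # no task_done() here -- simulate a crash by dropping the handle
    del q
    q = SQLiteQueue(path, auto_commit=False)
    assert q.get() == 'payload'  # the uncommitted get() was rolled back
    q.task_done()                # now the removal is persisted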
class SQLite3QueueInMemory(SQLite3QueueTest):
    skipstr = 'Skipped due to occasional crash during multithreading mode.'

    def setUp(self):
        self.path = ":memory:"
        self.auto_commit = True
        self.queue_class = SQLiteQueue

    def test_open_close_1000(self):
        self.skipTest('Memory based sqlite is not persistent.')

    def test_open_close_single(self):
        self.skipTest('Memory based sqlite is not persistent.')

    def test_multiple_consumers(self):
        self.skipTest(self.skipstr)

    def test_multi_threaded_multi_producer(self):
        self.skipTest(self.skipstr)

    def test_multi_threaded_parallel(self):
        self.skipTest(self.skipstr)

    def test_task_done_with_restart(self):
        self.skipTest('Skipped due to not persistent.')

    def test_protocol_2(self):
        self.skipTest('In memory queue is always new.')


class FILOSQLite3QueueTest(unittest.TestCase):
    def setUp(self):
        self.path = tempfile.mkdtemp(suffix='filo_sqlqueue')
        self.auto_commit = True
        self.queue_class = SQLiteQueue

    def tearDown(self):
        shutil.rmtree(self.path, ignore_errors=True)

    def test_open_close_1000(self):
        """Write 1000 items, close, reopen checking if all items are there"""
        q = FILOSQLiteQueue(self.path, auto_commit=self.auto_commit)
        for i in range(1000):
            q.put('var%d' % i)
        self.assertEqual(1000, q.qsize())
        del q
        q = FILOSQLiteQueue(self.path)
        self.assertEqual(1000, q.qsize())
        for i in range(1000):
            data = q.get()
            self.assertEqual('var%d' % (999 - i), data)
        # assert adding another one still works
        q.put('foobar')
        data = q.get()
        self.assertEqual('foobar', data)


class FILOSQLite3QueueNoAutoCommitTest(FILOSQLite3QueueTest):
    def setUp(self):
        self.path = tempfile.mkdtemp(suffix='filo_sqlqueue_auto_commit')
        self.auto_commit = False
        self.queue_class = FILOSQLiteQueue
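
# Illustrative sketch, not part of the test suite: UniqueQ ignores puts of
# items it already holds.  As the serialization tests below assert, two
# dicts with the same keys and values count as the same item even when the
# key order differs.  `path` is a hypothetical directory.
def _example_unique_queue(path):
    from persistqueue import UniqueQ
    from persistqueue.serializers import msgpack as serializers_msgpack
    q = UniqueQ(path, serializer=serializers_msgpack)
    q.put({'foo': 1, 'bar': 2})
    q.put({'bar': 2, 'foo': 1})  # same content, different key order
    assert q.total == 1          # the duplicate put was ignored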
class SQLite3UniqueQueueTest(unittest.TestCase):
    def setUp(self):
        self.path = tempfile.mkdtemp(suffix='sqlqueue')
        self.auto_commit = True
        self.queue_class = UniqueQ

    def test_add_duplicate_item(self):
        q = UniqueQ(self.path)
        q.put(1111)
        self.assertEqual(1, q.size)
        # put duplicate item
        q.put(1111)
        self.assertEqual(1, q.size)
        q.put(2222)
        self.assertEqual(2, q.size)
        del q
        q = UniqueQ(self.path)
        self.assertEqual(2, q.size)

    def test_multiple_consumers(self):
        """Test UniqueQ can be used by multiple consumers."""
        queue = UniqueQ(path=self.path, multithreading=True,
                        auto_commit=self.auto_commit)

        def producer():
            for x in range(1000):
                queue.put('var%d' % x)

        counter = []
        # Set all to 0
        for _ in range(1000):
            counter.append(0)

        def consumer(index):
            for i in range(200):
                data = queue.get(block=True)
                self.assertTrue('var' in data)
                counter[index * 200 + i] = data

        p = Thread(target=producer)
        p.start()
        consumers = []
        for index in range(5):
            t = Thread(target=consumer, args=(index,))
            t.start()
            consumers.append(t)

        p.join()
        for t in consumers:
            t.join()

        self.assertEqual(0, queue.qsize())
        for x in range(1000):
            self.assertNotEqual(0, counter[x],
                                "not 0 for counter's index %s" % x)
        self.assertEqual(len(set(counter)), len(counter))

    def test_unique_dictionary_serialization_pickle(self):
        queue = UniqueQ(
            path=self.path,
            multithreading=True,
            auto_commit=self.auto_commit,
            serializer=serializers_pickle,
        )
        queue.put({"foo": 1, "bar": 2})
        self.assertEqual(queue.total, 1)
        queue.put({"bar": 2, "foo": 1})
        self.assertEqual(queue.total, 1)

    def test_unique_dictionary_serialization_msgpack(self):
        queue = UniqueQ(
            path=self.path,
            multithreading=True,
            auto_commit=self.auto_commit,
            serializer=serializers_msgpack
        )
        queue.put({"foo": 1, "bar": 2})
        self.assertEqual(queue.total, 1)
        queue.put({"bar": 2, "foo": 1})
        self.assertEqual(queue.total, 1)

    def test_unique_dictionary_serialization_cbor2(self):
        queue = UniqueQ(
            path=self.path,
            multithreading=True,
            auto_commit=self.auto_commit,
            serializer=serializers_cbor2
        )
        queue.put({"foo": 1, "bar": 2})
        self.assertEqual(queue.total, 1)
        queue.put({"bar": 2, "foo": 1})
        self.assertEqual(queue.total, 1)

    def test_unique_dictionary_serialization_json(self):
        queue = UniqueQ(
            path=self.path,
            multithreading=True,
            auto_commit=self.auto_commit,
            serializer=serializers_json
        )
        queue.put({"foo": 1, "bar": 2})
        self.assertEqual(queue.total, 1)
        queue.put({"bar": 2, "foo": 1})
        self.assertEqual(queue.total, 1)

persist-queue-1.0.0/requirements.txt

persist-queue-1.0.0/scripts/

persist-queue-1.0.0/scripts/publish.sh
#!/usr/bin/env bash
set -e
BASE_DIR=`pwd`
NAME=$(basename $BASE_DIR)
if [[ "$NAME" != "persist-queue" ]];then
    echo "must run this in project root"
    exit 1
fi
rm -rf ./build/*.* ./dist/*.*
python setup.py build sdist
python setup.py build bdist_wheel  # requires `pip install wheel`
twine check ${BASE_DIR}/dist/*.tar.gz
twine check ${BASE_DIR}/dist/*.whl
twine upload ${BASE_DIR}/dist/*

persist-queue-1.0.0/setup.cfg
[bdist_wheel]
universal = 0

persist-queue-1.0.0/setup.py
#!/usr/bin/env python
# coding=utf-8
from setuptools import setup, find_packages


def get_extras():
    return {
        "extra": open("extra-requirements.txt").read().splitlines()
    }


setup(
    name='persist-queue',
    version=__import__('persistqueue').__version__,
    description=(
        'A thread-safe disk based persistent queue in Python.'
    ),
    long_description=open('README.rst').read(),
    long_description_content_type='text/x-rst',
    author=__import__('persistqueue').__author__,
    author_email='wangxu198709@gmail.com',
    maintainer=__import__('persistqueue').__author__,
    maintainer_email='wangxu198709@gmail.com',
    license=__import__('persistqueue').__license__,
    packages=find_packages(),
    extras_require=get_extras(),
    platforms=["all"],
    url='http://github.com/peter-wangxu/persist-queue',
    classifiers=[
        'Development Status :: 4 - Beta',
        'Operating System :: OS Independent',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: BSD License',
        'Programming Language :: Python',
        'Programming Language :: Python :: Implementation',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'Topic :: Software Development :: Libraries'
    ],
    package_data={'persistqueue': ['py.typed']}
)

persist-queue-1.0.0/test-requirements.txt
tox
mock>=2.0.0
flake8>=3.2.1
eventlet>=0.19.0
msgpack>=0.5.6
cbor2>=5.6.0
nose2>=0.6.5
coverage!=4.5
cov_core>=1.15.0
virtualenv>=15.1.0
cryptography;sys_platform!="win32"  # package only required for tests under mysql8.0&linux

persist-queue-1.0.0/tox.ini
[tox]
minversion = 2.0
skipsdist = True
recreate = false
envlist = py38, py39, py310, py311, py312, pep8, cover
deps = -r{toxinidir}/test-requirements.txt
       -r{toxinidir}/extra-requirements.txt
       -r{toxinidir}/requirements.txt

[testenv]
setenv = VIRTUAL_ENV={envdir}
usedevelop = True
deps =
       -r{toxinidir}/test-requirements.txt
       -r{toxinidir}/extra-requirements.txt
       -r{toxinidir}/requirements.txt
whitelist_externals = bash
                      find
commands = nose2 {posargs}

[testenv:pep8]
commands = flake8 ./persistqueue ./persistqueue/tests {posargs}

[testenv:cover]
commands = nose2 --with-coverage --coverage-report xml --coverage-report html --coverage-report term {posargs}