pax_global_header00006660000000000000000000000064147500554520014521gustar00rootroot0000000000000052 comment=c23dbdadb6fecdf505eb4231559561913b84c13f boltons-25.0.0/000077500000000000000000000000001475005545200132655ustar00rootroot00000000000000boltons-25.0.0/.github/000077500000000000000000000000001475005545200146255ustar00rootroot00000000000000boltons-25.0.0/.github/workflows/000077500000000000000000000000001475005545200166625ustar00rootroot00000000000000boltons-25.0.0/.github/workflows/tests.yaml000066400000000000000000000034371475005545200207170ustar00rootroot00000000000000name: Tests on: push: paths-ignore: - "docs/**" - "*.md" - "*.rst" pull_request: paths-ignore: - "docs/**" - "*.md" - "*.rst" jobs: tests: name: ${{ matrix.name }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: include: - { name: Linux, python: "3.13", os: ubuntu-latest, tox: py313 } - { name: Windows, python: "3.13", os: windows-latest, tox: py313 } - { name: Mac, python: "3.13", os: macos-latest, tox: py313 } - { name: "3.12", python: "3.12", os: ubuntu-latest, tox: py312 } - { name: "3.11", python: "3.11", os: ubuntu-latest, tox: py311 } - { name: "3.10", python: "3.10", os: ubuntu-latest, tox: py310 } - { name: "3.9", python: "3.9", os: ubuntu-latest, tox: py39 } - { name: "3.8", python: "3.8", os: ubuntu-latest, tox: py38 } - { name: "3.7", python: "3.7", os: ubuntu-22.04, tox: py37 } - { name: "PyPy3", python: "pypy-3.9", os: ubuntu-latest, tox: pypy3 } steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - name: update pip run: | pip install -U wheel pip install -U setuptools python -m pip install -U pip - name: get pip cache dir id: pip-cache shell: bash run: | echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT" - name: cache pip uses: actions/cache@v4 with: path: ${{ steps.pip-cache.outputs.dir }} key: pip|${{ runner.os }}|${{ matrix.python }}|${{ hashFiles('pyproject.toml') }}|${{ hashFiles('requirements/*.txt') }} - run: pip install tox - run: tox -e ${{ matrix.tox }} boltons-25.0.0/.gitignore000066400000000000000000000006171475005545200152610ustar00rootroot00000000000000docs/_build tmp.py htmlcov/ venv/ *.py[cod] venv-rtd/ # emacs *~ ._* .\#* \#*\# # C extensions *.so # Packages *.egg *.egg-info dist build eggs parts bin var sdist develop-eggs .installed.cfg lib lib64 # Installer logs pip-log.txt # Unit test / coverage reports .coverage .tox nosetests.xml # Translations *.mo # Mr Developer .mr.developer.cfg .project .pydevproject # Vim *.sw[op] .cache/ boltons-25.0.0/.readthedocs.yaml000066400000000000000000000011771475005545200165220ustar00rootroot00000000000000# .readthedocs.yaml # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Set the version of Python and other tools you might need build: os: ubuntu-22.04 tools: python: "3.10" # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py # If using Sphinx, optionally build your docs in additional formats such as PDF # formats: # - pdf # Optionally declare the Python requirements required to build your docs python: install: - requirements: docs/requirements-rtd.txt - method: pip path: .boltons-25.0.0/CHANGELOG.md000066400000000000000000001672451475005545200151150ustar00rootroot00000000000000# boltons Changelog Since February 20, 2013 there have been 45 releases and 1492 commits for an average of one 33-commit release about every 9 weeks. 
Versions are named according to the [CalVer](https://calver.org) versioning scheme (`YY.MINOR.MICRO`). ## 25.0.0 _(February 2, 2025)_ - Added Python 3.13 support - Replace deprecated `utcnow()` - Add fsync to [`fileutils.atomic_save`][fileutils.atomic_save] - Add [`fileutils.rotate_file`][fileutils.rotate_file] ## 24.1.0 _(November 1, 2024)_ - Added `max_depth` parameter to [`fileutils.iter_find_files`][fileutils.iter_find_files] - Added `enter` parameter to [`iterutils.research`][iterutils.research] to support traversing custom data types - Add optional print tracing for [`iterutils.remap`][iterutils.remap] for easier debugging - Fixed [`typeutils.Sentinel`][typeutils.make_sentinel] copy behavior to return self - Tentative Python 3.13 support ([#365][i365], [#366][i366]) [i365]: https://github.com/mahmoud/boltons/issues/365 [i366]: https://github.com/mahmoud/boltons/issues/366 ## 24.0.0 Per the RFC in issue [#365][i339], boltons is now **Python 3 only**. 3.7+ for now. If you're a Python 2 user, feel free to pin at `boltons<24.0.0`. Other minor changes: - Added Python 3.12 support ([#361][i361]) - Fix [dictutils.OneToOne][dictutils.OneToOne]'s `update()` behavior with empty iterables [i339]: https://github.com/mahmoud/boltons/issues/339 [i361]: https://github.com/mahmoud/boltons/issues/361 ## 23.1.1 _(November 1, 2023)_ Tiny release to include more test files in the sdist (source distribution) on PyPI. ## 23.1.0 _(October 31, 2023)_ - Add `fill`/`end` parameters for [`iterutils.windowed`][iterutils.windowed] and [`iterutils.pairwise`][iterutils.pairwise], respectively ([#350][i350]) - Fix cache eviction for [`cacheutils.LRU`][cacheutils.LRU] ([#348][i348]) - Fix OrderedMultiDict (OMD) pickleability under Py3 ([#337][i337]) - `funcutils.copy_function` maintains kw-only defaults ([#336][i336]) - Support OMD `ior` ([#341][i341]) [i350]: https://github.com/mahmoud/boltons/issues/350 [i348]: https://github.com/mahmoud/boltons/issues/348 [i341]: https://github.com/mahmoud/boltons/issues/341 [i337]: https://github.com/mahmoud/boltons/issues/337 [i336]: https://github.com/mahmoud/boltons/issues/336 [cacheutils.LRU]: http://boltons.readthedocs.org/en/latest/cacheutils.html#boltons.cacheutils.LRU [iterutils.windowed]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.windowed [iterutils.pairwise]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.pairwise ## 23.0.0 _(February 19, 2023)_ - Overdue update for Python 3.10 and 3.11 support ([#294][i294], [#303][i303], [#320][i320], [#323][i323], [#326][i326]/[#327][i327]) - Add [iterutils.chunk_ranges][iterutils.chunk_ranges] ([#312][i312]) - Improvements to `SpooledBytesIO`/`SpooledStringIO` ([#305][i305]) - Bugfix for infinite daterange issue when start and stop is the same ([#302][i302]) - Fix `Bits.as_list` behavior ([#315][i315]) 21.0.0 --- _(May 16, 2021)_ - Fix [OMD][dictutils.OrderedMultiDict].addlist when the added list is empty - Add [funcutils.noop][funcutils.noop], satisfying [PEP 559](https://www.python.org/dev/peps/pep-0559/) - Support lists for [iterutils.bucketize][iterutils.bucketize] - Python 3.9 test fixes for OMD (PEP 584, see [#271][i271]) - Make [typeutils.make_sentinel][typeutils.make_sentinel] more pickleable - [jsonutils.reverse_iter_lines][jsonutils.reverse_iter_lines] now works on Py3 and Windows [jsonutils.reverse_iter_lines]: http://boltons.readthedocs.org/en/latest/jsonutils.html#boltons.jsonutils.reverse_iter_lines [funcutils.noop]: 
https://boltons.readthedocs.io/en/latest/funcutils.html#boltons.funcutils.noop [i271]: https://github.com/mahmoud/boltons/issues/271 ## 20.2.1 _(August 11, 2020)_ - Improve import time of [iterutils][iterutils] by deferring hashlib/socket imports - Add custom `repr` parameter to [funcutils.format_invocation][funcutils.format_invocation] 20.2.0 --- _(June 21, 2020)_ - Added [iterutils.lstrip][iterutils.lstrip], [iterutils.rstrip][iterutils.rstrip], [iterutils.strip][iterutils.strip] - More robust and complete [strutils.strip_ansi][strutils.strip_ansi] - Add [iterutils.untyped_sorted][iterutils.untyped_sorted] - Fixes to [IndexedSet][IndexedSet] rsub and index methods - Expose text mode flag in [fileutils.AtomicSaver][fileutils.AtomicSaver] - Add [strutils.int_list_complement][strutils.int_list_complement] and [strutils.int_list_to_int_tuples][strutils.int_list_to_int_tuples] to the _int_list_ suite. - Docs: intersphinx links finally point to Python 3 docs 20.1.0 --- _(March 29, 2020)_ - Add [funcutils.update_wrapper][funcutils.update_wrapper], used to make a wrapper function reflect various aspects of the wrapped function's API. - Fix [FunctionBuilder][FunctionBuilder] handling of functions without `__module__` - Add `partial` support to [FunctionBuilder][FunctionBuilder] - Fix [NetstringSocket][socketutils.NetstringSocket]'s handling of arguments in `read_ns` - Fix [IndexedSet][IndexedSet]'s `index()` method to account for removals - Add `seekable`, `readable`, and `writable` to SpooledIOBase - Add a special case to `singularize` - Fix various warnings for Py3.9 20.0.0 --- _(January 8, 2020)_ - New module [pathutils][pathutils]: - [pathutils.augpath][pathutils.augpath] augments a path by modifying its components - [pathutils.shrinkuser][pathutils.shrinkuser] inverts :func:`os.path.expanduser`. 
- [pathutils.expandpath][pathutils.expandpath] shell-like environ and tilde expansion - add `include_dirs` param to [fileutils.iter_find_files][fileutils.iter_find_files] - Make [funcutils.format_invocation][funcutils.format_invocation] more deterministic - add [strutils.unwrap_text][strutils.unwrap_text] which does what you think to wrapped text - Py3 fixes - [iterutils.chunked][iterutils.chunked] to work with the `bytes` type ([#231][i231]) - [cacheutils.ThresholdCounter][cacheutils.ThresholdCounter]'s `get_common_count()` [i231]: https://github.com/mahmoud/boltons/issues/231 [pathutils]: https://boltons.readthedocs.io/en/latest/pathutils.html [pathutils.augpath]: https://boltons.readthedocs.io/en/latest/pathutils.html#boltons.pathutils.augpath [pathutils.augpath]: https://boltons.readthedocs.io/en/latest/pathutils.html#boltons.pathutils.augpath [pathutils.shrinkuser]: https://boltons.readthedocs.io/en/latest/pathutils.html#boltons.pathutils.shrinkuser [pathutils.expandpath]: https://boltons.readthedocs.io/en/latest/pathutils.html#boltons.pathutils.expandpath [strutils.unwrap_text]: https://boltons.readthedocs.io/en/latest/strutils.html#boltons.strutils.unwrap_text ## 19.3.0 _(October 28, 2019)_ Three funcutils: - [funcutils.format_invocation][funcutils.format_invocation] for formatting simple function calls `func(pos1, pos2, kw_k=kw_v)` - [funcutils.format_exp_repr][funcutils.format_exp_repr] for formatting a repr like `Type(pos, kw_k=kw_v)` - [funcutils.format_nonexp_repr][funcutils.format_nonexp_repr] for formatting a repr like `` [funcutils.format_invocation]: https://boltons.readthedocs.io/en/latest/funcutils.html#boltons.funcutils.format_invocation [funcutils.format_exp_repr]: https://boltons.readthedocs.io/en/latest/funcutils.html#boltons.funcutils.format_exp_repr [funcutils.format_nonexp_repr]: https://boltons.readthedocs.io/en/latest/funcutils.html#boltons.funcutils.format_nonexp_repr ## 19.2.0 _(October 19, 2019)_ A bunch of small fixes and enhancements. - [tbutils.TracebackInfo][tbutils.TracebackInfo]'s from_frame now respects `level` arg - [OrderedMultiDict.sorted()][OrderedMultiDict.sorted] now maintains all items, not just the most recent - [setutils.complement()][setutils.complement] now supports `__rsub__` for better interop with the builtin `set` - [FunctionBuilder][FunctionBuilder] fixed a few py3 warnings related to inspect module usage (`formatargspec`) - [iterutils.bucketize][iterutils.bucketize] now takes a string key which works like an attribute getter, similar to other iterutils functions - Docstring fixes across the board - CI fixes for Travis default dist change [OrderedMultiDict.sorted]: http://boltons.readthedocs.org/en/latest/dictutils.html#boltons.dictutils.OrderedMultiDict.sorted ## 19.1.0 _(February 28, 2019)_ Couple of enhancements, couple of cleanups. - [queueutils][queueutils] now supports float-based priorities ([#204][i204]) - [FunctionBuilder][funcutils.FunctionBuilder] has a new `get_arg_names()` method, and its `get_defaults_dict()` method finally includes kwonly argument defaults. - [strutils.gzip_bytes][strutils.gzip_bytes] arrives to match [strutils.gunzip_bytes][strutils.gunzip_bytes] [i204]: https://github.com/mahmoud/boltons/issues/204 ## 19.0.1 _(February 12, 2019)_ Quick release to enhance [FunctionBuilder][funcutils.FunctionBuilder] and [funcutils.wraps][funcutils.wraps] to maintain function annotations on Python 3+. 
([#133][i133], [#134][i134], [#203][i203]) [i133]: https://github.com/mahmoud/boltons/issues/133 [i134]: https://github.com/mahmoud/boltons/issues/134 [i203]: https://github.com/mahmoud/boltons/issues/203 ## 19.0.0 _(February 10, 2019)_ A very big release indeed, perhaps the biggest yet. A big, big thank you to all the contributors! - New types and utilities - [dictutils.ManyToMany][dictutils.ManyToMany] arrives, to complement [dictutils.OneToOne][dictutils.OneToOne] - [dictutils.FrozenDict][dictutils.FrozenDict] brings immutable mapping to a module near you ([#105][i105]) - [setutils.complement()][setutils.complement] introduces "negative" sets, useful for exclusion and many other set operations - [iterutils.soft_sorted()][iterutils.soft_sorted] allows for looser, more flexible sorting of sequences - [iterutils.flatten_iter()][iterutils.flatten_iter] and [iterutils.flatten()][iterutils.flatten], to collapse nested iterables. ([#118][i118]) - [mathutils.Bits][mathutils.Bits] type for representing a bitstring and translating between integer, bytestring, hex, and boolean sequence representations. - funcutils improvements - [FunctionBuilder][funcutils.FunctionBuilder] and [funcutils.wraps][funcutils.wraps] now support coroutines/async ([#194][i194]) - [FunctionBuilder.add_arg()][funcutils.FunctionBuilder.add_arg] allows the addition of arguments to the signature, to match [FunctionBuilder.remove_arg()][funcutils.FunctionBuilder.remove_arg] ([#201][i201]) - Similarly [funcutils.wraps()][funcutils.wraps] now takes an "expected" argument, to complement "injected" ([#161][i161]) - Other bugfixes and improvements - [cacheutils.LRI][cacheutils.LRI] is now threadsafe and correctly evicts when duplicate keys are added ([#155][i155], [#157][i157]) - [dictutils.subdict()][dictutils.subdict] now does its best to return the same type of dictionary it was passed. - [urlutils][urlutils] now has better IPv6 support and URL can be used more natively like a string - Improve singularization in [strutils][strutils] - Fix some deprecation warnings in Python 3.7 ([#165][i165], [#196][i196]) - Document the change in dict constructor behavior affecting [dictutils.OMD][dictutils.OMD] under Python 3.7+ ([#179][i179]) [i105]: https://github.com/mahmoud/boltons/issues/105 [i118]: https://github.com/mahmoud/boltons/issues/118 [i155]: https://github.com/mahmoud/boltons/issues/155 [i157]: https://github.com/mahmoud/boltons/issues/157 [i161]: https://github.com/mahmoud/boltons/issues/161 [i165]: https://github.com/mahmoud/boltons/issues/165 [i179]: https://github.com/mahmoud/boltons/issues/179 [i194]: https://github.com/mahmoud/boltons/issues/194 [i195]: https://github.com/mahmoud/boltons/issues/195 [i196]: https://github.com/mahmoud/boltons/issues/196 [i201]: https://github.com/mahmoud/boltons/issues/201 ## 18.0.1 _(August 29, 2018)_ A few bugfixes and a handy text utility. - Add MultiSub for multiple string substitutions in a single call ([#162][i162]) - `tableutils.Table.to_text()` is more Markdown compatible - Add LICENSE to package ([#164][i164]) - `atomic_save` works better with `overwrite=True` ([#161][i161]) - Reduced memory footprint on `tbutils._DeferredLine` with `__slots__` 18.0.0 --- _(March 2, 2018)_ - Add `` and `` structure to tableutils.Table HTML output, which helps with styling and other functionality (e.g., jQuery datatables). - Add [dictutils.subdict()][dictutils.subdict] to get a filtered version of a dictionary based on a subset of keys. ([#150][i150]) - Add beta version of cacheutils.MinIDMap. 
17.2.0 --- _(December 16, 2017)_ A big release with a lot of features and bugfixes, big and small. Just in time for the holidays! - Better handling of `file` and `file`-like objects in [remap][iterutils.remap]'s `default_enter` - Fix line-by-line iteration in [ioutils][ioutils] types - Change [strutils.slugify][strutils.slugify] to always output at least a single-character slug (in cases of all-punctuation/whitespace inputs). - Fix [DeferredValue][formatutils.DeferredValue] caching in [formatutils][formatutils] - Add [OneToOne][dictutils.OneToOne] to [dictutils][dictutils] - Add [MultiFileReader][ioutils.MultiFileReader] to [ioutils][ioutils] (see [#135][i135]) - Support passing `dir` argument to [ioutils][ioutils] SpooledIO types - fix default arguments for [mathutils.clamp][mathutils.clamp] (see [#128][i128]) - Add [iterutils.research][iterutils.research], a [remap][iterutils.remap]-based recursive search function for nested data - Improved and expanded [urlutils.SCHEME_PORT_MAP][urlutils.SCHEME_PORT_MAP] - Simplify [urlutils.find_all_links][urlutils.find_all_links] signature 17.1.0 --- _(February 27, 2017)_ Add urlutils module, with URL type and find_all_links function. also update Sentinel for Python 3 falsiness - Add urlutils module, complete with RFC3986-compliant `URL` type - Also add `urlutils.find_all_links` function, which heuristically finds all links in plaintext, and creates URLs out of them. - Update typeutils.Sentinel to be appropriately falsy on Python 3 17.0.0 --- _(January 24, 2017)_ Several tweaks and enhancements to ring in the new year. - [tbutils][tbutils] objects like the [ExceptionInfo][tbutils.ExceptionInfo] are now more easily JSON-serializable thanks to a tweak to [Callpoint][tbutils.Callpoint]. - SpooledIO objects like [ioutils.SpooledBytesIO][ioutils.SpooledBytesIO] are now `bool`-able. - [iterutils.bucketize][iterutils.bucketize] gains the `value_transform` and `key_filter` arguments. - [cachedproperty][cacheutils.cachedproperty] properly maintains docstring - [funcutils.wraps][funcutils.wraps] maintains a reference to the wrapped function with `__wrapped__` attribute. - A bit of cleanup to be backwards compatible to Python 3.3 16.5.1 --- _(November 6, 2016)_ Mostly bug fixes and various tweaks, optimizations, and documentation. Also added a bit of functionality in the form of [ioutils][ioutils] and some GUID stuff. - Add [ioutils][ioutils] with [SpooledStringIO][ioutils.SpooledStringIO] and [SpooledBytesIO][ioutils.SpooledBytesIO], two in-memory file-like objects, like the stdlib [StringIO][StringIO], except that they automatically spill over to disk when they reach a configurable size. - Add [iterutils.GUIDerator][iterutils.GUIDerator] and [iterutils.SequentialGUIDerator][iterutils.SequentialGUIDerator], two methods of getting random iterables. - Add [mathutils.clamp][mathutils.clamp], a combined min-max function, like numpy's clip. - Optimized [iterutils.first][iterutils.first]. - Enabled spillover kwargs in [funcutils.wraps][funcutils.wraps] - fix for default [remap][iterutils.remap] set support, fixes [#84][i84] - improving and testing exceptions around classmethod and staticmethod for [funcutils.wraps][funcutils.wraps] and [FunctionBuilder][funcutils.FunctionBuilder], fixes [#86][i86] to the degree possible. 16.5.0 --- _(July 16, 2016)_ A few minor changes, and medium-sized breaking change to [cacheutils][cacheutils]. - [cacheutils][cacheutils] caching decorators now take the function/method into account by default. 
This was done by adding the scoped argument to [@cached][cacheutils.cached] and [@cachedmethod][cacheutils.cachedmethod] (and removing selfish from cachedmethod). also fixed a bug in a cachedmethod test, as well as added docs for scoped and key arguments. all of this to fix [#83][i83]. - [tableutils.Table][tableutils.Table] cell html can be customized by overriding `get_cell_html` method. - [funcutils.total_ordering][funcutils.total_ordering], a [functools.total_ordering][functools.total_ordering] backport for python 2.6. - [funcutils.FunctionBuilder][funcutils.FunctionBuilder] function names are now configurable. 16.4.1 --- _(June 14, 2016)_ This release primarily contains several [statsutils][statsutils] updates. - The biggest change was the addition of [Stats.format_histogram][statsutils.Stats.format_histogram] complete with Freedman bin selection and other useful options. - Added inter-quartile range (iqr) to [statsutils.Stats][statsutils.Stats] - Adding mad (median absolute deviation) to [Stats.describe][statsutils.Stats.describe], since median and std_dev were already there. 16.4.0 --- _(June 8, 2016)_ another significant release, thanks to the addition of funcutils.wraps and funcutils.FunctionBuilder. also iterutils.chunked speedup, and tbutils.ParsedException.to_string. - [funcutils.wraps][funcutils.wraps]: Just like functools.wraps, but can preserve the function signature as well. - [funcutils.FunctionBuilder][funcutils.FunctionBuilder]: The basis for [funcutils.wraps][funcutils.wraps], this full-featured type enables programmatically creating functions, from scratch or from existing functions. Supports all Python 2 and 3 function features. - [ecoutils][ecoutils]: Python 2.4 and 2.5 support. - [iterutils][iterutils]: optimize [iterutils.chunked_iter][iterutils.chunked_iter] (2-5x faster depending on runtime). [See #79][i79]. - [tbutils][tbutils]: add the [ParsedException.to_string][tbutils.ParsedException.to_string] method, to convert parsed exceptions back into strings, possibly after manipulation - switch FunctionBuilder on Python 2 to be congruent with Python 3 (keywords attribute renamed to varkw, preview users might have to adjust) ## 16.3.1 _(May 24, 2016)_ Just a couple of [ecoutils][ecoutils] follow-ons, removing uuid dependency and adding the ability to scrub identifiable data. ## 16.3.0 _(May 23, 2016)_ Big, big update. Lots of additions, a few bugfixes. - [ecoutils][ecoutils] - Python runtime/environment profile generator - [timeutils.strpdate][timeutils.strpdate] - like datetime.datetime.strpdate but for date - [timeutils.daterange][timeutils.daterange] - like range() but for datetime.date objects - [strutils.parse_int_list][strutils.parse_int_list] and [strutils.format_int_list][strutils.format_int_list] - [cacheutils][cacheutils] - [cachedproperty][cacheutils.cachedproperty] - [cacheutils.cachedmethod][cacheutils.cachedmethod] - [cacheutils.cached][cacheutils.cached] now accepts a callable, as well. - `cacheutils.make_cache_key` is now public, should others need it - [statsutils.Stats][statsutils.Stats] update, several new methods, including [Stats.describe][statsutils.Stats.describe] - A few [socketutils][socketutils] platform tweaks - `debugutils.wrap_trace` preview 16.2.2 --- _(May 3, 2016)_ many small tweaks to socketutils.BufferedSocket, including optional inclusion of the delimiter in recv_until. 
also undid the enabling of bak files with AtomicSaver on windows - Small [socketutils.BufferedSocket][socketutils.BufferedSocket] tweaks - make recv_until conditionally return the delimiter (by default it does not). also fix a NetstringException inheritance typo - [socketutils][socketutils]: rename BufferedSocket.recv_lock to \_recv_lock, and same for send_lock. - add a bunch of simple passthrough methods to better fill out socket's API - add .fileno/.close/.shutdown to [socketutils.BufferedSocket][socketutils.BufferedSocket] - added type/family/proto [socketutils.BufferedSocket][socketutils.BufferedSocket] passthrough properties - BufferedSocket: also lock on .shutdown() - adding an rbuf_unconsumed attribute for post-close debugging, per @doublereedkurt's request - `getsendbuffer()` returns a bytestring and `recv_size()` uses the proper `._recvsize` on the first socket fetch - [fileutils.AtomicSaver][fileutils.AtomicSaver]: revert bak file as it was causing confusion, per [nvie/pip-tools#351](https://github.com/nvie/pip-tools/issues/351) ## 16.2.1 _(April 29, 2016)_ This version sees the soft debut of [socketutils][socketutils], which includes wrappers and tools for working with the built-in socket. A lot of [socketutils.BufferedSocket][socketutils.BufferedSocket] changes. - [BufferedSocket.recv_until][socketutils.BufferedSocket.recv_until] now supports multibyte delimiters and also includes the delimiter in its returns. - Better BufferedSocket timeout discipline throughout. - Various BufferedSocket argument name changes, _maxbytes_ became _maxsize_, _n_ became _size_, _marker_ became _delimiter_, etc. - [BufferedSocket][socketutils.BufferedSocket] BufferedSocket became threadsafe - [BufferedSocket.recv][socketutils.BufferedSocket.recv] now always returns the contents of the internal buffer before doing a socket call. - [BufferedSocket.recv_close][socketutils.BufferedSocket.recv_close] now exists to receive until the sending end closes the connection. - Can now pass _recvsize_ to [BufferedSocket][socketutils.BufferedSocket] constructor to tune the size passed to the lower-level recv call. - [socketutils][socketutils] got documented and tested. ## 16.2.0 _(April 18, 2016)_ adding shell args escaper-joiners to strutils (escape_shell_args, args2cmd, args2sh) as well as a rare breaking fix to [iterutils.pairwise][iterutils.pairwise]. - Argument joiners, functions to join command line arguments in context-appropriate ways: - [strutils.escape_shell_args][strutils.escape_shell_args] - [strutils.args2cmd][strutils.args2cmd] - [strutils.args2sh][strutils.args2sh] - BREAKING: finally fixing [iterutils.pairwise][iterutils.pairwise]. pairwise used to call to `chunked`, now it calls to `windowed`. `pairwise([1, 2, 3, 4])` no longer returns `[(1, 2), (3, 4)]`. Instead, it returns `[(1, 2), (2, 3), (3, 4)]`, which is what I always mean when I say pairwise, but not what the original contributor implemented. - Adding a universal wheel distribution option! ## 16.1.1 _(March 6, 2016)_ Added [iterutils.same][iterutils.same], improvement of Windows [fileutils.AtomicSaver][fileutils.AtomicSaver] behavior for old filesystems, bugfix on [strutils.is_uuid][strutils.is_uuid], expansion of [strutils.pluralize][strutils.pluralize], new trove classifiers and docs improvements! 
- [fileutils.replace][fileutils.replace]: use bak file option for win32 ReplaceFile for slightly better corner case coverage on less featureful filesystems - [strutils.pluralize][strutils.pluralize]: Add more irregular plurals - [strutils.is_uuid][strutils.is_uuid]: Catch un-parsable UUIDs. - [iterutils.same][iterutils.same]: Return `True` when all values in iterable are the same. ## 16.1.0 _(February 24, 2016)_ The centerpiece of this release is highly improved Windows support for [fileutils.atomic_save][fileutils.atomic_save] via [ReplaceFile](https://msdn.microsoft.com/en-us/library/windows/desktop/aa365512%28v=vs.85%29.aspx) system call. This functionality is also made available directly via [fileutils.replace][fileutils.replace], which is akin to Python 3.3+'s [os.replace][os.replace], except that `os.replace`'s approach has [arguably poorer behavior and atomicity](http://stupidpythonideas.blogspot.com/2014/07/getting-atomic-writes-right.html) compared to `fileutils.replace`. Also, a couple new strutils, and [iterutils.backoff][iterutils.backoff] grew a jitter argument. - [iterutils.backoff][iterutils.backoff] now supports start=0 - More comprehensive [iterutils.backoff][iterutils.backoff] argument checking/validation - [fileutils.replace][fileutils.replace] and [fileutils.atomic_rename][fileutils.atomic_rename] are now public functions in [fileutils][fileutils] with cross-platform implementations ([discussion here](https://github.com/mahmoud/boltons/issues/60)) - [tableutils.Table][tableutils.Table]s have a metadata argument and attribute for miscellaneous metadata. - [strutils.is_ascii][strutils.is_ascii] and [strutils.is_uuid][strutils.is_uuid]: About as straightforward as they are handy. - Tox testing improvements ## 16.0.1 _(January 24, 2016)_ DummyFile, Table.metadata, better exception handling, and in-progress iterutils.get_path - Small format fix in [iterutils.one][iterutils.one] for None - Initial implementation of [fileutils.DummyFile][fileutils.DummyFile], which allows for easy no-op file handling without restructuring code. Sort of like a dummy RLock for systems without threading, if you've seen those. - avoid catching BaseException in all boltons - better error handling in iterutils.get_path ## 16.0.0 One important fix and one small but handy string function. - Fixed an [LRU][cacheutils.LRU] bug related to the 15.1.1 refactor. Also enhanced LRU testing with doubly-linked list invariant enforcement. - Added [strutils.indent][strutils.indent], the counterpart to [textwrap.dedent](https://docs.python.org/2/library/textwrap.html#textwrap.dedent). 15.1.1 --- _(November 18, 2015)_ A lot of bugfixes and docfixes in 15.1.1. updated AtomicSaver for better permissions handling, update BufferedSocket message sending, beta version of iterutils.get_path, several docs fixes, Stats zscore and cache bugfix, and an LRU refactor with significantly improved behavior and code factoring. - Updated [fileutils.AtomicSaver][fileutils.AtomicSaver] handling of filesystem permissions to be simpler and more secure. This also merges `dest_perms` and `part_perms` arguments to AtomicSaver and atomic_save. - Fix large message sending with [socketutils.BufferedSocket][socketutils.BufferedSocket] - [strutils.iter_splitlines][strutils.iter_splitlines] is now in the docs. 
- [cacheutils][cacheutils]: now imports RLock from the right place for python 2 - [statsutils][statsutils]: Only `delattr` when `hasattr` in [Stats.clear_cache][statsutils.Stats.clear_cache] - [statsutils.Stats][statsutils.Stats]: Add [Stats.get_zscore][statsutils.Stats.get_zscore] to support calculating the [z-score][zscore] (see also: t-statistic) - [cacheutils.LRU][cacheutils.LRU]: Massive refactor of the backing linked list for better handling of duplicate data in the cache. More aggressive locking and better `__eq__` ## 15.1.0 _(September 23, 2015)_ Reached the first release version of [iterutils.remap][iterutils.remap](), fully tested and documented. Also a couple of tweaks to expose the [iterutils.unique][iterutils.unique] docs. ## 15.0.2 _(September 9, 2015)_ a couple [dictutils.OMD][dictutils.OMD] fixes to [dictutils.OMD.pop][dictutils.OMD.pop] and [dictutils.OMD.popall][dictutils.OMD.popall] to make them consistent with the docstrings. and the accompanying tests of course. - fix [dictutils.OMD.setdefault][dictutils.OMD.setdefault] to default to None and not empty list, per documentation (and add a test to the same effect) 15.0.1 --- _(August 27, 2015)_ - Added [OrderedMultiDict.sortedvalues()][OrderedMultiDict.sortedvalues], which returns a copy of the OMD with sublists within a keyspace sorted. - Fixing a bug in [dictutils.OrderedMultiDict][dictutils.OrderedMultiDict]'s addlist method that caused values to be added multiple times. - Fixing a [iterutils.backoff][iterutils.backoff] string identity check [OrderedMultiDict.sortedvalues]: http://boltons.readthedocs.org/en/latest/dictutils.html#boltons.dictutils.OrderedMultiDict.sortedvalues ## 15.0.0 _(August 19, 2015)_ Finally the 15.0 major release. All passing PRs and feature requests from the first wave addressed and closed. tzutils merged into timeutils. AtomicSaver approach and API much improved. Several other features added: - [iterutils.backoff][iterutils.backoff] and [iterutils.backoff_iter][iterutils.backoff_iter] for exponential backoff - [iterutils.frange][iterutils.frange] and [iterutils.xfrange][iterutils.xfrange] for floating point range generation - Slightly more permissive [jsonutils.JSONLIterator][jsonutils.JSONLIterator] blank line ignoring - [strutils.iter_splitlines][strutils.iter_splitlines] for lazily getting lines from a larger string - [timeutils.dt_to_timestamp][timeutils.dt_to_timestamp], per the long-lived PR [#13][i13]. - Merged tzutils into timeutils - [fileutils.AtomicSaver][fileutils.AtomicSaver] rewrite and redoc - -teens support for [strutils.ordinalize][strutils.ordinalize] - made [iterutils.one][iterutils.one] consistent with [iterutils.first][iterutils.first] ## 0.6.6 _(July 31, 2015)_ Fix atomic saving open-file issue for Windows. - Patch for AtomicSaver on Windows. Full rewrite comes in 15.0.0. 
- [strutils.gunzip_bytes][strutils.gunzip_bytes] for decompressing a gzip bytestring ## 0.6.5 _(July 30, 2015)_ BufferedSocket work, html2text, pairwise shortcut, is_container, plural typo fix, [timeutils.isoparse][timeutils.isoparse], [cacheutils.ThresholdCounter][cacheutils.ThresholdCounter], and lots of testing - Add [iterutils.first][iterutils.first] function - Add [cacheutils.ThresholdCounter][cacheutils.ThresholdCounter] - Add JSONL verification to jsonutils - Add [timeutils.isoparse][timeutils.isoparse] - Add [strutils.html2text][strutils.html2text] and [strutils.HTMLTextExtractor][strutils.HTMLTextExtractor] - Fix [strutils.pluralize][strutils.pluralize] (indeces -> indices, per [#41][i41]) - Add [iterutils.is_container][iterutils.is_container] function - Fixed a small formatting bug in [tbutils.ExceptionInfo][tbutils.ExceptionInfo] that added an extra 'builtins.' for builtin exceptions under python 3 - Added tests for many modules - Create [iterutils.pairwise][iterutils.pairwise] shortcuts for pairwise chunks since pairs (key/val) are common - Additional 2.6 compatibility and tests - Fixed CachedInstancePartial to be Python 3 friendly without breaking PyPy. - Made formatutils Python 3 compatible - Rename sockutils to socketutils and other changes ## 0.6.4 _(May 10, 2015)_ Fixed multiline exception message handling in ParsedException. added mathutils. adding a tentative version of socketutils. fix LRU.popitem. fix OMD.**eq**. - Fix a bug where [dictutils.OrderedMultiDict][dictutils.OrderedMultiDict]'s **eq** would fail with non-iterable objects of comparison - Fixed `LRU.popitem` to return a key value pair - Added mathutils with [mathutils.ceil][mathutils.ceil] and [mathutils.floor][mathutils.floor] implementations that can search a fixed set of choices using the bisect module. - Fix [excutils.ParsedException][excutils.ParsedException] so exception message would not start with whitespace - Fix multiline exception messages - Adding [socketutils.BufferedSocket][socketutils.BufferedSocket] and [socketutils.NetstringSocket][socketutils.NetstringSocket] ## 0.6.3 _(April 20, 2015)_ Add typeutils, remove compat.py, make ParsedException work with eval()ed code - Properly parse tracebacks with missing source. Resolves [#30][i30] - Tweak the docs for [typeutils.get_all_subclasses][typeutils.get_all_subclasses] - Moved [typeutils.make_sentinel][typeutils.make_sentinel] into typeutils and removed the confusing compat module - Add in typeutils with modifications per the caveats of [#15][i15] - Added function [iterutils.one][iterutils.one] ## 0.6.2 _(April 11, 2015)_ Add partial_ordering, fix LRU repr and addition behavior - Add [funcutils.partial_ordering][funcutils.partial_ordering](), decorator similar to functools.total_ordering() - Fixed [cacheutils.LRU][cacheutils.LRU]'s behavior per [#21][i21] - Fix [cacheutils.LRU][cacheutils.LRU] repr reversal, fixes [#20][i20] ## 0.6.0 _(April 10, 2015)_ Python 3 support and several community bugfixes. Docs clarifications, too. - Make boltons Python 3 compatible without any external dependencies. All modules are independent and work in Python 2.6, 2.7, 3.4, and PyPy. 
- clarify TracebackInfo.from_current() method gap, per user 'dl\_\_' here: http://www.reddit.com/r/Python/comments/321d3o/boltons_over_100_python_utilities/ - Fix the [cacheutils.cached][cacheutils.cached] decorator, adding a sanity test, fixes [#12][i12] - Fix bytes2human when builtin zip returns iterators - Simplified logic of [iterutils.chunked][iterutils.chunked] ## 0.5.1 _(April 10, 2015)_ A lot of bugfixes and Python 2.6 and PyPy compatibility changes thanks to community contributions and encouragement. - Corrected cases where OMD was not exactly a dropin for OrderedDict - conditional availability of [gcutils.get_all][gcutils.get_all] based on pypy or cpython, also [gcutils.is_tracked][gcutils.is_tracked] was added in 2.7, so making [gcutils.get_all][gcutils.get_all] work with 2.6 - Made namedutils backwards compatibility for python 2.6 best effort - Fix invalid part_path usage in [fileutils.AtomicSaver][fileutils.AtomicSaver] ## 0.5.0 _(April 9, 2015)_ First publicly released version. The major focus of this release was docs, docstrings, and Read The Docs. - Cleared out **init** module for maximum independence - making [statsutils.median][statsutils.median] use \_get_quantile and add [statsutils.trimean][statsutils.trimean] - Switching the [statsutils.Stats][statsutils.Stats] type to be more sorted-data oriented, since it's only for offline analysis of unordered data - Made consistent multi-line string formats, as well as usage of the term 'builtin' vs 'built-in' (noun vs adjective) - Instrumented LRI with stats tracking - Made [timeutils.decimal_relative_time][timeutils.decimal_relative_time] cardinalization optional - Removed timeutils dependency on strutils - Made [tbutils.TracebackInfo][tbutils.TracebackInfo] classmethods work with no arguments. - Renamed ParsedTB to [tbutils.ParsedException][tbutils.ParsedException] - Made [dictutils.OMD][dictutils.OMD] .get()/.getlist() semantics more consistent. - finalizing .todict() and adding .sorted() to the [dictutils.OMD][dictutils.OMD] - Removed osutils and adding a note about utils in general - Made cacheutils more consistent between LRU and LRI, adding some cacheutils docs - Deprecate osutils, moving its contents into fileutils - Adding in-process statsutils2, with new DataAnalyzer and get_pearson_type (not merged yet) ## 0.4.2 _(March 8, 2015)_ Mostly a dictutils API update (addlist), but also gcutils. - [dictutils.OMD][dictutils.OMD]: split out addlist() from add(), removing the multi kwarg - adding gcutils with [gcutils.GCToggler][gcutils.GCToggler] and gc.get_all ## 0.4.1 _(February 26, 2015)_ adding mboxutils - adding mboxutils for handy dandy /var/mail integrations like cronfed ## 0.4.0 _(February 23, 2015)_ updated tbutils, JSONL support, initial cacheutils, atomic writer, hashtags - tbutils: Changed the way exceptions are parsed out of tracebacks - tbutils: Guard against potential linecache issues - Defined/implemented [iterutils.windowed_iter][iterutils.windowed_iter] corner case behavior - Added from_path to [fileutils.FilePerms][fileutils.FilePerms] - Adding [strutils.find_hashtags][strutils.find_hashtags] - Add ignore patterns to [fileutils.iter_find_files][fileutils.iter_find_files] ## 0.3.0 _(October 19, 2014)_ First alpha release. Practically, everything not mentioned above was added in this release. - tbutils: add ContextualTracebackInfo and ContextualExceptionInfo - cacheutils: a few minor changes to the BasicCache - update tbutils with some critical bits. 
still need to add convenience method for ExceptionInfo -> default exception print, also need to add more docstrings. - adding initial jsonutils with JSONL support - added [cacheutils.LRU][cacheutils.LRU] - added [timeutils.parse_timedelta][timeutils.parse_timedelta] - fixing iteritems with multi=False in the [dictutils.OrderedMultiDict][dictutils.OrderedMultiDict] (should return first key, but last value not first) - debugutils: add pdb excepthook and [debugutils.pdb_on_signal][debugutils.pdb_on_signal] - add [fileutils.mkdir_p][fileutils.mkdir_p] - tableutils: add maxlen to table text stuff - fix date citation for gob's - adding pure-Python implementation of Gob's algorithm - fix object header guessing - namedutils: add namedtuple support - fix a headers bug in tableutils - tableutils: add a couple more do-not-recurse types, add UnsupportedData exception for better recursion, insert recursive entries in-line, improve 'Object' strategy heuristic - wrap up html tag customization and fix a related bug - make html configurable via class attributes - strengthen the max_depth check - InputType classes are just shorter and clearer, imo - new from_dict, from_list, from_object, removing old ones. almost used a MetaClass for this - starting new table - only update the integer for the field being updated - fixing a verbalization/pronunciation issue - no regex for better error messages - being opinionated about some FilePerms things - adding initial version of fileutils/FilePerms - update formatutils - fix a slightly nasty namedlist bug - make OrderedMultiDict.get()'s default allow singulars - sync over ExceptionInfo - add from_current() classmethod with depth option to [Callpoint][tbutils.Callpoint] class for easier instantiation - it's called a numeronym - add a repr to ParsedTB. A bit verbose, but better than nothing. - add ParsedTB, which provides the ability to parse tracebacks dumped out to logs, the command line, etc. - improve test output and make assertion that new except hook is the same as the builtin. - update tbutils to use the more-powerful [Callpoint][tbutils.Callpoint] type. - copy_function - partially clean up partial stuff - first version of the namedlist - fixing up namedtuple, groundwork for namedlist. humorously named module. - embed value in cell for incremental iteration boost on OMD - reorganize code, add 'default' argument to poplast() - make key argument optional to OMD.poplast - rectifying inconsistent names and adjusting respective tests accordingly. using smashcase because that's what builtin dict() uses. - fix reverse; missing yield from! 
- add initial table biz - add get_counts - add [dictutils.OrderedMultiDict.get_inverted][dictutils.OrderedMultiDict.get_inverted]() for those handy reverse lookups - break out skip list to FastIter OMD + bench - add [strutils.a10n][strutils.a10n]() - fix a bug in [dictutils.OrderedMultiDict][dictutils.OrderedMultiDict]'s .add() - adding initial reimplementation of OMD - adding some tests to dictutils - update boltons formatutils to match what's going on in lithoxyl - remove infer_pos_args() from strutils (already in formatutils) - add formatutils to boltons - fix a potential infinite recursion in LocalTZ - use more explicit names for Local/Constant tzinfo types - add a basic but handy file finder - add infer_positional_args() to strutils (from lithoxyl) - split BasicCache out of dictutils into cacheutils - update median calculation slightly - add appropriate stacklevel to deprutils warning - add an initial version of deprutils (basic utils for facilitating deprecation) - add bytes2human - first version of some basic timezone utils which came in handy for a train scheduling application I wrote (etavta) - reorder imports for pep8 - redo plain-english relative_time() to have a decimal rounding factor and handle future dates - swap the order of cardinalize()'s arguments after intuiting the wrong order a couple times. gotta be consistent, this isn't PHP. - a weird little relative time approach - add total_seconds() implementation for Python <2.7, rename relative_datetime to relative_time - add a relative datetime function in a new module: timeutils - a little more transparency with orderedmultidict's maphistory - add a test for BasicCache - add the super simple BasicCache, a size-limited defaultdict-like thing - add a cheesy little splay list construct that can be used for splay- like manual reordering for eventual optimization - traceback utils, first draft - add [strutils.strip_ansi][strutils.strip_ansi] (need to make a cliutils or something) - add ansi strip task - mess with list tuning - add ordinalize() - add **all** to statsutils - add more stats docs and doctests - add some stats functions - add unit_len() - add pluralize/singularize/cardinalize to strutils - add **all**s all around, clean up imports a bit - adding license - add sorted queue type, make it the default - fix little bug in insert - inheriting from list necessitates overriding the deprecated **get-, **set-, and \_\_del- slice methods - hacky refactor to have a BasePriorityQueue to make room for SortedPriorityQueue with peek_n, etc. - add a little docstring and update sort method in BarrelList - add HeapPriorityQueue - tidy up listutils comments and imports - move treeutils out of boltons since I don't really think a pure python version actually adds much. i'll make an academic one-off repo for less practical data structure experiments like that. 
- inherit from list - add reverse() to blist - add index() to blist - cheesy **setitem**() for blist - add **delitem**() to BarrelList - change the way the in-place sort works with just one list - tune the list size a bit - add slicing to BarrelList - add initial version of BarrelList, a pure-python b-list-y thing to approximate O(log(n)) behavior by multiplexing the fast O(n) list operations - switch to new dead index interval approach; the IndexedSet is about half the speed of a list in the ultra-pathological case of random popping on the low end of the IndexedSet - made BisectTree's get() defaulting work a bit more like a dict's - added get_adjacent and miscellaneous to BisectTree - added a default name and always-falsy **nonzero** to Sentinel - add pop() for BisectTree and export the generic Tree - make a bisect tree, because O(n) is still pretttttty fast up to about 100k items - add a little hack to chunked/chunked_iter to make it work nicely with strings - tidy up ki_vs_vi_lrh usage just a titch - revamp indices behavior (key_size and value_size) - switch to new multi-key mode - pretty much done porting insert, delete, balance (the basic operations) - switch to negative refs, arbitrary length nodes - add sentinel utility thing - add .index() for list compat, updated exception messages, and added a silly test to show off slicing and indexing - add slicing support and .clear() - remove ifilter dependency (using generator expression) - add .reverse and .sort() to IndexedSet, fix bisection related bug exposing MISSING (insort requested index, not real_index) - pretty much all fundy IndexedSet bugs hit and fixed, looks like - IndexedSet getting much closer - initial rough draft of IndexedSet with a short docstring and a bunch of fixes already (still not workin tho) - add dictutils (OrderedMultiDict) - it was a long time coming, but I'm finally halfway happy with this wrapped exception - add uniqueification capabilities - go back to using **new** and positional arguments - exception wrapping green path mostly working - working on a wrapping exception mixin thing for less lossy nonraising. - add asciify and update slugify with ascii flag - add basic docs and doctests to strutils - scratch that, regexes still faster - add under2camel, camel2under, and slugify (and split_punct_ws, which is much faster than re-based punctuation splitting) - python3-compatible doctest for bucketize_bool - bucketize and bucketize_bool, with docs - add examples to chunked and chunked_iter - update split() docs with more examples. 
- [iterutils.chunked_iter][iterutils.chunked_iter] and [iterutils.chunked][iterutils.chunked] - [iterutils.split][iterutils.split] and [iterutils.split_iter][iterutils.split_iter] work [os.replace]: https://docs.python.org/3/library/os.html#os.replace [functools.total_ordering]: https://docs.python.org/2/library/functools.html#functools.total_ordering [StringIO]: https://docs.python.org/2/library/stringio.html [zscore]: https://en.wikipedia.org/wiki/Standard_score [cacheutils]: http://boltons.readthedocs.org/en/latest/cacheutils.html [cacheutils.LRI]: http://boltons.readthedocs.org/en/latest/cacheutils.html#boltons.cacheutils.LRI [cacheutils.LRU]: http://boltons.readthedocs.org/en/latest/cacheutils.html#boltons.cacheutils.LRU [cacheutils.ThresholdCounter]: http://boltons.readthedocs.org/en/latest/cacheutils.html#boltons.cacheutils.ThresholdCounter [cacheutils.cached]: http://boltons.readthedocs.org/en/latest/cacheutils.html#boltons.cacheutils.cached [cacheutils.cachedmethod]: http://boltons.readthedocs.org/en/latest/cacheutils.html#boltons.cacheutils.cachedmethod [cacheutils.cachedproperty]: http://boltons.readthedocs.org/en/latest/cacheutils.html#boltons.cacheutils.cachedproperty [debugutils.pdb_on_signal]: http://boltons.readthedocs.org/en/latest/debugutils.html#boltons.debugutils.pdb_on_signal [dictutils]: http://boltons.readthedocs.org/en/latest/dictutils.html [dictutils.OMD]: http://boltons.readthedocs.org/en/latest/dictutils.html#boltons.dictutils.OMD [dictutils.OMD.pop]: http://boltons.readthedocs.org/en/latest/dictutils.html#boltons.dictutils.OrderedMultiDict.pop [dictutils.OMD.popall]: http://boltons.readthedocs.org/en/latest/dictutils.html#boltons.dictutils.OrderedMultiDict.popall [dictutils.OMD.setdefault]: http://boltons.readthedocs.org/en/latest/dictutils.html#boltons.dictutils.OrderedMultiDict.setdefault [dictutils.OrderedMultiDict]: http://boltons.readthedocs.org/en/latest/dictutils.html#boltons.dictutils.OrderedMultiDict [dictutils.OrderedMultiDict.get_inverted]: http://boltons.readthedocs.org/en/latest/dictutils.html#boltons.dictutils.OrderedMultiDict.get_inverted [dictutils.OneToOne]: http://boltons.readthedocs.org/en/latest/dictutils.html#boltons.dictutils.OneToOne [dictutils.ManyToMany]: http://boltons.readthedocs.org/en/latest/dictutils.html#boltons.dictutils.ManyToMany [dictutils.FrozenDict]: http://boltons.readthedocs.org/en/latest/dictutils.html#boltons.dictutils.FrozenDict [dictutils.subdict]: http://boltons.readthedocs.org/en/latest/dictutils.html#boltons.dictutils.subdict [ecoutils]: http://boltons.readthedocs.org/en/latest/ecoutils.html [excutils.ParsedException]: http://boltons.readthedocs.org/en/latest/excutils.html#boltons.excutils.ParsedException [fileutils]: http://boltons.readthedocs.org/en/latest/fileutils.html [fileutils.replace]: http://boltons.readthedocs.org/en/latest/fileutils.html#boltons.fileutils.replace [fileutils.rotate_file]: http://boltons.readthedocs.org/en/latest/fileutils.html#boltons.fileutils.rotate_file [fileutils.atomic_rename]: http://boltons.readthedocs.org/en/latest/fileutils.html#boltons.fileutils.atomic_rename [fileutils.atomic_save]: http://boltons.readthedocs.org/en/latest/fileutils.html#boltons.fileutils.atomic_save [fileutils.AtomicSaver]: http://boltons.readthedocs.org/en/latest/fileutils.html#boltons.fileutils.AtomicSaver [fileutils.FilePerms]: http://boltons.readthedocs.org/en/latest/fileutils.html#boltons.fileutils.FilePerms [fileutils.iter_find_files]: 
http://boltons.readthedocs.org/en/latest/fileutils.html#boltons.fileutils.iter_find_files [fileutils.mkdir_p]: http://boltons.readthedocs.org/en/latest/fileutils.html#boltons.fileutils.mkdir_p [fileutils.DummyFile]: http://boltons.readthedocs.org/en/latest/fileutils.html#boltons.fileutils.DummyFile [formatutils]: http://boltons.readthedocs.org/en/latest/formatutils.html [formatutils.DeferredValue]: http://boltons.readthedocs.org/en/latest/formatutils.html#boltons.fileutils.DeferredValue [funcutils.FunctionBuilder]: http://boltons.readthedocs.org/en/latest/funcutils.html#boltons.funcutils.FunctionBuilder [funcutils.FunctionBuilder.remove_arg]: https://boltons.readthedocs.io/en/latest/funcutils.html#boltons.funcutils.FunctionBuilder.remove_arg [funcutils.FunctionBuilder.add_arg]: https://boltons.readthedocs.io/en/latest/funcutils.html#boltons.funcutils.FunctionBuilder.add_arg [funcutils.partial_ordering]: http://boltons.readthedocs.org/en/latest/funcutils.html#boltons.funcutils.partial_ordering [funcutils.total_ordering]: http://boltons.readthedocs.org/en/latest/funcutils.html#boltons.funcutils.total_ordering [funcutils.update_wrapper]: http://boltons.readthedocs.org/en/latest/funcutils.html#boltons.funcutils.update_wrapper [funcutils.wraps]: http://boltons.readthedocs.org/en/latest/funcutils.html#boltons.funcutils.wraps [gcutils.GCToggler]: http://boltons.readthedocs.org/en/latest/gcutils.html#boltons.gcutils.GCToggler [gcutils.get_all]: http://boltons.readthedocs.org/en/latest/gcutils.html#boltons.gcutils.get_all [gcutils.is_tracked]: http://boltons.readthedocs.org/en/latest/gcutils.html#boltons.gcutils.is_tracked [i12]: https://github.com/mahmoud/boltons/issues/12 [i13]: https://github.com/mahmoud/boltons/issues/13 [i15]: https://github.com/mahmoud/boltons/issues/15 [i20]: https://github.com/mahmoud/boltons/issues/20 [i21]: https://github.com/mahmoud/boltons/issues/21 [i30]: https://github.com/mahmoud/boltons/issues/30 [i41]: https://github.com/mahmoud/boltons/issues/41 [i79]: https://github.com/mahmoud/boltons/pull/79 [i83]: https://github.com/mahmoud/boltons/issues/83 [i84]: https://github.com/mahmoud/boltons/issues/84 [i86]: https://github.com/mahmoud/boltons/issues/86 [i128]: https://github.com/mahmoud/boltons/issues/128 [i135]: https://github.com/mahmoud/boltons/issues/135 [i150]: https://github.com/mahmoud/boltons/issues/150 [i161]: https://github.com/mahmoud/boltons/issues/161 [i162]: https://github.com/mahmoud/boltons/issues/162 [i164]: https://github.com/mahmoud/boltons/issues/164 [i294]: https://github.com/mahmoud/boltons/issues/294 [i302]: https://github.com/mahmoud/boltons/issues/302 [i303]: https://github.com/mahmoud/boltons/issues/303 [i305]: https://github.com/mahmoud/boltons/issues/305 [i312]: https://github.com/mahmoud/boltons/issues/312 [i315]: https://github.com/mahmoud/boltons/issues/315 [i320]: https://github.com/mahmoud/boltons/issues/320 [i323]: https://github.com/mahmoud/boltons/issues/323 [i326]: https://github.com/mahmoud/boltons/issues/326 [i327]: https://github.com/mahmoud/boltons/issues/327 [ioutils]: http://boltons.readthedocs.org/en/latest/ioutils.html [ioutils.MultiFileReader]: http://boltons.readthedocs.org/en/latest/ioutils.html#boltons.ioutils.MultiFileReader [ioutils.SpooledBytesIO]: http://boltons.readthedocs.org/en/latest/ioutils.html#boltons.ioutils.SpooledBytesIO [ioutils.SpooledStringIO]: http://boltons.readthedocs.org/en/latest/ioutils.html#boltons.ioutils.SpooledStringIO [iterutils]: http://boltons.readthedocs.org/en/latest/iterutils.html 
[iterutils.backoff]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.backoff [iterutils.backoff_iter]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.backoff_iter [iterutils.chunked]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.chunked [iterutils.chunked_iter]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.chunked_iter [iterutils.chunk_ranges]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.chunk_ranges [iterutils.first]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.first [iterutils.flatten]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.flatten [iterutils.flatten_iter]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.flatten_iter [iterutils.backoff]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.backoff [iterutils.frange]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.frange [iterutils.GUIDerator]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.GUIDerator [iterutils.SequentialGUIDerator]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.SequentialGUIDerator [iterutils.is_container]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.is_container [iterutils.bucketize]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.bucketize [iterutils.one]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.one [iterutils.pairwise]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.pairwise [iterutils.same]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.same [iterutils.remap]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.remap [iterutils.research]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.research [iterutils.soft_sorted]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.soft_sorted [iterutils.untyped_sorted]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.untyped_sorted [iterutils.split]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.split [iterutils.split_iter]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.split_iter [iterutils.strip]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.strip [iterutils.rstrip]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.rstrip [iterutils.lstrip]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.lstrip [iterutils.unique]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.unique [iterutils.windowed_iter]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.windowed_iter [iterutils.xfrange]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.xfrange [jsonutils.JSONLIterator]: http://boltons.readthedocs.org/en/latest/jsonutils.html#boltons.jsonutils.JSONLIterator [mathutils.Bits]: http://boltons.readthedocs.org/en/latest/mathutils.html#boltons.mathutils.Bits [mathutils.ceil]: http://boltons.readthedocs.org/en/latest/mathutils.html#boltons.mathutils.ceil [mathutils.floor]: http://boltons.readthedocs.org/en/latest/mathutils.html#boltons.mathutils.floor [mathutils.clamp]: 
http://boltons.readthedocs.org/en/latest/mathutils.html#boltons.mathutils.clamp [queueutils]: http://boltons.readthedocs.org/en/latest/queueutils.html [setutils.complement]: http://boltons.readthedocs.org/en/latest/setutils.html#boltons.setutils.complement [IndexedSet]: http://boltons.readthedocs.org/en/latest/setutils.html#boltons.setutils.IndexedSet [socketutils]: http://boltons.readthedocs.org/en/latest/socketutils.html [socketutils.BufferedSocket]: http://boltons.readthedocs.org/en/latest/socketutils.html#boltons.socketutils.BufferedSocket [socketutils.BufferedSocket.recv]: http://boltons.readthedocs.org/en/latest/socketutils.html#boltons.socketutils.BufferedSocket.recv [socketutils.BufferedSocket.recv_until]: http://boltons.readthedocs.org/en/latest/socketutils.html#boltons.socketutils.BufferedSocket.recv_until [socketutils.BufferedSocket.recv_close]: http://boltons.readthedocs.org/en/latest/socketutils.html#boltons.socketutils.BufferedSocket.recv_close [socketutils.NetstringSocket]: http://boltons.readthedocs.org/en/latest/socketutils.html#boltons.socketutils.NetstringSocket [statsutils]: http://boltons.readthedocs.org/en/latest/statsutils.html [statsutils.Stats]: http://boltons.readthedocs.org/en/latest/statsutils.html#boltons.statsutils.Stats [statsutils.Stats.clear_cache]: http://boltons.readthedocs.org/en/latest/statsutils.html#boltons.statsutils.Stats.clear_cache [statsutils.Stats.describe]: http://boltons.readthedocs.org/en/latest/statsutils.html#boltons.statsutils.Stats.describe [statsutils.Stats.format_histogram]: http://boltons.readthedocs.org/en/latest/statsutils.html#boltons.statsutils.Stats.format_histogram [statsutils.Stats.get_zscore]: http://boltons.readthedocs.org/en/latest/statsutils.html#boltons.statsutils.Stats.get_zscore [statsutils.median]: http://boltons.readthedocs.org/en/latest/statsutils.html#boltons.statsutils.median [statsutils.trimean]: http://boltons.readthedocs.org/en/latest/statsutils.html#boltons.statsutils.trimean [strutils]: http://boltons.readthedocs.org/en/latest/strutils.html [strutils.HTMLTextExtractor]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.HTMLTextExtractor [strutils.a10n]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.a10n [strutils.args2cmd]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.args2cmd [strutils.args2sh]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.args2sh [strutils.escape_shell_args]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.escape_shell_args [strutils.find_hashtags]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.find_hashtags [strutils.gzip_bytes]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.gzip_bytes [strutils.gunzip_bytes]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.gunzip_bytes [strutils.html2text]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.html2text [strutils.indent]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.indent [strutils.iter_splitlines]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.iter_splitlines [strutils.ordinalize]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.ordinalize [strutils.pluralize]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.pluralize [strutils.is_ascii]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.is_ascii 
[strutils.is_uuid]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.is_uuid [strutils.parse_int_list]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.parse_int_list [strutils.format_int_list]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.format_int_list [strutils.int_list_complement]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.int_list_complement [strutils.int_list_to_int_tuples]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.int_list_to_int_tuples [strutils.slugify]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.slugify [strutils.strip_ansi]: http://boltons.readthedocs.org/en/latest/strutils.html#boltons.strutils.strip_ansi [tableutils]: http://boltons.readthedocs.org/en/latest/tableutils.html [tableutils.Table]: http://boltons.readthedocs.org/en/latest/tableutils.html#boltons.tableutils.Table [tbutils]: http://boltons.readthedocs.org/en/latest/tbutils.html [tbutils.Callpoint]: http://boltons.readthedocs.org/en/latest/tbutils.html#boltons.tbutils.Callpoint [tbutils.ExceptionInfo]: http://boltons.readthedocs.org/en/latest/tbutils.html#boltons.tbutils.ExceptionInfo [tbutils.ParsedException]: http://boltons.readthedocs.org/en/latest/tbutils.html#boltons.tbutils.ParsedException [tbutils.ParsedException.to_string]: http://boltons.readthedocs.org/en/latest/tbutils.html#boltons.tbutils.ParsedException.to_string [tbutils.TracebackInfo]: http://boltons.readthedocs.org/en/latest/tbutils.html#boltons.tbutils.TracebackInfo [timeutils.daterange]: http://boltons.readthedocs.org/en/latest/timeutils.html#boltons.timeutils.daterange [timeutils.decimal_relative_time]: http://boltons.readthedocs.org/en/latest/timeutils.html#boltons.timeutils.decimal_relative_time [timeutils.dt_to_timestamp]: http://boltons.readthedocs.org/en/latest/timeutils.html#boltons.timeutils.dt_to_timestamp [timeutils.isoparse]: http://boltons.readthedocs.org/en/latest/timeutils.html#boltons.timeutils.isoparse [timeutils.parse_timedelta]: http://boltons.readthedocs.org/en/latest/timeutils.html#boltons.timeutils.parse_timedelta [timeutils.strpdate]: http://boltons.readthedocs.org/en/latest/timeutils.html#boltons.timeutils.strpdate [typeutils.get_all_subclasses]: http://boltons.readthedocs.org/en/latest/typeutils.html#boltons.typeutils.get_all_subclasses [typeutils.make_sentinel]: http://boltons.readthedocs.org/en/latest/typeutils.html#boltons.typeutils.make_sentinel [urlutils]: http://boltons.readthedocs.org/en/latest/urlutils.html [urlutils.SCHEME_PORT_MAP]: http://boltons.readthedocs.org/en/latest/urlutils.html#boltons.urlutils.SCHEME_PORT_MAP [urlutils.find_all_links]: http://boltons.readthedocs.org/en/latest/urlutils.html#boltons.urlutils.find_all_links boltons-25.0.0/LICENSE000066400000000000000000000027311475005545200142750ustar00rootroot00000000000000Copyright (c) 2013, Mahmoud Hashemi Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* The names of the contributors may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. boltons-25.0.0/README.md000066400000000000000000000123441475005545200145500ustar00rootroot00000000000000# Boltons *boltons should be builtins.* **Boltons** is a set of over 230 BSD-licensed, pure-Python utilities in the same spirit as — and yet conspicuously missing from — [the standard library][stdlib], including: * [Atomic file saving][atomic], bolted on with [fileutils][fileutils] * A highly-optimized [OrderedMultiDict][omd], in [dictutils][dictutils] * *Two* types of [PriorityQueue][pq], in [queueutils][queueutils] * [Chunked][chunked] and [windowed][windowed] iteration, in [iterutils][iterutils] * Recursive data structure [iteration and merging][remap], with [iterutils.remap][iterutils.remap] * Exponential backoff functionality, including jitter, through [iterutils.backoff][iterutils.backoff] * A full-featured [TracebackInfo][tbinfo] type, for representing stack traces, in [tbutils][tbutils] **[Full and extensive docs are available on Read The Docs.][rtd]** See what's new [by checking the CHANGELOG][changelog]. Boltons is tested against Python 3.7-3.13, as well as PyPy3. 
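For a quick, illustrative taste of two of the iteration helpers listed above (a sketch; see the linked docs for the full APIs):

```python
from boltons.iterutils import chunked, windowed

# chunked breaks an iterable into fixed-size lists (the last chunk may be short)
chunked(range(7), 3)   # -> [[0, 1, 2], [3, 4, 5], [6]]

# windowed slides a fixed-size tuple window across a sequence
windowed(range(4), 2)  # -> [(0, 1), (1, 2), (2, 3)]
```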
[stdlib]: https://docs.python.org/3/library/index.html [rtd]: https://boltons.readthedocs.org/en/latest/ [changelog]: https://github.com/mahmoud/boltons/blob/master/CHANGELOG.md [atomic]: https://boltons.readthedocs.org/en/latest/fileutils.html#boltons.fileutils.atomic_save [omd]: https://boltons.readthedocs.org/en/latest/dictutils.html#boltons.dictutils.OrderedMultiDict [pq]: https://boltons.readthedocs.org/en/latest/queueutils.html#boltons.queueutils.PriorityQueue [chunked]: https://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.chunked [windowed]: https://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.windowed [tbinfo]: https://boltons.readthedocs.org/en/latest/tbutils.html#boltons.tbutils.TracebackInfo [fileutils]: https://boltons.readthedocs.org/en/latest/fileutils.html#module-boltons.fileutils [ioutils]: https://boltons.readthedocs.org/en/latest/ioutils.html#module-boltons.ioutils [dictutils]: https://boltons.readthedocs.org/en/latest/dictutils.html#module-boltons.dictutils [queueutils]: https://boltons.readthedocs.org/en/latest/queueutils.html#module-boltons.queueutils [iterutils]: https://boltons.readthedocs.org/en/latest/iterutils.html#module-boltons.iterutils [iterutils.remap]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.remap [iterutils.backoff]: http://boltons.readthedocs.org/en/latest/iterutils.html#boltons.iterutils.backoff [tbutils]: https://boltons.readthedocs.org/en/latest/tbutils.html#module-boltons.tbutils [remap]: http://sedimental.org/remap.html ## Installation Boltons can be added to a project in a few ways. There's the obvious one: ```bash pip install boltons ``` On macOS, it can also be installed via [MacPorts](https://ports.macports.org/port/py-boltons/summary): ```bash sudo port install py-boltons ``` Then, [thanks to PyPI][boltons_pypi], dozens of boltons are just an import away: ```python from boltons.cacheutils import LRU my_cache = LRU() ``` However, due to the nature of utilities, application developers might want to consider other options, including vendorization of individual modules into a project. Boltons is pure-Python and has no dependencies. If the whole project is too big, each module is independent, and can be copied directly into a project. See the [Integration][integration] section of the docs for more details. [boltons_pypi]: https://pypi.python.org/pypi/boltons [integration]: https://boltons.readthedocs.org/en/latest/architecture.html#integration ## Third-party packages The majority of boltons strive to be "good enough" for a wide range of basic uses, leaving advanced use cases to Python's [myriad specialized 3rd-party libraries][pypi]. In many cases the respective ``boltons`` module will describe 3rd-party alternatives worth investigating when use cases outgrow `boltons`. If you've found a natural "next-step" library worth mentioning, see the next section! [pypi]: https://pypi.python.org/pypi ## Gaps Found something missing in the standard library that should be in `boltons`? Found something missing in `boltons`? First, take a moment to read the very brief [architecture statement][architecture] to make sure the functionality would be a good fit. Then, if you are very motivated, submit [a Pull Request][prs]. Otherwise, submit a short feature request on [the Issues page][issues], and we will figure something out. 
[architecture]: https://boltons.readthedocs.org/en/latest/architecture.html [issues]: https://github.com/mahmoud/boltons/issues [prs]: https://github.com/mahmoud/boltons/pulls boltons-25.0.0/TODO.rst000066400000000000000000000047101475005545200145660ustar00rootroot00000000000000TODO ==== @tlog.wrap('critical', 'update campaign', verbose=True, inject_as='_act') def update(self, _act, force=False): Resulted in: Traceback (most recent call last): File "/home/mahmoud/virtualenvs/pacetrack/bin/pt", line 11, in load_entry_point('pacetrack', 'console_scripts', 'pt')() File "/home/mahmoud/hatnote/pacetrack/pacetrack/cli.py", line 131, in main cmd.run() File "/home/mahmoud/projects/face/face/command.py", line 403, in run ret = inject(wrapped, kwargs) File "/home/mahmoud/projects/face/face/sinter.py", line 59, in inject return f(**kwargs) File "", line 6, in next_ File "/home/mahmoud/hatnote/pacetrack/pacetrack/cli.py", line 138, in mw_cli_log return next_() File "", line 4, in next_ File "/home/mahmoud/hatnote/pacetrack/pacetrack/cli.py", line 89, in update return update_all(campaign_ids=posargs_, force=force, jsub=jsub, args_=args_) File "/home/mahmoud/hatnote/pacetrack/pacetrack/cli.py", line 73, in update_all cur_pt = load_and_update_campaign(campaign_dir, force=force) File "/home/mahmoud/hatnote/pacetrack/pacetrack/update.py", line 622, in load_and_update_campaign ptc.update(force=force) File "", line 2, in update File "/home/mahmoud/virtualenvs/pacetrack/local/lib/python2.7/site-packages/lithoxyl/logger.py", line 298, in logged_func return func_to_log(*a, **kw) TypeError: update() got multiple values for keyword argument '_act' dictutils --------- - autoindexing list for dictionaries. As records get added, uses a basic proportion-based heuristic to create subdictionaries as indexes over the same data. Maybe automatically does a full-scan option too. - non-overwriting version of dict.update() jsonutils --------- * jsonl ignore blank lines * jsonl add line number to error message misc? ----- - wrap_trace debug utility. Takes an object, looks at its dir, wraps everything callable, with a hook. Needs an enable/disable flag. - get/set/call/return/exception - __slots__ - Top/Bottom singletons (greater than and less than everything) cliutils -------- - progress bar - confirmation prompt (e.g., "Question? (Y/n)") tbutils ------- - fold repeated frames (recursive calls) statsutils ---------- - dirty bit auto clears cache on property access - geometric mean (2 ** sum(log(a, b, ...)) urlutils -------- * improve usage of ``encoding`` arg (in parse_qsl for example) * normalize unicode on input? boltons-25.0.0/boltons/000077500000000000000000000000001475005545200147455ustar00rootroot00000000000000boltons-25.0.0/boltons/__init__.py000066400000000000000000000000001475005545200170440ustar00rootroot00000000000000boltons-25.0.0/boltons/cacheutils.py000066400000000000000000000732771475005545200174630ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. 
# # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """``cacheutils`` contains consistent implementations of fundamental cache types. Currently there are two to choose from: * :class:`LRI` - Least-recently inserted * :class:`LRU` - Least-recently used Both caches are :class:`dict` subtypes, designed to be as interchangeable as possible, to facilitate experimentation. A key practice when enhancing performance with caching is ensuring that the caching strategy is actually working. If the cache is constantly missing, it is just adding more overhead and code complexity. The standard statistics are: * ``hit_count`` - the number of times the queried key has been in the cache * ``miss_count`` - the number of times a key has been absent and/or fetched by the cache * ``soft_miss_count`` - the number of times a key has been absent, but a default has been provided by the caller, as with :meth:`dict.get` and :meth:`dict.setdefault`. Soft misses are a subset of misses, so this number is always less than or equal to ``miss_count``. Additionally, ``cacheutils`` provides :class:`ThresholdCounter`, a cache-like bounded counter useful for online statistics collection. Learn more about `caching algorithms on Wikipedia `_. """ # TODO: TimedLRI # TODO: support 0 max_size? import heapq import weakref import itertools from operator import attrgetter try: from threading import RLock except Exception: class RLock: 'Dummy reentrant lock for builds without threads' def __enter__(self): pass def __exit__(self, exctype, excinst, exctb): pass try: from .typeutils import make_sentinel _MISSING = make_sentinel(var_name='_MISSING') _KWARG_MARK = make_sentinel(var_name='_KWARG_MARK') except ImportError: _MISSING = object() _KWARG_MARK = object() PREV, NEXT, KEY, VALUE = range(4) # names for the link fields DEFAULT_MAX_SIZE = 128 class LRI(dict): """The ``LRI`` implements the basic *Least Recently Inserted* strategy to caching. One could also think of this as a ``SizeLimitedDefaultDict``. *on_miss* is a callable that accepts the missing key (as opposed to :class:`collections.defaultdict`'s "default_factory", which accepts no arguments.) Also note that, like the :class:`LRU`, the ``LRI`` is instrumented with statistics tracking.
>>> cap_cache = LRI(max_size=2) >>> cap_cache['a'], cap_cache['b'] = 'A', 'B' >>> from pprint import pprint as pp >>> pp(dict(cap_cache)) {'a': 'A', 'b': 'B'} >>> [cap_cache['b'] for i in range(3)][0] 'B' >>> cap_cache['c'] = 'C' >>> print(cap_cache.get('a')) None >>> cap_cache.hit_count, cap_cache.miss_count, cap_cache.soft_miss_count (3, 1, 1) """ def __init__(self, max_size=DEFAULT_MAX_SIZE, values=None, on_miss=None): if max_size <= 0: raise ValueError('expected max_size > 0, not %r' % max_size) self.hit_count = self.miss_count = self.soft_miss_count = 0 self.max_size = max_size self._lock = RLock() self._init_ll() if on_miss is not None and not callable(on_miss): raise TypeError('expected on_miss to be a callable' ' (or None), not %r' % on_miss) self.on_miss = on_miss if values: self.update(values) # TODO: fromkeys()? # linked list manipulation methods. # # invariants: # 1) 'anchor' is the sentinel node in the doubly linked list. there is # always only one, and its KEY and VALUE are both _MISSING. # 2) the most recently accessed node comes immediately before 'anchor'. # 3) the least recently accessed node comes immediately after 'anchor'. def _init_ll(self): anchor = [] anchor[:] = [anchor, anchor, _MISSING, _MISSING] # a link lookup table for finding linked list links in O(1) # time. self._link_lookup = {} self._anchor = anchor def _print_ll(self): print('***') for (key, val) in self._get_flattened_ll(): print(key, val) print('***') return def _get_flattened_ll(self): flattened_list = [] link = self._anchor while True: flattened_list.append((link[KEY], link[VALUE])) link = link[NEXT] if link is self._anchor: break return flattened_list def _get_link_and_move_to_front_of_ll(self, key): # find what will become the newest link. this may raise a # KeyError, which is useful to __getitem__ and __setitem__ newest = self._link_lookup[key] # splice out what will become the newest link. newest[PREV][NEXT] = newest[NEXT] newest[NEXT][PREV] = newest[PREV] # move what will become the newest link immediately before # anchor (invariant 2) anchor = self._anchor second_newest = anchor[PREV] second_newest[NEXT] = anchor[PREV] = newest newest[PREV] = second_newest newest[NEXT] = anchor return newest def _set_key_and_add_to_front_of_ll(self, key, value): # create a new link and place it immediately before anchor # (invariant 2). anchor = self._anchor second_newest = anchor[PREV] newest = [second_newest, anchor, key, value] second_newest[NEXT] = anchor[PREV] = newest self._link_lookup[key] = newest def _set_key_and_evict_last_in_ll(self, key, value): # the link after anchor is the oldest in the linked list # (invariant 3). the current anchor becomes a link that holds # the newest key, and the oldest link becomes the new anchor # (invariant 1). now the newest link comes before anchor # (invariant 2). no links are moved; only their keys # and values are changed. oldanchor = self._anchor oldanchor[KEY] = key oldanchor[VALUE] = value self._anchor = anchor = oldanchor[NEXT] evicted = anchor[KEY] anchor[KEY] = anchor[VALUE] = _MISSING del self._link_lookup[evicted] self._link_lookup[key] = oldanchor return evicted def _remove_from_ll(self, key): # splice a link out of the list and drop it from our lookup # table. 
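        # (Illustrative aside: this unlink is O(1) -- the removed cell's
        # neighbors are simply pointed at each other, so no traversal of
        # the list is needed.)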
link = self._link_lookup.pop(key) link[PREV][NEXT] = link[NEXT] link[NEXT][PREV] = link[PREV] def __setitem__(self, key, value): with self._lock: try: link = self._get_link_and_move_to_front_of_ll(key) except KeyError: if len(self) < self.max_size: self._set_key_and_add_to_front_of_ll(key, value) else: evicted = self._set_key_and_evict_last_in_ll(key, value) super().__delitem__(evicted) else: link[VALUE] = value super().__setitem__(key, value) return def __getitem__(self, key): with self._lock: try: link = self._link_lookup[key] except KeyError: self.miss_count += 1 if not self.on_miss: raise ret = self[key] = self.on_miss(key) return ret self.hit_count += 1 return link[VALUE] def get(self, key, default=None): try: return self[key] except KeyError: self.soft_miss_count += 1 return default def __delitem__(self, key): with self._lock: super().__delitem__(key) self._remove_from_ll(key) def pop(self, key, default=_MISSING): # NB: hit/miss counts are bypassed for pop() with self._lock: try: ret = super().pop(key) except KeyError: if default is _MISSING: raise ret = default else: self._remove_from_ll(key) return ret def popitem(self): with self._lock: item = super().popitem() self._remove_from_ll(item[0]) return item def clear(self): with self._lock: super().clear() self._init_ll() def copy(self): return self.__class__(max_size=self.max_size, values=self) def setdefault(self, key, default=None): with self._lock: try: return self[key] except KeyError: self.soft_miss_count += 1 self[key] = default return default def update(self, E, **F): # E and F are throwback names to the dict() __doc__ with self._lock: if E is self: return setitem = self.__setitem__ if callable(getattr(E, 'keys', None)): for k in E.keys(): setitem(k, E[k]) else: for k, v in E: setitem(k, v) for k in F: setitem(k, F[k]) return def __eq__(self, other): with self._lock: if self is other: return True if len(other) != len(self): return False if not isinstance(other, LRI): return other == self return super().__eq__(other) def __ne__(self, other): return not (self == other) def __repr__(self): cn = self.__class__.__name__ val_map = super().__repr__() return ('%s(max_size=%r, on_miss=%r, values=%s)' % (cn, self.max_size, self.on_miss, val_map)) class LRU(LRI): """The ``LRU`` is :class:`dict` subtype implementation of the *Least-Recently Used* caching strategy. Args: max_size (int): Max number of items to cache. Defaults to ``128``. values (iterable): Initial values for the cache. Defaults to ``None``. on_miss (callable): a callable which accepts a single argument, the key not present in the cache, and returns the value to be cached. >>> cap_cache = LRU(max_size=2) >>> cap_cache['a'], cap_cache['b'] = 'A', 'B' >>> from pprint import pprint as pp >>> pp(dict(cap_cache)) {'a': 'A', 'b': 'B'} >>> [cap_cache['b'] for i in range(3)][0] 'B' >>> cap_cache['c'] = 'C' >>> print(cap_cache.get('a')) None This cache is also instrumented with statistics collection. ``hit_count``, ``miss_count``, and ``soft_miss_count`` are all integer members that can be used to introspect the performance of the cache. ("Soft" misses are misses that did not raise :exc:`KeyError`, e.g., ``LRU.get()`` or ``on_miss`` was used to cache a default. >>> cap_cache.hit_count, cap_cache.miss_count, cap_cache.soft_miss_count (3, 1, 1) Other than the size-limiting caching behavior and statistics, ``LRU`` acts like its parent class, the built-in Python :class:`dict`. 
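    As a quick illustrative sketch of the recency behavior:

    >>> lru = LRU(max_size=2)
    >>> lru['x'], lru['y'] = 1, 2
    >>> _ = lru['x']   # touch 'x' so 'y' becomes the least-recently used
    >>> lru['z'] = 3   # evicts 'y', not 'x'
    >>> sorted(lru.keys())
    ['x', 'z']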
""" def __getitem__(self, key): with self._lock: try: link = self._get_link_and_move_to_front_of_ll(key) except KeyError: self.miss_count += 1 if not self.on_miss: raise ret = self[key] = self.on_miss(key) return ret self.hit_count += 1 return link[VALUE] ### Cached decorator # Key-making technique adapted from Python 3.4's functools class _HashedKey(list): """The _HashedKey guarantees that hash() will be called no more than once per cached function invocation. """ __slots__ = 'hash_value' def __init__(self, key): self[:] = key self.hash_value = hash(tuple(key)) def __hash__(self): return self.hash_value def __repr__(self): return f'{self.__class__.__name__}({list.__repr__(self)})' def make_cache_key(args, kwargs, typed=False, kwarg_mark=_KWARG_MARK, fasttypes=frozenset([int, str, frozenset, type(None)])): """Make a generic key from a function's positional and keyword arguments, suitable for use in caches. Arguments within *args* and *kwargs* must be `hashable`_. If *typed* is ``True``, ``3`` and ``3.0`` will be treated as separate keys. The key is constructed in a way that is flat as possible rather than as a nested structure that would take more memory. If there is only a single argument and its data type is known to cache its hash value, then that argument is returned without a wrapper. This saves space and improves lookup speed. >>> tuple(make_cache_key(('a', 'b'), {'c': ('d')})) ('a', 'b', _KWARG_MARK, ('c', 'd')) .. _hashable: https://docs.python.org/2/glossary.html#term-hashable """ # key = [func_name] if func_name else [] # key.extend(args) key = list(args) if kwargs: sorted_items = sorted(kwargs.items()) key.append(kwarg_mark) key.extend(sorted_items) if typed: key.extend([type(v) for v in args]) if kwargs: key.extend([type(v) for k, v in sorted_items]) elif len(key) == 1 and type(key[0]) in fasttypes: return key[0] return _HashedKey(key) # for backwards compatibility in case someone was importing it _make_cache_key = make_cache_key class CachedFunction: """This type is used by :func:`cached`, below. Instances of this class are used to wrap functions in caching logic. """ def __init__(self, func, cache, scoped=True, typed=False, key=None): self.func = func if callable(cache): self.get_cache = cache elif not (callable(getattr(cache, '__getitem__', None)) and callable(getattr(cache, '__setitem__', None))): raise TypeError('expected cache to be a dict-like object,' ' or callable returning a dict-like object, not %r' % cache) else: def _get_cache(): return cache self.get_cache = _get_cache self.scoped = scoped self.typed = typed self.key_func = key or make_cache_key def __call__(self, *args, **kwargs): cache = self.get_cache() key = self.key_func(args, kwargs, typed=self.typed) try: ret = cache[key] except KeyError: ret = cache[key] = self.func(*args, **kwargs) return ret def __repr__(self): cn = self.__class__.__name__ if self.typed or not self.scoped: return ("%s(func=%r, scoped=%r, typed=%r)" % (cn, self.func, self.scoped, self.typed)) return f"{cn}(func={self.func!r})" class CachedMethod: """Similar to :class:`CachedFunction`, this type is used by :func:`cachedmethod` to wrap methods in caching logic. 
""" def __init__(self, func, cache, scoped=True, typed=False, key=None): self.func = func self.__isabstractmethod__ = getattr(func, '__isabstractmethod__', False) if isinstance(cache, str): self.get_cache = attrgetter(cache) elif callable(cache): self.get_cache = cache elif not (callable(getattr(cache, '__getitem__', None)) and callable(getattr(cache, '__setitem__', None))): raise TypeError('expected cache to be an attribute name,' ' dict-like object, or callable returning' ' a dict-like object, not %r' % cache) else: def _get_cache(obj): return cache self.get_cache = _get_cache self.scoped = scoped self.typed = typed self.key_func = key or make_cache_key self.bound_to = None def __get__(self, obj, objtype=None): if obj is None: return self cls = self.__class__ ret = cls(self.func, self.get_cache, typed=self.typed, scoped=self.scoped, key=self.key_func) ret.bound_to = obj return ret def __call__(self, *args, **kwargs): obj = args[0] if self.bound_to is None else self.bound_to cache = self.get_cache(obj) key_args = (self.bound_to, self.func) + args if self.scoped else args key = self.key_func(key_args, kwargs, typed=self.typed) try: ret = cache[key] except KeyError: if self.bound_to is not None: args = (self.bound_to,) + args ret = cache[key] = self.func(*args, **kwargs) return ret def __repr__(self): cn = self.__class__.__name__ args = (cn, self.func, self.scoped, self.typed) if self.bound_to is not None: args += (self.bound_to,) return ('<%s func=%r scoped=%r typed=%r bound_to=%r>' % args) return ("%s(func=%r, scoped=%r, typed=%r)" % args) def cached(cache, scoped=True, typed=False, key=None): """Cache any function with the cache object of your choosing. Note that the function wrapped should take only `hashable`_ arguments. Args: cache (Mapping): Any :class:`dict`-like object suitable for use as a cache. Instances of the :class:`LRU` and :class:`LRI` are good choices, but a plain :class:`dict` can work in some cases, as well. This argument can also be a callable which accepts no arguments and returns a mapping. scoped (bool): Whether the function itself is part of the cache key. ``True`` by default, different functions will not read one another's cache entries, but can evict one another's results. ``False`` can be useful for certain shared cache use cases. More advanced behavior can be produced through the *key* argument. typed (bool): Whether to factor argument types into the cache check. Default ``False``, setting to ``True`` causes the cache keys for ``3`` and ``3.0`` to be considered unequal. >>> my_cache = LRU() >>> @cached(my_cache) ... def cached_lower(x): ... return x.lower() ... >>> cached_lower("CaChInG's FuN AgAiN!") "caching's fun again!" >>> len(my_cache) 1 .. _hashable: https://docs.python.org/2/glossary.html#term-hashable """ def cached_func_decorator(func): return CachedFunction(func, cache, scoped=scoped, typed=typed, key=key) return cached_func_decorator def cachedmethod(cache, scoped=True, typed=False, key=None): """Similar to :func:`cached`, ``cachedmethod`` is used to cache methods based on their arguments, using any :class:`dict`-like *cache* object. Args: cache (str/Mapping/callable): Can be the name of an attribute on the instance, any Mapping/:class:`dict`-like object, or a callable which returns a Mapping. scoped (bool): Whether the method itself and the object it is bound to are part of the cache keys. ``True`` by default, different methods will not read one another's cache results. ``False`` can be useful for certain shared cache use cases. 
More advanced behavior can be produced through the *key* arguments. typed (bool): Whether to factor argument types into the cache check. Default ``False``, setting to ``True`` causes the cache keys for ``3`` and ``3.0`` to be considered unequal. key (callable): A callable with a signature that matches :func:`make_cache_key` that returns a tuple of hashable values to be used as the key in the cache. >>> class Lowerer(object): ... def __init__(self): ... self.cache = LRI() ... ... @cachedmethod('cache') ... def lower(self, text): ... return text.lower() ... >>> lowerer = Lowerer() >>> lowerer.lower('WOW WHO COULD GUESS CACHING COULD BE SO NEAT') 'wow who could guess caching could be so neat' >>> len(lowerer.cache) 1 """ def cached_method_decorator(func): return CachedMethod(func, cache, scoped=scoped, typed=typed, key=key) return cached_method_decorator class cachedproperty: """The ``cachedproperty`` is used similar to :class:`property`, except that the wrapped method is only called once. This is commonly used to implement lazy attributes. After the property has been accessed, the value is stored on the instance itself, using the same name as the cachedproperty. This allows the cache to be cleared with :func:`delattr`, or through manipulating the object's ``__dict__``. """ def __init__(self, func): self.__doc__ = getattr(func, '__doc__') self.__isabstractmethod__ = getattr(func, '__isabstractmethod__', False) self.func = func def __get__(self, obj, objtype=None): if obj is None: return self value = obj.__dict__[self.func.__name__] = self.func(obj) return value def __repr__(self): cn = self.__class__.__name__ return f'<{cn} func={self.func}>' class ThresholdCounter: """A **bounded** dict-like Mapping from keys to counts. The ThresholdCounter automatically compacts after every (1 / *threshold*) additions, maintaining exact counts for any keys whose count represents at least a *threshold* ratio of the total data. In other words, if a particular key is not present in the ThresholdCounter, its count represents less than *threshold* of the total data. >>> tc = ThresholdCounter(threshold=0.1) >>> tc.add(1) >>> tc.items() [(1, 1)] >>> tc.update([2] * 10) >>> tc.get(1) 0 >>> tc.add(5) >>> 5 in tc True >>> len(list(tc.elements())) 11 As you can see above, the API is kept similar to :class:`collections.Counter`. The most notable feature omissions being that counted items cannot be set directly, uncounted, or removed, as this would disrupt the math. Use the ThresholdCounter when you need best-effort long-lived counts for dynamically-keyed data. Without a bounded datastructure such as this one, the dynamic keys often represent a memory leak and can impact application reliability. The ThresholdCounter's item replacement strategy is fully deterministic and can be thought of as *Amortized Least Relevant*. The absolute upper bound of keys it will store is *(2/threshold)*, but realistically *(1/threshold)* is expected for uniformly random datastreams, and one or two orders of magnitude better for real-world data. This algorithm is an implementation of the Lossy Counting algorithm described in "Approximate Frequency Counts over Data Streams" by Manku & Motwani. Hat tip to Kurt Rose for discovery and initial implementation. """ # TODO: hit_count/miss_count? 
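    # Illustrative arithmetic (not part of the API): with the default
    # threshold=0.001, _thresh_count is 1000, so every 1000th add() drops
    # entries whose stored [count, bucket] sum no longer exceeds the
    # current bucket -- a key must keep recurring to survive compaction
    # long-term.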
def __init__(self, threshold=0.001): if not 0 < threshold < 1: raise ValueError('expected threshold between 0 and 1, not: %r' % threshold) self.total = 0 self._count_map = {} self._threshold = threshold self._thresh_count = int(1 / threshold) self._cur_bucket = 1 @property def threshold(self): return self._threshold def add(self, key): """Increment the count of *key* by 1, automatically adding it if it does not exist. Cache compaction is triggered every *1/threshold* additions. """ self.total += 1 try: self._count_map[key][0] += 1 except KeyError: self._count_map[key] = [1, self._cur_bucket - 1] if self.total % self._thresh_count == 0: self._count_map = {k: v for k, v in self._count_map.items() if sum(v) > self._cur_bucket} self._cur_bucket += 1 return def elements(self): """Return an iterator of all the common elements tracked by the counter. Yields each key as many times as it has been seen. """ repeaters = itertools.starmap(itertools.repeat, self.iteritems()) return itertools.chain.from_iterable(repeaters) def most_common(self, n=None): """Get the top *n* keys and counts as tuples. If *n* is omitted, returns all the pairs. """ if not n or n <= 0: return [] ret = sorted(self.iteritems(), key=lambda x: x[1], reverse=True) if n is None or n >= len(ret): return ret return ret[:n] def get_common_count(self): """Get the sum of counts for keys exceeding the configured data threshold. """ return sum([count for count, _ in self._count_map.values()]) def get_uncommon_count(self): """Get the sum of counts for keys that were culled because the associated counts represented less than the configured threshold. The long-tail counts. """ return self.total - self.get_common_count() def get_commonality(self): """Get a float representation of the effective count accuracy. The higher the number, the less uniform the keys being added, and the higher accuracy and efficiency of the ThresholdCounter. If a stronger measure of data cardinality is required, consider using hyperloglog. """ return float(self.get_common_count()) / self.total def __getitem__(self, key): return self._count_map[key][0] def __len__(self): return len(self._count_map) def __contains__(self, key): return key in self._count_map def iterkeys(self): return iter(self._count_map) def keys(self): return list(self.iterkeys()) def itervalues(self): count_map = self._count_map for k in count_map: yield count_map[k][0] def values(self): return list(self.itervalues()) def iteritems(self): count_map = self._count_map for k in count_map: yield (k, count_map[k][0]) def items(self): return list(self.iteritems()) def get(self, key, default=0): "Get count for *key*, defaulting to 0." try: return self[key] except KeyError: return default def update(self, iterable, **kwargs): """Like dict.update() but add counts instead of replacing them, used to add multiple items in one call. Source can be an iterable of keys to add, or a mapping of keys to integer counts. """ if iterable is not None: if callable(getattr(iterable, 'iteritems', None)): for key, count in iterable.iteritems(): for i in range(count): self.add(key) else: for key in iterable: self.add(key) if kwargs: self.update(kwargs) class MinIDMap: """ Assigns arbitrary weakref-able objects the smallest possible unique integer IDs, such that no two objects have the same ID at the same time. Maps arbitrary hashable objects to IDs. 
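    An illustrative sketch (``obj_a`` and friends stand in for any
    hypothetical weakref-able objects)::

        midm = MinIDMap()
        midm.get(obj_a)   # -> 0, the smallest ID not yet handed out
        midm.get(obj_b)   # -> 1
        midm.drop(obj_a)  # 0 goes back onto the free heap
        midm.get(obj_c)   # -> 0, the smallest free ID is reused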
Based on https://gist.github.com/kurtbrose/25b48114de216a5e55df """ def __init__(self): self.mapping = weakref.WeakKeyDictionary() self.ref_map = {} self.free = [] def get(self, a): try: return self.mapping[a][0] # if object is mapped, return ID except KeyError: pass if self.free: # if there are any free IDs, use the smallest nxt = heapq.heappop(self.free) else: # if there are no free numbers, use the next highest ID nxt = len(self.mapping) ref = weakref.ref(a, self._clean) self.mapping[a] = (nxt, ref) self.ref_map[ref] = nxt return nxt def drop(self, a): freed, ref = self.mapping[a] del self.mapping[a] del self.ref_map[ref] heapq.heappush(self.free, freed) def _clean(self, ref): print(self.ref_map[ref]) heapq.heappush(self.free, self.ref_map[ref]) del self.ref_map[ref] def __contains__(self, a): return a in self.mapping def __iter__(self): return iter(self.mapping) def __len__(self): return self.mapping.__len__() def iteritems(self): return iter((k, self.mapping[k][0]) for k in iter(self.mapping)) # end cacheutils.py boltons-25.0.0/boltons/debugutils.py000066400000000000000000000241201475005545200174650ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ A small set of utilities useful for debugging misbehaving applications. Currently this focuses on ways to use :mod:`pdb`, the built-in Python debugger. """ import sys import time from reprlib import Repr try: from .typeutils import make_sentinel _UNSET = make_sentinel(var_name='_UNSET') except ImportError: _UNSET = object() __all__ = ['pdb_on_signal', 'pdb_on_exception', 'wrap_trace'] def pdb_on_signal(signalnum=None): """Installs a signal handler for *signalnum*, which defaults to ``SIGINT``, or keyboard interrupt/ctrl-c. This signal handler launches a :mod:`pdb` breakpoint. Results vary in concurrent systems, but this technique can be useful for debugging infinite loops, or easily getting into deep call stacks. Args: signalnum (int): The signal number of the signal to handle with pdb. Defaults to :mod:`signal.SIGINT`, see :mod:`signal` for more information. 
""" import pdb import signal if not signalnum: signalnum = signal.SIGINT old_handler = signal.getsignal(signalnum) def pdb_int_handler(sig, frame): signal.signal(signalnum, old_handler) pdb.set_trace() pdb_on_signal(signalnum) # use 'u' to find your code and 'h' for help signal.signal(signalnum, pdb_int_handler) return def pdb_on_exception(limit=100): """Installs a handler which, instead of exiting, attaches a post-mortem pdb console whenever an unhandled exception is encountered. Args: limit (int): the max number of stack frames to display when printing the traceback A similar effect can be achieved from the command-line using the following command:: python -m pdb your_code.py But ``pdb_on_exception`` allows you to do this conditionally and within your application. To restore default behavior, just do:: sys.excepthook = sys.__excepthook__ """ import pdb import sys import traceback def pdb_excepthook(exc_type, exc_val, exc_tb): traceback.print_tb(exc_tb, limit=limit) pdb.post_mortem(exc_tb) sys.excepthook = pdb_excepthook return _repr_obj = Repr() _repr_obj.maxstring = 50 _repr_obj.maxother = 50 brief_repr = _repr_obj.repr # events: call, return, get, set, del, raise def trace_print_hook(event, label, obj, attr_name, args=(), kwargs={}, result=_UNSET): fargs = (event.ljust(6), time.time(), label.rjust(10), obj.__class__.__name__, attr_name) if event == 'get': tmpl = '%s %s - %s - %s.%s -> %s' fargs += (brief_repr(result),) elif event == 'set': tmpl = '%s %s - %s - %s.%s = %s' fargs += (brief_repr(args[0]),) elif event == 'del': tmpl = '%s %s - %s - %s.%s' else: # call/return/raise tmpl = '%s %s - %s - %s.%s(%s)' fargs += (', '.join([brief_repr(a) for a in args]),) if kwargs: tmpl = '%s %s - %s - %s.%s(%s, %s)' fargs += (', '.join([f'{k}={brief_repr(v)}' for k, v in kwargs.items()]),) if result is not _UNSET: tmpl += ' -> %s' fargs += (brief_repr(result),) print(tmpl % fargs) return def wrap_trace(obj, hook=trace_print_hook, which=None, events=None, label=None): """Monitor an object for interactions. Whenever code calls a method, gets an attribute, or sets an attribute, an event is called. By default the trace output is printed, but a custom tracing *hook* can be passed. Args: obj (object): New- or old-style object to be traced. Built-in objects like lists and dicts also supported. hook (callable): A function called once for every event. See below for details. which (str): One or more attribute names to trace, or a function accepting attribute name and value, and returning True/False. events (str): One or more kinds of events to call *hook* on. Expected values are ``['get', 'set', 'del', 'call', 'raise', 'return']``. Defaults to all events. label (str): A name to associate with the traced object Defaults to hexadecimal memory address, similar to repr. The object returned is not the same object as the one passed in. It will not pass identity checks. However, it will pass :func:`isinstance` checks, as it is a new instance of a new subtype of the object passed. """ # other actions: pdb.set_trace, print, aggregate, aggregate_return # (like aggregate but with the return value) # TODO: test classmethod/staticmethod/property # TODO: wrap __dict__ for old-style classes? 
if isinstance(which, str): which_func = lambda attr_name, attr_val: attr_name == which elif callable(getattr(which, '__contains__', None)): which_func = lambda attr_name, attr_val: attr_name in which elif which is None or callable(which): which_func = which else: raise TypeError('expected attr name(s) or callable, not: %r' % which) label = label or hex(id(obj)) if isinstance(events, str): events = [events] do_get = not events or 'get' in events do_set = not events or 'set' in events do_del = not events or 'del' in events do_call = not events or 'call' in events do_raise = not events or 'raise' in events do_return = not events or 'return' in events def wrap_method(attr_name, func, _hook=hook, _label=label): def wrapped(*a, **kw): a = a[1:] if do_call: hook(event='call', label=_label, obj=obj, attr_name=attr_name, args=a, kwargs=kw) if do_raise: try: ret = func(*a, **kw) except Exception: if not hook(event='raise', label=_label, obj=obj, attr_name=attr_name, args=a, kwargs=kw, result=sys.exc_info()): raise else: ret = func(*a, **kw) if do_return: hook(event='return', label=_label, obj=obj, attr_name=attr_name, args=a, kwargs=kw, result=ret) return ret wrapped.__name__ = func.__name__ wrapped.__doc__ = func.__doc__ try: wrapped.__module__ = func.__module__ except Exception: pass try: if func.__dict__: wrapped.__dict__.update(func.__dict__) except Exception: pass return wrapped def __getattribute__(self, attr_name): ret = type(obj).__getattribute__(obj, attr_name) if callable(ret): # wrap any bound methods ret = type(obj).__getattribute__(self, attr_name) if do_get: hook('get', label, obj, attr_name, (), {}, result=ret) return ret def __setattr__(self, attr_name, value): type(obj).__setattr__(obj, attr_name, value) if do_set: hook('set', label, obj, attr_name, (value,), {}) return def __delattr__(self, attr_name): type(obj).__delattr__(obj, attr_name) if do_del: hook('del', label, obj, attr_name, (), {}) return attrs = {} for attr_name in dir(obj): try: attr_val = getattr(obj, attr_name) except Exception: continue if not callable(attr_val) or attr_name in ('__new__',): continue elif which_func and not which_func(attr_name, attr_val): continue if attr_name == '__getattribute__': wrapped_method = __getattribute__ elif attr_name == '__setattr__': wrapped_method = __setattr__ elif attr_name == '__delattr__': wrapped_method = __delattr__ else: wrapped_method = wrap_method(attr_name, attr_val) attrs[attr_name] = wrapped_method cls_name = obj.__class__.__name__ if cls_name == cls_name.lower(): type_name = 'traced_' + cls_name else: type_name = 'Traced' + cls_name if hasattr(obj, '__mro__'): bases = (obj.__class__,) else: # need new-style class for even basic wrapping of callables to # work. getattribute won't work for old-style classes of course. bases = (obj.__class__, object) trace_type = type(type_name, bases, attrs) for cls in trace_type.__mro__: try: return cls.__new__(trace_type) except Exception: pass raise TypeError('unable to wrap_trace %r instance %r' % (obj.__class__, obj)) boltons-25.0.0/boltons/deprutils.py000066400000000000000000000046561475005545200173450ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import sys from types import ModuleType from warnings import warn # todo: only warn once class DeprecatableModule(ModuleType): def __init__(self, module): name = module.__name__ super().__init__(name=name) self.__dict__.update(module.__dict__) def __getattribute__(self, name): get_attribute = super().__getattribute__ try: depros = get_attribute('_deprecated_members') except AttributeError: self._deprecated_members = depros = {} ret = get_attribute(name) message = depros.get(name) if message is not None: warn(message, DeprecationWarning, stacklevel=2) return ret def deprecate_module_member(mod_name, name, message): module = sys.modules[mod_name] if not isinstance(module, DeprecatableModule): sys.modules[mod_name] = module = DeprecatableModule(module) module._deprecated_members[name] = message return boltons-25.0.0/boltons/dictutils.py000066400000000000000000001114121475005545200173230ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Python has a very powerful mapping type at its core: the :class:`dict` type. While versatile and featureful, the :class:`dict` prioritizes simplicity and performance. As a result, it does not retain the order of item insertion [1]_, nor does it store multiple values per key. It is a fast, unordered 1:1 mapping. The :class:`OrderedMultiDict` contrasts to the built-in :class:`dict`, as a relatively maximalist, ordered 1:n subtype of :class:`dict`. Virtually every feature of :class:`dict` has been retooled to be intuitive in the face of this added complexity. Additional methods have been added, such as :class:`collections.Counter`-like functionality. A prime advantage of the :class:`OrderedMultiDict` (OMD) is its non-destructive nature. Data can be added to an :class:`OMD` without being rearranged or overwritten. The property can allow the developer to work more freely with the data, as well as make more assumptions about where input data will end up in the output, all without any extra work. One great example of this is the :meth:`OMD.inverted()` method, which returns a new OMD with the values as keys and the keys as values. All the data and the respective order is still represented in the inverted form, all from an operation which would be outright wrong and reckless with a built-in :class:`dict` or :class:`collections.OrderedDict`. The OMD has been performance tuned to be suitable for a wide range of usages, including as a basic unordered MultiDict. Special thanks to `Mark Williams`_ for all his help. .. [1] As of 2015, `basic dicts on PyPy are ordered `_, and as of December 2017, `basic dicts in CPython 3 are now ordered `_, as well. .. _Mark Williams: https://github.com/markrwilliams """ from collections.abc import KeysView, ValuesView, ItemsView from itertools import zip_longest try: from .typeutils import make_sentinel _MISSING = make_sentinel(var_name='_MISSING') except ImportError: _MISSING = object() PREV, NEXT, KEY, VALUE, SPREV, SNEXT = range(6) __all__ = ['MultiDict', 'OMD', 'OrderedMultiDict', 'OneToOne', 'ManyToMany', 'subdict', 'FrozenDict'] class OrderedMultiDict(dict): """A MultiDict is a dictionary that can have multiple values per key and the OrderedMultiDict (OMD) is a MultiDict that retains original insertion order. Common use cases include: * handling query strings parsed from URLs * inverting a dictionary to create a reverse index (values to keys) * stacking data from multiple dictionaries in a non-destructive way The OrderedMultiDict constructor is identical to the built-in :class:`dict`, and overall the API constitutes an intuitive superset of the built-in type: >>> omd = OrderedMultiDict() >>> omd['a'] = 1 >>> omd['b'] = 2 >>> omd.add('a', 3) >>> omd.get('a') 3 >>> omd.getlist('a') [1, 3] Some non-:class:`dict`-like behaviors also make an appearance, such as support for :func:`reversed`: >>> list(reversed(omd)) ['b', 'a'] Note that unlike some other MultiDicts, this OMD gives precedence to the most recent value added. ``omd['a']`` refers to ``3``, not ``1``. >>> omd OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)]) >>> omd.poplast('a') 3 >>> omd OrderedMultiDict([('a', 1), ('b', 2)]) >>> omd.pop('a') 1 >>> omd OrderedMultiDict([('b', 2)]) If you want a safe-to-modify or flat dictionary, use :meth:`OrderedMultiDict.todict()`. 
>>> from pprint import pprint as pp # preserve printed ordering >>> omd = OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)]) >>> pp(omd.todict()) {'a': 3, 'b': 2} >>> pp(omd.todict(multi=True)) {'a': [1, 3], 'b': [2]} With ``multi=False``, items appear with the keys in their original insertion order, alongside the most-recently inserted value for that key. >>> OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)]).items(multi=False) [('a', 3), ('b', 2)] .. warning:: ``dict(omd)`` changed behavior `in Python 3.7 `_ due to changes made to support the transition from :class:`collections.OrderedDict` to the built-in dictionary being ordered. Before 3.7, the result would be a new dictionary, with values that were lists, similar to ``omd.todict(multi=True)`` (but only shallow-copy; the lists were direct references to OMD internal structures). From 3.7 onward, the values became singular, like ``omd.todict(multi=False)``. For reliable cross-version behavior, just use :meth:`~OrderedMultiDict.todict()`. """ def __new__(cls, *a, **kw): ret = super().__new__(cls) ret._clear_ll() return ret def __init__(self, *args, **kwargs): if len(args) > 1: raise TypeError('%s expected at most 1 argument, got %s' % (self.__class__.__name__, len(args))) super().__init__() if args: self.update_extend(args[0]) if kwargs: self.update(kwargs) def __getstate__(self): return list(self.iteritems(multi=True)) def __setstate__(self, state): self.clear() self.update_extend(state) def _clear_ll(self): try: _map = self._map except AttributeError: _map = self._map = {} self.root = [] _map.clear() self.root[:] = [self.root, self.root, None] def _insert(self, k, v): root = self.root cells = self._map.setdefault(k, []) last = root[PREV] cell = [last, root, k, v] last[NEXT] = root[PREV] = cell cells.append(cell) def add(self, k, v): """Add a single value *v* under a key *k*. Existing values under *k* are preserved. """ values = super().setdefault(k, []) self._insert(k, v) values.append(v) def addlist(self, k, v): """Add an iterable of values underneath a specific key, preserving any values already under that key. >>> omd = OrderedMultiDict([('a', -1)]) >>> omd.addlist('a', range(3)) >>> omd OrderedMultiDict([('a', -1), ('a', 0), ('a', 1), ('a', 2)]) Called ``addlist`` for consistency with :meth:`getlist`, but tuples and other sequences and iterables work. """ if not v: return self_insert = self._insert values = super().setdefault(k, []) for subv in v: self_insert(k, subv) values.extend(v) def get(self, k, default=None): """Return the value for key *k* if present in the dictionary, else *default*. If *default* is not given, ``None`` is returned. This method never raises a :exc:`KeyError`. To get all values under a key, use :meth:`OrderedMultiDict.getlist`. """ return super().get(k, [default])[-1] def getlist(self, k, default=_MISSING): """Get all values for key *k* as a list, if *k* is in the dictionary, else *default*. The list returned is a copy and can be safely mutated. If *default* is not given, an empty :class:`list` is returned. """ try: return super().__getitem__(k)[:] except KeyError: if default is _MISSING: return [] return default def clear(self): "Empty the dictionary." super().clear() self._clear_ll() def setdefault(self, k, default=_MISSING): """If key *k* is in the dictionary, return its value. If not, insert *k* with a value of *default* and return *default*. *default* defaults to ``None``. See :meth:`dict.setdefault` for more information.
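        For example:

        >>> omd = OrderedMultiDict()
        >>> omd.setdefault('a', 3)
        3
        >>> omd['a']
        3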
""" if not super().__contains__(k): self[k] = None if default is _MISSING else default return self[k] def copy(self): "Return a shallow copy of the dictionary." return self.__class__(self.iteritems(multi=True)) @classmethod def fromkeys(cls, keys, default=None): """Create a dictionary from a list of keys, with all the values set to *default*, or ``None`` if *default* is not set. """ return cls([(k, default) for k in keys]) def update(self, E, **F): """Add items from a dictionary or iterable (and/or keyword arguments), overwriting values under an existing key. See :meth:`dict.update` for more details. """ # E and F are throwback names to the dict() __doc__ if E is self: return self_add = self.add if isinstance(E, OrderedMultiDict): for k in E: if k in self: del self[k] for k, v in E.iteritems(multi=True): self_add(k, v) elif callable(getattr(E, 'keys', None)): for k in E.keys(): self[k] = E[k] else: seen = set() seen_add = seen.add for k, v in E: if k not in seen and k in self: del self[k] seen_add(k) self_add(k, v) for k in F: self[k] = F[k] return def update_extend(self, E, **F): """Add items from a dictionary, iterable, and/or keyword arguments without overwriting existing items present in the dictionary. Like :meth:`update`, but adds to existing keys instead of overwriting them. """ if E is self: iterator = iter(E.items()) elif isinstance(E, OrderedMultiDict): iterator = E.iteritems(multi=True) elif hasattr(E, 'keys'): iterator = ((k, E[k]) for k in E.keys()) else: iterator = E self_add = self.add for k, v in iterator: self_add(k, v) def __setitem__(self, k, v): if super().__contains__(k): self._remove_all(k) self._insert(k, v) super().__setitem__(k, [v]) def __getitem__(self, k): return super().__getitem__(k)[-1] def __delitem__(self, k): super().__delitem__(k) self._remove_all(k) def __eq__(self, other): if self is other: return True try: if len(other) != len(self): return False except TypeError: return False if isinstance(other, OrderedMultiDict): selfi = self.iteritems(multi=True) otheri = other.iteritems(multi=True) zipped_items = zip_longest(selfi, otheri, fillvalue=(None, None)) for (selfk, selfv), (otherk, otherv) in zipped_items: if selfk != otherk or selfv != otherv: return False if not(next(selfi, _MISSING) is _MISSING and next(otheri, _MISSING) is _MISSING): # leftovers (TODO: watch for StopIteration?) return False return True elif hasattr(other, 'keys'): for selfk in self: try: other[selfk] == self[selfk] except KeyError: return False return True return False def __ne__(self, other): return not (self == other) def __ior__(self, other): self.update(other) return self def pop(self, k, default=_MISSING): """Remove all values under key *k*, returning the most-recently inserted value. Raises :exc:`KeyError` if the key is not present and no *default* is provided. """ try: return self.popall(k)[-1] except KeyError: if default is _MISSING: raise KeyError(k) return default def popall(self, k, default=_MISSING): """Remove all values under key *k*, returning them in the form of a list. Raises :exc:`KeyError` if the key is not present and no *default* is provided. """ super_self = super() if super_self.__contains__(k): self._remove_all(k) if default is _MISSING: return super_self.pop(k) return super_self.pop(k, default) def poplast(self, k=_MISSING, default=_MISSING): """Remove and return the most-recently inserted value under the key *k*, or the most-recently inserted key if *k* is not provided. If no values remain under *k*, it will be removed from the OMD. 
Raises :exc:`KeyError` if *k* is not present in the dictionary, or the dictionary is empty. """ if k is _MISSING: if self: k = self.root[PREV][KEY] else: if default is _MISSING: raise KeyError('empty %r' % type(self)) return default try: self._remove(k) except KeyError: if default is _MISSING: raise KeyError(k) return default values = super().__getitem__(k) v = values.pop() if not values: super().__delitem__(k) return v def _remove(self, k): values = self._map[k] cell = values.pop() cell[PREV][NEXT], cell[NEXT][PREV] = cell[NEXT], cell[PREV] if not values: del self._map[k] def _remove_all(self, k): values = self._map[k] while values: cell = values.pop() cell[PREV][NEXT], cell[NEXT][PREV] = cell[NEXT], cell[PREV] del self._map[k] def iteritems(self, multi=False): """Iterate over the OMD's items in insertion order. By default, yields only the most-recently inserted value for each key. Set *multi* to ``True`` to get all inserted items. """ root = self.root curr = root[NEXT] if multi: while curr is not root: yield curr[KEY], curr[VALUE] curr = curr[NEXT] else: for key in self.iterkeys(): yield key, self[key] def iterkeys(self, multi=False): """Iterate over the OMD's keys in insertion order. By default, yields each key once, according to the most recent insertion. Set *multi* to ``True`` to get all keys, including duplicates, in insertion order. """ root = self.root curr = root[NEXT] if multi: while curr is not root: yield curr[KEY] curr = curr[NEXT] else: yielded = set() yielded_add = yielded.add while curr is not root: k = curr[KEY] if k not in yielded: yielded_add(k) yield k curr = curr[NEXT] def itervalues(self, multi=False): """Iterate over the OMD's values in insertion order. By default, yields the most-recently inserted value per unique key. Set *multi* to ``True`` to get all values according to insertion order. """ for k, v in self.iteritems(multi=multi): yield v def todict(self, multi=False): """Gets a basic :class:`dict` of the items in this dictionary. Keys are the same as the OMD, values are the most recently inserted values for each key. Setting the *multi* arg to ``True`` yields the same result as calling :class:`dict` on the OMD, except that all the value lists are copies that can be safely mutated. """ if multi: return {k: self.getlist(k) for k in self} return {k: self[k] for k in self} def sorted(self, key=None, reverse=False): """Similar to the built-in :func:`sorted`, except this method returns a new :class:`OrderedMultiDict` sorted by the provided key function, optionally reversed. Args: key (callable): A callable to determine the sort key of each element. The callable should expect an **item** (key-value pair tuple). reverse (bool): Set to ``True`` to reverse the ordering. >>> omd = OrderedMultiDict(zip(range(3), range(3))) >>> omd.sorted(reverse=True) OrderedMultiDict([(2, 2), (1, 1), (0, 0)]) Note that the key function receives an **item** (key-value tuple), so the recommended signature looks like: >>> omd = OrderedMultiDict(zip('hello', 'world')) >>> omd.sorted(key=lambda i: i[1]) # i[0] is the key, i[1] is the val OrderedMultiDict([('o', 'd'), ('l', 'l'), ('e', 'o'), ('l', 'r'), ('h', 'w')]) """ cls = self.__class__ return cls(sorted(self.iteritems(multi=True), key=key, reverse=reverse)) def sortedvalues(self, key=None, reverse=False): """Returns a copy of the :class:`OrderedMultiDict` with the same keys in the same order as the original OMD, but the values within each keyspace have been sorted according to *key* and *reverse*.
Args: key (callable): A single-argument callable to determine the sort key of each element. The callable should expect an **item** (key-value pair tuple). reverse (bool): Set to ``True`` to reverse the ordering. >>> omd = OrderedMultiDict() >>> omd.addlist('even', [6, 2]) >>> omd.addlist('odd', [1, 5]) >>> omd.add('even', 4) >>> omd.add('odd', 3) >>> somd = omd.sortedvalues() >>> somd.getlist('even') [2, 4, 6] >>> somd.keys(multi=True) == omd.keys(multi=True) True >>> omd == somd False >>> somd OrderedMultiDict([('even', 2), ('even', 4), ('odd', 1), ('odd', 3), ('even', 6), ('odd', 5)]) As demonstrated above, contents and key order are retained. Only value order changes. """ try: superself_iteritems = super().iteritems() except AttributeError: superself_iteritems = super().items() # (not reverse) because they pop off in reverse order for reinsertion sorted_val_map = {k: sorted(v, key=key, reverse=(not reverse)) for k, v in superself_iteritems} ret = self.__class__() for k in self.iterkeys(multi=True): ret.add(k, sorted_val_map[k].pop()) return ret def inverted(self): """Returns a new :class:`OrderedMultiDict` with values and keys swapped, like a dictionary transposition or a reverse index. Insertion order is retained and all keys and values are represented in the output. >>> omd = OMD([(0, 2), (1, 2)]) >>> omd.inverted().getlist(2) [0, 1] Inverting twice yields a copy of the original: >>> omd.inverted().inverted() OrderedMultiDict([(0, 2), (1, 2)]) """ return self.__class__((v, k) for k, v in self.iteritems(multi=True)) def counts(self): """Returns a mapping from key to number of values inserted under that key. Like :py:class:`collections.Counter`, but returns a new :class:`OrderedMultiDict`. """ # Returns an OMD because Counter/OrderedDict may not be # available, and neither Counter nor dict maintain order. super_getitem = super().__getitem__ return self.__class__((k, len(super_getitem(k))) for k in self) def keys(self, multi=False): """Returns a list containing the output of :meth:`iterkeys`. See that method's docs for more details. """ return list(self.iterkeys(multi=multi)) def values(self, multi=False): """Returns a list containing the output of :meth:`itervalues`. See that method's docs for more details. """ return list(self.itervalues(multi=multi)) def items(self, multi=False): """Returns a list containing the output of :meth:`iteritems`. See that method's docs for more details. """ return list(self.iteritems(multi=multi)) def __iter__(self): return self.iterkeys() def __reversed__(self): root = self.root curr = root[PREV] lengths = {} lengths_sd = lengths.setdefault get_values = super().__getitem__ while curr is not root: k = curr[KEY] vals = get_values(k) if lengths_sd(k, 1) == len(vals): yield k lengths[k] += 1 curr = curr[PREV] def __repr__(self): cn = self.__class__.__name__ kvs = ', '.join([repr((k, v)) for k, v in self.iteritems(multi=True)]) return f'{cn}([{kvs}])' def viewkeys(self): "OMD.viewkeys() -> a set-like object providing a view on OMD's keys" return KeysView(self) def viewvalues(self): "OMD.viewvalues() -> an object providing a view on OMD's values" return ValuesView(self) def viewitems(self): "OMD.viewitems() -> a set-like object providing a view on OMD's items" return ItemsView(self) # A couple of convenient aliases OMD = OrderedMultiDict MultiDict = OrderedMultiDict class FastIterOrderedMultiDict(OrderedMultiDict): """An OrderedMultiDict backed by a skip list.
Iteration over keys is faster and uses constant memory, but adding duplicate key-value pairs is slower. Brainchild of Mark Williams. """ def _clear_ll(self): # TODO: always reset objects? (i.e., no else block below) try: _map = self._map except AttributeError: _map = self._map = {} self.root = [] _map.clear() self.root[:] = [self.root, self.root, None, None, self.root, self.root] def _insert(self, k, v): root = self.root empty = [] cells = self._map.setdefault(k, empty) last = root[PREV] if cells is empty: cell = [last, root, k, v, last, root] # was the last one skipped? if last[SPREV][SNEXT] is root: last[SPREV][SNEXT] = cell last[NEXT] = last[SNEXT] = root[PREV] = root[SPREV] = cell cells.append(cell) else: # if the previous was skipped, go back to the cell that # skipped it sprev = last[SPREV] if (last[SPREV][SNEXT] is not last) else last cell = [last, root, k, v, sprev, root] # skip me last[SNEXT] = root last[NEXT] = root[PREV] = root[SPREV] = cell cells.append(cell) def _remove(self, k): cells = self._map[k] cell = cells.pop() if not cells: del self._map[k] cell[PREV][SNEXT] = cell[SNEXT] if cell[PREV][SPREV][SNEXT] is cell: cell[PREV][SPREV][SNEXT] = cell[NEXT] elif cell[SNEXT] is cell[NEXT]: cell[SPREV][SNEXT], cell[SNEXT][SPREV] = cell[SNEXT], cell[SPREV] cell[PREV][NEXT], cell[NEXT][PREV] = cell[NEXT], cell[PREV] def _remove_all(self, k): cells = self._map.pop(k) while cells: cell = cells.pop() if cell[PREV][SPREV][SNEXT] is cell: cell[PREV][SPREV][SNEXT] = cell[NEXT] elif cell[SNEXT] is cell[NEXT]: cell[SPREV][SNEXT], cell[SNEXT][SPREV] = cell[SNEXT], cell[SPREV] cell[PREV][NEXT], cell[NEXT][PREV] = cell[NEXT], cell[PREV] cell[PREV][SNEXT] = cell[SNEXT] def iteritems(self, multi=False): next_link = NEXT if multi else SNEXT root = self.root curr = root[next_link] while curr is not root: yield curr[KEY], curr[VALUE] curr = curr[next_link] def iterkeys(self, multi=False): next_link = NEXT if multi else SNEXT root = self.root curr = root[next_link] while curr is not root: yield curr[KEY] curr = curr[next_link] def __reversed__(self): root = self.root curr = root[PREV] while curr is not root: if curr[SPREV][SNEXT] is not curr: curr = curr[SPREV] if curr is root: break yield curr[KEY] curr = curr[PREV] _OTO_INV_MARKER = object() _OTO_UNIQUE_MARKER = object() class OneToOne(dict): """Implements a one-to-one mapping dictionary. In addition to inheriting from and behaving exactly like the builtin :class:`dict`, all values are automatically added as keys on a reverse mapping, available as the `inv` attribute. This arrangement keeps key and value namespaces distinct. Basic operations are intuitive: >>> oto = OneToOne({'a': 1, 'b': 2}) >>> print(oto['a']) 1 >>> print(oto.inv[1]) a >>> len(oto) 2 Overwrites happen in both directions: >>> oto.inv[1] = 'c' >>> print(oto.get('a')) None >>> len(oto) 2 For a very similar project, with even more one-to-one functionality, check out `bidict `_.
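Deletes are mirrored as well:

>>> del oto['b']
>>> oto.inv.get(2) is None
True
>>> len(oto)
1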
""" __slots__ = ('inv',) def __init__(self, *a, **kw): raise_on_dupe = False if a: if a[0] is _OTO_INV_MARKER: self.inv = a[1] dict.__init__(self, [(v, k) for k, v in self.inv.items()]) return elif a[0] is _OTO_UNIQUE_MARKER: a, raise_on_dupe = a[1:], True dict.__init__(self, *a, **kw) self.inv = self.__class__(_OTO_INV_MARKER, self) if len(self) == len(self.inv): # if lengths match, that means everything's unique return if not raise_on_dupe: dict.clear(self) dict.update(self, [(v, k) for k, v in self.inv.items()]) return # generate an error message if the values aren't 1:1 val_multidict = {} for k, v in self.items(): val_multidict.setdefault(v, []).append(k) dupes = {v: k_list for v, k_list in val_multidict.items() if len(k_list) > 1} raise ValueError('expected unique values, got multiple keys for' ' the following values: %r' % dupes) @classmethod def unique(cls, *a, **kw): """This alternate constructor for OneToOne will raise an exception when input values overlap. For instance: >>> OneToOne.unique({'a': 1, 'b': 1}) Traceback (most recent call last): ... ValueError: expected unique values, got multiple keys for the following values: ... This even works across inputs: >>> a_dict = {'a': 2} >>> OneToOne.unique(a_dict, b=2) Traceback (most recent call last): ... ValueError: expected unique values, got multiple keys for the following values: ... """ return cls(_OTO_UNIQUE_MARKER, *a, **kw) def __setitem__(self, key, val): hash(val) # ensure val is a valid key if key in self: dict.__delitem__(self.inv, self[key]) if val in self.inv: del self.inv[val] dict.__setitem__(self, key, val) dict.__setitem__(self.inv, val, key) def __delitem__(self, key): dict.__delitem__(self.inv, self[key]) dict.__delitem__(self, key) def clear(self): dict.clear(self) dict.clear(self.inv) def copy(self): return self.__class__(self) def pop(self, key, default=_MISSING): if key in self: dict.__delitem__(self.inv, self[key]) return dict.pop(self, key) if default is not _MISSING: return default raise KeyError() def popitem(self): key, val = dict.popitem(self) dict.__delitem__(self.inv, val) return key, val def setdefault(self, key, default=None): if key not in self: self[key] = default return self[key] def update(self, dict_or_iterable, **kw): keys_vals = [] if isinstance(dict_or_iterable, dict): for val in dict_or_iterable.values(): hash(val) keys_vals = list(dict_or_iterable.items()) else: for key, val in dict_or_iterable: hash(key) hash(val) keys_vals = list(dict_or_iterable) for val in kw.values(): hash(val) keys_vals.extend(kw.items()) for key, val in keys_vals: self[key] = val def __repr__(self): cn = self.__class__.__name__ dict_repr = dict.__repr__(self) return f"{cn}({dict_repr})" # marker for the secret handshake used internally to set up the invert ManyToMany _PAIRING = object() class ManyToMany: """ a dict-like entity that represents a many-to-many relationship between two groups of objects behaves like a dict-of-tuples; also has .inv which is kept up to date which is a dict-of-tuples in the other direction also, can be used as a directed graph among hashable python objects """ def __init__(self, items=None): self.data = {} if type(items) is tuple and items and items[0] is _PAIRING: self.inv = items[1] else: self.inv = self.__class__((_PAIRING, self)) if items: self.update(items) return def get(self, key, default=frozenset()): try: return self[key] except KeyError: return default def __getitem__(self, key): return frozenset(self.data[key]) def __setitem__(self, key, vals): vals = set(vals) if key in self: 
to_remove = self.data[key] - vals vals -= self.data[key] for val in to_remove: self.remove(key, val) for val in vals: self.add(key, val) def __delitem__(self, key): for val in self.data.pop(key): self.inv.data[val].remove(key) if not self.inv.data[val]: del self.inv.data[val] def update(self, iterable): """given an iterable of (key, val), add them all""" if type(iterable) is type(self): other = iterable for k in other.data: if k not in self.data: self.data[k] = set(other.data[k]) # copy, to avoid sharing sets across instances else: self.data[k].update(other.data[k]) for k in other.inv.data: if k not in self.inv.data: self.inv.data[k] = set(other.inv.data[k]) else: self.inv.data[k].update(other.inv.data[k]) elif callable(getattr(iterable, 'keys', None)): for k in iterable.keys(): self.add(k, iterable[k]) else: for key, val in iterable: self.add(key, val) return def add(self, key, val): if key not in self.data: self.data[key] = set() self.data[key].add(val) if val not in self.inv.data: self.inv.data[val] = set() self.inv.data[val].add(key) def remove(self, key, val): self.data[key].remove(val) if not self.data[key]: del self.data[key] self.inv.data[val].remove(key) if not self.inv.data[val]: del self.inv.data[val] def replace(self, key, newkey): """ replace instances of key by newkey """ if key not in self.data: return self.data[newkey] = fwdset = self.data.pop(key) for val in fwdset: revset = self.inv.data[val] revset.remove(key) revset.add(newkey) def iteritems(self): for key in self.data: for val in self.data[key]: yield key, val def keys(self): return self.data.keys() def __contains__(self, key): return key in self.data def __iter__(self): return self.data.__iter__() def __len__(self): return self.data.__len__() def __eq__(self, other): return type(self) == type(other) and self.data == other.data def __repr__(self): cn = self.__class__.__name__ return f'{cn}({list(self.iteritems())!r})' def subdict(d, keep=None, drop=None): """Compute the "subdictionary" of a dict, *d*. A subdict is to a dict what a subset is to a set. If *A* is a subdict of *B*, that means that all keys of *A* are present in *B*. Returns a new dict with any keys in *drop* removed, and any keys in *keep* still present, provided they were in the original dict. *keep* defaults to all keys, *drop* defaults to empty, so without one of these arguments, calling this function is equivalent to calling ``dict()``. >>> from pprint import pprint as pp >>> pp(subdict({'a': 1, 'b': 2})) {'a': 1, 'b': 2} >>> subdict({'a': 1, 'b': 2, 'c': 3}, drop=['b', 'c']) {'a': 1} >>> pp(subdict({'a': 1, 'b': 2, 'c': 3}, keep=['a', 'c'])) {'a': 1, 'c': 3} """ if keep is None: keep = d.keys() if drop is None: drop = [] keys = set(keep) - set(drop) return type(d)([(k, v) for k, v in d.items() if k in keys]) class FrozenHashError(TypeError): pass class FrozenDict(dict): """An immutable dict subtype that is hashable and can itself be used as a :class:`dict` key or :class:`set` entry. What :class:`frozenset` is to :class:`set`, FrozenDict is to :class:`dict`. There was once an attempt to introduce such a type to the standard library, but it was rejected: `PEP 416 `_. Because FrozenDict is a :class:`dict` subtype, it automatically works everywhere a dict would, including JSON serialization. """ __slots__ = ('_hash',) def updated(self, *a, **kw): """Make a copy and add items from a dictionary or iterable (and/or keyword arguments), overwriting values under an existing key. See :meth:`dict.update` for more details.
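The original instance is left untouched:

>>> fd = FrozenDict({'a': 1})
>>> fd.updated(b=2)
FrozenDict({'a': 1, 'b': 2})
>>> fd
FrozenDict({'a': 1})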
""" data = dict(self) data.update(*a, **kw) return type(self)(data) @classmethod def fromkeys(cls, keys, value=None): # one of the lesser known and used/useful dict methods return cls(dict.fromkeys(keys, value)) def __repr__(self): cn = self.__class__.__name__ return f'{cn}({dict.__repr__(self)})' def __reduce_ex__(self, protocol): return type(self), (dict(self),) def __hash__(self): try: ret = self._hash except AttributeError: try: ret = self._hash = hash(frozenset(self.items())) except Exception as e: ret = self._hash = FrozenHashError(e) if ret.__class__ is FrozenHashError: raise ret return ret def __copy__(self): return self # immutable types don't copy, see tuple's behavior # block everything else def _raise_frozen_typeerror(self, *a, **kw): "raises a TypeError, because FrozenDicts are immutable" raise TypeError('%s object is immutable' % self.__class__.__name__) __ior__ = __setitem__ = __delitem__ = update = _raise_frozen_typeerror setdefault = pop = popitem = clear = _raise_frozen_typeerror del _raise_frozen_typeerror # end dictutils.py boltons-25.0.0/boltons/easterutils.py000066400000000000000000000034061475005545200176660ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. def gobs_program(): """ A pure-Python implementation of Gob's Algorithm (2006). A brief explanation can be found here: https://www.youtube.com/watch?v=JbnjusltDHk """ while True: print("Penus", end=" ") if __name__ == '__main__': gobs_program() boltons-25.0.0/boltons/ecoutils.py000066400000000000000000000333241475005545200171530ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. 
# # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """As a programming ecosystem grows, so do the chances of runtime variability. Python boasts one of the widest deployments for a high-level programming environment, making it a viable target for all manner of applications. But with breadth comes variance, so it's important to know what you're working with. Some basic variations that are common among development machines: * **Executable runtime**: CPython, PyPy, Jython, etc., plus build date and compiler * **Language version**: 2.7 through 3.12 * **Host operating system**: Windows, OS X, Ubuntu, Debian, CentOS, RHEL, etc. * **Features**: 64-bit, IPv6, Unicode character support (UCS-2/UCS-4) * **Built-in library support**: OpenSSL, threading, SQLite, zlib * **User environment**: umask, ulimit, working directory path * **Machine info**: CPU count, hostname, filesystem encoding See the full example profile below for more. ecoutils was created to quantify that variability. ecoutils quickly produces an information-dense description of critical runtime factors, with minimal side effects. In short, ecoutils is like browser and user agent analytics, but for Python environments. Transmission and collection --------------------------- The data is all JSON serializable, and is suitable for sending to a central analytics server. An HTTP-backed service for this can be found at: https://github.com/mahmoud/espymetrics/ Notable omissions ----------------- Due to space constraints (and possibly latency constraints), the following information is deemed not dense enough, and thus omitted: * :data:`sys.path` * full :mod:`sysconfig` * environment variables (:data:`os.environ`) Compatibility ------------- So far ecoutils has been tested on Python 3.7+ and PyPy3. Various versions have been tested on Ubuntu, Debian, RHEL, OS X, FreeBSD, and Windows 7. .. note:: ``boltons.ecoutils`` historically supported back to Python 2.4, but in 2024, due to increasing testing burden, ecoutils support tracks the same versions of Python as the rest of the boltons package. For older Pythons, see `this version`_ from boltons 23.0.0. .. _this version: https://github.com/mahmoud/boltons/blob/4b1d728f31a8378b193be9c966c853be0a57527d/boltons/ecoutils.py Profile generation ------------------ Profiles are generated by :func:`ecoutils.get_profile`.
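From Python code, the profile is a plain, JSON-serializable dictionary, with
keys as in the JSON sample below. A quick sketch of programmatic use::

    from boltons import ecoutils

    profile = ecoutils.get_profile()  # get_profile(scrub=True) redacts identifiers
    print(profile['python']['version_info'])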
When run as a module, ecoutils will call :func:`~ecoutils.get_profile` and print a profile in JSON format:: $ python -m boltons.ecoutils { "_eco_version": "1.0.0", "cpu_count": 4, "cwd": "/home/mahmoud/projects/boltons", "fs_encoding": "UTF-8", "guid": "6b139e7bbf5ad4ed8d4063bf6235b4d2", "hostfqdn": "mahmoud-host", "hostname": "mahmoud-host", "linux_dist_name": "Ubuntu", "linux_dist_version": "14.04", "python": { "argv": "boltons/ecoutils.py", "bin": "/usr/bin/python", "build_date": "Jun 22 2015 17:58:13", "compiler": "GCC 4.8.2", "features": { "64bit": true, "expat": "expat_2.1.0", "ipv6": true, "openssl": "OpenSSL 1.0.1f 6 Jan 2014", "readline": true, "sqlite": "3.8.2", "threading": true, "tkinter": "8.6", "unicode_wide": true, "zlib": "1.2.8" }, "version": "2.7.6 (default, Jun 22 2015, 17:58:13) [GCC 4.8.2]", "version_info": [ 2, 7, 6, "final", 0 ] }, "time_utc": "2016-05-24 07:59:40.473140", "time_utc_offset": -8.0, "ulimit_hard": 4096, "ulimit_soft": 1024, "umask": "002", "uname": { "machine": "x86_64", "node": "mahmoud-host", "processor": "x86_64", "release": "3.13.0-85-generic", "system": "Linux", "version": "#129-Ubuntu SMP Thu Mar 17 20:50:15 UTC 2016" }, "username": "mahmoud" } ``pip install boltons`` and try it yourself! """ import re import os import sys import json import time import random import socket import struct import getpass import datetime import platform ECO_VERSION = '1.1.0' # see version history below try: getrandbits = random.SystemRandom().getrandbits HAVE_URANDOM = True except Exception: HAVE_URANDOM = False getrandbits = random.getrandbits # 128-bit GUID just like a UUID, but backwards compatible to 2.4 INSTANCE_ID = hex(getrandbits(128))[2:-1].lower() IS_64BIT = struct.calcsize("P") > 4 HAVE_UCS4 = getattr(sys, 'maxunicode', 0) > 65536 HAVE_READLINE = True try: import readline except Exception: HAVE_READLINE = False try: import sqlite3 SQLITE_VERSION = sqlite3.sqlite_version except Exception: # note: 2.5 and older have sqlite, but not sqlite3 SQLITE_VERSION = '' try: import ssl try: OPENSSL_VERSION = ssl.OPENSSL_VERSION except AttributeError: # This is a conservative estimate for Python <2.6 # SSL module added in 2006, when 0.9.7 was standard OPENSSL_VERSION = 'OpenSSL >0.8.0' except Exception: OPENSSL_VERSION = '' try: import tkinter TKINTER_VERSION = str(tkinter.TkVersion) except Exception: TKINTER_VERSION = '' try: import zlib ZLIB_VERSION = zlib.ZLIB_VERSION except Exception: ZLIB_VERSION = '' try: from xml.parsers import expat EXPAT_VERSION = expat.EXPAT_VERSION except Exception: EXPAT_VERSION = '' try: from multiprocessing import cpu_count CPU_COUNT = cpu_count() except Exception: CPU_COUNT = 0 try: import threading HAVE_THREADING = True except Exception: HAVE_THREADING = False try: HAVE_IPV6 = socket.has_ipv6 except Exception: HAVE_IPV6 = False try: from resource import getrlimit, RLIMIT_NOFILE RLIMIT_FDS_SOFT, RLIMIT_FDS_HARD = getrlimit(RLIMIT_NOFILE) except Exception: RLIMIT_FDS_SOFT, RLIMIT_FDS_HARD = 0, 0 START_TIME_INFO = {'time_utc': str(datetime.datetime.now(datetime.timezone.utc)), 'time_utc_offset': -time.timezone / 3600.0} def get_python_info(): ret = {} ret['argv'] = _escape_shell_args(sys.argv) ret['bin'] = sys.executable # Even though compiler/build_date are already here, they're # actually parsed from the version string. So, in the rare case of # the unparsable version string, we're still transmitting it. 
ret['version'] = ' '.join(sys.version.split()) ret['compiler'] = platform.python_compiler() ret['build_date'] = platform.python_build()[1] ret['version_info'] = list(sys.version_info) ret['features'] = {'openssl': OPENSSL_VERSION, 'expat': EXPAT_VERSION, 'sqlite': SQLITE_VERSION, 'tkinter': TKINTER_VERSION, 'zlib': ZLIB_VERSION, 'unicode_wide': HAVE_UCS4, 'readline': HAVE_READLINE, '64bit': IS_64BIT, 'ipv6': HAVE_IPV6, 'threading': HAVE_THREADING, 'urandom': HAVE_URANDOM} return ret def get_profile(**kwargs): """The main entrypoint to ecoutils. Calling this will return a JSON-serializable dictionary of information about the current process. It is very unlikely that the information returned will change during the lifetime of the process, and in most cases the majority of the information stays the same between runs as well. :func:`get_profile` takes one optional keyword argument, *scrub*, a :class:`bool` that, if True, blanks out identifiable information. This includes current working directory, hostname, Python executable path, command-line arguments, and username. Values are replaced with '-', but for compatibility keys remain in place. """ scrub = kwargs.pop('scrub', False) if kwargs: raise TypeError(f'unexpected keyword arguments: {kwargs.keys()!r}') ret = {} try: ret['username'] = getpass.getuser() except Exception: ret['username'] = '' ret['guid'] = str(INSTANCE_ID) ret['hostname'] = socket.gethostname() ret['hostfqdn'] = socket.getfqdn() uname = platform.uname() ret['uname'] = {'system': uname[0], 'node': uname[1], 'release': uname[2], # linux: distro name 'version': uname[3], # linux: kernel version 'machine': uname[4], 'processor': uname[5]} try: # TODO: removed in 3.7, replaced with freedesktop_os_release in 3.10 linux_dist = platform.linux_distribution() except Exception: linux_dist = ('', '', '') ret['linux_dist_name'] = linux_dist[0] ret['linux_dist_version'] = linux_dist[1] ret['cpu_count'] = CPU_COUNT ret['fs_encoding'] = sys.getfilesystemencoding() ret['ulimit_soft'] = RLIMIT_FDS_SOFT ret['ulimit_hard'] = RLIMIT_FDS_HARD ret['cwd'] = os.getcwd() ret['umask'] = oct(os.umask(os.umask(2))).rjust(3, '0') ret['python'] = get_python_info() ret.update(START_TIME_INFO) ret['_eco_version'] = ECO_VERSION if scrub: # mask identifiable information ret['cwd'] = '-' ret['hostname'] = '-' ret['hostfqdn'] = '-' ret['python']['bin'] = '-' ret['python']['argv'] = '-' ret['uname']['node'] = '-' ret['username'] = '-' return ret def dumps(val, indent): if indent: return json.dumps(val, sort_keys=True, indent=indent) return json.dumps(val, sort_keys=True) def get_profile_json(indent=False): if indent: indent = 2 else: indent = 0 data_dict = get_profile() return dumps(data_dict, indent) def main(): print(get_profile_json(indent=True)) ############################################# # The shell escaping copied in from strutils ############################################# def _escape_shell_args(args, sep=' ', style=None): if not style: if sys.platform == 'win32': style = 'cmd' else: style = 'sh' if style == 'sh': return _args2sh(args, sep=sep) elif style == 'cmd': return _args2cmd(args, sep=sep) raise ValueError("style expected one of 'cmd' or 'sh', not %r" % style) _find_sh_unsafe = re.compile(r'[^a-zA-Z0-9_@%+=:,./-]').search def _args2sh(args, sep=' '): # see strutils ret_list = [] for arg in args: if not arg: ret_list.append("''") continue if _find_sh_unsafe(arg) is None: ret_list.append(arg) continue # use single quotes, and put single quotes into double quotes # the string $'b is then quoted as 
'$'"'"'b' ret_list.append("'" + arg.replace("'", "'\"'\"'") + "'") return ' '.join(ret_list) def _args2cmd(args, sep=' '): # see strutils result = [] needquote = False for arg in args: bs_buf = [] # Add a space to separate this argument from the others if result: result.append(' ') needquote = (" " in arg) or ("\t" in arg) or not arg if needquote: result.append('"') for c in arg: if c == '\\': # Don't know if we need to double yet. bs_buf.append(c) elif c == '"': # Double backslashes. result.append('\\' * len(bs_buf)*2) bs_buf = [] result.append('\\"') else: # Normal char if bs_buf: result.extend(bs_buf) bs_buf = [] result.append(c) # Add remaining backslashes, if any. if bs_buf: result.extend(bs_buf) if needquote: result.extend(bs_buf) result.append('"') return ''.join(result) ############################ # End shell escaping code ############################ if __name__ == '__main__': main() """ ecoutils protocol version history --------------------------------- The version is ECO_VERSION module-level constant, and _eco_version key in the dictionary returned from ecoutils.get_profile(). 1.1.0 - (boltons version 24.0.0+) Drop Python <=3.6 compat 1.0.1 - (boltons version 16.3.2+) Remove uuid dependency and add HAVE_URANDOM 1.0.0 - (boltons version 16.3.0-16.3.1) Initial release """ boltons-25.0.0/boltons/excutils.py000066400000000000000000000214141475005545200171610ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import sys import traceback import linecache from collections import namedtuple # TODO: last arg or first arg? (last arg makes it harder to *args # into, but makes it more readable in the default exception # __repr__ output) # TODO: Multiexception wrapper __all__ = ['ExceptionCauseMixin'] class ExceptionCauseMixin(Exception): """ A mixin class for wrapping an exception in another exception, or otherwise indicating an exception was caused by another exception. 
This is most useful in concurrent or failure-intolerant scenarios, where the failure of one operation doesn't mean the remainder should be aborted, or that it's the appropriate time to raise exceptions. This is still a work in progress, but there is an example use case at the bottom of this module. NOTE: when inheriting, you will probably want to put the ExceptionCauseMixin first. Builtin exceptions are not good about calling super(). """ cause = None def __new__(cls, *args, **kw): cause = None if args and isinstance(args[0], Exception): cause, args = args[0], args[1:] ret = super().__new__(cls, *args, **kw) ret.cause = cause if cause is None: return ret root_cause = getattr(cause, 'root_cause', None) if root_cause is None: ret.root_cause = cause else: ret.root_cause = root_cause full_trace = getattr(cause, 'full_trace', None) if full_trace is not None: ret.full_trace = list(full_trace) ret._tb = list(cause._tb) ret._stack = list(cause._stack) return ret try: exc_type, exc_value, exc_tb = sys.exc_info() if exc_type is None and exc_value is None: return ret if cause is exc_value or root_cause is exc_value: # handles when cause is the current exception or when # there are multiple wraps while handling the original # exception, but a cause was never provided ret._tb = _extract_from_tb(exc_tb) ret._stack = _extract_from_frame(exc_tb.tb_frame) ret.full_trace = ret._stack[:-1] + ret._tb finally: del exc_tb return ret def get_str(self): """ Get the formatted traceback and exception message. This function exists separately from __str__() because __str__() is somewhat specialized for the built-in traceback module's particular usage. """ ret = [] trace_str = self._get_trace_str() if trace_str: ret.extend(['Traceback (most recent call last):\n', trace_str]) ret.append(self._get_exc_str()) return ''.join(ret) def _get_message(self): args = getattr(self, 'args', []) if self.cause: args = args[1:] if args and args[0]: return args[0] return '' def _get_trace_str(self): if not self.cause: return super().__repr__() if self.full_trace: return ''.join(traceback.format_list(self.full_trace)) return '' def _get_exc_str(self, incl_name=True): cause_str = _format_exc(self.root_cause) message = self._get_message() ret = [] if incl_name: ret = [self.__class__.__name__, ': '] if message: ret.extend([message, ' (caused by ', cause_str, ')']) else: ret.extend([' caused by ', cause_str]) return ''.join(ret) def __str__(self): if not self.cause: return super().__str__() trace_str = self._get_trace_str() ret = [] if trace_str: message = self._get_message() if message: ret.extend([message, ' --- ']) ret.extend(['Wrapped traceback (most recent call last):\n', trace_str, self._get_exc_str(incl_name=True)]) return ''.join(ret) else: return self._get_exc_str(incl_name=False) def _format_exc(exc, message=None): if message is None: message = exc exc_str = traceback._format_final_exc_line(exc.__class__.__name__, message) return exc_str.rstrip() _BaseTBItem = namedtuple('_BaseTBItem', 'filename, lineno, name, line') class _TBItem(_BaseTBItem): def __repr__(self): ret = super().__repr__() ret += ' <%r>' % self.frame_id return ret class _DeferredLine: def __init__(self, filename, lineno, module_globals=None): self.filename = filename self.lineno = lineno module_globals = module_globals or {} self.module_globals = {k: v for k, v in module_globals.items() if k in ('__name__', '__loader__')} def __eq__(self, other): return (self.lineno, self.filename) == (other.lineno, other.filename) def __ne__(self, other): return
(self.lineno, self.filename) != (other.lineno, other.filename) def __str__(self): if hasattr(self, '_line'): return self._line linecache.checkcache(self.filename) line = linecache.getline(self.filename, self.lineno, self.module_globals) if line: line = line.strip() else: line = None self._line = line return line def __repr__(self): return repr(str(self)) def __len__(self): return len(str(self)) def strip(self): return str(self).strip() def _extract_from_frame(f=None, limit=None): ret = [] if f is None: f = sys._getframe(1) # cross-impl yadayada if limit is None: limit = getattr(sys, 'tracebacklimit', 1000) n = 0 while f is not None and n < limit: filename = f.f_code.co_filename lineno = f.f_lineno name = f.f_code.co_name line = _DeferredLine(filename, lineno, f.f_globals) item = _TBItem(filename, lineno, name, line) item.frame_id = id(f) ret.append(item) f = f.f_back n += 1 ret.reverse() return ret def _extract_from_tb(tb, limit=None): ret = [] if limit is None: limit = getattr(sys, 'tracebacklimit', 1000) n = 0 while tb is not None and n < limit: filename = tb.tb_frame.f_code.co_filename lineno = tb.tb_lineno name = tb.tb_frame.f_code.co_name line = _DeferredLine(filename, lineno, tb.tb_frame.f_globals) item = _TBItem(filename, lineno, name, line) item.frame_id = id(tb.tb_frame) ret.append(item) tb = tb.tb_next n += 1 return ret # An Example/Prototest: class MathError(ExceptionCauseMixin, ValueError): pass def whoops_math(): return 1/0 def math_lol(n=0): if n < 3: return math_lol(n=n+1) try: return whoops_math() except ZeroDivisionError as zde: exc = MathError(zde, 'ya done messed up') raise exc def main(): try: math_lol() except ValueError as me: exc = MathError(me, 'hi') raise exc if __name__ == '__main__': try: main() except Exception: import pdb;pdb.post_mortem() raise boltons-25.0.0/boltons/fileutils.py000066400000000000000000000616161475005545200173310ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Virtually every Python programmer has used Python for wrangling disk contents, and ``fileutils`` collects solutions to some of the most commonly-found gaps in the standard library. """ import os import re import sys import stat import errno import fnmatch from shutil import copy2, copystat, Error __all__ = ['mkdir_p', 'atomic_save', 'AtomicSaver', 'FilePerms', 'iter_find_files', 'copytree'] FULL_PERMS = 0o777 RW_PERMS = 438 _SINGLE_FULL_PERM = 7 def mkdir_p(path): """Creates a directory and any parent directories that may need to be created along the way, without raising errors for any existing directories. This function mimics the behavior of the ``mkdir -p`` command available in Linux/BSD environments, but also works on Windows. """ try: os.makedirs(path) except OSError as exc: if exc.errno == errno.EEXIST and os.path.isdir(path): return raise return class FilePerms: """The :class:`FilePerms` type is used to represent standard POSIX filesystem permissions: * Read * Write * Execute Across three classes of user: * Owning (u)ser * Owner's (g)roup * Any (o)ther user This class assists with computing new permissions, as well as working with numeric octal ``777``-style and ``rwx``-style permissions. Currently it only considers the bottom 9 permission bits; it does not support sticky bits or more advanced permission systems. Args: user (str): A string in the 'rwx' format, omitting characters for which owning user's permissions are not provided. group (str): A string in the 'rwx' format, omitting characters for which owning group permissions are not provided. other (str): A string in the 'rwx' format, omitting characters for which owning other/world permissions are not provided. There are many ways to use :class:`FilePerms`: >>> FilePerms(user='rwx', group='xrw', other='wxr') # note character order FilePerms(user='rwx', group='rwx', other='rwx') >>> int(FilePerms('r', 'r', '')) 288 >>> oct(288)[-3:] # XXX Py3k '440' See also the :meth:`FilePerms.from_int` and :meth:`FilePerms.from_path` classmethods for useful alternative ways to construct :class:`FilePerms` objects. """ # TODO: consider more than the lower 9 bits class _FilePermProperty: _perm_chars = 'rwx' _perm_set = frozenset('rwx') _perm_val = {'r': 4, 'w': 2, 'x': 1} # for sorting def __init__(self, attribute, offset): self.attribute = attribute self.offset = offset def __get__(self, fp_obj, type_=None): if fp_obj is None: return self return getattr(fp_obj, self.attribute) def __set__(self, fp_obj, value): cur = getattr(fp_obj, self.attribute) if cur == value: return try: invalid_chars = set(str(value)) - self._perm_set except TypeError: raise TypeError('expected string, not %r' % value) if invalid_chars: raise ValueError('got invalid chars %r in permission' ' specification %r, expected empty string' ' or one or more of %r' % (invalid_chars, value, self._perm_chars)) def sort_key(c): return self._perm_val[c] new_value = ''.join(sorted(set(value), key=sort_key, reverse=True)) setattr(fp_obj, self.attribute, new_value) self._update_integer(fp_obj, new_value) def _update_integer(self, fp_obj, value): mode = 0 key = 'xwr' for symbol in value: bit = 2 ** key.index(symbol) mode |= (bit << (self.offset * 3)) fp_obj._integer |= mode def __init__(self, user='', group='', other=''): self._user, self._group, self._other = '', '', '' self._integer = 0 self.user = user self.group = group self.other = other @classmethod def from_int(cls, i): """Create a :class:`FilePerms` object from an integer. 
>>> FilePerms.from_int(0o644) # note the leading zero-oh for octal FilePerms(user='rw', group='r', other='r') """ i &= FULL_PERMS key = ('', 'x', 'w', 'xw', 'r', 'rx', 'rw', 'rwx') parts = [] while i: parts.append(key[i & _SINGLE_FULL_PERM]) i >>= 3 parts.reverse() return cls(*parts) @classmethod def from_path(cls, path): """Make a new :class:`FilePerms` object based on the permissions assigned to the file or directory at *path*. Args: path (str): Filesystem path of the target file. Here's an example that holds true on most systems: >>> import tempfile >>> 'r' in FilePerms.from_path(tempfile.gettempdir()).user True """ stat_res = os.stat(path) return cls.from_int(stat.S_IMODE(stat_res.st_mode)) def __int__(self): return self._integer # Sphinx tip: attribute docstrings come after the attribute user = _FilePermProperty('_user', 2) "Stores the ``rwx``-formatted *user* permission." group = _FilePermProperty('_group', 1) "Stores the ``rwx``-formatted *group* permission." other = _FilePermProperty('_other', 0) "Stores the ``rwx``-formatted *other* permission." def __repr__(self): cn = self.__class__.__name__ return ('%s(user=%r, group=%r, other=%r)' % (cn, self.user, self.group, self.other)) #### _TEXT_OPENFLAGS = os.O_RDWR | os.O_CREAT | os.O_EXCL if hasattr(os, 'O_NOINHERIT'): _TEXT_OPENFLAGS |= os.O_NOINHERIT if hasattr(os, 'O_NOFOLLOW'): _TEXT_OPENFLAGS |= os.O_NOFOLLOW _BIN_OPENFLAGS = _TEXT_OPENFLAGS if hasattr(os, 'O_BINARY'): _BIN_OPENFLAGS |= os.O_BINARY try: import fcntl as fcntl except ImportError: def set_cloexec(fd): "Dummy set_cloexec for platforms without fcntl support" pass else: def set_cloexec(fd): """Does a best-effort :func:`fcntl.fcntl` call to set a fd to be automatically closed by any future child processes. Implementation from the :mod:`tempfile` module. """ try: flags = fcntl.fcntl(fd, fcntl.F_GETFD, 0) except OSError: pass else: # flags read successfully, modify flags |= fcntl.FD_CLOEXEC fcntl.fcntl(fd, fcntl.F_SETFD, flags) return def atomic_save(dest_path, **kwargs): """A convenient interface to the :class:`AtomicSaver` type. Example: >>> try: ... with atomic_save("file.txt", text_mode=True) as fo: ... _ = fo.write('bye') ... 1/0 # will error ... fo.write('bye') ... except ZeroDivisionError: ... pass # at least our file.txt didn't get overwritten See the :class:`AtomicSaver` documentation for details. 
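On success, the new contents are moved into place:

>>> with atomic_save("file.txt", text_mode=True) as fo:
...     _ = fo.write('hello')
>>> open("file.txt").read()
'hello'
>>> os.remove("file.txt")  # clean up the example file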
""" return AtomicSaver(dest_path, **kwargs) def path_to_unicode(path): if isinstance(path, str): return path encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() return path.decode(encoding) if os.name == 'nt': import ctypes from ctypes import c_wchar_p from ctypes.wintypes import DWORD, LPVOID _ReplaceFile = ctypes.windll.kernel32.ReplaceFile _ReplaceFile.argtypes = [c_wchar_p, c_wchar_p, c_wchar_p, DWORD, LPVOID, LPVOID] def replace(src, dst): # argument names match stdlib docs, docstring below try: # ReplaceFile fails if the dest file does not exist, so # first try to rename it into position os.rename(src, dst) return except OSError as we: if we.errno == errno.EEXIST: pass # continue with the ReplaceFile logic below else: raise src = path_to_unicode(src) dst = path_to_unicode(dst) res = _ReplaceFile(c_wchar_p(dst), c_wchar_p(src), None, 0, None, None) if not res: raise OSError(f'failed to replace {dst!r} with {src!r}') return def atomic_rename(src, dst, overwrite=False): "Rename *src* to *dst*, replacing *dst* if *overwrite is True" if overwrite: replace(src, dst) else: os.rename(src, dst) return else: # wrapper func for cross compat + docs def replace(src, dst): # os.replace does the same thing on unix return os.rename(src, dst) def atomic_rename(src, dst, overwrite=False): "Rename *src* to *dst*, replacing *dst* if *overwrite is True" if overwrite: os.rename(src, dst) else: os.link(src, dst) os.unlink(src) return _atomic_rename = atomic_rename # backwards compat replace.__doc__ = """Similar to :func:`os.replace` in Python 3.3+, this function will atomically create or replace the file at path *dst* with the file at path *src*. On Windows, this function uses the ReplaceFile API for maximum possible atomicity on a range of filesystems. """ class AtomicSaver: """``AtomicSaver`` is a configurable `context manager`_ that provides a writable :class:`file` which will be moved into place as long as no exceptions are raised within the context manager's block. These "part files" are created in the same directory as the destination path to ensure atomic move operations (i.e., no cross-filesystem moves occur). Args: dest_path (str): The path where the completed file will be written. overwrite (bool): Whether to overwrite the destination file if it exists at completion time. Defaults to ``True``. file_perms (int): Integer representation of file permissions for the newly-created file. Defaults are, when the destination path already exists, to copy the permissions from the previous file, or if the file did not exist, to respect the user's configured `umask`_, usually resulting in octal 0644 or 0664. text_mode (bool): Whether to open the destination file in text mode (i.e., ``'w'`` not ``'wb'``). Defaults to ``False`` (``wb``). part_file (str): Name of the temporary *part_file*. Defaults to *dest_path* + ``.part``. Note that this argument is just the filename, and not the full path of the part file. To guarantee atomic saves, part files are always created in the same directory as the destination path. overwrite_part (bool): Whether to overwrite the *part_file*, should it exist at setup time. Defaults to ``False``, which results in an :exc:`OSError` being raised on pre-existing part files. Be careful of setting this to ``True`` in situations when multiple threads or processes could be writing to the same part file. rm_part_on_exc (bool): Remove *part_file* on exception cases. Defaults to ``True``, but ``False`` can be useful for recovery in some cases. 
Note that resumption is not automatic and by default an :exc:`OSError` is raised if the *part_file* exists. Practically, the AtomicSaver serves a few purposes: * Avoiding overwriting an existing, valid file with a partially written one. * Providing a reasonable guarantee that a part file only has one writer at a time. * Optional recovery of partial data in failure cases. .. _context manager: https://docs.python.org/2/reference/compound_stmts.html#with .. _umask: https://en.wikipedia.org/wiki/Umask """ _default_file_perms = RW_PERMS # TODO: option to abort if target file modify date has changed since start? def __init__(self, dest_path, **kwargs): self.dest_path = dest_path self.overwrite = kwargs.pop('overwrite', True) self.file_perms = kwargs.pop('file_perms', None) self.overwrite_part = kwargs.pop('overwrite_part', False) self.part_filename = kwargs.pop('part_file', None) self.rm_part_on_exc = kwargs.pop('rm_part_on_exc', True) self.text_mode = kwargs.pop('text_mode', False) self.buffering = kwargs.pop('buffering', -1) if kwargs: raise TypeError(f'unexpected kwargs: {kwargs.keys()!r}') self.dest_path = os.path.abspath(self.dest_path) self.dest_dir = os.path.dirname(self.dest_path) if not self.part_filename: self.part_path = dest_path + '.part' else: self.part_path = os.path.join(self.dest_dir, self.part_filename) self.mode = 'w+' if self.text_mode else 'w+b' self.open_flags = _TEXT_OPENFLAGS if self.text_mode else _BIN_OPENFLAGS self.part_file = None def _open_part_file(self): do_chmod = True file_perms = self.file_perms if file_perms is None: try: # try to copy from file being replaced stat_res = os.stat(self.dest_path) file_perms = stat.S_IMODE(stat_res.st_mode) except OSError: # default if no destination file exists file_perms = self._default_file_perms do_chmod = False # respect the umask fd = os.open(self.part_path, self.open_flags, file_perms) set_cloexec(fd) self.part_file = os.fdopen(fd, self.mode, self.buffering) # if default perms are overridden by the user or previous dest_path # chmod away the effects of the umask if do_chmod: try: os.chmod(self.part_path, file_perms) except OSError: self.part_file.close() raise return def setup(self): """Called on context manager entry (the :keyword:`with` statement), the ``setup()`` method creates the temporary file in the same directory as the destination file. ``setup()`` tests for a writable directory with rename permissions early, as the part file may not be written to immediately (not using :func:`os.access` because of the potential issues of effective vs. real privileges). If the caller is not using the :class:`AtomicSaver` as a context manager, this method should be called explicitly before writing. 
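A minimal sketch of that manual usage (the ``with`` form above is
generally preferable)::

    saver = AtomicSaver('out.txt')
    saver.setup()                     # creates out.txt.part
    saver.part_file.write(b'data')    # part file is binary unless text_mode=True
    saver.__exit__(None, None, None)  # flushes, fsyncs, closes, renames into place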
""" if os.path.lexists(self.dest_path): if not self.overwrite: raise OSError(errno.EEXIST, 'Overwrite disabled and file already exists', self.dest_path) if self.overwrite_part and os.path.lexists(self.part_path): os.unlink(self.part_path) self._open_part_file() return def __enter__(self): self.setup() return self.part_file def __exit__(self, exc_type, exc_val, exc_tb): if self.part_file: # Ensure data is flushed and synced to disk before closing self.part_file.flush() os.fsync(self.part_file.fileno()) self.part_file.close() if exc_type: if self.rm_part_on_exc: try: os.unlink(self.part_path) except Exception: pass # avoid masking original error return try: atomic_rename(self.part_path, self.dest_path, overwrite=self.overwrite) except OSError: if self.rm_part_on_exc: try: os.unlink(self.part_path) except Exception: pass # avoid masking original error raise # could not save destination file return def iter_find_files(directory, patterns, ignored=None, include_dirs=False, max_depth=None): """Returns a generator that yields file paths under a *directory*, matching *patterns* using `glob`_ syntax (e.g., ``*.txt``). Also supports *ignored* patterns. Args: directory (str): Path that serves as the root of the search. Yielded paths will include this as a prefix. patterns (str or list): A single pattern or list of glob-formatted patterns to find under *directory*. ignored (str or list): A single pattern or list of glob-formatted patterns to ignore. include_dirs (bool): Whether to include directories that match patterns, as well. Defaults to ``False``. max_depth (int): traverse up to this level of subdirectory. I.e., 0 for the specified *directory* only, 1 for *directory* and one level of subdirectory. For example, finding Python files in the current directory: >>> _CUR_DIR = os.path.dirname(os.path.abspath(__file__)) >>> filenames = sorted(iter_find_files(_CUR_DIR, '*.py')) >>> os.path.basename(filenames[-1]) 'urlutils.py' Or, Python files while ignoring emacs lockfiles: >>> filenames = iter_find_files(_CUR_DIR, '*.py', ignored='.#*') .. _glob: https://en.wikipedia.org/wiki/Glob_%28programming%29 """ if isinstance(patterns, str): patterns = [patterns] pats_re = re.compile('|'.join([fnmatch.translate(p) for p in patterns])) if not ignored: ignored = [] elif isinstance(ignored, str): ignored = [ignored] ign_re = re.compile('|'.join([fnmatch.translate(p) for p in ignored])) start_depth = len(directory.split(os.path.sep)) for root, dirs, files in os.walk(directory): if max_depth is not None and (len(root.split(os.path.sep)) - start_depth) > max_depth: continue if include_dirs: for basename in dirs: if pats_re.match(basename): if ignored and ign_re.match(basename): continue filename = os.path.join(root, basename) yield filename for basename in files: if pats_re.match(basename): if ignored and ign_re.match(basename): continue filename = os.path.join(root, basename) yield filename return def copy_tree(src, dst, symlinks=False, ignore=None): """The ``copy_tree`` function is an exact copy of the built-in :func:`shutil.copytree`, with one key difference: it will not raise an exception if part of the tree already exists. It achieves this by using :func:`mkdir_p`. As of Python 3.8, you may pass :func:`shutil.copytree` the `dirs_exist_ok=True` flag to achieve the same effect. Args: src (str): Path of the source directory to copy. dst (str): Destination path. Existing directories accepted. symlinks (bool): If ``True``, copy symlinks rather than their contents. 
ignore (callable): A callable that takes a path and directory listing, returning the files within the listing to be ignored. For more details, check out :func:`shutil.copytree` and :func:`shutil.copy2`. """ names = os.listdir(src) if ignore is not None: ignored_names = ignore(src, names) else: ignored_names = set() mkdir_p(dst) errors = [] for name in names: if name in ignored_names: continue srcname = os.path.join(src, name) dstname = os.path.join(dst, name) try: if symlinks and os.path.islink(srcname): linkto = os.readlink(srcname) os.symlink(linkto, dstname) elif os.path.isdir(srcname): copytree(srcname, dstname, symlinks, ignore) else: # Will raise a SpecialFileError for unsupported file types copy2(srcname, dstname) # catch the Error from the recursive copytree so that we can # continue with other files except Error as e: errors.extend(e.args[0]) except OSError as why: errors.append((srcname, dstname, str(why))) try: copystat(src, dst) except OSError as why: errors.append((src, dst, str(why))) if errors: raise Error(errors) copytree = copy_tree # alias for drop-in replacement of shutil # like open(os.devnull) but with even fewer side effects class DummyFile: # TODO: raise ValueErrors on closed for all methods? # TODO: enforce read/write def __init__(self, path, mode='r', buffering=None): self.name = path self.mode = mode self.closed = False self.errors = None self.isatty = False self.encoding = None self.newlines = None self.softspace = 0 def close(self): self.closed = True def fileno(self): return -1 def flush(self): if self.closed: raise ValueError('I/O operation on a closed file') return def next(self): raise StopIteration() def read(self, size=0): if self.closed: raise ValueError('I/O operation on a closed file') return '' def readline(self, size=0): if self.closed: raise ValueError('I/O operation on a closed file') return '' def readlines(self, size=0): if self.closed: raise ValueError('I/O operation on a closed file') return [] def seek(self): if self.closed: raise ValueError('I/O operation on a closed file') return def tell(self): if self.closed: raise ValueError('I/O operation on a closed file') return 0 def truncate(self): if self.closed: raise ValueError('I/O operation on a closed file') return def write(self, string): if self.closed: raise ValueError('I/O operation on a closed file') return def writelines(self, list_of_strings): if self.closed: raise ValueError('I/O operation on a closed file') return def __next__(self): raise StopIteration() def __enter__(self): if self.closed: raise ValueError('I/O operation on a closed file') return def __exit__(self, exc_type, exc_val, exc_tb): return def rotate_file(filename, *, keep: int = 5): """ If *filename.ext* exists, it will be moved to *filename.1.ext*, with all conflicting filenames being moved up by one, dropping any files beyond *keep*. After rotation, *filename* will be available for creation as a new file. Fails if *filename* is not a file or if *keep* is not > 0. 
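    For example, with a hypothetical ``app.log`` and ``app.1.log``
    already on disk::

        rotate_file('app.log')
        # app.1.log -> app.2.log, then app.log -> app.1.log;
        # the name 'app.log' is now free for a fresh file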
""" if keep < 1: raise ValueError(f'expected "keep" to be >=1, not {keep}') if not os.path.exists(filename): return if not os.path.isfile(filename): raise ValueError(f'expected {filename} to be a file') fn_root, fn_ext = os.path.splitext(filename) kept_names = [] for i in range(1, keep + 1): if fn_ext: kept_names.append(f'{fn_root}.{i}{fn_ext}') else: kept_names.append(f'{fn_root}.{i}') fns = [filename] + kept_names for orig_name, kept_name in reversed(list(zip(fns, fns[1:]))): if not os.path.exists(orig_name): continue os.rename(orig_name, kept_name) if os.path.exists(kept_names[-1]): os.remove(kept_names[-1]) return boltons-25.0.0/boltons/formatutils.py000066400000000000000000000306701475005545200176760ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """`PEP 3101`_ introduced the :meth:`str.format` method, and what would later be called "new-style" string formatting. For the sake of explicit correctness, it is probably best to refer to Python's dual string formatting capabilities as *bracket-style* and *percent-style*. There is overlap, but one does not replace the other. * Bracket-style is more pluggable, slower, and uses a method. * Percent-style is simpler, faster, and uses an operator. Bracket-style formatting brought with it a much more powerful toolbox, but it was far from a full one. :meth:`str.format` uses `more powerful syntax`_, but `the tools and idioms`_ for working with that syntax are not well-developed nor well-advertised. ``formatutils`` adds several functions for working with bracket-style format strings: * :class:`DeferredValue`: Defer fetching or calculating a value until format time. * :func:`get_format_args`: Parse the positional and keyword arguments out of a format string. * :func:`tokenize_format_str`: Tokenize a format string into literals and :class:`BaseFormatField` objects. * :func:`construct_format_field_str`: Assists in programmatic construction of format strings. * :func:`infer_positional_format_args`: Converts anonymous references in 2.7+ format strings to explicit positional arguments suitable for usage with Python 2.6. .. 
_more powerful syntax: https://docs.python.org/2/library/string.html#format-string-syntax .. _the tools and idioms: https://docs.python.org/2/library/string.html#string-formatting .. _PEP 3101: https://www.python.org/dev/peps/pep-3101/ """ # TODO: also include percent-formatting utils? # TODO: include lithoxyl.formatters.Formatter (or some adaptation)? import re from string import Formatter __all__ = ['DeferredValue', 'get_format_args', 'tokenize_format_str', 'construct_format_field_str', 'infer_positional_format_args', 'BaseFormatField'] _pos_farg_re = re.compile('({{)|' # escaped open-brace '(}})|' # escaped close-brace r'({[:!.\[}])') # anon positional format arg def construct_format_field_str(fname, fspec, conv): """ Constructs a format field string from the field name, spec, and conversion character (``fname``, ``fspec``, ``conv``). See Python String Formatting for more info. """ if fname is None: return '' ret = '{' + fname if conv: ret += '!' + conv if fspec: ret += ':' + fspec ret += '}' return ret def split_format_str(fstr): """Does very basic splitting of a format string, returns a list of strings. For full tokenization, see :func:`tokenize_format_str`. """ ret = [] for lit, fname, fspec, conv in Formatter().parse(fstr): if fname is None: ret.append((lit, None)) continue field_str = construct_format_field_str(fname, fspec, conv) ret.append((lit, field_str)) return ret def infer_positional_format_args(fstr): """Takes format strings with anonymous positional arguments, (e.g., "{}" and {:d}), and converts them into numbered ones for explicitness and compatibility with 2.6. Returns a string with the inferred positional arguments. """ # TODO: memoize ret, max_anon = '', 0 # look for {: or {! or {. or {[ or {} start, end, prev_end = 0, 0, 0 for match in _pos_farg_re.finditer(fstr): start, end, group = match.start(), match.end(), match.group() if prev_end < start: ret += fstr[prev_end:start] prev_end = end if group == '{{' or group == '}}': ret += group continue ret += f'{{{max_anon}{group[1:]}' max_anon += 1 ret += fstr[prev_end:] return ret # This approach is hardly exhaustive but it works for most builtins _INTCHARS = 'bcdoxXn' _FLOATCHARS = 'eEfFgGn%' _TYPE_MAP = dict([(x, int) for x in _INTCHARS] + [(x, float) for x in _FLOATCHARS]) _TYPE_MAP['s'] = str def get_format_args(fstr): """ Turn a format string into two lists of arguments referenced by the format string. One is positional arguments, and the other is named arguments. Each element of the list includes the name and the nominal type of the field. 
    >>> get_format_args("{noun} is {1:d} years old{punct}") == \
    ([(1, int)], [('noun', str), ('punct', str)])
    True
    """
    # TODO: memoize
    formatter = Formatter()
    fargs, fkwargs, _dedup = [], [], set()

    def _add_arg(argname, type_char='s'):
        if argname not in _dedup:
            _dedup.add(argname)
            argtype = _TYPE_MAP.get(type_char, str)  # TODO: unicode
            try:
                fargs.append((int(argname), argtype))
            except ValueError:
                fkwargs.append((argname, argtype))

    for lit, fname, fspec, conv in formatter.parse(fstr):
        if fname is not None:
            type_char = fspec[-1:]
            fname_list = re.split('[.[]', fname)
            if len(fname_list) > 1:
                raise ValueError('encountered compound format arg: %r' % fname)
            try:
                base_fname = fname_list[0]
                assert base_fname
            except (IndexError, AssertionError):
                raise ValueError('encountered anonymous positional argument')
            _add_arg(fname, type_char)
            for sublit, subfname, _, _ in formatter.parse(fspec):
                # TODO: positional and anon args not allowed here.
                if subfname is not None:
                    _add_arg(subfname)

    return fargs, fkwargs


def tokenize_format_str(fstr, resolve_pos=True):
    """Takes a format string, turns it into a list of alternating string
    literals and :class:`BaseFormatField` tokens. By default, also
    infers anonymous positional references into explicit, numbered
    positional references. To disable this behavior set *resolve_pos*
    to ``False``.
    """
    ret = []
    if resolve_pos:
        fstr = infer_positional_format_args(fstr)
    formatter = Formatter()
    for lit, fname, fspec, conv in formatter.parse(fstr):
        if lit:
            ret.append(lit)
        if fname is None:
            continue
        ret.append(BaseFormatField(fname, fspec, conv))
    return ret


class BaseFormatField:
    """A class representing a reference to an argument inside of a
    bracket-style format string. For instance, in ``"{greeting},
    world!"``, there is a field named "greeting".

    These fields can have many options applied to them. See the
    Python docs on `Format String Syntax`_ for the full details.

    .. _Format String Syntax: https://docs.python.org/2/library/string.html#string-formatting
    """
    def __init__(self, fname, fspec='', conv=None):
        self.set_fname(fname)
        self.set_fspec(fspec)
        self.set_conv(conv)

    def set_fname(self, fname):
        "Set the field name."

        path_list = re.split('[.[]', fname)  # TODO

        self.base_name = path_list[0]
        self.fname = fname
        self.subpath = path_list[1:]
        self.is_positional = not self.base_name or self.base_name.isdigit()

    def set_fspec(self, fspec):
        "Set the field spec."
        fspec = fspec or ''
        subfields = []
        for sublit, subfname, _, _ in Formatter().parse(fspec):
            if subfname is not None:
                subfields.append(subfname)
        self.subfields = subfields
        self.fspec = fspec
        self.type_char = fspec[-1:]
        self.type_func = _TYPE_MAP.get(self.type_char, str)

    def set_conv(self, conv):
        """There are only two built-in converters: ``s`` and ``r``. They are
        somewhat rare and appear like ``"{ref!r}"``."""
        # TODO
        self.conv = conv
        self.conv_func = None  # TODO

    @property
    def fstr(self):
        "The current state of the field in string format."
return construct_format_field_str(self.fname, self.fspec, self.conv) def __repr__(self): cn = self.__class__.__name__ args = [self.fname] if self.conv is not None: args.extend([self.fspec, self.conv]) elif self.fspec != '': args.append(self.fspec) args_repr = ', '.join([repr(a) for a in args]) return f'{cn}({args_repr})' def __str__(self): return self.fstr _UNSET = object() class DeferredValue: """:class:`DeferredValue` is a wrapper type, used to defer computing values which would otherwise be expensive to stringify and format. This is most valuable in areas like logging, where one would not want to waste time formatting a value for a log message which will subsequently be filtered because the message's log level was DEBUG and the logger was set to only emit CRITICAL messages. The :class:``DeferredValue`` is initialized with a callable that takes no arguments and returns the value, which can be of any type. By default DeferredValue only calls that callable once, and future references will get a cached value. This behavior can be disabled by setting *cache_value* to ``False``. Args: func (function): A callable that takes no arguments and computes the value being represented. cache_value (bool): Whether subsequent usages will call *func* again. Defaults to ``True``. >>> import sys >>> dv = DeferredValue(lambda: len(sys._current_frames())) >>> output = "works great in all {0} threads!".format(dv) PROTIP: To keep lines shorter, use: ``from formatutils import DeferredValue as DV`` """ def __init__(self, func, cache_value=True): self.func = func self.cache_value = cache_value self._value = _UNSET def get_value(self): """Computes, optionally caches, and returns the value of the *func*. If ``get_value()`` has been called before, a cached value may be returned depending on the *cache_value* option passed to the constructor. """ if self._value is not _UNSET and self.cache_value: value = self._value else: value = self.func() if self.cache_value: self._value = value return value def __int__(self): return int(self.get_value()) def __float__(self): return float(self.get_value()) def __str__(self): return str(self.get_value()) def __unicode__(self): return str(self.get_value()) def __repr__(self): return repr(self.get_value()) def __format__(self, fmt): value = self.get_value() pt = fmt[-1:] # presentation type type_conv = _TYPE_MAP.get(pt, str) try: return value.__format__(fmt) except (ValueError, TypeError): # TODO: this may be overkill return type_conv(value).__format__(fmt) # end formatutils.py boltons-25.0.0/boltons/funcutils.py000066400000000000000000001077501475005545200173450ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """Python's built-in :mod:`functools` module builds several useful utilities on top of Python's first-class function support. ``funcutils`` generally stays in the same vein, adding to and correcting Python's standard metaprogramming facilities. """ import sys import re import inspect import functools import itertools from inspect import formatannotation from types import FunctionType, MethodType # For legacy compatibility. # boltons used to offer an implementation of total_ordering for Python <2.7 from functools import total_ordering as total_ordering try: from .typeutils import make_sentinel NO_DEFAULT = make_sentinel(var_name='NO_DEFAULT') except ImportError: NO_DEFAULT = object() def inspect_formatargspec( args, varargs=None, varkw=None, defaults=None, kwonlyargs=(), kwonlydefaults={}, annotations={}, formatarg=str, formatvarargs=lambda name: '*' + name, formatvarkw=lambda name: '**' + name, formatvalue=lambda value: '=' + repr(value), formatreturns=lambda text: ' -> ' + text, formatannotation=formatannotation): """Copy formatargspec from python 3.7 standard library. Python 3 has deprecated formatargspec and requested that Signature be used instead, however this requires a full reimplementation of formatargspec() in terms of creating Parameter objects and such. Instead of introducing all the object-creation overhead and having to reinvent from scratch, just copy their compatibility routine. """ def formatargandannotation(arg): result = formatarg(arg) if arg in annotations: result += ': ' + formatannotation(annotations[arg]) return result specs = [] if defaults: firstdefault = len(args) - len(defaults) for i, arg in enumerate(args): spec = formatargandannotation(arg) if defaults and i >= firstdefault: spec = spec + formatvalue(defaults[i - firstdefault]) specs.append(spec) if varargs is not None: specs.append(formatvarargs(formatargandannotation(varargs))) else: if kwonlyargs: specs.append('*') if kwonlyargs: for kwonlyarg in kwonlyargs: spec = formatargandannotation(kwonlyarg) if kwonlydefaults and kwonlyarg in kwonlydefaults: spec += formatvalue(kwonlydefaults[kwonlyarg]) specs.append(spec) if varkw is not None: specs.append(formatvarkw(formatargandannotation(varkw))) result = '(' + ', '.join(specs) + ')' if 'return' in annotations: result += formatreturns(formatannotation(annotations['return'])) return result def get_module_callables(mod, ignore=None): """Returns two maps of (*types*, *funcs*) from *mod*, optionally ignoring based on the :class:`bool` return value of the *ignore* callable. *mod* can be a string name of a module in :data:`sys.modules` or the module instance itself. 
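    For example, a small self-inspection sketch (this assumes the
    module has been imported as part of the ``boltons`` package):

    >>> types, funcs = get_module_callables('boltons.funcutils')
    >>> 'FunctionBuilder' in types and 'wraps' in funcs
    True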
""" if isinstance(mod, str): mod = sys.modules[mod] types, funcs = {}, {} for attr_name in dir(mod): if ignore and ignore(attr_name): continue try: attr = getattr(mod, attr_name) except Exception: continue try: attr_mod_name = attr.__module__ except AttributeError: continue if attr_mod_name != mod.__name__: continue if isinstance(attr, type): types[attr_name] = attr elif callable(attr): funcs[attr_name] = attr return types, funcs def mro_items(type_obj): """Takes a type and returns an iterator over all class variables throughout the type hierarchy (respecting the MRO). >>> sorted(set([k for k, v in mro_items(int) if not k.startswith('__') and 'bytes' not in k and not callable(v)])) ['denominator', 'imag', 'numerator', 'real'] """ # TODO: handle slots? return itertools.chain.from_iterable(ct.__dict__.items() for ct in type_obj.__mro__) def dir_dict(obj, raise_exc=False): """Return a dictionary of attribute names to values for a given object. Unlike ``obj.__dict__``, this function returns all attributes on the object, including ones on parent classes. """ # TODO: separate function for handling descriptors on types? ret = {} for k in dir(obj): try: ret[k] = getattr(obj, k) except Exception: if raise_exc: raise return ret def copy_function(orig, copy_dict=True): """Returns a shallow copy of the function, including code object, globals, closure, etc. >>> func = lambda: func >>> func() is func True >>> func_copy = copy_function(func) >>> func_copy() is func True >>> func_copy is not func True Args: orig (function): The function to be copied. Must be a function, not just any method or callable. copy_dict (bool): Also copy any attributes set on the function instance. Defaults to ``True``. """ ret = FunctionType(orig.__code__, orig.__globals__, name=orig.__name__, argdefs=getattr(orig, "__defaults__", None), closure=getattr(orig, "__closure__", None)) if hasattr(orig, "__kwdefaults__"): ret.__kwdefaults__ = orig.__kwdefaults__ if copy_dict: ret.__dict__.update(orig.__dict__) return ret def partial_ordering(cls): """Class decorator, similar to :func:`functools.total_ordering`, except it is used to define `partial orderings`_ (i.e., it is possible that *x* is neither greater than, equal to, or less than *y*). It assumes the presence of the ``__le__()`` and ``__ge__()`` method, but nothing else. It will not override any existing additional comparison methods. .. _partial orderings: https://en.wikipedia.org/wiki/Partially_ordered_set >>> @partial_ordering ... class MySet(set): ... def __le__(self, other): ... return self.issubset(other) ... def __ge__(self, other): ... return self.issuperset(other) ... >>> a = MySet([1,2,3]) >>> b = MySet([1,2]) >>> c = MySet([1,2,4]) >>> b < a True >>> b > a False >>> b < c True >>> a < c False >>> c > a False """ def __lt__(self, other): return self <= other and not self >= other def __gt__(self, other): return self >= other and not self <= other def __eq__(self, other): return self >= other and self <= other if not hasattr(cls, '__lt__'): cls.__lt__ = __lt__ if not hasattr(cls, '__gt__'): cls.__gt__ = __gt__ if not hasattr(cls, '__eq__'): cls.__eq__ = __eq__ return cls class InstancePartial(functools.partial): """:class:`functools.partial` is a huge convenience for anyone working with Python's great first-class functions. It allows developers to curry arguments and incrementally create simpler callables for a variety of use cases. Unfortunately there's one big gap in its usefulness: methods. 
Partials just don't get bound as methods and automatically handed a reference to ``self``. The ``InstancePartial`` type remedies this by inheriting from :class:`functools.partial` and implementing the necessary descriptor protocol. There are no other differences in implementation or usage. :class:`CachedInstancePartial`, below, has the same ability, but is slightly more efficient. """ @property def _partialmethod(self): # py3.13 switched from _partialmethod to __partialmethod__, this is kept for backwards compat <=py3.12 return self.__partialmethod__ @property def __partialmethod__(self): return functools.partialmethod(self.func, *self.args, **self.keywords) def __get__(self, obj, obj_type): return MethodType(self, obj) class CachedInstancePartial(functools.partial): """The ``CachedInstancePartial`` is virtually the same as :class:`InstancePartial`, adding support for method-usage to :class:`functools.partial`, except that upon first access, it caches the bound method on the associated object, speeding it up for future accesses, and bringing the method call overhead to about the same as non-``partial`` methods. See the :class:`InstancePartial` docstring for more details. """ @property def _partialmethod(self): # py3.13 switched from _partialmethod to __partialmethod__, this is kept for backwards compat <=py3.12 return self.__partialmethod__ @property def __partialmethod__(self): return functools.partialmethod(self.func, *self.args, **self.keywords) def __set_name__(self, obj_type, name): self.__name__ = name def __get__(self, obj, obj_type): # These assignments could've been in __init__, but there was # no simple way to do it without breaking one of PyPy or Py3. self.__name__ = getattr(self, "__name__", None) self.__doc__ = self.func.__doc__ self.__module__ = self.func.__module__ name = self.__name__ if obj is None: return MethodType(self, obj) try: # since this is a data descriptor, this block # is probably only hit once (per object) return obj.__dict__[name] except KeyError: obj.__dict__[name] = ret = MethodType(self, obj) return ret partial = CachedInstancePartial def format_invocation(name='', args=(), kwargs=None, **kw): """Given a name, positional arguments, and keyword arguments, format a basic Python-style function call. >>> print(format_invocation('func', args=(1, 2), kwargs={'c': 3})) func(1, 2, c=3) >>> print(format_invocation('a_func', args=(1,))) a_func(1) >>> print(format_invocation('kw_func', kwargs=[('a', 1), ('b', 2)])) kw_func(a=1, b=2) """ _repr = kw.pop('repr', repr) if kw: raise TypeError('unexpected keyword args: %r' % ', '.join(kw.keys())) kwargs = kwargs or {} a_text = ', '.join([_repr(a) for a in args]) if isinstance(kwargs, dict): kwarg_items = [(k, kwargs[k]) for k in sorted(kwargs)] else: kwarg_items = kwargs kw_text = ', '.join([f'{k}={_repr(v)}' for k, v in kwarg_items]) all_args_text = a_text if all_args_text and kw_text: all_args_text += ', ' all_args_text += kw_text return f'{name}({all_args_text})' def format_exp_repr(obj, pos_names, req_names=None, opt_names=None, opt_key=None): """Render an expression-style repr of an object, based on attribute names, which are assumed to line up with arguments to an initializer. >>> class Flag(object): ... def __init__(self, length, width, depth=None): ... self.length = length ... self.width = width ... self.depth = depth ... 
    That's our Flag object, here are some example reprs for it:

    >>> flag = Flag(5, 10)
    >>> print(format_exp_repr(flag, ['length', 'width'], [], ['depth']))
    Flag(5, 10)
    >>> flag2 = Flag(5, 15, 2)
    >>> print(format_exp_repr(flag2, ['length'], ['width', 'depth']))
    Flag(5, width=15, depth=2)

    By picking the pos_names, req_names, opt_names, and opt_key, you
    can fine-tune how you want the repr to look.

    Args:
        obj (object): The object whose type name will be used and
            attributes will be checked
        pos_names (list): Required list of attribute names which will
            be rendered as positional arguments in the output repr.
        req_names (list): List of attribute names which will always
            appear in the keyword arguments in the output
            repr. Defaults to None.
        opt_names (list): List of attribute names which may appear in
            the keyword arguments in the output repr, provided they
            pass the *opt_key* check. Defaults to None.
        opt_key (callable): A function or callable which checks whether
            an opt_name should be in the repr. Defaults to a
            ``None``-check.

    """
    cn = type(obj).__name__

    req_names = req_names or []
    opt_names = opt_names or []
    uniq_names, all_names = set(), []
    for name in req_names + opt_names:
        if name in uniq_names:
            continue
        uniq_names.add(name)
        all_names.append(name)

    if opt_key is None:
        opt_key = lambda v: v is None
    assert callable(opt_key)

    args = [getattr(obj, name, None) for name in pos_names]

    kw_items = [(name, getattr(obj, name, None)) for name in all_names]
    kw_items = [(name, val) for name, val in kw_items
                if not (name in opt_names and opt_key(val))]

    return format_invocation(cn, args, kw_items)


def format_nonexp_repr(obj, req_names=None, opt_names=None, opt_key=None):
    """Format a non-expression-style repr

    Some object reprs look like object instantiation, e.g., App(r=[], mw=[]).

    This makes sense for smaller, lower-level objects whose state
    roundtrips. But a lot of objects contain values that don't
    roundtrip, like types and functions.

    For those objects, there is the non-expression style repr, which
    mimics Python's default style to make a repr like so:

    >>> class Flag(object):
    ...     def __init__(self, length, width, depth=None):
    ...         self.length = length
    ...         self.width = width
    ...         self.depth = depth
    ...
    >>> flag = Flag(5, 10)
    >>> print(format_nonexp_repr(flag, ['length', 'width'], ['depth']))
    <Flag length=5 width=10>

    If no attributes are specified or set, utilizes the id, not unlike
    Python's built-in behavior.

    >>> print(format_nonexp_repr(flag))  # doctest: +ELLIPSIS
    <Flag id=...>
    """
    cn = obj.__class__.__name__
    req_names = req_names or []
    opt_names = opt_names or []
    uniq_names, all_names = set(), []
    for name in req_names + opt_names:
        if name in uniq_names:
            continue
        uniq_names.add(name)
        all_names.append(name)

    if opt_key is None:
        opt_key = lambda v: v is None
    assert callable(opt_key)

    items = [(name, getattr(obj, name, None)) for name in all_names]
    labels = [f'{name}={val!r}' for name, val in items
              if not (name in opt_names and opt_key(val))]
    if not labels:
        labels = ['id=%s' % id(obj)]
    ret = '<{} {}>'.format(cn, ' '.join(labels))
    return ret


# # #
# # # Function builder
# # #

def wraps(func, injected=None, expected=None, **kw):
    """Decorator factory to apply update_wrapper() to a wrapper function.

    Modeled after built-in :func:`functools.wraps`. Returns a decorator
    that invokes update_wrapper() with the decorated function as the
    wrapper argument and the arguments to wraps() as the remaining
    arguments. Default arguments are as for update_wrapper(). This is
    a convenience function to simplify applying partial() to
    update_wrapper().
    Same example as in update_wrapper's doc but with wraps:

        >>> from boltons.funcutils import wraps
        >>>
        >>> def print_return(func):
        ...     @wraps(func)
        ...     def wrapper(*args, **kwargs):
        ...         ret = func(*args, **kwargs)
        ...         print(ret)
        ...         return ret
        ...     return wrapper
        ...
        >>> @print_return
        ... def example():
        ...     '''docstring'''
        ...     return 'example return value'
        >>>
        >>> val = example()
        example return value
        >>> example.__name__
        'example'
        >>> example.__doc__
        'docstring'
    """
    return partial(update_wrapper, func=func, build_from=None,
                   injected=injected, expected=expected, **kw)


def update_wrapper(wrapper, func, injected=None, expected=None, build_from=None, **kw):
    """Modeled after the built-in :func:`functools.update_wrapper`,
    this function is used to make your wrapper function reflect the
    wrapped function's:

      * Name
      * Documentation
      * Module
      * Signature

    The built-in :func:`functools.update_wrapper` copies the first
    three, but does not copy the signature. This version of
    ``update_wrapper`` can copy the inner function's signature
    exactly, allowing seamless usage and :mod:`introspection
    <inspect>`. Usage is identical to the built-in version::

        >>> from boltons.funcutils import update_wrapper
        >>>
        >>> def print_return(func):
        ...     def wrapper(*args, **kwargs):
        ...         ret = func(*args, **kwargs)
        ...         print(ret)
        ...         return ret
        ...     return update_wrapper(wrapper, func)
        ...
        >>> @print_return
        ... def example():
        ...     '''docstring'''
        ...     return 'example return value'
        >>>
        >>> val = example()
        example return value
        >>> example.__name__
        'example'
        >>> example.__doc__
        'docstring'

    In addition, the boltons version of update_wrapper supports
    modifying the outer signature. By passing a list of
    *injected* argument names, those arguments will be removed from
    the outer wrapper's signature, allowing your decorator to provide
    arguments that aren't passed in.

    Args:

        wrapper (function): The callable to which the attributes of
            *func* are to be copied.
        func (function): The callable whose attributes are to be copied.
        injected (list): An optional list of argument names which
            should not appear in the new wrapper's signature.
        expected (list): An optional list of argument names (or (name,
            default) pairs) representing new arguments introduced by
            the wrapper (the opposite of *injected*). See
            :meth:`FunctionBuilder.add_arg()` for more details.
        build_from (function): The callable from which the new wrapper
            is built. Defaults to *func*, unless *wrapper* is a partial
            object built from *func*, in which case it defaults to
            *wrapper*. Useful in some specific cases where *wrapper*
            and *func* have the same arguments but differ on which are
            keyword-only and positional-only.
        update_dict (bool): Whether to copy other, non-standard
            attributes of *func* over to the wrapper. Defaults to True.
        inject_to_varkw (bool): Ignore missing arguments when a
            ``**kwargs``-type catch-all is present. Defaults to True.
        hide_wrapped (bool): Remove reference to the wrapped
            function(s) in the updated function.

    In opposition to the built-in :func:`functools.update_wrapper`,
    boltons' version returns a copy of the function and does not
    modify anything in place. For more in-depth wrapping of functions,
    see the :class:`FunctionBuilder` type, on which update_wrapper was
    built.
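    As an illustrative sketch of *injected* (``with_conn`` and
    ``make_conn`` here are hypothetical)::

        def with_conn(func):
            def wrapper(*args, **kwargs):
                return func(make_conn(), *args, **kwargs)
            return update_wrapper(wrapper, func, injected='conn')

    Functions decorated with ``with_conn`` keep their original
    signature, minus the leading ``conn`` argument, which the wrapper
    supplies on every call.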
""" if injected is None: injected = [] elif isinstance(injected, str): injected = [injected] else: injected = list(injected) expected_items = _parse_wraps_expected(expected) if isinstance(func, (classmethod, staticmethod)): raise TypeError('wraps does not support wrapping classmethods and' ' staticmethods, change the order of wrapping to' ' wrap the underlying function: %r' % (getattr(func, '__func__', None),)) update_dict = kw.pop('update_dict', True) inject_to_varkw = kw.pop('inject_to_varkw', True) hide_wrapped = kw.pop('hide_wrapped', False) if kw: raise TypeError('unexpected kwargs: %r' % kw.keys()) if isinstance(wrapper, functools.partial) and func is wrapper.func: build_from = build_from or wrapper fb = FunctionBuilder.from_func(build_from or func) for arg in injected: try: fb.remove_arg(arg) except MissingArgument: if inject_to_varkw and fb.varkw is not None: continue # keyword arg will be caught by the varkw raise for arg, default in expected_items: fb.add_arg(arg, default) # may raise ExistingArgument if fb.is_async: fb.body = 'return await _call(%s)' % fb.get_invocation_str() else: fb.body = 'return _call(%s)' % fb.get_invocation_str() execdict = dict(_call=wrapper, _func=func) fully_wrapped = fb.get_func(execdict, with_dict=update_dict) if hide_wrapped and hasattr(fully_wrapped, '__wrapped__'): del fully_wrapped.__dict__['__wrapped__'] elif not hide_wrapped: fully_wrapped.__wrapped__ = func # ref to the original function (#115) return fully_wrapped def _parse_wraps_expected(expected): # expected takes a pretty powerful argument, it's processed # here. admittedly this would be less trouble if I relied on # OrderedDict (there's an impl of that in the commit history if # you look if expected is None: expected = [] elif isinstance(expected, str): expected = [(expected, NO_DEFAULT)] expected_items = [] try: expected_iter = iter(expected) except TypeError as e: raise ValueError('"expected" takes string name, sequence of string names,' ' iterable of (name, default) pairs, or a mapping of ' ' {name: default}, not %r (got: %r)' % (expected, e)) for argname in expected_iter: if isinstance(argname, str): # dict keys and bare strings try: default = expected[argname] except TypeError: default = NO_DEFAULT else: # pairs try: argname, default = argname except (TypeError, ValueError): raise ValueError('"expected" takes string name, sequence of string names,' ' iterable of (name, default) pairs, or a mapping of ' ' {name: default}, not %r') if not isinstance(argname, str): raise ValueError(f'all "expected" argnames must be strings, not {argname!r}') expected_items.append((argname, default)) return expected_items class FunctionBuilder: """The FunctionBuilder type provides an interface for programmatically creating new functions, either based on existing functions or from scratch. Values are passed in at construction or set as attributes on the instance. For creating a new function based of an existing one, see the :meth:`~FunctionBuilder.from_func` classmethod. At any point, :meth:`~FunctionBuilder.get_func` can be called to get a newly compiled function, based on the values configured. >>> fb = FunctionBuilder('return_five', doc='returns the integer 5', ... body='return 5') >>> f = fb.get_func() >>> f() 5 >>> fb.varkw = 'kw' >>> f_kw = fb.get_func() >>> f_kw(ignored_arg='ignored_val') 5 Note that function signatures themselves changed quite a bit in Python 3, so several arguments are only applicable to FunctionBuilder in Python 3. 
Except for *name*, all arguments to the constructor are keyword arguments. Args: name (str): Name of the function. doc (str): `Docstring`_ for the function, defaults to empty. module (str): Name of the module from which this function was imported. Defaults to None. body (str): String version of the code representing the body of the function. Defaults to ``'pass'``, which will result in a function which does nothing and returns ``None``. args (list): List of argument names, defaults to empty list, denoting no arguments. varargs (str): Name of the catch-all variable for positional arguments. E.g., "args" if the resultant function is to have ``*args`` in the signature. Defaults to None. varkw (str): Name of the catch-all variable for keyword arguments. E.g., "kwargs" if the resultant function is to have ``**kwargs`` in the signature. Defaults to None. defaults (tuple): A tuple containing default argument values for those arguments that have defaults. kwonlyargs (list): Argument names which are only valid as keyword arguments. **Python 3 only.** kwonlydefaults (dict): A mapping, same as normal *defaults*, but only for the *kwonlyargs*. **Python 3 only.** annotations (dict): Mapping of type hints and so forth. **Python 3 only.** filename (str): The filename that will appear in tracebacks. Defaults to "boltons.funcutils.FunctionBuilder". indent (int): Number of spaces with which to indent the function *body*. Values less than 1 will result in an error. dict (dict): Any other attributes which should be added to the functions compiled with this FunctionBuilder. All of these arguments are also made available as attributes which can be mutated as necessary. .. _Docstring: https://en.wikipedia.org/wiki/Docstring#Python """ _argspec_defaults = {'args': list, 'varargs': lambda: None, 'varkw': lambda: None, 'defaults': lambda: None, 'kwonlyargs': list, 'kwonlydefaults': dict, 'annotations': dict} @classmethod def _argspec_to_dict(cls, f): argspec = inspect.getfullargspec(f) return {attr: getattr(argspec, attr) for attr in cls._argspec_defaults} _defaults = {'doc': str, 'dict': dict, 'is_async': lambda: False, 'module': lambda: None, 'body': lambda: 'pass', 'indent': lambda: 4, "annotations": dict, 'filename': lambda: 'boltons.funcutils.FunctionBuilder'} _defaults.update(_argspec_defaults) _compile_count = itertools.count() def __init__(self, name, **kw): self.name = name for a, default_factory in self._defaults.items(): val = kw.pop(a, None) if val is None: val = default_factory() setattr(self, a, val) if kw: raise TypeError('unexpected kwargs: %r' % kw.keys()) return # def get_argspec(self): # TODO def get_sig_str(self, with_annotations=True): """Return function signature as a string. with_annotations is ignored on Python 2. On Python 3 signature will omit annotations if it is set to False. 
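        A small example:

        >>> FunctionBuilder('f', args=['a', 'b']).get_sig_str()
        '(a, b)'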
""" if with_annotations: annotations = self.annotations else: annotations = {} return inspect_formatargspec(self.args, self.varargs, self.varkw, [], self.kwonlyargs, {}, annotations) _KWONLY_MARKER = re.compile(r""" \* # a star \s* # followed by any amount of whitespace , # followed by a comma \s* # followed by any amount of whitespace """, re.VERBOSE) def get_invocation_str(self): kwonly_pairs = None formatters = {} if self.kwonlyargs: kwonly_pairs = {arg: arg for arg in self.kwonlyargs} formatters['formatvalue'] = lambda value: '=' + value sig = inspect_formatargspec(self.args, self.varargs, self.varkw, [], kwonly_pairs, kwonly_pairs, {}, **formatters) sig = self._KWONLY_MARKER.sub('', sig) return sig[1:-1] @classmethod def from_func(cls, func): """Create a new FunctionBuilder instance based on an existing function. The original function will not be stored or modified. """ # TODO: copy_body? gonna need a good signature regex. # TODO: might worry about __closure__? if not callable(func): raise TypeError(f'expected callable object, not {func!r}') if isinstance(func, functools.partial): kwargs = {'name': func.func.__name__, 'doc': func.func.__doc__, 'module': getattr(func.func, '__module__', None), # e.g., method_descriptor 'annotations': getattr(func.func, "__annotations__", {}), 'dict': getattr(func.func, '__dict__', {})} else: kwargs = {'name': func.__name__, 'doc': func.__doc__, 'module': getattr(func, '__module__', None), # e.g., method_descriptor 'annotations': getattr(func, "__annotations__", {}), 'dict': getattr(func, '__dict__', {})} kwargs.update(cls._argspec_to_dict(func)) if inspect.iscoroutinefunction(func): kwargs['is_async'] = True return cls(**kwargs) def get_func(self, execdict=None, add_source=True, with_dict=True): """Compile and return a new function based on the current values of the FunctionBuilder. Args: execdict (dict): The dictionary representing the scope in which the compilation should take place. Defaults to an empty dict. add_source (bool): Whether to add the source used to a special ``__source__`` attribute on the resulting function. Defaults to True. with_dict (bool): Add any custom attributes, if applicable. Defaults to True. To see an example of usage, see the implementation of :func:`~boltons.funcutils.wraps`. """ execdict = execdict or {} body = self.body or self._default_body tmpl = 'def {name}{sig_str}:' tmpl += '\n{body}' if self.is_async: tmpl = 'async ' + tmpl body = _indent(self.body, ' ' * self.indent) name = self.name.replace('<', '_').replace('>', '_') # lambdas src = tmpl.format(name=name, sig_str=self.get_sig_str(with_annotations=False), doc=self.doc, body=body) self._compile(src, execdict) func = execdict[name] func.__name__ = self.name func.__doc__ = self.doc func.__defaults__ = self.defaults func.__kwdefaults__ = self.kwonlydefaults func.__annotations__ = self.annotations if with_dict: func.__dict__.update(self.dict) func.__module__ = self.module # TODO: caller module fallback? if add_source: func.__source__ = src return func def get_defaults_dict(self): """Get a dictionary of function arguments with defaults and the respective values. 
""" ret = dict(reversed(list(zip(reversed(self.args), reversed(self.defaults or []))))) kwonlydefaults = getattr(self, 'kwonlydefaults', None) if kwonlydefaults: ret.update(kwonlydefaults) return ret def get_arg_names(self, only_required=False): arg_names = tuple(self.args) + tuple(getattr(self, 'kwonlyargs', ())) if only_required: defaults_dict = self.get_defaults_dict() arg_names = tuple([an for an in arg_names if an not in defaults_dict]) return arg_names def add_arg(self, arg_name, default=NO_DEFAULT, kwonly=False): """Add an argument with optional *default* (defaults to ``funcutils.NO_DEFAULT``). Pass *kwonly=True* to add a keyword-only argument """ if arg_name in self.args: raise ExistingArgument(f'arg {arg_name!r} already in func {self.name} arg list') if arg_name in self.kwonlyargs: raise ExistingArgument(f'arg {arg_name!r} already in func {self.name} kwonly arg list') if not kwonly: self.args.append(arg_name) if default is not NO_DEFAULT: self.defaults = (self.defaults or ()) + (default,) else: self.kwonlyargs.append(arg_name) if default is not NO_DEFAULT: self.kwonlydefaults[arg_name] = default def remove_arg(self, arg_name): """Remove an argument from this FunctionBuilder's argument list. The resulting function will have one less argument per call to this function. Args: arg_name (str): The name of the argument to remove. Raises a :exc:`ValueError` if the argument is not present. """ args = self.args d_dict = self.get_defaults_dict() try: args.remove(arg_name) except ValueError: try: self.kwonlyargs.remove(arg_name) except (AttributeError, ValueError): # missing from both exc = MissingArgument('arg %r not found in %s argument list:' ' %r' % (arg_name, self.name, args)) exc.arg_name = arg_name raise exc else: self.kwonlydefaults.pop(arg_name, None) else: d_dict.pop(arg_name, None) self.defaults = tuple([d_dict[a] for a in args if a in d_dict]) return def _compile(self, src, execdict): filename = ('<%s-%d>' % (self.filename, next(self._compile_count),)) try: code = compile(src, filename, 'single') exec(code, execdict) except Exception: raise return execdict class MissingArgument(ValueError): pass class ExistingArgument(ValueError): pass def _indent(text, margin, newline='\n', key=bool): "based on boltons.strutils.indent" indented_lines = [(margin + line if key(line) else line) for line in text.splitlines()] return newline.join(indented_lines) def noop(*args, **kwargs): """ Simple function that should be used when no effect is desired. An alternative to checking for an optional function type parameter. e.g. def decorate(func, pre_func=None, post_func=None): if pre_func: pre_func() func() if post_func: post_func() vs def decorate(func, pre_func=noop, post_func=noop): pre_func() func() post_func() """ return None # end funcutils.py boltons-25.0.0/boltons/gcutils.py000066400000000000000000000141251475005545200167740ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. 
# # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """The Python Garbage Collector (`GC`_) doesn't usually get too much attention, probably because: - Python's `reference counting`_ effectively handles the vast majority of unused objects - People are slowly learning to avoid implementing `object.__del__()`_ - The collection itself strikes a good balance between simplicity and power (`tunable generation sizes`_) - The collector itself is fast and rarely the cause of long pauses associated with GC in other runtimes Even so, for many applications, the time will come when the developer will need to track down: - Circular references - Misbehaving objects (locks, ``__del__()``) - Memory leaks - Or just ways to shave off a couple percent of execution time Thanks to the :mod:`gc` module, the GC is a well-instrumented entry point for exactly these tasks, and ``gcutils`` aims to facilitate it further. .. _GC: https://docs.python.org/2/glossary.html#term-garbage-collection .. _reference counting: https://docs.python.org/2/glossary.html#term-reference-count .. _object.__del__(): https://docs.python.org/2/glossary.html#term-reference-count .. _tunable generation sizes: https://docs.python.org/2/library/gc.html#gc.set_threshold """ # TODO: type survey import gc import sys __all__ = ['get_all', 'GCToggler', 'toggle_gc', 'toggle_gc_postcollect'] def get_all(type_obj, include_subtypes=True): """Get a list containing all instances of a given type. This will work for the vast majority of types out there. >>> class Ratking(object): pass >>> wiki, hak, sport = Ratking(), Ratking(), Ratking() >>> len(get_all(Ratking)) 3 However, there are some exceptions. For example, ``get_all(bool)`` returns an empty list because ``True`` and ``False`` are themselves built-in and not tracked. >>> get_all(bool) [] Still, it's not hard to see how this functionality can be used to find all instances of a leaking type and track them down further using :func:`gc.get_referrers` and :func:`gc.get_referents`. ``get_all()`` is optimized such that getting instances of user-created types is quite fast. Setting *include_subtypes* to ``False`` will further increase performance in cases where instances of subtypes aren't required. .. note:: There are no guarantees about the state of objects returned by ``get_all()``, especially in concurrent environments. For instance, it is possible for an object to be in the middle of executing its ``__init__()`` and be only partially constructed. 
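    Only types are accepted; passing anything else raises a
    :exc:`TypeError`:

    >>> get_all(7)
    Traceback (most recent call last):
      ...
    TypeError: expected a type, not 7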
""" # TODO: old-style classes if not isinstance(type_obj, type): raise TypeError('expected a type, not %r' % type_obj) try: type_is_tracked = gc.is_tracked(type_obj) except AttributeError: type_is_tracked = False # Python 2.6 and below don't get the speedup if type_is_tracked: to_check = gc.get_referrers(type_obj) else: to_check = gc.get_objects() if include_subtypes: ret = [x for x in to_check if isinstance(x, type_obj)] else: ret = [x for x in to_check if type(x) is type_obj] return ret _IS_PYPY = '__pypy__' in sys.builtin_module_names if _IS_PYPY: # pypy's gc is just different, y'all del get_all class GCToggler: """The ``GCToggler`` is a context-manager that allows one to safely take more control of your garbage collection schedule. Anecdotal experience says certain object-creation-heavy tasks see speedups of around 10% by simply doing one explicit collection at the very end, especially if most of the objects will stay resident. Two GCTogglers are already present in the ``gcutils`` module: - :data:`toggle_gc` simply turns off GC at context entrance, and re-enables at exit - :data:`toggle_gc_postcollect` does the same, but triggers an explicit collection after re-enabling. >>> with toggle_gc: ... x = [object() for i in range(1000)] Between those two instances, the ``GCToggler`` type probably won't be used much directly, but is documented for inheritance purposes. """ def __init__(self, postcollect=False): self.postcollect = postcollect def __enter__(self): gc.disable() def __exit__(self, exc_type, exc_val, exc_tb): gc.enable() if self.postcollect: gc.collect() toggle_gc = GCToggler() """A context manager for disabling GC for a code block. See :class:`GCToggler` for more details.""" toggle_gc_postcollect = GCToggler(postcollect=True) """A context manager for disabling GC for a code block, and collecting before re-enabling. See :class:`GCToggler` for more details.""" boltons-25.0.0/boltons/ioutils.py000066400000000000000000000431071475005545200170140ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Coding decl above needed for rendering the emdash properly in the
# documentation.

"""
Module ``ioutils`` implements a number of helper classes and functions which
are useful when dealing with input, output, and bytestreams in a variety of
ways.
"""
import os
from io import BytesIO, IOBase
from abc import (
    ABCMeta,
    abstractmethod,
    abstractproperty,
)
from errno import EINVAL
from codecs import EncodedFile
from tempfile import TemporaryFile
from itertools import zip_longest


READ_CHUNK_SIZE = 21333
"""
Number of bytes to read at a time. The value is ~ 1/3rd of 64k which means that
the value will easily fit in the L2 cache of most processors even if every
codepoint in a string is three bytes long which makes it a nice fast default
value.
"""


class SpooledIOBase(IOBase):
    """
    A base class shared by the SpooledBytesIO and SpooledStringIO classes.

    The SpooledTemporaryFile class is missing several attributes and methods
    present in the StringIO implementation. This brings the API as close to
    parity as possible so that classes derived from SpooledIOBase can be used
    as near drop-in replacements to save memory.
    """
    __metaclass__ = ABCMeta

    def __init__(self, max_size=5000000, dir=None):
        self._max_size = max_size
        self._dir = dir

    def _checkClosed(self, msg=None):
        """Raise a ValueError if file is closed"""
        if self.closed:
            raise ValueError('I/O operation on closed file.'
                             if msg is None else msg)

    @abstractmethod
    def read(self, n=-1):
        """Read n characters from the buffer"""

    @abstractmethod
    def write(self, s):
        """Write into the buffer"""

    @abstractmethod
    def seek(self, pos, mode=0):
        """Seek to a specific point in a file"""

    @abstractmethod
    def readline(self, length=None):
        """Returns the next available line"""

    @abstractmethod
    def readlines(self, sizehint=0):
        """Returns a list of all lines from the current position forward"""

    def writelines(self, lines):
        """
        Write lines to the file from an iterable.

        NOTE: writelines() does NOT add line separators.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

    @abstractmethod
    def rollover(self):
        """Roll file-like-object over into a real temporary file"""

    @abstractmethod
    def tell(self):
        """Return the current position"""

    @abstractproperty
    def buffer(self):
        """Should return a flo instance"""

    @abstractproperty
    def _rolled(self):
        """Returns whether the file has been rolled to a real file or not"""

    @abstractproperty
    def len(self):
        """Returns the length of the data"""

    def _get_softspace(self):
        return self.buffer.softspace

    def _set_softspace(self, val):
        self.buffer.softspace = val

    softspace = property(_get_softspace, _set_softspace)

    @property
    def _file(self):
        return self.buffer

    def close(self):
        return self.buffer.close()

    def flush(self):
        self._checkClosed()
        return self.buffer.flush()

    def isatty(self):
        self._checkClosed()
        return self.buffer.isatty()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def pos(self):
        return self.tell()

    @property
    def buf(self):
        return self.getvalue()

    def fileno(self):
        self.rollover()
        return self.buffer.fileno()

    def truncate(self, size=None):
        """
        Truncate the contents of the buffer.

        Custom version of truncate that takes either no arguments (like the
        real SpooledTemporaryFile) or a single argument that truncates the
        value to a certain index location.
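        A small example, using the :class:`SpooledBytesIO` subclass defined
        below:

        >>> f = SpooledBytesIO()
        >>> f.write(b"spam and eggs")
        >>> f.truncate(4)
        >>> f.getvalue()
        b'spam'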
""" self._checkClosed() if size is None: return self.buffer.truncate() if size < 0: raise OSError(EINVAL, "Negative size not allowed") # Emulate truncation to a particular location pos = self.tell() self.seek(size) self.buffer.truncate() if pos < size: self.seek(pos) def getvalue(self): """Return the entire files contents.""" self._checkClosed() pos = self.tell() self.seek(0) val = self.read() self.seek(pos) return val def seekable(self): return True def readable(self): return True def writable(self): return True def __next__(self): self._checkClosed() line = self.readline() if not line: pos = self.buffer.tell() self.buffer.seek(0, os.SEEK_END) if pos == self.buffer.tell(): raise StopIteration else: self.buffer.seek(pos) return line next = __next__ def __len__(self): return self.len def __iter__(self): self._checkClosed() return self def __enter__(self): self._checkClosed() return self def __exit__(self, *args): self._file.close() def __eq__(self, other): if isinstance(other, self.__class__): self_pos = self.tell() other_pos = other.tell() try: self.seek(0) other.seek(0) eq = True for self_line, other_line in zip_longest(self, other): if self_line != other_line: eq = False break self.seek(self_pos) other.seek(other_pos) except Exception: # Attempt to return files to original position if there were any errors try: self.seek(self_pos) except Exception: pass try: other.seek(other_pos) except Exception: pass raise else: return eq return False def __ne__(self, other): return not self.__eq__(other) def __bool__(self): return True def __del__(self): """Can fail when called at program exit so suppress traceback.""" try: self.close() except Exception: pass class SpooledBytesIO(SpooledIOBase): """ SpooledBytesIO is a spooled file-like-object that only accepts bytes. On Python 2.x this means the 'str' type; on Python 3.x this means the 'bytes' type. Bytes are written in and retrieved exactly as given, but it will raise TypeErrors if something other than bytes are written. Example:: >>> from boltons import ioutils >>> with ioutils.SpooledBytesIO() as f: ... f.write(b"Happy IO") ... _ = f.seek(0) ... 
isinstance(f.getvalue(), bytes)
        True
    """

    def read(self, n=-1):
        self._checkClosed()
        return self.buffer.read(n)

    def write(self, s):
        self._checkClosed()
        if not isinstance(s, bytes):
            raise TypeError("bytes expected, got {}".format(
                type(s).__name__
            ))

        if self.tell() + len(s) >= self._max_size:
            self.rollover()
        self.buffer.write(s)

    def seek(self, pos, mode=0):
        self._checkClosed()
        return self.buffer.seek(pos, mode)

    def readline(self, length=None):
        self._checkClosed()
        if length:
            return self.buffer.readline(length)
        else:
            return self.buffer.readline()

    def readlines(self, sizehint=0):
        return self.buffer.readlines(sizehint)

    def rollover(self):
        """Roll the in-memory buffer over into a TempFile"""
        if not self._rolled:
            tmp = TemporaryFile(dir=self._dir)
            pos = self.buffer.tell()
            tmp.write(self.buffer.getvalue())
            tmp.seek(pos)
            self.buffer.close()
            self._buffer = tmp

    @property
    def _rolled(self):
        return not isinstance(self.buffer, BytesIO)

    @property
    def buffer(self):
        try:
            return self._buffer
        except AttributeError:
            self._buffer = BytesIO()
        return self._buffer

    @property
    def len(self):
        """Determine the length of the file"""
        pos = self.tell()
        if self._rolled:
            self.seek(0)
            val = os.fstat(self.fileno()).st_size
        else:
            self.seek(0, os.SEEK_END)
            val = self.tell()
        self.seek(pos)
        return val

    def tell(self):
        self._checkClosed()
        return self.buffer.tell()


class SpooledStringIO(SpooledIOBase):
    """
    SpooledStringIO is a spooled file-like object that only accepts text
    (the :class:`str` type). Values are accepted as text and coerced into
    UTF-8 encoded bytes for storage. On retrieval, the values are returned
    as text.

    Example::

        >>> from boltons import ioutils
        >>> with ioutils.SpooledStringIO() as f:
        ...     f.write(u"\u2014 Hey, an emdash!")
        ...     _ = f.seek(0)
        ...
isinstance(f.read(), str) True """ def __init__(self, *args, **kwargs): self._tell = 0 super().__init__(*args, **kwargs) def read(self, n=-1): self._checkClosed() ret = self.buffer.reader.read(n, n) self._tell = self.tell() + len(ret) return ret def write(self, s): self._checkClosed() if not isinstance(s, str): raise TypeError("str expected, got {}".format( type(s).__name__ )) current_pos = self.tell() if self.buffer.tell() + len(s.encode('utf-8')) >= self._max_size: self.rollover() self.buffer.write(s.encode('utf-8')) self._tell = current_pos + len(s) def _traverse_codepoints(self, current_position, n): """Traverse from current position to the right n codepoints""" dest = current_position + n while True: if current_position == dest: # By chance we've landed on the right position, break break # If the read would take us past the intended position then # seek only enough to cover the offset if current_position + READ_CHUNK_SIZE > dest: self.read(dest - current_position) break else: ret = self.read(READ_CHUNK_SIZE) # Increment our current position current_position += READ_CHUNK_SIZE # If we kept reading but there was nothing here, break # as we are at the end of the file if not ret: break return dest def seek(self, pos, mode=0): """Traverse from offset to the specified codepoint""" self._checkClosed() # Seek to position from the start of the file if mode == os.SEEK_SET: self.buffer.seek(0) self._traverse_codepoints(0, pos) self._tell = pos # Seek to new position relative to current position elif mode == os.SEEK_CUR: start_pos = self.tell() self._traverse_codepoints(self.tell(), pos) self._tell = start_pos + pos elif mode == os.SEEK_END: self.buffer.seek(0) dest_position = self.len - pos self._traverse_codepoints(0, dest_position) self._tell = dest_position else: raise ValueError( f"Invalid whence ({mode}, should be 0, 1, or 2)" ) return self.tell() def readline(self, length=None): self._checkClosed() ret = self.buffer.readline(length).decode('utf-8') self._tell = self.tell() + len(ret) return ret def readlines(self, sizehint=0): ret = [x.decode('utf-8') for x in self.buffer.readlines(sizehint)] self._tell = self.tell() + sum(len(x) for x in ret) return ret @property def buffer(self): try: return self._buffer except AttributeError: self._buffer = EncodedFile(BytesIO(), data_encoding='utf-8') return self._buffer @property def _rolled(self): return not isinstance(self.buffer.stream, BytesIO) def rollover(self): """Roll the buffer over to a TempFile""" if not self._rolled: tmp = EncodedFile(TemporaryFile(dir=self._dir), data_encoding='utf-8') pos = self.buffer.tell() tmp.write(self.buffer.getvalue()) tmp.seek(pos) self.buffer.close() self._buffer = tmp def tell(self): """Return the codepoint position""" self._checkClosed() return self._tell @property def len(self): """Determine the number of codepoints in the file""" pos = self.buffer.tell() self.buffer.seek(0) total = 0 while True: ret = self.read(READ_CHUNK_SIZE) if not ret: break total += len(ret) self.buffer.seek(pos) return total def is_text_fileobj(fileobj): if getattr(fileobj, 'encoding', False): # codecs.open and io.TextIOBase return True if getattr(fileobj, 'getvalue', False): # StringIO.StringIO / io.StringIO try: if isinstance(fileobj.getvalue(), str): return True except Exception: pass return False class MultiFileReader: """Takes a list of open files or file-like objects and provides an interface to read from them all contiguously. Like :func:`itertools.chain()`, but for reading files. 
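
    A text-mode sketch (using :class:`io.StringIO` sources here; as noted
    below, mixing bytes- and text-handling objects is an error)::

       >>> from io import StringIO
       >>> tmfr = MultiFileReader(StringIO('do'), StringIO('re'), StringIO('mi'))
       >>> tmfr.read()
       'doremi'

    And the bytes-mode equivalent: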
>>> mfr = MultiFileReader(BytesIO(b'ab'), BytesIO(b'cd'), BytesIO(b'e'))
       >>> mfr.read(3).decode('ascii')
       'abc'
       >>> mfr.read(3).decode('ascii')
       'de'

    The constructor takes as many fileobjs as you hand it, and will
    raise a TypeError on non-file-like objects. A ValueError is raised
    when file-like objects are a mix of bytes- and text-handling
    objects (for instance, BytesIO and StringIO).
    """

    def __init__(self, *fileobjs):
        if not all([callable(getattr(f, 'read', None)) and
                    callable(getattr(f, 'seek', None)) for f in fileobjs]):
            raise TypeError('MultiFileReader expected file-like objects'
                            ' with .read() and .seek()')
        if all([is_text_fileobj(f) for f in fileobjs]):
            # codecs.open and io.TextIOBase
            self._joiner = ''
        elif any([is_text_fileobj(f) for f in fileobjs]):
            raise ValueError('All arguments to MultiFileReader must handle'
                             ' bytes OR text, not a mix')
        else:
            # open/file and io.BytesIO
            self._joiner = b''
        self._fileobjs = fileobjs
        self._index = 0

    def read(self, amt=None):
        """Read up to the specified *amt*, seamlessly bridging across
        files. Returns the appropriate type of string (bytes or text)
        for the input, and returns an empty string when the files are
        exhausted.
        """
        if not amt:
            return self._joiner.join(f.read() for f in self._fileobjs)
        parts = []
        while amt > 0 and self._index < len(self._fileobjs):
            parts.append(self._fileobjs[self._index].read(amt))
            got = len(parts[-1])
            if got < amt:
                self._index += 1
            amt -= got
        return self._joiner.join(parts)

    def seek(self, offset, whence=os.SEEK_SET):
        """Enables setting position of the file cursor to a given
        *offset*. Currently only supports ``offset=0``.
        """
        if whence != os.SEEK_SET:
            raise NotImplementedError(
                'MultiFileReader.seek() only supports os.SEEK_SET')
        if offset != 0:
            raise NotImplementedError(
                'MultiFileReader only supports seeking to start at this time')
        for f in self._fileobjs:
            f.seek(0)
boltons-25.0.0/boltons/iterutils.py000066400000000000000000001603321475005545200173500ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#
#     * The names of the contributors may not be used to endorse or
#       promote products derived from this software without specific
#       prior written permission.
#
#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

""":mod:`itertools` is full of great examples of Python generator
usage.
However, there are still some critical gaps. ``iterutils`` fills many of those gaps with featureful, tested, and Pythonic solutions. Many of the functions below have two versions, one which returns an iterator (denoted by the ``*_iter`` naming pattern), and a shorter-named convenience form that returns a list. Some of the following are based on examples in itertools docs. """ import os import math import time import codecs import random import itertools from itertools import zip_longest from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable try: from .typeutils import make_sentinel _UNSET = make_sentinel('_UNSET') _REMAP_EXIT = make_sentinel('_REMAP_EXIT') except ImportError: _REMAP_EXIT = object() _UNSET = object() def is_iterable(obj): """Similar in nature to :func:`callable`, ``is_iterable`` returns ``True`` if an object is `iterable`_, ``False`` if not. >>> is_iterable([]) True >>> is_iterable(object()) False .. _iterable: https://docs.python.org/2/glossary.html#term-iterable """ try: iter(obj) except TypeError: return False return True def is_scalar(obj): """A near-mirror of :func:`is_iterable`. Returns ``False`` if an object is an iterable container type. Strings are considered scalar as well, because strings are more often treated as whole values as opposed to iterables of 1-character substrings. >>> is_scalar(object()) True >>> is_scalar(range(10)) False >>> is_scalar('hello') True """ return not is_iterable(obj) or isinstance(obj, (str, bytes)) def is_collection(obj): """The opposite of :func:`is_scalar`. Returns ``True`` if an object is an iterable other than a string. >>> is_collection(object()) False >>> is_collection(range(10)) True >>> is_collection('hello') False """ return is_iterable(obj) and not isinstance(obj, (str, bytes)) def split(src, sep=None, maxsplit=None): """Splits an iterable based on a separator. Like :meth:`str.split`, but for all iterables. Returns a list of lists. >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None]) [['hi', 'hello'], ['sup'], ['soap']] See :func:`split_iter` docs for more info. """ return list(split_iter(src, sep, maxsplit)) def split_iter(src, sep=None, maxsplit=None): """Splits an iterable based on a separator, *sep*, a max of *maxsplit* times (no max by default). *sep* can be: * a single value * an iterable of separators * a single-argument callable that returns True when a separator is encountered ``split_iter()`` yields lists of non-separator values. A separator will never appear in the output. >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None])) [['hi', 'hello'], ['sup'], ['soap']] Note that ``split_iter`` is based on :func:`str.split`, so if *sep* is ``None``, ``split()`` **groups** separators. If empty lists are desired between two contiguous ``None`` values, simply use ``sep=[None]``: >>> list(split_iter(['hi', 'hello', None, None, 'sup', None])) [['hi', 'hello'], ['sup']] >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None])) [['hi', 'hello'], [], ['sup'], []] Using a callable separator: >>> falsy_sep = lambda x: not x >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep)) [['hi', 'hello'], [], ['sup'], []] See :func:`split` for a list-returning version. 
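
    *maxsplit* caps the number of splits performed, analogous to the
    same argument on :meth:`str.split`; for instance::

        >>> list(split_iter(['a', None, 'b', None, 'c'], maxsplit=1))
        [['a'], ['b', None, 'c']]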
""" if not is_iterable(src): raise TypeError('expected an iterable') if maxsplit is not None: maxsplit = int(maxsplit) if maxsplit == 0: yield [src] return if callable(sep): sep_func = sep elif not is_scalar(sep): sep = frozenset(sep) def sep_func(x): return x in sep else: def sep_func(x): return x == sep cur_group = [] split_count = 0 for s in src: if maxsplit is not None and split_count >= maxsplit: def sep_func(x): return False if sep_func(s): if sep is None and not cur_group: # If sep is none, str.split() "groups" separators # check the str.split() docs for more info continue split_count += 1 yield cur_group cur_group = [] else: cur_group.append(s) if cur_group or sep is not None: yield cur_group return def lstrip(iterable, strip_value=None): """Strips values from the beginning of an iterable. Stripped items will match the value of the argument strip_value. Functionality is analogous to that of the method str.lstrip. Returns a list. >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo') ['Bar', 'Bam'] """ return list(lstrip_iter(iterable, strip_value)) def lstrip_iter(iterable, strip_value=None): """Strips values from the beginning of an iterable. Stripped items will match the value of the argument strip_value. Functionality is analogous to that of the method str.lstrip. Returns a generator. >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo')) ['Bar', 'Bam'] """ iterator = iter(iterable) for i in iterator: if i != strip_value: yield i break for i in iterator: yield i def rstrip(iterable, strip_value=None): """Strips values from the end of an iterable. Stripped items will match the value of the argument strip_value. Functionality is analogous to that of the method str.rstrip. Returns a list. >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam') ['Foo', 'Bar'] """ return list(rstrip_iter(iterable, strip_value)) def rstrip_iter(iterable, strip_value=None): """Strips values from the end of an iterable. Stripped items will match the value of the argument strip_value. Functionality is analogous to that of the method str.rstrip. Returns a generator. >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam')) ['Foo', 'Bar'] """ iterator = iter(iterable) for i in iterator: if i == strip_value: cache = list() cache.append(i) broken = False for i in iterator: if i == strip_value: cache.append(i) else: broken = True break if not broken: # Return to caller here because the end of the return # iterator has been reached yield from cache yield i def strip(iterable, strip_value=None): """Strips values from the beginning and end of an iterable. Stripped items will match the value of the argument strip_value. Functionality is analogous to that of the method str.strip. Returns a list. >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu') ['Foo', 'Bar', 'Bam'] """ return list(strip_iter(iterable, strip_value)) def strip_iter(iterable, strip_value=None): """Strips values from the beginning and end of an iterable. Stripped items will match the value of the argument strip_value. Functionality is analogous to that of the method str.strip. Returns a generator. >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')) ['Foo', 'Bar', 'Bam'] """ return rstrip_iter(lstrip_iter(iterable, strip_value), strip_value) def chunked(src, size, count=None, **kw): """Returns a list of *count* chunks, each with *size* elements, generated from iterable *src*. If *src* is not evenly divisible by *size*, the final chunk will have fewer than *size* elements. 
Pass the *fill* keyword argument to provide a pad value and
    enable padding, otherwise no padding will take place.

    >>> chunked(range(10), 3)
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
    >>> chunked(range(10), 3, fill=None)
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
    >>> chunked(range(10), 3, count=2)
    [[0, 1, 2], [3, 4, 5]]

    See :func:`chunked_iter` for more info.
    """
    chunk_iter = chunked_iter(src, size, **kw)
    if count is None:
        return list(chunk_iter)
    else:
        return list(itertools.islice(chunk_iter, count))


def _validate_positive_int(value, name, strictly_positive=True):
    value = int(value)
    if value < 0 or (strictly_positive and value == 0):
        raise ValueError('expected a positive integer ' + name)
    return value


def chunked_iter(src, size, **kw):
    """Generates *size*-sized chunks from *src* iterable. Unless the
    optional *fill* keyword argument is provided, iterables not evenly
    divisible by *size* will have a final chunk that is smaller than
    *size*.

    >>> list(chunked_iter(range(10), 3))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
    >>> list(chunked_iter(range(10), 3, fill=None))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]

    Note that ``fill=None`` in fact uses ``None`` as the fill value.
    """
    # TODO: add count kwarg?
    if not is_iterable(src):
        raise TypeError('expected an iterable')
    size = _validate_positive_int(size, 'chunk size')
    do_fill = True
    try:
        fill_val = kw.pop('fill')
    except KeyError:
        do_fill = False
        fill_val = None
    if kw:
        raise ValueError('got unexpected keyword arguments: %r' % kw.keys())
    if not src:
        return

    def postprocess(chk):
        return chk
    if isinstance(src, (str, bytes)):
        def postprocess(chk, _sep=type(src)()):
            return _sep.join(chk)

        if isinstance(src, bytes):
            def postprocess(chk):
                return bytes(chk)
    src_iter = iter(src)
    while True:
        cur_chunk = list(itertools.islice(src_iter, size))
        if not cur_chunk:
            break
        lc = len(cur_chunk)
        if lc < size and do_fill:
            cur_chunk[lc:] = [fill_val] * (size - lc)
        yield postprocess(cur_chunk)
    return


def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False):
    """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*.

    Optionally, a start of the input can be set via *input_offset*, and
    an overlap between the chunks may be specified via *overlap_size*.

    Also, if *align* is set to *True*, any items with *i % (chunk_size-overlap_size) == 0*
    are always at the beginning of the chunk.

    Returns an iterator of (start, end) tuples, one tuple per chunk.
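
    For instance, the ranges can be used to slice a buffer directly (an
    illustrative sketch, where ``data`` stands in for any sliceable
    sequence)::

        >>> data = b'0123456789'
        >>> [data[start:end] for start, end in chunk_ranges(len(data), 4)]
        [b'0123', b'4567', b'89']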
>>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5)) [(10, 15), (15, 20)] >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1)) [(10, 15), (14, 19), (18, 20)] >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2)) [(10, 15), (13, 18), (16, 20)] >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False)) [(4, 9), (9, 14), (14, 19)] >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True)) [(4, 5), (5, 10), (10, 15), (15, 19)] >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False)) [(2, 7), (6, 11), (10, 15), (14, 17)] >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True)) [(2, 5), (4, 9), (8, 13), (12, 17)] >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True)) [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)] """ input_size = _validate_positive_int( input_size, 'input_size', strictly_positive=False) chunk_size = _validate_positive_int(chunk_size, 'chunk_size') input_offset = _validate_positive_int( input_offset, 'input_offset', strictly_positive=False) overlap_size = _validate_positive_int( overlap_size, 'overlap_size', strictly_positive=False) input_stop = input_offset + input_size if align: initial_chunk_len = chunk_size - \ input_offset % (chunk_size - overlap_size) if initial_chunk_len != overlap_size: yield (input_offset, min(input_offset + initial_chunk_len, input_stop)) if input_offset + initial_chunk_len >= input_stop: return input_offset = input_offset + initial_chunk_len - overlap_size for i in range(input_offset, input_stop, chunk_size - overlap_size): yield (i, min(i + chunk_size, input_stop)) if i + chunk_size >= input_stop: return def pairwise(src, end=_UNSET): """Convenience function for calling :func:`windowed` on *src*, with *size* set to 2. >>> pairwise(range(5)) [(0, 1), (1, 2), (2, 3), (3, 4)] >>> pairwise([]) [] Unless *end* is set, the number of pairs is always one less than the number of elements in the iterable passed in, except on an empty input, which will return an empty list. With *end* set, a number of pairs equal to the length of *src* is returned, with the last item of the last pair being equal to *end*. >>> list(pairwise(range(3), end=None)) [(0, 1), (1, 2), (2, None)] This way, *end* values can be useful as sentinels to signal the end of the iterable. """ return windowed(src, 2, fill=end) def pairwise_iter(src, end=_UNSET): """Convenience function for calling :func:`windowed_iter` on *src*, with *size* set to 2. >>> list(pairwise_iter(range(5))) [(0, 1), (1, 2), (2, 3), (3, 4)] >>> list(pairwise_iter([])) [] Unless *end* is set, the number of pairs is always one less than the number of elements in the iterable passed in, or zero, when *src* is empty. With *end* set, a number of pairs equal to the length of *src* is returned, with the last item of the last pair being equal to *end*. >>> list(pairwise_iter(range(3), end=None)) [(0, 1), (1, 2), (2, None)] This way, *end* values can be useful as sentinels to signal the end of the iterable. For infinite iterators, setting *end* has no effect. """ return windowed_iter(src, 2, fill=end) def windowed(src, size, fill=_UNSET): """Returns tuples with exactly length *size*. If *fill* is unset and the iterable is too short to make a window of length *size*, no tuples are returned. See :func:`windowed_iter` for more. 
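
    A list-returning pair of examples, one without and one with *fill*::

        >>> windowed([1, 2, 3, 4], 3)
        [(1, 2, 3), (2, 3, 4)]
        >>> windowed([1, 2], 3, fill=None)
        [(1, 2, None), (2, None, None)]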
""" return list(windowed_iter(src, size, fill=fill)) def windowed_iter(src, size, fill=_UNSET): """Returns tuples with length *size* which represent a sliding window over iterable *src*. >>> list(windowed_iter(range(7), 3)) [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)] If *fill* is unset, and the iterable is too short to make a window of length *size*, then no window tuples are returned. >>> list(windowed_iter(range(3), 5)) [] With *fill* set, the iterator always yields a number of windows equal to the length of the *src* iterable. >>> windowed(range(4), 3, fill=None) [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)] This way, *fill* values can be useful to signal the end of the iterable. For infinite iterators, setting *fill* has no effect. """ tees = itertools.tee(src, size) if fill is _UNSET: try: for i, t in enumerate(tees): for _ in range(i): next(t) except StopIteration: return zip([]) return zip(*tees) for i, t in enumerate(tees): for _ in range(i): try: next(t) except StopIteration: continue return zip_longest(*tees, fillvalue=fill) def xfrange(stop, start=None, step=1.0): """Same as :func:`frange`, but generator-based instead of returning a list. >>> tuple(xfrange(1, 3, step=0.75)) (1.0, 1.75, 2.5) See :func:`frange` for more details. """ if not step: raise ValueError('step must be non-zero') if start is None: start, stop = 0.0, stop * 1.0 else: # swap when all args are used stop, start = start * 1.0, stop * 1.0 cur = start while cur < stop: yield cur cur += step def frange(stop, start=None, step=1.0): """A :func:`range` clone for float-based ranges. >>> frange(5) [0.0, 1.0, 2.0, 3.0, 4.0] >>> frange(6, step=1.25) [0.0, 1.25, 2.5, 3.75, 5.0] >>> frange(100.5, 101.5, 0.25) [100.5, 100.75, 101.0, 101.25] >>> frange(5, 0) [] >>> frange(5, 0, step=-1.25) [5.0, 3.75, 2.5, 1.25] """ if not step: raise ValueError('step must be non-zero') if start is None: start, stop = 0.0, stop * 1.0 else: # swap when all args are used stop, start = start * 1.0, stop * 1.0 count = int(math.ceil((stop - start) / step)) ret = [None] * count if not ret: return ret ret[0] = start for i in range(1, count): ret[i] = ret[i - 1] + step return ret def backoff(start, stop, count=None, factor=2.0, jitter=False): """Returns a list of geometrically-increasing floating-point numbers, suitable for usage with `exponential backoff`_. Exactly like :func:`backoff_iter`, but without the ``'repeat'`` option for *count*. See :func:`backoff_iter` for more details. .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff >>> backoff(1, 10) [1.0, 2.0, 4.0, 8.0, 10.0] """ if count == 'repeat': raise ValueError("'repeat' supported in backoff_iter, not backoff") return list(backoff_iter(start, stop, count=count, factor=factor, jitter=jitter)) def backoff_iter(start, stop, count=None, factor=2.0, jitter=False): """Generates a sequence of geometrically-increasing floats, suitable for usage with `exponential backoff`_. Starts with *start*, increasing by *factor* until *stop* is reached, optionally stopping iteration once *count* numbers are yielded. *factor* defaults to 2. In general retrying with properly-configured backoff creates a better-behaved component for a larger service ecosystem. .. 
_exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff

    >>> list(backoff_iter(1.0, 10.0, count=5))
    [1.0, 2.0, 4.0, 8.0, 10.0]
    >>> list(backoff_iter(1.0, 10.0, count=8))
    [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0]
    >>> list(backoff_iter(0.25, 100.0, factor=10))
    [0.25, 2.5, 25.0, 100.0]

    A simplified usage example:

    .. code-block:: python

      for timeout in backoff_iter(0.25, 5.0):
          try:
              res = network_call()
              break
          except Exception as e:
              log(e)
          time.sleep(timeout)

    An enhancement for large-scale systems would be to add variation,
    or *jitter*, to timeout values. This is done to avoid a thundering
    herd on the receiving end of the network call.

    Finally, for *count*, the special value ``'repeat'`` can be passed to
    continue yielding indefinitely.

    Args:

       start (float): Positive number for baseline.
       stop (float): Positive number for maximum.
       count (int): Number of steps before stopping
           iteration. Defaults to the number of steps between *start* and
           *stop*. Pass the string, `'repeat'`, to continue iteration
           indefinitely.
       factor (float): Rate of exponential increase. Defaults to `2.0`,
           e.g., `[1, 2, 4, 8, 16]`.
       jitter (float): A factor between `-1.0` and `1.0`, used to
           uniformly randomize and thus spread out timeouts in a distributed
           system, avoiding rhythm effects. Positive values use the base
           backoff curve as a maximum, negative values use the curve as a
           minimum. Set to 1.0 or `True` for a jitter approximating
           Ethernet's time-tested backoff solution. Defaults to `False`.
    """
    start = float(start)
    stop = float(stop)
    factor = float(factor)
    if start < 0.0:
        raise ValueError('expected start >= 0, not %r' % start)
    if factor < 1.0:
        raise ValueError('expected factor >= 1.0, not %r' % factor)
    if stop == 0.0:
        raise ValueError('expected stop > 0')
    if stop < start:
        raise ValueError('expected stop >= start, not %r' % stop)
    if count is None:
        denom = start if start else 1
        count = 1 + math.ceil(math.log(stop/denom, factor))
        count = count if start else count + 1
    if count != 'repeat' and count < 0:
        raise ValueError('count must be positive or "repeat", not %r' % count)
    if jitter:
        jitter = float(jitter)
        if not (-1.0 <= jitter <= 1.0):
            raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter)

    cur, i = start, 0
    while count == 'repeat' or i < count:
        if not jitter:
            cur_ret = cur
        elif jitter:
            cur_ret = cur - (cur * jitter * random.random())
        yield cur_ret
        i += 1
        if cur == 0:
            cur = 1
        elif cur < stop:
            cur *= factor
        if cur > stop:
            cur = stop
    return


def bucketize(src, key=bool, value_transform=None, key_filter=None):
    """Group values in the *src* iterable by the value returned by *key*.

    >>> bucketize(range(5))
    {False: [0], True: [1, 2, 3, 4]}
    >>> is_odd = lambda x: x % 2 == 1
    >>> bucketize(range(5), is_odd)
    {False: [0, 2, 4], True: [1, 3]}

    *key* is :class:`bool` by default, but can also be a callable, a
    string, or a list. If it is a string, it is the name of the attribute
    on which to bucketize objects.

    >>> bucketize([1+1j, 2+2j, 1, 2], key='real')
    {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]}

    If *key* is a list, it specifies the bucket for each object, by
    position:

    >>> bucketize([1,2,365,4,98],key=[0,1,2,0,2])
    {0: [1, 4], 1: [2], 2: [365, 98]}

    Value lists are not deduplicated:

    >>> bucketize([None, None, None, 'hello'])
    {False: [None, None, None], True: ['hello']}

    Bucketizing into more than two groups:

    >>> bucketize(range(10), lambda x: x % 3)
    {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}

    ``bucketize`` has a couple of advanced options useful in certain cases.
*value_transform* can be used to modify values as they are added to buckets, and *key_filter* will allow excluding certain buckets from being collected. >>> bucketize(range(5), value_transform=lambda x: x*x) {False: [0], True: [1, 4, 9, 16]} >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1) {0: [0, 3, 6, 9], 2: [2, 5, 8]} Note in some of these examples there were at most two keys, ``True`` and ``False``, and each key present has a list with at least one item. See :func:`partition` for a version specialized for binary use cases. """ if not is_iterable(src): raise TypeError('expected an iterable') elif isinstance(key, list): if len(key) != len(src): raise ValueError("key and src have to be the same length") src = zip(key, src) if isinstance(key, str): def key_func(x): return getattr(x, key, x) elif callable(key): key_func = key elif isinstance(key, list): def key_func(x): return x[0] else: raise TypeError('expected key to be callable or a string or a list') if value_transform is None: def value_transform(x): return x if not callable(value_transform): raise TypeError('expected callable value transform function') if isinstance(key, list): f = value_transform def value_transform(x): return f(x[1]) ret = {} for val in src: key_of_val = key_func(val) if key_filter is None or key_filter(key_of_val): ret.setdefault(key_of_val, []).append(value_transform(val)) return ret def partition(src, key=bool): """No relation to :meth:`str.partition`, ``partition`` is like :func:`bucketize`, but for added convenience returns a tuple of ``(truthy_values, falsy_values)``. >>> nonempty, empty = partition(['', '', 'hi', '', 'bye']) >>> nonempty ['hi', 'bye'] *key* defaults to :class:`bool`, but can be carefully overridden to use either a function that returns either ``True`` or ``False`` or a string name of the attribute on which to partition objects. >>> import string >>> is_digit = lambda x: x in string.digits >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit) >>> ''.join(decimal_digits), ''.join(hexletters) ('0123456789', 'abcdefABCDEF') """ bucketized = bucketize(src, key) return bucketized.get(True, []), bucketized.get(False, []) def unique(src, key=None): """``unique()`` returns a list of unique values, as determined by *key*, in the order they first appeared in the input iterable, *src*. >>> ones_n_zeros = '11010110001010010101010' >>> ''.join(unique(ones_n_zeros)) '10' See :func:`unique_iter` docs for more details. """ return list(unique_iter(src, key)) def unique_iter(src, key=None): """Yield unique elements from the iterable, *src*, based on *key*, in the order in which they first appeared in *src*. >>> repetitious = [1, 2, 3] * 10 >>> list(unique_iter(repetitious)) [1, 2, 3] By default, *key* is the object itself, but *key* can either be a callable or, for convenience, a string name of the attribute on which to uniqueify objects, falling back on identity when the attribute is not present. 
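
    An attribute-based sketch, borrowing the complex-number convention
    used by :func:`bucketize`::

        >>> list(unique_iter([3+4j, 3-4j, 2+0j], key='real'))
        [(3+4j), (2+0j)]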
>>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes'] >>> list(unique_iter(pleasantries, key=lambda x: len(x))) ['hi', 'hello', 'bye'] """ if not is_iterable(src): raise TypeError('expected an iterable, not %r' % type(src)) if key is None: def key_func(x): return x elif callable(key): key_func = key elif isinstance(key, str): def key_func(x): return getattr(x, key, x) else: raise TypeError('"key" expected a string or callable, not %r' % key) seen = set() for i in src: k = key_func(i) if k not in seen: seen.add(k) yield i return def redundant(src, key=None, groups=False): """The complement of :func:`unique()`. By default returns non-unique/duplicate values as a list of the *first* redundant value in *src*. Pass ``groups=True`` to get groups of all values with redundancies, ordered by position of the first redundant value. This is useful in conjunction with some normalizing *key* function. >>> redundant([1, 2, 3, 4]) [] >>> redundant([1, 2, 3, 2, 3, 3, 4]) [2, 3] >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True) [[2, 2], [3, 3, 3]] An example using a *key* function to do case-insensitive redundancy detection. >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower) ['Hi'] >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower) [['hi', 'Hi', 'HI']] *key* should also be used when the values in *src* are not hashable. .. note:: This output of this function is designed for reporting duplicates in contexts when a unique input is desired. Due to the grouped return type, there is no streaming equivalent of this function for the time being. """ if key is None: pass elif callable(key): key_func = key elif isinstance(key, (str, bytes)): def key_func(x): return getattr(x, key, x) else: raise TypeError('"key" expected a string or callable, not %r' % key) seen = {} # key to first seen item redundant_order = [] redundant_groups = {} for i in src: k = key_func(i) if key else i if k not in seen: seen[k] = i else: if k in redundant_groups: if groups: redundant_groups[k].append(i) else: redundant_order.append(k) redundant_groups[k] = [seen[k], i] if not groups: ret = [redundant_groups[k][1] for k in redundant_order] else: ret = [redundant_groups[k] for k in redundant_order] return ret def one(src, default=None, key=None): """Along the same lines as builtins, :func:`all` and :func:`any`, and similar to :func:`first`, ``one()`` returns the single object in the given iterable *src* that evaluates to ``True``, as determined by callable *key*. If unset, *key* defaults to :class:`bool`. If no such objects are found, *default* is returned. If *default* is not passed, ``None`` is returned. If *src* has more than one object that evaluates to ``True``, or if there is no object that fulfills such condition, return *default*. It's like an `XOR`_ over an iterable. >>> one((True, False, False)) True >>> one((True, False, True)) >>> one((0, 0, 'a')) 'a' >>> one((0, False, None)) >>> one((True, True), default=False) False >>> bool(one(('', 1))) True >>> one((10, 20, 30, 42), key=lambda i: i > 40) 42 See `Martín Gaitán's original repo`_ for further use cases. .. _Martín Gaitán's original repo: https://github.com/mgaitan/one .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or """ ones = list(itertools.islice(filter(key, src), 2)) return ones[0] if len(ones) == 1 else default def first(iterable, default=None, key=None): """Return first element of *iterable* that evaluates to ``True``, else return ``None`` or optional *default*. Similar to :func:`one`. 
>>> first([0, False, None, [], (), 42]) 42 >>> first([0, False, None, [], ()]) is None True >>> first([0, False, None, [], ()], default='ohai') 'ohai' >>> import re >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)']) >>> m.group(1) 'bc' The optional *key* argument specifies a one-argument predicate function like that used for *filter()*. The *key* argument, if supplied, should be in keyword form. For example, finding the first even number in an iterable: >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0) 4 Contributed by Hynek Schlawack, author of `the original standalone module`_. .. _the original standalone module: https://github.com/hynek/first """ return next(filter(key, iterable), default) def flatten_iter(iterable): """``flatten_iter()`` yields all the elements from *iterable* while collapsing any nested iterables. >>> nested = [[1, 2], [[3], [4, 5]]] >>> list(flatten_iter(nested)) [1, 2, 3, 4, 5] """ for item in iterable: if isinstance(item, Iterable) and not isinstance(item, (str, bytes)): yield from flatten_iter(item) else: yield item def flatten(iterable): """``flatten()`` returns a collapsed list of all the elements from *iterable* while collapsing any nested iterables. >>> nested = [[1, 2], [[3], [4, 5]]] >>> flatten(nested) [1, 2, 3, 4, 5] """ return list(flatten_iter(iterable)) def same(iterable, ref=_UNSET): """``same()`` returns ``True`` when all values in *iterable* are equal to one another, or optionally a reference value, *ref*. Similar to :func:`all` and :func:`any` in that it evaluates an iterable and returns a :class:`bool`. ``same()`` returns ``True`` for empty iterables. >>> same([]) True >>> same([1]) True >>> same(['a', 'a', 'a']) True >>> same(range(20)) False >>> same([[], []]) True >>> same([[], []], ref='test') False """ iterator = iter(iterable) if ref is _UNSET: ref = next(iterator, ref) return all(val == ref for val in iterator) def default_visit(path, key, value): # print('visit(%r, %r, %r)' % (path, key, value)) return key, value # enable the extreme: monkeypatching iterutils with a different default_visit _orig_default_visit = default_visit def default_enter(path, key, value): # print('enter(%r, %r)' % (key, value)) if isinstance(value, (str, bytes)): return value, False elif isinstance(value, Mapping): return value.__class__(), ItemsView(value) elif isinstance(value, Sequence): return value.__class__(), enumerate(value) elif isinstance(value, Set): return value.__class__(), enumerate(value) else: # files, strings, other iterables, and scalars are not # traversed return value, False def default_exit(path, key, old_parent, new_parent, new_items): # print('exit(%r, %r, %r, %r, %r)' # % (path, key, old_parent, new_parent, new_items)) ret = new_parent if isinstance(new_parent, Mapping): new_parent.update(new_items) elif isinstance(new_parent, Sequence): vals = [v for i, v in new_items] try: new_parent.extend(vals) except AttributeError: ret = new_parent.__class__(vals) # tuples elif isinstance(new_parent, Set): vals = [v for i, v in new_items] try: new_parent.update(vals) except AttributeError: ret = new_parent.__class__(vals) # frozensets else: raise RuntimeError('unexpected iterable type: %r' % type(new_parent)) return ret def remap(root, visit=default_visit, enter=default_enter, exit=default_exit, **kwargs): """The remap ("recursive map") function is used to traverse and transform nested structures. 
Lists, tuples, sets, and dictionaries are just a few of the data
    structures nested into heterogeneous tree-like structures that are so
    common in programming. Unfortunately, Python's built-in ways to
    manipulate collections are almost all flat. List comprehensions may be
    fast and succinct, but they do not recurse, making it tedious to apply
    quick changes or complex transforms to real-world data.

    remap goes where list comprehensions cannot.

    Here's an example of removing all Nones from some data:

    >>> from pprint import pprint
    >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None},
    ...            'Babylon 5': 6, 'Dr. Who': None}
    >>> pprint(remap(reviews, lambda p, k, v: v is not None))
    {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}}

    Notice how both Nones have been removed despite the nesting in the
    dictionary. Not bad for a one-liner, and that's just the beginning.
    See `this remap cookbook`_ for more delicious recipes.

    .. _this remap cookbook: http://sedimental.org/remap.html

    remap takes four main arguments: the object to traverse and three
    optional callables which determine how the remapped object will be
    created.

    Args:

        root: The target object to traverse. By default, remap
            supports iterables like :class:`list`, :class:`tuple`,
            :class:`dict`, and :class:`set`, but any object traversable by
            *enter* will work.
        visit (callable): This function is called on every item in
            *root*. It must accept three positional arguments, *path*,
            *key*, and *value*. *path* is simply a tuple of parents'
            keys. *visit* should return the new key-value pair. It may
            also return ``True`` as shorthand to keep the old item
            unmodified, or ``False`` to drop the item from the new
            structure. *visit* is called after *enter*, on the new parent.

            The *visit* function is called for every item in *root*,
            including duplicate items. For traversable values, it is
            called on the new parent object, after all its children
            have been visited. The default visit behavior simply
            returns the key-value pair unmodified.
        enter (callable): This function controls which items in *root*
            are traversed. It accepts the same arguments as *visit*: the
            path, the key, and the value of the current item. It returns a
            pair of the blank new parent, and an iterator over the items
            which should be visited. If ``False`` is returned instead of
            an iterator, the value will not be traversed.

            The *enter* function is only called once per unique value. The
            default enter behavior supports mappings, sequences, and
            sets. Strings and all other iterables will not be traversed.
        exit (callable): This function determines how to handle items
            once they have been visited. It gets the same three arguments
            as the other functions -- *path*, *key*, *value* -- plus two
            more: the blank new parent object returned from *enter*, and a
            list of the new items, as remapped by *visit*.

            Like *enter*, the *exit* function is only called once per
            unique value. The default exit behavior is to simply add all
            new items to the new parent, e.g., using :meth:`list.extend`
            and :meth:`dict.update`. Immutable objects, such as a
            :class:`tuple` or :class:`namedtuple`, must be recreated from
            scratch, but use the same type as the new parent passed back
            from the *enter* function.
        reraise_visit (bool): A pragmatic convenience for the *visit*
            callable. When set to ``False``, remap ignores any errors
            raised by the *visit* callback. Items causing exceptions
            are kept. See examples for more details.
        trace (bool): Pass ``trace=True`` to print out the entire
            traversal.
Or pass a tuple of ``'visit'``, ``'enter'``, or ``'exit'`` to print only the selected events. remap is designed to cover the majority of cases with just the *visit* callable. While passing in multiple callables is very empowering, remap is designed so very few cases should require passing more than one function. When passing *enter* and *exit*, it's common and easiest to build on the default behavior. Simply add ``from boltons.iterutils import default_enter`` (or ``default_exit``), and have your enter/exit function call the default behavior before or after your custom logic. See `this example`_. Duplicate and self-referential objects (aka reference loops) are automatically handled internally, `as shown here`_. .. _this example: http://sedimental.org/remap.html#sort_all_lists .. _as shown here: http://sedimental.org/remap.html#corner_cases """ # TODO: improve argument formatting in sphinx doc # TODO: enter() return (False, items) to continue traverse but cancel copy? if not callable(visit): raise TypeError('visit expected callable, not: %r' % visit) if not callable(enter): raise TypeError('enter expected callable, not: %r' % enter) if not callable(exit): raise TypeError('exit expected callable, not: %r' % exit) reraise_visit = kwargs.pop('reraise_visit', True) trace = kwargs.pop('trace', ()) if trace is True: trace = ('visit', 'enter', 'exit') elif isinstance(trace, str): trace = (trace,) if not isinstance(trace, (tuple, list, set)): raise TypeError('trace expected tuple of event names, not: %r' % trace) trace_enter, trace_exit, trace_visit = 'enter' in trace, 'exit' in trace, 'visit' in trace if kwargs: raise TypeError('unexpected keyword arguments: %r' % kwargs.keys()) path, registry, stack = (), {}, [(None, root)] new_items_stack = [] while stack: key, value = stack.pop() id_value = id(value) if key is _REMAP_EXIT: key, new_parent, old_parent = value id_value = id(old_parent) path, new_items = new_items_stack.pop() if trace_exit: print(' .. remap exit:', path, '-', key, '-', old_parent, '-', new_parent, '-', new_items) value = exit(path, key, old_parent, new_parent, new_items) if trace_exit: print(' .. remap exit result:', value) registry[id_value] = value if not new_items_stack: continue elif id_value in registry: value = registry[id_value] else: if trace_enter: print(' .. remap enter:', path, '-', key, '-', value) res = enter(path, key, value) if trace_enter: print(' .. remap enter result:', res) try: new_parent, new_items = res except TypeError: # TODO: handle False? raise TypeError('enter should return a tuple of (new_parent,' ' items_iterator), not: %r' % res) if new_items is not False: # traverse unless False is explicitly passed registry[id_value] = new_parent new_items_stack.append((path, [])) if value is not root: path += (key,) stack.append((_REMAP_EXIT, (key, new_parent, value))) if new_items: stack.extend(reversed(list(new_items))) if trace_enter: print(' .. remap stack size now:', len(stack)) continue if visit is _orig_default_visit: # avoid function call overhead by inlining identity operation visited_item = (key, value) else: try: if trace_visit: print(' .. remap visit:', path, '-', key, '-', value) visited_item = visit(path, key, value) except Exception: if reraise_visit: raise visited_item = True if visited_item is False: if trace_visit: print(' .. remap visit result: ') continue # drop elif visited_item is True: visited_item = (key, value) if trace_visit: print(' .. remap visit result:', visited_item) # TODO: typecheck? 
# raise TypeError('expected (key, value) from visit(),' # ' not: %r' % visited_item) try: new_items_stack[-1][1].append(visited_item) except IndexError: raise TypeError('expected remappable root, not: %r' % root) return value class PathAccessError(KeyError, IndexError, TypeError): """An amalgamation of KeyError, IndexError, and TypeError, representing what can occur when looking up a path in a nested object. """ def __init__(self, exc, seg, path): self.exc = exc self.seg = seg self.path = path def __repr__(self): cn = self.__class__.__name__ return f'{cn}({self.exc!r}, {self.seg!r}, {self.path!r})' def __str__(self): return ('could not access %r from path %r, got error: %r' % (self.seg, self.path, self.exc)) def get_path(root, path, default=_UNSET): """Retrieve a value from a nested object via a tuple representing the lookup path. >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}} >>> get_path(root, ('a', 'b', 'c', 2, 0)) 3 The path tuple format is intentionally consistent with that of :func:`remap`, but a single dotted string can also be passed. One of get_path's chief aims is improved error messaging. EAFP is great, but the error messages are not. For instance, ``root['a']['b']['c'][2][1]`` gives back ``IndexError: list index out of range`` What went out of range where? get_path currently raises ``PathAccessError: could not access 2 from path ('a', 'b', 'c', 2, 1), got error: IndexError('list index out of range',)``, a subclass of IndexError and KeyError. You can also pass a default that covers the entire operation, should the lookup fail at any level. Args: root: The target nesting of dictionaries, lists, or other objects supporting ``__getitem__``. path (tuple): A sequence of strings and integers to be successively looked up within *root*. A dot-separated (``a.b``) string may also be passed. default: The value to be returned should any ``PathAccessError`` exceptions be raised. """ if isinstance(path, str): path = path.split('.') cur = root try: for seg in path: try: cur = cur[seg] except (KeyError, IndexError) as exc: raise PathAccessError(exc, seg, path) except TypeError as exc: # either string index in a list, or a parent that # doesn't support indexing try: seg = int(seg) cur = cur[seg] except (ValueError, KeyError, IndexError, TypeError): if not is_iterable(cur): exc = TypeError('%r object is not indexable' % type(cur).__name__) raise PathAccessError(exc, seg, path) except PathAccessError: if default is _UNSET: raise return default return cur def research(root, query=lambda p, k, v: True, reraise=False, enter=default_enter): """The :func:`research` function uses :func:`remap` to recurse over any data nested in *root*, and find values which match a given criterion, specified by the *query* callable. Results are returned as a list of ``(path, value)`` pairs. The paths are tuples in the same format accepted by :func:`get_path`. This can be useful for comparing values nested in two or more different structures. Here's a simple example that finds all integers: >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None} >>> res = research(root, query=lambda p, k, v: isinstance(v, int)) >>> print(sorted(res)) [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)] Note how *query* follows the same, familiar ``path, key, value`` signature as the ``visit`` and ``enter`` functions on :func:`remap`, and returns a :class:`bool`. Args: root: The target object to search. Supports the same types of objects as :func:`remap`, including :class:`list`, :class:`tuple`, :class:`dict`, and :class:`set`. 
query (callable): The function called on every object to determine whether to include it in the search results. The callable must accept three arguments, *path*, *key*, and *value*, commonly abbreviated *p*, *k*, and *v*, same as *enter* and *visit* from :func:`remap`. reraise (bool): Whether to reraise exceptions raised by *query* or to simply drop the result that caused the error. With :func:`research` it's easy to inspect the details of a data structure, like finding values that are at a certain depth (using ``len(p)``) and much more. If more advanced functionality is needed, check out the code and make your own :func:`remap` wrapper, and consider `submitting a patch`_! .. _submitting a patch: https://github.com/mahmoud/boltons/pulls """ ret = [] if not callable(query): raise TypeError('query expected callable, not: %r' % query) def _enter(path, key, value): try: if query(path, key, value): ret.append((path + (key,), value)) except Exception: if reraise: raise return enter(path, key, value) remap(root, enter=_enter) return ret # TODO: recollect() # TODO: refilter() # TODO: reiter() # GUID iterators: 10x faster and somewhat more compact than uuid. class GUIDerator: """The GUIDerator is an iterator that yields a globally-unique identifier (GUID) on every iteration. The GUIDs produced are hexadecimal strings. Testing shows it to be around 12x faster than the uuid module. By default it is also more compact, partly due to its default 96-bit (24-hexdigit) length. 96 bits of randomness means that there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more or less uniqueness is desired, the *size* argument can be adjusted accordingly. Args: size (int): character length of the GUID, defaults to 24. Lengths between 20 and 36 are considered valid. The GUIDerator has built-in fork protection that causes it to detect a fork on next iteration and reseed accordingly. """ def __init__(self, size=24): self.size = size if size < 20 or size > 36: raise ValueError('expected 20 < size <= 36') import hashlib self._sha1 = hashlib.sha1 self.count = itertools.count() self.reseed() def reseed(self): import socket self.pid = os.getpid() self.salt = '-'.join([str(self.pid), socket.gethostname() or '', str(time.time()), os.urandom(6).hex()]) return def __iter__(self): return self def __next__(self): if os.getpid() != self.pid: self.reseed() target_bytes = (self.salt + str(next(self.count))).encode('utf8') hash_text = self._sha1(target_bytes).hexdigest()[:self.size] return hash_text next = __next__ class SequentialGUIDerator(GUIDerator): """Much like the standard GUIDerator, the SequentialGUIDerator is an iterator that yields a globally-unique identifier (GUID) on every iteration. The GUIDs produced are hexadecimal strings. The SequentialGUIDerator differs in that it picks a starting GUID value and increments every iteration. This yields GUIDs which are of course unique, but also ordered and lexicographically sortable. The SequentialGUIDerator is around 50% faster than the normal GUIDerator, making it almost 20x as fast as the built-in uuid module. By default it is also more compact, partly due to its 96-bit (24-hexdigit) default length. 96 bits of randomness means that there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more or less uniqueness is desired, the *size* argument can be adjusted accordingly. Args: size (int): character length of the GUID, defaults to 24. Note that with SequentialGUIDerator there is a chance of GUIDs growing larger than the size configured. 
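
    A usage sketch (values vary per host and process, so the output is
    only checked loosely)::

        >>> sgi = SequentialGUIDerator()
        >>> a, b = next(sgi), next(sgi)
        >>> len(a) >= 24 and int(b, 16) - int(a, 16) == 1
        True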
The SequentialGUIDerator has built-in fork protection that causes it to detect a fork on next iteration and reseed accordingly. """ def reseed(self): super().reseed() start_str = self._sha1(self.salt.encode('utf8')).hexdigest() self.start = int(start_str[:self.size], 16) self.start |= (1 << ((self.size * 4) - 2)) def __next__(self): if os.getpid() != self.pid: self.reseed() return '%x' % (next(self.count) + self.start) next = __next__ guid_iter = GUIDerator() seq_guid_iter = SequentialGUIDerator() def soft_sorted(iterable, first=None, last=None, key=None, reverse=False): """For when you care about the order of some elements, but not about others. Use this to float to the top and/or sink to the bottom a specific ordering, while sorting the rest of the elements according to normal :func:`sorted` rules. >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two']) ['one', 'two', 'a', 'b'] >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True) [6, 5, 3, 1, 0, 2, 4] >>> import string >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower)) 'aA1023456789cCdDeEfFbB' Args: iterable (list): A list or other iterable to sort. first (list): A sequence to enforce for elements which should appear at the beginning of the returned list. last (list): A sequence to enforce for elements which should appear at the end of the returned list. key (callable): Callable used to generate a comparable key for each item to be sorted, same as the key in :func:`sorted`. Note that entries in *first* and *last* should be the keys for the items. Defaults to passthrough/the identity function. reverse (bool): Whether or not elements not explicitly ordered by *first* and *last* should be in reverse order or not. Returns a new list in sorted order. """ first = first or [] last = last or [] key = key or (lambda x: x) seq = list(iterable) other = [x for x in seq if not ( (first and key(x) in first) or (last and key(x) in last))] other.sort(key=key, reverse=reverse) if first: first = sorted([x for x in seq if key(x) in first], key=lambda x: first.index(key(x))) if last: last = sorted([x for x in seq if key(x) in last], key=lambda x: last.index(key(x))) return first + other + last def untyped_sorted(iterable, key=None, reverse=False): """A version of :func:`sorted` which will happily sort an iterable of heterogeneous types and return a new list, similar to legacy Python's behavior. >>> untyped_sorted(['abc', 2.0, 1, 2, 'def']) [1, 2.0, 2, 'abc', 'def'] Note how mutually orderable types are sorted as expected, as in the case of the integers and floats above. .. note:: Results may vary across Python versions and builds, but the function will produce a sorted list, except in the case of explicitly unorderable objects. 
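
    A *key* sketch: the key is applied before comparison, so with
    ``key=str`` every comparison is plain lexicographic::

        >>> untyped_sorted([10, '9', 2], key=str)
        [10, 2, '9']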
""" class _Wrapper: slots = ('obj',) def __init__(self, obj): self.obj = obj def __lt__(self, other): obj = key(self.obj) if key is not None else self.obj other = key(other.obj) if key is not None else other.obj try: ret = obj < other except TypeError: ret = ((type(obj).__name__, id(type(obj)), obj) < (type(other).__name__, id(type(other)), other)) return ret if key is not None and not callable(key): raise TypeError('expected function or callable object for key, not: %r' % key) return sorted(iterable, key=_Wrapper, reverse=reverse) """ May actually be faster to do an isinstance check for a str path $ python -m timeit -s "x = [1]" "x[0]" 10000000 loops, best of 3: 0.0207 usec per loop $ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass" 10000000 loops, best of 3: 0.029 usec per loop $ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass" 1000000 loops, best of 3: 0.315 usec per loop # setting up try/except is fast, only around 0.01us # actually triggering the exception takes almost 10x as long $ python -m timeit -s "x = [1]" "isinstance(x, basestring)" 10000000 loops, best of 3: 0.141 usec per loop $ python -m timeit -s "x = [1]" "isinstance(x, str)" 10000000 loops, best of 3: 0.131 usec per loop $ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass" 1000000 loops, best of 3: 0.443 usec per loop $ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass" 1000000 loops, best of 3: 0.544 usec per loop """ boltons-25.0.0/boltons/jsonutils.py000066400000000000000000000232101475005545200173470ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """``jsonutils`` aims to provide various helpers for working with JSON. Currently it focuses on providing a reliable and intuitive means of working with `JSON Lines`_-formatted files. .. 
_JSON Lines: http://jsonlines.org/ """ import io import os import json DEFAULT_BLOCKSIZE = 4096 __all__ = ['JSONLIterator', 'reverse_iter_lines'] def reverse_iter_lines(file_obj, blocksize=DEFAULT_BLOCKSIZE, preseek=True, encoding=None): """Returns an iterator over the lines from a file object, in reverse order, i.e., last line first, first line last. Uses the :meth:`file.seek` method of file objects, and is tested compatible with :class:`file` objects, as well as :class:`StringIO.StringIO`. Args: file_obj (file): An open file object. Note that ``reverse_iter_lines`` mutably reads from the file and other functions should not mutably interact with the file object after being passed. Files can be opened in bytes or text mode. blocksize (int): The block size to pass to :meth:`file.read()`. Warning: keep this a fairly large multiple of 2, defaults to 4096. preseek (bool): Tells the function whether or not to automatically seek to the end of the file. Defaults to ``True``. ``preseek=False`` is useful in cases when the file cursor is already in position, either at the end of the file or in the middle for relative reverse line generation. """ # This function is a bit of a pain because it attempts to be byte/text agnostic try: encoding = encoding or file_obj.encoding except AttributeError: # BytesIO encoding = None else: encoding = 'utf-8' # need orig_obj to keep alive otherwise __del__ on the TextWrapper will close the file orig_obj = file_obj try: file_obj = orig_obj.detach() except (AttributeError, io.UnsupportedOperation): pass empty_bytes, newline_bytes, empty_text = b'', b'\n', '' if preseek: file_obj.seek(0, os.SEEK_END) buff = empty_bytes cur_pos = file_obj.tell() while 0 < cur_pos: read_size = min(blocksize, cur_pos) cur_pos -= read_size file_obj.seek(cur_pos, os.SEEK_SET) cur = file_obj.read(read_size) buff = cur + buff lines = buff.splitlines() if len(lines) < 2 or lines[0] == empty_bytes: continue if buff[-1:] == newline_bytes: yield empty_text if encoding else empty_bytes for line in lines[:0:-1]: yield line.decode(encoding) if encoding else line buff = lines[0] if buff: yield buff.decode(encoding) if encoding else buff """ TODO: allow passthroughs for: json.load(fp[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, object_pairs_hook[, **kw]]]]]]]]) """ class JSONLIterator: """The ``JSONLIterator`` is used to iterate over JSON-encoded objects stored in the `JSON Lines format`_ (one object per line). Most notably it has the ability to efficiently read from the bottom of files, making it very effective for reading in simple append-only JSONL use cases. It also has the ability to start from anywhere in the file and ignore corrupted lines. Args: file_obj (file): An open file object. ignore_errors (bool): Whether to skip over lines that raise an error on deserialization (:func:`json.loads`). reverse (bool): Controls the direction of the iteration. Defaults to ``False``. If set to ``True`` and *rel_seek* is unset, seeks to the end of the file before iteration begins. rel_seek (float): Used to preseek the start position of iteration. Set to 0.0 for the start of the file, 1.0 for the end, and anything in between. .. 
_JSON Lines format: http://jsonlines.org/ """ def __init__(self, file_obj, ignore_errors=False, reverse=False, rel_seek=None): self._reverse = bool(reverse) self._file_obj = file_obj self.ignore_errors = ignore_errors if rel_seek is None: if reverse: rel_seek = 1.0 elif not -1.0 < rel_seek < 1.0: raise ValueError("'rel_seek' expected a float between" " -1.0 and 1.0, not %r" % rel_seek) elif rel_seek < 0: rel_seek = 1.0 - rel_seek self._rel_seek = rel_seek self._blocksize = 4096 if rel_seek is not None: self._init_rel_seek() if self._reverse: self._line_iter = reverse_iter_lines(self._file_obj, blocksize=self._blocksize, preseek=False) else: self._line_iter = iter(self._file_obj) @property def cur_byte_pos(self): "A property representing where in the file the iterator is reading." return self._file_obj.tell() def _align_to_newline(self): "Aligns the file object's position to the next newline." fo, bsize = self._file_obj, self._blocksize cur, total_read = '', 0 cur_pos = fo.tell() while '\n' not in cur: cur = fo.read(bsize) total_read += bsize try: newline_offset = cur.index('\n') + total_read - bsize except ValueError: raise # TODO: seek to end? fo.seek(cur_pos + newline_offset) def _init_rel_seek(self): "Sets the file object's position to the relative location set above." rs, fo = self._rel_seek, self._file_obj if rs == 0.0: fo.seek(0, os.SEEK_SET) else: fo.seek(0, os.SEEK_END) size = fo.tell() if rs == 1.0: self._cur_pos = size else: target = int(size * rs) fo.seek(target, os.SEEK_SET) self._align_to_newline() self._cur_pos = fo.tell() def __iter__(self): return self def next(self): """Yields one :class:`dict` loaded with :func:`json.loads`, advancing the file object by one line. Raises :exc:`StopIteration` upon reaching the end of the file (or beginning, if ``reverse`` was set to ``True``. """ while 1: line = next(self._line_iter).lstrip() if not line: continue try: obj = json.loads(line) except Exception: if not self.ignore_errors: raise continue return obj __next__ = next if __name__ == '__main__': def _main(): import sys if '-h' in sys.argv or '--help' in sys.argv: print('loads one or more JSON Line files for basic validation.') return verbose = False if '-v' in sys.argv or '--verbose' in sys.argv: verbose = True file_count, obj_count = 0, 0 filenames = sys.argv[1:] for filename in filenames: if filename in ('-h', '--help', '-v', '--verbose'): continue file_count += 1 with open(filename, 'rb') as file_obj: iterator = JSONLIterator(file_obj) cur_obj_count = 0 while 1: try: next(iterator) except ValueError: print('error reading object #%s around byte %s in %s' % (cur_obj_count + 1, iterator.cur_byte_pos, filename)) return except StopIteration: break obj_count += 1 cur_obj_count += 1 if verbose and obj_count and obj_count % 100 == 0: sys.stdout.write('.') if obj_count % 10000: sys.stdout.write('%s\n' % obj_count) if verbose: print('files checked: %s' % file_count) print('objects loaded: %s' % obj_count) return _main() boltons-25.0.0/boltons/listutils.py000066400000000000000000000274701475005545200173650ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """Python's builtin :class:`list` is a very fast and efficient sequence type, but it could be better for certain access patterns, such as non-sequential insertion into a large lists. ``listutils`` provides a pure-Python solution to this problem. For utilities for working with iterables and lists, check out :mod:`iterutils`. For the a :class:`list`-based version of :class:`collections.namedtuple`, check out :mod:`namedutils`. """ import operator from math import log as math_log from itertools import chain, islice try: from .typeutils import make_sentinel _MISSING = make_sentinel(var_name='_MISSING') except ImportError: _MISSING = object() # TODO: expose splaylist? __all__ = ['BList', 'BarrelList'] # TODO: comparators # TODO: keep track of list lengths and bisect to the right list for # faster getitem (and slightly slower setitem and delitem ops) class BarrelList(list): """The ``BarrelList`` is a :class:`list` subtype backed by many dynamically-scaled sublists, to provide better scaling and random insertion/deletion characteristics. It is a subtype of the builtin :class:`list` and has an identical API, supporting indexing, slicing, sorting, etc. If application requirements call for something more performant, consider the `blist module available on PyPI`_. The name comes by way of Kurt Rose, who said it reminded him of barrel shifters. Not sure how, but it's BList-like, so the name stuck. BList is of course a reference to `B-trees`_. Args: iterable: An optional iterable of initial values for the list. >>> blist = BList(range(100000)) >>> blist.pop(50000) 50000 >>> len(blist) 99999 >>> len(blist.lists) # how many underlying lists 8 >>> slice_idx = blist.lists[0][-1] >>> blist[slice_idx:slice_idx + 2] BarrelList([11637, 11638]) Slicing is supported and works just fine across list borders, returning another instance of the BarrelList. .. _blist module available on PyPI: https://pypi.python.org/pypi/blist .. _B-trees: https://en.wikipedia.org/wiki/B-tree """ _size_factor = 1520 "This size factor is the result of tuning using the tune() function below." 
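    # An illustrative sketch of the scaling (see _cur_size_limit
    # below): each sublist is kept to roughly
    # _size_factor * log2(len(self) + 2) items, so the per-sublist
    # limit grows logarithmically with the total length. For an empty
    # list, the limit works out to exactly the base factor:
    #
    #   int(round(1520 * math.log(0 + 2, 2))) == 1520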
def __init__(self, iterable=None): self.lists = [[]] if iterable: self.extend(iterable) @property def _cur_size_limit(self): len_self, size_factor = len(self), self._size_factor return int(round(size_factor * math_log(len_self + 2, 2))) def _translate_index(self, index): if index < 0: index += len(self) rel_idx, lists = index, self.lists for list_idx in range(len(lists)): len_list = len(lists[list_idx]) if rel_idx < len_list: break rel_idx -= len_list if rel_idx < 0: return None, None return list_idx, rel_idx def _balance_list(self, list_idx): if list_idx < 0: list_idx += len(self.lists) cur_list, len_self = self.lists[list_idx], len(self) size_limit = self._cur_size_limit if len(cur_list) > size_limit: half_limit = size_limit // 2 while len(cur_list) > half_limit: next_list_idx = list_idx + 1 self.lists.insert(next_list_idx, cur_list[-half_limit:]) del cur_list[-half_limit:] return True return False def insert(self, index, item): if len(self.lists) == 1: self.lists[0].insert(index, item) self._balance_list(0) else: list_idx, rel_idx = self._translate_index(index) if list_idx is None: raise IndexError() self.lists[list_idx].insert(rel_idx, item) self._balance_list(list_idx) return def append(self, item): self.lists[-1].append(item) def extend(self, iterable): self.lists[-1].extend(iterable) def pop(self, *a): lists = self.lists if len(lists) == 1 and not a: return self.lists[0].pop() index = a and a[0] if index == () or index is None or index == -1: ret = lists[-1].pop() if len(lists) > 1 and not lists[-1]: lists.pop() else: list_idx, rel_idx = self._translate_index(index) if list_idx is None: raise IndexError() ret = lists[list_idx].pop(rel_idx) self._balance_list(list_idx) return ret def iter_slice(self, start, stop, step=None): iterable = self # TODO: optimization opportunities abound # start_list_idx, stop_list_idx = 0, len(self.lists) if start is None: start = 0 if stop is None: stop = len(self) if step is not None and step < 0: step = -step start, stop = -start, -stop - 1 iterable = reversed(self) if start < 0: start += len(self) # start_list_idx, start_rel_idx = self._translate_index(start) if stop < 0: stop += len(self) # stop_list_idx, stop_rel_idx = self._translate_index(stop) return islice(iterable, start, stop, step) def del_slice(self, start, stop, step=None): if step is not None and abs(step) > 1: # punt new_list = chain(self.iter_slice(0, start, step), self.iter_slice(stop, None, step)) self.lists[0][:] = new_list self._balance_list(0) return if start is None: start = 0 if stop is None: stop = len(self) start_list_idx, start_rel_idx = self._translate_index(start) stop_list_idx, stop_rel_idx = self._translate_index(stop) if start_list_idx is None: raise IndexError() if stop_list_idx is None: raise IndexError() if start_list_idx == stop_list_idx: del self.lists[start_list_idx][start_rel_idx:stop_rel_idx] elif start_list_idx < stop_list_idx: del self.lists[start_list_idx + 1:stop_list_idx] del self.lists[start_list_idx][start_rel_idx:] del self.lists[stop_list_idx][:stop_rel_idx] else: assert False, ('start list index should never translate to' ' greater than stop list index') __delslice__ = del_slice @classmethod def from_iterable(cls, it): return cls(it) def __iter__(self): return chain.from_iterable(self.lists) def __reversed__(self): return chain.from_iterable(reversed(l) for l in reversed(self.lists)) def __len__(self): return sum([len(l) for l in self.lists]) def __contains__(self, item): for cur in self.lists: if item in cur: return True return False def 
__getitem__(self, index): try: start, stop, step = index.start, index.stop, index.step except AttributeError: index = operator.index(index) else: iter_slice = self.iter_slice(start, stop, step) ret = self.from_iterable(iter_slice) return ret list_idx, rel_idx = self._translate_index(index) if list_idx is None: raise IndexError() return self.lists[list_idx][rel_idx] def __delitem__(self, index): try: start, stop, step = index.start, index.stop, index.step except AttributeError: index = operator.index(index) else: self.del_slice(start, stop, step) return list_idx, rel_idx = self._translate_index(index) if list_idx is None: raise IndexError() del self.lists[list_idx][rel_idx] def __setitem__(self, index, item): try: start, stop, step = index.start, index.stop, index.step except AttributeError: index = operator.index(index) else: if len(self.lists) == 1: self.lists[0][index] = item else: tmp = list(self) tmp[index] = item self.lists[:] = [tmp] self._balance_list(0) return list_idx, rel_idx = self._translate_index(index) if list_idx is None: raise IndexError() self.lists[list_idx][rel_idx] = item def __getslice__(self, start, stop): iter_slice = self.iter_slice(start, stop, 1) return self.from_iterable(iter_slice) def __setslice__(self, start, stop, sequence): if len(self.lists) == 1: self.lists[0][start:stop] = sequence else: tmp = list(self) tmp[start:stop] = sequence self.lists[:] = [tmp] self._balance_list(0) return def __repr__(self): return f'{self.__class__.__name__}({list(self)!r})' def sort(self): # poor pythonist's mergesort, it's faster than sorted(self) # when the lists' average length is greater than 512. if len(self.lists) == 1: self.lists[0].sort() else: for li in self.lists: li.sort() tmp_sorted = sorted(chain.from_iterable(self.lists)) del self.lists[:] self.lists[0] = tmp_sorted self._balance_list(0) def reverse(self): for cur in self.lists: cur.reverse() self.lists.reverse() def count(self, item): return sum([cur.count(item) for cur in self.lists]) def index(self, item): len_accum = 0 for cur in self.lists: try: rel_idx = cur.index(item) return len_accum + rel_idx except ValueError: len_accum += len(cur) raise ValueError(f'{item!r} is not in list') BList = BarrelList class SplayList(list): """Like a `splay tree`_, the SplayList facilitates moving higher utility items closer to the front of the list for faster access. .. _splay tree: https://en.wikipedia.org/wiki/Splay_tree """ def shift(self, item_index, dest_index=0): if item_index == dest_index: return item = self.pop(item_index) self.insert(dest_index, item) def swap(self, item_index, dest_index): self[dest_index], self[item_index] = self[item_index], self[dest_index] boltons-25.0.0/boltons/mathutils.py000066400000000000000000000174051475005545200173400ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """This module provides useful math functions on top of Python's built-in :mod:`math` module. """ from math import ceil as _ceil, floor as _floor import bisect import binascii def clamp(x, lower=float('-inf'), upper=float('inf')): """Limit a value to a given range. Args: x (int or float): Number to be clamped. lower (int or float): Minimum value for x. upper (int or float): Maximum value for x. The returned value is guaranteed to be between *lower* and *upper*. Integers, floats, and other comparable types can be mixed. >>> clamp(1.0, 0, 5) 1.0 >>> clamp(-1.0, 0, 5) 0 >>> clamp(101.0, 0, 5) 5 >>> clamp(123, upper=5) 5 Similar to `numpy's clip`_ function. .. _numpy's clip: http://docs.scipy.org/doc/numpy/reference/generated/numpy.clip.html """ if upper < lower: raise ValueError('expected upper bound (%r) >= lower bound (%r)' % (upper, lower)) return min(max(x, lower), upper) def ceil(x, options=None): """Return the ceiling of *x*. If *options* is set, return the smallest integer or float from *options* that is greater than or equal to *x*. Args: x (int or float): Number to be tested. options (iterable): Optional iterable of arbitrary numbers (ints or floats). >>> VALID_CABLE_CSA = [1.5, 2.5, 4, 6, 10, 25, 35, 50] >>> ceil(3.5, options=VALID_CABLE_CSA) 4 >>> ceil(4, options=VALID_CABLE_CSA) 4 """ if options is None: return _ceil(x) options = sorted(options) i = bisect.bisect_left(options, x) if i == len(options): raise ValueError("no ceil options greater than or equal to: %r" % x) return options[i] def floor(x, options=None): """Return the floor of *x*. If *options* is set, return the largest integer or float from *options* that is less than or equal to *x*. Args: x (int or float): Number to be tested. options (iterable): Optional iterable of arbitrary numbers (ints or floats). >>> VALID_CABLE_CSA = [1.5, 2.5, 4, 6, 10, 25, 35, 50] >>> floor(3.5, options=VALID_CABLE_CSA) 2.5 >>> floor(2.5, options=VALID_CABLE_CSA) 2.5 """ if options is None: return _floor(x) options = sorted(options) i = bisect.bisect_right(options, x) if not i: raise ValueError("no floor options less than or equal to: %r" % x) return options[i - 1] class Bits: ''' An immutable bit-string or bit-array object. Provides list-like access to bits as bools, as well as bitwise masking and shifting operators. Bits also make it easy to convert between many different useful representations: * bytes -- good for serializing raw binary data * int -- good for incrementing (e.g. 
to try all possible values) * list of bools -- good for iterating over or treating as flags * hex/bin string -- good for human readability ''' __slots__ = ('val', 'len') def __init__(self, val=0, len_=None): if type(val) is not int: if type(val) is list: val = ''.join(['1' if e else '0' for e in val]) if type(val) is bytes: val = val.decode('ascii') if type(val) is str: if len_ is None: len_ = len(val) if val.startswith('0x'): len_ = (len_ - 2) * 4 if val.startswith('0x'): val = int(val, 16) else: if val: val = int(val, 2) else: val = 0 if type(val) is not int: raise TypeError(f'initialized with bad type: {type(val).__name__}') if val < 0: raise ValueError('Bits cannot represent negative values') if len_ is None: len_ = len(f'{val:b}') if val > 2 ** len_: raise ValueError(f'value {val} cannot be represented with {len_} bits') self.val = val # data is stored internally as integer self.len = len_ def __getitem__(self, k): if type(k) is slice: return Bits(self.as_bin()[k]) if type(k) is int: if k >= self.len: raise IndexError(k) return bool((1 << (self.len - k - 1)) & self.val) raise TypeError(type(k)) def __len__(self): return self.len def __eq__(self, other): if type(self) is not type(other): return NotImplemented return self.val == other.val and self.len == other.len def __or__(self, other): if type(self) is not type(other): return NotImplemented return Bits(self.val | other.val, max(self.len, other.len)) def __and__(self, other): if type(self) is not type(other): return NotImplemented return Bits(self.val & other.val, max(self.len, other.len)) def __lshift__(self, other): return Bits(self.val << other, self.len + other) def __rshift__(self, other): return Bits(self.val >> other, self.len - other) def __hash__(self): return hash(self.val) def as_list(self): return [c == '1' for c in self.as_bin()] def as_bin(self): return f'{{0:0{self.len}b}}'.format(self.val) def as_hex(self): # make template to pad out to number of bytes necessary to represent bits tmpl = f'%0{2 * (self.len // 8 + ((self.len % 8) != 0))}X' ret = tmpl % self.val return ret def as_int(self): return self.val def as_bytes(self): return binascii.unhexlify(self.as_hex()) @classmethod def from_list(cls, list_): return cls(list_) @classmethod def from_bin(cls, bin): return cls(bin) @classmethod def from_hex(cls, hex): if isinstance(hex, bytes): hex = hex.decode('ascii') if not hex.startswith('0x'): hex = '0x' + hex return cls(hex) @classmethod def from_int(cls, int_, len_=None): return cls(int_, len_) @classmethod def from_bytes(cls, bytes_): return cls.from_hex(binascii.hexlify(bytes_)) def __repr__(self): cn = self.__class__.__name__ return f"{cn}('{self.as_bin()}')" boltons-25.0.0/boltons/mboxutils.py000066400000000000000000000134621475005545200173530ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. 
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Useful utilities for working with `mbox`_-formatted
mailboxes. Credit to Mark Williams for these.

.. _mbox: https://en.wikipedia.org/wiki/Mbox

"""

import mailbox
import tempfile


DEFAULT_MAXMEM = 4 * 1024 * 1024  # 4MB


class mbox_readonlydir(mailbox.mbox):
    """A subclass of :class:`mailbox.mbox` suitable for use with mboxes
    inside a read-only mail directory, e.g., ``/var/mail``. Otherwise
    the API is exactly the same as the built-in mbox.

    Deletes messages via truncation, in the manner of `Heirloom mailx`_.

    Args:
       path (str): Path to the mbox file.
       factory (type): Message type (defaults to
          :class:`mailbox.mboxMessage`)
       create (bool): Create mailbox if it does not exist. (defaults
          to ``True``)
       maxmem (int): Specifies, in bytes, the largest sized mailbox
          to attempt to copy into memory. Larger mailboxes will be
          copied incrementally which is more hazardous. (defaults to
          4MB)

    .. note::

       Because this truncates and rewrites parts of the mbox file,
       this class can corrupt your mailbox.  Only use this if you know
       the built-in :class:`mailbox.mbox` does not work for your use
       case.

    .. _Heirloom mailx: http://heirloom.sourceforge.net/mailx.html
    """
    def __init__(self, path, factory=None, create=True,
                 maxmem=DEFAULT_MAXMEM):
        mailbox.mbox.__init__(self, path, factory, create)
        self.maxmem = maxmem

    def flush(self):
        """Write any pending changes to disk. This is called on mailbox
        close and is usually not called explicitly.

        .. note::

           This deletes messages via truncation. Interruptions may
           corrupt your mailbox.
        """

        # Appending and basic assertions are the same as in mailbox.mbox.flush.
        if not self._pending:
            if self._pending_sync:
                # Messages have only been added, so syncing the file
                # is enough.
                mailbox._sync_flush(self._file)
                self._pending_sync = False
            return

        # In order to be writing anything out at all, self._toc must
        # already have been generated (and presumably has been modified
        # by adding or deleting an item).
        assert self._toc is not None

        # Check length of self._file; if it's changed, some other process
        # has modified the mailbox since we scanned it.
        self._file.seek(0, 2)
        cur_len = self._file.tell()
        if cur_len != self._file_length:
            raise mailbox.ExternalClashError('Size of mailbox file changed '
                                             '(expected %i, found %i)' %
                                             (self._file_length, cur_len))

        self._file.seek(0)

        # Truncation logic begins here.  Mostly the same except we
        # can use tempfile because we're not doing rename(2).
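        # An outline of what follows: surviving messages are first
        # copied into a temporary file, building a new table of
        # contents along the way; the temp file's contents are then
        # copied back over the original (in one read if they fit in
        # self.maxmem, otherwise in 4KB chunks), and the original file
        # is truncated to its new, shorter length.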
        with tempfile.TemporaryFile() as new_file:
            new_toc = {}
            self._pre_mailbox_hook(new_file)
            for key in sorted(self._toc.keys()):
                start, stop = self._toc[key]
                self._file.seek(start)
                self._pre_message_hook(new_file)
                new_start = new_file.tell()
                while True:
                    buffer = self._file.read(min(4096,
                                                 stop - self._file.tell()))
                    # read() returns b'' when the message is exhausted;
                    # the old `buffer == ''` comparison never matched
                    # bytes and could loop forever.
                    if not buffer:
                        break
                    new_file.write(buffer)
                new_toc[key] = (new_start, new_file.tell())
                self._post_message_hook(new_file)
            self._file_length = new_file.tell()

            self._file.seek(0)
            new_file.seek(0)

            # Copy back our messages
            if self._file_length <= self.maxmem:
                self._file.write(new_file.read())
            else:
                while True:
                    buffer = new_file.read(4096)
                    if not buffer:
                        break
                    self._file.write(buffer)

            # Delete the rest.
            self._file.truncate()

        # Same wrap up.
        self._toc = new_toc
        self._pending = False
        self._pending_sync = False
        if self._locked:
            mailbox._lock_file(self._file, dotlock=False)
boltons-25.0.0/boltons/namedutils.py000066400000000000000000000351551475005545200174730ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#    * Redistributions of source code must retain the above copyright
#      notice, this list of conditions and the following disclaimer.
#
#    * Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials provided
#      with the distribution.
#
#    * The names of the contributors may not be used to endorse or
#      promote products derived from this software without specific
#      prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""\
The ``namedutils`` module defines two lightweight container types:
:class:`namedtuple` and :class:`namedlist`. Both are subtypes of built-in
sequence types, which are very fast and efficient. They simply add
named attribute accessors for specific indexes within themselves.

The :class:`namedtuple` is identical to the built-in
:class:`collections.namedtuple`, with a couple of enhancements,
including a ``__repr__`` more suitable to inheritance.

The :class:`namedlist` is the mutable counterpart to the
:class:`namedtuple`, and is much faster and lighter-weight than
full-blown :class:`object`. Consider this if you're implementing nodes
in a tree, graph, or other mutable data structure. If you want an even
skinnier approach, you'll probably have to look to C.
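A quick taste of the mutable variant:

>>> Point = namedlist('Point', ['x', 'y'])
>>> p = Point(11, 22)
>>> p.y = 33
>>> p
Point(x=11, y=33)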
""" import sys as _sys from collections import OrderedDict from keyword import iskeyword as _iskeyword from operator import itemgetter as _itemgetter __all__ = ['namedlist', 'namedtuple'] # Tiny templates _repr_tmpl = '{name}=%r' _imm_field_tmpl = '''\ {name} = _property(_itemgetter({index:d}), doc='Alias for field {index:d}') ''' _m_field_tmpl = '''\ {name} = _property(_itemgetter({index:d}), _itemsetter({index:d}), doc='Alias for field {index:d}') ''' ################################################################# ### namedtuple ################################################################# _namedtuple_tmpl = '''\ class {typename}(tuple): '{typename}({arg_list})' __slots__ = () _fields = {field_names!r} def __new__(_cls, {arg_list}): # TODO: tweak sig to make more extensible 'Create new instance of {typename}({arg_list})' return _tuple.__new__(_cls, ({arg_list})) @classmethod def _make(cls, iterable, new=_tuple.__new__, len=len): 'Make a new {typename} object from a sequence or iterable' result = new(cls, iterable) if len(result) != {num_fields:d}: raise TypeError('Expected {num_fields:d}' ' arguments, got %d' % len(result)) return result def __repr__(self): 'Return a nicely formatted representation string' tmpl = self.__class__.__name__ + '({repr_fmt})' return tmpl % self def _asdict(self): 'Return a new OrderedDict which maps field names to their values' return OrderedDict(zip(self._fields, self)) def _replace(_self, **kwds): 'Return a new {typename} object replacing field(s) with new values' result = _self._make(map(kwds.pop, {field_names!r}, _self)) if kwds: raise ValueError('Got unexpected field names: %r' % kwds.keys()) return result def __getnewargs__(self): 'Return self as a plain tuple. Used by copy and pickle.' return tuple(self) __dict__ = _property(_asdict) def __getstate__(self): 'Exclude the OrderedDict from pickling' # wat pass {field_defs} ''' def namedtuple(typename, field_names, verbose=False, rename=False): """Returns a new subclass of tuple with named fields. >>> Point = namedtuple('Point', ['x', 'y']) >>> Point.__doc__ # docstring for the new class 'Point(x, y)' >>> p = Point(11, y=22) # instantiate with pos args or keywords >>> p[0] + p[1] # indexable like a plain tuple 33 >>> x, y = p # unpack like a regular tuple >>> x, y (11, 22) >>> p.x + p.y # fields also accessible by name 33 >>> d = p._asdict() # convert to a dictionary >>> d['x'] 11 >>> Point(**d) # convert from a dictionary Point(x=11, y=22) >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields Point(x=100, y=22) """ # Validate the field names. At the user's option, either generate an error # message or automatically replace the field name with a valid name. 
if isinstance(field_names, str): field_names = field_names.replace(',', ' ').split() field_names = [str(x) for x in field_names] if rename: seen = set() for index, name in enumerate(field_names): if (not all(c.isalnum() or c == '_' for c in name) or _iskeyword(name) or not name or name[0].isdigit() or name.startswith('_') or name in seen): field_names[index] = '_%d' % index seen.add(name) for name in [typename] + field_names: if not all(c.isalnum() or c == '_' for c in name): raise ValueError('Type names and field names can only contain ' 'alphanumeric characters and underscores: %r' % name) if _iskeyword(name): raise ValueError('Type names and field names cannot be a ' 'keyword: %r' % name) if name[0].isdigit(): raise ValueError('Type names and field names cannot start with ' 'a number: %r' % name) seen = set() for name in field_names: if name.startswith('_') and not rename: raise ValueError('Field names cannot start with an underscore: ' '%r' % name) if name in seen: raise ValueError('Encountered duplicate field name: %r' % name) seen.add(name) # Fill-in the class template fmt_kw = {'typename': typename} fmt_kw['field_names'] = tuple(field_names) fmt_kw['num_fields'] = len(field_names) fmt_kw['arg_list'] = repr(tuple(field_names)).replace("'", "")[1:-1] fmt_kw['repr_fmt'] = ', '.join(_repr_tmpl.format(name=name) for name in field_names) fmt_kw['field_defs'] = '\n'.join(_imm_field_tmpl.format(index=index, name=name) for index, name in enumerate(field_names)) class_definition = _namedtuple_tmpl.format(**fmt_kw) if verbose: print(class_definition) # Execute the template string in a temporary namespace and support # tracing utilities by setting a value for frame.f_globals['__name__'] namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename, OrderedDict=OrderedDict, _property=property, _tuple=tuple) try: exec(class_definition, namespace) except SyntaxError as e: raise SyntaxError(e.msg + ':\n' + class_definition) result = namespace[typename] # For pickling to work, the __module__ variable needs to be set to the frame # where the named tuple is created. Bypass this step in environments where # sys._getframe is not defined (Jython for example) or sys._getframe is not # defined for arguments greater than 0 (IronPython). try: frame = _sys._getframe(1) result.__module__ = frame.f_globals.get('__name__', '__main__') except (AttributeError, ValueError): pass return result ################################################################# ### namedlist ################################################################# _namedlist_tmpl = '''\ class {typename}(list): '{typename}({arg_list})' __slots__ = () _fields = {field_names!r} def __new__(_cls, {arg_list}): # TODO: tweak sig to make more extensible 'Create new instance of {typename}({arg_list})' return _list.__new__(_cls, ({arg_list})) def __init__(self, {arg_list}): # tuple didn't need this but list does return _list.__init__(self, ({arg_list})) @classmethod def _make(cls, iterable, new=_list, len=len): 'Make a new {typename} object from a sequence or iterable' # why did this function exist? why not just star the # iterable like below? 
result = cls(*iterable) if len(result) != {num_fields:d}: raise TypeError('Expected {num_fields:d} arguments,' ' got %d' % len(result)) return result def __repr__(self): 'Return a nicely formatted representation string' tmpl = self.__class__.__name__ + '({repr_fmt})' return tmpl % tuple(self) def _asdict(self): 'Return a new OrderedDict which maps field names to their values' return OrderedDict(zip(self._fields, self)) def _replace(_self, **kwds): 'Return a new {typename} object replacing field(s) with new values' result = _self._make(map(kwds.pop, {field_names!r}, _self)) if kwds: raise ValueError('Got unexpected field names: %r' % kwds.keys()) return result def __getnewargs__(self): 'Return self as a plain list. Used by copy and pickle.' return tuple(self) __dict__ = _property(_asdict) def __getstate__(self): 'Exclude the OrderedDict from pickling' # wat pass {field_defs} ''' def namedlist(typename, field_names, verbose=False, rename=False): """Returns a new subclass of list with named fields. >>> Point = namedlist('Point', ['x', 'y']) >>> Point.__doc__ # docstring for the new class 'Point(x, y)' >>> p = Point(11, y=22) # instantiate with pos args or keywords >>> p[0] + p[1] # indexable like a plain list 33 >>> x, y = p # unpack like a regular list >>> x, y (11, 22) >>> p.x + p.y # fields also accessible by name 33 >>> d = p._asdict() # convert to a dictionary >>> d['x'] 11 >>> Point(**d) # convert from a dictionary Point(x=11, y=22) >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields Point(x=100, y=22) """ # Validate the field names. At the user's option, either generate an error # message or automatically replace the field name with a valid name. if isinstance(field_names, str): field_names = field_names.replace(',', ' ').split() field_names = [str(x) for x in field_names] if rename: seen = set() for index, name in enumerate(field_names): if (not all(c.isalnum() or c == '_' for c in name) or _iskeyword(name) or not name or name[0].isdigit() or name.startswith('_') or name in seen): field_names[index] = '_%d' % index seen.add(name) for name in [typename] + field_names: if not all(c.isalnum() or c == '_' for c in name): raise ValueError('Type names and field names can only contain ' 'alphanumeric characters and underscores: %r' % name) if _iskeyword(name): raise ValueError('Type names and field names cannot be a ' 'keyword: %r' % name) if name[0].isdigit(): raise ValueError('Type names and field names cannot start with ' 'a number: %r' % name) seen = set() for name in field_names: if name.startswith('_') and not rename: raise ValueError('Field names cannot start with an underscore: ' '%r' % name) if name in seen: raise ValueError('Encountered duplicate field name: %r' % name) seen.add(name) # Fill-in the class template fmt_kw = {'typename': typename} fmt_kw['field_names'] = tuple(field_names) fmt_kw['num_fields'] = len(field_names) fmt_kw['arg_list'] = repr(tuple(field_names)).replace("'", "")[1:-1] fmt_kw['repr_fmt'] = ', '.join(_repr_tmpl.format(name=name) for name in field_names) fmt_kw['field_defs'] = '\n'.join(_m_field_tmpl.format(index=index, name=name) for index, name in enumerate(field_names)) class_definition = _namedlist_tmpl.format(**fmt_kw) if verbose: print(class_definition) def _itemsetter(key): def _itemsetter(obj, value): obj[key] = value return _itemsetter # Execute the template string in a temporary namespace and support # tracing utilities by setting a value for frame.f_globals['__name__'] namespace = dict(_itemgetter=_itemgetter, 
_itemsetter=_itemsetter, __name__='namedlist_%s' % typename, OrderedDict=OrderedDict, _property=property, _list=list) try: exec(class_definition, namespace) except SyntaxError as e: raise SyntaxError(e.msg + ':\n' + class_definition) result = namespace[typename] # For pickling to work, the __module__ variable needs to be set to # the frame where the named list is created. Bypass this step in # environments where sys._getframe is not defined (Jython for # example) or sys._getframe is not defined for arguments greater # than 0 (IronPython). try: frame = _sys._getframe(1) result.__module__ = frame.f_globals.get('__name__', '__main__') except (AttributeError, ValueError): pass return result boltons-25.0.0/boltons/pathutils.py000066400000000000000000000145411475005545200173410ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ Functions for working with filesystem paths. The :func:`expandpath` function expands the tilde to $HOME and environment variables to their values. The :func:`augpath` function creates variants of an existing path without having to spend multiple lines of code splitting it up and stitching it back together. The :func:`shrinkuser` function replaces your home directory with a tilde. """ from os.path import (expanduser, expandvars, join, normpath, split, splitext) import os __all__ = [ 'augpath', 'shrinkuser', 'expandpath', ] def augpath(path, suffix='', prefix='', ext=None, base=None, dpath=None, multidot=False): """ Augment a path by modifying its components. Creates a new path with a different extension, basename, directory, prefix, and/or suffix. A prefix is inserted before the basename. A suffix is inserted between the basename and the extension. The basename and extension can be replaced with a new one. Essentially a path is broken down into components (dpath, base, ext), and then recombined as (dpath, prefix, base, suffix, ext) after replacing any specified component. 
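
    For example, with ``prefix='pre_'`` and ``suffix='_suf'``, the
    path ``'d/base.ext'`` is recombined as ``'d/pre_base_suf.ext'``
    (separator shown POSIX-style; the platform separator is used).
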
Args: path (str | PathLike): a path to augment suffix (str, default=''): placed between the basename and extension prefix (str, default=''): placed in front of the basename ext (str, default=None): if specified, replaces the extension base (str, default=None): if specified, replaces the basename without extension dpath (str | PathLike, default=None): if specified, replaces the directory multidot (bool, default=False): Allows extensions to contain multiple dots. Specifically, if False, everything after the last dot in the basename is the extension. If True, everything after the first dot in the basename is the extension. Returns: str: augmented path Example: >>> path = 'foo.bar' >>> suffix = '_suff' >>> prefix = 'pref_' >>> ext = '.baz' >>> newpath = augpath(path, suffix, prefix, ext=ext, base='bar') >>> print('newpath = %s' % (newpath,)) newpath = pref_bar_suff.baz Example: >>> augpath('foo.bar') 'foo.bar' >>> augpath('foo.bar', ext='.BAZ') 'foo.BAZ' >>> augpath('foo.bar', suffix='_') 'foo_.bar' >>> augpath('foo.bar', prefix='_') '_foo.bar' >>> augpath('foo.bar', base='baz') 'baz.bar' >>> augpath('foo.tar.gz', ext='.zip', multidot=True) 'foo.zip' >>> augpath('foo.tar.gz', ext='.zip', multidot=False) 'foo.tar.zip' >>> augpath('foo.tar.gz', suffix='_new', multidot=True) 'foo_new.tar.gz' """ # Breakup path orig_dpath, fname = split(path) if multidot: # The first dot defines the extension parts = fname.split('.', 1) orig_base = parts[0] orig_ext = '' if len(parts) == 1 else '.' + parts[1] else: # The last dot defines the extension orig_base, orig_ext = splitext(fname) # Replace parts with specified augmentations if dpath is None: dpath = orig_dpath if ext is None: ext = orig_ext if base is None: base = orig_base # Recombine into new path new_fname = ''.join((prefix, base, suffix, ext)) newpath = join(dpath, new_fname) return newpath def shrinkuser(path, home='~'): """ Inverse of :func:`os.path.expanduser`. Args: path (str | PathLike): path in system file structure home (str, default='~'): symbol used to replace the home path. Defaults to '~', but you might want to use '$HOME' or '%USERPROFILE%' instead. Returns: str: path: shortened path replacing the home directory with a tilde Example: >>> path = expanduser('~') >>> assert path != '~' >>> assert shrinkuser(path) == '~' >>> assert shrinkuser(path + '1') == path + '1' >>> assert shrinkuser(path + '/1') == join('~', '1') >>> assert shrinkuser(path + '/1', '$HOME') == join('$HOME', '1') """ path = normpath(path) userhome_dpath = expanduser('~') if path.startswith(userhome_dpath): if len(path) == len(userhome_dpath): path = home elif path[len(userhome_dpath)] == os.path.sep: path = home + path[len(userhome_dpath):] return path def expandpath(path): """ Shell-like expansion of environment variables and tilde home directory. Args: path (str | PathLike): the path to expand Returns: str : expanded path Example: >>> import os >>> os.environ['SPAM'] = 'eggs' >>> assert expandpath('~/$SPAM') == expanduser('~/eggs') >>> assert expandpath('foo') == 'foo' """ path = expanduser(path) path = expandvars(path) return path boltons-25.0.0/boltons/queueutils.py000066400000000000000000000167421475005545200175360ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
#
#    * Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials provided
#      with the distribution.
#
#    * The names of the contributors may not be used to endorse or
#      promote products derived from this software without specific
#      prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Python comes with many great data structures, from :class:`dict`
to :class:`collections.deque`, and no shortage of serviceable
algorithm implementations, from :func:`sorted` to :mod:`bisect`. But
priority queues are curiously relegated to an example documented in
:mod:`heapq`. Even there, the approach presented is not full-featured
and object-oriented. There is a built-in priority queue,
:class:`queue.PriorityQueue`, but in addition to its austere API, it
carries the double-edged sword of threadsafety, making it fine for
multi-threaded, multi-consumer applications, but high-overhead for
cooperative/single-threaded use cases.

The ``queueutils`` module currently provides two Queue
implementations: :class:`HeapPriorityQueue`, based on a heap, and
:class:`SortedPriorityQueue`, based on a sorted list. Both use a
unified API based on :class:`BasePriorityQueue` to facilitate testing
the slightly different performance characteristics on various
application use cases.

>>> pq = PriorityQueue()
>>> pq.add('low priority task', 0)
>>> pq.add('high priority task', 2)
>>> pq.add('medium priority task 1', 1)
>>> pq.add('medium priority task 2', 1)
>>> len(pq)
4
>>> pq.pop()
'high priority task'
>>> pq.peek()
'medium priority task 1'
>>> len(pq)
3
"""


from heapq import heappush, heappop
from bisect import insort
import itertools

try:
    from .typeutils import make_sentinel
    _REMOVED = make_sentinel(var_name='_REMOVED')
except ImportError:
    _REMOVED = object()

try:
    from .listutils import BList
    # see BarrelList docstring for notes
except ImportError:
    BList = list


__all__ = ['PriorityQueue', 'BasePriorityQueue',
           'HeapPriorityQueue', 'SortedPriorityQueue']


# TODO: make Base a real abstract class
# TODO: add uniqueification


class BasePriorityQueue:
    """The abstract base class for the other PriorityQueues in this
    module. Override the ``_backend_type`` class attribute, as well as
    the :meth:`_push_entry` and :meth:`_pop_entry` staticmethods for
    custom subclass behavior. (Don't forget to use
    :func:`staticmethod`).

    Args:
        priority_key (callable): A function that takes *priority* as
            passed in by :meth:`add` and returns a real number
            representing the effective priority.
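    For example, a *priority_key* can invert the default convention,
    making smaller numbers mean higher priority (an illustrative
    sketch):

    >>> pq = HeapPriorityQueue(priority_key=lambda p: float(p or 0))
    >>> pq.add('second', 2)
    >>> pq.add('first', 1)
    >>> pq.pop()
    'first'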
""" # negating priority means larger numbers = higher priority _default_priority_key = staticmethod(lambda p: -float(p or 0)) _backend_type = list def __init__(self, **kw): self._pq = self._backend_type() self._entry_map = {} self._counter = itertools.count() self._get_priority = kw.pop('priority_key', self._default_priority_key) if kw: raise TypeError('unexpected keyword arguments: %r' % kw.keys()) @staticmethod def _push_entry(backend, entry): pass # abstract @staticmethod def _pop_entry(backend): pass # abstract def add(self, task, priority=None): """ Add a task to the queue, or change the *task*'s priority if *task* is already in the queue. *task* can be any hashable object, and *priority* defaults to ``0``. Higher values representing higher priority, but this behavior can be controlled by setting *priority_key* in the constructor. """ priority = self._get_priority(priority) if task in self._entry_map: self.remove(task) count = next(self._counter) entry = [priority, count, task] self._entry_map[task] = entry self._push_entry(self._pq, entry) def remove(self, task): """Remove a task from the priority queue. Raises :exc:`KeyError` if the *task* is absent. """ entry = self._entry_map.pop(task) entry[-1] = _REMOVED def _cull(self, raise_exc=True): "Remove entries marked as removed by previous :meth:`remove` calls." while self._pq: priority, count, task = self._pq[0] if task is _REMOVED: self._pop_entry(self._pq) continue return if raise_exc: raise IndexError('empty priority queue') def peek(self, default=_REMOVED): """Read the next value in the queue without removing it. Returns *default* on an empty queue, or raises :exc:`KeyError` if *default* is not set. """ try: self._cull() _, _, task = self._pq[0] except IndexError: if default is not _REMOVED: return default raise IndexError('peek on empty queue') return task def pop(self, default=_REMOVED): """Remove and return the next value in the queue. Returns *default* on an empty queue, or raises :exc:`KeyError` if *default* is not set. """ try: self._cull() _, _, task = self._pop_entry(self._pq) del self._entry_map[task] except IndexError: if default is not _REMOVED: return default raise IndexError('pop on empty queue') return task def __len__(self): "Return the number of tasks in the queue." return len(self._entry_map) class HeapPriorityQueue(BasePriorityQueue): """A priority queue inherited from :class:`BasePriorityQueue`, backed by a list and based on the :func:`heapq.heappop` and :func:`heapq.heappush` functions in the built-in :mod:`heapq` module. """ @staticmethod def _pop_entry(backend): return heappop(backend) @staticmethod def _push_entry(backend, entry): heappush(backend, entry) class SortedPriorityQueue(BasePriorityQueue): """A priority queue inherited from :class:`BasePriorityQueue`, based on the :func:`bisect.insort` approach for in-order insertion into a sorted list. """ _backend_type = BList @staticmethod def _pop_entry(backend): return backend.pop(0) @staticmethod def _push_entry(backend, entry): insort(backend, entry) PriorityQueue = SortedPriorityQueue boltons-25.0.0/boltons/setutils.py000066400000000000000000001013121475005545200171710ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """\ The :class:`set` type brings the practical expressiveness of set theory to Python. It has a very rich API overall, but lacks a couple of fundamental features. For one, sets are not ordered. On top of this, sets are not indexable, i.e, ``my_set[8]`` will raise an :exc:`TypeError`. The :class:`IndexedSet` type remedies both of these issues without compromising on the excellent complexity characteristics of Python's built-in set implementation. """ from bisect import bisect_left from collections.abc import MutableSet from itertools import chain, islice import operator try: from .typeutils import make_sentinel _MISSING = make_sentinel(var_name='_MISSING') except ImportError: _MISSING = object() __all__ = ['IndexedSet', 'complement'] _COMPACTION_FACTOR = 8 # TODO: inherit from set() # TODO: .discard_many(), .remove_many() # TODO: raise exception on non-set params? # TODO: technically reverse operators should probably reverse the # order of the 'other' inputs and put self last (to try and maintain # insertion order) class IndexedSet(MutableSet): """``IndexedSet`` is a :class:`collections.MutableSet` that maintains insertion order and uniqueness of inserted elements. It's a hybrid type, mostly like an OrderedSet, but also :class:`list`-like, in that it supports indexing and slicing. Args: other (iterable): An optional iterable used to initialize the set. >>> x = IndexedSet(list(range(4)) + list(range(8))) >>> x IndexedSet([0, 1, 2, 3, 4, 5, 6, 7]) >>> x - set(range(2)) IndexedSet([2, 3, 4, 5, 6, 7]) >>> x[-1] 7 >>> fcr = IndexedSet('freecreditreport.com') >>> ''.join(fcr[:fcr.index('.')]) 'frecditpo' Standard set operators and interoperation with :class:`set` are all supported: >>> fcr & set('cash4gold.com') IndexedSet(['c', 'd', 'o', '.', 'm']) As you can see, the ``IndexedSet`` is almost like a ``UniqueList``, retaining only one copy of a given value, in the order it was first added. For the curious, the reason why IndexedSet does not support setting items based on index (i.e, ``__setitem__()``), consider the following dilemma:: my_indexed_set = [A, B, C, D] my_indexed_set[2] = A At this point, a set requires only one *A*, but a :class:`list` would overwrite *C*. Overwriting *C* would change the length of the list, meaning that ``my_indexed_set[2]`` would not be *A*, as expected with a list, but rather *D*. So, no ``__setitem__()``. 
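    Positional access with :meth:`index` and :meth:`pop` still works,
    though:

    >>> iset = IndexedSet('abracadabra')
    >>> iset.index('r')
    2
    >>> iset.pop(0)
    'a'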
Otherwise, the API strives to be as complete a union of the :class:`list` and :class:`set` APIs as possible. """ def __init__(self, other=None): self.item_index_map = dict() self.item_list = [] self.dead_indices = [] self._compactions = 0 self._c_max_size = 0 if other: self.update(other) # internal functions @property def _dead_index_count(self): return len(self.item_list) - len(self.item_index_map) def _compact(self): if not self.dead_indices: return self._compactions += 1 dead_index_count = self._dead_index_count items, index_map = self.item_list, self.item_index_map self._c_max_size = max(self._c_max_size, len(items)) for i, item in enumerate(self): items[i] = item index_map[item] = i del items[-dead_index_count:] del self.dead_indices[:] def _cull(self): ded = self.dead_indices if not ded: return items, ii_map = self.item_list, self.item_index_map if not ii_map: del items[:] del ded[:] elif len(ded) > 384: self._compact() elif self._dead_index_count > (len(items) / _COMPACTION_FACTOR): self._compact() elif items[-1] is _MISSING: # get rid of dead right hand side num_dead = 1 while items[-(num_dead + 1)] is _MISSING: num_dead += 1 if ded and ded[-1][1] == len(items): del ded[-1] del items[-num_dead:] def _get_real_index(self, index): if index < 0: index += len(self) if not self.dead_indices: return index real_index = index for d_start, d_stop in self.dead_indices: if real_index < d_start: break real_index += d_stop - d_start return real_index def _get_apparent_index(self, index): if index < 0: index += len(self) if not self.dead_indices: return index apparent_index = index for d_start, d_stop in self.dead_indices: if index < d_start: break apparent_index -= d_stop - d_start return apparent_index def _add_dead(self, start, stop=None): # TODO: does not handle when the new interval subsumes # multiple existing intervals dints = self.dead_indices if stop is None: stop = start + 1 cand_int = [start, stop] if not dints: dints.append(cand_int) return int_idx = bisect_left(dints, cand_int) dint = dints[int_idx - 1] d_start, d_stop = dint if start <= d_start <= stop: dint[0] = start elif start <= d_stop <= stop: dint[1] = stop else: dints.insert(int_idx, cand_int) return # common operations (shared by set and list) def __len__(self): return len(self.item_index_map) def __contains__(self, item): return item in self.item_index_map def __iter__(self): return (item for item in self.item_list if item is not _MISSING) def __reversed__(self): item_list = self.item_list return (item for item in reversed(item_list) if item is not _MISSING) def __repr__(self): return f'{self.__class__.__name__}({list(self)!r})' def __eq__(self, other): if isinstance(other, IndexedSet): return len(self) == len(other) and list(self) == list(other) try: return set(self) == set(other) except TypeError: return False @classmethod def from_iterable(cls, it): "from_iterable(it) -> create a set from an iterable" return cls(it) # set operations def add(self, item): "add(item) -> add item to the set" if item not in self.item_index_map: self.item_index_map[item] = len(self.item_list) self.item_list.append(item) def remove(self, item): "remove(item) -> remove item from the set, raises if not present" try: didx = self.item_index_map.pop(item) except KeyError: raise KeyError(item) self.item_list[didx] = _MISSING self._add_dead(didx) self._cull() def discard(self, item): "discard(item) -> discard item from the set (does not raise)" try: self.remove(item) except KeyError: pass def clear(self): "clear() -> empty the set" del 
self.item_list[:] del self.dead_indices[:] self.item_index_map.clear() def isdisjoint(self, other): "isdisjoint(other) -> return True if no overlap with other" iim = self.item_index_map for k in other: if k in iim: return False return True def issubset(self, other): "issubset(other) -> return True if other contains this set" if len(other) < len(self): return False for k in self.item_index_map: if k not in other: return False return True def issuperset(self, other): "issuperset(other) -> return True if set contains other" if len(other) > len(self): return False iim = self.item_index_map for k in other: if k not in iim: return False return True def union(self, *others): "union(*others) -> return a new set containing this set and others" return self.from_iterable(chain(self, *others)) def iter_intersection(self, *others): "iter_intersection(*others) -> iterate over elements also in others" for k in self: for other in others: if k not in other: break else: yield k return def intersection(self, *others): "intersection(*others) -> get a set with overlap of this and others" if len(others) == 1: other = others[0] return self.from_iterable(k for k in self if k in other) return self.from_iterable(self.iter_intersection(*others)) def iter_difference(self, *others): "iter_difference(*others) -> iterate over elements not in others" for k in self: for other in others: if k in other: break else: yield k return def difference(self, *others): "difference(*others) -> get a new set with elements not in others" if len(others) == 1: other = others[0] return self.from_iterable(k for k in self if k not in other) return self.from_iterable(self.iter_difference(*others)) def symmetric_difference(self, *others): "symmetric_difference(*others) -> XOR set of this and others" ret = self.union(*others) return ret.difference(self.intersection(*others)) __or__ = __ror__ = union __and__ = __rand__ = intersection __sub__ = difference __xor__ = __rxor__ = symmetric_difference def __rsub__(self, other): vals = [x for x in other if x not in self] return type(other)(vals) # in-place set operations def update(self, *others): "update(*others) -> add values from one or more iterables" if not others: return # raise? 
elif len(others) == 1: other = others[0] else: other = chain.from_iterable(others) for o in other: self.add(o) def intersection_update(self, *others): "intersection_update(*others) -> discard self.difference(*others)" for val in self.difference(*others): self.discard(val) def difference_update(self, *others): "difference_update(*others) -> discard self.intersection(*others)" if self in others: self.clear() for val in self.intersection(*others): self.discard(val) def symmetric_difference_update(self, other): # note singular 'other' "symmetric_difference_update(other) -> in-place XOR with other" if self is other: self.clear() for val in other: if val in self: self.discard(val) else: self.add(val) def __ior__(self, *others): self.update(*others) return self def __iand__(self, *others): self.intersection_update(*others) return self def __isub__(self, *others): self.difference_update(*others) return self def __ixor__(self, *others): self.symmetric_difference_update(*others) return self def iter_slice(self, start, stop, step=None): "iterate over a slice of the set" iterable = self if start is not None: start = self._get_real_index(start) if stop is not None: stop = self._get_real_index(stop) if step is not None and step < 0: step = -step iterable = reversed(self) return islice(iterable, start, stop, step) # list operations def __getitem__(self, index): try: start, stop, step = index.start, index.stop, index.step except AttributeError: index = operator.index(index) else: iter_slice = self.iter_slice(start, stop, step) return self.from_iterable(iter_slice) if index < 0: index += len(self) real_index = self._get_real_index(index) try: ret = self.item_list[real_index] except IndexError: raise IndexError('IndexedSet index out of range') return ret def pop(self, index=None): "pop(index) -> remove the item at a given index (-1 by default)" item_index_map = self.item_index_map len_self = len(item_index_map) if index is None or index == -1 or index == len_self - 1: ret = self.item_list.pop() del item_index_map[ret] else: real_index = self._get_real_index(index) ret = self.item_list[real_index] self.item_list[real_index] = _MISSING del item_index_map[ret] self._add_dead(real_index) self._cull() return ret def count(self, val): "count(val) -> count number of instances of value (0 or 1)" if val in self.item_index_map: return 1 return 0 def reverse(self): "reverse() -> reverse the contents of the set in-place" reversed_list = list(reversed(self)) self.item_list[:] = reversed_list for i, item in enumerate(self.item_list): self.item_index_map[item] = i del self.dead_indices[:] def sort(self, **kwargs): "sort() -> sort the contents of the set in-place" sorted_list = sorted(self, **kwargs) if sorted_list == self.item_list: return self.item_list[:] = sorted_list for i, item in enumerate(self.item_list): self.item_index_map[item] = i del self.dead_indices[:] def index(self, val): "index(val) -> get the index of a value, raises if not present" try: return self._get_apparent_index(self.item_index_map[val]) except KeyError: cn = self.__class__.__name__ raise ValueError(f'{val!r} is not in {cn}') def complement(wrapped): """Given a :class:`set`, convert it to a **complement set**. Whereas a :class:`set` keeps track of what it contains, a `complement set `_ keeps track of what it does *not* contain. For example, look what happens when we intersect a normal set with a complement set:: >>> list(set(range(5)) & complement(set([2, 3]))) [0, 1, 4] We get everything in the left set that wasn't in the right, because intersecting with a complement is the same as subtracting a normal set. Args: wrapped (set): A set or any other iterable which should be turned into a complement set. All set methods and operators are supported by complement sets, between other :func:`complement`-wrapped sets and/or regular :class:`set` objects. Because a complement set only tracks what elements are *not* in the set, functionality based on set contents is unavailable: :func:`len`, :func:`iter` (and for loops), and ``.pop()``. But a complement set can always be turned back into a regular set by complementing it again: >>> s = set(range(5)) >>> complement(complement(s)) == s True .. note:: An empty complement set corresponds to the concept of a `universal set `_ from mathematics. Complement sets by example ^^^^^^^^^^^^^^^^^^^^^^^^^^ Many uses of sets can be expressed more simply by using a complement. Rather than trying to work out in your head the proper way to invert an expression, you can just throw a complement on the set. Consider this example of a name filter:: >>> class NamesFilter(object): ... def __init__(self, allowed): ... self._allowed = allowed ... ... def filter(self, names): ... return [name for name in names if name in self._allowed] >>> NamesFilter(set(['alice', 'bob'])).filter(['alice', 'bob', 'carol']) ['alice', 'bob'] What if we want to just express "let all the names through"? We could try to enumerate all of the expected names:: ``NamesFilter({'alice', 'bob', 'carol'})`` But this is very brittle -- what if at some point down the line this object is changed to filter ``['alice', 'bob', 'carol', 'dan']``? Even worse, what about the poor programmer who next works on this piece of code? They cannot tell whether the purpose of the large allowed set was "allow everything", or if 'dan' was excluded for some subtle reason. A complement set lets the programmer's intention be expressed succinctly and directly:: NamesFilter(complement(set())) Not only is this code short and robust, it makes the intention easy to understand.
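Continuing the doctest above, the complemented filter does exactly what that line of code promises, reusing the ``NamesFilter`` class defined earlier in this docstring:

>>> NamesFilter(complement(set())).filter(['alice', 'bob', 'carol'])
['alice', 'bob', 'carol']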
""" if type(wrapped) is _ComplementSet: return wrapped.complemented() if type(wrapped) is frozenset: return _ComplementSet(excluded=wrapped) return _ComplementSet(excluded=set(wrapped)) def _norm_args_typeerror(other): '''normalize args and raise type-error if there is a problem''' if type(other) in (set, frozenset): inc, exc = other, None elif type(other) is _ComplementSet: inc, exc = other._included, other._excluded else: raise TypeError('argument must be another set or complement(set)') return inc, exc def _norm_args_notimplemented(other): '''normalize args and return NotImplemented (for overloaded operators)''' if type(other) in (set, frozenset): inc, exc = other, None elif type(other) is _ComplementSet: inc, exc = other._included, other._excluded else: return NotImplemented, None return inc, exc class _ComplementSet: """ helper class for complement() that implements the set methods """ __slots__ = ('_included', '_excluded') def __init__(self, included=None, excluded=None): if included is None: assert type(excluded) in (set, frozenset) elif excluded is None: assert type(included) in (set, frozenset) else: raise ValueError('one of included or excluded must be a set') self._included, self._excluded = included, excluded def __repr__(self): if self._included is None: return f'complement({repr(self._excluded)})' return f'complement(complement({repr(self._included)}))' def complemented(self): '''return a complement of the current set''' if type(self._included) is frozenset or type(self._excluded) is frozenset: return _ComplementSet(included=self._excluded, excluded=self._included) return _ComplementSet( included=None if self._excluded is None else set(self._excluded), excluded=None if self._included is None else set(self._included)) __invert__ = complemented def complement(self): '''convert the current set to its complement in-place''' self._included, self._excluded = self._excluded, self._included def __contains__(self, item): if self._included is None: return not item in self._excluded return item in self._included def add(self, item): if self._included is None: if item in self._excluded: self._excluded.remove(item) else: self._included.add(item) def remove(self, item): if self._included is None: self._excluded.add(item) else: self._included.remove(item) def pop(self): if self._included is None: raise NotImplementedError # self.missing.add(random.choice(gc.objects())) return self._included.pop() def intersection(self, other): try: return self & other except NotImplementedError: raise TypeError('argument must be another set or complement(set)') def __and__(self, other): inc, exc = _norm_args_notimplemented(other) if inc is NotImplemented: return NotImplemented if self._included is None: if exc is None: # - + return _ComplementSet(included=inc - self._excluded) else: # - - return _ComplementSet(excluded=self._excluded.union(other._excluded)) else: if inc is None: # + - return _ComplementSet(included=exc - self._included) else: # + + return _ComplementSet(included=self._included.intersection(inc)) __rand__ = __and__ def __iand__(self, other): inc, exc = _norm_args_notimplemented(other) if inc is NotImplemented: return NotImplemented if self._included is None: if exc is None: # - + self._excluded = inc - self._excluded # TODO: do this in place? 
else: # - - self._excluded |= exc else: if inc is None: # + - self._included -= exc self._included, self._excluded = None, self._included else: # + + self._included &= inc return self def union(self, other): try: return self | other except NotImplementedError: raise TypeError('argument must be another set or complement(set)') def __or__(self, other): inc, exc = _norm_args_notimplemented(other) if inc is NotImplemented: return NotImplemented if self._included is None: if exc is None: # - + return _ComplementSet(excluded=self._excluded - inc) else: # - - return _ComplementSet(excluded=self._excluded.intersection(exc)) else: if inc is None: # + - return _ComplementSet(excluded=exc - self._included) else: # + + return _ComplementSet(included=self._included.union(inc)) __ror__ = __or__ def __ior__(self, other): inc, exc = _norm_args_notimplemented(other) if inc is NotImplemented: return NotImplemented if self._included is None: if exc is None: # - + self._excluded -= inc else: # - - self._excluded &= exc else: if inc is None: # + - self._included, self._excluded = None, exc - self._included # TODO: do this in place? else: # + + self._included |= inc return self def update(self, items): if type(items) in (set, frozenset): inc, exc = items, None elif type(items) is _ComplementSet: inc, exc = items._included, items._excluded else: inc, exc = frozenset(items), None if self._included is None: if exc is None: # - + self._excluded &= inc else: # - - self._excluded.discard(exc) else: if inc is None: # + - self._included &= exc self._included, self._excluded = None, self._excluded else: # + + self._included.update(inc) def discard(self, items): if type(items) in (set, frozenset): inc, exc = items, None elif type(items) is _ComplementSet: inc, exc = items._included, items._excluded else: inc, exc = frozenset(items), None if self._included is None: if exc is None: # - + self._excluded.update(inc) else: # - - self._included, self._excluded = exc - self._excluded, None else: if inc is None: # + - self._included &= exc else: # + + self._included.discard(inc) def symmetric_difference(self, other): try: return self ^ other except NotImplementedError: raise TypeError('argument must be another set or complement(set)') def __xor__(self, other): inc, exc = _norm_args_notimplemented(other) if inc is NotImplemented: return NotImplemented if inc is NotImplemented: return NotImplemented if self._included is None: if exc is None: # - + return _ComplementSet(excluded=self._excluded - inc) else: # - - return _ComplementSet(included=self._excluded.symmetric_difference(exc)) else: if inc is None: # + - return _ComplementSet(excluded=exc - self._included) else: # + + return _ComplementSet(included=self._included.symmetric_difference(inc)) __rxor__ = __xor__ def symmetric_difference_update(self, other): inc, exc = _norm_args_typeerror(other) if self._included is None: if exc is None: # - + self._excluded |= inc else: # - - self._excluded.symmetric_difference_update(exc) self._included, self._excluded = self._excluded, None else: if inc is None: # + - self._included |= exc self._included, self._excluded = None, self._included else: # + + self._included.symmetric_difference_update(inc) def isdisjoint(self, other): inc, exc = _norm_args_typeerror(other) if inc is NotImplemented: return NotImplemented if self._included is None: if exc is None: # - + return inc.issubset(self._excluded) else: # - - return False else: if inc is None: # + - return self._included.issubset(exc) else: # + + return self._included.isdisjoint(inc) def 
issubset(self, other): '''everything missing from other is also missing from self''' try: return self <= other except NotImplementedError: raise TypeError('argument must be another set or complement(set)') def __le__(self, other): inc, exc = _norm_args_notimplemented(other) if inc is NotImplemented: return NotImplemented if inc is NotImplemented: return NotImplemented if self._included is None: if exc is None: # - + return False else: # - - return self._excluded.issupserset(exc) else: if inc is None: # + - return self._included.isdisjoint(exc) else: # + + return self._included.issubset(inc) def __lt__(self, other): inc, exc = _norm_args_notimplemented(other) if inc is NotImplemented: return NotImplemented if inc is NotImplemented: return NotImplemented if self._included is None: if exc is None: # - + return False else: # - - return self._excluded > exc else: if inc is None: # + - return self._included.isdisjoint(exc) else: # + + return self._included < inc def issuperset(self, other): '''everything missing from self is also missing from super''' try: return self >= other except NotImplementedError: raise TypeError('argument must be another set or complement(set)') def __ge__(self, other): inc, exc = _norm_args_notimplemented(other) if inc is NotImplemented: return NotImplemented if self._included is None: if exc is None: # - + return not self._excluded.intersection(inc) else: # - - return self._excluded.issubset(exc) else: if inc is None: # + - return False else: # + + return self._included.issupserset(inc) def __gt__(self, other): inc, exc = _norm_args_notimplemented(other) if inc is NotImplemented: return NotImplemented if self._included is None: if exc is None: # - + return not self._excluded.intersection(inc) else: # - - return self._excluded < exc else: if inc is None: # + - return False else: # + + return self._included > inc def difference(self, other): try: return self - other except NotImplementedError: raise TypeError('argument must be another set or complement(set)') def __sub__(self, other): inc, exc = _norm_args_notimplemented(other) if inc is NotImplemented: return NotImplemented if self._included is None: if exc is None: # - + return _ComplementSet(excluded=self._excluded | inc) else: # - - return _ComplementSet(included=exc - self._excluded) else: if inc is None: # + - return _ComplementSet(included=self._included & exc) else: # + + return _ComplementSet(included=self._included.difference(inc)) def __rsub__(self, other): inc, exc = _norm_args_notimplemented(other) if inc is NotImplemented: return NotImplemented # rsub, so the expression being evaluated is "other - self" if self._included is None: if exc is None: # - + return _ComplementSet(included=inc & self._excluded) else: # - - return _ComplementSet(included=self._excluded - exc) else: if inc is None: # + - return _ComplementSet(excluded=exc | self._included) else: # + + return _ComplementSet(included=inc.difference(self._included)) def difference_update(self, other): try: self -= other except NotImplementedError: raise TypeError('argument must be another set or complement(set)') def __isub__(self, other): inc, exc = _norm_args_notimplemented(other) if inc is NotImplemented: return NotImplemented if self._included is None: if exc is None: # - + self._excluded |= inc else: # - - self._included, self._excluded = exc - self._excluded, None else: if inc is None: # + - self._included &= exc else: # + + self._included.difference_update(inc) return self def __eq__(self, other): return ( type(self) is type(other) and 
self._included == other._included and self._excluded == other._excluded) or ( type(other) in (set, frozenset) and self._included == other) def __hash__(self): return hash(self._included) ^ hash(self._excluded) def __len__(self): if self._included is not None: return len(self._included) raise NotImplementedError('complemented sets have undefined length') def __iter__(self): if self._included is not None: return iter(self._included) raise NotImplementedError('complemented sets have undefined contents') def __bool__(self): if self._included is not None: return bool(self._included) return True boltons-25.0.0/boltons/socketutils.py000066400000000000000000000723141475005545200176770ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """At its heart, Python can be viewed as an extension of the C programming language. Springing from the most popular systems programming language has made Python itself a great language for systems programming. One key to success in this domain is Python's very serviceable :mod:`socket` module and its :class:`socket.socket` type. The ``socketutils`` module provides natural next steps to the ``socket`` builtin: straightforward, tested building blocks for higher-level protocols. The :class:`BufferedSocket` wraps an ordinary socket, providing a layer of intuitive buffering for both sending and receiving. This facilitates parsing messages from streams, i.e., all sockets with type ``SOCK_STREAM``. The BufferedSocket enables receiving until the next relevant token, up to a certain size, or until the connection is closed. For all of these, it provides consistent APIs to size limiting, as well as timeouts that are compatible with multiple concurrency paradigms. Use it to parse the next one-off text or binary socket protocol you encounter. This module also provides the :class:`NetstringSocket`, a pure-Python implementation of `the Netstring protocol`_, built on top of the :class:`BufferedSocket`, serving as a ready-made, production-grade example. Special thanks to `Kurt Rose`_ for his original authorship and all his contributions on this module. 
Also thanks to `Daniel J. Bernstein`_, the original author of `Netstring`_. .. _the Netstring protocol: https://en.wikipedia.org/wiki/Netstring .. _Kurt Rose: https://github.com/doublereedkurt .. _Daniel J. Bernstein: https://cr.yp.to/ .. _Netstring: https://cr.yp.to/proto/netstrings.txt """ import time import socket try: from threading import RLock except Exception: class RLock: 'Dummy reentrant lock for builds without threads' def __enter__(self): pass def __exit__(self, exctype, excinst, exctb): pass try: from .typeutils import make_sentinel _UNSET = make_sentinel(var_name='_UNSET') except ImportError: _UNSET = object() DEFAULT_TIMEOUT = 10 # 10 seconds DEFAULT_MAXSIZE = 32 * 1024 # 32kb _RECV_LARGE_MAXSIZE = 1024 ** 5 # 1PB class BufferedSocket: """Mainly provides recv_until and recv_size. recv, send, sendall, and peek all function as similarly as possible to the built-in socket API. This type has been tested against both the built-in socket type as well as those from gevent and eventlet. It also features support for sockets with timeouts set to 0 (aka nonblocking), provided the caller is prepared to handle the EWOULDBLOCK exceptions. Args: sock (socket): The connected socket to be wrapped. timeout (float): The default timeout for sends and recvs, in seconds. Set to ``None`` for no timeout, and 0 for nonblocking. Defaults to *sock*'s own timeout if already set, and 10 seconds otherwise. maxsize (int): The default maximum number of bytes to be received into the buffer before it is considered full and raises an exception. Defaults to 32 kilobytes. recvsize (int): The number of bytes to recv for every lower-level :meth:`socket.recv` call. Defaults to *maxsize*. *timeout* and *maxsize* can both be overridden on individual socket operations. All ``recv`` methods return bytestrings (:class:`bytes`) and can raise :exc:`socket.error`. :exc:`Timeout`, :exc:`ConnectionClosed`, and :exc:`MessageTooLong` all inherit from :exc:`socket.error` and exist to provide better error messages. Received bytes are always buffered, even if an exception is raised. Use :meth:`BufferedSocket.getrecvbuffer` to retrieve partial recvs. BufferedSocket does not replace the built-in socket by any means. While the overlapping parts of the API are kept parallel to the built-in :class:`socket.socket`, BufferedSocket does not inherit from socket, and most socket functionality is only available on the underlying socket. :meth:`socket.getpeername`, :meth:`socket.getsockname`, :meth:`socket.fileno`, and others are only available on the underlying socket that is wrapped. Use the ``BufferedSocket.sock`` attribute to access it. See the examples for more information on how to use BufferedSockets with built-in sockets. The BufferedSocket is threadsafe, but consider the semantics of your protocol before accessing a single socket from multiple threads. Similarly, once the BufferedSocket is constructed, avoid using the underlying socket directly. Only use it for operations unrelated to messages, e.g., :meth:`socket.getpeername`. 
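A brief usage sketch (illustrative, not a doctest; it assumes *sock* is an already-connected ``SOCK_STREAM`` socket and a line-oriented peer protocol)::

    bsock = BufferedSocket(sock, timeout=5.0)
    bsock.send(b'PING\r\n')
    resp = bsock.recv_until(b'\r\n')  # e.g., b'PONG' (delimiter excluded by default)
    bsock.close()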
""" def __init__(self, sock, timeout=_UNSET, maxsize=DEFAULT_MAXSIZE, recvsize=_UNSET): self.sock = sock self.rbuf = b'' self.sbuf = [] self.maxsize = int(maxsize) if timeout is _UNSET: if self.sock.gettimeout() is None: self.timeout = DEFAULT_TIMEOUT else: self.timeout = self.sock.gettimeout() else: if timeout is None: self.timeout = timeout else: self.timeout = float(timeout) if recvsize is _UNSET: self._recvsize = self.maxsize else: self._recvsize = int(recvsize) self._send_lock = RLock() self._recv_lock = RLock() def settimeout(self, timeout): "Set the default *timeout* for future operations, in seconds." self.timeout = timeout def gettimeout(self): return self.timeout def setblocking(self, blocking): self.timeout = None if blocking else 0.0 def setmaxsize(self, maxsize): """Set the default maximum buffer size *maxsize* for future operations, in bytes. Does not truncate the current buffer. """ self.maxsize = maxsize def getrecvbuffer(self): "Returns the receive buffer bytestring (rbuf)." with self._recv_lock: return self.rbuf def getsendbuffer(self): "Returns a copy of the send buffer list." with self._send_lock: return b''.join(self.sbuf) def recv(self, size, flags=0, timeout=_UNSET): """Returns **up to** *size* bytes, using the internal buffer before performing a single :meth:`socket.recv` operation. Args: size (int): The maximum number of bytes to receive. flags (int): Kept for API compatibility with sockets. Only the default, ``0``, is valid. timeout (float): The timeout for this operation. Can be ``0`` for nonblocking and ``None`` for no timeout. Defaults to the value set in the constructor of BufferedSocket. If the operation does not complete in *timeout* seconds, a :exc:`Timeout` is raised. Much like the built-in :class:`socket.socket`, if this method returns an empty string, then the socket is closed and recv buffer is empty. Further calls to recv will raise :exc:`socket.error`. """ with self._recv_lock: if timeout is _UNSET: timeout = self.timeout if flags: raise ValueError("non-zero flags not supported: %r" % flags) if len(self.rbuf) >= size: data, self.rbuf = self.rbuf[:size], self.rbuf[size:] return data if self.rbuf: ret, self.rbuf = self.rbuf, b'' return ret self.sock.settimeout(timeout) try: data = self.sock.recv(self._recvsize) except socket.timeout: raise Timeout(timeout) # check the rbuf attr for more if len(data) > size: data, self.rbuf = data[:size], data[size:] return data def peek(self, size, timeout=_UNSET): """Returns *size* bytes from the socket and/or internal buffer. Bytes are retained in BufferedSocket's internal recv buffer. To only see bytes in the recv buffer, use :meth:`getrecvbuffer`. Args: size (int): The exact number of bytes to peek at timeout (float): The timeout for this operation. Can be 0 for nonblocking and None for no timeout. Defaults to the value set in the constructor of BufferedSocket. If the appropriate number of bytes cannot be fetched from the buffer and socket before *timeout* expires, then a :exc:`Timeout` will be raised. If the connection is closed, a :exc:`ConnectionClosed` will be raised. """ with self._recv_lock: if len(self.rbuf) >= size: return self.rbuf[:size] data = self.recv_size(size, timeout=timeout) self.rbuf = data + self.rbuf return data def recv_close(self, timeout=_UNSET, maxsize=_UNSET): """Receive until the connection is closed, up to *maxsize* bytes. If more than *maxsize* bytes are received, raises :exc:`MessageTooLong`. 
""" # recv_close works by using recv_size to request maxsize data, # and ignoring ConnectionClose, returning and clearing the # internal buffer instead. It raises an exception if # ConnectionClosed isn't raised. with self._recv_lock: if maxsize is _UNSET: maxsize = self.maxsize if maxsize is None: maxsize = _RECV_LARGE_MAXSIZE try: recvd = self.recv_size(maxsize + 1, timeout) except ConnectionClosed: ret, self.rbuf = self.rbuf, b'' else: # put extra received bytes (now in rbuf) after recvd self.rbuf = recvd + self.rbuf size_read = min(maxsize, len(self.rbuf)) raise MessageTooLong(size_read) # check receive buffer return ret def recv_until(self, delimiter, timeout=_UNSET, maxsize=_UNSET, with_delimiter=False): """Receive until *delimiter* is found, *maxsize* bytes have been read, or *timeout* is exceeded. Args: delimiter (bytes): One or more bytes to be searched for in the socket stream. timeout (float): The timeout for this operation. Can be 0 for nonblocking and None for no timeout. Defaults to the value set in the constructor of BufferedSocket. maxsize (int): The maximum size for the internal buffer. Defaults to the value set in the constructor. with_delimiter (bool): Whether or not to include the delimiter in the output. ``False`` by default, but ``True`` is useful in cases where one is simply forwarding the messages. ``recv_until`` will raise the following exceptions: * :exc:`Timeout` if more than *timeout* seconds expire. * :exc:`ConnectionClosed` if the underlying socket is closed by the sending end. * :exc:`MessageTooLong` if the delimiter is not found in the first *maxsize* bytes. * :exc:`socket.error` if operating in nonblocking mode (*timeout* equal to 0), or if some unexpected socket error occurs, such as operating on a closed socket. """ with self._recv_lock: if maxsize is _UNSET: maxsize = self.maxsize if maxsize is None: maxsize = _RECV_LARGE_MAXSIZE if timeout is _UNSET: timeout = self.timeout len_delimiter = len(delimiter) sock = self.sock recvd = bytearray(self.rbuf) start = time.time() find_offset_start = 0 # becomes a negative index below if not timeout: # covers None (no timeout) and 0 (nonblocking) sock.settimeout(timeout) try: while 1: offset = recvd.find(delimiter, find_offset_start, maxsize) if offset != -1: # str.find returns -1 when no match found if with_delimiter: # include delimiter in return offset += len_delimiter rbuf_offset = offset else: rbuf_offset = offset + len_delimiter break elif len(recvd) > maxsize: raise MessageTooLong(maxsize, delimiter) # see rbuf if timeout: cur_timeout = timeout - (time.time() - start) if cur_timeout <= 0.0: raise socket.timeout() sock.settimeout(cur_timeout) nxt = sock.recv(self._recvsize) if not nxt: args = (len(recvd), delimiter) msg = ('connection closed after reading %s bytes' ' without finding symbol: %r' % args) raise ConnectionClosed(msg) # check the recv buffer recvd.extend(nxt) find_offset_start = -len(nxt) - len_delimiter + 1 except socket.timeout: self.rbuf = bytes(recvd) msg = ('read %s bytes without finding delimiter: %r' % (len(recvd), delimiter)) raise Timeout(timeout, msg) # check the recv buffer except Exception: self.rbuf = bytes(recvd) raise val, self.rbuf = bytes(recvd[:offset]), bytes(recvd[rbuf_offset:]) return val def recv_size(self, size, timeout=_UNSET): """Read off of the internal buffer, then off the socket, until *size* bytes have been read. Args: size (int): number of bytes to read before returning. timeout (float): The timeout for this operation. Can be 0 for nonblocking and None for no timeout. 
Defaults to the value set in the constructor of BufferedSocket. If the appropriate number of bytes cannot be fetched from the buffer and socket before *timeout* expires, then a :exc:`Timeout` will be raised. If the connection is closed, a :exc:`ConnectionClosed` will be raised. """ with self._recv_lock: if timeout is _UNSET: timeout = self.timeout chunks = [] total_bytes = 0 try: start = time.time() self.sock.settimeout(timeout) nxt = self.rbuf or self.sock.recv(self._recvsize) while nxt: total_bytes += len(nxt) if total_bytes >= size: break chunks.append(nxt) if timeout: cur_timeout = timeout - (time.time() - start) if cur_timeout <= 0.0: raise socket.timeout() self.sock.settimeout(cur_timeout) nxt = self.sock.recv(self._recvsize) else: msg = ('connection closed after reading %s of %s requested' ' bytes' % (total_bytes, size)) raise ConnectionClosed(msg) # check recv buffer except socket.timeout: self.rbuf = b''.join(chunks) msg = f'read {total_bytes} of {size} bytes' raise Timeout(timeout, msg) # check recv buffer except Exception: # received data is still buffered in the case of errors self.rbuf = b''.join(chunks) raise extra_bytes = total_bytes - size if extra_bytes: last, self.rbuf = nxt[:-extra_bytes], nxt[-extra_bytes:] else: last, self.rbuf = nxt, b'' chunks.append(last) return b''.join(chunks) def send(self, data, flags=0, timeout=_UNSET): """Send the contents of the internal send buffer, as well as *data*, to the receiving end of the connection. Returns the total number of bytes sent. If no exception is raised, all of *data* was sent and the internal send buffer is empty. Args: data (bytes): The bytes to send. flags (int): Kept for API compatibility with sockets. Only the default 0 is valid. timeout (float): The timeout for this operation. Can be 0 for nonblocking and None for no timeout. Defaults to the value set in the constructor of BufferedSocket. Will raise :exc:`Timeout` if the send operation fails to complete before *timeout*. In the event of an exception, use :meth:`BufferedSocket.getsendbuffer` to see which data was unsent. """ with self._send_lock: if timeout is _UNSET: timeout = self.timeout if flags: raise ValueError("non-zero flags not supported") sbuf = self.sbuf sbuf.append(data) if len(sbuf) > 1: sbuf[:] = [b''.join([s for s in sbuf if s])] self.sock.settimeout(timeout) start, total_sent = time.time(), 0 try: while sbuf[0]: sent = self.sock.send(sbuf[0]) total_sent += sent sbuf[0] = sbuf[0][sent:] if timeout: cur_timeout = timeout - (time.time() - start) if cur_timeout <= 0.0: raise socket.timeout() self.sock.settimeout(cur_timeout) except socket.timeout: raise Timeout(timeout, '%s bytes unsent' % len(sbuf[0])) return total_sent def sendall(self, data, flags=0, timeout=_UNSET): """A passthrough to :meth:`~BufferedSocket.send`, retained for parallelism to the :class:`socket.socket` API. """ return self.send(data, flags, timeout) def flush(self): "Send the contents of the internal send buffer." with self._send_lock: self.send(b'') return def buffer(self, data): "Buffer *data* bytes for the next send operation." with self._send_lock: self.sbuf.append(data) return # # # # # # Passing through some socket basics # # # def getsockname(self): """Convenience function to return the wrapped socket's own address. See :meth:`socket.getsockname` for more details. """ return self.sock.getsockname() def getpeername(self): """Convenience function to return the remote address to which the wrapped socket is connected. See :meth:`socket.getpeername` for more details. 
""" return self.sock.getpeername() def getsockopt(self, level, optname, buflen=None): """Convenience function passing through to the wrapped socket's :meth:`socket.getsockopt`. """ args = (level, optname) if buflen is not None: args += (buflen,) return self.sock.getsockopt(*args) def setsockopt(self, level, optname, value): """Convenience function passing through to the wrapped socket's :meth:`socket.setsockopt`. """ return self.sock.setsockopt(level, optname, value) @property def type(self): """A passthrough to the wrapped socket's type. Valid usages should only ever see :data:`socket.SOCK_STREAM`. """ return self.sock.type @property def family(self): """A passthrough to the wrapped socket's family. BufferedSocket supports all widely-used families, so this read-only attribute can be one of :data:`socket.AF_INET` for IP, :data:`socket.AF_INET6` for IPv6, and :data:`socket.AF_UNIX` for UDS. """ return self.sock.family @property def proto(self): """A passthrough to the wrapped socket's protocol. The ``proto`` attribute is very rarely used, so it's always 0, meaning "the default" protocol. Pretty much all the practical information is in :attr:`~BufferedSocket.type` and :attr:`~BufferedSocket.family`, so you can go back to never thinking about this. """ return self.sock.proto # # # # # # Now for some more advanced interpretations of the builtin socket # # # def fileno(self): """Returns the file descriptor of the wrapped socket. -1 if it has been closed on this end. Note that this makes the BufferedSocket selectable, i.e., usable for operating system event loops without any external libraries. Keep in mind that the operating system cannot know about data in BufferedSocket's internal buffer. Exercise discipline with calling ``recv*`` functions. """ return self.sock.fileno() def close(self): """Closes the wrapped socket, and empties the internal buffers. The send buffer is not flushed automatically, so if you have been calling :meth:`~BufferedSocket.buffer`, be sure to call :meth:`~BufferedSocket.flush` before calling this method. After calling this method, future socket operations will raise :exc:`socket.error`. """ with self._recv_lock: with self._send_lock: self.rbuf = b'' self.rbuf_unconsumed = self.rbuf self.sbuf[:] = [] self.sock.close() return def shutdown(self, how): """Convenience method which passes through to the wrapped socket's :meth:`~socket.shutdown`. Semantics vary by platform, so no special internal handling is done with the buffers. This method exists to facilitate the most common usage, wherein a full ``shutdown`` is followed by a :meth:`~BufferedSocket.close`. Developers requiring more support, please open `an issue`_. .. _an issue: https://github.com/mahmoud/boltons/issues """ with self._recv_lock: with self._send_lock: self.sock.shutdown(how) return # end BufferedSocket class Error(socket.error): """A subclass of :exc:`socket.error` from which all other ``socketutils`` exceptions inherit. When using :class:`BufferedSocket` and other ``socketutils`` types, generally you want to catch one of the specific exception types below, or :exc:`socket.error`. """ pass class ConnectionClosed(Error): """Raised when receiving and the connection is unexpectedly closed from the sending end. Raised from :class:`BufferedSocket`'s :meth:`~BufferedSocket.peek`, :meth:`~BufferedSocket.recv_until`, and :meth:`~BufferedSocket.recv_size`, and never from its :meth:`~BufferedSocket.recv` or :meth:`~BufferedSocket.recv_close`. 
""" pass class MessageTooLong(Error): """Raised from :meth:`BufferedSocket.recv_until` and :meth:`BufferedSocket.recv_closed` when more than *maxsize* bytes are read without encountering the delimiter or a closed connection, respectively. """ def __init__(self, bytes_read=None, delimiter=None): msg = 'message exceeded maximum size' if bytes_read is not None: msg += f'. {bytes_read} bytes read' if delimiter is not None: msg += f'. Delimiter not found: {delimiter!r}' super().__init__(msg) class Timeout(socket.timeout, Error): """Inheriting from :exc:`socket.timeout`, Timeout is used to indicate when a socket operation did not complete within the time specified. Raised from any of :class:`BufferedSocket`'s ``recv`` methods. """ def __init__(self, timeout, extra=""): msg = 'socket operation timed out' if timeout is not None: msg += ' after %sms.' % (timeout * 1000) if extra: msg += ' ' + extra super().__init__(msg) class NetstringSocket: """ Reads and writes using the netstring protocol. More info: https://en.wikipedia.org/wiki/Netstring Even more info: http://cr.yp.to/proto/netstrings.txt """ def __init__(self, sock, timeout=DEFAULT_TIMEOUT, maxsize=DEFAULT_MAXSIZE): self.bsock = BufferedSocket(sock) self.timeout = timeout self.maxsize = maxsize self._msgsize_maxsize = len(str(maxsize)) + 1 # len(str()) == log10 def fileno(self): return self.bsock.fileno() def settimeout(self, timeout): self.timeout = timeout def setmaxsize(self, maxsize): self.maxsize = maxsize self._msgsize_maxsize = self._calc_msgsize_maxsize(maxsize) def _calc_msgsize_maxsize(self, maxsize): return len(str(maxsize)) + 1 # len(str()) == log10 def read_ns(self, timeout=_UNSET, maxsize=_UNSET): if timeout is _UNSET: timeout = self.timeout if maxsize is _UNSET: maxsize = self.maxsize msgsize_maxsize = self._msgsize_maxsize else: msgsize_maxsize = self._calc_msgsize_maxsize(maxsize) size_prefix = self.bsock.recv_until(b':', timeout=timeout, maxsize=msgsize_maxsize) try: size = int(size_prefix) except ValueError: raise NetstringInvalidSize('netstring message size must be valid' ' integer, not %r' % size_prefix) if size > maxsize: raise NetstringMessageTooLong(size, maxsize) payload = self.bsock.recv_size(size) if self.bsock.recv(1) != b',': raise NetstringProtocolError("expected trailing ',' after message") return payload def write_ns(self, payload): size = len(payload) if size > self.maxsize: raise NetstringMessageTooLong(size, self.maxsize) data = str(size).encode('ascii') + b':' + payload + b',' self.bsock.send(data) class NetstringProtocolError(Error): "Base class for all of socketutils' Netstring exception types." pass class NetstringInvalidSize(NetstringProtocolError): """NetstringInvalidSize is raised when the ``:``-delimited size prefix of the message does not contain a valid integer. Message showing valid size:: 5:hello, Here the ``5`` is the size. Anything in this prefix position that is not parsable as a Python integer (i.e., :class:`int`) will raise this exception. """ def __init__(self, msg): super().__init__(msg) class NetstringMessageTooLong(NetstringProtocolError): """NetstringMessageTooLong is raised when the size prefix contains a valid integer, but that integer is larger than the :class:`NetstringSocket`'s configured *maxsize*. When this exception is raised, it's recommended to simply close the connection instead of trying to recover. 
""" def __init__(self, size, maxsize): msg = ('netstring message length exceeds configured maxsize: %s > %s' % (size, maxsize)) super().__init__(msg) """ attrs worth adding/passing through: properties: type, proto For its main functionality, BufferedSocket can wrap any object that has the following methods: - gettimeout() - settimeout() - recv(size) - send(data) The following methods are passed through: ... """ # TODO: buffered socket check socket.type == SOCK_STREAM? # TODO: make recv_until support taking a regex # TODO: including the delimiter in the recv_until return is not # necessary, as ConnectionClosed differentiates empty messages # from socket closes. boltons-25.0.0/boltons/statsutils.py000066400000000000000000000723431475005545200175470ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """``statsutils`` provides tools aimed primarily at descriptive statistics for data analysis, such as :func:`mean` (average), :func:`median`, :func:`variance`, and many others, The :class:`Stats` type provides all the main functionality of the ``statsutils`` module. A :class:`Stats` object wraps a given dataset, providing all statistical measures as property attributes. These attributes cache their results, which allows efficient computation of multiple measures, as many measures rely on other measures. For example, relative standard deviation (:attr:`Stats.rel_std_dev`) relies on both the mean and standard deviation. The Stats object caches those results so no rework is done. The :class:`Stats` type's attributes have module-level counterparts for convenience when the computation reuse advantages do not apply. >>> stats = Stats(range(42)) >>> stats.mean 20.5 >>> mean(range(42)) 20.5 Statistics is a large field, and ``statsutils`` is focused on a few basic techniques that are useful in software. The following is a brief introduction to those techniques. For a more in-depth introduction, `Statistics for Software `_, an article I wrote on the topic. It introduces key terminology vital to effective usage of statistics. 
Statistical moments ------------------- Python programmers are probably familiar with the concept of the *mean* or *average*, which gives a rough quantitative middle value by which a sample can be generalized. However, the mean is just the first of four `moment`_-based measures by which a sample or distribution can be measured. The four `Standardized moments`_ are: 1. `Mean`_ - :func:`mean` - theoretical middle value 2. `Variance`_ - :func:`variance` - width of value dispersion 3. `Skewness`_ - :func:`skewness` - symmetry of distribution 4. `Kurtosis`_ - :func:`kurtosis` - "peakiness" or "long-tailed"-ness For more information check out `the Moment article on Wikipedia`_. .. _moment: https://en.wikipedia.org/wiki/Moment_(mathematics) .. _Standardized moments: https://en.wikipedia.org/wiki/Standardized_moment .. _Mean: https://en.wikipedia.org/wiki/Mean .. _Variance: https://en.wikipedia.org/wiki/Variance .. _Skewness: https://en.wikipedia.org/wiki/Skewness .. _Kurtosis: https://en.wikipedia.org/wiki/Kurtosis .. _the Moment article on Wikipedia: https://en.wikipedia.org/wiki/Moment_(mathematics) Keep in mind that while these moments can give a bit more insight into the shape and distribution of data, they do not guarantee a complete picture. Wildly different datasets can have the same values for all four moments, so generalize wisely. Robust statistics ----------------- Moment-based statistics are notorious for being easily skewed by outliers. The whole field of robust statistics aims to mitigate this dilemma. ``statsutils`` also includes several robust statistical methods: * `Median`_ - The middle value of a sorted dataset * `Trimean`_ - Another robust measure of the data's central tendency * `Median Absolute Deviation`_ (MAD) - A robust measure of variability, a natural counterpart to :func:`variance`. * `Trimming`_ - Reducing a dataset to only the middle majority of data is a simple way of making other estimators more robust. .. _Median: https://en.wikipedia.org/wiki/Median .. _Trimean: https://en.wikipedia.org/wiki/Trimean .. _Median Absolute Deviation: https://en.wikipedia.org/wiki/Median_absolute_deviation .. _Trimming: https://en.wikipedia.org/wiki/Trimmed_estimator Online and Offline Statistics ----------------------------- Unrelated to computer networking, `online`_ statistics involve calculating statistics in a `streaming`_ fashion, without all the data being available. The :class:`Stats` type is meant for the more traditional offline statistics when all the data is available. For pure-Python online statistics accumulators, look at the `Lithoxyl`_ system instrumentation package. .. _Online: https://en.wikipedia.org/wiki/Online_algorithm .. _streaming: https://en.wikipedia.org/wiki/Streaming_algorithm .. _Lithoxyl: https://github.com/mahmoud/lithoxyl """ import bisect from math import floor, ceil from collections import Counter class _StatsProperty: def __init__(self, name, func): self.name = name self.func = func self.internal_name = '_' + name doc = func.__doc__ or '' pre_doctest_doc, _, _ = doc.partition('>>>') self.__doc__ = pre_doctest_doc def __get__(self, obj, objtype=None): if obj is None: return self if not obj.data: return obj.default try: return getattr(obj, self.internal_name) except AttributeError: setattr(obj, self.internal_name, self.func(obj)) return getattr(obj, self.internal_name) class Stats: """The ``Stats`` type is used to represent a group of unordered statistical datapoints for calculations such as mean, median, and variance.
Args: data (list): List or other iterable containing numeric values. default (float): A value to be returned when a given statistical measure is not defined. 0.0 by default, but ``float('nan')`` is appropriate for stricter applications. use_copy (bool): By default Stats objects copy the initial data into a new list to avoid issues with modifications. Pass ``False`` to disable this behavior. is_sorted (bool): Presorted data can skip an extra sorting step for a little speed boost. Defaults to False. """ def __init__(self, data, default=0.0, use_copy=True, is_sorted=False): self._use_copy = use_copy self._is_sorted = is_sorted if use_copy: self.data = list(data) else: self.data = data self.default = default cls = self.__class__ self._prop_attr_names = [a for a in dir(self) if isinstance(getattr(cls, a, None), _StatsProperty)] self._pearson_precision = 0 def __len__(self): return len(self.data) def __iter__(self): return iter(self.data) def _get_sorted_data(self): """When using a copy of the data, it's better to have that copy be sorted, but we do it lazily using this method, in case no sorted measures are used. I.e., if median is never called, sorting would be a waste. When not using a copy, it's presumed that all optimizations are on the user. """ if not self._use_copy: return sorted(self.data) elif not self._is_sorted: self.data.sort() return self.data def clear_cache(self): """``Stats`` objects automatically cache intermediary calculations that can be reused. For instance, accessing the ``std_dev`` attribute after the ``variance`` attribute will be significantly faster for medium-to-large datasets. If you modify the object by adding additional data points, call this function to have the cached statistics recomputed. """ for attr_name in self._prop_attr_names: attr_name = getattr(self.__class__, attr_name).internal_name if not hasattr(self, attr_name): continue delattr(self, attr_name) return def _calc_count(self): """The number of items in this Stats object. Returns the same as :func:`len` on a Stats object, but provided for pandas terminology parallelism. >>> Stats(range(20)).count 20 """ return len(self.data) count = _StatsProperty('count', _calc_count) def _calc_mean(self): """ The arithmetic mean, or "average". Sum of the values divided by the number of values. >>> mean(range(20)) 9.5 >>> mean(list(range(19)) + [949]) # 949 is an arbitrary outlier 56.0 """ return sum(self.data, 0.0) / len(self.data) mean = _StatsProperty('mean', _calc_mean) def _calc_max(self): """ The maximum value present in the data. >>> Stats([2, 1, 3]).max 3 """ if self._is_sorted: return self.data[-1] return max(self.data) max = _StatsProperty('max', _calc_max) def _calc_min(self): """ The minimum value present in the data. >>> Stats([2, 1, 3]).min 1 """ if self._is_sorted: return self.data[0] return min(self.data) min = _StatsProperty('min', _calc_min) def _calc_median(self): """ The median is either the middle value or the average of the two middle values of a sample. Compared to the mean, it's generally more resilient to the presence of outliers in the sample. >>> median([2, 1, 3]) 2 >>> median(range(97)) 48 >>> median(list(range(96)) + [1066]) # 1066 is an arbitrary outlier 48 """ return self._get_quantile(self._get_sorted_data(), 0.5) median = _StatsProperty('median', _calc_median) def _calc_iqr(self): """Inter-quartile range (IQR) is the difference between the 75th percentile and 25th percentile. 
IQR is a robust measure of dispersion, like standard deviation, but safer to compare between datasets, as it is less influenced by outliers. >>> iqr([1, 2, 3, 4, 5]) 2 >>> iqr(range(1001)) 500 """ return self.get_quantile(0.75) - self.get_quantile(0.25) iqr = _StatsProperty('iqr', _calc_iqr) def _calc_trimean(self): """The trimean is a robust measure of central tendency, like the median, that takes the weighted average of the median and the upper and lower quartiles. >>> trimean([2, 1, 3]) 2.0 >>> trimean(range(97)) 48.0 >>> trimean(list(range(96)) + [1066]) # 1066 is an arbitrary outlier 48.0 """ sorted_data = self._get_sorted_data() gq = lambda q: self._get_quantile(sorted_data, q) return (gq(0.25) + (2 * gq(0.5)) + gq(0.75)) / 4.0 trimean = _StatsProperty('trimean', _calc_trimean) def _calc_variance(self): """\ Variance is the average of the squares of the difference between each value and the mean. >>> variance(range(97)) 784.0 """ global mean # defined elsewhere in this file return mean(self._get_pow_diffs(2)) variance = _StatsProperty('variance', _calc_variance) def _calc_std_dev(self): """\ Standard deviation. Square root of the variance. >>> std_dev(range(97)) 28.0 """ return self.variance ** 0.5 std_dev = _StatsProperty('std_dev', _calc_std_dev) def _calc_median_abs_dev(self): """\ Median Absolute Deviation is a robust measure of statistical dispersion: http://en.wikipedia.org/wiki/Median_absolute_deviation >>> median_abs_dev(range(97)) 24.0 """ global median # defined elsewhere in this file sorted_vals = sorted(self.data) x = float(median(sorted_vals)) return median([abs(x - v) for v in sorted_vals]) median_abs_dev = _StatsProperty('median_abs_dev', _calc_median_abs_dev) mad = median_abs_dev # convenience def _calc_rel_std_dev(self): """\ Standard deviation divided by the absolute value of the average. http://en.wikipedia.org/wiki/Relative_standard_deviation >>> print('%1.3f' % rel_std_dev(range(97))) 0.583 """ abs_mean = abs(self.mean) if abs_mean: return self.std_dev / abs_mean else: return self.default rel_std_dev = _StatsProperty('rel_std_dev', _calc_rel_std_dev) def _calc_skewness(self): """\ Indicates the asymmetry of a curve. Positive values mean the bulk of the values are on the left side of the average and vice versa. http://en.wikipedia.org/wiki/Skewness See the module docstring for more about statistical moments. >>> skewness(range(97)) # symmetrical around 48.0 0.0 >>> left_skewed = skewness(list(range(97)) + list(range(10))) >>> right_skewed = skewness(list(range(97)) + list(range(87, 97))) >>> round(left_skewed, 3), round(right_skewed, 3) (0.114, -0.114) """ data, s_dev = self.data, self.std_dev if len(data) > 1 and s_dev > 0: return (sum(self._get_pow_diffs(3)) / float((len(data) - 1) * (s_dev ** 3))) else: return self.default skewness = _StatsProperty('skewness', _calc_skewness) def _calc_kurtosis(self): """\ Indicates how much data is in the tails of the distribution. The result is always positive, with the normal "bell-curve" distribution having a kurtosis of 3. http://en.wikipedia.org/wiki/Kurtosis See the module docstring for more about statistical moments. >>> kurtosis(range(9)) 1.99125 With a kurtosis of 1.99125, [0, 1, 2, 3, 4, 5, 6, 7, 8] is more centrally distributed than the normal curve. 
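As a further hand-checked example, a small sample with one extreme value lands well above the normal curve's 3:

>>> kurtosis([0] * 4 + [10]) > 3
True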
""" data, s_dev = self.data, self.std_dev if len(data) > 1 and s_dev > 0: return (sum(self._get_pow_diffs(4)) / float((len(data) - 1) * (s_dev ** 4))) else: return 0.0 kurtosis = _StatsProperty('kurtosis', _calc_kurtosis) def _calc_pearson_type(self): precision = self._pearson_precision skewness = self.skewness kurtosis = self.kurtosis beta1 = skewness ** 2.0 beta2 = kurtosis * 1.0 # TODO: range checks? c0 = (4 * beta2) - (3 * beta1) c1 = skewness * (beta2 + 3) c2 = (2 * beta2) - (3 * beta1) - 6 if round(c1, precision) == 0: if round(beta2, precision) == 3: return 0 # Normal else: if beta2 < 3: return 2 # Symmetric Beta elif beta2 > 3: return 7 elif round(c2, precision) == 0: return 3 # Gamma else: k = c1 ** 2 / (4 * c0 * c2) if k < 0: return 1 # Beta raise RuntimeError('missed a spot') pearson_type = _StatsProperty('pearson_type', _calc_pearson_type) @staticmethod def _get_quantile(sorted_data, q): data, n = sorted_data, len(sorted_data) idx = q / 1.0 * (n - 1) idx_f, idx_c = int(floor(idx)), int(ceil(idx)) if idx_f == idx_c: return data[idx_f] return (data[idx_f] * (idx_c - idx)) + (data[idx_c] * (idx - idx_f)) def get_quantile(self, q): """Get a quantile from the dataset. Quantiles are floating point values between ``0.0`` and ``1.0``, with ``0.0`` representing the minimum value in the dataset and ``1.0`` representing the maximum. ``0.5`` represents the median: >>> Stats(range(100)).get_quantile(0.5) 49.5 """ q = float(q) if not 0.0 <= q <= 1.0: raise ValueError('expected q between 0.0 and 1.0, not %r' % q) elif not self.data: return self.default return self._get_quantile(self._get_sorted_data(), q) def get_zscore(self, value): """Get the z-score for *value* in the group. If the standard deviation is 0, 0 inf or -inf will be returned to indicate whether the value is equal to, greater than or below the group's mean. """ mean = self.mean if self.std_dev == 0: if value == mean: return 0 if value > mean: return float('inf') if value < mean: return float('-inf') return (float(value) - mean) / self.std_dev def trim_relative(self, amount=0.15): """A utility function used to cut a proportion of values off each end of a list of values. This has the effect of limiting the effect of outliers. Args: amount (float): A value between 0.0 and 0.5 to trim off of each side of the data. .. note: This operation modifies the data in-place. It does not make or return a copy. """ trim = float(amount) if not 0.0 <= trim < 0.5: raise ValueError('expected amount between 0.0 and 0.5, not %r' % trim) size = len(self.data) size_diff = int(size * trim) if size_diff == 0.0: return self.data = self._get_sorted_data()[size_diff:-size_diff] self.clear_cache() def _get_pow_diffs(self, power): """ A utility function used for calculating statistical moments. """ m = self.mean return [(v - m) ** power for v in self.data] def _get_bin_bounds(self, count=None, with_max=False): if not self.data: return [0.0] # TODO: raise? 
        data = self.data
        len_data, min_data, max_data = len(data), min(data), max(data)

        if len_data < 4:
            if not count:
                count = len_data
            dx = (max_data - min_data) / float(count)
            bins = [min_data + (dx * i) for i in range(count)]
        elif count is None:
            # freedman algorithm for fixed-width bin selection
            q25, q75 = self.get_quantile(0.25), self.get_quantile(0.75)
            dx = 2 * (q75 - q25) / (len_data ** (1 / 3.0))
            bin_count = max(1, int(ceil((max_data - min_data) / dx)))
            bins = [min_data + (dx * i) for i in range(bin_count + 1)]
            bins = [b for b in bins if b < max_data]
        else:
            dx = (max_data - min_data) / float(count)
            bins = [min_data + (dx * i) for i in range(count)]

        if with_max:
            bins.append(float(max_data))

        return bins

    def get_histogram_counts(self, bins=None, **kw):
        """Produces a list of ``(bin, count)`` pairs comprising a histogram
        of the Stats object's data, using fixed-width bins. See
        :meth:`Stats.format_histogram` for more details.

        Args:
            bins (int): maximum number of bins, or list of
                floating-point bin boundaries. Defaults to the output of
                Freedman's algorithm.
            bin_digits (int): Number of digits used to round down the
                bin boundaries. Defaults to 1.

        The output of this method can be stored and/or modified, and
        then passed to :func:`statsutils.format_histogram_counts` to
        achieve the same text formatting as the
        :meth:`~Stats.format_histogram` method. This can be useful for
        snapshotting over time.
        """
        bin_digits = int(kw.pop('bin_digits', 1))
        if kw:
            raise TypeError('unexpected keyword arguments: %r' % kw.keys())

        if not bins:
            bins = self._get_bin_bounds()
        else:
            try:
                bin_count = int(bins)
            except TypeError:
                try:
                    bins = [float(x) for x in bins]
                except Exception:
                    raise ValueError('bins expected integer bin count or list'
                                     ' of float bin boundaries, not %r'
                                     % bins)
                if self.min < bins[0]:
                    bins = [self.min] + bins
            else:
                bins = self._get_bin_bounds(bin_count)

        # floor and ceil really should have taken ndigits, like round()
        round_factor = 10.0 ** bin_digits
        bins = [floor(b * round_factor) / round_factor for b in bins]
        bins = sorted(set(bins))

        idxs = [bisect.bisect(bins, d) - 1 for d in self.data]
        count_map = Counter(idxs)

        bin_counts = [(b, count_map.get(i, 0)) for i, b in enumerate(bins)]

        return bin_counts

    def format_histogram(self, bins=None, **kw):
        """Produces a textual histogram of the data, using fixed-width bins,
        allowing for simple visualization, even in console environments.

        >>> data = list(range(20)) + list(range(5, 15)) + [10]
        >>> print(Stats(data).format_histogram(width=30))
         0.0:  5 #########
         4.4:  8 ###############
         8.9: 11 ####################
        13.3:  5 #########
        17.8:  2 ####

        In this histogram, five values are between 0.0 and 4.4, eight
        are between 4.4 and 8.9, and two values lie between 17.8 and
        the max.

        You can specify the number of bins, or provide a list of
        bin boundaries themselves. If no bins are provided, as in the
        example above, `Freedman's algorithm`_ for bin selection is
        used.

        Args:
            bins (int): Maximum number of bins for the
                histogram. Also accepts a list of floating-point
                bin boundaries. If the minimum boundary is still
                greater than the minimum value in the data, that
                boundary will be implicitly added. Defaults to the bin
                boundaries returned by `Freedman's algorithm`_.
            bin_digits (int): Number of digits to round each bin
                to. Note that bins are always rounded down to avoid
                clipping any data. Defaults to 1.
            width (int): integer number of columns in the longest line
                in the histogram. Defaults to console width on Python
                3.3+, or 80 if that is not available.
            format_bin (callable): Called on each bin to create a
                label for the final output. Use this function to add
                units, such as "ms" for milliseconds.

        Should you want something more programmatically reusable, see
        the :meth:`~Stats.get_histogram_counts` method, the output of
        which is used by format_histogram. The :meth:`~Stats.describe`
        method is another useful summarization method, albeit less
        visual.

        .. _Freedman's algorithm: https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule
        """
        width = kw.pop('width', None)
        format_bin = kw.pop('format_bin', None)
        bin_counts = self.get_histogram_counts(bins=bins, **kw)
        return format_histogram_counts(bin_counts,
                                       width=width,
                                       format_bin=format_bin)

    def describe(self, quantiles=None, format=None):
        """Provides standard summary statistics for the data in the Stats
        object, in one of several convenient formats.

        Args:
            quantiles (list): A list of numeric values to use as
                quantiles in the resulting summary. All values must be
                0.0-1.0, with 0.5 representing the median. Defaults to
                ``[0.25, 0.5, 0.75]``, representing the standard
                quartiles.
            format (str): Controls the return type of the function,
                with one of three valid values: ``"dict"`` gives back
                a :class:`dict` with the appropriate keys and
                values. ``"list"`` is a list of key-value pairs in an
                order suitable to pass to an OrderedDict or HTML
                table. ``"text"`` converts the values to text suitable
                for printing, as seen below.

        Here is the information returned by a default ``describe``, as
        presented in the ``"text"`` format:

        >>> stats = Stats(range(1, 8))
        >>> print(stats.describe(format='text'))
        count:    7
        mean:     4.0
        std_dev:  2.0
        mad:      2.0
        min:      1
        0.25:     2.5
        0.5:      4
        0.75:     5.5
        max:      7

        For more advanced descriptive statistics, check out my blog
        post on the topic `Statistics for Software
        <https://www.paypal-engineering.com/2016/04/11/statistics-for-software/>`_.
        """
        if format is None:
            format = 'dict'
        elif format not in ('dict', 'list', 'text'):
            raise ValueError('invalid format for describe,'
                             ' expected one of "dict"/"list"/"text", not %r'
                             % format)
        quantiles = quantiles or [0.25, 0.5, 0.75]
        q_items = []
        for q in quantiles:
            q_val = self.get_quantile(q)
            q_items.append((str(q), q_val))

        items = [('count', self.count),
                 ('mean', self.mean),
                 ('std_dev', self.std_dev),
                 ('mad', self.mad),
                 ('min', self.min)]

        items.extend(q_items)
        items.append(('max', self.max))
        if format == 'dict':
            ret = dict(items)
        elif format == 'list':
            ret = items
        elif format == 'text':
            ret = '\n'.join(['{}{}'.format((label + ':').ljust(10), val)
                             for label, val in items])
        return ret


def describe(data, quantiles=None, format=None):
    """A convenience function to get standard summary statistics useful
    for describing most data. See :meth:`Stats.describe` for more
    details.

    >>> print(describe(range(7), format='text'))
    count:    7
    mean:     3.0
    std_dev:  2.0
    mad:      2.0
    min:      0
    0.25:     1.5
    0.5:      3
    0.75:     4.5
    max:      6

    See :meth:`Stats.format_histogram` for another very useful
    summarization that uses textual visualization.
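
    The default return type is a :class:`dict` keyed by the stat names
    shown above:

    >>> describe([1, 2, 3])['mean']
    2.0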
""" return Stats(data).describe(quantiles=quantiles, format=format) def _get_conv_func(attr_name): def stats_helper(data, default=0.0): return getattr(Stats(data, default=default, use_copy=False), attr_name) return stats_helper for attr_name, attr in list(Stats.__dict__.items()): if isinstance(attr, _StatsProperty): if attr_name in ('max', 'min', 'count'): # don't shadow builtins continue if attr_name in ('mad',): # convenience aliases continue func = _get_conv_func(attr_name) func.__doc__ = attr.func.__doc__ globals()[attr_name] = func delattr(Stats, '_calc_' + attr_name) # cleanup del attr del attr_name del func def format_histogram_counts(bin_counts, width=None, format_bin=None): """The formatting logic behind :meth:`Stats.format_histogram`, which takes the output of :meth:`Stats.get_histogram_counts`, and passes them to this function. Args: bin_counts (list): A list of bin values to counts. width (int): Number of character columns in the text output, defaults to 80 or console width in Python 3.3+. format_bin (callable): Used to convert bin values into string labels. """ lines = [] if not format_bin: format_bin = lambda v: v if not width: try: import shutil # python 3 convenience width = shutil.get_terminal_size()[0] except Exception: width = 80 bins = [b for b, _ in bin_counts] count_max = max([count for _, count in bin_counts]) count_cols = len(str(count_max)) labels = ['%s' % format_bin(b) for b in bins] label_cols = max([len(l) for l in labels]) tmp_line = '{}: {} #'.format('x' * label_cols, count_max) bar_cols = max(width - len(tmp_line), 3) line_k = float(bar_cols) / count_max tmpl = "{label:>{label_cols}}: {count:>{count_cols}} {bar}" for label, (bin_val, count) in zip(labels, bin_counts): bar_len = int(round(count * line_k)) bar = ('#' * bar_len) or '|' line = tmpl.format(label=label, label_cols=label_cols, count=count, count_cols=count_cols, bar=bar) lines.append(line) return '\n'.join(lines) boltons-25.0.0/boltons/strutils.py000066400000000000000000001251101475005545200172100ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""So much practical programming involves string manipulation, which Python readily accommodates. Still, there are dozens of basic and common capabilities missing from the standard library, several of them provided by ``strutils``. """ import builtins import re import sys import uuid import zlib import string import unicodedata import collections from collections.abc import Mapping from gzip import GzipFile from html.parser import HTMLParser from html import entities as htmlentitydefs from io import BytesIO as StringIO __all__ = ['camel2under', 'under2camel', 'slugify', 'split_punct_ws', 'unit_len', 'ordinalize', 'cardinalize', 'pluralize', 'singularize', 'asciify', 'is_ascii', 'is_uuid', 'html2text', 'strip_ansi', 'bytes2human', 'find_hashtags', 'a10n', 'gzip_bytes', 'gunzip_bytes', 'iter_splitlines', 'indent', 'escape_shell_args', 'args2cmd', 'args2sh', 'parse_int_list', 'format_int_list', 'complement_int_list', 'int_ranges_from_int_list', 'MultiReplace', 'multi_replace', 'unwrap_text', 'removeprefix'] _punct_ws_str = string.punctuation + string.whitespace _punct_re = re.compile('[' + _punct_ws_str + ']+') _camel2under_re = re.compile('((?<=[a-z0-9])[A-Z]|(?!^)[A-Z](?=[a-z]))') def camel2under(camel_string): """Converts a camelcased string to underscores. Useful for turning a class name into a function name. >>> camel2under('BasicParseTest') 'basic_parse_test' """ return _camel2under_re.sub(r'_\1', camel_string).lower() def under2camel(under_string): """Converts an underscored string to camelcased. Useful for turning a function name into a class name. >>> under2camel('complex_tokenizer') 'ComplexTokenizer' """ return ''.join(w.capitalize() or '_' for w in under_string.split('_')) def slugify(text, delim='_', lower=True, ascii=False): """ A basic function that turns text full of scary characters (i.e., punctuation and whitespace), into a relatively safe lowercased string separated only by the delimiter specified by *delim*, which defaults to ``_``. The *ascii* convenience flag will :func:`asciify` the slug if you require ascii-only slugs. >>> slugify('First post! Hi!!!!~1 ') 'first_post_hi_1' >>> slugify("Kurt Gödel's pretty cool.", ascii=True) == \ b'kurt_goedel_s_pretty_cool' True """ ret = delim.join(split_punct_ws(text)) or delim if text else '' if ascii: ret = asciify(ret) if lower: ret = ret.lower() return ret def split_punct_ws(text): """While :meth:`str.split` will split on whitespace, :func:`split_punct_ws` will split on punctuation and whitespace. This used internally by :func:`slugify`, above. >>> split_punct_ws('First post! Hi!!!!~1 ') ['First', 'post', 'Hi', '1'] """ return [w for w in _punct_re.split(text) if w] def unit_len(sized_iterable, unit_noun='item'): # TODO: len_units()/unitize()? """Returns a plain-English description of an iterable's :func:`len()`, conditionally pluralized with :func:`cardinalize`, detailed below. >>> print(unit_len(range(10), 'number')) 10 numbers >>> print(unit_len('aeiou', 'vowel')) 5 vowels >>> print(unit_len([], 'worry')) No worries """ count = len(sized_iterable) units = cardinalize(unit_noun, count) if count: return f'{count} {units}' return f'No {units}' _ORDINAL_MAP = {'1': 'st', '2': 'nd', '3': 'rd'} # 'th' is the default def ordinalize(number, ext_only=False): """Turns *number* into its cardinal form, i.e., 1st, 2nd, 3rd, 4th, etc. If the last character isn't a digit, it returns the string value unchanged. Args: number (int or str): Number to be cardinalized. ext_only (bool): Whether to return only the suffix. Default ``False``. 
>>> print(ordinalize(1)) 1st >>> print(ordinalize(3694839230)) 3694839230th >>> print(ordinalize('hi')) hi >>> print(ordinalize(1515)) 1515th """ numstr, ext = str(number), '' if numstr and numstr[-1] in string.digits: try: # first check for teens if numstr[-2] == '1': ext = 'th' else: # all other cases ext = _ORDINAL_MAP.get(numstr[-1], 'th') except IndexError: # single digit numbers (will reach here based on [-2] above) ext = _ORDINAL_MAP.get(numstr[-1], 'th') if ext_only: return ext else: return numstr + ext def cardinalize(unit_noun, count): """Conditionally pluralizes a singular word *unit_noun* if *count* is not one, preserving case when possible. >>> vowels = 'aeiou' >>> print(len(vowels), cardinalize('vowel', len(vowels))) 5 vowels >>> print(3, cardinalize('Wish', 3)) 3 Wishes """ if count == 1: return unit_noun return pluralize(unit_noun) def singularize(word): """Semi-intelligently converts an English plural *word* to its singular form, preserving case pattern. >>> singularize('chances') 'chance' >>> singularize('Activities') 'Activity' >>> singularize('Glasses') 'Glass' >>> singularize('FEET') 'FOOT' """ orig_word, word = word, word.strip().lower() if not word or word in _IRR_S2P: return orig_word irr_singular = _IRR_P2S.get(word) if irr_singular: singular = irr_singular elif not word.endswith('s'): return orig_word elif len(word) == 2: singular = word[:-1] # or just return word? elif word.endswith('ies') and word[-4:-3] not in 'aeiou': singular = word[:-3] + 'y' elif word.endswith('es') and word[-3] == 's': singular = word[:-2] else: singular = word[:-1] return _match_case(orig_word, singular) def pluralize(word): """Semi-intelligently converts an English *word* from singular form to plural, preserving case pattern. >>> pluralize('friend') 'friends' >>> pluralize('enemy') 'enemies' >>> pluralize('Sheep') 'Sheep' """ orig_word, word = word, word.strip().lower() if not word or word in _IRR_P2S: return orig_word irr_plural = _IRR_S2P.get(word) if irr_plural: plural = irr_plural elif word.endswith('y') and word[-2:-1] not in 'aeiou': plural = word[:-1] + 'ies' elif word[-1] == 's' or word.endswith('ch') or word.endswith('sh'): plural = word if word.endswith('es') else word + 'es' else: plural = word + 's' return _match_case(orig_word, plural) def _match_case(master, disciple): if not master.strip(): return disciple if master.lower() == master: return disciple.lower() elif master.upper() == master: return disciple.upper() elif master.title() == master: return disciple.title() return disciple # Singular to plural map of irregular pluralizations _IRR_S2P = {'addendum': 'addenda', 'alga': 'algae', 'alumna': 'alumnae', 'alumnus': 'alumni', 'analysis': 'analyses', 'antenna': 'antennae', 'appendix': 'appendices', 'axis': 'axes', 'bacillus': 'bacilli', 'bacterium': 'bacteria', 'basis': 'bases', 'beau': 'beaux', 'bison': 'bison', 'bureau': 'bureaus', 'cactus': 'cacti', 'calf': 'calves', 'child': 'children', 'corps': 'corps', 'corpus': 'corpora', 'crisis': 'crises', 'criterion': 'criteria', 'curriculum': 'curricula', 'datum': 'data', 'deer': 'deer', 'diagnosis': 'diagnoses', 'die': 'dice', 'dwarf': 'dwarves', 'echo': 'echoes', 'elf': 'elves', 'ellipsis': 'ellipses', 'embargo': 'embargoes', 'emphasis': 'emphases', 'erratum': 'errata', 'fireman': 'firemen', 'fish': 'fish', 'focus': 'foci', 'foot': 'feet', 'formula': 'formulae', 'formula': 'formulas', 'fungus': 'fungi', 'genus': 'genera', 'goose': 'geese', 'half': 'halves', 'hero': 'heroes', 'hippopotamus': 'hippopotami', 'hoof': 'hooves', 
'hypothesis': 'hypotheses', 'index': 'indices', 'knife': 'knives', 'leaf': 'leaves', 'life': 'lives', 'loaf': 'loaves', 'louse': 'lice', 'man': 'men', 'matrix': 'matrices', 'means': 'means', 'medium': 'media', 'memorandum': 'memoranda', 'millennium': 'milennia', 'moose': 'moose', 'mosquito': 'mosquitoes', 'mouse': 'mice', 'nebula': 'nebulae', 'neurosis': 'neuroses', 'nucleus': 'nuclei', 'oasis': 'oases', 'octopus': 'octopi', 'offspring': 'offspring', 'ovum': 'ova', 'ox': 'oxen', 'paralysis': 'paralyses', 'parenthesis': 'parentheses', 'person': 'people', 'phenomenon': 'phenomena', 'potato': 'potatoes', 'radius': 'radii', 'scarf': 'scarves', 'scissors': 'scissors', 'self': 'selves', 'sense': 'senses', 'series': 'series', 'sheep': 'sheep', 'shelf': 'shelves', 'species': 'species', 'stimulus': 'stimuli', 'stratum': 'strata', 'syllabus': 'syllabi', 'symposium': 'symposia', 'synopsis': 'synopses', 'synthesis': 'syntheses', 'tableau': 'tableaux', 'that': 'those', 'thesis': 'theses', 'thief': 'thieves', 'this': 'these', 'tomato': 'tomatoes', 'tooth': 'teeth', 'torpedo': 'torpedoes', 'vertebra': 'vertebrae', 'veto': 'vetoes', 'vita': 'vitae', 'watch': 'watches', 'wife': 'wives', 'wolf': 'wolves', 'woman': 'women'} # Reverse index of the above _IRR_P2S = {v: k for k, v in _IRR_S2P.items()} HASHTAG_RE = re.compile(r"(?:^|\s)[##]{1}(\w+)", re.UNICODE) def find_hashtags(string): """Finds and returns all hashtags in a string, with the hashmark removed. Supports full-width hashmarks for Asian languages and does not false-positive on URL anchors. >>> find_hashtags('#atag http://asite/#ananchor') ['atag'] ``find_hashtags`` also works with unicode hashtags. """ # the following works, doctest just struggles with it # >>> find_hashtags(u"can't get enough of that dignity chicken #肯德基 woo") # [u'\u80af\u5fb7\u57fa'] return HASHTAG_RE.findall(string) def a10n(string): """That thing where "internationalization" becomes "i18n", what's it called? Abbreviation? Oh wait, no: ``a10n``. (It's actually a form of `numeronym`_.) >>> a10n('abbreviation') 'a10n' >>> a10n('internationalization') 'i18n' >>> a10n('') '' .. _numeronym: http://en.wikipedia.org/wiki/Numeronym """ if len(string) < 3: return string return f'{string[0]}{len(string[1:-1])}{string[-1]}' # Based on https://en.wikipedia.org/wiki/ANSI_escape_code#Escape_sequences ANSI_SEQUENCES = re.compile(r''' \x1B # Sequence starts with ESC, i.e. hex 0x1B (?: [@-Z\\-_] # Second byte: # all 0x40–0x5F range but CSI char, i.e ASCII @A–Z\]^_ | # Or \[ # CSI sequences, starting with [ [0-?]* # Parameter bytes: # range 0x30–0x3F, ASCII 0–9:;<=>? [ -/]* # Intermediate bytes: # range 0x20–0x2F, ASCII space and !"#$%&'()*+,-./ [@-~] # Final byte # range 0x40–0x7E, ASCII @A–Z[\]^_`a–z{|}~ ) ''', re.VERBOSE) def strip_ansi(text): """Strips ANSI escape codes from *text*. Useful for the occasional time when a log or redirected output accidentally captures console color codes and the like. >>> strip_ansi('\x1b[0m\x1b[1;36mart\x1b[46;34m') 'art' Supports str, bytes and bytearray content as input. Returns the same type as the input. There's a lot of ANSI art available for testing on `sixteencolors.net`_. This function does not interpret or render ANSI art, but you can do so with `ansi2img`_ or `escapes.js`_. .. _sixteencolors.net: http://sixteencolors.net .. _ansi2img: http://www.bedroomlan.org/projects/ansi2img .. 
_escapes.js: https://github.com/atdt/escapes.js """ # TODO: move to cliutils.py # Transform any ASCII-like content to unicode to allow regex to match, and # save input type for later. target_type = None # Unicode type aliased to str is code-smell for Boltons in Python 3 env. if isinstance(text, (bytes, bytearray)): target_type = type(text) text = text.decode('utf-8') cleaned = ANSI_SEQUENCES.sub('', text) # Transform back the result to the same bytearray type provided by the user. if target_type and target_type != type(cleaned): cleaned = target_type(cleaned, 'utf-8') return cleaned def asciify(text, ignore=False): """Converts a unicode or bytestring, *text*, into a bytestring with just ascii characters. Performs basic deaccenting for all you Europhiles out there. Also, a gentle reminder that this is a **utility**, primarily meant for slugification. Whenever possible, make your application work **with** unicode, not against it. Args: text (str): The string to be asciified. ignore (bool): Configures final encoding to ignore remaining unasciified string instead of replacing it. >>> asciify('Beyoncé') == b'Beyonce' True """ try: try: return text.encode('ascii') except UnicodeDecodeError: # this usually means you passed in a non-unicode string text = text.decode('utf-8') return text.encode('ascii') except UnicodeEncodeError: mode = 'replace' if ignore: mode = 'ignore' transd = unicodedata.normalize('NFKD', text.translate(DEACCENT_MAP)) ret = transd.encode('ascii', mode) return ret def is_ascii(text): """Check if a string or bytestring, *text*, is composed of ascii characters only. Raises :exc:`ValueError` if argument is not text. Args: text (str): The string to be checked. >>> is_ascii('Beyoncé') False >>> is_ascii('Beyonce') True """ if isinstance(text, str): try: text.encode('ascii') except UnicodeEncodeError: return False elif isinstance(text, bytes): try: text.decode('ascii') except UnicodeDecodeError: return False else: raise ValueError('expected text or bytes, not %r' % type(text)) return True class DeaccenterDict(dict): "A small caching dictionary for deaccenting." def __missing__(self, key): ch = self.get(key) if ch is not None: return ch try: de = unicodedata.decomposition(chr(key)) p1, _, p2 = de.rpartition(' ') if int(p2, 16) == 0x308: ch = self.get(key) else: ch = int(p1, 16) except (IndexError, ValueError): ch = self.get(key, key) self[key] = ch return ch # http://chmullig.com/2009/12/python-unicode-ascii-ifier/ # For something more complete, investigate the unidecode # or isounidecode packages, which are capable of performing # crude transliteration. 
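
# A note on the lookup below: DeaccenterDict.__missing__ uses
# unicodedata.decomposition() to find a character's base codepoint
# (e.g., u'\xe9' (é) decomposes to '0065 0301', so it maps to 0x65,
# 'e'). Characters built with a combining diaeresis (0x308) are
# deliberately not auto-stripped, deferring to the explicit two-letter
# entries below (e.g., ä -> "ae" rather than just "a").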
_BASE_DEACCENT_MAP = { 0xc6: "AE", # Æ LATIN CAPITAL LETTER AE 0xd0: "D", # Ð LATIN CAPITAL LETTER ETH 0xd8: "OE", # Ø LATIN CAPITAL LETTER O WITH STROKE 0xde: "Th", # Þ LATIN CAPITAL LETTER THORN 0xc4: 'Ae', # Ä LATIN CAPITAL LETTER A WITH DIAERESIS 0xd6: 'Oe', # Ö LATIN CAPITAL LETTER O WITH DIAERESIS 0xdc: 'Ue', # Ü LATIN CAPITAL LETTER U WITH DIAERESIS 0xc0: "A", # À LATIN CAPITAL LETTER A WITH GRAVE 0xc1: "A", # Á LATIN CAPITAL LETTER A WITH ACUTE 0xc3: "A", # Ã LATIN CAPITAL LETTER A WITH TILDE 0xc7: "C", # Ç LATIN CAPITAL LETTER C WITH CEDILLA 0xc8: "E", # È LATIN CAPITAL LETTER E WITH GRAVE 0xc9: "E", # É LATIN CAPITAL LETTER E WITH ACUTE 0xca: "E", # Ê LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0xcc: "I", # Ì LATIN CAPITAL LETTER I WITH GRAVE 0xcd: "I", # Í LATIN CAPITAL LETTER I WITH ACUTE 0xd2: "O", # Ò LATIN CAPITAL LETTER O WITH GRAVE 0xd3: "O", # Ó LATIN CAPITAL LETTER O WITH ACUTE 0xd5: "O", # Õ LATIN CAPITAL LETTER O WITH TILDE 0xd9: "U", # Ù LATIN CAPITAL LETTER U WITH GRAVE 0xda: "U", # Ú LATIN CAPITAL LETTER U WITH ACUTE 0xdf: "ss", # ß LATIN SMALL LETTER SHARP S 0xe6: "ae", # æ LATIN SMALL LETTER AE 0xf0: "d", # ð LATIN SMALL LETTER ETH 0xf8: "oe", # ø LATIN SMALL LETTER O WITH STROKE 0xfe: "th", # þ LATIN SMALL LETTER THORN, 0xe4: 'ae', # ä LATIN SMALL LETTER A WITH DIAERESIS 0xf6: 'oe', # ö LATIN SMALL LETTER O WITH DIAERESIS 0xfc: 'ue', # ü LATIN SMALL LETTER U WITH DIAERESIS 0xe0: "a", # à LATIN SMALL LETTER A WITH GRAVE 0xe1: "a", # á LATIN SMALL LETTER A WITH ACUTE 0xe3: "a", # ã LATIN SMALL LETTER A WITH TILDE 0xe7: "c", # ç LATIN SMALL LETTER C WITH CEDILLA 0xe8: "e", # è LATIN SMALL LETTER E WITH GRAVE 0xe9: "e", # é LATIN SMALL LETTER E WITH ACUTE 0xea: "e", # ê LATIN SMALL LETTER E WITH CIRCUMFLEX 0xec: "i", # ì LATIN SMALL LETTER I WITH GRAVE 0xed: "i", # í LATIN SMALL LETTER I WITH ACUTE 0xf2: "o", # ò LATIN SMALL LETTER O WITH GRAVE 0xf3: "o", # ó LATIN SMALL LETTER O WITH ACUTE 0xf5: "o", # õ LATIN SMALL LETTER O WITH TILDE 0xf9: "u", # ù LATIN SMALL LETTER U WITH GRAVE 0xfa: "u", # ú LATIN SMALL LETTER U WITH ACUTE 0x2018: "'", # ‘ LEFT SINGLE QUOTATION MARK 0x2019: "'", # ’ RIGHT SINGLE QUOTATION MARK 0x201c: '"', # “ LEFT DOUBLE QUOTATION MARK 0x201d: '"', # ” RIGHT DOUBLE QUOTATION MARK } DEACCENT_MAP = DeaccenterDict(_BASE_DEACCENT_MAP) _SIZE_SYMBOLS = ('B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y') _SIZE_BOUNDS = [(1024 ** i, sym) for i, sym in enumerate(_SIZE_SYMBOLS)] _SIZE_RANGES = list(zip(_SIZE_BOUNDS, _SIZE_BOUNDS[1:])) def bytes2human(nbytes, ndigits=0): """Turns an integer value of *nbytes* into a human readable format. Set *ndigits* to control how many digits after the decimal point should be shown (default ``0``). 
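
    Note that the conversion uses binary prefixes, i.e., powers of 1024,
    so ``K`` here denotes 1024 bytes (a kibibyte), not 1000: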
    >>> bytes2human(128991)
    '126K'
    >>> bytes2human(100001221)
    '95M'
    >>> bytes2human(0, 2)
    '0.00B'
    """
    abs_bytes = abs(nbytes)
    for (size, symbol), (next_size, next_symbol) in _SIZE_RANGES:
        if abs_bytes <= next_size:
            break
    hnbytes = float(nbytes) / size
    return '{hnbytes:.{ndigits}f}{symbol}'.format(hnbytes=hnbytes,
                                                  ndigits=ndigits,
                                                  symbol=symbol)


class HTMLTextExtractor(HTMLParser):
    def __init__(self):
        self.reset()
        self.strict = False
        self.convert_charrefs = True
        self.result = []

    def handle_data(self, d):
        self.result.append(d)

    def handle_charref(self, number):
        if number[0] == 'x' or number[0] == 'X':
            codepoint = int(number[1:], 16)
        else:
            codepoint = int(number)
        self.result.append(chr(codepoint))

    def handle_entityref(self, name):
        try:
            codepoint = htmlentitydefs.name2codepoint[name]
        except KeyError:
            self.result.append('&' + name + ';')
        else:
            self.result.append(chr(codepoint))

    def get_text(self):
        return ''.join(self.result)


def html2text(html):
    """Strips tags from HTML text, returning markup-free text. Also, does
    a best effort replacement of entities like "&nbsp;"

    >>> r = html2text(u'Test &(\u0394ημώ)')
    >>> r == u'Test &(\u0394\u03b7\u03bc\u03ce)'
    True
    """
    # based on answers to http://stackoverflow.com/questions/753052/
    s = HTMLTextExtractor()
    s.feed(html)
    return s.get_text()


_EMPTY_GZIP_BYTES = b'\x1f\x8b\x08\x089\xf3\xb9U\x00\x03empty\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00'
_NON_EMPTY_GZIP_BYTES = b'\x1f\x8b\x08\x08\xbc\xf7\xb9U\x00\x03not_empty\x00K\xaa,I-N\xcc\xc8\xafT\xe4\x02\x00\xf3nb\xbf\x0b\x00\x00\x00'


def gunzip_bytes(bytestring):
    """The :mod:`gzip` module is great if you have a file or file-like
    object, but what if you just have bytes. StringIO is one
    possibility, but it's often faster, easier, and simpler to just
    use this one-liner. Use this tried-and-true utility function to
    decompress gzip from bytes.

    >>> gunzip_bytes(_EMPTY_GZIP_BYTES) == b''
    True
    >>> gunzip_bytes(_NON_EMPTY_GZIP_BYTES).rstrip() == b'bytesahoy!'
    True
    """
    return zlib.decompress(bytestring, 16 + zlib.MAX_WBITS)


def gzip_bytes(bytestring, level=6):
    """Turn some bytes into some compressed bytes.

    >>> len(gzip_bytes(b'a' * 10000))
    46

    Args:
        bytestring (bytes): Bytes to be compressed
        level (int): An integer, 1-9, controlling the
            speed/compression. 1 is fastest, least compressed, 9 is
            slowest, but most compressed.

    Note that all levels of gzip are pretty fast these days, though
    it's not really a competitor in compression, at any level.
    """
    out = StringIO()
    f = GzipFile(fileobj=out, mode='wb', compresslevel=level)
    f.write(bytestring)
    f.close()
    return out.getvalue()


# \u2028 and \u2029 are the unicode LINE SEPARATOR and PARAGRAPH
# SEPARATOR characters. (In a regex, \x2028 would wrongly mean a
# space followed by the literal text "28".)
_line_ending_re = re.compile(r'(\r\n|\n|\x0b|\f|\r|\x85|\u2028|\u2029)',
                             re.UNICODE)


def iter_splitlines(text):
    r"""Like :meth:`str.splitlines`, but returns an iterator of lines
    instead of a list. Also similar to :meth:`file.next`, as that also
    lazily reads and yields lines from a file.

    This function works with a variety of line endings, but as always,
    be careful when mixing line endings within a file.
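
    Note that it yields the text *between* line endings, so leading and
    trailing line endings produce empty strings: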
>>> list(iter_splitlines('\nhi\nbye\n')) ['', 'hi', 'bye', ''] >>> list(iter_splitlines('\r\nhi\rbye\r\n')) ['', 'hi', 'bye', ''] >>> list(iter_splitlines('')) [] """ prev_end, len_text = 0, len(text) # print('last: %r' % last_idx) # start, end = None, None for match in _line_ending_re.finditer(text): start, end = match.start(1), match.end(1) # print(start, end) if prev_end <= start: yield text[prev_end:start] if end == len_text: yield '' prev_end = end tail = text[prev_end:] if tail: yield tail return def indent(text, margin, newline='\n', key=bool): """The missing counterpart to the built-in :func:`textwrap.dedent`. Args: text (str): The text to indent. margin (str): The string to prepend to each line. newline (str): The newline used to rejoin the lines (default: ``\\n``) key (callable): Called on each line to determine whether to indent it. Default: :class:`bool`, to ensure that empty lines do not get whitespace added. """ indented_lines = [(margin + line if key(line) else line) for line in iter_splitlines(text)] return newline.join(indented_lines) def is_uuid(obj, version=4): """Check the argument is either a valid UUID object or string. Args: obj (object): The test target. Strings and UUID objects supported. version (int): The target UUID version, set to 0 to skip version check. >>> is_uuid('e682ccca-5a4c-4ef2-9711-73f9ad1e15ea') True >>> is_uuid('0221f0d9-d4b9-11e5-a478-10ddb1c2feb9') False >>> is_uuid('0221f0d9-d4b9-11e5-a478-10ddb1c2feb9', version=1) True """ if not isinstance(obj, uuid.UUID): try: obj = uuid.UUID(obj) except (TypeError, ValueError, AttributeError): return False if version and obj.version != int(version): return False return True def escape_shell_args(args, sep=' ', style=None): """Returns an escaped version of each string in *args*, according to *style*. Args: args (list): A list of arguments to escape and join together sep (str): The separator used to join the escaped arguments. style (str): The style of escaping to use. Can be one of ``cmd`` or ``sh``, geared toward Windows and Linux/BSD/etc., respectively. If *style* is ``None``, then it is picked according to the system platform. See :func:`args2cmd` and :func:`args2sh` for details and example output for each style. """ if not style: style = 'cmd' if sys.platform == 'win32' else 'sh' if style == 'sh': return args2sh(args, sep=sep) elif style == 'cmd': return args2cmd(args, sep=sep) raise ValueError("style expected one of 'cmd' or 'sh', not %r" % style) _find_sh_unsafe = re.compile(r'[^a-zA-Z0-9_@%+=:,./-]').search def args2sh(args, sep=' '): """Return a shell-escaped string version of *args*, separated by *sep*, based on the rules of sh, bash, and other shells in the Linux/BSD/MacOS ecosystem. >>> print(args2sh(['aa', '[bb]', "cc'cc", 'dd"dd'])) aa '[bb]' 'cc'"'"'cc' 'dd"dd' As you can see, arguments with no special characters are not escaped, arguments with special characters are quoted with single quotes, and single quotes themselves are quoted with double quotes. Double quotes are handled like any other special character. Based on code from the :mod:`pipes`/:mod:`shlex` modules. Also note that :mod:`shlex` and :mod:`argparse` have functions to split and parse strings escaped in this manner. 
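
    For quoting a single argument, :func:`shlex.quote` in the standard
    library applies essentially the same single-quote-based rules. Note
    that empty arguments are preserved by quoting:

    >>> print(args2sh(['']))
    ''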
""" ret_list = [] for arg in args: if not arg: ret_list.append("''") continue if _find_sh_unsafe(arg) is None: ret_list.append(arg) continue # use single quotes, and put single quotes into double quotes # the string $'b is then quoted as '$'"'"'b' ret_list.append("'" + arg.replace("'", "'\"'\"'") + "'") return ' '.join(ret_list) def args2cmd(args, sep=' '): r"""Return a shell-escaped string version of *args*, separated by *sep*, using the same rules as the Microsoft C runtime. >>> print(args2cmd(['aa', '[bb]', "cc'cc", 'dd"dd'])) aa [bb] cc'cc dd\"dd As you can see, escaping is through backslashing and not quoting, and double quotes are the only special character. See the comment in the code for more details. Based on internal code from the :mod:`subprocess` module. """ # technique description from subprocess below """ 1) Arguments are delimited by white space, which is either a space or a tab. 2) A string surrounded by double quotation marks is interpreted as a single argument, regardless of white space contained within. A quoted string can be embedded in an argument. 3) A double quotation mark preceded by a backslash is interpreted as a literal double quotation mark. 4) Backslashes are interpreted literally, unless they immediately precede a double quotation mark. 5) If backslashes immediately precede a double quotation mark, every pair of backslashes is interpreted as a literal backslash. If the number of backslashes is odd, the last backslash escapes the next double quotation mark as described in rule 3. See http://msdn.microsoft.com/en-us/library/17w5ykft.aspx or search http://msdn.microsoft.com for "Parsing C++ Command-Line Arguments" """ result = [] needquote = False for arg in args: bs_buf = [] # Add a space to separate this argument from the others if result: result.append(' ') needquote = (" " in arg) or ("\t" in arg) or not arg if needquote: result.append('"') for c in arg: if c == '\\': # Don't know if we need to double yet. bs_buf.append(c) elif c == '"': # Double backslashes. result.append('\\' * len(bs_buf)*2) bs_buf = [] result.append('\\"') else: # Normal char if bs_buf: result.extend(bs_buf) bs_buf = [] result.append(c) # Add remaining backslashes, if any. if bs_buf: result.extend(bs_buf) if needquote: result.extend(bs_buf) result.append('"') return ''.join(result) def parse_int_list(range_string, delim=',', range_delim='-'): """Returns a sorted list of positive integers based on *range_string*. Reverse of :func:`format_int_list`. Args: range_string (str): String of comma separated positive integers or ranges (e.g. '1,2,4-6,8'). Typical of a custom page range string used in printer dialogs. delim (char): Defaults to ','. Separates integers and contiguous ranges of integers. range_delim (char): Defaults to '-'. Indicates a contiguous range of integers. >>> parse_int_list('1,3,5-8,10-11,15') [1, 3, 5, 6, 7, 8, 10, 11, 15] """ output = [] for x in range_string.strip().split(delim): # Range if range_delim in x: range_limits = list(map(int, x.split(range_delim))) output += list(range(min(range_limits), max(range_limits)+1)) # Empty String elif not x: continue # Integer else: output.append(int(x)) return sorted(output) def format_int_list(int_list, delim=',', range_delim='-', delim_space=False): """Returns a sorted range string from a list of positive integers (*int_list*). Contiguous ranges of integers are collapsed to min and max values. Reverse of :func:`parse_int_list`. Args: int_list (list): List of positive integers to be converted into a range string (e.g. 
[1,2,4,5,6,8]). delim (char): Defaults to ','. Separates integers and contiguous ranges of integers. range_delim (char): Defaults to '-'. Indicates a contiguous range of integers. delim_space (bool): Defaults to ``False``. If ``True``, adds a space after all *delim* characters. >>> format_int_list([1,3,5,6,7,8,10,11,15]) '1,3,5-8,10-11,15' """ output = [] contig_range = collections.deque() for x in sorted(int_list): # Handle current (and first) value. if len(contig_range) < 1: contig_range.append(x) # Handle current value, given multiple previous values are contiguous. elif len(contig_range) > 1: delta = x - contig_range[-1] # Current value is contiguous. if delta == 1: contig_range.append(x) # Current value is non-contiguous. elif delta > 1: range_substr = '{:d}{}{:d}'.format(min(contig_range), range_delim, max(contig_range)) output.append(range_substr) contig_range.clear() contig_range.append(x) # Current value repeated. else: continue # Handle current value, given no previous contiguous integers else: delta = x - contig_range[0] # Current value is contiguous. if delta == 1: contig_range.append(x) # Current value is non-contiguous. elif delta > 1: output.append(f'{contig_range.popleft():d}') contig_range.append(x) # Current value repeated. else: continue # Handle the last value. else: # Last value is non-contiguous. if len(contig_range) == 1: output.append(f'{contig_range.popleft():d}') contig_range.clear() # Last value is part of contiguous range. elif len(contig_range) > 1: range_substr = '{:d}{}{:d}'.format(min(contig_range), range_delim, max(contig_range)) output.append(range_substr) contig_range.clear() if delim_space: output_str = (delim+' ').join(output) else: output_str = delim.join(output) return output_str def complement_int_list( range_string, range_start=0, range_end=None, delim=',', range_delim='-'): """ Returns range string that is the complement of the one provided as *range_string* parameter. These range strings are of the kind produce by :func:`format_int_list`, and parseable by :func:`parse_int_list`. Args: range_string (str): String of comma separated positive integers or ranges (e.g. '1,2,4-6,8'). Typical of a custom page range string used in printer dialogs. range_start (int): A positive integer from which to start the resulting range. Value is inclusive. Defaults to ``0``. range_end (int): A positive integer from which the produced range is stopped. Value is exclusive. Defaults to the maximum value found in the provided ``range_string``. delim (char): Defaults to ','. Separates integers and contiguous ranges of integers. range_delim (char): Defaults to '-'. Indicates a contiguous range of integers. 
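
    With the defaults, the complement runs from 0 through the maximum
    value in *range_string*, so every integer in that span that is
    missing from the input shows up in the output: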
>>> complement_int_list('1,3,5-8,10-11,15') '0,2,4,9,12-14' >>> complement_int_list('1,3,5-8,10-11,15', range_start=0) '0,2,4,9,12-14' >>> complement_int_list('1,3,5-8,10-11,15', range_start=1) '2,4,9,12-14' >>> complement_int_list('1,3,5-8,10-11,15', range_start=2) '2,4,9,12-14' >>> complement_int_list('1,3,5-8,10-11,15', range_start=3) '4,9,12-14' >>> complement_int_list('1,3,5-8,10-11,15', range_end=15) '0,2,4,9,12-14' >>> complement_int_list('1,3,5-8,10-11,15', range_end=14) '0,2,4,9,12-13' >>> complement_int_list('1,3,5-8,10-11,15', range_end=13) '0,2,4,9,12' >>> complement_int_list('1,3,5-8,10-11,15', range_end=20) '0,2,4,9,12-14,16-19' >>> complement_int_list('1,3,5-8,10-11,15', range_end=0) '' >>> complement_int_list('1,3,5-8,10-11,15', range_start=-1) '0,2,4,9,12-14' >>> complement_int_list('1,3,5-8,10-11,15', range_end=-1) '' >>> complement_int_list('1,3,5-8', range_start=1, range_end=1) '' >>> complement_int_list('1,3,5-8', range_start=2, range_end=2) '' >>> complement_int_list('1,3,5-8', range_start=2, range_end=3) '2' >>> complement_int_list('1,3,5-8', range_start=-10, range_end=-5) '' >>> complement_int_list('1,3,5-8', range_start=20, range_end=10) '' >>> complement_int_list('') '' """ int_list = set(parse_int_list(range_string, delim, range_delim)) if range_end is None: if int_list: range_end = max(int_list) + 1 else: range_end = range_start complement_values = set( range(range_end)) - int_list - set(range(range_start)) return format_int_list(complement_values, delim, range_delim) def int_ranges_from_int_list(range_string, delim=',', range_delim='-'): """ Transform a string of ranges (*range_string*) into a tuple of tuples. Args: range_string (str): String of comma separated positive integers or ranges (e.g. '1,2,4-6,8'). Typical of a custom page range string used in printer dialogs. delim (char): Defaults to ','. Separates integers and contiguous ranges of integers. range_delim (char): Defaults to '-'. Indicates a contiguous range of integers. >>> int_ranges_from_int_list('1,3,5-8,10-11,15') ((1, 1), (3, 3), (5, 8), (10, 11), (15, 15)) >>> int_ranges_from_int_list('1') ((1, 1),) >>> int_ranges_from_int_list('') () """ int_tuples = [] # Normalize the range string to our internal format for processing. range_string = format_int_list( parse_int_list(range_string, delim, range_delim)) if range_string: for bounds in range_string.split(','): if '-' in bounds: start, end = bounds.split('-') else: start, end = bounds, bounds int_tuples.append((int(start), int(end))) return tuple(int_tuples) class MultiReplace: """ MultiReplace is a tool for doing multiple find/replace actions in one pass. Given a mapping of values to be replaced it allows for all of the matching values to be replaced in a single pass which can save a lot of performance on very large strings. In addition to simple replace, it also allows for replacing based on regular expressions. 
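
    Internally, all of the substitutions are compiled into one combined
    regular expression of named groups, so the text only has to be
    scanned once, regardless of the number of replacement pairs.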
Keyword Arguments: :type regex: bool :param regex: Treat search keys as regular expressions [Default: False] :type flags: int :param flags: flags to pass to the regex engine during compile Dictionary Usage:: from boltons import strutils s = strutils.MultiReplace({ 'foo': 'zoo', 'cat': 'hat', 'bat': 'kraken' }) new = s.sub('The foo bar cat ate a bat') new == 'The zoo bar hat ate a kraken' Iterable Usage:: from boltons import strutils s = strutils.MultiReplace([ ('foo', 'zoo'), ('cat', 'hat'), ('bat', 'kraken)' ]) new = s.sub('The foo bar cat ate a bat') new == 'The zoo bar hat ate a kraken' The constructor can be passed a dictionary or other mapping as well as an iterable of tuples. If given an iterable, the substitution will be run in the order the replacement values are specified in the iterable. This is also true if it is given an OrderedDict. If given a dictionary then the order will be non-deterministic:: >>> 'foo bar baz'.replace('foo', 'baz').replace('baz', 'bar') 'bar bar bar' >>> m = MultiReplace({'foo': 'baz', 'baz': 'bar'}) >>> m.sub('foo bar baz') 'baz bar bar' This is because the order of replacement can matter if you're inserting something that might be replaced by a later substitution. Pay attention and if you need to rely on order then consider using a list of tuples instead of a dictionary. """ def __init__(self, sub_map, **kwargs): """Compile any regular expressions that have been passed.""" options = { 'regex': False, 'flags': 0, } options.update(kwargs) self.group_map = {} regex_values = [] if isinstance(sub_map, Mapping): sub_map = sub_map.items() for idx, vals in enumerate(sub_map): group_name = f'group{idx}' if isinstance(vals[0], str): # If we're not treating input strings like a regex, escape it if not options['regex']: exp = re.escape(vals[0]) else: exp = vals[0] else: exp = vals[0].pattern regex_values.append(f'(?P<{group_name}>{exp})') self.group_map[group_name] = vals[1] self.combined_pattern = re.compile( '|'.join(regex_values), flags=options['flags'] ) def _get_value(self, match): """Given a match object find replacement value.""" group_dict = match.groupdict() key = [x for x in group_dict if group_dict[x]][0] return self.group_map[key] def sub(self, text): """ Run substitutions on the input text. Given an input string, run all substitutions given in the constructor. """ return self.combined_pattern.sub(self._get_value, text) def multi_replace(text, sub_map, **kwargs): """ Shortcut function to invoke MultiReplace in a single call. Example Usage:: from boltons.strutils import multi_replace new = multi_replace( 'The foo bar cat ate a bat', {'foo': 'zoo', 'cat': 'hat', 'bat': 'kraken'} ) new == 'The zoo bar hat ate a kraken' """ m = MultiReplace(sub_map, **kwargs) return m.sub(text) def unwrap_text(text, ending='\n\n'): r""" Unwrap text, the natural complement to :func:`textwrap.wrap`. >>> text = "Short \n lines \nwrapped\nsmall.\n\nAnother\nparagraph." >>> unwrap_text(text) 'Short lines wrapped small.\n\nAnother paragraph.' Args: text: A string to unwrap. ending (str): The string to join all unwrapped paragraphs by. Pass ``None`` to get the list. Defaults to '\n\n' for compatibility with Markdown and RST. 
""" all_grafs = [] cur_graf = [] for line in text.splitlines(): line = line.strip() if line: cur_graf.append(line) else: all_grafs.append(' '.join(cur_graf)) cur_graf = [] if cur_graf: all_grafs.append(' '.join(cur_graf)) if ending is None: return all_grafs return ending.join(all_grafs) def removeprefix(text: str, prefix: str) -> str: r""" Remove `prefix` from start of `text` if present. Backport of `str.removeprefix` for Python versions less than 3.9. Args: text: A string to remove the prefix from. prefix: The string to remove from the beginning of `text`. """ if text.startswith(prefix): return text[len(prefix):] return text boltons-25.0.0/boltons/tableutils.py000066400000000000000000000526421475005545200175000ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """If there is one recurring theme in ``boltons``, it is that Python has excellent datastructures that constitute a good foundation for most quick manipulations, as well as building applications. However, Python usage has grown much faster than builtin data structure power. Python has a growing need for more advanced general-purpose data structures which behave intuitively. The :class:`Table` class is one example. When handed one- or two-dimensional data, it can provide useful, if basic, text and HTML renditions of small to medium sized data. It also heuristically handles recursive data of various formats (lists, dicts, namedtuples, objects). For more advanced :class:`Table`-style manipulation check out the `pandas`_ DataFrame. .. 
_pandas: http://pandas.pydata.org/ """ from html import escape as html_escape import types from itertools import islice from collections.abc import Sequence, Mapping, MutableSequence try: from .typeutils import make_sentinel _MISSING = make_sentinel(var_name='_MISSING') except ImportError: _MISSING = object() """ Some idle feature thoughts: * shift around column order without rearranging data * gotta make it so you can add additional items, not just initialize with * maybe a shortcut would be to allow adding of Tables to other Tables * what's the perf of preallocating lists and overwriting items versus starting from empty? * is it possible to effectively tell the difference between when a Table is from_data()'d with a single row (list) or with a list of lists? * CSS: white-space pre-line or pre-wrap maybe? * Would be nice to support different backends (currently uses lists exclusively). Sometimes large datasets come in list-of-dicts and list-of-tuples format and it's desirable to cut down processing overhead. TODO: make iterable on rows? """ __all__ = ['Table'] def to_text(obj, maxlen=None): try: text = str(obj) except Exception: try: text = str(repr(obj)) except Exception: text = str(object.__repr__(obj)) if maxlen and len(text) > maxlen: text = text[:maxlen - 3] + '...' # TODO: inverse of ljust/rjust/center return text def escape_html(obj, maxlen=None): text = to_text(obj, maxlen=maxlen) return html_escape(text, quote=True) _DNR = {type(None), bool, complex, float, type(NotImplemented), slice, str, bytes, int, types.FunctionType, types.MethodType, types.BuiltinFunctionType, types.GeneratorType} class UnsupportedData(TypeError): pass class InputType: def __init__(self, *a, **kw): pass def get_entry_seq(self, data_seq, headers): return [self.get_entry(entry, headers) for entry in data_seq] class DictInputType(InputType): def check_type(self, obj): return isinstance(obj, Mapping) def guess_headers(self, obj): return sorted(obj.keys()) def get_entry(self, obj, headers): return [obj.get(h) for h in headers] def get_entry_seq(self, obj, headers): return [[ci.get(h) for h in headers] for ci in obj] class ObjectInputType(InputType): def check_type(self, obj): return type(obj) not in _DNR and hasattr(obj, '__class__') def guess_headers(self, obj): headers = [] for attr in dir(obj): # an object's __dict__ could technically have non-string keys try: val = getattr(obj, attr) except Exception: # seen on greenlet: `run` shows in dir() but raises # AttributeError. Also properties misbehave. 
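                # swallowing the exception and skipping the attribute is
                # deliberate: getattr() can execute arbitrary property
                # code, and it's better to omit a column than to fail
                # building the whole table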
continue if callable(val): continue headers.append(attr) return headers def get_entry(self, obj, headers): values = [] for h in headers: try: values.append(getattr(obj, h)) except Exception: values.append(None) return values # might be better to hardcode list support since it's so close to the # core or might be better to make this the copy-style from_* importer # and have the non-copy style be hardcoded in __init__ class ListInputType(InputType): def check_type(self, obj): return isinstance(obj, MutableSequence) def guess_headers(self, obj): return None def get_entry(self, obj, headers): return obj def get_entry_seq(self, obj_seq, headers): return obj_seq class TupleInputType(InputType): def check_type(self, obj): return isinstance(obj, tuple) def guess_headers(self, obj): return None def get_entry(self, obj, headers): return list(obj) def get_entry_seq(self, obj_seq, headers): return [list(t) for t in obj_seq] class NamedTupleInputType(InputType): def check_type(self, obj): return hasattr(obj, '_fields') and isinstance(obj, tuple) def guess_headers(self, obj): return list(obj._fields) def get_entry(self, obj, headers): return [getattr(obj, h, None) for h in headers] def get_entry_seq(self, obj_seq, headers): return [[getattr(obj, h, None) for h in headers] for obj in obj_seq] class Table: """ This Table class is meant to be simple, low-overhead, and extensible. Its most common use would be for translation between in-memory data structures and serialization formats, such as HTML and console-ready text. As such, it stores data in list-of-lists format, and *does not* copy lists passed in. It also reserves the right to modify those lists in a "filling" process, whereby short lists are extended to the width of the table (usually determined by number of headers). This greatly reduces overhead and processing/validation that would have to occur otherwise. General description of headers behavior: Headers describe the columns, but are not part of the data, however, if the *headers* argument is omitted, Table tries to infer header names from the data. It is possible to have a table with no headers, just pass in ``headers=None``. Supported inputs: * :class:`list` of :class:`list` objects * :class:`dict` (list/single) * :class:`object` (list/single) * :class:`collections.namedtuple` (list/single) * TODO: DB API cursor? * TODO: json Supported outputs: * HTML * Pretty text (also usable as GF Markdown) * TODO: CSV * TODO: json * TODO: json lines To minimize resident size, the Table data is stored as a list of lists. """ # order definitely matters here _input_types = [DictInputType(), ListInputType(), NamedTupleInputType(), TupleInputType(), ObjectInputType()] _html_tr, _html_tr_close = '', '' _html_th, _html_th_close = '', '' _html_td, _html_td_close = '', '' _html_thead, _html_thead_close = '', '' _html_tbody, _html_tbody_close = '', '' # _html_tfoot, _html_tfoot_close = '', '' _html_table_tag, _html_table_tag_close = '', '
' def __init__(self, data=None, headers=_MISSING, metadata=None): if headers is _MISSING: headers = [] if data: headers, data = list(data[0]), islice(data, 1, None) self.headers = headers or [] self.metadata = metadata or {} self._data = [] self._width = 0 self.extend(data) def extend(self, data): """ Append the given data to the end of the Table. """ if not data: return self._data.extend(data) self._set_width() self._fill() def _set_width(self, reset=False): if reset: self._width = 0 if self._width: return if self.headers: self._width = len(self.headers) return self._width = max([len(d) for d in self._data]) def _fill(self): width, filler = self._width, [None] if not width: return for d in self._data: rem = width - len(d) if rem > 0: d.extend(filler * rem) return @classmethod def from_dict(cls, data, headers=_MISSING, max_depth=1, metadata=None): """Create a Table from a :class:`dict`. Operates the same as :meth:`from_data`, but forces interpretation of the data as a Mapping. """ return cls.from_data(data=data, headers=headers, max_depth=max_depth, _data_type=DictInputType(), metadata=metadata) @classmethod def from_list(cls, data, headers=_MISSING, max_depth=1, metadata=None): """Create a Table from a :class:`list`. Operates the same as :meth:`from_data`, but forces the interpretation of the data as a Sequence. """ return cls.from_data(data=data, headers=headers, max_depth=max_depth, _data_type=ListInputType(), metadata=metadata) @classmethod def from_object(cls, data, headers=_MISSING, max_depth=1, metadata=None): """Create a Table from an :class:`object`. Operates the same as :meth:`from_data`, but forces the interpretation of the data as an object. May be useful for some :class:`dict` and :class:`list` subtypes. """ return cls.from_data(data=data, headers=headers, max_depth=max_depth, _data_type=ObjectInputType(), metadata=metadata) @classmethod def from_data(cls, data, headers=_MISSING, max_depth=1, **kwargs): """Create a Table from any supported data, heuristically selecting how to represent the data in Table format. Args: data (object): Any object or iterable with data to be imported to the Table. headers (iterable): An iterable of headers to be matched to the data. If not explicitly passed, headers will be guessed for certain datatypes. max_depth (int): The level to which nested Tables should be created (default: 1). _data_type (InputType subclass): For advanced use cases, do not guess the type of the input data, use this data type instead. """ # TODO: seen/cycle detection/reuse ? # maxdepth follows the same behavior as find command # i.e., it doesn't work if max_depth=0 is passed in metadata = kwargs.pop('metadata', None) _data_type = kwargs.pop('_data_type', None) if max_depth < 1: # return data instead? return cls(headers=headers, metadata=metadata) is_seq = isinstance(data, Sequence) if is_seq: if not data: return cls(headers=headers, metadata=metadata) to_check = data[0] if not _data_type: for it in cls._input_types: if it.check_type(to_check): _data_type = it break else: # not particularly happy about this rewind-y approach is_seq = False to_check = data else: if type(data) in _DNR: # hmm, got scalar data. # raise an exception or make an exception, nahmsayn? 
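            # (for now, a scalar is wrapped as a single-cell, single-row Table)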
return cls([[data]], headers=headers, metadata=metadata) to_check = data if not _data_type: for it in cls._input_types: if it.check_type(to_check): _data_type = it break else: raise UnsupportedData('unsupported data type %r' % type(data)) if headers is _MISSING: headers = _data_type.guess_headers(to_check) if is_seq: entries = _data_type.get_entry_seq(data, headers) else: entries = [_data_type.get_entry(data, headers)] if max_depth > 1: new_max_depth = max_depth - 1 for i, entry in enumerate(entries): for j, cell in enumerate(entry): if type(cell) in _DNR: # optimization to avoid function overhead continue try: entries[i][j] = cls.from_data(cell, max_depth=new_max_depth) except UnsupportedData: continue return cls(entries, headers=headers, metadata=metadata) def __len__(self): return len(self._data) def __getitem__(self, idx): return self._data[idx] def __repr__(self): cn = self.__class__.__name__ if self.headers: return f'{cn}(headers={self.headers!r}, data={self._data!r})' else: return f'{cn}({self._data!r})' def to_html(self, orientation=None, wrapped=True, with_headers=True, with_newlines=True, with_metadata=False, max_depth=1): """Render this Table to HTML. Configure the structure of Table HTML by subclassing and overriding ``_html_*`` class attributes. Args: orientation (str): one of 'auto', 'horizontal', or 'vertical' (or the first letter of any of those). Default 'auto'. wrapped (bool): whether or not to include the wrapping '
<table></table>' tags. Default ``True``, set to ``False`` if appending multiple Table outputs or an otherwise customized HTML wrapping tag is needed. with_headers (bool): Whether to include the header row. Default ``True``. with_newlines (bool): Set to ``True`` if output should include added newlines to make the HTML more readable. Default ``True``. with_metadata (bool/str): Set to ``True`` if output should be preceded with a Table of preset metadata, if it exists. Set to special value ``'bottom'`` if the metadata Table HTML should come *after* the main HTML output. max_depth (int): Indicate how deeply to nest HTML tables before simply reverting to :func:`repr`-ing the nested data. Returns: A text string of the HTML of the rendered table. """ lines = [] headers = [] if with_metadata and self.metadata: metadata_table = Table.from_data(self.metadata, max_depth=max_depth) metadata_html = metadata_table.to_html(with_headers=True, with_newlines=with_newlines, with_metadata=False, max_depth=max_depth) if with_metadata != 'bottom': lines.append(metadata_html) lines.append('<br />
') if with_headers and self.headers: headers.extend(self.headers) headers.extend([None] * (self._width - len(self.headers))) if wrapped: lines.append(self._html_table_tag) orientation = orientation or 'auto' ol = orientation[0].lower() if ol == 'a': ol = 'h' if len(self) > 1 else 'v' if ol == 'h': self._add_horizontal_html_lines(lines, headers=headers, max_depth=max_depth) elif ol == 'v': self._add_vertical_html_lines(lines, headers=headers, max_depth=max_depth) else: raise ValueError("expected one of 'auto', 'vertical', or" " 'horizontal', not %r" % orientation) if with_metadata and self.metadata and with_metadata == 'bottom': lines.append('<br />
') lines.append(metadata_html) if wrapped: lines.append(self._html_table_tag_close) sep = '\n' if with_newlines else '' return sep.join(lines) def get_cell_html(self, value): """Called on each value in an HTML table. By default it simply escapes the HTML. Override this method to add additional conditions and behaviors, but take care to ensure the final output is HTML escaped. """ return escape_html(value) def _add_horizontal_html_lines(self, lines, headers, max_depth): esc = self.get_cell_html new_depth = max_depth - 1 if max_depth > 1 else max_depth if headers: _thth = self._html_th_close + self._html_th lines.append(self._html_thead) lines.append(self._html_tr + self._html_th + _thth.join([esc(h) for h in headers]) + self._html_th_close + self._html_tr_close) lines.append(self._html_thead_close) trtd, _tdtd, _td_tr = (self._html_tr + self._html_td, self._html_td_close + self._html_td, self._html_td_close + self._html_tr_close) lines.append(self._html_tbody) for row in self._data: if max_depth > 1: _fill_parts = [] for cell in row: if isinstance(cell, Table): _fill_parts.append(cell.to_html(max_depth=new_depth)) else: _fill_parts.append(esc(cell)) else: _fill_parts = [esc(c) for c in row] lines.append(''.join([trtd, _tdtd.join(_fill_parts), _td_tr])) lines.append(self._html_tbody_close) def _add_vertical_html_lines(self, lines, headers, max_depth): esc = self.get_cell_html new_depth = max_depth - 1 if max_depth > 1 else max_depth tr, th, _th = self._html_tr, self._html_th, self._html_th_close td, _tdtd = self._html_td, self._html_td_close + self._html_td _td_tr = self._html_td_close + self._html_tr_close for i in range(self._width): line_parts = [tr] if headers: line_parts.extend([th, esc(headers[i]), _th]) if max_depth > 1: _fill_parts = [] for row in self._data: cell = row[i] if isinstance(cell, Table): _fill_parts.append(cell.to_html(max_depth=new_depth)) else: _fill_parts.append(esc(row[i])) else: _fill_parts = [esc(row[i]) for row in self._data] line_parts.extend([td, _tdtd.join(_fill_parts), _td_tr]) lines.append(''.join(line_parts)) def to_text(self, with_headers=True, maxlen=None): """Get the Table's textual representation. Only works well for Tables with non-recursive data. Args: with_headers (bool): Whether to include a header row at the top. maxlen (int): Max length of data in each cell. """ lines = [] widths = [] headers = list(self.headers) text_data = [[to_text(cell, maxlen=maxlen) for cell in row] for row in self._data] for idx in range(self._width): # the width of column *idx* is the longest cell in that column cur_widths = [len(cur[idx]) for cur in text_data] if with_headers: cur_widths.append(len(to_text(headers[idx], maxlen=maxlen))) widths.append(max(cur_widths)) if with_headers: lines.append(' | '.join([h.center(widths[i]) for i, h in enumerate(headers)])) lines.append('-|-'.join(['-' * w for w in widths])) for row in text_data: lines.append(' | '.join([cell.center(widths[j]) for j, cell in enumerate(row)])) return '\n'.join(lines) boltons-25.0.0/boltons/tbutils.py000066400000000000000000000734121475005545200170140ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer.
# # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """One of the oft-cited tenets of Python is that it is better to ask forgiveness than permission. That is, there are many cases where it is more inclusive and correct to handle exceptions than spend extra lines and execution time checking for conditions. This philosophy makes good exception handling features all the more important. Unfortunately Python's :mod:`traceback` module is woefully behind the times. The ``tbutils`` module provides two disparate but complementary featuresets: 1. With :class:`ExceptionInfo` and :class:`TracebackInfo`, the ability to extract, construct, manipulate, format, and serialize exceptions, tracebacks, and callstacks. 2. With :class:`ParsedException`, the ability to find and parse tracebacks from captured output such as logs and stdout. There is also the :class:`ContextualTracebackInfo` variant of :class:`TracebackInfo`, which includes much more information from each frame of the callstack, including values of locals and neighboring lines of code. """ import re import sys import linecache # TODO: chaining primitives? what are real use cases where these help? # TODO: print_* for backwards compatibility # __all__ = ['extract_stack', 'extract_tb', 'format_exception', # 'format_exception_only', 'format_list', 'format_stack', # 'format_tb', 'print_exc', 'format_exc', 'print_exception', # 'print_last', 'print_stack', 'print_tb'] __all__ = ['ExceptionInfo', 'TracebackInfo', 'Callpoint', 'ContextualExceptionInfo', 'ContextualTracebackInfo', 'ContextualCallpoint', 'print_exception', 'ParsedException'] class Callpoint: """The Callpoint is a lightweight object used to represent a single entry in the code of a call stack. It stores the code-related metadata of a given frame. Available attributes are the same as the parameters below. 
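A short usage sketch (the function name here is illustrative, not part of the API)::

    def where():
        return Callpoint.from_current()  # describes this line, in where()

    cp = where()
    # cp.func_name == 'where'; str(cp.line) recovers the source line above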
Args: func_name (str): the function name lineno (int): the line number module_name (str): the module name module_path (str): the filesystem path of the module lasti (int): the index of bytecode execution line (str): the single-line code content (if available) """ __slots__ = ('func_name', 'lineno', 'module_name', 'module_path', 'lasti', 'line') def __init__(self, module_name, module_path, func_name, lineno, lasti, line=None): self.func_name = func_name self.lineno = lineno self.module_name = module_name self.module_path = module_path self.lasti = lasti self.line = line def to_dict(self): "Get a :class:`dict` copy of the Callpoint. Useful for serialization." ret = {} for slot in self.__slots__: try: val = getattr(self, slot) except AttributeError: pass else: ret[slot] = str(val) if isinstance(val, _DeferredLine) else val return ret @classmethod def from_current(cls, level=1): "Creates a Callpoint from the location of the calling function." frame = sys._getframe(level) return cls.from_frame(frame) @classmethod def from_frame(cls, frame): "Create a Callpoint object from data extracted from the given frame." func_name = frame.f_code.co_name lineno = frame.f_lineno module_name = frame.f_globals.get('__name__', '') module_path = frame.f_code.co_filename lasti = frame.f_lasti line = _DeferredLine(module_path, lineno, frame.f_globals) return cls(module_name, module_path, func_name, lineno, lasti, line=line) @classmethod def from_tb(cls, tb): """Create a Callpoint from the traceback of the current exception. Main difference with :meth:`from_frame` is that ``lineno`` and ``lasti`` come from the traceback, which is to say the line that failed in the try block, not the line currently being executed (in the except block). """ func_name = tb.tb_frame.f_code.co_name lineno = tb.tb_lineno lasti = tb.tb_lasti module_name = tb.tb_frame.f_globals.get('__name__', '') module_path = tb.tb_frame.f_code.co_filename line = _DeferredLine(module_path, lineno, tb.tb_frame.f_globals) return cls(module_name, module_path, func_name, lineno, lasti, line=line) def __repr__(self): cn = self.__class__.__name__ args = [getattr(self, s, None) for s in self.__slots__] if not any(args): return super().__repr__() else: return '{}({})'.format(cn, ', '.join([repr(a) for a in args])) def tb_frame_str(self): """Render the Callpoint as it would appear in a standard printed Python traceback. Returns a string with filename, line number, function name, and the actual code line of the error on up to two lines. """ ret = ' File "{}", line {}, in {}\n'.format(self.module_path, self.lineno, self.func_name) if self.line: ret += f' {str(self.line).strip()}\n' return ret class _DeferredLine: """The _DeferredLine type allows Callpoints and TracebackInfos to be constructed without potentially hitting the filesystem, as is the normal behavior of the standard Python :mod:`traceback` and :mod:`linecache` modules. Calling :func:`str` fetches and caches the line. Args: filename (str): the path of the file containing the line lineno (int): the number of the line in question module_globals (dict): an optional dict of module globals, used to handle advanced use cases using custom module loaders. 
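The laziness is the point: constructing a _DeferredLine does no I/O, and the file is only consulted (via :mod:`linecache`) when the line is actually needed. A sketch, assuming a ``mod.py`` exists on disk::

    dl = _DeferredLine('mod.py', 1)  # no file access happens here
    text = str(dl)                   # the line is fetched and cached now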
""" __slots__ = ('filename', 'lineno', '_line', '_mod_name', '_mod_loader') def __init__(self, filename, lineno, module_globals=None): self.filename = filename self.lineno = lineno if module_globals is None: self._mod_name = None self._mod_loader = None else: self._mod_name = module_globals.get('__name__') self._mod_loader = module_globals.get('__loader__') def __eq__(self, other): return (self.lineno, self.filename) == (other.lineno, other.filename) def __ne__(self, other): return not self == other def __str__(self): ret = getattr(self, '_line', None) if ret is not None: return ret try: linecache.checkcache(self.filename) mod_globals = {'__name__': self._mod_name, '__loader__': self._mod_loader} line = linecache.getline(self.filename, self.lineno, mod_globals) line = line.rstrip() except KeyError: line = '' self._line = line return line def __repr__(self): return repr(str(self)) def __len__(self): return len(str(self)) # TODO: dedup frames, look at __eq__ on _DeferredLine class TracebackInfo: """The TracebackInfo class provides a basic representation of a stack trace, be it from an exception being handled or just part of normal execution. It is basically a wrapper around a list of :class:`Callpoint` objects representing frames. Args: frames (list): A list of frame objects in the stack. .. note :: ``TracebackInfo`` can represent both exception tracebacks and non-exception tracebacks (aka stack traces). As a result, there is no ``TracebackInfo.from_current()``, as that would be ambiguous. Instead, call :meth:`TracebackInfo.from_frame` without the *frame* argument for a stack trace, or :meth:`TracebackInfo.from_traceback` without the *tb* argument for an exception traceback. """ callpoint_type = Callpoint def __init__(self, frames): self.frames = frames @classmethod def from_frame(cls, frame=None, level=1, limit=None): """Create a new TracebackInfo *frame* by recurring up in the stack a max of *limit* times. If *frame* is unset, get the frame from :func:`sys._getframe` using *level*. Args: frame (types.FrameType): frame object from :func:`sys._getframe` or elsewhere. Defaults to result of :func:`sys.get_frame`. level (int): If *frame* is unset, the desired frame is this many levels up the stack from the invocation of this method. Default ``1`` (i.e., caller of this method). limit (int): max number of parent frames to extract (defaults to :data:`sys.tracebacklimit`) """ ret = [] if frame is None: frame = sys._getframe(level) if limit is None: limit = getattr(sys, 'tracebacklimit', 1000) n = 0 while frame is not None and n < limit: item = cls.callpoint_type.from_frame(frame) ret.append(item) frame = frame.f_back n += 1 ret.reverse() return cls(ret) @classmethod def from_traceback(cls, tb=None, limit=None): """Create a new TracebackInfo from the traceback *tb* by recurring up in the stack a max of *limit* times. If *tb* is unset, get the traceback from the currently handled exception. If no exception is being handled, raise a :exc:`ValueError`. Args: frame (types.TracebackType): traceback object from :func:`sys.exc_info` or elsewhere. If absent or set to ``None``, defaults to ``sys.exc_info()[2]``, and raises a :exc:`ValueError` if no exception is currently being handled. 
limit (int): max number of parent frames to extract (defaults to :data:`sys.tracebacklimit`) """ ret = [] if tb is None: tb = sys.exc_info()[2] if tb is None: raise ValueError('no tb set and no exception being handled') if limit is None: limit = getattr(sys, 'tracebacklimit', 1000) n = 0 while tb is not None and n < limit: item = cls.callpoint_type.from_tb(tb) ret.append(item) tb = tb.tb_next n += 1 return cls(ret) @classmethod def from_dict(cls, d): "Complements :meth:`TracebackInfo.to_dict`." # TODO: check this. return cls(d['frames']) def to_dict(self): """Returns a dict with a list of :class:`Callpoint` frames converted to dicts. """ return {'frames': [f.to_dict() for f in self.frames]} def __len__(self): return len(self.frames) def __iter__(self): return iter(self.frames) def __repr__(self): cn = self.__class__.__name__ if self.frames: frame_part = f' last={self.frames[-1]!r}' else: frame_part = '' return f'<{cn} frames={len(self.frames)}{frame_part}>' def __str__(self): return self.get_formatted() def get_formatted(self): """Returns a string as formatted in the traditional Python built-in style observable when an exception is not caught. In other words, mimics :func:`traceback.format_tb` and :func:`traceback.format_stack`. """ ret = 'Traceback (most recent call last):\n' ret += ''.join([f.tb_frame_str() for f in self.frames]) return ret class ExceptionInfo: """An ExceptionInfo object ties together three main fields suitable for representing an instance of an exception: The exception type name, a string representation of the exception itself (the exception message), and information about the traceback (stored as a :class:`TracebackInfo` object). These fields line up with :func:`sys.exc_info`, but unlike the values returned by that function, ExceptionInfo does not hold any references to the real exception or traceback. This property makes it suitable for serialization or long-term retention, without worrying about formatting pitfalls, circular references, or leaking memory. Args: exc_type (str): The exception type name. exc_msg (str): String representation of the exception value. tb_info (TracebackInfo): Information about the stack trace of the exception. Like the :class:`TracebackInfo`, ExceptionInfo is most commonly instantiated from one of its classmethods: :meth:`from_exc_info` or :meth:`from_current`. """ #: Override this in inherited types to control the TracebackInfo type used tb_info_type = TracebackInfo def __init__(self, exc_type, exc_msg, tb_info): # TODO: additional fields for SyntaxErrors self.exc_type = exc_type self.exc_msg = exc_msg self.tb_info = tb_info @classmethod def from_exc_info(cls, exc_type, exc_value, traceback): """Create an :class:`ExceptionInfo` object from the exception's type, value, and traceback, as returned by :func:`sys.exc_info`. See also :meth:`from_current`. """ type_str = exc_type.__name__ type_mod = exc_type.__module__ if type_mod not in ("__main__", "__builtin__", "exceptions", "builtins"): type_str = f'{type_mod}.{type_str}' val_str = _some_str(exc_value) tb_info = cls.tb_info_type.from_traceback(traceback) return cls(type_str, val_str, tb_info) @classmethod def from_current(cls): """Create an :class:`ExceptionInfo` object from the current exception being handled, by way of :func:`sys.exc_info`. Will raise an exception if no exception is currently being handled. """ return cls.from_exc_info(*sys.exc_info()) def to_dict(self): """Get a :class:`dict` representation of the ExceptionInfo, suitable for JSON serialization. 
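A minimal sketch of capturing and serializing the current exception::

    try:
        1 / 0
    except ZeroDivisionError:
        einfo = ExceptionInfo.from_current()

    sorted(einfo.to_dict())  # ['exc_msg', 'exc_tb', 'exc_type']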
""" return {'exc_type': self.exc_type, 'exc_msg': self.exc_msg, 'exc_tb': self.tb_info.to_dict()} def __repr__(self): cn = self.__class__.__name__ try: len_frames = len(self.tb_info.frames) last_frame = f', last={self.tb_info.frames[-1]!r}' except Exception: len_frames = 0 last_frame = '' args = (cn, self.exc_type, self.exc_msg, len_frames, last_frame) return '<%s [%s: %s] (%s frames%s)>' % args def get_formatted(self): """Returns a string formatted in the traditional Python built-in style observable when an exception is not caught. In other words, mimics :func:`traceback.format_exception`. """ # TODO: add SyntaxError formatting tb_str = self.tb_info.get_formatted() return ''.join([tb_str, f'{self.exc_type}: {self.exc_msg}']) def get_formatted_exception_only(self): return f'{self.exc_type}: {self.exc_msg}' class ContextualCallpoint(Callpoint): """The ContextualCallpoint is a :class:`Callpoint` subtype with the exact same API and storing two additional values: 1. :func:`repr` outputs for local variables from the Callpoint's scope 2. A number of lines before and after the Callpoint's line of code The ContextualCallpoint is used by the :class:`ContextualTracebackInfo`. """ def __init__(self, *a, **kw): self.local_reprs = kw.pop('local_reprs', {}) self.pre_lines = kw.pop('pre_lines', []) self.post_lines = kw.pop('post_lines', []) super().__init__(*a, **kw) @classmethod def from_frame(cls, frame): "Identical to :meth:`Callpoint.from_frame`" ret = super().from_frame(frame) ret._populate_local_reprs(frame.f_locals) ret._populate_context_lines() return ret @classmethod def from_tb(cls, tb): "Identical to :meth:`Callpoint.from_tb`" ret = super().from_tb(tb) ret._populate_local_reprs(tb.tb_frame.f_locals) ret._populate_context_lines() return ret def _populate_context_lines(self, pivot=8): DL, lineno = _DeferredLine, self.lineno try: module_globals = self.line.module_globals except AttributeError: module_globals = None start_line = max(0, lineno - pivot) pre_lines = [DL(self.module_path, ln, module_globals) for ln in range(start_line, lineno)] self.pre_lines[:] = pre_lines post_lines = [DL(self.module_path, ln, module_globals) for ln in range(lineno + 1, lineno + 1 + pivot)] self.post_lines[:] = post_lines return def _populate_local_reprs(self, f_locals): local_reprs = self.local_reprs for k, v in f_locals.items(): try: local_reprs[k] = repr(v) except Exception: surrogate = '' % type(v).__name__ local_reprs[k] = surrogate return def to_dict(self): """ Same principle as :meth:`Callpoint.to_dict`, but with the added contextual values. With ``ContextualCallpoint.to_dict()``, each frame will now be represented like:: {'func_name': 'print_example', 'lineno': 0, 'module_name': 'example_module', 'module_path': '/home/example/example_module.pyc', 'lasti': 0, 'line': 'print "example"', 'locals': {'variable': '"value"'}, 'pre_lines': ['variable = "value"'], 'post_lines': []} The locals dictionary and line lists are copies and can be mutated freely. 
""" ret = super().to_dict() ret['locals'] = dict(self.local_reprs) # get the line numbers and textual lines # without assuming DeferredLines start_line = self.lineno - len(self.pre_lines) pre_lines = [{'lineno': start_line + i, 'line': str(l)} for i, l in enumerate(self.pre_lines)] # trim off leading empty lines for i, item in enumerate(pre_lines): if item['line']: break if i: pre_lines = pre_lines[i:] ret['pre_lines'] = pre_lines # now post_lines post_lines = [{'lineno': self.lineno + i, 'line': str(l)} for i, l in enumerate(self.post_lines)] _last = 0 for i, item in enumerate(post_lines): if item['line']: _last = i post_lines = post_lines[:_last + 1] ret['post_lines'] = post_lines return ret class ContextualTracebackInfo(TracebackInfo): """The ContextualTracebackInfo type is a :class:`TracebackInfo` subtype that is used by :class:`ContextualExceptionInfo` and uses the :class:`ContextualCallpoint` as its frame-representing primitive. """ callpoint_type = ContextualCallpoint class ContextualExceptionInfo(ExceptionInfo): """The ContextualTracebackInfo type is a :class:`TracebackInfo` subtype that uses the :class:`ContextualCallpoint` as its frame-representing primitive. It carries with it most of the exception information required to recreate the widely recognizable "500" page for debugging Django applications. """ tb_info_type = ContextualTracebackInfo # TODO: clean up & reimplement -- specifically for syntax errors def format_exception_only(etype, value): """Format the exception part of a traceback. The arguments are the exception type and value such as given by sys.last_type and sys.last_value. The return value is a list of strings, each ending in a newline. Normally, the list contains a single string; however, for SyntaxError exceptions, it contains several lines that (when printed) display detailed information about where the syntax error occurred. The message indicating which exception occurred is always the last string in the list. """ # Gracefully handle (the way Python 2.4 and earlier did) the case of # being called with (None, None). if etype is None: return [_format_final_exc_line(etype, value)] stype = etype.__name__ smod = etype.__module__ if smod not in ("__main__", "builtins", "exceptions"): stype = smod + '.' + stype if not issubclass(etype, SyntaxError): return [_format_final_exc_line(stype, value)] # It was a syntax error; show exactly where the problem was found. lines = [] filename = value.filename or "" lineno = str(value.lineno) or '?' lines.append(f' File "{filename}", line {lineno}\n') badline = value.text offset = value.offset if badline is not None: lines.append(' %s\n' % badline.strip()) if offset is not None: caretspace = badline.rstrip('\n')[:offset].lstrip() # non-space whitespace (likes tabs) must be kept for alignment caretspace = ((c.isspace() and c or ' ') for c in caretspace) # only three spaces to account for offset1 == pos 0 lines.append(' %s^\n' % ''.join(caretspace)) msg = value.msg or "" lines.append(f"{stype}: {msg}\n") return lines # TODO: use asciify, improved if necessary def _some_str(value): try: return str(value) except Exception: pass return '' % type(value).__name__ def _format_final_exc_line(etype, value): valuestr = _some_str(value) if value is None or not valuestr: line = "%s\n" % etype else: line = f"{etype}: {valuestr}\n" return line def print_exception(etype, value, tb, limit=None, file=None): """Print exception up to 'limit' stack trace entries from 'tb' to 'file'. 
This differs from print_tb() in the following ways: (1) if traceback is not None, it prints a header "Traceback (most recent call last):"; (2) it prints the exception type and value after the stack trace; (3) if type is SyntaxError and value has the appropriate format, it prints the line where the syntax error occurred with a caret on the next line indicating the approximate position of the error. """ if file is None: file = sys.stderr if tb: tbi = TracebackInfo.from_traceback(tb, limit) print(str(tbi), end='', file=file) for line in format_exception_only(etype, value): print(line, end='', file=file) def fix_print_exception(): """ Sets the default exception hook :func:`sys.excepthook` to the :func:`tbutils.print_exception` that uses all the ``tbutils`` facilities to provide a consistent output behavior. """ sys.excepthook = print_exception _frame_re = re.compile(r'^File "(?P<filepath>.+)", line (?P<lineno>\d+)' r', in (?P<funcname>.+)$') _se_frame_re = re.compile(r'^File "(?P<filepath>.+)", line (?P<lineno>\d+)') _underline_re = re.compile(r'^[~^ ]*$') # TODO: ParsedException generator over large bodies of text class ParsedException: """Stores a parsed traceback and exception as would be typically output by :func:`sys.excepthook` or :func:`traceback.print_exception`. .. note:: Does not currently store SyntaxError details such as column. """ def __init__(self, exc_type_name, exc_msg, frames=None): self.exc_type = exc_type_name self.exc_msg = exc_msg self.frames = list(frames or []) @property def source_file(self): """ The file path of the module containing the function that raised the exception, or None if not available. """ try: return self.frames[-1]['filepath'] except IndexError: return None def to_dict(self): "Get a copy as a JSON-serializable :class:`dict`." return {'exc_type': self.exc_type, 'exc_msg': self.exc_msg, 'frames': list(self.frames)} def __repr__(self): cn = self.__class__.__name__ return ('%s(%r, %r, frames=%r)' % (cn, self.exc_type, self.exc_msg, self.frames)) def to_string(self): """Formats the exception and its traceback into the standard format, as returned by the traceback module. ``ParsedException.from_string(text).to_string()`` should yield ``text``. .. note:: Note that this method does not output "anchors" (e.g., ``~~~~~^^``), as were added in Python 3.11. See the built-in ``traceback`` module if these are necessary. """ lines = ['Traceback (most recent call last):'] for frame in self.frames: lines.append(' File "{}", line {}, in {}'.format(frame['filepath'], frame['lineno'], frame['funcname'])) source_line = frame.get('source_line') if source_line: lines.append(f' {source_line}') if self.exc_msg: lines.append(f'{self.exc_type}: {self.exc_msg}') else: lines.append(f'{self.exc_type}') return '\n'.join(lines) @classmethod def from_string(cls, tb_str): """Parse a traceback and exception from the text *tb_str*. This text is expected to have been decoded, otherwise it will be interpreted as UTF-8. This method does not search a larger body of text for tracebacks. If the first line of the text passed does not match one of the known patterns, a :exc:`ValueError` will be raised. This method will ignore trailing text after the end of the first traceback. Args: tb_str (str): The traceback text (:class:`unicode` or UTF-8 bytes) """ if not isinstance(tb_str, str): tb_str = tb_str.decode('utf-8') tb_lines = tb_str.lstrip().splitlines() # First off, handle some ignored exceptions.
These can be the # result of exceptions raised by __del__ during garbage # collection while tb_lines: cl = tb_lines[-1] if cl.startswith('Exception ') and cl.endswith('ignored'): tb_lines.pop() else: break if tb_lines and tb_lines[0].strip() == 'Traceback (most recent call last):': start_line = 1 frame_re = _frame_re elif len(tb_lines) > 1 and tb_lines[-2].lstrip().startswith('^'): # This is to handle the slight formatting difference # associated with SyntaxErrors, which also don't really # have tracebacks start_line = 0 frame_re = _se_frame_re else: raise ValueError('unrecognized traceback string format') frames = [] line_no = start_line while True: frame_line = tb_lines[line_no].strip() frame_match = frame_re.match(frame_line) if frame_match: frame_dict = frame_match.groupdict() try: next_line = tb_lines[line_no + 1] except IndexError: # We read what we could next_line = '' next_line_stripped = next_line.strip() if ( frame_re.match(next_line_stripped) or # The exception message will not be indented # This check is to avoid overrunning on eval-like # tracebacks where the last frame doesn't have source # code in the traceback not next_line.startswith(' ') ): frame_dict['source_line'] = '' else: frame_dict['source_line'] = next_line_stripped line_no += 1 if _underline_re.match(tb_lines[line_no + 1]): # To deal with anchors line_no += 1 else: break line_no += 1 frames.append(frame_dict) try: exc_line = '\n'.join(tb_lines[line_no:]) exc_type, _, exc_msg = exc_line.partition(': ') except Exception: exc_type, exc_msg = '', '' return cls(exc_type, exc_msg, frames) ParsedTB = ParsedException # legacy alias boltons-25.0.0/boltons/timeutils.py000066400000000000000000000475241475005545200173520ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """Python's :mod:`datetime` module provides some of the most complex and powerful primitives in the Python standard library. Time is nontrivial, but thankfully its support is first-class in Python. ``dateutils`` provides some additional tools for working with time. 
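A couple of quick tastes of the utilities defined below (outputs abbreviated; the relative one depends on the current date)::

    parse_timedelta('1d 2h 3.5m')         # -> timedelta(days=1, seconds=7410)
    relative_time(datetime(2015, 4, 12))  # -> e.g., '9 years ago'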
Additionally, timeutils provides a few basic utilities for working with timezones in Python. The Python :mod:`datetime` module's documentation describes how to create a :class:`~datetime.datetime`-compatible :class:`~datetime.tzinfo` subtype. It even provides a few examples. The following module defines usable forms of the timezones in those docs, as well as a couple other useful ones, :data:`UTC` (aka GMT) and :data:`LocalTZ` (representing the local timezone as configured in the operating system). For timezones beyond these, as well as a higher degree of accuracy in corner cases, check out `pytz`_ and `dateutil`_. .. _pytz: https://pypi.python.org/pypi/pytz .. _dateutil: https://dateutil.readthedocs.io/en/stable/index.html """ import re import time import bisect import operator from datetime import tzinfo, timedelta, date, datetime, timezone # For legacy compatibility. # boltons used to offer an implementation of total_seconds for Python <2.7 total_seconds = timedelta.total_seconds def dt_to_timestamp(dt): """Converts from a :class:`~datetime.datetime` object to an integer timestamp, suitable interoperation with :func:`time.time` and other `Epoch-based timestamps`. .. _Epoch-based timestamps: https://en.wikipedia.org/wiki/Unix_time >>> timestamp = int(time.time()) >>> utc_dt = datetime.fromtimestamp(timestamp, timezone.utc) >>> timestamp - dt_to_timestamp(utc_dt) 0.0 ``dt_to_timestamp`` supports both timezone-aware and naïve :class:`~datetime.datetime` objects. Note that it assumes naïve datetime objects are implied UTC, such as those generated with :meth:`datetime.datetime.utcnow`. If your datetime objects are local time, such as those generated with :meth:`datetime.datetime.now`, first convert it using the :meth:`datetime.datetime.replace` method with ``tzinfo=`` :class:`LocalTZ` object in this module, then pass the result of that to ``dt_to_timestamp``. """ if dt.tzinfo: td = dt - EPOCH_AWARE else: td = dt.replace(tzinfo=timezone.utc) - EPOCH_AWARE return timedelta.total_seconds(td) _NONDIGIT_RE = re.compile(r'\D') def isoparse(iso_str): """Parses the limited subset of `ISO8601-formatted time`_ strings as returned by :meth:`datetime.datetime.isoformat`. >>> epoch_dt = datetime.fromtimestamp(0, timezone.utc).replace(tzinfo=None) >>> iso_str = epoch_dt.isoformat() >>> print(iso_str) 1970-01-01T00:00:00 >>> isoparse(iso_str) datetime.datetime(1970, 1, 1, 0, 0) >>> utcnow = datetime.now(timezone.utc).replace(tzinfo=None) >>> utcnow == isoparse(utcnow.isoformat()) True For further datetime parsing, see the `iso8601`_ package for strict ISO parsing and `dateutil`_ package for loose parsing and more. .. _ISO8601-formatted time: https://en.wikipedia.org/wiki/ISO_8601 .. _iso8601: https://pypi.python.org/pypi/iso8601 .. _dateutil: https://pypi.python.org/pypi/python-dateutil """ dt_args = [int(p) for p in _NONDIGIT_RE.split(iso_str)] return datetime(*dt_args) _BOUNDS = [(0, timedelta(seconds=1), 'second'), (1, timedelta(seconds=60), 'minute'), (1, timedelta(seconds=3600), 'hour'), (1, timedelta(days=1), 'day'), (1, timedelta(days=7), 'week'), (2, timedelta(days=30), 'month'), (1, timedelta(days=365), 'year')] _BOUNDS = [(b[0] * b[1], b[1], b[2]) for b in _BOUNDS] _BOUND_DELTAS = [b[0] for b in _BOUNDS] _FLOAT_PATTERN = r'[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?' 
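# A sketch of how the _BOUNDS table above gets used: bisecting
# _BOUND_DELTAS with an absolute difference picks the largest unit whose
# threshold has been crossed (illustrative, mirrors decimal_relative_time
# below):
#
#   b_idx = bisect.bisect(_BOUND_DELTAS, timedelta(hours=30)) - 1
#   _BOUNDS[b_idx]  # -> (timedelta(days=1), timedelta(days=1), 'day')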
_PARSE_TD_RE = re.compile(r"((?P%s)\s*(?P\w)\w*)" % _FLOAT_PATTERN) _PARSE_TD_KW_MAP = {unit[0]: unit + 's' for _, _, unit in reversed(_BOUNDS[:-2])} def parse_timedelta(text): """Robustly parses a short text description of a time period into a :class:`datetime.timedelta`. Supports weeks, days, hours, minutes, and seconds, with or without decimal points: Args: text (str): Text to parse. Returns: datetime.timedelta Raises: ValueError: on parse failure. >>> parse_td('1d 2h 3.5m 0s') == timedelta(days=1, seconds=7410) True Also supports full words and whitespace. >>> parse_td('2 weeks 1 day') == timedelta(days=15) True Negative times are supported, too: >>> parse_td('-1.5 weeks 3m 20s') == timedelta(days=-11, seconds=43400) True """ td_kwargs = {} for match in _PARSE_TD_RE.finditer(text): value, unit = match.group('value'), match.group('unit') try: unit_key = _PARSE_TD_KW_MAP[unit] except KeyError: raise ValueError('invalid time unit %r, expected one of %r' % (unit, _PARSE_TD_KW_MAP.keys())) try: value = float(value) except ValueError: raise ValueError('invalid time value for unit %r: %r' % (unit, value)) td_kwargs[unit_key] = value return timedelta(**td_kwargs) parse_td = parse_timedelta # legacy alias def _cardinalize_time_unit(unit, value): # removes dependency on strutils; nice and simple because # all time units cardinalize normally if value == 1: return unit return unit + 's' def decimal_relative_time(d, other=None, ndigits=0, cardinalize=True): """Get a tuple representing the relative time difference between two :class:`~datetime.datetime` objects or one :class:`~datetime.datetime` and now. Args: d (datetime): The first datetime object. other (datetime): An optional second datetime object. If unset, defaults to the current time as determined :meth:`datetime.utcnow`. ndigits (int): The number of decimal digits to round to, defaults to ``0``. cardinalize (bool): Whether to pluralize the time unit if appropriate, defaults to ``True``. Returns: (float, str): A tuple of the :class:`float` difference and respective unit of time, pluralized if appropriate and *cardinalize* is set to ``True``. Unlike :func:`relative_time`, this method's return is amenable to localization into other languages and custom phrasing and formatting. >>> now = datetime.now(timezone.utc).replace(tzinfo=None) >>> decimal_relative_time(now - timedelta(days=1, seconds=3600), now) (1.0, 'day') >>> decimal_relative_time(now - timedelta(seconds=0.002), now, ndigits=5) (0.002, 'seconds') >>> decimal_relative_time(now, now - timedelta(days=900), ndigits=1) (-2.5, 'years') """ if other is None: other = datetime.now(timezone.utc).replace(tzinfo=None) diff = other - d diff_seconds = timedelta.total_seconds(diff) abs_diff = abs(diff) b_idx = bisect.bisect(_BOUND_DELTAS, abs_diff) - 1 bbound, bunit, bname = _BOUNDS[b_idx] f_diff = diff_seconds / timedelta.total_seconds(bunit) rounded_diff = round(f_diff, ndigits) if cardinalize: return rounded_diff, _cardinalize_time_unit(bname, abs(rounded_diff)) return rounded_diff, bname def relative_time(d, other=None, ndigits=0): """Get a string representation of the difference between two :class:`~datetime.datetime` objects or one :class:`~datetime.datetime` and the current time. Handles past and future times. Args: d (datetime): The first datetime object. other (datetime): An optional second datetime object. If unset, defaults to the current time as determined :meth:`datetime.utcnow`. ndigits (int): The number of decimal digits to round to, defaults to ``0``. 
Returns: A short English-language string. >>> now = datetime.now(timezone.utc).replace(tzinfo=None) >>> relative_time(now, ndigits=1) '0 seconds ago' >>> relative_time(now - timedelta(days=1, seconds=36000), ndigits=1) '1.4 days ago' >>> relative_time(now + timedelta(days=7), now, ndigits=1) '1 week from now' """ drt, unit = decimal_relative_time(d, other, ndigits, cardinalize=True) phrase = 'ago' if drt < 0: phrase = 'from now' return f'{abs(drt):g} {unit} {phrase}' def strpdate(string, format): """Parse the date string according to the format in `format`. Returns a :class:`date` object. Internally, :meth:`datetime.strptime` is used to parse the string and thus conversion specifiers for time fields (e.g. `%H`) may be provided; these will be parsed but ignored. Args: string (str): The date string to be parsed. format (str): The `strptime`_-style date format string. Returns: datetime.date .. _`strptime`: https://docs.python.org/2/library/datetime.html#strftime-strptime-behavior >>> strpdate('2016-02-14', '%Y-%m-%d') datetime.date(2016, 2, 14) >>> strpdate('26/12 (2015)', '%d/%m (%Y)') datetime.date(2015, 12, 26) >>> strpdate('20151231 23:59:59', '%Y%m%d %H:%M:%S') datetime.date(2015, 12, 31) >>> strpdate('20160101 00:00:00.001', '%Y%m%d %H:%M:%S.%f') datetime.date(2016, 1, 1) """ whence = datetime.strptime(string, format) return whence.date() def daterange(start, stop, step=1, inclusive=False): """In the spirit of :func:`range` and :func:`xrange`, the `daterange` generator that yields a sequence of :class:`~datetime.date` objects, starting at *start*, incrementing by *step*, until *stop* is reached. When *inclusive* is True, the final date may be *stop*, **if** *step* falls evenly on it. By default, *step* is one day. See details below for many more details. Args: start (datetime.date): The starting date The first value in the sequence. stop (datetime.date): The stopping date. By default not included in return. Can be `None` to yield an infinite sequence. step (int): The value to increment *start* by to reach *stop*. Can be an :class:`int` number of days, a :class:`datetime.timedelta`, or a :class:`tuple` of integers, `(year, month, day)`. Positive and negative *step* values are supported. inclusive (bool): Whether or not the *stop* date can be returned. *stop* is only returned when a *step* falls evenly on it. >>> christmas = date(year=2015, month=12, day=25) >>> boxing_day = date(year=2015, month=12, day=26) >>> new_year = date(year=2016, month=1, day=1) >>> for day in daterange(christmas, new_year): ... print(repr(day)) datetime.date(2015, 12, 25) datetime.date(2015, 12, 26) datetime.date(2015, 12, 27) datetime.date(2015, 12, 28) datetime.date(2015, 12, 29) datetime.date(2015, 12, 30) datetime.date(2015, 12, 31) >>> for day in daterange(christmas, boxing_day): ... print(repr(day)) datetime.date(2015, 12, 25) >>> for day in daterange(date(2017, 5, 1), date(2017, 8, 1), ... step=(0, 1, 0), inclusive=True): ... 
print(repr(day)) datetime.date(2017, 5, 1) datetime.date(2017, 6, 1) datetime.date(2017, 7, 1) datetime.date(2017, 8, 1) *Be careful when using stop=None, as this will yield an infinite sequence of dates.* """ if not isinstance(start, date): raise TypeError("start expected datetime.date instance") if stop and not isinstance(stop, date): raise TypeError("stop expected datetime.date instance or None") try: y_step, m_step, d_step = step except TypeError: y_step, m_step, d_step = 0, 0, step else: y_step, m_step = int(y_step), int(m_step) if isinstance(d_step, int): d_step = timedelta(days=int(d_step)) elif isinstance(d_step, timedelta): pass else: raise ValueError('step expected int, timedelta, or tuple' ' (year, month, day), not: %r' % step) m_step += y_step * 12 if stop is None: finished = lambda now, stop: False elif start <= stop: finished = operator.gt if inclusive else operator.ge else: finished = operator.lt if inclusive else operator.le now = start while not finished(now, stop): yield now if m_step: m_y_step, cur_month = divmod((now.month - 1) + m_step, 12) now = now.replace(year=now.year + m_y_step, month=(cur_month + 1)) now = now + d_step return # Timezone support (brought in from tzutils) ZERO = timedelta(0) HOUR = timedelta(hours=1) class ConstantTZInfo(tzinfo): """ A :class:`~datetime.tzinfo` subtype whose *offset* remains constant (no daylight savings). Args: name (str): Name of the timezone. offset (datetime.timedelta): Offset of the timezone. """ def __init__(self, name="ConstantTZ", offset=ZERO): self.name = name self.offset = offset @property def utcoffset_hours(self): return timedelta.total_seconds(self.offset) / (60 * 60) def utcoffset(self, dt): return self.offset def tzname(self, dt): return self.name def dst(self, dt): return ZERO def __repr__(self): cn = self.__class__.__name__ return f'{cn}(name={self.name!r}, offset={self.offset!r})' UTC = ConstantTZInfo('UTC') EPOCH_AWARE = datetime.fromtimestamp(0, UTC) class LocalTZInfo(tzinfo): """The ``LocalTZInfo`` type takes data available in the time module about the local timezone and makes a practical :class:`datetime.tzinfo` to represent the timezone settings of the operating system. For a more in-depth integration with the operating system, check out `tzlocal`_. It builds on `pytz`_ and implements heuristics for many versions of major operating systems to provide the official ``pytz`` tzinfo, instead of the LocalTZ generalization. .. _tzlocal: https://pypi.python.org/pypi/tzlocal .. _pytz: https://pypi.python.org/pypi/pytz """ _std_offset = timedelta(seconds=-time.timezone) _dst_offset = _std_offset if time.daylight: _dst_offset = timedelta(seconds=-time.altzone) def is_dst(self, dt): dt_t = (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.weekday(), 0, -1) local_t = time.localtime(time.mktime(dt_t)) return local_t.tm_isdst > 0 def utcoffset(self, dt): if self.is_dst(dt): return self._dst_offset return self._std_offset def dst(self, dt): if self.is_dst(dt): return self._dst_offset - self._std_offset return ZERO def tzname(self, dt): return time.tzname[self.is_dst(dt)] def __repr__(self): return '%s()' % self.__class__.__name__ LocalTZ = LocalTZInfo() def _first_sunday_on_or_after(dt): days_to_go = 6 - dt.weekday() if days_to_go: dt += timedelta(days_to_go) return dt # US DST Rules # # This is a simplified (i.e., wrong for a few cases) set of rules for US # DST start and end times. 
For a complete and up-to-date set of DST rules # and timezone definitions, visit the Olson Database (or try pytz): # http://www.twinsun.com/tz/tz-link.htm # http://sourceforge.net/projects/pytz/ (might not be up-to-date) # # In the US, since 2007, DST starts at 2am (standard time) on the second # Sunday in March, which is the first Sunday on or after Mar 8. DSTSTART_2007 = datetime(1, 3, 8, 2) # and ends at 2am (DST time; 1am standard time) on the first Sunday of Nov. DSTEND_2007 = datetime(1, 11, 1, 1) # From 1987 to 2006, DST used to start at 2am (standard time) on the first # Sunday in April and to end at 2am (DST time; 1am standard time) on the last # Sunday of October, which is the first Sunday on or after Oct 25. DSTSTART_1987_2006 = datetime(1, 4, 1, 2) DSTEND_1987_2006 = datetime(1, 10, 25, 1) # From 1967 to 1986, DST used to start at 2am (standard time) on the last # Sunday in April (the one on or after April 24) and to end at 2am (DST time; # 1am standard time) on the last Sunday of October, which is the first Sunday # on or after Oct 25. DSTSTART_1967_1986 = datetime(1, 4, 24, 2) DSTEND_1967_1986 = DSTEND_1987_2006 class USTimeZone(tzinfo): """Copied directly from the Python docs, the ``USTimeZone`` is a :class:`datetime.tzinfo` subtype used to create the :data:`Eastern`, :data:`Central`, :data:`Mountain`, and :data:`Pacific` tzinfo types. """ def __init__(self, hours, reprname, stdname, dstname): self.stdoffset = timedelta(hours=hours) self.reprname = reprname self.stdname = stdname self.dstname = dstname def __repr__(self): return self.reprname def tzname(self, dt): if self.dst(dt): return self.dstname else: return self.stdname def utcoffset(self, dt): return self.stdoffset + self.dst(dt) def dst(self, dt): if dt is None or dt.tzinfo is None: # An exception may be sensible here, in one or both cases. # It depends on how you want to treat them. The default # fromutc() implementation (called by the default astimezone() # implementation) passes a datetime with dt.tzinfo is self. return ZERO assert dt.tzinfo is self # Find start and end times for US DST. For years before 1967, return # ZERO for no DST. if 2006 < dt.year: dststart, dstend = DSTSTART_2007, DSTEND_2007 elif 1986 < dt.year < 2007: dststart, dstend = DSTSTART_1987_2006, DSTEND_1987_2006 elif 1966 < dt.year < 1987: dststart, dstend = DSTSTART_1967_1986, DSTEND_1967_1986 else: return ZERO start = _first_sunday_on_or_after(dststart.replace(year=dt.year)) end = _first_sunday_on_or_after(dstend.replace(year=dt.year)) # Can't compare naive to aware objects, so strip the timezone # from dt first. if start <= dt.replace(tzinfo=None) < end: return HOUR else: return ZERO Eastern = USTimeZone(-5, "Eastern", "EST", "EDT") Central = USTimeZone(-6, "Central", "CST", "CDT") Mountain = USTimeZone(-7, "Mountain", "MST", "MDT") Pacific = USTimeZone(-8, "Pacific", "PST", "PDT") boltons-25.0.0/boltons/typeutils.py000066400000000000000000000132661475005545200173710ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. 
# # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """Python's built-in :mod:`functools` module builds several useful utilities on top of Python's first-class function support. ``typeutils`` attempts to do the same for metaprogramming with types and instances. """ import sys from collections import deque _issubclass = issubclass def make_sentinel(name='_MISSING', var_name=None): """Creates and returns a new **instance** of a new class, suitable for usage as a "sentinel", a kind of singleton often used to indicate a value is missing when ``None`` is a valid input. Args: name (str): Name of the Sentinel var_name (str): Set this name to the name of the variable in its respective module enable pickleability. Note: pickleable sentinels should be global constants at the top level of their module. >>> make_sentinel(var_name='_MISSING') _MISSING The most common use cases here in boltons are as default values for optional function arguments, partly because of its less-confusing appearance in automatically generated documentation. Sentinels also function well as placeholders in queues and linked lists. .. note:: By design, additional calls to ``make_sentinel`` with the same values will not produce equivalent objects. >>> make_sentinel('TEST') == make_sentinel('TEST') False >>> type(make_sentinel('TEST')) == type(make_sentinel('TEST')) False """ class Sentinel: def __init__(self): self.name = name self.var_name = var_name def __repr__(self): if self.var_name: return self.var_name return f'{self.__class__.__name__}({self.name!r})' if var_name: def __reduce__(self): return self.var_name def __bool__(self): return False def __copy__(self): return self def __deepcopy__(self, _memo): return self if var_name: frame = sys._getframe(1) module = frame.f_globals.get('__name__') if not module or module not in sys.modules: raise ValueError('Pickleable sentinel objects (with var_name) can only' ' be created from top-level module scopes') Sentinel.__module__ = module return Sentinel() def issubclass(subclass, baseclass): """Just like the built-in :func:`issubclass`, this function checks whether *subclass* is inherited from *baseclass*. Unlike the built-in function, this ``issubclass`` will simply return ``False`` if either argument is not suitable (e.g., if *subclass* is not an instance of :class:`type`), instead of raising :exc:`TypeError`. Args: subclass (type): The target class to check. baseclass (type): The base class *subclass* will be checked against. >>> class MyObject(object): pass ... 
>>> issubclass(MyObject, object) # always a fun fact True >>> issubclass('hi', 'friend') False """ try: return _issubclass(subclass, baseclass) except TypeError: return False def get_all_subclasses(cls): """Recursively finds and returns a :class:`list` of all types inherited from *cls*. >>> class A(object): ... pass ... >>> class B(A): ... pass ... >>> class C(B): ... pass ... >>> class D(A): ... pass ... >>> [t.__name__ for t in get_all_subclasses(A)] ['B', 'D', 'C'] >>> [t.__name__ for t in get_all_subclasses(B)] ['C'] """ try: to_check = deque(cls.__subclasses__()) except (AttributeError, TypeError): raise TypeError('expected type object, not %r' % cls) seen, ret = set(), [] while to_check: cur = to_check.popleft() if cur in seen: continue ret.append(cur) seen.add(cur) to_check.extend(cur.__subclasses__()) return ret class classproperty: """Much like a :class:`property`, but the wrapped get function is a class method. For simplicity, only read-only properties are implemented. """ def __init__(self, fn): self.fn = fn def __get__(self, instance, cls): return self.fn(cls) boltons-25.0.0/boltons/urlutils.py000066400000000000000000001602211475005545200172040ustar00rootroot00000000000000# Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """:mod:`urlutils` is a module dedicated to one of software's most versatile, well-aged, and beloved data structures: the URL, also known as the `Uniform Resource Locator`_. Among other things, this module is a full reimplementation of URLs, without any reliance on the :mod:`urlparse` or :mod:`urllib` standard library modules. The centerpiece and top-level interface of urlutils is the :class:`URL` type. Also featured is the :func:`find_all_links` convenience function. Some low-level functions and constants are also below. The implementations in this module are based heavily on `RFC 3986`_ and `RFC 3987`_, and incorporates details from several other RFCs and `W3C documents`_. .. _Uniform Resource Locator: https://en.wikipedia.org/wiki/Uniform_Resource_Locator .. _RFC 3986: https://tools.ietf.org/html/rfc3986 .. _RFC 3987: https://tools.ietf.org/html/rfc3987 .. 
_W3C documents: https://www.w3.org/TR/uri-clarification/ """ import re import socket import string from unicodedata import normalize # The unreserved URI characters (per RFC 3986 Section 2.3) _UNRESERVED_CHARS = frozenset('~-._0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz') # URL parsing regex (based on RFC 3986 Appendix B, with modifications) _URL_RE = re.compile(r'^((?P<scheme>[^:/?#]+):)?' r'((?P<_netloc_sep>//)(?P<authority>[^/?#]*))?' r'(?P<path>[^?#]*)' r'(\?(?P<query>[^#]*))?' r'(#(?P<fragment>.*))?') _HEX_CHAR_MAP = {(a + b).encode('ascii'): chr(int(a + b, 16)).encode('charmap') for a in string.hexdigits for b in string.hexdigits} _ASCII_RE = re.compile('([\x00-\x7f]+)') # This port list painstakingly curated by hand searching through # https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml # and # https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml SCHEME_PORT_MAP = {'acap': 674, 'afp': 548, 'dict': 2628, 'dns': 53, 'file': None, 'ftp': 21, 'git': 9418, 'gopher': 70, 'http': 80, 'https': 443, 'imap': 143, 'ipp': 631, 'ipps': 631, 'irc': 194, 'ircs': 6697, 'ldap': 389, 'ldaps': 636, 'mms': 1755, 'msrp': 2855, 'msrps': None, 'mtqp': 1038, 'nfs': 111, 'nntp': 119, 'nntps': 563, 'pop': 110, 'prospero': 1525, 'redis': 6379, 'rsync': 873, 'rtsp': 554, 'rtsps': 322, 'rtspu': 5005, 'sftp': 22, 'smb': 445, 'snmp': 161, 'ssh': 22, 'steam': None, 'svn': 3690, 'telnet': 23, 'ventrilo': 3784, 'vnc': 5900, 'wais': 210, 'ws': 80, 'wss': 443, 'xmpp': None} # This list of schemes that don't use authorities is also from the link above. NO_NETLOC_SCHEMES = {'urn', 'about', 'bitcoin', 'blob', 'data', 'geo', 'magnet', 'mailto', 'news', 'pkcs11', 'sip', 'sips', 'tel'} # As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc # RFC 3986 section 2.2, Reserved Characters _GEN_DELIMS = frozenset(':/?#[]@') _SUB_DELIMS = frozenset("!$&'()*+,;=") _ALL_DELIMS = _GEN_DELIMS | _SUB_DELIMS _USERINFO_SAFE = _UNRESERVED_CHARS | _SUB_DELIMS _USERINFO_DELIMS = _ALL_DELIMS - _USERINFO_SAFE _PATH_SAFE = _UNRESERVED_CHARS | _SUB_DELIMS | set(':@') _PATH_DELIMS = _ALL_DELIMS - _PATH_SAFE _FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set('/?') _FRAGMENT_DELIMS = _ALL_DELIMS - _FRAGMENT_SAFE _QUERY_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set('&=+') _QUERY_DELIMS = _ALL_DELIMS - _QUERY_SAFE class URLParseError(ValueError): """Exception inheriting from :exc:`ValueError`, raised when failing to parse a URL. Mostly raised on invalid ports and IPv6 addresses. """ pass DEFAULT_ENCODING = 'utf8' def to_unicode(obj): try: return str(obj) except UnicodeDecodeError: return str(obj, encoding=DEFAULT_ENCODING) # regex from gruber via tornado # doesn't support ipv6 # doesn't support mailto (netloc-less schemes) _FIND_ALL_URL_RE = re.compile(r"""\b((?:([\w-]+):(/{1,3})|www[.])(?:(?:(?:[^\s&()<>]|&amp;|&quot;)*(?:[^!"#$%'()*+,.:;<=>?@\[\]^`{|}~\s]))|(?:\((?:[^\s&()<>]|&amp;|&quot;)*\)))+)""") def find_all_links(text, with_text=False, default_scheme='https', schemes=()): """This function uses heuristics to search plain text for strings that look like URLs, returning a :class:`list` of :class:`URL` objects. It supports limiting the accepted schemes, and returning interleaved text as well. >>> find_all_links('Visit https://boltons.rtfd.org!') [URL(u'https://boltons.rtfd.org')] >>> find_all_links('Visit https://boltons.rtfd.org!', with_text=True) [u'Visit ', URL(u'https://boltons.rtfd.org'), u'!'] Args: text (str): The text to search.
with_text (bool): Whether or not to interleave plaintext blocks with the returned URL objects. Having all tokens can be useful for transforming the text, e.g., replacing links with HTML equivalents. Defaults to ``False``. default_scheme (str): Many URLs are written without the scheme component. This function can match a reasonable subset of those, provided *default_scheme* is set to a string. Set to ``False`` to disable matching scheme-less URLs. Defaults to ``'https'``. schemes (list): A list of strings that a URL's scheme must match in order to be included in the results. Defaults to empty, which matches all schemes. .. note:: Currently this function does not support finding IPv6 addresses or URLs with netloc-less schemes, like mailto. """ text = to_unicode(text) prev_end, start, end = 0, None, None ret = [] _add = ret.append def _add_text(t): if ret and isinstance(ret[-1], str): ret[-1] += t else: _add(t) for match in _FIND_ALL_URL_RE.finditer(text): start, end = match.start(1), match.end(1) if prev_end < start and with_text: _add(text[prev_end:start]) prev_end = end try: cur_url_text = match.group(0) cur_url = URL(cur_url_text) if not cur_url.scheme: if default_scheme: cur_url = URL(default_scheme + '://' + cur_url_text) else: _add_text(text[start:end]) continue if schemes and cur_url.scheme not in schemes: _add_text(text[start:end]) else: _add(cur_url) except URLParseError: # currently this should only be hit with broken port # strings. the regex above doesn't support ipv6 addresses if with_text: _add_text(text[start:end]) if with_text: tail = text[prev_end:] if tail: _add_text(tail) return ret def _make_quote_map(safe_chars): ret = {} # v is included in the dict for py3 mostly, because bytestrings # are iterables of ints, of course! for i, v in zip(range(256), range(256)): c = chr(v) if c in safe_chars: ret[c] = ret[v] = c else: ret[c] = ret[v] = f'%{i:02X}' return ret _USERINFO_PART_QUOTE_MAP = _make_quote_map(_USERINFO_SAFE) _PATH_PART_QUOTE_MAP = _make_quote_map(_PATH_SAFE) _QUERY_PART_QUOTE_MAP = _make_quote_map(_QUERY_SAFE) _FRAGMENT_QUOTE_MAP = _make_quote_map(_FRAGMENT_SAFE) def quote_path_part(text, full_quote=True): """ Percent-encode a single segment of a URL path. """ if full_quote: bytestr = normalize('NFC', to_unicode(text)).encode('utf8') return ''.join([_PATH_PART_QUOTE_MAP[b] for b in bytestr]) return ''.join([_PATH_PART_QUOTE_MAP[t] if t in _PATH_DELIMS else t for t in text]) def quote_query_part(text, full_quote=True): """ Percent-encode a single query string key or value. """ if full_quote: bytestr = normalize('NFC', to_unicode(text)).encode('utf8') return ''.join([_QUERY_PART_QUOTE_MAP[b] for b in bytestr]) return ''.join([_QUERY_PART_QUOTE_MAP[t] if t in _QUERY_DELIMS else t for t in text]) def quote_fragment_part(text, full_quote=True): """Quote the fragment part of the URL. Fragments don't have subdelimiters, so the whole URL fragment can be passed. """ if full_quote: bytestr = normalize('NFC', to_unicode(text)).encode('utf8') return ''.join([_FRAGMENT_QUOTE_MAP[b] for b in bytestr]) return ''.join([_FRAGMENT_QUOTE_MAP[t] if t in _FRAGMENT_DELIMS else t for t in text]) def quote_userinfo_part(text, full_quote=True): """Quote special characters in either the username or password section of the URL. Note that userinfo in URLs is considered deprecated in many circles (especially browsers), and support for percent-encoded userinfo can be spotty. 
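    A minimal doctest sketch (the outputs follow from the
    ``_USERINFO_SAFE`` set defined above, which excludes reserved
    characters like spaces and slashes):

    >>> quote_userinfo_part('a user')
    'a%20user'
    >>> quote_userinfo_part('pass/word')
    'pass%2Fword'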
""" if full_quote: bytestr = normalize('NFC', to_unicode(text)).encode('utf8') return ''.join([_USERINFO_PART_QUOTE_MAP[b] for b in bytestr]) return ''.join([_USERINFO_PART_QUOTE_MAP[t] if t in _USERINFO_DELIMS else t for t in text]) def unquote(string, encoding='utf-8', errors='replace'): """Percent-decode a string, by replacing %xx escapes with their single-character equivalent. The optional *encoding* and *errors* parameters specify how to decode percent-encoded sequences into Unicode characters, as accepted by the :meth:`bytes.decode()` method. By default, percent-encoded sequences are decoded with UTF-8, and invalid sequences are replaced by a placeholder character. >>> unquote(u'abc%20def') u'abc def' """ if '%' not in string: string.split return string if encoding is None: encoding = 'utf-8' if errors is None: errors = 'replace' bits = _ASCII_RE.split(string) res = [bits[0]] append = res.append for i in range(1, len(bits), 2): append(unquote_to_bytes(bits[i]).decode(encoding, errors)) append(bits[i + 1]) return ''.join(res) def unquote_to_bytes(string): """unquote_to_bytes('abc%20def') -> b'abc def'.""" # Note: strings are encoded as UTF-8. This is only an issue if it contains # unescaped non-ASCII characters, which URIs should not. if not string: # Is it a string-like object? string.split return b'' if isinstance(string, str): string = string.encode('utf-8') bits = string.split(b'%') if len(bits) == 1: return string # import pdb;pdb.set_trace() res = [bits[0]] append = res.append for item in bits[1:]: try: append(_HEX_CHAR_MAP[item[:2]]) append(item[2:]) except KeyError: append(b'%') append(item) return b''.join(res) def register_scheme(text, uses_netloc=None, default_port=None): """Registers new scheme information, resulting in correct port and slash behavior from the URL object. There are dozens of standard schemes preregistered, so this function is mostly meant for proprietary internal customizations or stopgaps on missing standards information. If a scheme seems to be missing, please `file an issue`_! Args: text (str): Text representing the scheme. (the 'http' in 'http://hatnote.com') uses_netloc (bool): Does the scheme support specifying a network host? For instance, "http" does, "mailto" does not. default_port (int): The default port, if any, for netloc-using schemes. .. _file an issue: https://github.com/mahmoud/boltons/issues """ text = text.lower() if default_port is not None: try: default_port = int(default_port) except ValueError: raise ValueError('default_port expected integer or None, not %r' % (default_port,)) if uses_netloc is True: SCHEME_PORT_MAP[text] = default_port elif uses_netloc is False: if default_port is not None: raise ValueError('unexpected default port while specifying' ' non-netloc scheme: %r' % default_port) NO_NETLOC_SCHEMES.add(text) elif uses_netloc is not None: raise ValueError('uses_netloc expected True, False, or None') return def resolve_path_parts(path_parts): """Normalize the URL path by resolving segments of '.' and '..', resulting in a dot-free path. See RFC 3986 section 5.2.4, Remove Dot Segments. """ # TODO: what to do with multiple slashes ret = [] for part in path_parts: if part == '.': pass elif part == '..': if ret and (len(ret) > 1 or ret[0]): # prevent unrooting ret.pop() else: ret.append(part) if list(path_parts[-1:]) in (['.'], ['..']): ret.append('') return ret class cachedproperty: """The ``cachedproperty`` is used similar to :class:`property`, except that the wrapped method is only called once. 
This is commonly used to implement lazy attributes. After the property has been accessed, the value is stored on the instance itself, using the same name as the cachedproperty. This allows the cache to be cleared with :func:`delattr`, or through manipulating the object's ``__dict__``. """ def __init__(self, func): self.__doc__ = getattr(func, '__doc__') self.func = func def __get__(self, obj, objtype=None): if obj is None: return self value = obj.__dict__[self.func.__name__] = self.func(obj) return value def __repr__(self): cn = self.__class__.__name__ return f'<{cn} func={self.func}>' class URL: r"""The URL is one of the most ubiquitous data structures in the virtual and physical landscape. From blogs to billboards, URLs are so common, that it's easy to overlook their complexity and power. There are 8 parts of a URL, each with its own semantics and special characters: * :attr:`~URL.scheme` * :attr:`~URL.username` * :attr:`~URL.password` * :attr:`~URL.host` * :attr:`~URL.port` * :attr:`~URL.path` * :attr:`~URL.query_params` (query string parameters) * :attr:`~URL.fragment` Each is exposed as an attribute on the URL object. RFC 3986 offers this brief structural summary of the main URL components:: foo://user:pass@example.com:8042/over/there?name=ferret#nose \_/ \_______/ \_________/ \__/\_________/ \_________/ \__/ | | | | | | | scheme userinfo host port path query fragment And here's how that example can be manipulated with the URL type: >>> url = URL('foo://example.com:8042/over/there?name=ferret#nose') >>> print(url.host) example.com >>> print(url.get_authority()) example.com:8042 >>> print(url.qp['name']) # qp is a synonym for query_params ferret URL's approach to encoding is that inputs are decoded as much as possible, and data remains in this decoded state until re-encoded using the :meth:`~URL.to_text()` method. In this way, it's similar to Python's current approach of encouraging immediate decoding of bytes to text. Note that URL instances are mutable objects. If an immutable representation of the URL is desired, the string from :meth:`~URL.to_text()` may be used. For an immutable, but almost-as-featureful, URL object, check out the `hyperlink package`_. .. _hyperlink package: https://github.com/mahmoud/hyperlink """ # public attributes (for comparison, see __eq__): _cmp_attrs = ('scheme', 'uses_netloc', 'username', 'password', 'family', 'host', 'port', 'path', 'query_params', 'fragment') def __init__(self, url=''): # TODO: encoding param. The encoding that underlies the # percent-encoding is always utf8 for IRIs, but can be Latin-1 # for other usage schemes. ud = DEFAULT_PARSED_URL if url: if isinstance(url, URL): url = url.to_text() # better way to copy URLs? elif isinstance(url, bytes): try: url = url.decode(DEFAULT_ENCODING) except UnicodeDecodeError as ude: raise URLParseError('expected text or %s-encoded bytes.' ' try decoding the url bytes and' ' passing the result. 
(got: %s)' % (DEFAULT_ENCODING, ude)) ud = parse_url(url) _e = '' self.scheme = ud['scheme'] or _e self._netloc_sep = ud['_netloc_sep'] or _e self.username = (unquote(ud['username']) if '%' in (ud['username'] or _e) else ud['username'] or _e) self.password = (unquote(ud['password']) if '%' in (ud['password'] or _e) else ud['password'] or _e) self.family = ud['family'] if not ud['host']: self.host = _e else: try: self.host = ud['host'].encode("ascii") except UnicodeEncodeError: self.host = ud['host'] # already non-ascii text else: self.host = self.host.decode("idna") self.port = ud['port'] self.path_parts = tuple([unquote(p) if '%' in p else p for p in (ud['path'] or _e).split('/')]) self._query = ud['query'] or _e self.fragment = (unquote(ud['fragment']) if '%' in (ud['fragment'] or _e) else ud['fragment'] or _e) # TODO: possibly use None as marker for empty vs missing return @classmethod def from_parts(cls, scheme=None, host=None, path_parts=(), query_params=(), fragment='', port=None, username=None, password=None): """Build a new URL from parts. Note that the respective arguments are not in the order they would appear in a URL: Args: scheme (str): The scheme of a URL, e.g., 'http' host (str): The host string, e.g., 'hatnote.com' path_parts (tuple): The individual text segments of the path, e.g., ('post', '123') query_params (dict): An OMD, dict, or list of (key, value) pairs representing the keys and values of the URL's query parameters. fragment (str): The fragment of the URL, e.g., 'anchor1' port (int): The integer port of URL, automatic defaults are available for registered schemes. username (str): The username for the userinfo part of the URL. password (str): The password for the userinfo part of the URL. Note that this method does relatively little validation. :meth:`URL.to_text()` should be used to check if any errors are produced while composing the final textual URL. """ ret = cls() ret.scheme = scheme ret.host = host ret.path_parts = tuple(path_parts) or ('',) ret.query_params.update(query_params) ret.fragment = fragment ret.port = port ret.username = username ret.password = password return ret @cachedproperty def query_params(self): """The parsed form of the query string of the URL, represented as a :class:`~dictutils.OrderedMultiDict`. Also available as the handy alias ``qp``. >>> url = URL('http://boltons.readthedocs.io/?utm_source=doctest&python=great') >>> url.qp.keys() [u'utm_source', u'python'] """ return QueryParamDict.from_text(self._query) qp = query_params @property def path(self): "The URL's path, in text form." return '/'.join([quote_path_part(p, full_quote=False) for p in self.path_parts]) @path.setter def path(self, path_text): self.path_parts = tuple([unquote(p) if '%' in p else p for p in to_unicode(path_text).split('/')]) return @property def uses_netloc(self): """Whether or not a URL uses :code:`:` or :code:`://` to separate the scheme from the rest of the URL depends on the scheme's own standard definition. There is no way to infer this behavior from other parts of the URL. A scheme either supports network locations or it does not. The URL type's approach to this is to check for explicitly registered schemes, with common schemes like HTTP preregistered. This is the same approach taken by :mod:`urlparse`. URL adds two additional heuristics if the scheme as a whole is not registered. First, it attempts to check the subpart of the scheme after the last ``+`` character. This adds intuitive behavior for schemes like ``git+ssh``. 
Second, if a URL with an unrecognized scheme is loaded, it will maintain the separator it sees. >>> print(URL('fakescheme://test.com').to_text()) fakescheme://test.com >>> print(URL('mockscheme:hello:world').to_text()) mockscheme:hello:world """ default = self._netloc_sep if self.scheme in SCHEME_PORT_MAP: return True if self.scheme in NO_NETLOC_SCHEMES: return False if self.scheme.split('+')[-1] in SCHEME_PORT_MAP: return True return default @property def default_port(self): """Return the default port for the currently-set scheme. Returns ``None`` if the scheme is unrecognized. See :func:`register_scheme` above. If :attr:`~URL.port` matches this value, no port is emitted in the output of :meth:`~URL.to_text()`. Applies the same '+' heuristic detailed in :meth:`URL.uses_netloc`. """ try: return SCHEME_PORT_MAP[self.scheme] except KeyError: return SCHEME_PORT_MAP.get(self.scheme.split('+')[-1]) def normalize(self, with_case=True): """Resolve any "." and ".." references in the path, as well as normalize scheme and host casing. To turn off case normalization, pass ``with_case=False``. More information can be found in `Section 6.2.2 of RFC 3986`_. .. _Section 6.2.2 of RFC 3986: https://tools.ietf.org/html/rfc3986#section-6.2.2 """ self.path_parts = resolve_path_parts(self.path_parts) if with_case: self.scheme = self.scheme.lower() self.host = self.host.lower() return def navigate(self, dest): """Factory method that returns a _new_ :class:`URL` based on a given destination, *dest*. Useful for navigating those relative links with ease. The newly created :class:`URL` is normalized before being returned. >>> url = URL('http://boltons.readthedocs.io') >>> url.navigate('en/latest/') URL(u'http://boltons.readthedocs.io/en/latest/') Args: dest (str): A string or URL object representing the destination More information can be found in `Section 5 of RFC 3986`_. .. _Section 5 of RFC 3986: https://tools.ietf.org/html/rfc3986#section-5 """ orig_dest = None if not isinstance(dest, URL): dest, orig_dest = URL(dest), dest if dest.scheme and dest.host: # absolute URLs replace everything, but don't make an # extra copy if we don't have to return URL(dest) if orig_dest is None else dest query_params = dest.query_params if dest.path: if dest.path.startswith('/'): # absolute path new_path_parts = list(dest.path_parts) else: # relative path new_path_parts = list(self.path_parts[:-1]) \ + list(dest.path_parts) else: new_path_parts = list(self.path_parts) if not query_params: query_params = self.query_params ret = self.from_parts(scheme=dest.scheme or self.scheme, host=dest.host or self.host, port=dest.port or self.port, path_parts=new_path_parts, query_params=query_params, fragment=dest.fragment, username=dest.username or self.username, password=dest.password or self.password) ret.normalize() return ret def get_authority(self, full_quote=False, with_userinfo=False): """Used by URL schemes that have a network location, :meth:`~URL.get_authority` combines :attr:`username`, :attr:`password`, :attr:`host`, and :attr:`port` into one string, the *authority*, that is used for connecting to a network-accessible resource. Used internally by :meth:`~URL.to_text()` and can be useful for labeling connections. >>> url = URL('ftp://user@ftp.debian.org:2121/debian/README') >>> print(url.get_authority()) ftp.debian.org:2121 >>> print(url.get_authority(with_userinfo=True)) user@ftp.debian.org:2121 Args: full_quote (bool): Whether or not to apply IDNA encoding. Defaults to ``False``. 
with_userinfo (bool): Whether or not to include username and password, technically part of the authority. Defaults to ``False``. """ parts = [] _add = parts.append if self.username and with_userinfo: _add(quote_userinfo_part(self.username)) if self.password: _add(':') _add(quote_userinfo_part(self.password)) _add('@') if self.host: if self.family == socket.AF_INET6: _add('[') _add(self.host) _add(']') elif full_quote: _add(self.host.encode('idna').decode('ascii')) else: _add(self.host) # TODO: 0 port? if self.port and self.port != self.default_port: _add(':') _add(str(self.port)) return ''.join(parts) def to_text(self, full_quote=False): """Render a string representing the current state of the URL object. >>> url = URL('http://listen.hatnote.com') >>> url.fragment = 'en' >>> print(url.to_text()) http://listen.hatnote.com#en By setting the *full_quote* flag, the URL can either be fully quoted or minimally quoted. The most common characteristic of an encoded-URL is the presence of percent-encoded text (e.g., %60). Unquoted URLs are more readable and suitable for display, whereas fully-quoted URLs are more conservative and generally necessary for sending over the network. """ scheme = self.scheme path = '/'.join([quote_path_part(p, full_quote=full_quote) for p in self.path_parts]) authority = self.get_authority(full_quote=full_quote, with_userinfo=True) query_string = self.query_params.to_text(full_quote=full_quote) fragment = quote_fragment_part(self.fragment, full_quote=full_quote) parts = [] _add = parts.append if scheme: _add(scheme) _add(':') if authority: _add('//') _add(authority) elif (scheme and path[:2] != '//' and self.uses_netloc): _add('//') if path: if scheme and authority and path[:1] != '/': _add('/') # TODO: i think this is here because relative paths # with absolute authorities = undefined _add(path) if query_string: _add('?') _add(query_string) if fragment: _add('#') _add(fragment) return ''.join(parts) def __repr__(self): cn = self.__class__.__name__ return f'{cn}({self.to_text()!r})' def __str__(self): return self.to_text() def __unicode__(self): return self.to_text() def __eq__(self, other): for attr in self._cmp_attrs: if not getattr(self, attr) == getattr(other, attr, None): return False return True def __ne__(self, other): return not self == other try: from socket import inet_pton except ImportError: # from https://gist.github.com/nnemkin/4966028 import ctypes class _sockaddr(ctypes.Structure): _fields_ = [("sa_family", ctypes.c_short), ("__pad1", ctypes.c_ushort), ("ipv4_addr", ctypes.c_byte * 4), ("ipv6_addr", ctypes.c_byte * 16), ("__pad2", ctypes.c_ulong)] WSAStringToAddressA = ctypes.windll.ws2_32.WSAStringToAddressA WSAAddressToStringA = ctypes.windll.ws2_32.WSAAddressToStringA def inet_pton(address_family, ip_string): addr = _sockaddr() ip_string = ip_string.encode('ascii') addr.sa_family = address_family addr_size = ctypes.c_int(ctypes.sizeof(addr)) if WSAStringToAddressA(ip_string, address_family, None, ctypes.byref(addr), ctypes.byref(addr_size)) != 0: raise OSError(ctypes.FormatError()) if address_family == socket.AF_INET: return ctypes.string_at(addr.ipv4_addr, 4) if address_family == socket.AF_INET6: return ctypes.string_at(addr.ipv6_addr, 16) raise OSError('unknown address family') def parse_host(host): """\ Low-level function used to parse the host portion of a URL. Returns a tuple of (family, host) where *family* is a :mod:`socket` module constant or ``None``, and host is a string. 
>>> parse_host('googlewebsite.com') == (None, 'googlewebsite.com') True >>> parse_host('[::1]') == (socket.AF_INET6, '::1') True >>> parse_host('192.168.1.1') == (socket.AF_INET, '192.168.1.1') True Odd doctest formatting above due to py3's switch from int to enums for :mod:`socket` constants. """ if not host: return None, '' if ':' in host and '[' == host[0] and ']' == host[-1]: host = host[1:-1] try: inet_pton(socket.AF_INET6, host) except OSError as se: raise URLParseError(f'invalid IPv6 host: {host!r} ({se!r})') except UnicodeEncodeError: pass # TODO: this can't be a real host right? else: family = socket.AF_INET6 return family, host try: inet_pton(socket.AF_INET, host) except (OSError, UnicodeEncodeError): family = None # not an IP else: family = socket.AF_INET return family, host def parse_url(url_text): """\ Used to parse the text for a single URL into a dictionary, used internally by the :class:`URL` type. Note that "URL" has a very narrow, standards-based definition. While :func:`parse_url` may raise :class:`URLParseError` under a very limited number of conditions, such as non-integer port, a surprising number of strings are technically valid URLs. For instance, the text ``"url"`` is a valid URL, because it is a relative path. In short, do not expect this function to validate form inputs or other more colloquial usages of URLs. >>> res = parse_url('http://127.0.0.1:3000/?a=1') >>> sorted(res.keys()) # res is a basic dictionary ['_netloc_sep', 'authority', 'family', 'fragment', 'host', 'password', 'path', 'port', 'query', 'scheme', 'username'] """ url_text = str(url_text) # raise TypeError('parse_url expected text, not %r' % url_str) um = _URL_RE.match(url_text) try: gs = um.groupdict() except AttributeError: raise URLParseError('could not parse url: %r' % url_text) au_text = gs['authority'] user, pw, hostinfo = None, None, au_text if au_text: userinfo, sep, hostinfo = au_text.rpartition('@') if sep: # TODO: empty userinfo error? user, _, pw = userinfo.partition(':') host, port = None, None if hostinfo: host, sep, port_str = hostinfo.partition(':') if sep: if host and host[0] == '[' and ']' in port_str: host_right, _, port_str = port_str.partition(']') host = host + ':' + host_right + ']' if port_str and port_str[0] == ':': port_str = port_str[1:] try: port = int(port_str) except ValueError: if port_str: # empty ports ok according to RFC 3986 6.2.3 raise URLParseError('expected integer for port, not %r' % port_str) port = None family, host = parse_host(host) gs['username'] = user gs['password'] = pw gs['family'] = family gs['host'] = host gs['port'] = port return gs DEFAULT_PARSED_URL = parse_url('') def parse_qsl(qs, keep_blank_values=True, encoding=DEFAULT_ENCODING): """ Converts a query string into a list of (key, value) pairs. """ pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] ret = [] for pair in pairs: if not pair: continue key, _, value = pair.partition('=') if not value: if keep_blank_values: value = None else: continue key = unquote(key.replace('+', ' ')) if value: value = unquote(value.replace('+', ' ')) ret.append((key, value)) return ret """ # What follows is the OrderedMultiDict from dictutils.py, circa # 20161021, used for the QueryParamDict, toward the bottom. 
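#
# As a quick orientation before diving in: a minimal sketch of the
# parsing helper above, using only functions defined in this module:
#
#   >>> parse_qsl('a=1&a=2&b')
#   [('a', '1'), ('a', '2'), ('b', None)]
#
# These pairs are exactly what QueryParamDict.from_text() feeds into
# the OrderedMultiDict machinery below.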
""" from collections.abc import KeysView, ValuesView, ItemsView from itertools import zip_longest try: from .typeutils import make_sentinel _MISSING = make_sentinel(var_name='_MISSING') except ImportError: _MISSING = object() PREV, NEXT, KEY, VALUE, SPREV, SNEXT = range(6) class OrderedMultiDict(dict): """A MultiDict is a dictionary that can have multiple values per key and the OrderedMultiDict (OMD) is a MultiDict that retains original insertion order. Common use cases include: * handling query strings parsed from URLs * inverting a dictionary to create a reverse index (values to keys) * stacking data from multiple dictionaries in a non-destructive way The OrderedMultiDict constructor is identical to the built-in :class:`dict`, and overall the API constitutes an intuitive superset of the built-in type: >>> omd = OrderedMultiDict() >>> omd['a'] = 1 >>> omd['b'] = 2 >>> omd.add('a', 3) >>> omd.get('a') 3 >>> omd.getlist('a') [1, 3] Some non-:class:`dict`-like behaviors also make an appearance, such as support for :func:`reversed`: >>> list(reversed(omd)) ['b', 'a'] Note that unlike some other MultiDicts, this OMD gives precedence to the most recent value added. ``omd['a']`` refers to ``3``, not ``1``. >>> omd OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)]) >>> omd.poplast('a') 3 >>> omd OrderedMultiDict([('a', 1), ('b', 2)]) >>> omd.pop('a') 1 >>> omd OrderedMultiDict([('b', 2)]) If you want a safe-to-modify or flat dictionary, use :meth:`OrderedMultiDict.todict()`. >>> from pprint import pprint as pp # preserve printed ordering >>> omd = OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)]) >>> pp(omd.todict()) {'a': 3, 'b': 2} >>> pp(omd.todict(multi=True)) {'a': [1, 3], 'b': [2]} With ``multi=False``, items appear with the keys in to original insertion order, alongside the most-recently inserted value for that key. >>> OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)]).items(multi=False) [('a', 3), ('b', 2)] .. warning:: ``dict(omd)`` changed behavior `in Python 3.7 `_ due to changes made to support the transition from :class:`collections.OrderedDict` to the built-in dictionary being ordered. Before 3.7, the result would be a new dictionary, with values that were lists, similar to ``omd.todict(multi=True)`` (but only shallow-copy; the lists were direct references to OMD internal structures). From 3.7 onward, the values became singular, like ``omd.todict(multi=False)``. For reliable cross-version behavior, just use :meth:`~OrderedMultiDict.todict()`. """ def __new__(cls, *a, **kw): ret = super().__new__(cls) ret._clear_ll() return ret def __init__(self, *args, **kwargs): if len(args) > 1: raise TypeError('%s expected at most 1 argument, got %s' % (self.__class__.__name__, len(args))) super().__init__() if args: self.update_extend(args[0]) if kwargs: self.update(kwargs) def __getstate__(self): return list(self.iteritems(multi=True)) def __setstate__(self, state): self.clear() self.update_extend(state) def _clear_ll(self): try: _map = self._map except AttributeError: _map = self._map = {} self.root = [] _map.clear() self.root[:] = [self.root, self.root, None] def _insert(self, k, v): root = self.root cells = self._map.setdefault(k, []) last = root[PREV] cell = [last, root, k, v] last[NEXT] = root[PREV] = cell cells.append(cell) def add(self, k, v): """Add a single value *v* under a key *k*. Existing values under *k* are preserved. 
""" values = super().setdefault(k, []) self._insert(k, v) values.append(v) def addlist(self, k, v): """Add an iterable of values underneath a specific key, preserving any values already under that key. >>> omd = OrderedMultiDict([('a', -1)]) >>> omd.addlist('a', range(3)) >>> omd OrderedMultiDict([('a', -1), ('a', 0), ('a', 1), ('a', 2)]) Called ``addlist`` for consistency with :meth:`getlist`, but tuples and other sequences and iterables work. """ if not v: return self_insert = self._insert values = super().setdefault(k, []) for subv in v: self_insert(k, subv) values.extend(v) def get(self, k, default=None): """Return the value for key *k* if present in the dictionary, else *default*. If *default* is not given, ``None`` is returned. This method never raises a :exc:`KeyError`. To get all values under a key, use :meth:`OrderedMultiDict.getlist`. """ return super().get(k, [default])[-1] def getlist(self, k, default=_MISSING): """Get all values for key *k* as a list, if *k* is in the dictionary, else *default*. The list returned is a copy and can be safely mutated. If *default* is not given, an empty :class:`list` is returned. """ try: return super().__getitem__(k)[:] except KeyError: if default is _MISSING: return [] return default def clear(self): "Empty the dictionary." super().clear() self._clear_ll() def setdefault(self, k, default=_MISSING): """If key *k* is in the dictionary, return its value. If not, insert *k* with a value of *default* and return *default*. *default* defaults to ``None``. See :meth:`dict.setdefault` for more information. """ if not super().__contains__(k): self[k] = None if default is _MISSING else default return self[k] def copy(self): "Return a shallow copy of the dictionary." return self.__class__(self.iteritems(multi=True)) @classmethod def fromkeys(cls, keys, default=None): """Create a dictionary from a list of keys, with all the values set to *default*, or ``None`` if *default* is not set. """ return cls([(k, default) for k in keys]) def update(self, E, **F): """Add items from a dictionary or iterable (and/or keyword arguments), overwriting values under an existing key. See :meth:`dict.update` for more details. """ # E and F are throwback names to the dict() __doc__ if E is self: return self_add = self.add if isinstance(E, OrderedMultiDict): for k in E: if k in self: del self[k] for k, v in E.iteritems(multi=True): self_add(k, v) elif callable(getattr(E, 'keys', None)): for k in E.keys(): self[k] = E[k] else: seen = set() seen_add = seen.add for k, v in E: if k not in seen and k in self: del self[k] seen_add(k) self_add(k, v) for k in F: self[k] = F[k] return def update_extend(self, E, **F): """Add items from a dictionary, iterable, and/or keyword arguments without overwriting existing items present in the dictionary. Like :meth:`update`, but adds to existing keys instead of overwriting them. 
""" if E is self: iterator = iter(E.items()) elif isinstance(E, OrderedMultiDict): iterator = E.iteritems(multi=True) elif hasattr(E, 'keys'): iterator = ((k, E[k]) for k in E.keys()) else: iterator = E self_add = self.add for k, v in iterator: self_add(k, v) def __setitem__(self, k, v): if super().__contains__(k): self._remove_all(k) self._insert(k, v) super().__setitem__(k, [v]) def __getitem__(self, k): return super().__getitem__(k)[-1] def __delitem__(self, k): super().__delitem__(k) self._remove_all(k) def __eq__(self, other): if self is other: return True try: if len(other) != len(self): return False except TypeError: return False if isinstance(other, OrderedMultiDict): selfi = self.iteritems(multi=True) otheri = other.iteritems(multi=True) zipped_items = zip_longest(selfi, otheri, fillvalue=(None, None)) for (selfk, selfv), (otherk, otherv) in zipped_items: if selfk != otherk or selfv != otherv: return False if not(next(selfi, _MISSING) is _MISSING and next(otheri, _MISSING) is _MISSING): # leftovers (TODO: watch for StopIteration?) return False return True elif hasattr(other, 'keys'): for selfk in self: try: other[selfk] == self[selfk] except KeyError: return False return True return False def __ne__(self, other): return not (self == other) def __ior__(self, other): self.update(other) return self def pop(self, k, default=_MISSING): """Remove all values under key *k*, returning the most-recently inserted value. Raises :exc:`KeyError` if the key is not present and no *default* is provided. """ try: return self.popall(k)[-1] except KeyError: if default is _MISSING: raise KeyError(k) return default def popall(self, k, default=_MISSING): """Remove all values under key *k*, returning them in the form of a list. Raises :exc:`KeyError` if the key is not present and no *default* is provided. """ super_self = super() if super_self.__contains__(k): self._remove_all(k) if default is _MISSING: return super_self.pop(k) return super_self.pop(k, default) def poplast(self, k=_MISSING, default=_MISSING): """Remove and return the most-recently inserted value under the key *k*, or the most-recently inserted key if *k* is not provided. If no values remain under *k*, it will be removed from the OMD. Raises :exc:`KeyError` if *k* is not present in the dictionary, or the dictionary is empty. """ if k is _MISSING: if self: k = self.root[PREV][KEY] else: if default is _MISSING: raise KeyError('empty %r' % type(self)) return default try: self._remove(k) except KeyError: if default is _MISSING: raise KeyError(k) return default values = super().__getitem__(k) v = values.pop() if not values: super().__delitem__(k) return v def _remove(self, k): values = self._map[k] cell = values.pop() cell[PREV][NEXT], cell[NEXT][PREV] = cell[NEXT], cell[PREV] if not values: del self._map[k] def _remove_all(self, k): values = self._map[k] while values: cell = values.pop() cell[PREV][NEXT], cell[NEXT][PREV] = cell[NEXT], cell[PREV] del self._map[k] def iteritems(self, multi=False): """Iterate over the OMD's items in insertion order. By default, yields only the most-recently inserted value for each key. Set *multi* to ``True`` to get all inserted items. """ root = self.root curr = root[NEXT] if multi: while curr is not root: yield curr[KEY], curr[VALUE] curr = curr[NEXT] else: for key in self.iterkeys(): yield key, self[key] def iterkeys(self, multi=False): """Iterate over the OMD's keys in insertion order. By default, yields each key once, according to the most recent insertion. 
Set *multi* to ``True`` to get all keys, including duplicates, in
        insertion order.
        """
        root = self.root
        curr = root[NEXT]
        if multi:
            while curr is not root:
                yield curr[KEY]
                curr = curr[NEXT]
        else:
            yielded = set()
            yielded_add = yielded.add
            while curr is not root:
                k = curr[KEY]
                if k not in yielded:
                    yielded_add(k)
                    yield k
                curr = curr[NEXT]

    def itervalues(self, multi=False):
        """Iterate over the OMD's values in insertion order. By default,
        yields the most-recently inserted value per unique key. Set
        *multi* to ``True`` to get all values according to insertion
        order.
        """
        for k, v in self.iteritems(multi=multi):
            yield v

    def todict(self, multi=False):
        """Gets a basic :class:`dict` of the items in this dictionary. Keys
        are the same as the OMD, values are the most recently inserted
        values for each key.

        Setting the *multi* arg to ``True`` yields the same result as
        calling :class:`dict` on the OMD, except that all the value
        lists are copies that can be safely mutated.
        """
        if multi:
            return {k: self.getlist(k) for k in self}
        return {k: self[k] for k in self}

    def sorted(self, key=None, reverse=False):
        """Similar to the built-in :func:`sorted`, except this method returns
        a new :class:`OrderedMultiDict` sorted by the provided key
        function, optionally reversed.

        Args:
            key (callable): A callable to determine the sort key of
              each element. The callable should expect an **item**
              (key-value pair tuple).
            reverse (bool): Set to ``True`` to reverse the ordering.

        >>> omd = OrderedMultiDict(zip(range(3), range(3)))
        >>> omd.sorted(reverse=True)
        OrderedMultiDict([(2, 2), (1, 1), (0, 0)])

        Note that the key function receives an **item** (key-value
        tuple), so the recommended signature looks like:

        >>> omd = OrderedMultiDict(zip('hello', 'world'))
        >>> omd.sorted(key=lambda i: i[1])  # i[0] is the key, i[1] is the val
        OrderedMultiDict([('o', 'd'), ('l', 'l'), ('e', 'o'), ('l', 'r'), ('h', 'w')])
        """
        cls = self.__class__
        return cls(sorted(self.iteritems(multi=True), key=key, reverse=reverse))

    def sortedvalues(self, key=None, reverse=False):
        """Returns a copy of the :class:`OrderedMultiDict` with the same keys
        in the same order as the original OMD, but the values within
        each keyspace have been sorted according to *key* and
        *reverse*.

        Args:
            key (callable): A single-argument callable to determine
              the sort key of each element. The callable should expect
              an **item** (key-value pair tuple).
            reverse (bool): Set to ``True`` to reverse the ordering.

        >>> omd = OrderedMultiDict()
        >>> omd.addlist('even', [6, 2])
        >>> omd.addlist('odd', [1, 5])
        >>> omd.add('even', 4)
        >>> omd.add('odd', 3)
        >>> somd = omd.sortedvalues()
        >>> somd.getlist('even')
        [2, 4, 6]
        >>> somd.keys(multi=True) == omd.keys(multi=True)
        True
        >>> omd == somd
        False
        >>> somd
        OrderedMultiDict([('even', 2), ('even', 4), ('odd', 1), ('odd', 3), ('even', 6), ('odd', 5)])

        As demonstrated above, contents and key order are retained. Only
        value order changes.
        """
        try:
            superself_iteritems = super().iteritems()
        except AttributeError:
            superself_iteritems = super().items()
        # (not reverse) because they pop off in reverse order for reinsertion
        sorted_val_map = {k: sorted(v, key=key, reverse=(not reverse))
                          for k, v in superself_iteritems}
        ret = self.__class__()
        for k in self.iterkeys(multi=True):
            ret.add(k, sorted_val_map[k].pop())
        return ret

    def inverted(self):
        """Returns a new :class:`OrderedMultiDict` with values and keys
        swapped, like creating a dictionary transposition or reverse
        index. Insertion order is retained and all keys and values are
        represented in the output.
>>> omd = OMD([(0, 2), (1, 2)]) >>> omd.inverted().getlist(2) [0, 1] Inverting twice yields a copy of the original: >>> omd.inverted().inverted() OrderedMultiDict([(0, 2), (1, 2)]) """ return self.__class__((v, k) for k, v in self.iteritems(multi=True)) def counts(self): """Returns a mapping from key to number of values inserted under that key. Like :py:class:`collections.Counter`, but returns a new :class:`OrderedMultiDict`. """ # Returns an OMD because Counter/OrderedDict may not be # available, and neither Counter nor dict maintain order. super_getitem = super().__getitem__ return self.__class__((k, len(super_getitem(k))) for k in self) def keys(self, multi=False): """Returns a list containing the output of :meth:`iterkeys`. See that method's docs for more details. """ return list(self.iterkeys(multi=multi)) def values(self, multi=False): """Returns a list containing the output of :meth:`itervalues`. See that method's docs for more details. """ return list(self.itervalues(multi=multi)) def items(self, multi=False): """Returns a list containing the output of :meth:`iteritems`. See that method's docs for more details. """ return list(self.iteritems(multi=multi)) def __iter__(self): return self.iterkeys() def __reversed__(self): root = self.root curr = root[PREV] lengths = {} lengths_sd = lengths.setdefault get_values = super().__getitem__ while curr is not root: k = curr[KEY] vals = get_values(k) if lengths_sd(k, 1) == len(vals): yield k lengths[k] += 1 curr = curr[PREV] def __repr__(self): cn = self.__class__.__name__ kvs = ', '.join([repr((k, v)) for k, v in self.iteritems(multi=True)]) return f'{cn}([{kvs}])' def viewkeys(self): "OMD.viewkeys() -> a set-like object providing a view on OMD's keys" return KeysView(self) def viewvalues(self): "OMD.viewvalues() -> an object providing a view on OMD's values" return ValuesView(self) def viewitems(self): "OMD.viewitems() -> a set-like object providing a view on OMD's items" return ItemsView(self) try: # try to import the built-in one anyways from .dictutils import OrderedMultiDict except ImportError: pass OMD = OrderedMultiDict class QueryParamDict(OrderedMultiDict): """A subclass of :class:`~dictutils.OrderedMultiDict` specialized for representing query string values. Everything is fully unquoted on load and all parsed keys and values are strings by default. As the name suggests, multiple values are supported and insertion order is preserved. >>> qp = QueryParamDict.from_text(u'key=val1&key=val2&utm_source=rtd') >>> qp.getlist('key') [u'val1', u'val2'] >>> qp['key'] u'val2' >>> qp.add('key', 'val3') >>> qp.to_text() 'key=val1&key=val2&utm_source=rtd&key=val3' See :class:`~dictutils.OrderedMultiDict` for more API features. """ @classmethod def from_text(cls, query_string): """ Parse *query_string* and return a new :class:`QueryParamDict`. """ pairs = parse_qsl(query_string, keep_blank_values=True) return cls(pairs) def to_text(self, full_quote=False): """ Render and return a query string. Args: full_quote (bool): Whether or not to percent-quote special characters or leave them decoded for readability. 
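        A short sketch of both modes, building on the class-level
        doctest above:

        >>> qp = QueryParamDict.from_text('a=1&b=two words')
        >>> qp.to_text()
        'a=1&b=two words'
        >>> qp.to_text(full_quote=True)
        'a=1&b=two%20words'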
""" ret_list = [] for k, v in self.iteritems(multi=True): key = quote_query_part(to_unicode(k), full_quote=full_quote) if v is None: ret_list.append(key) else: val = quote_query_part(to_unicode(v), full_quote=full_quote) ret_list.append('='.join((key, val))) return '&'.join(ret_list) # end urlutils.py boltons-25.0.0/docs/000077500000000000000000000000001475005545200142155ustar00rootroot00000000000000boltons-25.0.0/docs/Makefile000066400000000000000000000163651475005545200156700ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " applehelp to make an Apple Help Book" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " xml to make Docutils-native XML files" @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" @echo " coverage to run coverage check of the documentation (if enabled)" clean: rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/boltons.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/boltons.qhc" applehelp: $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp @echo @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." @echo "N.B. You won't be able to view it unless you put it in" \ "~/Library/Documentation/Help or install it in your application" \ "bundle." devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/boltons" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/boltons" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." latexpdfja: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." 
doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." coverage: $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage @echo "Testing of coverage in the sources finished, look at the " \ "results in $(BUILDDIR)/coverage/python.txt." xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." boltons-25.0.0/docs/_templates/000077500000000000000000000000001475005545200163525ustar00rootroot00000000000000boltons-25.0.0/docs/_templates/page.html000066400000000000000000000004461475005545200201600ustar00rootroot00000000000000{% extends "!page.html" %} {% block menu %} {{ super() }} {% endblock %} boltons-25.0.0/docs/architecture.rst000066400000000000000000000066041475005545200174370ustar00rootroot00000000000000Architecture ============ ``boltons`` has a minimalist architecture: remain as consistent, and self-contained as possible, with an eye toward maintaining its range of use cases and usage patterns as wide as possible. .. _arch_integration: Integration ----------- Utility libraries are often used extensively within a project, and because they are not often fundamental to the architecture of the application, simplicity and stability may take precedence over version recency. In these cases, developers can: 1. Copy the whole ``boltons`` package into a project. 2. Copy just the ``utils.py`` file that a project requires. Boltons take this into account by design. The ``boltons`` package depends on no packages, making it easy for inclusion into a project. Furthermore, virtually all individual modules have been written to be as self-contained as possible, allowing cherrypicking of functionality into projects. Design of a ``bolton`` ---------------------- ``boltons`` aims to be a living library, an ever-expanding collection of tested and true utilities. For a bolton to be a bolton, it should: 1. Be pure-Python and as self-contained as possible. 2. Perform a common task or fulfill a common role. 3. Demonstrate and mitigate some insufficiency in the standard library. 4. Strive for the standard set forth by the standard library by striking a balance between best practice and "good enough", correctness and common sense. When in doubt, ask, "what would the standard library do?" 5. Have approachable documentation with at least one helpful :mod:`doctest`, links to relevant standard library functionality, as well as any 3rd-party packages that provide further capabilities. Finally, boltons should be substantial implementations of commonly trivialized stumbling blocks and not the other way around. The larger the problem solved, the less likely the functionality is suitable for inclusion in boltons; boltons are fundamental and self-contained, not sweeping and architecture-defining. Themes of ``boltons`` --------------------- ``boltons`` has had a wide variety of inspirations over the years, but a definite set of themes have emerged: 1. From the Python docs: 1. :mod:`~boltons.queueutils` - `heapq docs`_ 2. :mod:`~boltons.iterutils` - `itertools docs`_ 3. :mod:`~boltons.timeutils` - `datetime docs`_ 2. Reimplementations and tweaks of the standard library: 1. :func:`boltons.fileutils.copytree` - :func:`shutil.copytree` 2. 
:class:`boltons.namedutils.namedtuple` - :func:`collections.namedtuple` 3. One-off implementations discovered in multiple other libraries' ``utils.py`` or equivalent 1. :func:`boltons.strutils.slugify` 2. :func:`boltons.strutils.bytes2human` 3. :func:`boltons.timeutils.relative_time` 4. More powerful multi-purpose data structures 1. :class:`boltons.dictutils.OrderedMultiDict` 2. :class:`boltons.setutils.IndexedSet` 3. :class:`boltons.listutils.BList` 4. :class:`boltons.namedutils.namedlist` 5. :class:`boltons.tableutils.Table` 5. Personal practice and experience 1. :mod:`boltons.debugutils` 2. :mod:`boltons.gcutils` 3. :mod:`boltons.tbutils` .. _heapq docs: https://docs.python.org/2/library/heapq.html#priority-queue-implementation-notes .. _itertools docs: https://docs.python.org/2/library/itertools.html#recipes .. _datetime docs: https://docs.python.org/2/library/datetime.html#tzinfo-objects boltons-25.0.0/docs/cacheutils.rst000066400000000000000000000034151475005545200170760ustar00rootroot00000000000000``cacheutils`` - Caches and caching =================================== .. automodule:: boltons.cacheutils Least-Recently Inserted (LRI) ----------------------------- The :class:`LRI` is the simpler cache, implementing a very simple first-in, first-out (FIFO) approach to cache eviction. If the use case calls for simple, very-low overhead caching, such as somewhat expensive local operations (e.g., string operations), then the LRI is likely the right choice. .. autoclass:: boltons.cacheutils.LRI :members: Least-Recently Used (LRU) ------------------------- The :class:`LRU` is the more advanced cache, but it's still quite simple. When it reaches capacity, a new insertion replaces the least-recently used item. This strategy makes the LRU a more effective cache than the LRI for a wide variety of applications, but also entails more operations for all of its APIs, especially reads. Unlike the :class:`LRI`, the LRU has threadsafety built in. .. autoclass:: boltons.cacheutils.LRU :members: Automatic function caching -------------------------- Continuing in the theme of cache tunability and experimentation, ``cacheutils`` also offers a pluggable way to cache function return values: the :func:`cached` function decorator and the :func:`cachedmethod` method decorator. .. autofunction:: boltons.cacheutils.cached .. autofunction:: boltons.cacheutils.cachedmethod Similar functionality can be found in Python 3.4's :func:`functools.lru_cache` decorator, but the functools approach does not support the same cache strategy modification, nor does it support sharing the cache object across multiple functions. .. autofunction:: boltons.cacheutils.cachedproperty Threshold-bounded Counting -------------------------- .. autoclass:: boltons.cacheutils.ThresholdCounter :members: boltons-25.0.0/docs/conf.py000066400000000000000000000230031475005545200155120ustar00rootroot00000000000000# # boltons documentation build configuration file, created by # sphinx-quickstart on Sat Mar 21 00:34:18 2015. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import os import sys import sphinx from pprint import pprint # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. 
If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. CUR_PATH = os.path.dirname(os.path.abspath(__file__)) PROJECT_PATH = os.path.abspath(CUR_PATH + '/../') PACKAGE_PATH = os.path.abspath(CUR_PATH + '/../boltons/') sys.path.insert(0, PROJECT_PATH) sys.path.insert(0, PACKAGE_PATH) pprint(os.environ) def get_mod_stats(): # TODO: docstring percentage. import pkgutil from boltons.funcutils import get_module_callables mod_count = 0 tot_type_count = 0 tot_func_count = 0 ignore = lambda attr_name: attr_name.startswith('_') for _, mod_name, _ in pkgutil.iter_modules([PACKAGE_PATH]): if not mod_name.endswith('utils'): continue mod = __import__(mod_name) types, funcs = get_module_callables(mod, ignore=ignore) if not len(types) and not len(funcs): continue mod_count += 1 tot_type_count += len(types) tot_func_count += len(funcs) ret = (mod_count, tot_type_count, tot_func_count) print ('==== %s modules ==== %s types ==== %s funcs ====' % ret) return ret B_MOD_COUNT, B_TYPE_COUNT, B_FUNC_COUNT = get_mod_stats() rst_epilog = """ .. |b_mod_count| replace:: {mod_count} .. |b_type_count| replace:: {type_count} .. |b_func_count| replace:: {func_count} """.format(mod_count=B_MOD_COUNT, type_count=B_TYPE_COUNT, func_count=B_FUNC_COUNT) # -- General configuration ------------------------------------------------ autosummary_generate = True # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.coverage', 'sphinx.ext.viewcode', ] # Read the Docs is version 1.2 as of writing if sphinx.version_info[:2] < (1, 3): extensions.append('sphinxcontrib.napoleon') else: extensions.append('sphinx.ext.napoleon') # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' # General information about the project. project = 'boltons' copyright = '2025, Mahmoud Hashemi' author = 'Mahmoud Hashemi' version = '25.0' release = '25.0.0' if os.name != 'nt': today_fmt = '%B %d, %Y' exclude_patterns = ['_build'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {'python': ('https://docs.python.org/', None)} # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. on_rtd = os.environ.get('READTHEDOCS', None) == 'True' html_theme = 'sphinx_rtd_theme' if not on_rtd: # only import and set the theme if we're building docs locally import sphinx_rtd_theme html_theme_path = ['_themes', sphinx_rtd_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] # TEMP: see https://github.com/rtfd/readthedocs.org/issues/1692 # Add RTD Theme Path. 
#if 'html_theme_path' in globals(): # html_theme_path.append('/home/docs/checkouts/readthedocs.org/readthedocs/templates/sphinx') #else: # html_theme_path = ['_themes', '/home/docs/checkouts/readthedocs.org/readthedocs/templates/sphinx'] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. #html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_domain_indices = True # If false, no index is generated. #html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. #html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. #html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' #html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # Now only 'ja' uses this config value #html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. #html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. htmlhelp_basename = 'boltonsdoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. 
#'preamble': '', # Latex figure (float) alignment #'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'boltons.tex', 'boltons Documentation', 'Mahmoud Hashemi', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # If true, show page references after internal links. #latex_show_pagerefs = False # If true, show URL addresses after external links. #latex_show_urls = False # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'boltons', 'boltons Documentation', [author], 1) ] # If true, show URL addresses after external links. #man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'boltons', 'boltons Documentation', author, 'boltons', 'One line description of project.', 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. #texinfo_appendices = [] # If false, no module index is generated. #texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. #texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. #texinfo_no_detailmenu = False boltons-25.0.0/docs/debugutils.rst000066400000000000000000000002161475005545200171150ustar00rootroot00000000000000``debugutils`` - Debugging utilities ==================================== .. automodule:: boltons.debugutils :members: :undoc-members: boltons-25.0.0/docs/dictutils.rst000066400000000000000000000002131475005545200167470ustar00rootroot00000000000000``dictutils`` - Mapping types (OMD) =================================== .. automodule:: boltons.dictutils :members: :undoc-members: boltons-25.0.0/docs/ecoutils.rst000066400000000000000000000001651475005545200166000ustar00rootroot00000000000000``ecoutils`` - Ecosystem analytics ================================== .. automodule:: boltons.ecoutils :members: boltons-25.0.0/docs/fileutils.rst000066400000000000000000000032151475005545200167500ustar00rootroot00000000000000``fileutils`` - Filesystem helpers ================================== .. automodule:: boltons.fileutils Creating, Finding, and Copying ------------------------------ Python's :mod:`os`, :mod:`os.path`, and :mod:`shutil` modules provide good coverage of file wrangling fundamentals, and these functions help close a few remaining gaps. .. autofunction:: boltons.fileutils.mkdir_p .. autofunction:: boltons.fileutils.iter_find_files .. autofunction:: boltons.fileutils.copytree .. autofunction:: boltons.fileutils.rotate_file Atomic File Saving ------------------ Ideally, the road to success should never put current progress at risk. And that's exactly why :func:`atomic_save` and :class:`AtomicSaver` exist. 
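For a taste of the API, here is a minimal sketch (the filename and payload are purely illustrative)::

    from boltons.fileutils import atomic_save

    with atomic_save('state.json') as f:
        f.write(b'{"status": "ok"}')  # note: the file object is binary-mode by default
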
Using the same API as a writable file, all output is saved to a temporary file, and when the file is closed, the old file is replaced by the new file in a single system call, portable across all major operating systems. No more partially-written or partially-overwritten files. .. autofunction:: boltons.fileutils.atomic_save .. autoclass:: boltons.fileutils.AtomicSaver .. autofunction:: boltons.fileutils.atomic_rename .. autofunction:: boltons.fileutils.replace File Permissions ---------------- Linux, BSD, Mac OS, and other Unix-like operating systems all share a simple, foundational file permission structure that is commonly complicit in accidental access denial, as well as file leakage. :class:`FilePerms` was built to increase clarity and cut down on permission-related accidents when working with files from Python code. .. autoclass:: boltons.fileutils.FilePerms Miscellaneous ------------- .. autoclass:: boltons.fileutils.DummyFile boltons-25.0.0/docs/formatutils.rst000066400000000000000000000002331475005545200173160ustar00rootroot00000000000000``formatutils`` - ``str.format()`` toolbox ========================================== .. automodule:: boltons.formatutils :members: :undoc-members: boltons-25.0.0/docs/funcutils.rst000066400000000000000000000021631475005545200167650ustar00rootroot00000000000000``funcutils`` - ``functools`` fixes =================================== .. automodule:: boltons.funcutils .. contents:: Sections :depth: 3 :local: Decoration ---------- `Decorators`_ are among Python's most elegant and succinct language features, and boltons adds one special function to make them even more powerful. .. _Decorators: https://en.wikipedia.org/wiki/Python_syntax_and_semantics#Decorators .. autofunction:: wraps Function construction --------------------- Functions are so key to programming in Python that there will even arise times when Python functions must be constructed in Python. Thankfully, Python is a dynamic enough language to make this possible. Boltons makes it easy. .. autoclass:: FunctionBuilder :members: Improved ``partial`` -------------------- .. autoclass:: partial .. autoclass:: InstancePartial .. autoclass:: CachedInstancePartial Miscellaneous metaprogramming ----------------------------- .. autofunction:: copy_function .. autofunction:: dir_dict .. autofunction:: mro_items .. autofunction:: format_invocation .. autofunction:: format_exp_repr .. autofunction:: format_nonexp_repr boltons-25.0.0/docs/gcutils.rst000066400000000000000000000002171475005545200164210ustar00rootroot00000000000000``gcutils`` - Garbage collecting tools ====================================== .. automodule:: boltons.gcutils :members: :undoc-members: boltons-25.0.0/docs/index.rst000066400000000000000000000103501475005545200160570ustar00rootroot00000000000000.. boltons documentation master file, created on Sat Mar 21 00:34:18 2015.
boltons ======= *boltons should be builtins.* |release| |calver| |changelog| **Boltons** is a set of pure-Python utilities in the same spirit as — and yet conspicuously missing from — `the standard library`_, including: * :func:`Atomic file saving `, bolted on with :mod:`~boltons.fileutils` * A highly-optimized :class:`~boltons.dictutils.OrderedMultiDict`, in :mod:`~boltons.dictutils` * Two types of :class:`~boltons.queueutils.PriorityQueue`, in :mod:`~boltons.queueutils` * :func:`Chunked ` and :func:`windowed ` iteration, in :mod:`~boltons.iterutils` * A full-featured :class:`~boltons.tbutils.TracebackInfo` type, for representing stack traces, in :mod:`~boltons.tbutils` * A lightweight :class:`UTC timezone ` available in :mod:`~boltons.timeutils`. * Recursive mapping for nested data transforms, with :func:`remap ` And that's just a small selection. As of |today|, ``boltons`` is |b_type_count| types and |b_func_count| functions, spread across |b_mod_count| modules. See them all in the :ref:`genindex`, and see what's new by `checking the CHANGELOG`_. .. counts are appx 50, 75, and 23, respectively, as of initial docs writing .. in mid 2016, the counts are now 62, 112, and 25, respectively .. _the standard library: https://docs.python.org/3/library/index.html .. _checking the CHANGELOG: https://github.com/mahmoud/boltons/blob/master/CHANGELOG.md .. |release| image:: https://img.shields.io/pypi/v/boltons.svg :target: https://pypi.python.org/pypi/boltons .. |calver| image:: https://img.shields.io/badge/calver-YY.MINOR.MICRO-22bfda.svg :target: http://calver.org .. |changelog| image:: https://img.shields.io/badge/CHANGELOG-UPDATED-b84ad6.svg :target: https://github.com/mahmoud/boltons/blob/master/CHANGELOG.md Installation and Integration ---------------------------- Boltons can be added to a project in a few ways. There's the obvious one:: pip install boltons On macOS, it can also be installed via `MacPorts`_:: sudo port install py-boltons Then dozens of boltons are just an import away:: from boltons.cacheutils import LRU lru_cache = LRU() lru_cache['result'] = 'success' Due to the nature of utilities, application developers might want to consider other integration options. See the :ref:`Integration ` section of the architecture document for more details. Boltons is tested against Python 3.7-3.13, as well as PyPy3. .. _MacPorts: https://ports.macports.org/port/py-boltons/summary Third-party packages -------------------- The majority of boltons strive to be "good enough" for a wide range of basic uses, leaving advanced use cases to Python's `myriad specialized 3rd-party libraries`_. In many cases the respective ``boltons`` module will describe 3rd-party alternatives worth investigating when use cases outgrow ``boltons``. If you've found a natural "next-step" library worth mentioning, :ref:`consider filing an issue `! .. _myriad specialized 3rd-party libraries: https://pypi.python.org/pypi .. _gaps: Gaps ---- Found something missing in the standard library that should be in ``boltons``? Found something missing in ``boltons``? First, take a moment to read the very brief :doc:`architecture` statement to make sure the functionality would be a good fit. Then, if you are very motivated, submit `a Pull Request`_. Otherwise, submit a short feature request on `the Issues page`_, and we will figure something out. .. _a Pull Request: https://github.com/mahmoud/boltons/pulls .. _the Issues Page: https://github.com/mahmoud/boltons/issues Section listing --------------- .. 
toctree:: :maxdepth: 2 architecture cacheutils debugutils dictutils ecoutils fileutils formatutils funcutils gcutils ioutils iterutils jsonutils listutils mathutils mboxutils namedutils pathutils queueutils setutils socketutils statsutils strutils tableutils tbutils timeutils typeutils urlutils (For a quick reference you can ctrl-F, see the :ref:`genindex`.) boltons-25.0.0/docs/ioutils.rst000066400000000000000000000055671475005545200164520ustar00rootroot00000000000000``ioutils`` - Input/output enhancements ======================================= .. automodule:: boltons.ioutils Spooled Temporary Files ----------------------- Spooled Temporary Files are file-like objects that start out mapped to in-memory objects, but automatically roll over to a temporary file once they reach a certain (configurable) threshold. Unfortunately, the built-in SpooledTemporaryFile class in Python does not implement the exact API that some common classes like StringIO do. SpooledTemporaryFile also spools all of its in-memory files as cStringIO instances. cStringIO instances cannot be deep-copied, and they don't work with the zip library either. This, along with the incompatible API, makes it unsuitable for several use cases. To combat this but still gain the memory savings and usefulness of a true spooled file-like object, two custom classes have been implemented which have a compatible API. .. _spooledbytesio: SpooledBytesIO ^^^^^^^^^^^^^^ .. autoclass:: boltons.ioutils.SpooledBytesIO .. _spooledstringio: SpooledStringIO ^^^^^^^^^^^^^^^ .. autoclass:: boltons.ioutils.SpooledStringIO Examples -------- It's not uncommon to find excessive usage of StringIO in older Python code. A SpooledTemporaryFile would be a nice replacement if one wanted to reduce memory overhead, but unfortunately its API differs too much. This is a good candidate for :ref:`spooledbytesio` as it is API-compatible and thus may be used as a drop-in replacement. Old Code:: flo = StringIO() flo.write(gigantic_string) Updated:: from boltons.ioutils import SpooledBytesIO flo = SpooledBytesIO() flo.write(gigantic_string) Another good use case is downloading a file from some remote location. It's nice to keep it in memory if it's small, but writing a large file into memory can make servers quite grumpy. If the file being downloaded happens to be a zip file, then things are worse. You can't use a normal SpooledTemporaryFile because it isn't compatible. A :ref:`spooledbytesio` instance is a good alternative. Here is a simple example using the requests library to download a zip file:: from zipfile import ZipFile import requests from boltons import ioutils # Using a context manager with stream=True ensures the connection is closed. See: # http://docs.python-requests.org/en/master/user/advanced/#body-content-workflow with requests.get("http://127.0.0.1/test_file.zip", stream=True) as r: if r.status_code == 200: with ioutils.SpooledBytesIO() as flo: for chunk in r.iter_content(chunk_size=64000): flo.write(chunk) flo.seek(0) zip_doc = ZipFile(flo) # Print all the files in the zip print(zip_doc.namelist()) Multiple Files -------------- .. _multifilereader: MultiFileReader ^^^^^^^^^^^^^^^ .. autoclass:: boltons.ioutils.MultiFileReader boltons-25.0.0/docs/iterutils.rst000066400000000000000000000055741475005545200170040ustar00rootroot00000000000000``iterutils`` - ``itertools`` improvements ========================================== .. automodule:: boltons.iterutils .. contents:: Sections :depth: 3 :local: ..
_iteration: Iteration --------- These are generators and convenient :class:`list`-producing counterparts comprising several common patterns of iteration not present in the standard library. .. autofunction:: chunked .. autofunction:: chunked_iter .. autofunction:: chunk_ranges .. autofunction:: pairwise .. autofunction:: pairwise_iter .. autofunction:: windowed .. autofunction:: windowed_iter .. autofunction:: unique .. autofunction:: unique_iter .. autofunction:: redundant Stripping and splitting ----------------------- A couple of :class:`str`-inspired mechanics that have come in handy on iterables, too: .. autofunction:: split .. autofunction:: split_iter .. autofunction:: strip .. autofunction:: strip_iter .. autofunction:: lstrip .. autofunction:: lstrip_iter .. autofunction:: rstrip .. autofunction:: rstrip_iter Nested ------ Nested data structures are common. Yet virtually all of Python's compact iteration tools work with flat data: list comprehensions, map/filter, generator expressions, itertools, even other iterutils. The functions below make working with nested iterables and other containers as succinct and powerful as Python itself. .. autofunction:: remap .. autofunction:: get_path .. autofunction:: research .. autofunction:: flatten .. autofunction:: flatten_iter Numeric ------- Number sequences are an obvious target of Python iteration, such as the built-in :func:`range`, and :func:`itertools.count`. Like the :ref:`iteration` members above, these return iterators and lists, but take numeric inputs instead of iterables. .. autofunction:: backoff .. autofunction:: backoff_iter .. autofunction:: frange .. autofunction:: xfrange Categorization -------------- These functions operate on iterables, dividing into groups based on a given condition. .. autofunction:: bucketize .. autofunction:: partition Sorting ------- The built-in :func:`sorted()` is great, but what do you do when you want to partially override the sort order? .. autofunction:: soft_sorted .. autofunction:: untyped_sorted Reduction --------- :func:`reduce` is a powerful function, but it is also very open-ended and not always the most readable. The standard library recognized this with the addition of :func:`sum`, :func:`all`, and :func:`any`. All these functions take a basic operator (``+``, ``and``, and ``or``) and use the operator to turn an iterable into a single value. Functions in this category follow that same spirit, turning iterables like lists into single values: .. autofunction:: one .. autofunction:: first .. autofunction:: same Type Checks ----------- In the same vein as the feature-checking builtin, :func:`callable`. .. autofunction:: is_iterable .. autofunction:: is_scalar .. autofunction:: is_collection boltons-25.0.0/docs/jsonutils.rst000066400000000000000000000002071475005545200170000ustar00rootroot00000000000000``jsonutils`` - JSON interactions ================================= .. automodule:: boltons.jsonutils :members: :undoc-members: boltons-25.0.0/docs/listutils.rst000066400000000000000000000002151475005545200170010ustar00rootroot00000000000000``listutils`` - ``list`` derivatives ==================================== .. automodule:: boltons.listutils :members: :undoc-members: boltons-25.0.0/docs/make.bat000066400000000000000000000161161475005545200156270ustar00rootroot00000000000000@ECHO OFF REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set BUILDDIR=_build set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 
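REM Note: the trailing "." in ALLSPHINXOPTS above (and I18NSPHINXOPTS below) is the source directory argument passed to sphinx-build.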
set I18NSPHINXOPTS=%SPHINXOPTS% . if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% ) if "%1" == "" goto help if "%1" == "help" ( :help echo.Please use `make ^` where ^ is one of echo. html to make standalone HTML files echo. dirhtml to make HTML files named index.html in directories echo. singlehtml to make a single large HTML file echo. pickle to make pickle files echo. json to make JSON files echo. htmlhelp to make HTML files and a HTML help project echo. qthelp to make HTML files and a qthelp project echo. devhelp to make HTML files and a Devhelp project echo. epub to make an epub echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter echo. text to make text files echo. man to make manual pages echo. texinfo to make Texinfo files echo. gettext to make PO message catalogs echo. changes to make an overview over all changed/added/deprecated items echo. xml to make Docutils-native XML files echo. pseudoxml to make pseudoxml-XML files for display purposes echo. linkcheck to check all external links for integrity echo. doctest to run all doctests embedded in the documentation if enabled echo. coverage to run coverage check of the documentation if enabled goto end ) if "%1" == "clean" ( for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i del /q /s %BUILDDIR%\* goto end ) REM Check if sphinx-build is available and fallback to Python version if any %SPHINXBUILD% 2> nul if errorlevel 9009 goto sphinx_python goto sphinx_ok :sphinx_python set SPHINXBUILD=python -m sphinx.__init__ %SPHINXBUILD% 2> nul if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) :sphinx_ok if "%1" == "html" ( %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html. goto end ) if "%1" == "dirhtml" ( %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. goto end ) if "%1" == "singlehtml" ( %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. goto end ) if "%1" == "pickle" ( %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the pickle files. goto end ) if "%1" == "json" ( %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the JSON files. goto end ) if "%1" == "htmlhelp" ( %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run HTML Help Workshop with the ^ .hhp project file in %BUILDDIR%/htmlhelp. goto end ) if "%1" == "qthelp" ( %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp if errorlevel 1 exit /b 1 echo. 
echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in %BUILDDIR%/qthelp, like this: echo.^> qcollectiongenerator %BUILDDIR%\qthelp\boltons.qhcp echo.To view the help file: echo.^> assistant -collectionFile %BUILDDIR%\qthelp\boltons.ghc goto end ) if "%1" == "devhelp" ( %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp if errorlevel 1 exit /b 1 echo. echo.Build finished. goto end ) if "%1" == "epub" ( %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub if errorlevel 1 exit /b 1 echo. echo.Build finished. The epub file is in %BUILDDIR%/epub. goto end ) if "%1" == "latex" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex if errorlevel 1 exit /b 1 echo. echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdf" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf cd %~dp0 echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdfja" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf-ja cd %~dp0 echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "text" ( %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text if errorlevel 1 exit /b 1 echo. echo.Build finished. The text files are in %BUILDDIR%/text. goto end ) if "%1" == "man" ( %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man if errorlevel 1 exit /b 1 echo. echo.Build finished. The manual pages are in %BUILDDIR%/man. goto end ) if "%1" == "texinfo" ( %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo if errorlevel 1 exit /b 1 echo. echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. goto end ) if "%1" == "gettext" ( %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale if errorlevel 1 exit /b 1 echo. echo.Build finished. The message catalogs are in %BUILDDIR%/locale. goto end ) if "%1" == "changes" ( %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes if errorlevel 1 exit /b 1 echo. echo.The overview file is in %BUILDDIR%/changes. goto end ) if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ or in %BUILDDIR%/linkcheck/output.txt. goto end ) if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest if errorlevel 1 exit /b 1 echo. echo.Testing of doctests in the sources finished, look at the ^ results in %BUILDDIR%/doctest/output.txt. goto end ) if "%1" == "coverage" ( %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage if errorlevel 1 exit /b 1 echo. echo.Testing of coverage in the sources finished, look at the ^ results in %BUILDDIR%/coverage/python.txt. goto end ) if "%1" == "xml" ( %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml if errorlevel 1 exit /b 1 echo. echo.Build finished. The XML files are in %BUILDDIR%/xml. goto end ) if "%1" == "pseudoxml" ( %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml if errorlevel 1 exit /b 1 echo. echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. goto end ) :end boltons-25.0.0/docs/mathutils.rst000066400000000000000000000014171475005545200167640ustar00rootroot00000000000000``mathutils`` - Mathematical functions ====================================== .. automodule:: boltons.mathutils .. autoclass:: boltons.mathutils.Bits :members: :undoc-members: Alternative Rounding Functions ------------------------------ .. 
autofunction:: boltons.mathutils.clamp .. autofunction:: boltons.mathutils.ceil .. autofunction:: boltons.mathutils.floor Note: the :func:`ceil` and :func:`floor` functions are based on `this example`_ using the :mod:`bisect` module from the standard library. Refer to this `StackOverflow Answer`_ for further information regarding the performance impact of this approach. .. _this example: https://docs.python.org/3/library/bisect.html#searching-sorted-lists .. _StackOverflow Answer: http://stackoverflow.com/a/12141511/811740 boltons-25.0.0/docs/mboxutils.rst000066400000000000000000000002211475005545200167700ustar00rootroot00000000000000``mboxutils`` - Unix mailbox utilities ====================================== .. automodule:: boltons.mboxutils :members: :undoc-members: boltons-25.0.0/docs/namedutils.rst000066400000000000000000000002241475005545200171120ustar00rootroot00000000000000``namedutils`` - Lightweight containers ======================================= .. automodule:: boltons.namedutils :members: :undoc-members: boltons-25.0.0/docs/pathutils.rst000066400000000000000000000002011475005545200167550ustar00rootroot00000000000000``pathutils`` - Filesystem fun ============================== .. automodule:: boltons.pathutils :members: :undoc-members: boltons-25.0.0/docs/queueutils.rst000066400000000000000000000002061475005545200171520ustar00rootroot00000000000000``queueutils`` - Priority queues ================================ .. automodule:: boltons.queueutils :members: :undoc-members: boltons-25.0.0/docs/requirements-rtd.txt000066400000000000000000000000501475005545200202630ustar00rootroot00000000000000sphinxcontrib-napoleon sphinx-rtd-theme boltons-25.0.0/docs/setutils.rst000066400000000000000000000001651475005545200166250ustar00rootroot00000000000000``setutils`` - ``IndexedSet`` type ================================== .. automodule:: boltons.setutils :members: boltons-25.0.0/docs/socketutils.rst000066400000000000000000000016441475005545200173250ustar00rootroot00000000000000``socketutils`` - ``socket`` wrappers ===================================== .. automodule:: boltons.socketutils BufferedSocket -------------- .. autoclass:: boltons.socketutils.BufferedSocket :members: Exceptions ^^^^^^^^^^ These are a few exceptions that derive from :exc:`socket.error` and provide clearer code and better error messages. .. autoexception:: boltons.socketutils.Error .. autoexception:: boltons.socketutils.Timeout .. autoexception:: boltons.socketutils.ConnectionClosed .. autoexception:: boltons.socketutils.MessageTooLong Netstring --------- .. autoclass:: boltons.socketutils.NetstringSocket :members: Netstring Exceptions ^^^^^^^^^^^^^^^^^^^^ These are a few higher-level exceptions for Netstring connections. .. autoexception:: boltons.socketutils.NetstringProtocolError .. autoexception:: boltons.socketutils.NetstringInvalidSize .. autoexception:: boltons.socketutils.NetstringMessageTooLong boltons-25.0.0/docs/statsutils.rst000066400000000000000000000002261475005545200171660ustar00rootroot00000000000000``statsutils`` - Statistics fundamentals ======================================== .. automodule:: boltons.statsutils :members: :undoc-members: boltons-25.0.0/docs/strutils.rst000066400000000000000000000002051475005545200166350ustar00rootroot00000000000000``strutils`` - Text manipulation ================================= ..
automodule:: boltons.strutils :members: :undoc-members: boltons-25.0.0/docs/tableutils.rst000066400000000000000000000002121475005545200171120ustar00rootroot00000000000000``tableutils`` - 2D data structure ================================== .. automodule:: boltons.tableutils :members: :undoc-members: boltons-25.0.0/docs/tbutils.rst000066400000000000000000000002001475005545200164250ustar00rootroot00000000000000``tbutils`` - Tracebacks and call stacks ======================================== .. automodule:: boltons.tbutils :members: boltons-25.0.0/docs/timeutils.rst000066400000000000000000000034451475005545200167740ustar00rootroot00000000000000``timeutils`` - ``datetime`` additions ====================================== .. automodule:: boltons.timeutils .. autofunction:: daterange .. autofunction:: isoparse .. autofunction:: parse_timedelta .. autofunction:: strpdate .. autofunction:: total_seconds .. autofunction:: dt_to_timestamp .. autofunction:: relative_time .. autofunction:: decimal_relative_time General timezones ----------------- By default, :class:`datetime.datetime` objects are "naïve", meaning they lack attached timezone information. These objects can be useful for many operations, but many operations require timezone-aware datetimes. The two most important timezones in programming are Coordinated Universal Time (`UTC`_) and the local timezone of the host running your code. Boltons provides two :class:`datetime.tzinfo` subtypes for working with them: .. _UTC: https://en.wikipedia.org/wiki/Coordinated_Universal_Time .. note:: These days, Python has a `built-in UTC`_, and the UTC tzinfo here, while equivalent, is just for backwards compat. .. autoattribute:: boltons.timeutils.UTC .. autodata:: boltons.timeutils.LocalTZ .. autoclass:: boltons.timeutils.ConstantTZInfo .. _built-in UTC: https://docs.python.org/3/library/datetime.html#datetime.timezone.utc US timezones ------------ These four US timezones were implemented in the :mod:`datetime` documentation and have been reproduced here in boltons for convenience. More in-depth support is provided by `dateutil`_ and `pytz`_. .. _dateutil: https://dateutil.readthedocs.io/en/stable/index.html .. _pytz: https://pypi.python.org/pypi/pytz .. autoattribute:: boltons.timeutils.Eastern .. autoattribute:: boltons.timeutils.Central .. autoattribute:: boltons.timeutils.Mountain .. autoattribute:: boltons.timeutils.Pacific .. autoclass:: boltons.timeutils.USTimeZone boltons-25.0.0/docs/typeutils.rst000066400000000000000000000001541475005545200170110ustar00rootroot00000000000000``typeutils`` - Type handling ============================= .. automodule:: boltons.typeutils :members: boltons-25.0.0/docs/urlutils.rst000066400000000000000000000135031475005545200166340ustar00rootroot00000000000000``urlutils`` - Structured URL ============================= .. automodule:: boltons.urlutils .. versionadded:: 17.2 The URL type ------------ .. autoclass:: boltons.urlutils.URL .. attribute:: URL.scheme The scheme is an ASCII string, normally lowercase, which specifies the semantics for the rest of the URL, as well as network protocol in many cases. For example, "http" in "http://hatnote.com". .. attribute:: URL.username The username is a string used by some schemes for authentication. For example, "public" in "ftp://public@example.com". .. attribute:: URL.password The password is a string also used for authentication. Technically deprecated by `RFC 3986 Section 7.5`_, they're still used in cases when the URL is private or the password is public. 
For example "password" in "db://private:password@127.0.0.1". .. _RFC 3986 Section 7.5: https://tools.ietf.org/html/rfc3986#section-7.5 .. attribute:: URL.host The host is a string used to resolve the network location of the resource, either empty, a domain, or IP address (v4 or v6). "example.com", "127.0.0.1", and "::1" are all good examples of host strings. Per spec, fully-encoded output from :attr:`~URL.to_text()` is `IDNA encoded`_ for compatibility with DNS. .. _IDNA encoded: https://en.wikipedia.org/wiki/Internationalized_domain_name#Example_of_IDNA_encoding .. attribute:: URL.port The port is an integer used, along with :attr:`host`, in connecting to network locations. ``8080`` is the port in "http://localhost:8080/index.html". .. note:: As is the case for 80 for HTTP and 22 for SSH, many schemes have default ports, and `Section 3.2.3 of RFC 3986`_ states that when a URL's port is the same as its scheme's default port, the port should not be emitted:: >>> URL(u'https://github.com:443/mahmoud/boltons').to_text() u'https://github.com/mahmoud/boltons' Custom schemes can register their port with :func:`~boltons.urlutils.register_scheme`. See :attr:`URL.default_port` for more info. .. _Section 3.2.3 of RFC 3986: https://tools.ietf.org/html/rfc3986#section-3.2.3 .. attribute:: URL.path The string starting with the first leading slash after the authority part of the URL, ending with the first question mark. Often percent-quoted for network use. "/a/b/c" is the path of "http://example.com/a/b/c?d=e". .. attribute:: URL.path_parts The :class:`tuple` form of :attr:`~URL.path`, split on slashes. Empty slash segments are preserved, including that of the leading slash:: >>> url = URL(u'http://example.com/a/b/c') >>> url.path_parts (u'', u'a', u'b', u'c') .. attribute:: URL.query_params An instance of :class:`~boltons.urlutils.QueryParamDict`, an :class:`~boltons.dictutils.OrderedMultiDict` subtype, mapping textual keys and values which follow the first question mark after the :attr:`path`. Also available as the handy alias ``qp``:: >>> url = URL('http://boltons.readthedocs.io/en/latest/?utm_source=docs&sphinx=ok') >>> url.qp.keys() [u'utm_source', u'sphinx'] Also percent-encoded for network use cases. .. attribute:: URL.fragment The string following the first '#' after the :attr:`query_params` until the end of the URL. It has no inherent internal structure, and is percent-quoted. .. automethod:: URL.from_parts .. automethod:: URL.to_text .. autoattribute:: URL.default_port .. autoattribute:: URL.uses_netloc .. automethod:: URL.get_authority .. automethod:: URL.normalize .. automethod:: URL.navigate Related functions ~~~~~~~~~~~~~~~~~ .. autofunction:: boltons.urlutils.find_all_links .. autofunction:: boltons.urlutils.register_scheme Low-level functions ------------------- A slew of functions used internally by :class:`~boltons.urlutils.URL`. .. autofunction:: boltons.urlutils.parse_url .. autofunction:: boltons.urlutils.parse_host .. autofunction:: boltons.urlutils.parse_qsl .. autofunction:: boltons.urlutils.resolve_path_parts .. autoclass:: boltons.urlutils.QueryParamDict :members: Quoting ~~~~~~~ URLs have many parts, and almost as many individual "quoting" (encoding) strategies. .. autofunction:: boltons.urlutils.quote_userinfo_part .. autofunction:: boltons.urlutils.quote_path_part .. autofunction:: boltons.urlutils.quote_query_part .. autofunction:: boltons.urlutils.quote_fragment_part There is however, only one unquoting strategy: .. 
autofunction:: boltons.urlutils.unquote Useful constants ---------------- .. attribute:: boltons.urlutils.SCHEME_PORT_MAP A mapping of URL schemes to their protocols' default ports. Painstakingly assembled from the `IANA scheme registry`_, `port registry`_, and independent research. Keys are lowercase strings, values are integers or None, with None indicating that the scheme does not have a default port (or may not support ports at all):: >>> boltons.urlutils.SCHEME_PORT_MAP['http'] 80 >>> boltons.urlutils.SCHEME_PORT_MAP['file'] None See :attr:`URL.port` for more info on how it is used. See :attr:`~boltons.urlutils.NO_NETLOC_SCHEMES` for more scheme info. Also `available in JSON`_. .. _IANA scheme registry: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml .. _port registry: https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml .. _available in JSON: https://gist.github.com/mahmoud/2fe281a8daaff26cfe9c15d2c5bf5c8b .. attribute:: boltons.urlutils.NO_NETLOC_SCHEMES This is a :class:`set` of schemes explicitly do not support network resolution, such as "mailto" and "urn". boltons-25.0.0/misc/000077500000000000000000000000001475005545200142205ustar00rootroot00000000000000boltons-25.0.0/misc/bench_omd.py000066400000000000000000000061671475005545200165220ustar00rootroot00000000000000import string import sys sys.path.append('/home/mahmoud/projects/lithoxyl/') import time import lithoxyl from lithoxyl import sinks, logger from dictutils import OMD, FastIterOrderedMultiDict from collections import OrderedDict as OD q_sink = lithoxyl.sinks.QuantileSink() log = lithoxyl.logger.BaseLogger('bench_stats', sinks=[q_sink]) times = 10 size = 10000 redun = 2 _rng = range(size / redun) * redun _unique_keys = set(_rng) _bad_rng = range(size, size + size) _pairs = zip(_rng, _rng) # order matters because 'pop' mutates _shared_actions = ('setitem', 'iteritems', 'iterkeys', 'getitem', 'keyerror', 'pop') _multi_actions = ('multi_iteritems',) _all_actions = ('init',) + _multi_actions + _shared_actions MULTI_IMPLS = (FastIterOrderedMultiDict, OMD) try: from werkzeug.datastructures import MultiDict, OrderedMultiDict as WOMD MULTI_IMPLS += (WOMD, MultiDict) except ImportError: print('(installing werkzeug is recommended for full comparison)') ALL_IMPLS = MULTI_IMPLS + (OD, dict) def bench(): for impl in ALL_IMPLS: q_sink = lithoxyl.sinks.QuantileSink() impl_name = '.'.join([impl.__module__, impl.__name__]) log = lithoxyl.logger.BaseLogger(impl_name, sinks=[q_sink]) print() print('+ %s' % impl_name) for _ in range(times): with log.info('total'): for _ in range(times): with log.info('init'): target_dict = impl(_pairs) if impl in MULTI_IMPLS: _actions = _multi_actions + _shared_actions else: _actions = _shared_actions for action in _actions: action_func = globals()['_do_' + action] with log.info(action): action_func(target_dict) for action in _all_actions: try: best_msecs = q_sink.qas[impl_name][action].min * 1000 print(f' - {action} - {best_msecs:g} ms') except KeyError: pass best_msecs = q_sink.qas[impl_name]['total'].min * 1000 median_msecs = q_sink.qas[impl_name]['total'].median * 1000 print(' > ran %d loops of %d items each, best time: %g ms, median time: %g ms' % (times, size, best_msecs, median_msecs)) print() return def _do_setitem(target_dict): for k, i in enumerate(string.lowercase): target_dict[k] = i def _do_iteritems(target_dict): [_ for _ in target_dict.iteritems()] def _do_iterkeys(target_dict): [_ for _ in target_dict.iterkeys()] def 
_do_multi_iteritems(target_dict): [_ for _ in target_dict.iteritems(multi=True)] def _do_multi_iterkeys(target_dict): [_ for _ in target_dict.iterkeys(multi=True)] def _do_getitem(target_dict): for k in _rng: target_dict[k] def _do_keyerror(target_dict): for k in _bad_rng: try: target_dict[k] except KeyError: pass def _do_pop(target_dict): for k in _unique_keys: target_dict.pop(k) assert not target_dict if __name__ == '__main__': bench() boltons-25.0.0/misc/linkify_changelog.py000066400000000000000000000024361475005545200202530ustar00rootroot00000000000000import re import sys BASE_RTD_URL = 'http://boltons.readthedocs.org/en/latest/' BASE_ISSUES_URL = 'https://github.com/mahmoud/boltons/issues/' _issues_re = re.compile(r'#(\d+)') _member_re = re.compile(r'((\w+utils)\.[a-zA-Z0-9_.]+)') URL_MAP = {} def sub_member_match(match): full_name = match.group(1) mod_name = match.group(2) url = BASE_RTD_URL + mod_name + '.html#boltons.' + full_name ret = f'[{full_name}][{full_name}]' URL_MAP[full_name] = url # print ret return ret def sub_issue_match(match): link_text = match.group(0) issue_num = match.group(1) link_target = 'i%s' % issue_num link_url = BASE_ISSUES_URL + issue_num ret = f'[{link_text}][{link_target}]' URL_MAP[link_target] = link_url # print ret return ret def main(): try: cl_filename = sys.argv[1] except IndexError: cl_filename = 'CHANGELOG.md' cl_text = open(cl_filename).read().decode('utf-8') ret = _member_re.sub(sub_member_match, cl_text) ret = _issues_re.sub(sub_issue_match, ret) link_map_lines = [] for (name, url) in sorted(URL_MAP.items()): link_map_lines.append(f'[{name}]: {url}') print(ret) print() print() print('\n'.join(link_map_lines)) print() if __name__ == '__main__': main() boltons-25.0.0/misc/table_html_app.py000066400000000000000000000127451475005545200175560ustar00rootroot00000000000000import os import json import clastic from clastic import Application from clastic.render import JSONRender from clastic.middleware import GetParamMiddleware from clastic import Response from clastic.sinter import getargspec from boltons.tableutils import Table _DATA = json.load(open('meta_stats.json')) _CUR_PATH = os.path.dirname(os.path.abspath(clastic.__file__)) _CA_PATH = _CUR_PATH + '/_clastic_assets' _CSS_PATH = _CA_PATH + '/common.css' _STYLE = open(_CSS_PATH).read() def fetch_json(url): import urllib2 response = urllib2.urlopen(url) content = response.read() data = json.loads(content) return data class AutoTableRenderer: _html_doctype = '' _html_wrapper, _html_wrapper_close = '', '' _html_table_tag = '' _html_style_content = _STYLE def __init__(self, max_depth=4, orientation='auto'): self.max_depth = max_depth self.orientation = orientation def _html_format_ep(self, route): # TODO: callable object endpoints? module_name = route.endpoint.__module__ try: func_name = route.endpoint.func_name except: func_name = repr(route.endpoint) args, _, _, _ = getargspec(route.endpoint) argstr = ', '.join(args) title = ('
<h2><small><sub>%s</sub></small><br/>%s(%s)</h2>
' % (module_name, func_name, argstr)) return title def __call__(self, context, _route): content_parts = [self._html_wrapper] if self._html_style_content: content_parts.extend(['']) content_parts.append('') title = self._html_format_ep(_route) content_parts.append(title) table = Table.from_data(context, max_depth=self.max_depth) table._html_table_tag = self._html_table_tag content = table.to_html(max_depth=self.max_depth, orientation=self.orientation) content_parts.append(content) content_parts.append('') content_parts.append(self._html_wrapper_close) return Response('\n'.join(content_parts), mimetype='text/html') class BasicRender: _default_mime = 'application/json' _format_mime_map = {'html': 'text/html', 'json': 'application/json'} def __init__(self, dev_mode=True, qp_name='format'): self.qp_name = qp_name self.json_render = JSONRender(dev_mode=dev_mode) self.autotable_render = AutoTableRenderer() def render_response(self, request, context, _route): from collections.abc import Sized if isinstance(context, str): # already serialized if self._guess_json(context): return Response(context, mimetype="application/json") elif '=3.2,<4"] build-backend = "flit_core.buildapi" # A brief checklist for release: # * tox # * git commit (if applicable) # * Bump pyproject.toml version off of -dev # * git commit -a -m "bump version for x.y.z release" # * rm -rf dist/* # * flit build # * flit publish # * bump docs/conf.py version # * git commit # * git tag -a x.y.z -m "brief summary" # * write CHANGELOG # * git commit # * bump pyproject.toml version onto n+1 dev # * git commit # * git push boltons-25.0.0/requirements-test.txt000066400000000000000000000001721475005545200175260ustar00rootroot00000000000000coverage==7.2.7 pytest==7.4.4; python_version < "3.8" pytest==8.3.4; python_version >= "3.8" pytest-cov==4.1.0 tox==4.8.0 boltons-25.0.0/setup.cfg000066400000000000000000000000431475005545200151030ustar00rootroot00000000000000[metadata] license_files = LICENSE boltons-25.0.0/tests/000077500000000000000000000000001475005545200144275ustar00rootroot00000000000000boltons-25.0.0/tests/.coveragerc000066400000000000000000000003441475005545200165510ustar00rootroot00000000000000; command for getting a coverage report: ; pytest --doctest-modules --cov=boltons --cov-report html --cov-config tests/.coveragerc tests/ boltons/ ; (run from project directory) [run] branch = True [html] directory = htmlcov boltons-25.0.0/tests/__init__.py000066400000000000000000000000001475005545200165260ustar00rootroot00000000000000boltons-25.0.0/tests/conftest.py000066400000000000000000000014161475005545200166300ustar00rootroot00000000000000import sys import re _VERSION_MARKER = re.compile(r'_py(?P\d)(?P\d)?') def pytest_ignore_collect(path, config): """ Ignore tests that end with _pyX, where X does not equal this interpreter's major version. 
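For example, a hypothetical test_foo_py3.py would only be collected on Python 3, while test_foo_py37.py would only be collected on Python 3.7 exactly.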
""" filename = path.basename modulename = filename.split('.', 1)[0] match = _VERSION_MARKER.search(modulename) if not match: return False major_version = match.group('major_version') minor_version = match.group('minor_version') if minor_version: version_match = (int(major_version), int(minor_version)) == sys.version_info[:2] else: version_match = int(major_version) == sys.version_info[0] return not version_match # because this is an _ignore_ (not an include) boltons-25.0.0/tests/jsonl_test_data.txt000066400000000000000000000000471475005545200203460ustar00rootroot00000000000000{} {"1": 1} {"2": 2} {"3": 3} {"4": 4} boltons-25.0.0/tests/newlines_test_data.txt000066400000000000000000000000461475005545200210440ustar00rootroot00000000000000a b c d e f g hijklmnop qrstuv wxyz boltons-25.0.0/tests/test_cacheutils.py000066400000000000000000000306441475005545200201730ustar00rootroot00000000000000import string import sys from abc import abstractmethod, ABCMeta import pytest from boltons.cacheutils import LRU, LRI, cached, cachedmethod, cachedproperty, MinIDMap, ThresholdCounter class CountingCallable: def __init__(self): self.call_count = 0 def __call__(self, *a, **kw): self.call_count += 1 return self.call_count def test_lru_add(): cache = LRU(max_size=3) for i in range(4): cache[i] = i assert len(cache) == 3 assert 0 not in cache def test_lri(): cache_size = 10 bc = LRI(cache_size, on_miss=lambda k: k.upper()) for idx, char in enumerate(string.ascii_letters): x = bc[char] assert x == char.upper() least_recent_insert_index = idx - cache_size if least_recent_insert_index >= 0: # least recently inserted object evicted assert len(bc) == cache_size for char in string.ascii_letters[least_recent_insert_index+1:idx]: assert char in bc # test that reinserting an existing key changes eviction behavior bc[string.ascii_letters[-cache_size+1]] = "new value" least_recently_inserted_key = string.ascii_letters[-cache_size+2] bc["unreferenced_key"] = "value" keys_in_cache = [ string.ascii_letters[i] for i in range(-cache_size + 1, 0) if string.ascii_letters[i] != least_recently_inserted_key ] keys_in_cache.append("unreferenced_key") assert len(bc) == cache_size for k in keys_in_cache: assert k in bc def test_lri_cache_eviction(): """ Regression test Original LRI implementation had a bug where the specified cache size only supported `max_size` number of inserts to the cache, rather than support `max_size` number of keys in the cache. This would result in some unintuitive behavior, where a key is evicted recently inserted value would be evicted from the cache if the key inserted was inserted `max_size` keys earlier. """ test_cache = LRI(2) # dequeue: (key1); dict keys: (key1) test_cache["key1"] = "value1" # dequeue: (key1, key1); dict keys: (key1) test_cache["key1"] = "value1" # dequeue: (key1, key1, key2); dict keys: (key1, key2) test_cache["key2"] = "value2" # dequeue: (key1, key2, key3); dict keys: (key2, key3) test_cache["key3"] = "value3" # will error here since we evict key1 from the cache and it doesn't # exist in the dict anymore test_cache["key3"] = "value3" def test_cache_sizes_on_repeat_insertions(): """ Regression test Original LRI implementation had an unbounded size of memory regardless of the value for its `max_size` parameter due to a naive insertion algorithm onto an underlying deque data structure. 
To prevent memory leaks, this test will assert that a cache does not grow past its max size given values of a uniform memory footprint """ caches_to_test = (LRU, LRI) for cache_type in caches_to_test: test_cache = cache_type(2) # note strings are used to force allocation of memory test_cache["key1"] = "1" test_cache["key2"] = "1" initial_list_size = len(test_cache._get_flattened_ll()) for k in test_cache: for __ in range(100): test_cache[k] = "1" list_size_after_inserts = len(test_cache._get_flattened_ll()) assert initial_list_size == list_size_after_inserts def test_lru_basic(): lru = LRU(max_size=1) repr(lru) # sanity lru['hi'] = 0 lru['bye'] = 1 assert len(lru) == 1 lru['bye'] assert lru.get('hi') is None del lru['bye'] assert 'bye' not in lru assert len(lru) == 0 assert not lru try: lru.pop('bye') except KeyError: pass else: assert False default = object() assert lru.pop('bye', default) is default try: lru.popitem() except KeyError: pass else: assert False lru['another'] = 1 assert lru.popitem() == ('another', 1) lru['yet_another'] = 2 assert lru.pop('yet_another') == 2 lru['yet_another'] = 3 assert lru.pop('yet_another', default) == 3 lru['yet_another'] = 4 lru.clear() assert not lru lru['yet_another'] = 5 second_lru = LRU(max_size=1) assert lru.copy() == lru second_lru['yet_another'] = 5 assert second_lru == lru assert lru == second_lru lru.update(LRU(max_size=2, values=[('a', 1), ('b', 2)])) assert len(lru) == 1 assert 'yet_another' not in lru lru.setdefault('x', 2) assert dict(lru) == {'x': 2} lru.setdefault('x', 3) assert dict(lru) == {'x': 2} assert lru != second_lru assert second_lru != lru @pytest.mark.parametrize("lru_class", [LRU, LRI]) def test_lru_dict_replacement(lru_class): # see issue #348 cache = lru_class() # Add an entry. cache['a'] = 1 # Normal __getitem__ access. assert cache['a'] == 1 # passes. # Convert to dict. assert dict(cache) == {'a': 1} # passes. # Another way to access the only value. assert list(cache.values())[0] == 1 # passes. # Replace the existing 'a' entry with a new value. cache['a'] = 200 # __getitem__ works as expected. assert cache['a'] == 200 # passes. # Both dict and accessing via values() return the old entry: 1. assert dict(cache) == {'a': 200} # fails. assert list(cache.values())[0] == 200 def test_lru_with_dupes(): SIZE = 2 lru = LRU(max_size=SIZE) for i in [0, 0, 1, 1, 2, 2]: lru[i] = i assert _test_linkage(lru._anchor, SIZE + 1), 'linked list invalid' def test_lru_with_dupes_2(): "From Issue #55, h/t github.com/mt" SIZE = 3 lru = LRU(max_size=SIZE) keys = ['A', 'A', 'B', 'A', 'C', 'B', 'D', 'E'] for i, k in enumerate(keys): lru[k] = 'HIT' assert _test_linkage(lru._anchor, SIZE + 1), 'linked list invalid' return def _test_linkage(dll, max_count=10000, prev_idx=0, next_idx=1): """A function to test basic invariants of doubly-linked lists (with links made of Python lists). 1. Test that the list is not longer than a certain length 2. That the forward links (indicated by `next_idx`) correspond to the backward links (indicated by `prev_idx`). The `dll` parameter is the root/anchor link of the list. 
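Note that the anchor itself counts as a link, which is why the callers above pass a max length of the cache size plus one.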
""" start = cur = dll i = 0 prev = None while 1: if i > max_count: raise Exception("did not return to anchor link after %r rounds" % max_count) if prev is not None and cur is start: break prev = cur cur = cur[next_idx] if cur[prev_idx] is not prev: raise Exception('prev_idx does not point to prev at i = %r' % i) i += 1 return True def test_cached_dec(): lru = LRU() inner_func = CountingCallable() func = cached(lru)(inner_func) assert inner_func.call_count == 0 func() assert inner_func.call_count == 1 func() assert inner_func.call_count == 1 func('man door hand hook car door') assert inner_func.call_count == 2 return def test_unscoped_cached_dec(): lru = LRU() inner_func = CountingCallable() func = cached(lru)(inner_func) other_inner_func = CountingCallable() other_func = cached(lru)(other_inner_func) assert inner_func.call_count == 0 func('a') assert inner_func.call_count == 1 func('a') other_func('a') assert other_inner_func.call_count == 0 return def test_callable_cached_dec(): lru = LRU() get_lru = lambda: lru inner_func = CountingCallable() func = cached(get_lru)(inner_func) assert inner_func.call_count == 0 func() assert inner_func.call_count == 1 func() assert inner_func.call_count == 1 lru.clear() func() assert inner_func.call_count == 2 func() assert inner_func.call_count == 2 print(repr(func)) return def test_cachedmethod(): class Car: def __init__(self, cache=None): self.h_cache = LRI() if cache is None else cache self.door_count = 0 self.hook_count = 0 self.hand_count = 0 @cachedmethod('h_cache') def hand(self, *a, **kw): self.hand_count += 1 @cachedmethod(lambda obj: obj.h_cache) def hook(self, *a, **kw): self.hook_count += 1 @cachedmethod('h_cache', scoped=False) def door(self, *a, **kw): self.door_count += 1 car = Car() # attribute name-style assert car.hand_count == 0 car.hand('h', a='nd') assert car.hand_count == 1 car.hand('h', a='nd') assert car.hand_count == 1 # callable-style assert car.hook_count == 0 car.hook() assert car.hook_count == 1 car.hook() assert car.hook_count == 1 # Ensure that non-selfish caches share the cache nicely lru = LRU() car_one = Car(cache=lru) assert car_one.door_count == 0 car_one.door('bob') assert car_one.door_count == 1 car_one.door('bob') assert car_one.door_count == 1 car_two = Car(cache=lru) assert car_two.door_count == 0 car_two.door('bob') assert car_two.door_count == 0 # try unbound for kicks Car.door(Car(), 'bob') # always check the repr print(repr(car_two.door)) print(repr(Car.door)) return def test_cachedmethod_maintains_func_abstraction(): ABC = ABCMeta('ABC', (object,), {}) class Car(ABC): def __init__(self, cache=None): self.h_cache = LRI() if cache is None else cache self.hand_count = 0 @cachedmethod('h_cache') @abstractmethod def hand(self, *a, **kw): self.hand_count += 1 with pytest.raises(TypeError): Car() def test_cachedproperty(): class Proper: def __init__(self): self.expensive_func = CountingCallable() @cachedproperty def useful_attr(self): """Useful DocString""" return self.expensive_func() prop = Proper() assert prop.expensive_func.call_count == 0 assert prop.useful_attr == 1 assert prop.expensive_func.call_count == 1 assert prop.useful_attr == 1 assert prop.expensive_func.call_count == 1 # Make sure original DocString is accessible assert Proper.useful_attr.__doc__ == "Useful DocString" prop.useful_attr += 1 # would not be possible with normal properties assert prop.useful_attr == 2 delattr(prop, 'useful_attr') assert prop.expensive_func.call_count == 1 assert prop.useful_attr assert prop.expensive_func.call_count == 2 
repr(Proper.useful_attr) def test_cachedproperty_maintains_func_abstraction(): ABC = ABCMeta('ABC', (object,), {}) class AbstractExpensiveCalculator(ABC): @cachedproperty @abstractmethod def calculate(self): pass with pytest.raises(TypeError): AbstractExpensiveCalculator() def test_min_id_map(): import sys if '__pypy__' in sys.builtin_module_names: return # TODO: pypy still needs some work midm = MinIDMap() class Foo: def __init__(self, val): self.val = val # use this circular array to have them periodically collected ref_wheel = [None, None, None] for i in range(1000): nxt = Foo(i) ref_wheel[i % len(ref_wheel)] = nxt assert midm.get(nxt) <= len(ref_wheel) if i % 10 == 0: midm.drop(nxt) # test __iter__ assert sorted([f.val for f in list(midm)[:10]]) == list(range(1000 - len(ref_wheel), 1000)) items = list(midm.iteritems()) assert isinstance(items[0][0], Foo) assert sorted(item[1] for item in items) == list(range(0, len(ref_wheel))) def test_threshold_counter(): tc = ThresholdCounter(threshold=0.1) tc.add(1) assert tc.items() == [(1, 1)] tc.update([2] * 10) assert tc.get(1) == 0 tc.add(5) assert 5 in tc assert len(list(tc.elements())) == 11 assert tc.threshold == 0.1 assert tc.get_common_count() == 11 assert tc.get_uncommon_count() == 1 # bc the initial 1 was dropped assert round(tc.get_commonality(), 2) == 0.92 assert tc.most_common(2) == [(2, 10), (5, 1)] assert list(tc.elements()) == ([2] * 10) + [5] assert tc[2] == 10 assert len(tc) == 2 assert sorted(tc.keys()) == [2, 5] assert sorted(tc.values()) == [1, 10] assert sorted(tc.items()) == [(2, 10), (5, 1)] boltons-25.0.0/tests/test_debugutils_trace.py000066400000000000000000000032651475005545200213730ustar00rootroot00000000000000from collections import namedtuple from pytest import raises from boltons.debugutils import wrap_trace def test_trace_dict(): target = {} wrapped = wrap_trace(target) assert target is not wrapped assert isinstance(wrapped, dict) wrapped['a'] = 'A' assert target['a'] == 'A' assert len(wrapped) == len(target) wrapped.pop('a') assert 'a' not in target with raises(AttributeError): wrapped.nonexistent_attr = 'nope' return def test_trace_bytes(): target = b'Hello' wrapped = wrap_trace(target) assert target is not wrapped assert isinstance(wrapped, bytes) assert len(wrapped) == len(target) assert wrapped.decode('utf-8') == 'Hello' assert wrapped.lower() == target.lower() def test_trace_exc(): class TestException(Exception): pass target = TestException('exceptions can be a good thing') wrapped = wrap_trace(target) try: raise wrapped except TestException as te: assert te.args == target.args def test_trace_which(): class Config: def __init__(self, value): self.value = value config = Config('first') wrapped = wrap_trace(config, which='__setattr__') wrapped.value = 'second' assert config.value == 'second' def test_trace_namedtuple(): TargetType = namedtuple('TargetType', 'x y z') target = TargetType(1, 2, 3) wrapped = wrap_trace(target) assert wrapped == (1, 2, 3) def test_trace_oldstyle(): class Oldie: test = object() def get_test(self): return self.test oldie = Oldie() wrapped = wrap_trace(oldie) assert wrapped.get_test() is oldie.test return boltons-25.0.0/tests/test_dictutils.py000066400000000000000000000316531475005545200200540ustar00rootroot00000000000000import sys import pytest from boltons.dictutils import OMD, OneToOne, ManyToMany, FrozenDict, subdict, FrozenHashError _ITEMSETS = [[], [('a', 1), ('b', 2), ('c', 3)], [('A', 'One'), ('A', 'One'), ('A', 'One')], [('Z', -1), ('Y', -2), ('Y', -2)], [('a', 1), ('b', 2), 
('a', 3), ('c', 4)]] def test_dict_init(): d = dict(_ITEMSETS[1]) omd = OMD(d) assert omd['a'] == 1 assert omd['b'] == 2 assert omd['c'] == 3 assert len(omd) == 3 assert omd.getlist('a') == [1] assert omd == d def test_todict(): omd = OMD(_ITEMSETS[2]) assert len(omd) == 1 assert omd['A'] == 'One' d = omd.todict(multi=True) assert len(d) == 1 assert d['A'] == ['One', 'One', 'One'] flat = omd.todict() assert flat['A'] == 'One' for itemset in _ITEMSETS: omd = OMD(itemset) d = dict(itemset) flat = omd.todict() assert flat == d return def test_eq(): omd = OMD(_ITEMSETS[3]) assert omd == omd assert not (omd != omd) omd2 = OMD(_ITEMSETS[3]) assert omd == omd2 assert omd2 == omd assert not (omd != omd2) d = dict(_ITEMSETS[3]) assert d == omd omd3 = OMD(d) assert omd != omd3 def test_copy(): for itemset in _ITEMSETS: omd = OMD(itemset) omd_c = omd.copy() assert omd == omd_c if omd_c: omd_c.pop(itemset[0][0]) assert omd != omd_c return def test_omd_pickle(): import pickle empty = OMD() pickled = pickle.dumps(empty) roundtripped = pickle.loads(pickled) assert roundtripped == empty nonempty = OMD([('a', 1), ('b', 2), ('b', 3)]) roundtripped = pickle.loads(pickle.dumps(nonempty)) assert roundtripped == nonempty assert roundtripped.getlist('b') == [2, 3] def test_clear(): for itemset in _ITEMSETS: omd = OMD(itemset) omd.clear() assert len(omd) == 0 assert not omd omd.clear() assert not omd omd['a'] = 22 assert omd omd.clear() assert not omd def test_types(): from collections.abc import MutableMapping omd = OMD() assert isinstance(omd, dict) assert isinstance(omd, MutableMapping) def test_multi_correctness(): size = 100 redun = 5 _rng = range(size) _rng_redun = list(range(size//redun)) * redun _pairs = zip(_rng_redun, _rng) omd = OMD(_pairs) for multi in (True, False): vals = [x[1] for x in omd.iteritems(multi=multi)] strictly_ascending = all([x < y for x, y in zip(vals, vals[1:])]) assert strictly_ascending return def test_kv_consistency(): for itemset in _ITEMSETS: omd = OMD(itemset) for multi in (True, False): items = omd.items(multi=multi) keys = omd.keys(multi=multi) values = omd.values(multi=multi) assert keys == [x[0] for x in items] assert values == [x[1] for x in items] return def test_update_basic(): omd = OMD(_ITEMSETS[1]) omd2 = OMD({'a': 10}) omd.update(omd2) assert omd['a'] == 10 assert omd.getlist('a') == [10] omd2_c = omd2.copy() omd2_c.pop('a') assert omd2 != omd2_c def test_update(): for first, second in zip(_ITEMSETS, _ITEMSETS[1:]): omd1 = OMD(first) omd2 = OMD(second) ref1 = dict(first) ref2 = dict(second) omd1.update(omd2) ref1.update(ref2) assert omd1.todict() == ref1 omd1_repr = repr(omd1) omd1.update(omd1) assert omd1_repr == repr(omd1) def test_update_extend(): for first, second in zip(_ITEMSETS, _ITEMSETS[1:] + [[]]): omd1 = OMD(first) omd2 = OMD(second) ref = dict(first) orig_keys = set(omd1) ref.update(second) omd1.update_extend(omd2) for k in omd2: assert len(omd1.getlist(k)) >= len(omd2.getlist(k)) assert omd1.todict() == ref assert orig_keys <= set(omd1) def test_invert(): for items in _ITEMSETS: omd = OMD(items) iomd = omd.inverted() # first, test all items made the jump assert len(omd.items(multi=True)) == len(iomd.items(multi=True)) for val in omd.values(): assert val in iomd # all values present as keys def test_poplast(): for items in _ITEMSETS[1:]: omd = OMD(items) assert omd.poplast() == items[-1][-1] def test_pop(): omd = OMD() omd.add('even', 0) omd.add('odd', 1) omd.add('even', 2) assert omd.pop('odd') == 1 assert omd.pop('odd', 99) == 99 try: omd.pop('odd') 
assert False except KeyError: pass assert len(omd) == 1 assert len(omd.items(multi=True)) == 2 def test_addlist(): omd = OMD() omd.addlist('a', [1, 2, 3]) omd.addlist('b', [4, 5]) assert omd.keys() == ['a', 'b'] assert len(list(omd.iteritems(multi=True))) == 5 e_omd = OMD() e_omd.addlist('a', []) assert e_omd.keys() == [] assert len(list(e_omd.iteritems(multi=True))) == 0 def test_pop_all(): omd = OMD() omd.add('even', 0) omd.add('odd', 1) omd.add('even', 2) assert omd.popall('odd') == [1] assert len(omd) == 1 try: omd.popall('odd') assert False except KeyError: pass assert omd.popall('odd', None) is None assert omd.popall('even') == [0, 2] assert len(omd) == 0 assert omd.popall('nope', None) is None assert OMD().popall('', None) is None def test_reversed(): from collections import OrderedDict for items in _ITEMSETS: omd = OMD(items) od = OrderedDict(items) for ik, ok in zip(reversed(od), reversed(omd)): assert ik == ok r100 = range(100) omd = OMD(zip(r100, r100)) for i in r100: omd.add(i, i) r100 = list(reversed(r100)) assert list(reversed(omd)) == r100 omd = OMD() assert list(reversed(omd)) == list(reversed(omd.keys())) for i in range(20): for j in range(i): omd.add(i, i) assert list(reversed(omd)) == list(reversed(omd.keys())) def test_setdefault(): omd = OMD() empty_list = [] x = omd.setdefault('1', empty_list) assert x is empty_list y = omd.setdefault('2') assert y is None assert omd.setdefault('1', None) is empty_list e_omd = OMD() e_omd.addlist(1, []) assert e_omd.popall(1, None) is None assert len(e_omd) == 0 def test_ior(): omd_a = OMD(_ITEMSETS[1]) omd_b = OMD(_ITEMSETS[2]) omd_c = OMD(_ITEMSETS[1]) omd_a_id = id(omd_a) omd_a |= omd_b omd_c.update(omd_b) assert omd_a_id == id(omd_a) assert omd_a == omd_c ## END OMD TESTS import string def test_subdict(): cap_map = {x: x.upper() for x in string.hexdigits} assert len(cap_map) == 22 assert len(subdict(cap_map, drop=['a'])) == 21 assert 'a' not in subdict(cap_map, drop=['a']) assert len(subdict(cap_map, keep=['a', 'b'])) == 2 def test_subdict_keep_type(): omd = OMD({'a': 'A'}) assert subdict(omd) == omd assert type(subdict(omd)) is OMD def test_one_to_one(): e = OneToOne({1:2}) def ck(val, inv): assert (e, e.inv) == (val, inv) ck({1:2}, {2:1}) e[2] = 3 ck({1:2, 2:3}, {3:2, 2:1}) e.clear() ck({}, {}) e[1] = 1 ck({1:1}, {1:1}) e[1] = 2 ck({1:2}, {2:1}) e[3] = 2 ck({3:2}, {2:3}) del e[3] ck({}, {}) e[1] = 2 e.inv[2] = 3 ck({3:2}, {2:3}) del e.inv[2] ck({}, {}) assert OneToOne({1:2, 3:4}).copy().inv == {2:1, 4:3} e[1] = 2 e.pop(1) ck({}, {}) e[1] = 2 e.inv.pop(2) ck({}, {}) e[1] = 2 e.popitem() ck({}, {}) e.setdefault(1) ck({1: None}, {None: 1}) e.inv.setdefault(2) ck({1: None, None: 2}, {None: 1, 2: None}) e.clear() e.update({}) ck({}, {}) e.update({1: 2}, cat="dog") ck({1:2, "cat":"dog"}, {2: 1, "dog":"cat"}) # try various overlapping values oto = OneToOne({'a': 0, 'b': 0}) assert len(oto) == len(oto.inv) == 1 oto['c'] = 0 assert len(oto) == len(oto.inv) == 1 assert oto.inv[0] == 'c' oto.update({'z': 0, 'y': 0}) assert len(oto) == len(oto.inv) == 1 # test out unique classmethod with pytest.raises(ValueError): OneToOne.unique({'a': 0, 'b': 0}) return def test_many_to_many(): m2m = ManyToMany() assert len(m2m) == 0 assert not m2m m2m.add(1, 'a') assert m2m m2m.add(1, 'b') assert len(m2m) == 1 assert m2m[1] == frozenset(['a', 'b']) assert m2m.inv['a'] == frozenset([1]) del m2m.inv['a'] assert m2m[1] == frozenset(['b']) assert 1 in m2m del m2m.inv['b'] assert 1 not in m2m m2m[1] = ('a', 'b') assert set(m2m.iteritems()) == {(1, 'a'), (1, 
'b')} m2m.remove(1, 'a') m2m.remove(1, 'b') assert 1 not in m2m m2m.update([(1, 'a'), (2, 'b')]) assert m2m.get(2) == frozenset(('b',)) assert m2m.get(3) == frozenset(()) assert ManyToMany(['ab', 'cd']) == ManyToMany(['ba', 'dc']).inv assert ManyToMany(ManyToMany(['ab', 'cd'])) == ManyToMany(['ab', 'cd']) m2m = ManyToMany({'a': 'b'}) m2m.replace('a', 'B') # also test the repr while we're at it assert repr(m2m) == repr(ManyToMany([("B", "b")])) assert repr(m2m).startswith('ManyToMany(') and 'B' in repr(m2m) def test_frozendict(): efd = FrozenDict() assert isinstance(efd, dict) assert len(efd) == 0 assert not efd assert repr(efd) == "FrozenDict({})" data = {'a': 'A', 'b': 'B'} fd = FrozenDict(data) assert bool(fd) assert len(fd) == 2 assert fd['a'] == 'A' assert fd['b'] == 'B' assert sorted(fd.keys()) == ['a', 'b'] assert sorted(fd.values()) == ['A', 'B'] assert sorted(fd.items()) == [('a', 'A'), ('b', 'B')] assert 'a' in fd assert 'c' not in fd assert hash(fd) fd_map = {'fd': fd} assert fd_map['fd'] is fd with pytest.raises(TypeError): fd['c'] = 'C' with pytest.raises(TypeError): del fd['a'] with pytest.raises(TypeError): fd.update(x='X') with pytest.raises(TypeError): fd.setdefault('x', []) with pytest.raises(TypeError): fd.pop('c') with pytest.raises(TypeError): fd.popitem() with pytest.raises(TypeError): fd.clear() import pickle fkfd = FrozenDict.fromkeys([2, 4, 6], value=0) assert pickle.loads(pickle.dumps(fkfd)) == fkfd assert sorted(fkfd.updated({8: 0}).keys()) == [2, 4, 6, 8] # try something with an unhashable value unfd = FrozenDict({'a': ['A']}) with pytest.raises(TypeError) as excinfo: {unfd: 'val'} assert excinfo.type is FrozenHashError with pytest.raises(TypeError) as excinfo2: {unfd: 'val'} assert excinfo.value is excinfo2.value # test cached exception return @pytest.mark.skipif(sys.version_info < (3, 9), reason="requires python3.9 or higher") def test_frozendict_ior(): data = {'a': 'A', 'b': 'B'} fd = FrozenDict(data) with pytest.raises(TypeError, match=".*FrozenDict.*immutable.*"): fd |= fd def test_frozendict_api(): # all the read-only methods that are fine through_methods = ['__class__', '__cmp__', '__contains__', '__delattr__', '__dir__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__getitem__', '__getstate__', '__gt__', '__init__', '__iter__', '__le__', '__len__', '__lt__', '__ne__', '__new__', '__or__', '__reduce__', '__reversed__', '__ror__', '__setattr__', '__sizeof__', '__str__', 'copy', 'get', 'has_key', 'items', 'iteritems', 'iterkeys', 'itervalues', 'keys', 'values', 'viewitems', 'viewkeys', 'viewvalues'] fd = FrozenDict() ret = [] for attrname in dir(fd): if attrname == '_hash': # in the dir, even before it's set continue attr = getattr(fd, attrname) if not callable(attr): continue if getattr(FrozenDict, attrname) == getattr(dict, attrname, None) and attrname not in through_methods: assert attrname == False ret.append(attrname) import copy assert copy.copy(fd) is fd boltons-25.0.0/tests/test_ecoutils.py000066400000000000000000000004161475005545200176700ustar00rootroot00000000000000import sys from boltons import ecoutils def test_basic(): # basic sanity test prof = ecoutils.get_profile() assert prof['python']['bin'] == sys.executable def test_scrub(): prof = ecoutils.get_profile(scrub=True) assert prof['username'] == '-' boltons-25.0.0/tests/test_fileutils.py000066400000000000000000000052151475005545200200430ustar00rootroot00000000000000import os.path from boltons import fileutils from boltons.fileutils import FilePerms, iter_find_files 
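# a minimal extra sketch, not part of the original suite, assuming the
# FilePerms.from_int classmethod documented in boltons.fileutils: an octal
# mode should round-trip through the FilePerms abstraction and back to an int
def test_fileperms_roundtrip_sketch():
    assert int(FilePerms.from_int(0o644)) == 0o644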
from boltons.strutils import removeprefix BOLTONS_PATH = os.path.dirname(os.path.abspath(fileutils.__file__)) def test_fileperms(): up = FilePerms() up.other = '' up.user = 'xrw' up.group = 'rrrwx' try: up.other = 'nope' except ValueError: # correctly raised ValueError on invalid chars pass assert repr(up) == "FilePerms(user='rwx', group='rwx', other='')" assert up.user == 'rwx' assert oct(int(up)) == '0o770' assert int(FilePerms()) == 0 def test_iter_find_files(): def _to_baseless_list(paths): return [removeprefix(p, BOLTONS_PATH).lstrip(os.path.sep) for p in paths] assert 'fileutils.py' in _to_baseless_list(iter_find_files(BOLTONS_PATH, patterns=['*.py'])) boltons_parent = os.path.dirname(BOLTONS_PATH) assert 'fileutils.py' in _to_baseless_list(iter_find_files(boltons_parent, patterns=['*.py'])) assert 'fileutils.py' not in _to_baseless_list(iter_find_files(boltons_parent, patterns=['*.py'], max_depth=0)) def test_rotate_file_no_rotation(tmp_path): file_path = tmp_path / 'test_file.txt' fileutils.rotate_file(file_path) assert not file_path.exists() def test_rotate_file_one_rotation(tmp_path): file_path = tmp_path / 'test_file.txt' file_path.write_text('test content') assert file_path.exists() fileutils.rotate_file(file_path) assert not file_path.exists() assert (tmp_path / 'test_file.1.txt').exists() def test_rotate_file_full_rotation(tmp_path): file_path = tmp_path / 'test_file.txt' file_path.write_text('test content 0') for i in range(1, 5): cur_path = tmp_path / f'test_file.{i}.txt' cur_path.write_text(f'test content {i}') assert cur_path.exists() fileutils.rotate_file(file_path, keep=5) assert not file_path.exists() for i in range(1, 5): cur_path = tmp_path / f'test_file.{i}.txt' assert cur_path.read_text() == f'test content {i-1}' assert not (tmp_path / 'test_file.5.txt').exists() def test_rotate_file_full_rotation_no_ext(tmp_path): file_path = tmp_path / 'test_file' file_path.write_text('test content 0') for i in range(1, 5): cur_path = tmp_path / f'test_file.{i}' cur_path.write_text(f'test content {i}') assert cur_path.exists() fileutils.rotate_file(file_path, keep=5) assert not file_path.exists() for i in range(1, 5): cur_path = tmp_path / f'test_file.{i}' assert cur_path.read_text() == f'test content {i-1}' assert not (tmp_path / 'test_file.5').exists() boltons-25.0.0/tests/test_formatutils.py000066400000000000000000000040151475005545200204110ustar00rootroot00000000000000import re from collections import namedtuple from boltons.formatutils import (get_format_args, split_format_str, tokenize_format_str, infer_positional_format_args, DeferredValue as DV) PFAT = namedtuple("PositionalFormatArgTest", "fstr arg_vals res") _PFATS = [PFAT('{} {} {}', ('hi', 'hello', 'bye'), "hi hello bye"), PFAT('{:d} {}', (1, 2), "1 2"), PFAT('{!s} {!r}', ('str', 'repr'), "str 'repr'"), PFAT('{[hi]}, {.__name__!r}', ({'hi': 'hi'}, re), "hi, 're'"), PFAT('{{joek}} ({} {})', ('so', 'funny'), "{joek} (so funny)")] def test_pos_infer(): for i, (tmpl, args, res) in enumerate(_PFATS): converted = infer_positional_format_args(tmpl) assert converted.format(*args) == res _TEST_TMPLS = ["example 1: {hello}", "example 2: {hello:*10}", "example 3: {hello:*{width}}", "example 4: {hello!r:{fchar}{width}}, {width}, yes", "example 5: {0}, {1:d}, {2:f}, {1}", "example 6: {}, {}, {}, {1}"] def test_get_fstr_args(): results = [] for t in _TEST_TMPLS: inferred_t = infer_positional_format_args(t) res = get_format_args(inferred_t) assert res def test_split_fstr(): results = [] for t in _TEST_TMPLS: res = 
split_format_str(t) results.append(res) assert res def test_tokenize_format_str(): results = [] for t in _TEST_TMPLS: res = tokenize_format_str(t) results.append(res) assert res def test_deferredvalue(): def myfunc(): myfunc.called += 1 return 123 myfunc.called = 0 dv = DV(myfunc) assert str(dv) == '123' assert myfunc.called == 1 assert str(dv) == '123' assert myfunc.called == 1 dv.cache_value = False assert str(dv) == '123' assert myfunc.called == 2 assert str(dv) == '123' assert myfunc.called == 3 boltons-25.0.0/tests/test_funcutils.py000066400000000000000000000043361475005545200200620ustar00rootroot00000000000000from boltons.funcutils import (copy_function, total_ordering, format_invocation, InstancePartial, CachedInstancePartial, noop) class Greeter: def __init__(self, greeting): self.greeting = greeting def greet(self, excitement='.'): return self.greeting.capitalize() + excitement partial_greet = InstancePartial(greet, excitement='!') cached_partial_greet = CachedInstancePartial(greet, excitement='...') def native_greet(self): return self.greet(';') class SubGreeter(Greeter): pass def test_partials(): g = SubGreeter('hello') assert g.greet() == 'Hello.' assert g.native_greet() == 'Hello;' assert g.partial_greet() == 'Hello!' assert g.cached_partial_greet() == 'Hello...' assert CachedInstancePartial(g.greet, excitement='s')() == 'Hellos' g.native_greet = 'native reassigned' assert g.native_greet == 'native reassigned' g.partial_greet = 'partial reassigned' assert g.partial_greet == 'partial reassigned' g.cached_partial_greet = 'cached_partial reassigned' assert g.cached_partial_greet == 'cached_partial reassigned' def test_copy_function(): def callee(): return 1 callee_copy = copy_function(callee) assert callee is not callee_copy assert callee() == callee_copy() def test_total_ordering(): @total_ordering class Number: def __init__(self, val): self.val = int(val) def __gt__(self, other): return self.val > other def __eq__(self, other): return self.val == other num = Number(3) assert num > 0 assert num == 3 assert num < 5 assert num >= 2 assert num != 1 def test_format_invocation(): assert format_invocation('d') == "d()" assert format_invocation('f', ('a', 'b')) == "f('a', 'b')" assert format_invocation('g', (), {'x': 'y'}) == "g(x='y')" assert format_invocation('h', ('a', 'b'), {'x': 'y', 'z': 'zz'}) == "h('a', 'b', x='y', z='zz')" def test_noop(): assert noop() is None assert noop(1, 2) is None assert noop(a=1, b=2) is None boltons-25.0.0/tests/test_funcutils_fb.py000066400000000000000000000174131475005545200205310ustar00rootroot00000000000000import pytest from boltons.funcutils import wraps, FunctionBuilder def pita_wrap(flag=False): def cedar_dec(func): @wraps(func) def cedar_wrapper(*a, **kw): return (flag, func.__name__, func(*a, **kw)) return cedar_wrapper return cedar_dec def wrappable_func(a, b): return a, b def wrappable_varkw_func(a, b, **kw): return a, b def test_wraps_basic(): @pita_wrap(flag=True) def simple_func(): '''"""a tricky docstring"""''' return 'hello' assert simple_func() == (True, 'simple_func', 'hello') assert simple_func.__doc__ == '''"""a tricky docstring"""''' assert callable(simple_func.__wrapped__) assert simple_func.__wrapped__() == 'hello' assert simple_func.__wrapped__.__doc__ == '''"""a tricky docstring"""''' @pita_wrap(flag=False) def less_simple_func(arg='hello'): return arg assert less_simple_func() == (False, 'less_simple_func', 'hello') assert less_simple_func(arg='bye') == (False, 'less_simple_func', 'bye') with pytest.raises(TypeError): 
simple_func(no_such_arg='nope') @pita_wrap(flag=False) def default_non_roundtrippable_repr(x=lambda y: y + 1): return x(1) assert default_non_roundtrippable_repr() == ( False, 'default_non_roundtrippable_repr', 2) def test_wraps_injected(): def inject_string(func): @wraps(func, injected="a") def wrapped(*args, **kwargs): return func(1, *args, **kwargs) return wrapped assert inject_string(wrappable_func)(2) == (1, 2) def inject_list(func): @wraps(func, injected=["b"]) def wrapped(a, *args, **kwargs): return func(a, 2, *args, **kwargs) return wrapped assert inject_list(wrappable_func)(1) == (1, 2) def inject_nonexistent_arg(func): @wraps(func, injected=["X"]) def wrapped(*args, **kwargs): return func(*args, **kwargs) return wrapped with pytest.raises(ValueError): inject_nonexistent_arg(wrappable_func) def inject_missing_argument(func): @wraps(func, injected="c") def wrapped(*args, **kwargs): return func(1, *args, **kwargs) return wrapped def inject_misc_argument(func): # inject_to_varkw is default True, just being explicit @wraps(func, injected="c", inject_to_varkw=True) def wrapped(*args, **kwargs): return func(c=1, *args, **kwargs) return wrapped assert inject_misc_argument(wrappable_varkw_func)(1, 2) == (1, 2) def inject_misc_argument_no_varkw(func): @wraps(func, injected="c", inject_to_varkw=False) def wrapped(*args, **kwargs): return func(c=1, *args, **kwargs) return wrapped with pytest.raises(ValueError): inject_misc_argument_no_varkw(wrappable_varkw_func) def test_wraps_update_dict(): def updated_dict(func): @wraps(func, update_dict=True) def wrapped(*args, **kwargs): return func(*args, **kwargs) return wrapped def f(a, b): return a, b f.something = True assert getattr(updated_dict(f), 'something') def test_wraps_unknown_args(): def fails(func): @wraps(func, foo="bar") def wrapped(*args, **kwargs): return func(*args, **kwargs) return wrapped with pytest.raises(TypeError): fails(wrappable_func) def test_FunctionBuilder_invalid_args(): with pytest.raises(TypeError): FunctionBuilder(name="fails", foo="bar") def test_FunctionBuilder_invalid_body(): with pytest.raises(SyntaxError): FunctionBuilder(name="fails", body="*").get_func() def test_FunctionBuilder_modify(): fb = FunctionBuilder('return_five', doc='returns the integer 5', body='return 5') f = fb.get_func() assert f() == 5 fb.varkw = 'kw' f_kw = fb.get_func() assert f_kw(ignored_arg='ignored_val') == 5 def test_wraps_wrappers(): call_list = [] def call_list_appender(func): @wraps(func) def appender(*a, **kw): call_list.append((a, kw)) return func(*a, **kw) return appender with pytest.raises(TypeError): class Num: def __init__(self, num): self.num = num @call_list_appender @classmethod def added(cls, x, y=1): return cls(x + y) return def test_FunctionBuilder_add_arg(): fb = FunctionBuilder('return_five', doc='returns the integer 5', body='return 5') f = fb.get_func() assert f() == 5 fb.add_arg('val') f = fb.get_func() assert f(val='ignored') == 5 with pytest.raises(ValueError) as excinfo: fb.add_arg('val') excinfo.typename == 'ExistingArgument' fb = FunctionBuilder('return_val', doc='returns the value', body='return val') broken_func = fb.get_func() with pytest.raises(NameError): broken_func() fb.add_arg('val', default='default_val') better_func = fb.get_func() assert better_func() == 'default_val' assert better_func('positional') == 'positional' assert better_func(val='keyword') == 'keyword' def test_wraps_expected(): def expect_string(func): @wraps(func, expected="c") def wrapped(*args, **kwargs): args, c = args[:2], args[-1] 
return func(*args, **kwargs) + (c,) return wrapped expected_string = expect_string(wrappable_func) assert expected_string(1, 2, 3) == (1, 2, 3) with pytest.raises(TypeError) as excinfo: expected_string(1, 2) # a rough way of making sure we got the kind of error we expected assert 'argument' in repr(excinfo.value) def expect_list(func): @wraps(func, expected=["c"]) def wrapped(*args, **kwargs): args, c = args[:2], args[-1] return func(*args, **kwargs) + (c,) return wrapped assert expect_list(wrappable_func)(1, 2, c=4) == (1, 2, 4) def expect_pair(func): @wraps(func, expected=[('c', 5)]) def wrapped(*args, **kwargs): args, c = args[:2], args[-1] return func(*args, **kwargs) + (c,) return wrapped assert expect_pair(wrappable_func)(1, 2) == (1, 2, 5) def expect_dict(func): @wraps(func, expected={'c': 6}) def wrapped(*args, **kwargs): args, c = args[:2], args[-1] return func(*args, **kwargs) + (c,) return wrapped assert expect_dict(wrappable_func)(1, 2) == (1, 2, 6) def test_defaults_dict(): def example(req, test='default'): return req fb_example = FunctionBuilder.from_func(example) assert 'test' in fb_example.args dd = fb_example.get_defaults_dict() assert dd['test'] == 'default' assert 'req' not in dd def test_get_arg_names(): def example(req, test='default'): return req fb_example = FunctionBuilder.from_func(example) assert 'test' in fb_example.args assert fb_example.get_arg_names() == ('req', 'test') assert fb_example.get_arg_names(only_required=True) == ('req',) @pytest.mark.parametrize( "args, varargs, varkw, defaults, invocation_str, sig_str", [ (["a", "b"], None, None, None, "a, b", "(a, b)"), (None, "args", "kwargs", None, "*args, **kwargs", "(*args, **kwargs)"), ("a", None, None, dict(a="a"), "a", "(a)"), ], ) def test_get_invocation_sig_str( args, varargs, varkw, defaults, invocation_str, sig_str ): fb = FunctionBuilder( name='return_five', body='return 5', args=args, varargs=varargs, varkw=varkw, defaults=defaults ) assert fb.get_invocation_str() == invocation_str assert fb.get_sig_str() == sig_str boltons-25.0.0/tests/test_funcutils_fb_py3.py000066400000000000000000000220021475005545200213120ustar00rootroot00000000000000import time import inspect import functools from collections import defaultdict import pytest from boltons.funcutils import wraps, FunctionBuilder, update_wrapper, copy_function import boltons.funcutils as funcutils def wrappable_func(a, b): return a, b def wrappable_varkw_func(a, b, **kw): return a, b def pita_wrap(flag=False): def cedar_dec(func): @wraps(func) def cedar_wrapper(*a, **kw): return (flag, func.__name__, func(*a, **kw)) return cedar_wrapper return cedar_dec def test_wraps_py3(): @pita_wrap(flag=True) def annotations(a: int, b: float=1, c: defaultdict=()) -> defaultdict: return a, b, c assert annotations(0) == (True, "annotations", (0, 1, ())) assert annotations.__annotations__ == {'a': int, 'b': float, 'c': defaultdict, 'return': defaultdict} @pita_wrap(flag=False) def kwonly_arg(a, *, b, c=2): return a, b, c with pytest.raises(TypeError): kwonly_arg(0) assert kwonly_arg(0, b=1) == (False, "kwonly_arg", (0, 1, 2)) assert kwonly_arg(0, b=1, c=3) == (False, "kwonly_arg", (0, 1, 3)) @pita_wrap(flag=True) def kwonly_non_roundtrippable_repr(*, x=lambda y: y + 1): return x(1) assert kwonly_non_roundtrippable_repr() == ( True, 'kwonly_non_roundtrippable_repr', 2) def test_copy_function_kw_defaults_py3(): # test that the copy works with keyword-only defaults f = lambda x, *, y=2: x * y f_copy = copy_function(f) assert f(21) == f_copy(21) == 42 
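# a small illustrative sketch, not from the original suite: FunctionBuilder
# (exercised more thoroughly below) can synthesize a working function from a
# name, an argument list, and a body string
def test_functionbuilder_sketch():
    fb = FunctionBuilder('double', args=['x'], body='return x * 2')
    assert fb.get_func()(3) == 6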
@pytest.mark.parametrize('partial_kind', (functools, funcutils)) def test_update_wrapper_partial(partial_kind): wrapper = partial_kind.partial(wrappable_varkw_func, b=1) fully_wrapped = update_wrapper(wrapper, wrappable_varkw_func) assert fully_wrapped(1) == (1, 1) def test_remove_kwonly_arg(): # example adapted from https://github.com/mahmoud/boltons/issues/123 def darkhelm_inject_loop(func): sig = inspect.signature(func) loop_param = sig.parameters['loop'].replace(default=None) sig = sig.replace(parameters=[loop_param]) def add_loop(args, kwargs): bargs = sig.bind(*args, **kwargs) bargs.apply_defaults() if bargs.arguments['loop'] is None: bargs.arguments['loop'] = "don't look at me, I just use gevent" return bargs.arguments def wrapper(*args, **kwargs): return func(**add_loop(args, kwargs)) return wraps(func, injected=['loop'])(wrapper) @darkhelm_inject_loop def example(test='default', *, loop='lol'): return loop fb_example = FunctionBuilder.from_func(example) assert 'test' in fb_example.args assert fb_example.get_defaults_dict()['test'] == 'default' assert 'loop' not in fb_example.kwonlyargs assert 'loop' not in fb_example.kwonlydefaults def test_defaults_dict(): def example(req, test='default', *, loop='lol'): return loop fb_example = FunctionBuilder.from_func(example) assert 'test' in fb_example.args dd = fb_example.get_defaults_dict() assert dd['test'] == 'default' assert dd['loop'] == 'lol' assert 'req' not in dd def test_get_arg_names(): def example(req, test='default', *, loop='lol'): return loop fb_example = FunctionBuilder.from_func(example) assert 'test' in fb_example.args assert fb_example.get_arg_names() == ('req', 'test', 'loop') assert fb_example.get_arg_names(only_required=True) == ('req',) @pytest.mark.parametrize('signature,should_match', [('a, *, b', True), ('a,*,b', True), ('a, * , b', True), ('a, *,\nb', True), ('a, *\n,b', True), ('a, b', False), ('a, *args', False), ('a, *args, **kwargs', False), ('*args', False), ('*args, **kwargs', False)]) def test_FunctionBuilder_KWONLY_MARKER(signature, should_match): """ _KWONLY_MARKER matches the keyword-only argument separator, regardless of whitespace. Note: it assumes the signature is valid Python. 
""" matched = bool(FunctionBuilder._KWONLY_MARKER.search(signature)) message = "{!r}: should_match was {}, but result was {}".format( signature, should_match, matched) assert bool(matched) == should_match, message def test_FunctionBuilder_add_arg_kwonly(): fb = FunctionBuilder('return_val', doc='returns the value', body='return val') broken_func = fb.get_func() with pytest.raises(NameError): broken_func() fb.add_arg('val', default='default_val', kwonly=True) better_func = fb.get_func() assert better_func() == 'default_val' with pytest.raises(ValueError): fb.add_arg('val') assert better_func(val='keyword') == 'keyword' with pytest.raises(TypeError): assert better_func('positional') return @pytest.mark.parametrize( "args, varargs, varkw, defaults, kwonlyargs, kwonlydefaults, invocation_str, sig_str", [ ( None, "args", "kwargs", None, "a", dict(a="a"), "*args, a=a, **kwargs", "(*args, a, **kwargs)", ) ], ) def test_get_invocation_sig_str( args, varargs, varkw, defaults, kwonlyargs, kwonlydefaults, invocation_str, sig_str, ): fb = FunctionBuilder( name="return_five", body="return 5", args=args, varargs=varargs, varkw=varkw, defaults=defaults, kwonlyargs=kwonlyargs, kwonlydefaults=kwonlydefaults, ) assert fb.get_invocation_str() == invocation_str assert fb.get_sig_str() == sig_str def test_wraps_inner_kwarg_only(): """from https://github.com/mahmoud/boltons/issues/261 mh responds to the issue: You'll notice that when kw-only args are involved the first time (wraps(f)(g)) it works fine. The other way around, however, wraps(g)(f) fails, because by the very nature of funcutils.wraps, you're trying to give f the same signature as g. And f's signature is not like g's. g supports positional b and f() does not. If you want to make a wrapper which converts a keyword-only argument to one that can be positional or keyword only, that'll require a different approach for now. A potential fix would be to pass all function arguments as keywords. But doubt that's the right direction, because, while I have yet to add positional argument only support, that'll definitely throw a wrench into things. """ from boltons.funcutils import wraps def g(a: float, b=10): return a * b def f(a: int, *, b=1): return a * b # all is well here... assert f(3) == 3 assert g(3) == 30 assert wraps(f)(g)(3) == 3 # yay, g got the f default (not so with functools.wraps!) 
# but this doesn't work with pytest.raises(TypeError): wraps(g)(f)(3) return def test_wraps_async(): # from https://github.com/mahmoud/boltons/issues/194 import asyncio def delayed(func): @wraps(func) async def wrapped(*args, **kw): await asyncio.sleep(1.0) return await func(*args, **kw) return wrapped async def f(): await asyncio.sleep(0.1) assert asyncio.iscoroutinefunction(f) f2 = delayed(f) assert asyncio.iscoroutinefunction(f2) # from https://github.com/mahmoud/boltons/pull/195 def yolo(): def make_time_decorator(wrapped): @wraps(wrapped) async def decorator(*args, **kw): return (await wrapped(*args, **kw)) return decorator return make_time_decorator @yolo() async def foo(x): await asyncio.sleep(x) start_time = time.monotonic() asyncio.run(foo(0.3)) duration = time.monotonic() - start_time # lol windows py37 somehow completes this in under 0.3 # "assert 0.29700000000002547 > 0.3" https://ci.appveyor.com/project/mahmoud/boltons/builds/22261051/job/3jfq1tq2233csqp6 assert duration > 0.25 def test_wraps_hide_wrapped(): new_func = wraps(wrappable_func, injected='b')(lambda a: wrappable_func(a, b=1)) new_sig = inspect.signature(new_func, follow_wrapped=True) assert list(new_sig.parameters.keys()) == ['a', 'b'] new_func = wraps(wrappable_func, injected='b', hide_wrapped=True)(lambda a: wrappable_func(a, b=1)) new_sig = inspect.signature(new_func, follow_wrapped=True) assert list(new_sig.parameters.keys()) == ['a'] new_func = wraps(wrappable_func, injected='b')(lambda a: wrappable_func(a, b=1)) new_new_func = wraps(new_func, injected='a', hide_wrapped=True)(lambda: new_func(a=1)) new_new_sig = inspect.signature(new_new_func, follow_wrapped=True) assert len(new_new_sig.parameters) == 0 boltons-25.0.0/tests/test_gcutils.py000066400000000000000000000013341475005545200175130ustar00rootroot00000000000000import sys import time if '__pypy__' not in sys.builtin_module_names: # pypy's gc really is different from boltons.gcutils import get_all, toggle_gc_postcollect def test_get_all(): class TestType: pass tt = TestType() assert len(get_all(TestType)) == 1 assert len(get_all(bool)) == 0 return def test_toggle_gc_postcollect(): COUNT = int(1e6) start = time.time() with toggle_gc_postcollect: x = [{} for x in range(COUNT)] no_gc_time = time.time() - start start = time.time() x = [{} for x in range(COUNT)] with_gc_time = time.time() - start time_diff = no_gc_time < with_gc_time boltons-25.0.0/tests/test_ioutils.py000066400000000000000000000460561475005545200175430ustar00rootroot00000000000000import io import os import codecs import random import string from tempfile import mkdtemp from unittest import TestCase from zipfile import ZipFile, ZIP_DEFLATED from boltons import ioutils CUR_FILE_PATH = os.path.abspath(__file__) class AssertionsMixin: def assertIsNone(self, item, msg=None): self.assertTrue(item is None, msg) class BaseTestMixin: """ A set of tests that work the same for SpooledBtyesIO and SpooledStringIO """ def test_getvalue_norollover(self): """Make sure getvalue function works with in-memory flo""" self.spooled_flo.write(self.test_str) self.assertEqual(self.spooled_flo.getvalue(), self.test_str) def test_getvalue_rollover(self): """Make sure getvalue function works with on-disk flo""" self.spooled_flo.write(self.test_str) self.assertFalse(self.spooled_flo._rolled) self.spooled_flo.rollover() self.assertEqual(self.spooled_flo.getvalue(), self.test_str) self.assertTrue(self.spooled_flo._rolled) def test_rollover_custom_directory(self): """dir keyword argument is passed to TemporaryFile 
instantiation""" custom_dir = mkdtemp() try: # Re-instantiate self.spooled_flo with the custom dir argument _spooled_flo = type(self.spooled_flo)(dir=custom_dir) self.assertEqual(_spooled_flo._dir, custom_dir) # TemporaryFile is kind of a black box, we can't really test it # since the directory entry for the file is removed immediately # after the file is created. So we can't check path using fd.name # or listdir(custom_dir). We could either convert rollover() to # use NamedtemporaryFile-s or assume it's well tested enough that # passing dir= into the constructor will work as expected. We'll # call rollover() with the dir attribute set just to ensure # nothing has gone absurdly wrong. _spooled_flo.write(self.test_str) _spooled_flo.rollover() self.assertEqual(_spooled_flo.getvalue(), self.test_str) self.assertTrue(_spooled_flo._rolled) _spooled_flo.close() finally: os.rmdir(custom_dir) def test_compare_err(self): """Read-heads are reset if a comparison raises an error.""" def _monkey_err(*args, **kwargs): raise Exception('A sad error has occurred today') a = self.spooled_flo.__class__() a.write(self.test_str) b = self.spooled_flo.__class__() b.write(self.test_str) a.seek(1) b.seek(2) b.__next__ = _monkey_err try: a == b except Exception: pass self.assertEqual(a.tell(), 1) self.assertEqual(b.tell(), 2) def test_truncate_noargs_norollover(self): """Test truncating with no args with in-memory flo""" self.spooled_flo.write(self.test_str) self.spooled_flo.seek(10) self.spooled_flo.truncate() self.assertEqual(self.spooled_flo.getvalue(), self.test_str[:10]) def test_truncate_noargs_rollover(self): """Test truncating with no args with on-disk flo""" self.spooled_flo.write(self.test_str) self.spooled_flo.seek(10) self.spooled_flo.rollover() self.spooled_flo.truncate() self.assertEqual(self.spooled_flo.getvalue(), self.test_str[:10]) def test_truncate_with_args_norollover(self): """Test truncating to a value with in-memory flo""" self.spooled_flo.write(self.test_str) self.spooled_flo.seek(5) self.spooled_flo.truncate(10) self.assertEqual(self.spooled_flo.getvalue(), self.test_str[:10]) def test_truncate_with_args_rollover(self): """Test truncating to a value with on-disk flo""" self.spooled_flo.write(self.test_str) self.spooled_flo.seek(5) self.spooled_flo.rollover() self.spooled_flo.truncate(10) self.assertEqual(self.spooled_flo.getvalue(), self.test_str[:10]) def test_type_error_too_many_args(self): """Make sure TypeError raised if too many args passed to truncate""" self.spooled_flo.write(self.test_str) self.assertRaises(TypeError, self.spooled_flo.truncate, 0, 10) def test_io_error_negative_truncate(self): """Make sure IOError raised trying to truncate with negative value""" self.spooled_flo.write(self.test_str) self.assertRaises(IOError, self.spooled_flo.truncate, -1) def test_compare_different_instances(self): """Make sure two different instance types are not considered equal""" a = ioutils.SpooledBytesIO() a.write(b"I am equal!") b = ioutils.SpooledStringIO() b.write("I am equal!") self.assertNotEqual(a, b) def test_compare_unequal_instances(self): """Comparisons of non-SpooledIOBase classes should fail""" self.assertNotEqual("Bummer dude", self.spooled_flo) def test_set_softspace_attribute(self): """Ensure softspace attribute can be retrieved and set""" self.spooled_flo.softspace = True self.assertTrue(self.spooled_flo.softspace) def test_set_softspace_attribute_rolled(self): """Ensure softspace attribute can be retrieved and set if rolled""" self.spooled_flo.softspace = True 
self.assertTrue(self.spooled_flo.softspace) self.spooled_flo.rollover() self.spooled_flo.softspace = True self.assertTrue(self.spooled_flo.softspace) def test_buf_property(self): """'buf' property returns the same value as getvalue()""" self.assertEqual(self.spooled_flo.buf, self.spooled_flo.getvalue()) def test_pos_property(self): """'pos' property returns the same value as tell()""" self.assertEqual(self.spooled_flo.pos, self.spooled_flo.tell()) def test_closed_property(self): """'closed' property works as expected""" self.assertFalse(self.spooled_flo.closed) self.spooled_flo.close() self.assertTrue(self.spooled_flo.closed) def test_readline(self): """Make readline returns expected values""" self.spooled_flo.write(self.test_str_lines) self.spooled_flo.seek(0) self.assertEqual(self.spooled_flo.readline().rstrip(self.linesep), self.test_str_lines.split(self.linesep)[0]) def test_readlines(self): """Make sure readlines returns expected values""" self.spooled_flo.write(self.test_str_lines) self.spooled_flo.seek(0) self.assertEqual( [x.rstrip(self.linesep) for x in self.spooled_flo.readlines()], self.test_str_lines.split(self.linesep) ) def test_next(self): """Make next returns expected values""" self.spooled_flo.write(self.test_str_lines) self.spooled_flo.seek(0) self.assertEqual(self.spooled_flo.next().rstrip(self.linesep), self.test_str_lines.split(self.linesep)[0]) def test_isatty(self): """Make sure we can check if the value is a tty""" # This should simply not fail self.assertTrue(self.spooled_flo.isatty() is True or self.spooled_flo.isatty() is False) def test_truthy(self): """Make sure empty instances are still considered truthy""" self.spooled_flo.seek(0) self.spooled_flo.truncate() if not self.spooled_flo: raise AssertionError("Instance is not truthy") def test_instance_check(self): """Instance checks against IOBase succeed.""" if not isinstance(self.spooled_flo, io.IOBase): raise AssertionError(f'{type(self.spooled_flo)} is not an instance of IOBase') def test_closed_file_method_valueerrors(self): """ValueError raised on closed files for certain methods.""" self.spooled_flo.close() methods = ( 'flush', 'isatty', 'pos', 'buf', 'truncate', '__next__', '__iter__', '__enter__', 'read', 'readline', 'tell', ) for method_name in methods: with self.assertRaises(ValueError): getattr(self.spooled_flo, method_name)() class TestSpooledBytesIO(TestCase, BaseTestMixin, AssertionsMixin): linesep = os.linesep.encode('ascii') def setUp(self): self.spooled_flo = ioutils.SpooledBytesIO() self.test_str = b"Armado en los EE, UU. para S. P. Richards co.," self.test_str_lines = ( f"Text with:{os.linesep}newlines!".encode('ascii') ) self.data_type = bytes def test_compare_not_equal_instances(self): """Make sure instances with different values fail == check.""" a = ioutils.SpooledBytesIO() a.write(b"I am a!") b = ioutils.SpooledBytesIO() b.write(b"I am b!") self.assertNotEqual(a, b) def test_compare_two_equal_instances(self): """Make sure we can compare instances""" a = ioutils.SpooledBytesIO() a.write(b"I am equal!") b = ioutils.SpooledBytesIO() b.write(b"I am equal!") self.assertEqual(a, b) def test_auto_rollover(self): """Make sure file rolls over to disk after max_size reached""" tmp = ioutils.SpooledBytesIO(max_size=10) tmp.write(b"The quick brown fox jumped over the lazy dogs.") self.assertTrue(tmp._rolled) def test_use_as_context_mgr(self): """Make sure SpooledBytesIO can be used as a context manager""" test_str = b"Armado en los EE, UU. para S. P. 
Richards co.," with ioutils.SpooledBytesIO() as f: f.write(test_str) self.assertEqual(f.getvalue(), test_str) def test_len_no_rollover(self): """Make sure len works with in-memory flo""" self.spooled_flo.write(self.test_str) self.assertEqual(self.spooled_flo.len, len(self.test_str)) self.assertEqual(len(self.spooled_flo), len(self.test_str)) def test_len_rollover(self): """Make sure len works with on-disk flo""" self.spooled_flo.write(self.test_str) self.spooled_flo.rollover() self.assertEqual(self.spooled_flo.len, len(self.test_str)) self.assertEqual(len(self.spooled_flo), len(self.test_str)) def test_invalid_type(self): """Ensure TypeError raised when writing unicode to SpooledBytesIO""" self.assertRaises(TypeError, self.spooled_flo.write, "hi") def test_flush_after_rollover(self): """Make sure we can flush before and after rolling to a real file""" self.spooled_flo.write(self.test_str) self.assertIsNone(self.spooled_flo.flush()) self.spooled_flo.rollover() self.assertIsNone(self.spooled_flo.flush()) def test_zip_compat(self): """Make sure object is compatible with ZipFile library""" self.spooled_flo.seek(0) self.spooled_flo.truncate() doc = ZipFile(self.spooled_flo, 'w', ZIP_DEFLATED) doc.writestr("content.txt", "test") self.assertTrue('content.txt' in doc.namelist()) doc.close() def test_iter(self): """Make sure iter works as expected""" self.spooled_flo.write(b"a\nb") self.spooled_flo.seek(0) self.assertEqual([x for x in self.spooled_flo], [b"a\n", b"b"]) def test_writelines(self): """An iterable of lines can be written""" lines = [b"1", b"2", b"3"] expected = b"123" self.spooled_flo.writelines(lines) self.assertEqual(self.spooled_flo.getvalue(), expected) class TestSpooledStringIO(TestCase, BaseTestMixin, AssertionsMixin): linesep = os.linesep def setUp(self): self.spooled_flo = ioutils.SpooledStringIO() self.test_str = "Remember kids, always use an emdash: '\u2014'" self.test_str_lines = f"Text with\u2014{os.linesep}newlines!" self.data_type = str def test_compare_not_equal_instances(self): """Make sure instances with different values fail == check.""" a = ioutils.SpooledStringIO() a.write("I am a!") b = ioutils.SpooledStringIO() b.write("I am b!") self.assertNotEqual(a, b) def test_compare_two_equal_instances(self): """Make sure we can compare instances""" a = ioutils.SpooledStringIO() a.write("I am equal!") b = ioutils.SpooledStringIO() b.write("I am equal!") self.assertEqual(a, b) def test_auto_rollover(self): """Make sure file rolls over to disk after max_size reached""" tmp = ioutils.SpooledStringIO(max_size=10) tmp.write("The quick brown fox jumped over the lazy dogs.") self.assertTrue(tmp._rolled) def test_use_as_context_mgr(self): """Make sure SpooledStringIO can be used as a context manager""" test_str = "Armado en los EE, UU. para S. P. 
Richards co.," with ioutils.SpooledStringIO() as f: f.write(test_str) self.assertEqual(f.getvalue(), test_str) def test_len_no_rollover(self): """Make sure len property works with in-memory flo""" self.spooled_flo.write(self.test_str) self.assertEqual(self.spooled_flo.len, len(self.test_str)) def test_len_rollover(self): """Make sure len property works with on-disk flo""" self.spooled_flo.write(self.test_str) self.spooled_flo.rollover() self.assertEqual(self.spooled_flo.len, len(self.test_str)) def test_invalid_type(self): """Ensure TypeError raised when writing bytes to SpooledStringIO""" self.assertRaises(TypeError, self.spooled_flo.write, b"hi") def test_tell_codepoints(self): """Verify tell() returns codepoint position, not bytes position""" self.spooled_flo.write(self.test_str) self.spooled_flo.seek(0) self.spooled_flo.read(40) self.assertEqual(self.spooled_flo.tell(), 40) self.spooled_flo.seek(10) self.assertEqual(self.spooled_flo.tell(), 10) def test_codepoints_all_enc(self): """"Test getting read, seek, tell, on various codepoints""" test_str = "\u2014\u2014\u2014" self.spooled_flo.write(test_str) self.spooled_flo.seek(1) self.assertEqual(self.spooled_flo.read(), "\u2014\u2014") self.assertEqual(len(self.spooled_flo), len(test_str)) def test_seek_codepoints_SEEK_END(self): """Make sure seek() moves to codepoints relative to file end""" self.spooled_flo.write(self.test_str) ret = self.spooled_flo.seek(0, os.SEEK_END) self.assertEqual(ret, len(self.test_str)) def test_seek_codepoints_large_SEEK_END(self): """Make sure seek() moves to codepoints relative to file end""" test_str = "".join(random.choice(string.ascii_letters) for x in range(34000)) self.spooled_flo.write(test_str) ret = self.spooled_flo.seek(0, os.SEEK_END) self.assertEqual(ret, len(test_str)) def test_seek_codepoints_SEEK_SET(self): """Make sure seek() moves to codepoints relative to file start""" self.spooled_flo.write(self.test_str) ret = self.spooled_flo.seek(3, os.SEEK_SET) self.assertEqual(ret, 3) def test_seek_codepoints_large_SEEK_SET(self): """Make sure seek() moves to codepoints relative to file start""" test_str = "".join(random.choice(string.ascii_letters) for x in range(34000)) self.spooled_flo.write(test_str) ret = self.spooled_flo.seek(33000, os.SEEK_SET) self.assertEqual(ret, 33000) def test_seek_codepoints_SEEK_CUR(self): """Make sure seek() moves to codepoints relative to current_position""" test_str = "\u2014\u2014\u2014" self.spooled_flo.write(test_str) self.spooled_flo.seek(1) self.assertEqual(self.spooled_flo.tell(), 1) ret = self.spooled_flo.seek(2, os.SEEK_CUR) self.assertEqual(ret, 3) def test_seek_codepoints_large_SEEK_CUR(self): """Make sure seek() moves to codepoints relative to current_position""" test_str = "".join(random.choice(string.ascii_letters) for x in range(34000)) self.spooled_flo.write(test_str) self.spooled_flo.seek(1) ret = self.spooled_flo.seek(33000, os.SEEK_CUR) self.assertEqual(ret, 33001) def test_x80_codepoint(self): """Make sure x80 codepoint doesn't confuse read value""" test_str = '\x8000' self.spooled_flo.write(test_str) self.spooled_flo.seek(0) self.assertEqual(len(self.spooled_flo.read(2)), 2) self.assertEqual(self.spooled_flo.read(), '0') def test_seek_encoded(self): """Make sure reading works when bytes exceeds read val""" test_str = "\u2014\u2014\u2014" self.spooled_flo.write(test_str) self.spooled_flo.seek(0) self.assertEqual(self.spooled_flo.read(3), test_str) def test_iter(self): """Make sure iter works as expected""" self.spooled_flo.write("a\nb") 
self.spooled_flo.seek(0) self.assertEqual([x for x in self.spooled_flo], ["a\n", "b"]) def test_writelines(self): """An iterable of lines can be written""" lines = ["1", "2", "3"] expected = "123" self.spooled_flo.writelines(lines) self.assertEqual(self.spooled_flo.getvalue(), expected) class TestMultiFileReader(TestCase): def test_read_seek_bytes(self): r = ioutils.MultiFileReader(io.BytesIO(b'narf'), io.BytesIO(b'troz')) self.assertEqual([b'nar', b'ftr', b'oz'], list(iter(lambda: r.read(3), b''))) r.seek(0) self.assertEqual(b'narftroz', r.read()) def test_read_seek_text(self): r = ioutils.MultiFileReader(io.StringIO('narf'), io.StringIO('troz')) self.assertEqual(['nar', 'ftr', 'oz'], list(iter(lambda: r.read(3), ''))) r.seek(0) self.assertEqual('narftroz', r.read()) def test_no_mixed_bytes_and_text(self): self.assertRaises(ValueError, ioutils.MultiFileReader, io.BytesIO(b'narf'), io.StringIO('troz')) def test_open(self): with open(CUR_FILE_PATH) as f: r_file_str = f.read() with open(CUR_FILE_PATH) as f1: with open(CUR_FILE_PATH) as f2: mfr = ioutils.MultiFileReader(f1, f2) r_double_file_str = mfr.read() assert r_double_file_str == (r_file_str * 2) with open(CUR_FILE_PATH, 'rb') as f: rb_file_str = f.read() with open(CUR_FILE_PATH, 'rb') as f1: with open(CUR_FILE_PATH, 'rb') as f2: mfr = ioutils.MultiFileReader(f1, f2) rb_double_file_str = mfr.read() assert rb_double_file_str == (rb_file_str * 2) utf8_file_str = codecs.open(CUR_FILE_PATH, encoding='utf8').read() f1, f2 = (codecs.open(CUR_FILE_PATH, encoding='utf8'), codecs.open(CUR_FILE_PATH, encoding='utf8')) mfr = ioutils.MultiFileReader(f1, f2) utf8_double_file_str = mfr.read() assert utf8_double_file_str == (utf8_file_str * 2) boltons-25.0.0/tests/test_iterutils.py000066400000000000000000000450401475005545200200670ustar00rootroot00000000000000import os import pytest from boltons.dictutils import OMD from boltons.iterutils import (first, pairwise, pairwise_iter, windowed, windowed_iter, remap, research, default_enter, default_exit, get_path) from boltons.namedutils import namedtuple CUR_PATH = os.path.abspath(__file__) isbool = lambda x: isinstance(x, bool) isint = lambda x: isinstance(x, int) odd = lambda x: isint(x) and x % 2 != 0 even = lambda x: isint(x) and x % 2 == 0 is_meaning_of_life = lambda x: x == 42 class TestFirst: def test_empty_iterables(self): """ Empty iterables return None. """ s = set() l = [] assert first(s) is None assert first(l) is None def test_default_value(self): """ Empty iterables + a default value return the default value. """ s = set() l = [] assert first(s, default=42) == 42 assert first(l, default=3.14) == 3.14 l = [0, False, []] assert first(l, default=3.14) == 3.14 def test_selection(self): """ Success cases with and without a key function. 
""" l = [(), 0, False, 3, []] assert first(l, default=42) == 3 assert first(l, key=isint) == 0 assert first(l, key=isbool) is False assert first(l, key=odd) == 3 assert first(l, key=even) == 0 assert first(l, key=is_meaning_of_life) is None class TestRemap: # TODO: test namedtuples and other immutable containers def test_basic_clone(self): orig = {"a": "b", "c": [1, 2]} assert orig == remap(orig) orig2 = [{1: 2}, {"a": "b", "c": [1, 2, {"cat": "dog"}]}] assert orig2 == remap(orig2) def test_empty(self): assert [] == remap([]) assert {} == remap({}) assert set() == remap(set()) def test_unremappable(self): obj = object() with pytest.raises(TypeError): remap(obj) def test_basic_upper(self): orig = {'a': 1, 'b': object(), 'c': {'d': set()}} remapped = remap(orig, lambda p, k, v: (k.upper(), v)) assert orig['a'] == remapped['A'] assert orig['b'] == remapped['B'] assert orig['c']['d'] == remapped['C']['D'] def test_item_drop(self): orig = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] even_items = remap(orig, lambda p, k, v: not (v % 2)) assert even_items == [0, 2, 4, 6, 8] def test_noncallables(self): with pytest.raises(TypeError): remap([], visit='test') with pytest.raises(TypeError): remap([], enter='test') with pytest.raises(TypeError): remap([], exit='test') def test_sub_selfref(self): coll = [0, 1, 2, 3] sub = [] sub.append(sub) coll.append(sub) with pytest.raises(RuntimeError): # if equal, should recurse infinitely assert coll == remap(coll) def test_root_selfref(self): selfref = [0, 1, 2, 3] selfref.append(selfref) with pytest.raises(RuntimeError): assert selfref == remap(selfref) selfref2 = {} selfref2['self'] = selfref2 with pytest.raises(RuntimeError): assert selfref2 == remap(selfref2) def test_duperef(self): val = ['hello'] duperef = [val, val] remapped = remap(duperef) assert remapped[0] is remapped[1] assert remapped[0] is not duperef[0] def test_namedtuple(self): """TODO: this fails right now because namedtuples' __new__ is overridden to accept arguments. remap's default_enter tries to create an empty namedtuple and gets a TypeError. Could make it so that immutable types actually don't create a blank new parent and instead use the old_parent as a placeholder, creating a new one at exit-time from the value's __class__ (how default_exit works now). But even then it would have to *args in the values, as namedtuple constructors don't take an iterable. 
""" Point = namedtuple('Point', 'x y') point_map = {'origin': [Point(0, 0)]} with pytest.raises(TypeError): remapped = remap(point_map) assert isinstance(remapped['origin'][0], Point) def test_path(self): path_map = {} # test visit's path target_str = 'test' orig = [[[target_str]]] ref_path = (0, 0, 0) def visit(path, key, value): if value is target_str: path_map['target_str'] = path + (key,) return key, value remapped = remap(orig, visit=visit) assert remapped == orig assert path_map['target_str'] == ref_path # test enter's path target_obj = object() orig = {'a': {'b': {'c': {'d': ['e', target_obj, 'f']}}}} ref_path = ('a', 'b', 'c', 'd', 1) def enter(path, key, value): if value is target_obj: path_map['target_obj'] = path + (key,) return default_enter(path, key, value) remapped = remap(orig, enter=enter) assert remapped == orig assert path_map['target_obj'] == ref_path # test exit's path target_set = frozenset([1, 7, 3, 8]) orig = [0, 1, 2, [3, 4, [5, target_set]]] ref_path = (3, 2, 1) def exit(path, key, old_parent, new_parent, new_items): if old_parent is target_set: path_map['target_set'] = path + (key,) return default_exit(path, key, old_parent, new_parent, new_items) remapped = remap(orig, exit=exit) assert remapped == orig assert path_map['target_set'] == ref_path def test_reraise_visit(self): root = {'A': 'b', 1: 2} key_to_lower = lambda p, k, v: (k.lower(), v) with pytest.raises(AttributeError): remap(root, key_to_lower) remapped = remap(root, key_to_lower, reraise_visit=False) assert remapped['a'] == 'b' assert remapped[1] == 2 def test_drop_nones(self): orig = {'a': 1, 'b': None, 'c': [3, None, 4, None]} ref = {'a': 1, 'c': [3, 4]} drop_none = lambda p, k, v: v is not None remapped = remap(orig, visit=drop_none) assert remapped == ref orig = [None] * 100 remapped = remap(orig, drop_none) assert not remapped def test_dict_to_omd(self): def enter(path, key, value): if isinstance(value, dict): return OMD(), sorted(value.items()) return default_enter(path, key, value) orig = [{'title': 'Wild Palms', 'ratings': {1: 1, 2: 3, 3: 5, 4: 6, 5: 3}}, {'title': 'Twin Peaks', 'ratings': {1: 3, 2: 2, 3: 8, 4: 12, 5: 15}}] remapped = remap(orig, enter=enter) assert remapped == orig assert isinstance(remapped[0], OMD) assert isinstance(remapped[0]['ratings'], OMD) assert isinstance(remapped[1], OMD) assert isinstance(remapped[1]['ratings'], OMD) def test_sort_all_lists(self): def exit(path, key, old_parent, new_parent, new_items): # NB: in this case, I'd normally use *a, **kw ret = default_exit(path, key, old_parent, new_parent, new_items) if isinstance(ret, list): ret.sort() return ret # NB: Airplane model numbers (Boeing and Airbus) orig = [[[7, 0, 7], [7, 2, 7], [7, 7, 7], [7, 3, 7]], [[3, 8, 0], [3, 2, 0], [3, 1, 9], [3, 5, 0]]] ref = [[[0, 2, 3], [0, 3, 5], [0, 3, 8], [1, 3, 9]], [[0, 7, 7], [2, 7, 7], [3, 7, 7], [7, 7, 7]]] remapped = remap(orig, exit=exit) assert remapped == ref def test_collector_pattern(self): all_interests = set() def enter(path, key, value): try: all_interests.update(value['interests']) except: pass return default_enter(path, key, value) orig = [{'name': 'Kate', 'interests': ['theater', 'manga'], 'dads': [{'name': 'Chris', 'interests': ['biking', 'python']}]}, {'name': 'Avery', 'interests': ['museums', 'pears'], 'dads': [{'name': 'Kurt', 'interests': ['python', 'recursion']}]}] ref = {'python', 'recursion', 'biking', 'museums', 'pears', 'theater', 'manga'} remap(orig, enter=enter) assert all_interests == ref def test_add_length(self): def exit(path, key, old_parent, 
new_parent, new_items): ret = default_exit(path, key, old_parent, new_parent, new_items) try: ret['review_length'] = len(ret['review']) except: pass return ret orig = {'Star Trek': {'TNG': {'stars': 10, 'review': "Episodic AND deep. <3 Data."}, 'DS9': {'stars': 8.5, 'review': "Like TNG, but with a story and no Data."}, 'ENT': {'stars': None, 'review': "Can't review what you can't watch."}}, 'Babylon 5': {'stars': 6, 'review': "Sophomoric, like a bitter laugh."}, 'Dr. Who': {'stars': None, 'review': "800 episodes is too many to review."}} remapped = remap(orig, exit=exit) assert (remapped['Star Trek']['TNG']['review_length'] < remapped['Star Trek']['DS9']['review_length']) def test_prepop(self): """Demonstrating normalization and ID addition through prepopulating the objects with an enter callback. """ base_obj = {'name': None, 'rank': None, 'id': 1} def enter(path, key, value): new_parent, new_items = default_enter(path, key, value) try: new_parent.update(base_obj) base_obj['id'] += 1 except: pass return new_parent, new_items orig = [{'name': 'Firefox', 'rank': 1}, {'name': 'Chrome', 'rank': 2}, {'name': 'IE'}] ref = [{'name': 'Firefox', 'rank': 1, 'id': 1}, {'name': 'Chrome', 'rank': 2, 'id': 2}, {'name': 'IE', 'rank': None, 'id': 3}] remapped = remap(orig, enter=enter) assert remapped == ref def test_remap_set(self): # explicit test for sets to make sure #84 is covered s = {1, 2, 3} assert remap(s) == s fs = frozenset([1, 2, 3]) assert remap(fs) == fs def test_remap_file(self): with open(CUR_PATH, 'rb') as f: x = {'a': [1, 2, 3], 'f': [f]} assert remap(x) == x f.read() assert remap(x) == x f.close() # see #146 assert remap(x) == x return class TestGetPath: def test_depth_one(self): root = ['test'] assert get_path(root, (0,)) == 'test' assert get_path(root, '0') == 'test' root = {'key': 'value'} assert get_path(root, ('key',)) == 'value' assert get_path(root, 'key') == 'value' def test_depth_two(self): root = {'key': ['test']} assert get_path(root, ('key', 0)) == 'test' assert get_path(root, 'key.0') == 'test' def test_research(): root = {} with pytest.raises(TypeError): research(root, query=None) root = {'a': 'a'} res = research(root, query=lambda p, k, v: v == 'a') assert len(res) == 1 assert res[0] == (('a',), 'a') def broken_query(p, k, v): raise RuntimeError() with pytest.raises(RuntimeError): research(root, broken_query, reraise=True) # empty results with default, reraise=False assert research(root, broken_query) == [] def test_research_custom_enter(): # see #368 from types import SimpleNamespace as NS root = NS( a='a', b='b', c=NS(aa='aa') ) def query(path, key, value): return value.startswith('a') def custom_enter(path, key, value): if isinstance(value, NS): return [], value.__dict__.items() return default_enter(path, key, value) with pytest.raises(TypeError): research(root, query) assert research(root, query, enter=custom_enter) == [(('a',), 'a'), (('c', 'aa'), 'aa')] def test_backoff_basic(): from boltons.iterutils import backoff assert backoff(1, 16) == [1.0, 2.0, 4.0, 8.0, 16.0] assert backoff(1, 1) == [1.0] assert backoff(2, 15) == [2.0, 4.0, 8.0, 15.0] def test_backoff_repeat(): from boltons.iterutils import backoff_iter fives = [] for val in backoff_iter(5, 5, count='repeat'): fives.append(val) if len(fives) >= 1000: break assert fives == [5] * 1000 def test_backoff_zero_start(): from boltons.iterutils import backoff assert backoff(0, 16) == [0.0, 1.0, 2.0, 4.0, 8.0, 16.0] assert backoff(0, 15) == [0.0, 1.0, 2.0, 4.0, 8.0, 15.0] slow_backoff = [round(x, 2) for x in 
backoff(0, 2.9, factor=1.2)]
    assert slow_backoff == [0.0, 1.0, 1.2, 1.44, 1.73, 2.07, 2.49, 2.9]


def test_backoff_validation():
    from boltons.iterutils import backoff

    with pytest.raises(ValueError):
        backoff(8, 2)
    with pytest.raises(ValueError):
        backoff(1, 0)
    with pytest.raises(ValueError):
        backoff(-1, 10)
    with pytest.raises(ValueError):
        backoff(2, 8, factor=0)
    with pytest.raises(ValueError):
        backoff(2, 8, jitter=20)


def test_backoff_jitter():
    from boltons.iterutils import backoff

    start, stop = 1, 256

    unjittered = backoff(start, stop)
    jittered = backoff(start, stop, jitter=True)
    assert len(unjittered) == len(jittered)
    assert all([u >= j for u, j in zip(unjittered, jittered)])

    neg_jittered = backoff(start, stop, jitter=-0.01)
    assert len(unjittered) == len(neg_jittered)
    assert all([u <= j for u, j in zip(unjittered, neg_jittered)])

    o_jittered = backoff(start, stop, jitter=-0.0)
    assert len(unjittered) == len(o_jittered)
    assert all([u == j for u, j in zip(unjittered, o_jittered)])

    nonconst_jittered = backoff(stop, stop, count=5, jitter=True)
    assert len(nonconst_jittered) == 5
    # no two should be equal realistically
    assert len(set(nonconst_jittered)) == 5


def test_guiderator():
    import string
    from boltons.iterutils import GUIDerator
    guid_iter = GUIDerator()

    guid = next(guid_iter)
    assert guid
    assert len(guid) == guid_iter.size
    assert all([c in string.hexdigits for c in guid])

    guid2 = next(guid_iter)
    assert guid != guid2

    # custom size
    guid_iter = GUIDerator(size=26)
    assert len(next(guid_iter)) == 26


def test_seqguiderator():
    import string
    from boltons.iterutils import SequentialGUIDerator as GUIDerator
    guid_iter = GUIDerator()

    guid = next(guid_iter)
    assert guid
    assert len(guid) == guid_iter.size
    assert all([c in string.hexdigits for c in guid])

    guid2 = next(guid_iter)
    assert guid != guid2

    # custom size
    for x in range(10000):
        guid_iter = GUIDerator(size=26)
        assert len(next(guid_iter)) == 26


def test_chunked_bytes():
    # see #231
    from boltons.iterutils import chunked
    assert chunked(b'123', 2) in (['12', '3'], [b'12', b'3'])


def test_chunk_ranges():
    from boltons.iterutils import chunk_ranges
    assert list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5)) == [(10, 15), (15, 20)]
    assert list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1)) == [(10, 15), (14, 19), (18, 20)]
    assert list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2)) == [(10, 15), (13, 18), (16, 20)]

    assert list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False)) == [(4, 9), (9, 14), (14, 19)]
    assert list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True)) == [(4, 5), (5, 10), (10, 15), (15, 19)]

    assert list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False)) == [(2, 7), (6, 11), (10, 15), (14, 17)]
    assert list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True)) == [(2, 5), (4, 9), (8, 13), (12, 17)]
    assert list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True)) == [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)]
    assert list(chunk_ranges(input_offset=3, input_size=2, chunk_size=5, overlap_size=1, align=True)) == [(3, 5)]


def test_lstrip():
    from boltons.iterutils import lstrip
    assert lstrip([0,1,0,2,0,3,0],0) == [1,0,2,0,3,0]
    assert lstrip([0,0,0,1,0,2,0,3,0],0) == [1,0,2,0,3,0]
    assert lstrip([]) == []


def test_rstrip():
    from boltons.iterutils import rstrip
    assert rstrip([0,1,0,2,0,3,0],0) == [0,1,0,2,0,3]
    assert rstrip([0,1,0,2,0,3,0,0,0],0) ==
[0,1,0,2,0,3] assert rstrip([]) == [] def test_strip(): from boltons.iterutils import strip assert strip([0,1,0,2,0,3,0],0) == [1,0,2,0,3] assert strip([0,0,0,1,0,2,0,3,0,0,0],0) == [1,0,2,0,3] assert strip([]) == [] def test_pairwise_filled(): assert pairwise(range(4)) == [(0, 1), (1, 2), (2, 3)] assert pairwise(range(4), end=None) == [(0, 1), (1, 2), (2, 3), (3, None)] assert pairwise([]) == [] assert pairwise([1], end=None) == [(1, None)] assert list(pairwise_iter(range(4))) == [(0, 1), (1, 2), (2, 3)] assert list(pairwise_iter(range(4), end=None)) == [(0, 1), (1, 2), (2, 3), (3, None)] def test_windowed_filled(): assert windowed(range(4), 3) == [(0, 1, 2), (1, 2, 3)] assert windowed(range(4), 3, fill=None) == [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)] assert windowed([], 3) == [] assert windowed([], 3, fill=None) == [] assert windowed([1, 2], 3, fill=None) == [(1, 2, None), (2, None, None)] assert list(windowed_iter(range(4), 3)) == [(0, 1, 2), (1, 2, 3)] assert list(windowed_iter(range(4), 3, fill=None)) == [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)] boltons-25.0.0/tests/test_jsonutils.py000066400000000000000000000024131475005545200200720ustar00rootroot00000000000000import os from boltons.jsonutils import (JSONLIterator, DEFAULT_BLOCKSIZE, reverse_iter_lines) CUR_PATH = os.path.dirname(os.path.abspath(__file__)) NEWLINES_DATA_PATH = CUR_PATH + '/newlines_test_data.txt' JSONL_DATA_PATH = CUR_PATH + '/jsonl_test_data.txt' def _test_reverse_iter_lines(filename, blocksize=DEFAULT_BLOCKSIZE): fo = open(filename) reference = fo.read() fo.seek(0, os.SEEK_SET) rev_lines = list(reverse_iter_lines(fo, blocksize)) assert '\n'.join(rev_lines[::-1]) == reference def _test_reverse_iter_lines_bytes(filename, blocksize=DEFAULT_BLOCKSIZE): fo = open(filename, 'rb') reference = fo.read() fo.seek(0, os.SEEK_SET) rev_lines = list(reverse_iter_lines(fo, blocksize)) assert os.linesep.encode('ascii').join(rev_lines[::-1]) == reference def test_reverse_iter_lines(): for blocksize in (2, 4, 16, 4096): _test_reverse_iter_lines(NEWLINES_DATA_PATH, blocksize) _test_reverse_iter_lines_bytes(NEWLINES_DATA_PATH, blocksize) def test_jsonl_iterator(): ref = [{'4': 4}, {'3': 3}, {'2': 2}, {'1': 1}, {}] jsonl_iter = JSONLIterator(open(JSONL_DATA_PATH), reverse=True) jsonl_list = list(jsonl_iter) assert jsonl_list == ref boltons-25.0.0/tests/test_listutils.py000066400000000000000000000106461475005545200201030ustar00rootroot00000000000000import sys from boltons.listutils import SplayList, BarrelList def test_splay_list(): splay = SplayList(range(10)) splay.swap(0, 9) assert splay[0] == 9 assert splay[-1] == 0 splay.shift(-2) assert splay[0] == 8 assert splay[-1] == 0 assert len(splay) == 10 def test_barrel_list(): bl = BarrelList() bl.insert(0, 0) assert bl[0] == 0 assert len(bl) == 1 bl.insert(1, 1) assert list(bl) == [0, 1] bl.insert(0, -1) assert list(bl) == [-1, 0, 1] bl.extend(range(int(1e5))) assert len(bl) == (1e5 + 3) bl._balance_list(0) assert len(bl) == (1e5 + 3) bl.pop(50000) assert len(bl) == (1e5 + 3 - 1) bl2 = BarrelList(TEST_INTS) bl2.sort() assert list(bl2[:5]) == [0, 74, 80, 96, 150] assert list(bl2[:-5:-1]) == [50508, 46607, 46428, 43442] # a hundred thousand integers bl3 = BarrelList(range(int(1e5))) for i in range(10000): # move the middle ten thou to the beginning bl3.insert(0, bl3.pop(len(bl3) // 2)) assert len(bl3) == 1e5 # length didn't change assert bl3[0] == 40001 # first value changed as expected assert bl3[-1] == sorted(bl3)[-1] # last value didn't change del bl3[10:5000] 
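    # A small illustrative aside, not from the original suite -- it uses
    # only the BarrelList API already exercised above. BarrelList keeps
    # its items in many short sublists, so inserts and pops away from the
    # ends avoid the large memmoves a flat list would incur:
    side_bl = BarrelList(range(10))
    side_bl.insert(5, 99)             # mid-list insert
    assert side_bl.pop(5) == 99       # the matching mid-list pop round-trips
    assert list(side_bl) == list(range(10))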
    assert bl3[0] == 40001
    assert len(bl3) == 1e5 - (5000 - 10)  # length stayed correct

    bl3[:20:2] = range(0, -10, -1)
    assert bl3[6] == -3  # some truly tricky stepping/slicing works


# roughly increasing random integers
# [ord(i) * x for i, x in zip(os.urandom(1024), range(1024))]
TEST_INTS = [0, 74, 96, 183, 456, 150, 1098, 665, 1752, 1053, 190, 561,
             2244, 2964, 2534, 750, 80, 612, 3780, 2698, 1320, 4935, 5324,
             3220, 672, 2925, 6474, 6507, 3892, 4814, 1470, 6913, 6112,
             6072, 7854, 8540, 4212, 7770, 8702, 1404, 2240, 902, 5250,
             10320, 9680, 8775, 2392, 11327, 10368, 8085, 7750, 9333, 8008,
             11395, 6858, 8690, 14112, 5358, 13572, 3304, 9000, 11712,
             1426, 12033, 11136, 10270, 10626, 11189, 7208, 966, 9380,
             16614, 10368, 11680, 8214, 16350, 4712, 5082, 13572, 15879,
             6800, 8667, 17548, 9628, 3360, 19550, 1634, 20619, 1232,
             17978, 22950, 10829, 5612, 22692, 10058, 21375, 19680, 5626,
             15582, 18216, 2200, 20402, 24174, 3090, 19864, 2520, 15794,
             18511, 4212, 18530, 8470, 7992, 5152, 4294, 456, 6095, 26564,
             9477, 14042, 7259, 14520, 28677, 9394, 5289, 7812, 1625,
             17514, 7493, 24704, 903, 33150, 1834, 11352, 20615, 32562,
             540, 6256, 12878, 276, 17236, 6300, 25380, 9088, 27742, 4752,
             33930, 7008, 22491, 7992, 2831, 34200, 18271, 22648, 6426,
             15862, 26660, 29484, 2826, 14378, 26553, 16000, 18998, 28998,
             16626, 19024, 22275, 18426, 21042, 29064, 30927, 26010, 14193,
             9976, 30621, 12354, 33600, 40832, 20178, 20292, 12709, 26640,
             29865, 23660, 20862, 34592, 28305, 24180, 38709, 18800, 11907,
             28120, 34189, 4992, 8106, 14938, 6435, 12936, 31914, 1782,
             995, 9800, 28542, 22018, 27608, 30396, 28700, 26986, 50508,
             5616, 46607, 2310, 41356, 26712, 8733, 43442, 33755, 21384,
             40145, 26160, 46428, 30360]

test_barrel_list()


if __name__ == '__main__':
    _TUNE_SETUP = """\
from boltons.listutils import BarrelList
bl = BarrelList()
bl._size_factor = %s
bl.extend(range(int(%s)))
"""

    def tune():
        from collections import defaultdict
        import gc
        from timeit import timeit
        data_size = 1e5
        old_size_factor = size_factor = 512
        all_times = defaultdict(list)
        min_times = {}
        step = 512
        while abs(step) > 4:
            gc.collect()
            for x in range(3):
                tottime = timeit('bl.insert(0, bl.pop(len(bl)//2))',
                                 _TUNE_SETUP % (size_factor, data_size),
                                 number=10000)
                all_times[size_factor].append(tottime)
            min_time = round(min(all_times[size_factor]), 3)
            min_times[size_factor] = min_time
            print(size_factor, min_time, step)
            if min_time > (min_times[old_size_factor] + 0.002):
                step = -step // 2
            old_size_factor = size_factor
            size_factor += step
        print(tottime)

    try:
        tune()  # main()
    except Exception as e:
        import pdb;pdb.post_mortem()
        raise
boltons-25.0.0/tests/test_mathutils.py000066400000000000000000000064571475005545200200660ustar00rootroot00000000000000from pytest import raises

from boltons.mathutils import clamp, ceil, floor, Bits
import math

INF, NAN = float('inf'), float('nan')

OPTIONS = [1618, 1378, 166, 1521, 2347, 2016, 879, 2123, 269.3, 1230, 66,
           425.2, 250, 2399, 2314, 439, 247, 208, 379, 1861]
OPTIONS_SORTED = sorted(OPTIONS)
OUT_OF_RANGE_LOWER = 60
OUT_OF_RANGE_UPPER = 2500
VALID_LOWER = 247
VALID_UPPER = 2314
VALID_BETWEEN = 248.5


def test_clamp_examples():
    """some examples for clamp()"""
    assert 0 == clamp(0, 0, 1) == clamp(-1, 0, 1)
    assert 0 == clamp(-1, lower=0)
    assert 1 == clamp(1, 0, 1) == clamp(5, 0, 1)
    assert 1 == clamp(5, upper=1)
    assert 0.5 == clamp(7, upper=0.5)
    assert 1 == clamp(7.7, upper=1)


def test_clamp_transparent():
    """clamp(x) should equal x because both limits are omitted"""
    assert clamp(0) == 0
    assert clamp(1) == 1
    assert clamp(10**100) == 10**100
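    # A small illustrative aside, not from the original suite -- it relies
    # only on the one-sided clamp() forms pinned down by
    # test_clamp_examples above. One-sided limits make handy ad hoc
    # floors and ceilings for computed values:
    assert clamp(37.5, upper=30.0) == 30.0            # cap from above
    assert clamp(-5, lower=0.1) == 0.1                # floor from below
    assert clamp(3.7, lower=0.1, upper=30.0) == 3.7   # in-band passes through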
    assert clamp(INF) == INF
    assert clamp(-INF) == -INF
    assert math.isnan(clamp(NAN))


def test_ceil_basic():
    assert ceil(VALID_LOWER, OPTIONS) == VALID_LOWER
    assert ceil(VALID_UPPER, OPTIONS) == VALID_UPPER
    assert ceil(VALID_BETWEEN, OPTIONS) == 250


def test_ceil_sorted():
    assert ceil(VALID_LOWER, OPTIONS) == ceil(VALID_LOWER, OPTIONS_SORTED)
    assert ceil(VALID_UPPER, OPTIONS) == ceil(VALID_UPPER, OPTIONS_SORTED)
    assert ceil(VALID_BETWEEN, OPTIONS) == ceil(VALID_BETWEEN, OPTIONS_SORTED)


def test_ceil_oor_lower():
    assert min(OPTIONS) == ceil(OUT_OF_RANGE_LOWER, OPTIONS)


def test_ceil_oor_upper():
    with raises(ValueError):
        ceil(OUT_OF_RANGE_UPPER, OPTIONS)


def test_floor_basic():
    assert floor(VALID_LOWER, OPTIONS) == VALID_LOWER
    assert floor(VALID_UPPER, OPTIONS) == VALID_UPPER
    assert floor(VALID_BETWEEN, OPTIONS) == 247


def test_floor_sorted():
    assert floor(VALID_LOWER, OPTIONS) == floor(VALID_LOWER, OPTIONS_SORTED)
    assert floor(VALID_UPPER, OPTIONS) == floor(VALID_UPPER, OPTIONS_SORTED)
    assert floor(VALID_BETWEEN, OPTIONS) == floor(VALID_BETWEEN, OPTIONS_SORTED)


def test_floor_oor_upper():
    assert max(OPTIONS) == floor(OUT_OF_RANGE_UPPER, OPTIONS)


def test_floor_oor_lower():
    with raises(ValueError):
        floor(OUT_OF_RANGE_LOWER, OPTIONS)


def test_bits():
    def chk(a, b):
        assert a == b, a
    chk(Bits('10')[:1], Bits('1'))
    chk(Bits('10')[1:], Bits('0'))
    chk(Bits('10')[0], True)
    chk(Bits('10')[1], False)
    chk(Bits('0000100')[4], True)
    chk(Bits('10').as_list(), [True, False])
    chk(Bits('10').as_int(), 2)
    chk(Bits('10').as_bin(), '10')
    chk(Bits('1111').as_hex(), '0F')
    chk(Bits('10'), Bits([True, False]))
    chk(Bits('10'), Bits(2))
    chk(Bits('01') | Bits('10'), Bits('11'))
    chk(Bits('01') & Bits('10'), Bits('00'))
    chk(Bits('11') >> 1, Bits('1'))
    chk(Bits('1') << 1, Bits('10'))
    assert Bits('0') != Bits('00')
    # test roundtrip as_/from_hex
    chk(Bits.from_hex(Bits('10101010').as_hex()), Bits('10101010'))
    # test roundtrip as_/from_bytes
    chk(Bits.from_bytes(Bits('10101010').as_bytes()), Bits('10101010'))
    # pile of roundtripping
    chk(Bits.from_int(
            Bits.from_bin(
                Bits.from_list(
                    Bits('101').as_list()
                ).as_bin()
            ).as_int()
        ), Bits('101'))
boltons-25.0.0/tests/test_namedutils.py000066400000000000000000000010401475005545200202010ustar00rootroot00000000000000from pickle import loads, dumps

from boltons.namedutils import namedlist, namedtuple

Point = namedtuple('Point', 'x, y', rename=True)
MutablePoint = namedlist('MutablePoint', 'x, y', rename=True)


def test_namedlist():
    p = MutablePoint(x=10, y=20)
    assert p == [10, 20]
    p[0] = 11
    assert p == [11, 20]
    p.x = 12
    assert p == [12, 20]


def test_namedlist_pickle():
    p = MutablePoint(x=10, y=20)
    assert p == loads(dumps(p))


def test_namedtuple_pickle():
    p = Point(x=10, y=20)
    assert p == loads(dumps(p))
boltons-25.0.0/tests/test_queueutils.py000066400000000000000000000020301475005545200202400ustar00rootroot00000000000000from boltons.queueutils import SortedPriorityQueue, HeapPriorityQueue


def _test_priority_queue(queue_type):
    pq = queue_type()
    item1 = 'a'
    item2 = 'b'
    item3 = 'c'
    pq.add(item1)
    pq.remove(item1)

    # integer priorities
    pq.add(item1, 2)
    pq.add(item2, 9)
    pq.add(item3, 7)
    assert len(pq) == 3
    assert item2 == pq.pop()
    assert len(pq) == 2
    assert item3 == pq.pop()
    assert len(pq) == 1
    assert item1 == pq.pop()
    assert len(pq) == 0

    # float priorities
    pq.add(item1, 0.2)
    pq.add(item2, 0.9)
    pq.add(item3, 0.7)
    assert len(pq) == 3
    assert item2 == pq.pop()
    assert len(pq) == 2
    assert item3 == pq.pop()
    assert len(pq) == 1
    assert item1 == pq.pop()
    assert len(pq) == 0

    try:
        pq.pop()
    except IndexError:
pass else: assert False, 'priority queue should be empty' return def test_heap_queue(): _test_priority_queue(HeapPriorityQueue) def test_sorted_queue(): _test_priority_queue(SortedPriorityQueue) boltons-25.0.0/tests/test_setutils.py000066400000000000000000000143221475005545200177160ustar00rootroot00000000000000from pytest import raises from boltons.setutils import IndexedSet, _MISSING, complement def test_indexed_set_basic(): zero2nine = IndexedSet(range(10)) five2nine = zero2nine & IndexedSet(range(5, 15)) x = IndexedSet(five2nine) x |= {10} assert list(zero2nine) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] assert set(zero2nine) == {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} assert list(five2nine) == [5, 6, 7, 8, 9] assert x == IndexedSet([5, 6, 7, 8, 9, 10]) assert x[-1] == 10 assert zero2nine ^ five2nine == IndexedSet([0, 1, 2, 3, 4]) assert x[:3] == IndexedSet([5, 6, 7]) assert x[2:4:-1] == IndexedSet([8, 7]) def test_indexed_set_rsub(): "From #252" assert (set('abc') - IndexedSet('bcd')) == {'a'} assert (IndexedSet('abc') - IndexedSet('bcd')) == IndexedSet(['a']) assert (frozenset('abc') - IndexedSet('bcd')) == frozenset(['a']) def test_indexed_set_mutate(): thou = IndexedSet(range(1000)) assert (thou.pop(), thou.pop()) == (999, 998) assert (thou.pop(499), thou.pop(499)) == (499, 500) ref = [495, 496, 497, 498, 501, 502, 503, 504, 505, 506] assert [thou[i] for i in range(495, 505)] == ref assert len(thou) == 996 while len(thou) > 600: dead_idx_len = len(thou.dead_indices) dead_idx_count = thou._dead_index_count thou.pop(0) new_dead_idx_len = len(thou.dead_indices) if new_dead_idx_len < dead_idx_len: assert dead_idx_count > 0 # 124, 109, 95 assert len(thou) == 600 assert thou._dead_index_count == 67 assert not any([thou[i] is _MISSING for i in range(len(thou))]) thou &= IndexedSet(range(500, 503)) assert thou == IndexedSet([501, 502]) return def big_popper(): # more of a benchmark than a test from os import urandom import time big_set = IndexedSet(range(100000)) rands = [ord(r) for r in urandom(len(big_set))] start_time, start_size = time.time(), len(big_set) while len(big_set) > 10000: if len(big_set) % 10000 == 0: print(len(big_set) / 10000) rand = rands.pop() big_set.pop(rand) big_set.pop(-rand) end_time, end_size = time.time(), len(big_set) print() print('popped {} items in {} seconds'.format(start_size - end_size, end_time - start_time)) def test_complement_set(): '''exercises a bunch of code-paths but doesn't really confirm math identities''' assert complement(complement(set())) == set() sab = set('ab') sbc = set('bc') cab = complement('ab') cbc = complement('bc') cc = complement('c') sc = set('c') u = complement(set()) assert repr(sab) in repr(cab) # non-mutating tests assert cab != cbc assert complement(cab) == sab assert complement(cbc) == sbc assert 'a' not in cab assert 'c' in cab assert (sab & cbc) == (sab - sbc) # set theory invariant assert not (cab < sab) # complement never subset of set assert not (sab < cab) assert not (cbc < sab) assert not (cbc < cab) # not subsets of each other assert sab < cc assert cab < (cab | cbc) assert (cab | cbc) > cab assert cc > sab assert not (cab > sab) assert not cab.isdisjoint(cc) # complements never disjoint assert cab.isdisjoint(sab) assert not cab.isdisjoint(sc) assert (cab | sab) == u assert (cab | cc) == u assert (cab | cbc) == complement('b') assert (sab | cab) == (cbc | sbc) assert (sab & cab) == (cbc & sbc) assert (sab ^ cab) == (cbc ^ sbc) assert cab - cc == sc assert cab - sab == cab assert sab - cab == sab assert (cab ^ cbc | set('b')) == (sab | 
sbc) everything = complement(frozenset()) assert everything in everything # https://en.wikipedia.org/wiki/Russell%27s_paradox assert bool(cab) assert not complement(u) # destructive testing cab ^= sab cab ^= sab cab &= sab cab &= cbc cab |= sab cab |= cbc cab -= sab cab.add(5) cab.remove(5) cab.update(sab) cab.discard(sab) cab.update(cbc) cab.add(complement(frozenset())) # frozen complement can be a member of complement set assert len({complement(frozenset()): 1, complement(frozenset()): 2}) == 1 # hash works with raises(NotImplementedError): cab.pop() with raises(NotImplementedError): len(cab) with raises(NotImplementedError): iter(cab) ~cab cab.complement() cab.complemented() class OpOverloader: # tests that operators properly return NotImplemented so they will defer to # another class implementation if available def __and__(self, other): return self __rand__ = __iand__ = __or__ = __ror__ = __ior__ = __xor__ = __rxor__ = __sub__ = __isub__ = __and__ def __le__(self, other): return True __lt__ = __ge__ = __gt__ = __le__ ops = OpOverloader() def opsmash(a, b): a &= b; a |= b; a -= b; a ^= b a > b; a >= b; a < b; a <= b return (((a & b) | b) - b) ^ b with raises(TypeError): opsmash(cab, object()) assert opsmash(ops, cab) == ops assert opsmash(cab, ops) == ops def test_iset_index_method(): original_list = list(range(8, 20)) + list(range(8)) indexed_list = IndexedSet() for i in original_list: indexed_list.add(i) for i in original_list: index = indexed_list.index(i) # if we're removing them in order, the target value should always be at index 0 assert index == 0 indexed_list.pop(index) indexed_list = IndexedSet(range(10)) for i in reversed(range(10)): if i % 2: continue index = indexed_list.index(i) assert i == indexed_list.pop(index) indexed_list = IndexedSet(range(32)) for i in list(indexed_list): if i % 3: index = indexed_list.index(i) assert i == indexed_list.pop(index) indexed_list = IndexedSet(range(10)) for i in range(10): if i < 3: continue index = indexed_list.index(i) assert i == indexed_list.pop(index) indexed_list = IndexedSet(range(32)) for i in list(indexed_list): if i % 3: index = indexed_list.index(i) assert i == indexed_list.pop(index) boltons-25.0.0/tests/test_socketutils.py000066400000000000000000000271611475005545200204200ustar00rootroot00000000000000import sys import time import errno import socket import threading from boltons.socketutils import (BufferedSocket, NetstringSocket, ConnectionClosed, NetstringMessageTooLong, MessageTooLong, Timeout) import pytest # skip if there's no socketpair pytestmark = pytest.mark.skipif(getattr(socket, 'socketpair', None) is None, reason='no socketpair (likely Py2 on Windows)') def test_short_lines(): for ms in (2, 4, 6, 1024, None): x, y = socket.socketpair() bs = BufferedSocket(x) y.sendall(b'1\n2\n3\n') assert bs.recv_until(b'\n', maxsize=ms) == b'1' assert bs.recv_until(b'\n', maxsize=ms) == b'2' y.close() assert bs.recv_close(maxsize=ms) == b'3\n' try: bs.recv_size(1) except ConnectionClosed: pass else: assert False, 'expected ConnectionClosed' bs.close() return def test_multibyte_delim(): """Primarily tests recv_until with various maxsizes and True/False for with_delimiter. 
""" delim = b'\r\n' for with_delim in (True, False): if with_delim: cond_delim = b'\r\n' else: cond_delim = b'' empty = b'' small_one = b'1' big_two = b'2' * 2048 for ms in (3, 5, 1024, None): x, y = socket.socketpair() bs = BufferedSocket(x) y.sendall(empty + delim) y.sendall(small_one + delim) y.sendall(big_two + delim) kwargs = {'maxsize': ms, 'with_delimiter': with_delim} assert bs.recv_until(delim, **kwargs) == empty + cond_delim assert bs.recv_until(delim, **kwargs) == small_one + cond_delim try: assert bs.recv_until(delim, **kwargs) == big_two + cond_delim except MessageTooLong: if ms is None: assert False, 'unexpected MessageTooLong' else: if ms is not None: assert False, 'expected MessageTooLong' return def test_props(): x, y = socket.socketpair() bs = BufferedSocket(x) assert bs.type == x.type assert bs.proto == x.proto assert bs.family == x.family return def test_buffers(): x, y = socket.socketpair() bx, by = BufferedSocket(x), BufferedSocket(y) assert by.getrecvbuffer() == b'' assert by.getsendbuffer() == b'' assert bx.getrecvbuffer() == b'' by.buffer(b'12') by.sendall(b'3') assert bx.recv_size(1) == b'1' assert bx.getrecvbuffer() == b'23' return def test_client_disconnecting(): def get_bs_pair(): x, y = socket.socketpair() bx, by = BufferedSocket(x), BufferedSocket(y) # sanity check by.sendall(b'123') bx.recv_size(3) == b'123' return bx, by bx, by = get_bs_pair() assert bx.fileno() > 0 bx.close() assert bx.getrecvbuffer() == b'' try: bx.recv(1) except OSError: pass else: assert False, 'expected socket.error on closed recv' assert bx.fileno() == -1 by.buffer(b'123') assert by.getsendbuffer() try: by.flush() except OSError: assert by.getsendbuffer() == b'123' else: if sys.platform != 'win32': # Windows socketpairs are kind of bad assert False, 'expected socket.error broken pipe' try: by.shutdown(socket.SHUT_RDWR) except OSError: # Mac sockets are already shut down at this point. See #71. 
if sys.platform != 'darwin': raise by.close() assert not by.getsendbuffer() try: by.send(b'123') except OSError: pass else: assert False, 'expected socket.error on closed send' return def test_split_delim(): delim = b'\r\n' first = b'1234\r' second = b'\n5' x, y = socket.socketpair() bs = BufferedSocket(x) y.sendall(first) try: bs.recv_until(delim, timeout=0.0001) except Timeout: pass y.sendall(second) assert bs.recv_until(delim, with_delimiter=True) == b'1234\r\n' assert bs.recv_size(1) == b'5' return def test_basic_nonblocking(): delim = b'\n' # test with per-call timeout x, y = socket.socketpair() bs = BufferedSocket(x) try: bs.recv_until(delim, timeout=0) except OSError as se: assert se.errno == errno.EWOULDBLOCK y.sendall(delim) # sending an empty message, effectively assert bs.recv_until(delim) == b'' # test with instance-level default timeout x, y = socket.socketpair() bs = BufferedSocket(x, timeout=0) try: bs.recv_until(delim) except OSError as se: assert se.errno == errno.EWOULDBLOCK y.sendall(delim) assert bs.recv_until(delim) == b'' # test with setblocking(0) on the underlying socket x, y = socket.socketpair() x.setblocking(0) bs = BufferedSocket(x) try: bs.recv_until(delim) except OSError as se: assert se.errno == errno.EWOULDBLOCK y.sendall(delim) assert bs.recv_until(delim) == b'' return def test_simple_buffered_socket_passthroughs(): x, y = socket.socketpair() bs = BufferedSocket(x) assert bs.getsockname() == x.getsockname() assert bs.getpeername() == x.getpeername() def test_timeout_setters_getters(): x, y = socket.socketpair() bs = BufferedSocket(x) assert bs.settimeout(1.0) is None assert bs.gettimeout() == 1.0 assert bs.setblocking(False) is None assert bs.gettimeout() == 0.0 assert bs.setblocking(True) is None assert bs.gettimeout() is None def netstring_server(server_socket): "A basic netstring server loop, supporting a few operations" try: while True: clientsock, addr = server_socket.accept() client = NetstringSocket(clientsock) while 1: request = client.read_ns() if request == b'close': clientsock.close() break elif request == b'shutdown': return elif request == b'reply4k': client.write_ns(b'a' * 4096) elif request == b'ping': client.write_ns(b'pong') elif request == b'reply128k': client.setmaxsize(128 * 1024) client.write_ns(b'huge' * 32 * 1024) # 128kb client.setmaxsize(32768) # back to default except Exception as e: print('netstring_server exiting with error: %r' % e) raise def test_socketutils_netstring(): """A holistic feature test of BufferedSocket via the NetstringSocket wrapper. 
Runs """ print("running self tests") # Set up server server_socket = socket.socket() server_socket.bind(('127.0.0.1', 0)) # localhost with ephemeral port server_socket.listen(100) ip, port = server_socket.getsockname() start_server = lambda: netstring_server(server_socket) threading.Thread(target=start_server).start() # set up client def client_connect(): clientsock = socket.create_connection((ip, port)) client = NetstringSocket(clientsock) return client # connect, ping-pong client = client_connect() client.write_ns(b'ping') assert client.read_ns() == b'pong' s = time.time() for i in range(1000): client.write_ns(b'ping') assert client.read_ns() == b'pong' dur = time.time() - s print("netstring ping-pong latency", dur, "ms") s = time.time() for i in range(1000): client.write_ns(b'ping') resps = [] for i in range(1000): resps.append(client.read_ns()) e = time.time() assert all([r == b'pong' for r in resps]) assert client.bsock.getrecvbuffer() == b'' dur = e - s print("netstring pipelined ping-pong latency", dur, "ms") # tell the server to close the socket and then try a failure case client.write_ns(b'close') try: client.read_ns() raise Exception('read from closed socket') except ConnectionClosed: print("raised ConnectionClosed correctly") # test big messages client = client_connect() client.setmaxsize(128 * 1024) client.write_ns(b'reply128k') res = client.read_ns() assert len(res) == (128 * 1024) client.write_ns(b'close') # test that read timeouts work client = client_connect() client.settimeout(0.1) try: client.read_ns() raise Exception('did not timeout') except Timeout: print("read_ns raised timeout correctly") client.write_ns(b'close') # test that netstring max sizes work client = client_connect() client.setmaxsize(2048) client.write_ns(b'reply4k') try: client.read_ns() raise Exception('read more than maxsize') except NetstringMessageTooLong: print("raised MessageTooLong correctly") try: client.bsock.recv_until(b'b', maxsize=4096) raise Exception('recv_until did not raise MessageTooLong') except MessageTooLong: print("raised MessageTooLong correctly") assert client.bsock.recv_size(4097) == b'a' * 4096 + b',' print('correctly maintained buffer after exception raised') # test BufferedSocket read timeouts with recv_until and recv_size client.bsock.settimeout(0.01) try: client.bsock.recv_until(b'a') raise Exception('recv_until did not raise Timeout') except Timeout: print('recv_until correctly raised Timeout') try: client.bsock.recv_size(1) raise Exception('recv_size did not raise Timeout') except Timeout: print('recv_size correctly raised Timeout') client.write_ns(b'shutdown') print("all passed") def netstring_server_timeout_override(server_socket): """Netstring socket has an unreasonably low timeout, however it should be overridden by the `read_ns` argument.""" try: while True: clientsock, addr = server_socket.accept() client = NetstringSocket(clientsock, timeout=0.01) while 1: request = client.read_ns(1) if request == b'close': clientsock.close() break elif request == b'shutdown': return elif request == b'ping': client.write_ns(b'pong') except Exception as e: print('netstring_server exiting with error: %r' % e) raise def test_socketutils_netstring_timeout(): """Tests that server socket timeout is overridden by the argument to read call. Server has timeout of 10 ms, and we will sleep for 20 ms. 
If timeout is not overridden correctly, a timeout exception will be raised.""" print("running timeout test") # Set up server server_socket = socket.socket() server_socket.bind(('127.0.0.1', 0)) # localhost with ephemeral port server_socket.listen(100) ip, port = server_socket.getsockname() start_server = lambda: netstring_server_timeout_override(server_socket) threading.Thread(target=start_server).start() # set up client def client_connect(): clientsock = socket.create_connection((ip, port)) client = NetstringSocket(clientsock) return client # connect, ping-pong client = client_connect() time.sleep(0.02) client.write_ns(b'ping') assert client.read_ns() == b'pong' client.write_ns(b'shutdown') print("no timeout occurred - all good.")boltons-25.0.0/tests/test_statsutils.py000066400000000000000000000021001475005545200202500ustar00rootroot00000000000000from boltons.statsutils import Stats def test_stats_basic(): da = Stats(range(20)) assert da.mean == 9.5 assert round(da.std_dev, 2) == 5.77 assert da.variance == 33.25 assert da.skewness == 0 assert round(da.kurtosis, 1) == 1.9 assert da.median == 9.5 def _test_pearson(): import random from statsutils import pearson_type def get_pt(dist): vals = [dist() for x in range(10000)] pt = pearson_type(vals) return pt for x in range(3): # pt = get_pt(dist=lambda: random.normalvariate(15, 5)) # expect 0, normal # pt = get_pt(dist=lambda: random.weibullvariate(2, 3)) # gets 1, beta, weibull not specifically supported # pt = get_pt(dist=lambda: random.gammavariate(2, 3)) # expect 3, gamma # pt = get_pt(dist=lambda: random.betavariate(2, 3)) # expect 1, beta # pt = get_pt(dist=lambda: random.expovariate(0.2)) # expect 3, beta pt = get_pt(dist=lambda: random.uniform(0.0, 10.0)) # gets 2 print('pearson type:', pt) # import pdb;pdb.set_trace() boltons-25.0.0/tests/test_statsutils_histogram.py000066400000000000000000000073061475005545200223420ustar00rootroot00000000000000from boltons.statsutils import Stats # [round(random.normalvariate(10, 3), 3) for i in range(100)] NORM_DATA = [12.975, 8.341, 12.27, 12.799, 15.443, 6.739, 10.572, 14.863, 3.723, 9.825, 7.716, 12.218, 11.641, 9.02, 13.037, 11.175, 13.156, 11.706, 8.184, 13.306, 9.845, 11.665, 14.298, 12.021, 8.419, 10.209, 10.698, 6.559, 10.346, 9.895, 11.742, 13.391, 10.587, 6.639, 10.23, 8.841, 10.511, 6.033, 5.767, 8.482, 9.517, 9.039, 11.111, 13.845, 4.331, 5.323, 14.486, 14.875, 10.005, 6.367, 12.18, 11.69, 13.97, 4.14, 7.979, 11.114, 4.126, 10.028, 9.295, 10.078, 14.615, 7.055, 7.641, 9.037, 9.933, 10.077, 14.174, 14.645, 10.398, 10.238, 9.067, 4.841, 13.159, 15.829, 8.464, 7.47, 11.858, 9.885, 11.978, 5.418, 12.19, 8.206, 10.755, 6.455, 10.019, 11.594, 9.082, 10.245, 12.321, 8.508, 9.711, 5.5, 15.001, 9.922, 7.864, 7.794, 10.546, 9.203, 8.798, 9.853] SIMPLE_RANGE_DATA = range(110) LAYER_RANGE_DATA = (list(range(100)) + list(range(20, 80)) + list(range(40, 60))) EMPTY_DATA = [] ALL_DATASETS = [EMPTY_DATA, LAYER_RANGE_DATA, SIMPLE_RANGE_DATA, NORM_DATA] def test_check_sum(): for data in ALL_DATASETS: for bin_size in [0, 1, 10, 99]: # bin_size=0 tests freedman stats = Stats(data) hist_counts = stats.get_histogram_counts() hist_counts_sum = sum([c for _, c in hist_counts]) assert len(data) == hist_counts_sum if not data: continue assert min(data) >= hist_counts[0][0] assert max(data) >= hist_counts[-1][0] return def test_norm_regression(): stats = Stats(NORM_DATA) assert stats.format_histogram(width=80) == NORM_DATA_FREEDMAN_OUTPUT assert stats.format_histogram(10, width=80) == NORM_DATA_TEN_BIN_OUTPUT 
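    # A small illustrative aside, not from the original suite -- it assumes,
    # per the ten-bin assertion above, that an integer bin count yields one
    # output row per requested bin (uniform data here, so no bin is empty):
    side_hist = Stats(range(100)).format_histogram(4, width=40)
    assert len(side_hist.splitlines()) == 4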
    subpar_bin_out = stats.format_histogram([12.0], width=80)
    assert subpar_bin_out == NORM_DATA_SINGLE_SUBPAR_BIN_OUTPUT

    format_bin_out = stats.format_histogram(5, width=80,
                                            format_bin=lambda b: '%sms' % b)
    assert format_bin_out == NORM_DATA_FORMAT_BIN_OUTPUT


NORM_DATA_FREEDMAN_OUTPUT = """\
3.7: 5 ################
5.2: 10 ################################
6.8: 9 #############################
8.3: 21 ###################################################################
9.9: 22 ######################################################################
11.4: 16 ###################################################
13.0: 10 ################################
14.5: 7 ######################"""

NORM_DATA_TEN_BIN_OUTPUT = """\
3.7: 5 ##############
4.9: 5 ##############
6.1: 6 #################
7.3: 12 ##################################
8.5: 11 ###############################
9.7: 25 ######################################################################
10.9: 12 ##################################
12.1: 12 ##################################
13.4: 5 ##############
14.6: 7 ####################"""

# make sure the minimum gets added
NORM_DATA_SINGLE_SUBPAR_BIN_OUTPUT = """\
3.7: 75 ######################################################################
12.0: 25 #######################"""

NORM_DATA_FORMAT_BIN_OUTPUT = """\
3.7ms: 10 ###################
6.1ms: 18 ##################################
8.5ms: 36 ####################################################################
10.9ms: 24 #############################################
13.4ms: 12 #######################"""


def main():
    print(Stats(NORM_DATA).format_histogram(10))


if __name__ == '__main__':
    main()
boltons-25.0.0/tests/test_strutils.py000066400000000000000000000131321475005545200177310ustar00rootroot00000000000000import re
import uuid
from unittest import TestCase

from boltons import strutils


def test_strip_ansi():
    assert strutils.strip_ansi(
        '\x1b[0m\x1b[1;36mart\x1b[46;34m\xdc') == 'art\xdc'
    assert strutils.strip_ansi(
        '\x1b[0m\x1b[1;36mart\x1b[46;34m\xdc') == 'artÜ'
    assert strutils.strip_ansi(
        '╒══════╕\n│ \x1b[1mCell\x1b[0m │\n╘══════╛') == (
        '╒══════╕\n'
        '│ Cell │\n'
        '╘══════╛')
    assert strutils.strip_ansi(
        'ls\r\n\x1B[00m\x1b[01;31mfile.zip\x1b[00m\r\n\x1b[01;31m') == \
        'ls\r\nfile.zip\r\n'
    assert strutils.strip_ansi(
        '\t\u001b[0;35mIP\u001b[0m\t\u001b[0;36m192.1.0.2\u001b[0m') == \
        '\tIP\t192.1.0.2'
    assert strutils.strip_ansi('(╯°□°)╯︵ \x1b[1m┻━┻\x1b[0m') == (
        '(╯°□°)╯︵ ┻━┻')
    assert strutils.strip_ansi(
        b'(\xe2\x95\xaf\xc2\xb0\xe2\x96\xa1\xc2\xb0)\xe2\x95\xaf\xef\xb8'
        b'\xb5 \x1b[1m\xe2\x94\xbb\xe2\x94\x81\xe2\x94\xbb\x1b[0m') == (
        b'(\xe2\x95\xaf\xc2\xb0\xe2\x96\xa1\xc2\xb0)\xe2\x95\xaf\xef\xb8'
        b'\xb5 \xe2\x94\xbb\xe2\x94\x81\xe2\x94\xbb')
    assert strutils.strip_ansi(
        bytearray('(╯°□°)╯︵ \x1b[1m┻━┻\x1b[0m', 'utf-8')) == \
        bytearray(
            b'(\xe2\x95\xaf\xc2\xb0\xe2\x96\xa1\xc2\xb0)\xe2\x95\xaf\xef\xb8'
            b'\xb5 \xe2\x94\xbb\xe2\x94\x81\xe2\x94\xbb')


def test_asciify():
    ref = 'Beyoncé'
    b = strutils.asciify(ref)
    assert len(ref) == len(b)
    assert b[-1:].decode('ascii') == 'e'


def test_indent():
    to_indent = '\nabc\ndef\n\nxyz\n'
    ref = '\n abc\n def\n\n xyz\n'
    assert strutils.indent(to_indent, ' ') == ref


def test_is_uuid():
    assert strutils.is_uuid(uuid.uuid4()) == True
    assert strutils.is_uuid(uuid.uuid4(), version=1) == False
    assert strutils.is_uuid(str(uuid.uuid4())) == True
    assert strutils.is_uuid(str(uuid.uuid4()), version=1) == False
    assert strutils.is_uuid(set('garbage')) == False


def test_parse_int_list():
    assert strutils.parse_int_list("1,3,5-8,10-11,15") == [1, 3, 5, 6, 7, 8, 10, 11, 15]
    assert strutils.parse_int_list("1,3,5-8,10-11,15,") == [1, 3, 5, 6, 7, 8, 10, 11, 15]
    assert strutils.parse_int_list(",1,3,5-8,10-11,15") == [1, 3, 5, 6, 7, 8, 10, 11, 15]
    assert strutils.parse_int_list(" 1, 3 ,5-8,10-11,15 ") == [1, 3, 5, 6, 7, 8, 10, 11, 15]
    assert strutils.parse_int_list("3,1,5-8,10-11,15") == [1, 3, 5, 6, 7, 8, 10, 11, 15]

    assert strutils.parse_int_list("5-8") == [5, 6, 7, 8]
    assert strutils.parse_int_list("8-5") == [5, 6, 7, 8]


def test_format_int_list():
    assert strutils.format_int_list([1, 3, 5, 6, 7, 8, 10, 11, 15]) == '1,3,5-8,10-11,15'
    assert strutils.format_int_list([5, 6, 7, 8]) == '5-8'

    assert strutils.format_int_list([1, 3, 5, 6, 7, 8, 10, 11, 15], delim_space=True) == '1, 3, 5-8, 10-11, 15'
    assert strutils.format_int_list([5, 6, 7, 8], delim_space=True) == '5-8'


class TestMultiReplace(TestCase):

    def test_simple_substitutions(self):
        """Test replacing multiple values."""
        m = strutils.MultiReplace({r'cat': 'kedi', r'purple': 'mor', })
        self.assertEqual(m.sub('The cat is purple'), 'The kedi is mor')

    def test_shortcut_function(self):
        """Test replacing multiple values."""
        self.assertEqual(
            strutils.multi_replace(
                'The cat is purple',
                {r'cat': 'kedi', r'purple': 'mor', }
            ),
            'The kedi is mor'
        )

    def test_substitutions_in_word(self):
        """Test replacing multiple values that are substrings of a word."""
        m = strutils.MultiReplace({r'cat': 'kedi', r'purple': 'mor', })
        self.assertEqual(m.sub('Thecatispurple'), 'Thekediismor')

    def test_sub_with_regex(self):
        """Test substitutions with a regular expression."""
        m = strutils.MultiReplace({
            r'cat': 'kedi',
            r'purple': 'mor',
            r'q\w+?t': 'dinglehopper'
        }, regex=True)
        self.assertEqual(
            m.sub('The purple cat ate a quart of jelly'),
            'The mor kedi ate a dinglehopper of jelly'
        )

    def test_sub_with_list(self):
        """Test substitutions from an iterable instead of a dictionary."""
        m = strutils.MultiReplace([
            (r'cat', 'kedi'),
            (r'purple', 'mor'),
            (r'q\w+?t', 'dinglehopper'),
        ], regex=True)
        self.assertEqual(
            m.sub('The purple cat ate a quart of jelly'),
            'The mor kedi ate a dinglehopper of jelly'
        )

    def test_sub_with_compiled_regex(self):
        """Test substitutions where some regular expressions are compiled."""
        exp = re.compile(r'q\w+?t')
        m = strutils.MultiReplace([
            (r'cat', 'kedi'),
            (r'purple', 'mor'),
            (exp, 'dinglehopper'),
        ])
        self.assertEqual(
            m.sub('The purple cat ate a quart of jelly'),
            'The mor kedi ate a dinglehopper of jelly'
        )

    def test_substitutions_with_regex_chars(self):
        """Test replacing values that have special regex characters."""
        m = strutils.MultiReplace({'cat.+': 'kedi', r'purple': 'mor', })
        self.assertEqual(m.sub('The cat.+ is purple'), 'The kedi is mor')


def test_roundzip():
    aaa = b'a' * 10000
    assert strutils.gunzip_bytes(strutils.gzip_bytes(aaa)) == aaa
    assert strutils.gunzip_bytes(strutils.gzip_bytes(b'')) == b''
boltons-25.0.0/tests/test_tableutils.py000066400000000000000000000026721475005545200202170ustar00rootroot00000000000000from boltons.tableutils import Table


def test_table_lists():
    data_lists = [['id', 'name'],
                  [1, 'John Doe'],
                  [2, 'Dale Simmons']]
    t1 = Table(data_lists)
    assert set(t1.headers) == {'id', 'name'}
    assert len(t1) == 2
    assert 'John Doe' in repr(t1)


T2_REF_HTML = """<table>
<tr><th>id</th><td>1</td></tr>
<tr><th>name</th><td>John Doe</td></tr>
</table>
""" T3_REF_HTML = """
<table>
<tr><th>id</th><th>name</th></tr>
<tr><td>1</td><td>John Doe</td></tr>
<tr><td>2</td><td>Dale Simmons</td></tr>
<tr><td>3</td><td>Kurt Rose</td></tr>
<tr><td>4</td><td>None</td></tr>
</table>
""" def test_table_dicts(): data_dicts = [{'id': 1, 'name': 'John Doe'}, {'id': 2, 'name': 'Dale Simmons'}] t2 = Table.from_dict(data_dicts[0]) t3 = Table.from_dict(data_dicts) t3.extend([[3, 'Kurt Rose'], [4]]) assert set(t2.headers) == {'id', 'name'} assert len(t2) == 1 # the sorted() stuff handles ordering differences between versions # TODO: should maybe change Table to sort the headers of dicts and such? assert sorted(t2.to_html()) == sorted(T2_REF_HTML) assert sorted(t3.to_html()) == sorted(T3_REF_HTML) assert t3.to_text() def test_table_obj(): class TestType: def __init__(self): self.greeting = 'hi' t4 = Table.from_object(TestType()) assert len(t4) == 1 assert 'greeting' in t4.headers boltons-25.0.0/tests/test_tbutils.py000066400000000000000000000060541475005545200175330ustar00rootroot00000000000000import json import sys from io import StringIO from boltons.tbutils import (TracebackInfo, ExceptionInfo, print_exception, fix_print_exception, ContextualCallpoint, ContextualExceptionInfo) def test_exception_info(): # test ExceptionInfo and TracebackInfo and hooks, via StringIOs builtin_exc_hook = sys.excepthook fix_print_exception() tbi_str = '' def test(): raise ValueError('yay fun') fake_stderr1 = StringIO() fake_stderr2 = StringIO() sys.stderr = fake_stderr1 try: test() except: exc, _, exc_traceback = sys.exc_info() tbi = TracebackInfo.from_traceback(exc_traceback) exc_info = ExceptionInfo.from_exc_info(*sys.exc_info()) exc_info2 = ExceptionInfo.from_current() tbi_str = str(tbi) print_exception(*sys.exc_info(), file=fake_stderr2) new_exc_hook_res = fake_stderr2.getvalue() builtin_exc_hook(*sys.exc_info()) builtin_exc_hook_res = fake_stderr1.getvalue() finally: sys.stderr = sys.__stderr__ # Single frame single_frame_str = tbi.frames[-1].tb_frame_str() assert 'in test' in single_frame_str assert 'yay fun' in single_frame_str # Traceback info assert len(tbi_str.splitlines()) == 5 assert 'yay fun' in tbi_str # Full except hook output assert 'ValueError: yay fun' in new_exc_hook_res assert "ValueError('yay fun')" in new_exc_hook_res assert len(new_exc_hook_res) > len(tbi_str) if sys.version_info <= (3, 12): # output diverges with Python 3.13+, see https://github.com/mahmoud/boltons/issues/365 # TLDR tbutils only has minimal handling for anchors (e.g., ~~~~^^) assert new_exc_hook_res == builtin_exc_hook_res def test_contextual(): def func1(): return func2() def func2(): x = 5 return func3() def func3(): return ContextualCallpoint.from_current(level=2) callpoint = func1() assert callpoint.func_name == 'func2' line = str(callpoint.line) assert line.startswith(' ') assert line.strip() == 'return func3()' assert 'func2' in repr(callpoint) try: json.dumps(callpoint.to_dict()) except TypeError: raise AssertionError("to_dict result is not JSON serializable") def func_a(): a = 1 raise Exception('func_a exception') def func_b(): b = 2 return func_a() def func_c(): c = 3 return func_b() try: func_c() except Exception as e: ctx_ei = ContextualExceptionInfo.from_current() ctx_ei_str = ctx_ei.get_formatted() ctx_ei_lines = ctx_ei_str.splitlines() assert ctx_ei_lines[-1] == 'Exception: func_a exception' assert ctx_ei_lines[0] == 'Traceback (most recent call last):' assert len(ctx_ei_lines) == 10 assert "Exception('func_a exception')" in ctx_ei_str assert ctx_ei.tb_info.frames[2].local_reprs['b'] == '2' boltons-25.0.0/tests/test_tbutils_parsed_exc.py000066400000000000000000000063571475005545200217360ustar00rootroot00000000000000from boltons.tbutils import ParsedException def test_parsed_exc_basic(): 
_tb_str = """\ Traceback (most recent call last): File "example.py", line 2, in plarp NameError: name 'plarp' is not defined""" parsed_tb = ParsedException.from_string(_tb_str) print(parsed_tb) assert parsed_tb.exc_type == 'NameError' assert parsed_tb.exc_msg == "name 'plarp' is not defined" assert parsed_tb.frames == [{'source_line': 'plarp', 'filepath': 'example.py', 'lineno': '2', 'funcname': ''}] assert parsed_tb.to_string() == _tb_str def test_parsed_exc_nosrcline(): """just making sure that everything can be parsed even if there is a line without source and also if the exception has no message""" _tb_str = """\ Traceback (most recent call last): File "/home/mahmoud/virtualenvs/chert/bin/chert", line 9, in load_entry_point('chert==0.2.1.dev0', 'console_scripts', 'chert')() File "/home/mahmoud/projects/chert/chert/core.py", line 1281, in main ch.process() File "/home/mahmoud/projects/chert/chert/core.py", line 741, in process self.load() File "", line 2, in load File "/home/mahmoud/projects/lithoxyl/lithoxyl/logger.py", line 291, in logged_func return func_to_log(*a, **kw) File "/home/mahmoud/projects/chert/chert/core.py", line 775, in load raise RuntimeError RuntimeError""" parsed_tb = ParsedException.from_string(_tb_str) assert parsed_tb.exc_type == 'RuntimeError' assert parsed_tb.exc_msg == '' assert len(parsed_tb.frames) == 6 assert parsed_tb.frames[3] == {'source_line': '', 'filepath': '', 'lineno': '2', 'funcname': 'load'} assert parsed_tb.to_string() == _tb_str def test_parsed_exc_with_anchor(): """parse a traceback with anchor lines beneath source lines""" _tb_str = """\ Traceback (most recent call last): File "main.py", line 3, in print(add(1, "two")) ^^^^^^^^^^^^^ File "add.py", line 2, in add return a + b ~~^~~ TypeError: unsupported operand type(s) for +: 'int' and 'str'""" parsed_tb = ParsedException.from_string(_tb_str) assert parsed_tb.exc_type == 'TypeError' assert parsed_tb.exc_msg == "unsupported operand type(s) for +: 'int' and 'str'" assert parsed_tb.frames == [{'source_line': 'print(add(1, "two"))', 'filepath': 'main.py', 'lineno': '3', 'funcname': ''}, {'source_line': 'return a + b', 'filepath': 'add.py', 'lineno': '2', 'funcname': 'add'}] # Note: not checking the anchor lines (indices 3, 6) because column details not currently stored in ParsedException _tb_str_lines = _tb_str.splitlines() _tb_str_without_anchor = "\n".join(_tb_str_lines[:3] + _tb_str_lines[4:6] + _tb_str_lines[7:]) assert parsed_tb.to_string() == _tb_str_without_anchorboltons-25.0.0/tests/test_timeutils.py000066400000000000000000000037221475005545200200630ustar00rootroot00000000000000from datetime import timedelta, date import pytest from boltons.timeutils import daterange def test_daterange_years(): new_year = date(2017, 1, 1) bit_rollover = date(2038, 1, 19) new_years_remaining = daterange(new_year, bit_rollover, step=(1, 0, 0)) assert len(list(new_years_remaining)) == 22 y2025 = date(2025, 1, 1) bakers_years_til_2025 = list(daterange(new_year, y2025, step=(1, 1, 0))) assert len(bakers_years_til_2025) == 8 assert bakers_years_til_2025[-1] == date(2024, 8, 1) assert bakers_years_til_2025[-1] == date(2024, 8, 1) years_from_2025 = list(daterange(y2025, new_year, step=(-1, 0, 0), inclusive=True)) assert years_from_2025[0] == date(2025, 1, 1) assert years_from_2025[-1] == date(2017, 1, 1) def test_daterange_years_step(): start_day = date(year=2012, month=12, day=25) end_day = date(year=2016, month=1, day=1) dates = list(daterange(start_day, end_day, step=(1, 0, 0), inclusive=False)) expected = 
[date(year=2012, month=12, day=25), date(year=2013, month=12, day=25), date(year=2014, month=12, day=25), date(year=2015, month=12, day=25)] assert dates == expected dates = list(daterange(start_day, end_day, step=(0, 13, 0), inclusive=False)) expected = [date(year=2012, month=12, day=25), date(year=2014, month=1, day=25), date(year=2015, month=2, day=25)] assert dates == expected def test_daterange_infinite(): today = date.today() infinite_dates = daterange(today, None) for i in range(10): assert next(infinite_dates) == today + timedelta(days=i) def test_daterange_with_same_start_stop(): today = date.today() date_range = daterange(today, today) with pytest.raises(StopIteration): next(date_range) date_range_inclusive = daterange(today, today, inclusive=True) assert next(date_range_inclusive) == today with pytest.raises(StopIteration): next(date_range_inclusive) boltons-25.0.0/tests/test_typeutils.py000066400000000000000000000005731475005545200201070ustar00rootroot00000000000000import copy import pickle from boltons.typeutils import make_sentinel NOT_SET = make_sentinel('not_set', var_name='NOT_SET') def test_sentinel_falsiness(): assert not NOT_SET def test_sentinel_pickle(): assert pickle.dumps(NOT_SET) def test_sentinel_copy(): test = make_sentinel('test') assert test is copy.copy(test) assert test is copy.deepcopy(test) boltons-25.0.0/tests/test_urlutils.py000066400000000000000000000402431475005545200177260ustar00rootroot00000000000000import pytest from boltons import urlutils from boltons.urlutils import URL, _URL_RE, find_all_links # fully quoted urls that should round trip TEST_URLS = [ 'http://googlewebsite.com/e-shops.aspx', 'http://example.com:8080/search?q=123&business=Nothing%20Special', 'http://hatnote.com:9000?arg=1&arg=2&arg=3', 'https://xn--bcher-kva.ch', 'http://xn--ggbla1c4e.xn--ngbc5azd/', 'http://tools.ietf.org/html/rfc3986#section-3.4', 'http://wiki:pedia@hatnote.com', 'ftp://ftp.rfc-editor.org/in-notes/tar/RFCs0001-0500.tar.gz', 'http://[1080:0:0:0:8:800:200C:417A]/index.html', 'ssh://192.0.2.16:2222/', 'https://[::101.45.75.219]:80/?hi=bye', 'ldap://[::192.9.5.5]/dc=example,dc=com??sub?(sn=Jensen)', 'mailto:me@example.com?to=me@example.com&body=hi%20http://wikipedia.org', 'news:alt.rec.motorcycle', 'tel:+1-800-867-5309', 'urn:oasis:member:A00024:x', ('magnet:?xt=urn:btih:1a42b9e04e122b97a5254e3df77ab3c4b7da725f&dn=Puppy%' '20Linux%20precise-5.7.1.iso&tr=udp://tracker.openbittorrent.com:80&' 'tr=udp://tracker.publicbt.com:80&tr=udp://tracker.istole.it:6969&' 'tr=udp://tracker.ccc.de:80&tr=udp://open.demonii.com:1337'), # from twisted: "http://localhost", "http://localhost/", "http://localhost/foo", "http://localhost/foo/", "http://localhost/foo!!bar/", "http://localhost/foo%20bar/", "http://localhost/foo%2Fbar/", "http://localhost/foo?n", "http://localhost/foo?n=v", "http://localhost/foo?n=/a/b", "http://example.com/foo!@$bar?b!@z=123", "http://localhost/asd?a=asd%20sdf/345", "http://(%2525)/(%2525)?(%2525)&(%2525)=(%2525)#(%2525)", "http://(%C3%A9)/(%C3%A9)?(%C3%A9)&(%C3%A9)=(%C3%A9)#(%C3%A9)" ] @pytest.fixture(scope="module", params=TEST_URLS) def test_url(request): param = request.param return param def test_regex(test_url): match = _URL_RE.match(test_url) assert match.groupdict() def test_roundtrip(test_url): result = URL(test_url).to_text(full_quote=True) assert test_url == result def test_basic(): u1 = URL('http://googlewebsite.com/e-shops.aspx') assert isinstance(u1.to_text(), str) assert u1.host == 'googlewebsite.com' def test_utf8_url(): url_bytes = 
(b'http://\xd9\x85\xd8\xab\xd8\xa7\xd9\x84' b'.\xd8\xa2\xd8\xb2\xd9\x85\xd8\xa7' b'\xdb\x8c\xd8\xb4\xdb\x8c') url = URL(url_bytes) assert url.scheme == 'http' assert url.host == 'مثال.آزمایشی' def test_idna(): u1 = URL('http://bücher.ch') assert u1.host == 'bücher.ch' assert u1.to_text(full_quote=True) == 'http://xn--bcher-kva.ch' assert u1.to_text(full_quote=False) == 'http://bücher.ch' u2 = URL('https://xn--bcher-kva.ch') assert u2.host == 'bücher.ch' assert u2.to_text(full_quote=True) == 'https://xn--bcher-kva.ch' assert u2.to_text(full_quote=False) == 'https://bücher.ch' def test_query_params(test_url): url_obj = URL(test_url) if not url_obj.query_params or url_obj.fragment: return qp_text = url_obj.query_params.to_text(full_quote=True) assert test_url.endswith(qp_text) def test_iri_query(): url = URL('http://minerals.mountain.ore/?rock=\N{SHAMROCK}') assert url.query_params['rock'] == '\N{SHAMROCK}' assert url.query_params.to_text(full_quote=True).endswith('%E2%98%98') def test_iri_path(): url = URL('http://minerals.mountain.ore/rock/\N{SHAMROCK}/') assert url.path == '/rock/\N{SHAMROCK}/' assert url.to_text(full_quote=True).endswith('%E2%98%98/') def test_url_copy(): url = URL('http://example.com/foo?bar=baz') url_copy = URL(url) assert url == url_copy def test_invalid_port(): with pytest.raises(ValueError): URL('http://reader.googlewebsite.com:neverforget') def test_invalid_ipv6(): invalid_ipv6_ips = ['2001::0234:C1ab::A0:aabc:003F', '2001::1::3F'] for ip in invalid_ipv6_ips: with pytest.raises(ValueError): URL('http://[' + ip + ']') def test_parse_url(): expected = {'family': 2, 'password': None, 'fragment': None, 'authority': '127.0.0.1:3000', 'port': 3000, 'query': 'a=1', '_netloc_sep': '//', 'path': '/', 'scheme': 'http', 'host': '127.0.0.1', 'username': None} res = urlutils.parse_url('http://127.0.0.1:3000/?a=1') assert res == expected def test_parse_equals_in_qp_value(): u = URL('http://localhost/?=x=x=x') assert u.qp[''] == 'x=x=x' assert u.to_text() == 'http://localhost/?=x%3Dx%3Dx' u = URL('http://localhost/?foo=x=x=x&bar=y') assert u.qp['foo'] == 'x=x=x' assert u.qp['bar'] == 'y' def test_identical_equal(): u = URL('http://example.com/path?query=param#frag') assert u == u def test_equal(): u = URL('http://example.com/path?query=param#frag') bono = URL('http://example.com/path?query=param#frag') assert bono == u def test_not_equal(): u = URL('http://example.com/path?query=param1#frag') bono = URL('http://example.com/path?query=param2#frag') assert bono != u def _test_bad_utf8(): # not part of the API bad_bin_url = 'http://xn--9ca.com/%00%FF/%C3%A9' url = URL(bad_bin_url) expected = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/' '%00%FF/' '\N{LATIN SMALL LETTER E WITH ACUTE}') actual = url.to_text() assert expected == actual def test_userinfo(): url = URL('http://someuser:somepassword@example.com/some-segment@ignore') assert url.username == 'someuser' assert url.password == 'somepassword' assert url.to_text() == 'http://someuser:somepassword@example.com/some-segment@ignore' def test_quoted_userinfo(): url = URL('http://wikipedia.org') url.username = 'user' url.password = 'p@ss' assert url.to_text(full_quote=True) == 'http://user:p%40ss@wikipedia.org' url = URL('http://beyonc\xe9:b\xe9b@tmp.com') # assert url.to_text(full_quote=False) == u'http://beyoncé:b%C3%A9b@tmp.com' assert url.to_text(full_quote=True) == 'http://beyonc%C3%A9:b%C3%A9b@tmp.com' def test_mailto(): mt = 'mailto:mahmoud@hatnote.com' url = URL(mt) assert url.scheme == 'mailto' assert url.to_text() == mt # 
# Examples from RFC 3986 section 5.4, Reference Resolution Examples
# painstakingly copied from the lovingly transcribed version in
# twisted's test_url, with inapplicable cases removed
REL_URL_BASE = 'http://a/b/c/d;p?q'
REL_URL_TEST_CASES = [
    # "Normal"
    # ('g:h', 'g:h'),  # Not supported: scheme with relative path
    ('g', 'http://a/b/c/g'),
    ('./g', 'http://a/b/c/g'),
    ('g/', 'http://a/b/c/g/'),
    ('/g', 'http://a/g'),
    (';x', 'http://a/b/c/;x'),
    ('g;x', 'http://a/b/c/g;x'),
    ('', 'http://a/b/c/d;p?q'),
    ('.', 'http://a/b/c/'),
    ('./', 'http://a/b/c/'),
    ('..', 'http://a/b/'),
    ('../', 'http://a/b/'),
    ('../g', 'http://a/b/g'),
    ('../..', 'http://a/'),
    ('../../', 'http://a/'),
    ('../../g', 'http://a/g'),

    # Abnormal examples
    # ".." cannot be used to change the authority component of a URI.
    ('../../../g', 'http://a/g'),     # TODO (rooted?)
    ('../../../../g', 'http://a/g'),  # TODO (rooted?)

    # Only include "." and ".." when they are part of a larger segment,
    # not by themselves.
    ('/./g', 'http://a/g'),
    ('/../g', 'http://a/g'),
    ('g.', 'http://a/b/c/g.'),
    ('.g', 'http://a/b/c/.g'),
    ('g..', 'http://a/b/c/g..'),
    ('..g', 'http://a/b/c/..g'),

    # Unnecessary or nonsensical forms of "." and "..".
    ('./../g', 'http://a/b/g'),
    ('./g/.', 'http://a/b/c/g/'),
    ('g/./h', 'http://a/b/c/g/h'),
    ('g/../h', 'http://a/b/c/h'),
    ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
    ('g;x=1/../y', 'http://a/b/c/y'),
]


def test_rel_navigate():
    for suffix, expected in REL_URL_TEST_CASES:
        url = URL(REL_URL_BASE)
        new_url = url.navigate(suffix)
        assert new_url.to_text() == expected

        new_url = url.navigate(URL(suffix))
        assert new_url.to_text() == expected

    return


def test_navigate():
    orig_text = 'http://a.b/c/d?e#f'
    orig = URL(orig_text)

    navd = orig.navigate('')  # fragment removed on empty navigate
    assert navd.to_text() == 'http://a.b/c/d?e'

    # query also removed on non-empty navigate (interp'd as rel path)
    navd = orig.navigate('dd')
    assert navd.to_text() == 'http://a.b/c/dd'

    # check trailing slash
    navd = orig.navigate('dd/')
    assert navd.to_text() == 'http://a.b/c/dd/'

    # path removed on absolute path navigate
    navd = orig.navigate('/C')
    assert navd.to_text() == 'http://a.b/C'

    # only query string
    navd = orig.navigate('?e=E&ee=EE')
    assert navd.to_text() == 'http://a.b/c/d?e=E&ee=EE'

    # only fragment
    navd = orig.navigate('#FFF')
    assert navd.to_text() == 'http://a.b/c/d?e#FFF'

    # an odd case, bears more consideration perhaps
    navd = orig.navigate('https:')
    assert navd.to_text() == 'https://a.b/c/d?e'

    # another odd one, host only
    navd = orig.navigate('//newhost')
    assert navd.to_text() == 'http://newhost/c/d?e'

    # absolute URLs (with scheme + host) replace everything
    _dest_text = 'http://hatnote.com'
    _dest = URL(_dest_text)
    navd = orig.navigate(_dest)
    assert _dest is not navd  # make sure copies are made
    assert navd.to_text() == _dest_text
    navd = orig.navigate(_dest_text)
    assert navd.to_text() == _dest_text


@pytest.mark.parametrize(
    ('expected', 'base', 'paths'), [
        ('https://host/b', 'https://host', ('a', '/b', )),
        ('https://host/b', 'https://host', ('a', 'b', )),
        ('https://host/a/b', 'https://host', ('a/', 'b', )),
        ('https://host/b', 'https://host', ('/a', 'b', )),
        ('https://host/a/b', 'https://host/a/', (None, 'b', )),
        ('https://host/b', 'https://host/a', (None, 'b', )),
    ])
def test_chained_navigate(expected, base, paths):
    """Chained :meth:`navigate` calls produce correct results."""
    url = URL(base)

    for path in paths:
        url = url.navigate(path)

    assert expected == url.to_text()
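
# navigate() hands back a new URL rather than mutating the base (note the
# "copies are made" assertion above), which is what makes chaining work.
# A minimal illustrative sketch, not part of the suite:
def _demo_chained_navigate():
    api_root = URL('https://host/api/')
    detail = api_root.navigate('users/').navigate('42')
    assert detail.to_text() == 'https://host/api/users/42'
    assert api_root.to_text() == 'https://host/api/'  # base is untouched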
# TODO: RFC3986 6.2.3 (not just for query add, either)
# def test_add_query():
#     url = URL('http://www.example.com')
#     url.qp['key'] = 'value'
#     assert url.to_text() == 'http://www.example.com/?key=value'


def test_self_normalize():
    url = URL('http://hatnote.com/a/../../b?k=v#hashtags')
    url.normalize()
    assert url.to_text() == 'http://hatnote.com/b?k=v#hashtags'


def test_normalize_with_case():
    # from RFC 3986 Section 6.2.2
    url1 = URL('example://a/b/c/%7Bfoo%7D')
    url2 = URL('eXAMPLE://a/./b/../b/%63/%7bfoo%7d')

    assert url1 != url2

    url1.normalize()
    url2.normalize()

    assert url1 == url2


def test_netloc_slashes():
    # basic sanity checks
    url = URL('mailto:mahmoud@hatnote.com')
    assert url.scheme == 'mailto'
    assert url.to_text() == 'mailto:mahmoud@hatnote.com'

    url = URL('http://hatnote.com')
    assert url.scheme == 'http'
    assert url.to_text() == 'http://hatnote.com'

    # test that unrecognized schemes stay consistent with '//'
    url = URL('newscheme:a:b:c')
    assert url.scheme == 'newscheme'
    assert url.to_text() == 'newscheme:a:b:c'

    url = URL('newerscheme://a/b/c')
    assert url.scheme == 'newerscheme'
    assert url.to_text() == 'newerscheme://a/b/c'

    # test that reasonable guesses are made
    url = URL('git+ftp://gitstub.biz/glyph/lefkowitz')
    assert url.scheme == 'git+ftp'
    assert url.to_text() == 'git+ftp://gitstub.biz/glyph/lefkowitz'

    url = URL('what+mailto:freerealestate@enotuniq.org')
    assert url.scheme == 'what+mailto'
    assert url.to_text() == 'what+mailto:freerealestate@enotuniq.org'

    url = URL()
    url.scheme = 'ztp'
    url.path = '/x/y/z'
    assert url.to_text() == 'ztp:/x/y/z'

    # also works when the input doesn't include '//'
    url = URL()
    url.scheme = 'git+ftp'
    url.path = '/x/y/z/'
    assert url.to_text() == 'git+ftp:///x/y/z/'

    # really why would this ever come up but ok
    url = URL('file:///path/to/heck')
    url.scheme = 'mailto'
    assert url.to_text() == 'mailto:/path/to/heck'

    return
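
# When the '//' guessing above isn't wanted, a scheme's netloc convention
# can be registered up front via urlutils.register_scheme(). A hedged
# sketch: the 'flubber' scheme is made up, and the host assignment plus
# the rendering in the final comment are assumptions, not asserted behavior:
def _demo_register_scheme():
    urlutils.register_scheme('flubber', uses_netloc=True)
    url = URL()
    url.scheme, url.host, url.path = 'flubber', 'joshua', '/wargames'
    return url.to_text()  # anticipated: 'flubber://joshua/wargames'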
("http://example.com", ["http://example.com"]), ("Just a www.example.com link.", ["https://www.example.com"]), ("www.a-link.com", ["https://www.a-link.com"]), ("www.a-link.com and www.b-link.com/blogs extra", ["https://www.a-link.com", "https://www.b-link.com/blogs"]) ] def test_find_all_links_basic(): target = """hi my name is prince nigeria, please visit my website http://richprince.biz or if that's blocked try https://getprince.ly! Thanks for your attention.bye! PS if those ports are blocked, how about trying https://crownbux.afamilycompany:broken/affiliate PPS if all else fails you can always mailto:thePrince@machovelli.an """ urls = find_all_links(target) assert len(urls) == 2 def test_find_all_links(): prefix = "a little something before, " suffix = " a bit of another after." for content, expected_links in FAL_TESTS: text = prefix + content + suffix links = find_all_links(text) assert len(links) == len(expected_links) for link, expected in zip(links, expected_links): assert link.to_text(full_quote=False) == expected link_tokens = find_all_links(text, with_text=True) assert link_tokens[0].startswith(prefix) assert link_tokens[-1].endswith(suffix) def test_unicodey(): unicodey = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/' '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}' '?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}=' '\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}' '#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}') url = URL(unicodey) assert url.host == 'é.com' assert url.path_parts[1] == '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}' assert url.to_text(full_quote=False) == unicodey fully_quoted = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA' assert url.to_text(full_quote=True) == fully_quoted def test_str_repr(): assert str(URL("http://googlewebsite.com/e-shops.aspx")) == "http://googlewebsite.com/e-shops.aspx" boltons-25.0.0/tox.ini000066400000000000000000000003201475005545200145730ustar00rootroot00000000000000[tox] envlist = py37,py39,py310,py311,py312,py313,pypy3 [testenv] changedir = .tox deps = -rrequirements-test.txt commands = pytest --doctest-modules {envsitepackagesdir}/boltons {toxinidir}/tests {posargs}