pax_global_header00006660000000000000000000000064144442241360014516gustar00rootroot0000000000000052 comment=d9503c08f8f83ce8a89bfcc45fec136868756243 natsort-8.4.0/000077500000000000000000000000001444422413600132215ustar00rootroot00000000000000natsort-8.4.0/.coveragerc000066400000000000000000000006061444422413600153440ustar00rootroot00000000000000[report] # Regexes for lines to exclude from consideration exclude_lines = # Have to re-enable the standard pragma pragma: no cover # Don't complain if tests don't hit defensive assertion code: raise AssertionError raise NotImplementedError raise$ # Don't complain if non-runnable code isn't run: if 0: if __name__ == .__main__.: ignore_errors = True natsort-8.4.0/.github/000077500000000000000000000000001444422413600145615ustar00rootroot00000000000000natsort-8.4.0/.github/ISSUE_TEMPLATE/000077500000000000000000000000001444422413600167445ustar00rootroot00000000000000natsort-8.4.0/.github/ISSUE_TEMPLATE/bug_report.md000066400000000000000000000013761444422413600214450ustar00rootroot00000000000000--- name: Bug report about: Report unexpected behavior, a crash, or incorrect results --- **Describe the bug** A clear and concise description of what the bug is. **Expected behavior** A clear and concise description of what you expected to happen. **Environment (please complete the following information):** - Python Version: [e.g. 3.6] - OS [e.g. Windows, Fedora] - If the bug involves `LOCALE` or `humansorted`: - Is `PyICU` installed? - Do you have a locale set? If so, to what? **To Reproduce** Include a Minimum, Complete, Verifiable Example. If there is a traceback (or error message), **please** include the *entire* traceback (or error message), even if you think it is too big. See https://stackoverflow.com/help/mcve for an explanation. 
natsort-8.4.0/.github/ISSUE_TEMPLATE/feature_request.md000066400000000000000000000011231444422413600224660ustar00rootroot00000000000000--- name: Feature request about: Suggest or request an enhancement --- **Describe the feature or enhancement** Be as descriptive and precise as possible. **Provide a concrete example of how the feature or enhancement will improve `natsort`** Code examples are an excellent way to show how this feature or enhancement will help. To make your case stronger, show the current workaround due to the lack of the feature. What is the return-on-investment for including the feature or enhancement? **Would you be willing to submit a Pull Request for this feature?** Extra help is *always* welcome. natsort-8.4.0/.github/ISSUE_TEMPLATE/question.md000066400000000000000000000003711444422413600211360ustar00rootroot00000000000000--- name: Question about: Inquiry about natsort --- - [ ] I have read the [`natsort` documentation](https://natsort.readthedocs.io/en/master/) and the [README](https://github.com/SethMMorton/natsort#natsort), and my question is still not answered natsort-8.4.0/.github/workflows/000077500000000000000000000000001444422413600166165ustar00rootroot00000000000000natsort-8.4.0/.github/workflows/code-quality.yml000066400000000000000000000035161444422413600217460ustar00rootroot00000000000000name: Code Quality # Only run on branches (e.g. not tags) on: push: branches: - "*" pull_request: branches: - "*" jobs: formatting: name: Formatting runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.8' - name: Install black run: pip install black - name: Run black run: black --quiet --check --diff . 
static-analysis: name: Static Analysis runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.8' - name: Install Flake8 run: pip install flake8 flake8-import-order flake8-bugbear pep8-naming - name: Run Flake8 run: flake8 type-checking: name: Type Checking runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.8' - name: Install MyPy run: pip install mypy hypothesis pytest pytest-mock fastnumbers - name: Run MyPy run: mypy --strict natsort tests package-validation: name: Package Validation runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.8' - name: Install Validators run: pip install twine check-manifest - name: Run Validation run: | check-manifest --ignore ".github*,*.md,.coveragerc" python setup.py sdist twine check dist/* natsort-8.4.0/.github/workflows/deploy.yml000066400000000000000000000012401444422413600206320ustar00rootroot00000000000000name: Deploy # Only run on tagged commits on: push: tags: - "*" jobs: deploy: name: Deploy runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: python-version: 3.9 - name: Build Source Distribution and Wheel run: | pip install wheel python setup.py sdist --format=gztar pip wheel . -w dist - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ password: ${{ secrets.pypi_token_password }} natsort-8.4.0/.github/workflows/tests.yml000066400000000000000000000040241444422413600205030ustar00rootroot00000000000000name: Tests # Only run on branches (e.g. 
not tags) on: push: branches: - "*" pull_request: branches: - "*" jobs: tests: name: Tests runs-on: ${{ matrix.os }} strategy: matrix: python-version: [3.7, 3.8, 3.9, "3.10", "3.11"] os: [ubuntu-latest] extras: [false] include: - {python-version: 3.9, os: windows-latest, extras: false} - {python-version: 3.9, os: macos-latest, extras: false} - {python-version: 3.9, os: ubuntu-latest, extras: true} steps: - name: Checkout code uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install Locales if: matrix.os == 'ubuntu-latest' run: | sudo apt-get update sudo apt-get install language-pack-de language-pack-en language-pack-cs - name: Install ICU if: matrix.extras run: sudo apt-get install libicu-dev - name: Install Dependencies run: | python -m pip install --upgrade pip python -m pip install tox tox-gh-actions codecov - name: Set Extras Environment if: matrix.extras run: echo WITH_EXTRAS=fast,icu >> $GITHUB_ENV - name: Run Tests run: tox - name: Generate Coverage Report run: coverage xml - name: Upload to CodeCov uses: codecov/codecov-action@v3 test-bsd: name: Test on FreeBSD runs-on: macos-12 steps: - name: Checkout code uses: actions/checkout@v3 - name: Install and Run Tests uses: vmactions/freebsd-vm@v0 with: prepare: | pkg install -y python3 run: | python3 -m venv .venv source .venv/bin/activate.csh pip install --upgrade pip pip install pytest pytest-mock hypothesis python -m pytest --hypothesis-profile=slow-tests natsort-8.4.0/.gitignore000066400000000000000000000006051444422413600152120ustar00rootroot00000000000000*.py[co] # Packages *.egg *.eggs *.egg-info dist build eggs parts bin var sdist develop-eggs .installed.cfg .python-version # We are using MANIFEST.in instead MANIFEST # Installer logs pip-log.txt # Unit test / coverage reports .hypothesis .coverage .tox .cache .pytest_cache .pytest .envrc .venv #Translations *.mo #Mr Developer 
.mr.developer.cfg # PyCharm .idea # VSCode .vscode natsort-8.4.0/CHANGELOG.md000066400000000000000000000657661444422413600150560ustar00rootroot00000000000000Unreleased --- [8.4.0] - 2023-06-19 --- ### Changed - The changelog now only explicitly exists in the repository (issue [#162](https://github.com/SethMMorton/natsort/issues/162)) ### Fixed - Inputs with spaces adjacent to the extension are now sorted correctly for `os_sorted` on Windows (issues [#165](https://github.com/SethMMorton/natsort/issues/165) and [#166](https://github.com/SethMMorton/natsort/pull/166)) [8.3.1] - 2023-03-01 --- ### Fixed - Broken test found on FreeBSD. **This change has no effect outside fixing tests** (issue [#161](https://github.com/SethMMorton/natsort/issues/161)) [8.3.0] - 2023-02-27 --- ### Added - The `PRESORT` option to the `ns` enum to attain consistent sort order in certain corner cases (issue [#149](https://github.com/SethMMorton/natsort/issues/149)) - Logic to ensure `None` and NaN are sorted in a consistent order (issue [#149](https://github.com/SethMMorton/natsort/issues/149)) - Explicit Python 3.11 support ### Changed - Only convert to `str` if necessary in `os_sorted` ([@Dobatymo](https://github.com/Dobatymo), issues [#157](https://github.com/SethMMorton/natsort/issues/157) and [#158](https://github.com/SethMMorton/natsort/issues/158)) - Attempt to use new `fastnumbers` functionality if available - Move non-API documentation to the GitHub wiki ### Removed - Support for EOL Python 3.6 [8.2.0] - 2022-09-01 --- ### Changed - Auto-coerce `pathlib.Path` objects to `str` since it is the least astonishing behavior ([@Gilthans](https://github.com/Gilthans), issues [#152](https://github.com/SethMMorton/natsort/issues/152) and [#153](https://github.com/SethMMorton/natsort/issues/153)) - Reduce strictness of type hints to avoid over-constraining client code (issues [#154](https://github.com/SethMMorton/natsort/issues/154) and 
[#155](https://github.com/SethMMorton/natsort/issues/155)) [8.1.0] - 2022-01-30 --- ### Changed - When using `ns.PATH`, only split off a maximum of two suffixes from a file name (issues [#145](https://github.com/SethMMorton/natsort/issues/145) and [#146](https://github.com/SethMMorton/natsort/issues/146)). [8.0.2] - 2021-12-14 --- ### Fixed - Bug where sorting paths fail if one of the paths is '.' (issues [#142](https://github.com/SethMMorton/natsort/issues/142) and [#143](https://github.com/SethMMorton/natsort/issues/143)) [8.0.1] - 2021-12-10 --- ### Fixed - Compose unicode characters when using locale to ensure sorting is correct across all locales (issues [#140](https://github.com/SethMMorton/natsort/issues/140) and [#141](https://github.com/SethMMorton/natsort/issues/141)) [8.0.0] - 2021-11-03 --- - Re-release 7.2.0 as 8.0.0 because introduction of type hints can break CI builds (issue [#139](https://github.com/SethMMorton/natsort/issues/139)) [7.2.0] - 2021-11-02 (Yanked) --- ### Added - Type hints (contributions from [@thethiny](https://github.com/thethiny) and [@domdfcoding](https://github.com/domdfcoding), issues [#132](https://github.com/SethMMorton/natsort/issues/132), [#135](https://github.com/SethMMorton/natsort/issues/135), and [#138](https://github.com/SethMMorton/natsort/issues/138)) - Explicit testing for Python 3.10 ### Removed - Support for Python 3.4 and Python 3.5 [7.1.1] - 2021-01-24 --- ### Changed - Use GitHub Actions instead of Travis-CI (issue [#125](https://github.com/SethMMorton/natsort/issues/125)) - No longer pin testing dependencies (issue [#126](https://github.com/SethMMorton/natsort/issues/126)) ### Fixed - Correct a minor typo ([@madphysicist](https://github.com/madphysicist), issue [#127](https://github.com/SethMMorton/natsort/issues/127)) [7.1.0] - 2020-11-19 --- ### Added - ``os_sorted``, ``os_sort_keygen``, and ``os_sort_key`` to better support sorting like the file browser on the current operating system - this closes the 
long-standing issue [#41](https://github.com/SethMMorton/natsort/issues/41) - Support for Python 3.9 ([@swt2c](https://github.com/swt2c), issue [#119](https://github.com/SethMMorton/natsort/issues/119)) ### Changed - MacOS unit tests run on native Python - Treat `None` like `NaN` internally to avoid `TypeError` (issue [#117](https://github.com/SethMMorton/natsort/issues/117)) - No longer fail tests every time a new Python version is released (issue [#122](https://github.com/SethMMorton/natsort/issues/122)) ### Fixed - Various typos, missing figures, and out-of-date information in the "How it works" - Fix typo in CHANGELOG ([@graingert](https://github.com/graingert), issue [#113](https://github.com/SethMMorton/natsort/issues/113)) - Updated "How it works" to account for Pandas updates ([@kuraga](https://github.com/kuraga), issue [#116](https://github.com/SethMMorton/natsort/issues/116)) [7.0.1] - 2020-01-27 --- ### Fixed - Bug that caused incorrect sorting when using locales that have a `"."` character as the thousands separator. 
[7.0.0] - 2020-01-08 --- ### Added - Ability to deploy directly from TravisCI ([@hugovk](https://github.com/hugovk), issue [#106](https://github.com/SethMMorton/natsort/issues/106)) - Release checklist in `RELEASING.md` ([@hugovk](https://github.com/hugovk), issue [#106](https://github.com/SethMMorton/natsort/issues/106)) ### Changed - Updated auxiliary shell scripts to be written in python, and added ability to call these from `tox` - Improved Travis-CI experience - Update testing dependency versions ### Removed - Support for Python 2 [6.2.0] - 2019-11-13 --- ### Added - Support for Python 3.8 ([@hugovk](https://github.com/hugovk), issue [#104](https://github.com/SethMMorton/natsort/issues/104)) ### Changed - `index_natsorted` internally now uses tuples for index-element pairs instead of lists - Added a TOC to the README - Python 3.4 is no longer included in testing ### Fixed - Pin testing dependencies to prevent CI breaking due to third-party library changes ### Removed - Introduction page in documentation [6.1.0] - 2019-11-09 --- ### Added - Expose `numeric_regex_chooser` as a public function for ease in making key functions - Example in the documentation on how to sort numbers with units - Automated testing support for macos and Windows (issue [#91](https://github.com/SethMMorton/natsort/issues/91)) ### Changed - Update CHANGELOG format to style from https://keepachangelog.com/ (issue [#92](https://github.com/SethMMorton/natsort/issues/92)) ### Fixed - Removed dependency on `sudo` in TravisCI configuration ([@hugovk](https://github.com/hugovk), issue [#99](https://github.com/SethMMorton/natsort/issues/99)) - Documentation typos ([@jdufresne](https://github.com/jdufresne), issue [#94](https://github.com/SethMMorton/natsort/issues/94)) ([@cpburnz](https://github.com/cpburnz), issue [#95](https://github.com/SethMMorton/natsort/issues/95)) [6.0.0] - 2019-02-04 --- ### Changed - Simply Travis-CI configuration ([@jdufresne](https://github.com/jdufresne), issue 
[#88](https://github.com/SethMMorton/natsort/issues/88)) ### Fixed - Fix README rendering in PyPI ([@altendky](https://github.com/altendky), issue [#89](https://github.com/SethMMorton/natsort/issues/89)) ### Removed - Drop support for Python 2.6 and 3.3 ([@jdufresne](https://github.com/jdufresne), issue [#70](https://github.com/SethMMorton/natsort/issues/70)) - Remove deprecated APIs (kwargs `number_type`, `signed`, `exp`, `as_path`, `py3_safe`; enums `ns.TYPESAFE`, `ns.DIGIT`, `ns.VERSION`; functions `versorted`, `index_versorted`) (issue [#81](https://github.com/SethMMorton/natsort/issues/81)) - Remove `pipenv` as a dependency for building (issue [#86](https://github.com/SethMMorton/natsort/issues/86)) [5.5.0] - 2018-11-18 --- ### Added - `CHANGELOG.rst` to the top-level of the repository (issue [#85](https://github.com/SethMMorton/natsort/issues/85)) ### Changed - Documentation, packaging, and CI cleanup ([@jdufresne](https://github.com/jdufresne), issues [#69](https://github.com/SethMMorton/natsort/issues/69), [#71](https://github.com/SethMMorton/natsort/issues/71)-[#80](https://github.com/SethMMorton/natsort/issues/80)) - Consolidate API documentation into a single page (issue [#82](https://github.com/SethMMorton/natsort/issues/82)) ### Deprecated - Formally deprecated old or misleading APIs (issue [#83](https://github.com/SethMMorton/natsort/issues/83)) ### Fixed - Add back support for very old versions of setuptools (issue [#84](https://github.com/SethMMorton/natsort/issues/84)) [5.4.1] - 2018-09-09 --- ### Changed - Code format and quality checking infrastructure (issue [#68](https://github.com/SethMMorton/natsort/issues/68)) ### Fixed - Error in a newly added test (issues [#65](https://github.com/SethMMorton/natsort/issues/65) and [#67](https://github.com/SethMMorton/natsort/issues/67)) [5.4.0] - 2018-09-06 --- ### Changed - Re-expose `natsort_key` as "public" and remove the associated `DeprecationWarning` - Better developer documentation - Refactor tests 
(issue [#66](https://github.com/SethMMorton/natsort/issues/66)) - Bump allowed [`fastnumbers`](https://github.com/SethMMorton/fastnumbers) version [5.3.3] - 2018-07-07 --- ### Added - Enable Python 3.7 support in Travis-CI (issue [#61](https://github.com/SethMMorton/natsort/issues/61)) ### Changed - Update docs with a FAQ and quick how-it-works (issue [#60](https://github.com/SethMMorton/natsort/issues/60)) ### Fixed - `StopIteration` error in the testing code [5.3.2] - 2018-05-17 --- ### Fixed - Bug that prevented install on old versions of `setuptools` (issues [#55](https://github.com/SethMMorton/natsort/issues/55) and [#56](https://github.com/SethMMorton/natsort/issues/56)) - Revert layout from `src/natsort/` back to `natsort/` to make user testing simpler (issues [#57](https://github.com/SethMMorton/natsort/issues/57) and [#58](https://github.com/SethMMorton/natsort/issues/58)) [5.3.1] - 2018-05-14 --- ### Added - [`bumpversion`](https://github.com/c4urself/bump2version) infrastructure - Extras can be installed by "[]" notation ### Changed - No bugfixes or features, just infrastructure and installation updates - Move to defining dependencies with `Pipfile` - Development layout is now `src/natsort/` instead of `natsort/` [5.3.0] - 2018-04-20 --- ### Added - Ability to consider unicode-decimal numbers as numbers (issues [#52](https://github.com/SethMMorton/natsort/issues/52) and [#54](https://github.com/SethMMorton/natsort/issues/54)) ### Fixed - Bug in assessing [`fastnumbers`](https://github.com/SethMMorton/fastnumbers) version at import-time ([@hholzgra](https://github.com/hholzgra), issues [#51](https://github.com/SethMMorton/natsort/issues/51) and [#53](https://github.com/SethMMorton/natsort/issues/53)) [5.2.0] - 2018-02-14 --- ### Added - `ns.NUMAFTER` to cause numbers to be placed after non-numbers (issues [#48](https://github.com/SethMMorton/natsort/issues/48) and [#49](https://github.com/SethMMorton/natsort/issues/49)) - `natcmp` function (Python 2 only) 
([@rinslow](https://github.com/rinslow), issue [#47](https://github.com/SethMMorton/natsort/issues/47)) [5.1.1] - 2017-11-11 --- ### Added - Additional unicode number support for Python 3.7 - Information on how to install and test (issue [#46](https://github.com/SethMMorton/natsort/issues/46)) [5.1.0] - 2017-08-19 --- ### Changed - All Unicode input is now normalized (issues [#44](https://github.com/SethMMorton/natsort/issues/44) and [#45](https://github.com/SethMMorton/natsort/issues/45)) ### Fixed - `StopIteration` warning on Python 3.6+ ([@lykinsbd](https://github.com/lykinsbd), issues [#42](https://github.com/SethMMorton/natsort/issues/42) and [#43](https://github.com/SethMMorton/natsort/issues/43)) [5.0.3] - 2017-04-30 --- - Improved development infrastructure - Migrated documentation to ReadTheDocs [5.0.2] - 2017-01-02 --- ### Added - Additional unicode number support for Python 3.6 - "how does it work?" section to the documentation ### Changed - Renamed several internal functions and variables to improve clarity - Improved documentation examples [5.0.1] - 2016-06-04 --- ### Added - The `ns` enum attributes can now be imported from the top-level namespace ### Fixed - Bug with the `from natsort import *` mechanism - Bug with using `natsort` with `python -OO` (issues [#38](https://github.com/SethMMorton/natsort/issues/38) and [#39](https://github.com/SethMMorton/natsort/issues/39)) [5.0.0] - 2016-05-08 --- ### Added - `chain_functions` function for convenience in creating a complex user-given `key` from several existing functions ### Changed - `ns.LOCALE`/`humansorted` now accounts for thousands separators (issue [#36](https://github.com/SethMMorton/natsort/issues/36)) - Refactored entire codebase to be more functional (as in use functions as units). Previously, the code was rather monolithic and difficult to follow. 
The goal is that with the code existing in smaller units, contributing will be easier (issue [#37](https://github.com/SethMMorton/natsort/issues/37)) - Increased speed of execution (came for free with the new functional approach because the new factory function paradigm eliminates most `if` branches during execution). For the most cases, the code is 30-40% faster than version 4.0.4. If using `ns.LOCALE` or `humansorted`, the code is 1100% faster than version 4.0.4 - Improved clarity of documentation with regards to locale-aware sorting ### Deprecated - `ns.TYPESAFE` option as it is now always on (due to a new iterator-based algorithm, the typesafe function is now cheap) [4.0.4] - 2015-11-01 --- ### Changed - Improved coverage of unit tests - Unit tests use new and improved hypothesis library ### Fixed - Compatibility issues with Python 3.5 [4.0.3] - 2015-06-25 --- ### Fixed - Bad install on last release (sorry guys!) (issue [#30](https://github.com/SethMMorton/natsort/issues/30)) [4.0.2] - 2015-06-24 --- ### Changed - Consolidated under-the-hood compatibility functionality ### Fixed - Python 2.6 and Python 3.2 compatibility. Unit testing is now performed for these versions ([@dpetzold](https://github.com/dpetzold), issue [#29](https://github.com/SethMMorton/natsort/issues/29)) [4.0.1] - 2015-06-04 --- ### Added - Support for sorting NaN by internally converting to -Infinity or +Infinity (issue [#27](https://github.com/SethMMorton/natsort/issues/27)) [4.0.0] - 2015-05-17 --- ### Changed - Made default behavior of `natsort` search for unsigned ints, rather than signed floats. 
This is a backwards-incompatible change but in 99% of use cases it should not require any end-user changes (issue [#20](https://github.com/SethMMorton/natsort/issues/20)) - Improved handling of locale-aware sorting on systems where the underlying locale library is broken (issue [#34](https://github.com/SethMMorton/natsort/issues/34)) - Greatly improved all unit tests by adding the `hypothesis` library [3.5.6] - 2015-04-06 --- ### Added - `UNGROUPLETTERS` algorithm to get the case-grouping behavior of an ordinal sort when using `LOCALE` (issue [#23](https://github.com/SethMMorton/natsort/issues/23)) - Convenience functions `decoder`, `as_ascii`, and `as_utf8` for dealing with bytes types [3.5.5] - 2015-04-04 --- ### Added - `realsorted` and `index_realsorted` functions for forward-compatibility with >= 4.0.0 ### Changed - Made explanation of when to use `TYPESAFE` more clear in the docs [3.5.4] - 2015-04-02 --- ### Fixed - Bug where a `TypeError` was raised if a string containing a leading number was sorted with alpha-only strings when `LOCALE` is used (issue [#22](https://github.com/SethMMorton/natsort/issues/22)) [3.5.3] - 2015-03-26 --- ### Changed - Documentation updates to better describe locale bug, and illustrate upcoming default behavior change - Internal improvements, including making test suite more granular ### Fixed - Bug where `--reverse-filter` option in shell script was not getting checked for correctness [3.5.2] - 2015-01-13 --- ### Added - A `pathlib.Path` object is converted to a `str` if `ns.PATH` is enabled (issue [#16](https://github.com/SethMMorton/natsort/issues/16)) [3.5.1] - 2014-09-25 --- ### Changed - Refactored modules so that only the public API was in `natsort.py` and `ns_enum.py` - Refactored all import statements to be absolute, not relative ### Fixed - Bug that caused list/tuples to fail when using `ns.LOWERCASEFIRST` or `ns.IGNORECASE` (issue [#15](https://github.com/SethMMorton/natsort/issues/15)) [3.5.0] - 2014-09-02 --- ### Added 
- `alg` argument to the `natsort` functions. This argument accepts an enum that is used to indicate the options the user wishes to use. The `number_type`, `signed`, `exp`, `as_path`, and `py3_safe` options are being deprecated and will become (undocumented) keyword-only options in `natsort` version 4.0.0 - The `humansorted` convenience function as a convenience to locale-aware sorting - The user can now modify how `natsort` handles the case of non-numeric characters (issue [#14](https://github.com/SethMMorton/natsort/issues/14)) - The user can now instruct `natsort` to use locale-aware sorting, which allows `natsort` to perform true "human sorting" (issue [#14](https://github.com/SethMMorton/natsort/issues/14)) - Locale functionality to the shell script [3.4.1] - 2014-08-12 --- ### Changed - `natsort` will now use the [`fastnumbers`](https://github.com/SethMMorton/fastnumbers) module if it is installed. This gives up to an extra 30% boost in speed over the previous performance enhancements - Made documentation point to more `natsort` resources, and also added a new example in the examples section [3.4.0] - 2014-07-19 --- ### Added - `natsort_keygen` function that will generate a wrapped version of `natsort_key` that is easier to call. `natsort_key` is now set to deprecate at natsort version 4.0.0 - `as_path` option to `natsorted` & co. that will try to treat input strings as filepaths. This will help yield correct results for OS-generated inputs like `['/p/q/o.x', '/p/q (1)/o.x', '/p/q (10)/o.x', '/p/q/o (1).x']` (issue [#3](https://github.com/SethMMorton/natsort/issues/3)) - `order_by_index` function to help in using the output of `index_natsorted` and `index_versorted` - `reverse` option to `natsorted` & co. 
to make its API more similar to the builtin 'sorted' - More unit tests - Auxiliary test code that helps in profiling and stress-testing - Support for coveralls.io ### Changed - Massive performance enhancements for string input (1.8x-2.0x), at the expense of reduction in speed for numeric input (~2.0x) - note that sorting numbers still only takes 0.6x the time of sorting strings - Entire codebase is now PyFlakes and PEP8 compliant - Reworked the documentation, moving most of it to PyPI's hosting platform ### Fixed - Bug that caused user's options to the `natsort_key` to not be passed on to recursive calls of `natsort_key` (issue [#12](https://github.com/SethMMorton/natsort/issues/12)) [3.3.0] - 2014-06-28 --- ### Added - `versorted` method for more convenient sorting of versions (issue [#11](https://github.com/SethMMorton/natsort/issues/11)) - Unit test coverage (99%) ### Changed - Updated command-line tool `--number_type` option with 'version' and 'ver' to make it more clear how to sort version numbers - Moved unit-testing mechanism from being docstring-based to actual unit tests in actual functions (issue [#10](https://github.com/SethMMorton/natsort/issues/10)) - Made docstrings for public functions mirror the README API - Connected `natsort` development to Travis-CI to help ensure quality releases [3.2.1] - 2014-06-20 --- ### Fixed - Re-"Fixed" unorderable types issue on Python 3.x - this workaround is for when the problem occurs in the middle of the string (issue [#7](https://github.com/SethMMorton/natsort/issues/7) again) [3.2.0] - 2014-05-07 --- ### Fixed - "Fixed" unorderable types issue on Python 3.x with a workaround that attempts to replicate the Python 2.x behavior by putting all the numbers (or strings that begin with numbers) first (issue [#7](https://github.com/SethMMorton/natsort/issues/7)) ### Removed - Now explicitly excluding `__pycache__` from releases by adding a prune statement to MANIFEST.in [3.1.2] - 2014-05-05 --- ### Added - `setup.cfg` to 
support universal wheels (issue [#6](https://github.com/SethMMorton/natsort/issues/6)) - Python 3.0 and Python 3.1 as requiring the argparse module [3.1.1] - 2014-03-01 --- ### Added - Ability to sort lists of lists (issue [#5](https://github.com/SethMMorton/natsort/issues/5)) ### Changed - Cleaned up import statements [3.1.0] - 2014-01-20 --- ### Added - `signed` and `exp` options to allow finer tuning of the sorting - Doctests - New shell script options that correspond to `signed` and `exp` - In the shell script the user can now specify multiple numbers to exclude or multiple ranges ### Changed - Entire codebase now works for both Python 2 and Python 3 without needing to run `2to3` - Updated all doctests - Further simplified the `natsort` base code by removing unneeded functions. - Simplified documentation where possible - Improved the shell script code - Made the shell script documentation less "path"-centric to make it clear it is not just for sorting file paths ### Removed - The shell script filesystem-based options because these can be achieved better through a pipeline by which to filter [3.0.2] - 2013-10-01 --- ### Changed - Made float, int, and digit searching algorithms all share the same base function - Made the `__version__` variable available when importing the module ### Fixed - Outdated comments [3.0.1] - 2013-08-15 --- ### Added - Support for unicode strings (issue [#2](https://github.com/SethMMorton/natsort/issues/2)) ### Fixed - Empty string removal function ### Removed - Extraneous `string2int` function [3.0.0] - 2013-07-13 --- ### Added - A `number_type` argument to the sorting functions to specify how liberal to be when deciding what a number is ### Changed - Reworked the documentation [2.2.0] - 2013-06-25 --- ### Added - `key` attribute to `natsorted` and `index_natsorted` so that it mimics the functionality of the built-in `sorted` (issue [#1](https://github.com/SethMMorton/natsort/issues/1)) - Tests to reflect the new functionality, as well 
as tests demonstrating how to get similar functionality using `natsort_key` [2.1.0] - 2012-12-05 --- ### Changed - Reorganized package - Now using a platform independent shell script generator (`entry_points` from distribute) - Can now execute `natsort` from command line with `python -m natsort` as well [2.0.2] - 2012-11-30 --- ### Added - The `use_2to3` option to `setup.py` - Include `distribute_setup.py` to the distribution - Dependency to the `argparse` module (for python2.6) [2.0.1] - 2012-11-21 --- ### Added - Tests into the natsort.py file itself ### Changed - Reorganized directory structure [2.0.0] - 2012-11-16 --- ### Added - Better README documentation - Doctests ### Changed - Sorting algorithm to support floats (including exponentials) and basic version number support [8.4.0]: https://github.com/SethMMorton/natsort/compare/8.3.1...8.4.0 [8.3.1]: https://github.com/SethMMorton/natsort/compare/8.3.0...8.3.1 [8.3.0]: https://github.com/SethMMorton/natsort/compare/8.2.0...8.3.0 [8.2.0]: https://github.com/SethMMorton/natsort/compare/8.1.0...8.2.0 [8.1.0]: https://github.com/SethMMorton/natsort/compare/8.0.2...8.1.0 [8.0.2]: https://github.com/SethMMorton/natsort/compare/8.0.1...8.0.2 [8.0.1]: https://github.com/SethMMorton/natsort/compare/8.0.0...8.0.1 [8.0.0]: https://github.com/SethMMorton/natsort/compare/7.2.0...8.0.0 [7.2.0]: https://github.com/SethMMorton/natsort/compare/7.1.1...7.2.0 [7.1.1]: https://github.com/SethMMorton/natsort/compare/7.1.0...7.1.1 [7.1.0]: https://github.com/SethMMorton/natsort/compare/7.0.1...7.1.0 [7.0.1]: https://github.com/SethMMorton/natsort/compare/7.0.0...7.0.1 [7.0.0]: https://github.com/SethMMorton/natsort/compare/6.2.0...7.0.0 [6.2.0]: https://github.com/SethMMorton/natsort/compare/6.1.0...6.2.0 [6.1.0]: https://github.com/SethMMorton/natsort/compare/6.0.0...6.1.0 [6.0.0]: https://github.com/SethMMorton/natsort/compare/5.5.0...6.0.0 [5.5.0]: https://github.com/SethMMorton/natsort/compare/5.4.1...5.5.0 [5.4.1]: 
https://github.com/SethMMorton/natsort/compare/5.4.0...5.4.1 [5.4.0]: https://github.com/SethMMorton/natsort/compare/5.3.3...5.4.0 [5.3.3]: https://github.com/SethMMorton/natsort/compare/5.3.2...5.3.3 [5.3.2]: https://github.com/SethMMorton/natsort/compare/5.3.1...5.3.2 [5.3.1]: https://github.com/SethMMorton/natsort/compare/5.3.0...5.3.1 [5.3.0]: https://github.com/SethMMorton/natsort/compare/5.2.0...5.3.0 [5.2.0]: https://github.com/SethMMorton/natsort/compare/5.1.1...5.2.0 [5.1.1]: https://github.com/SethMMorton/natsort/compare/5.1.0...5.1.1 [5.1.0]: https://github.com/SethMMorton/natsort/compare/5.0.3...5.1.0 [5.0.3]: https://github.com/SethMMorton/natsort/compare/5.0.2...5.0.3 [5.0.2]: https://github.com/SethMMorton/natsort/compare/5.0.1...5.0.2 [5.0.1]: https://github.com/SethMMorton/natsort/compare/5.0.0...5.0.1 [5.0.0]: https://github.com/SethMMorton/natsort/compare/4.0.4...5.0.0 [4.0.4]: https://github.com/SethMMorton/natsort/compare/4.0.3...4.0.4 [4.0.3]: https://github.com/SethMMorton/natsort/compare/4.0.2...4.0.3 [4.0.2]: https://github.com/SethMMorton/natsort/compare/4.0.1...4.0.2 [4.0.1]: https://github.com/SethMMorton/natsort/compare/4.0.0...4.0.1 [4.0.0]: https://github.com/SethMMorton/natsort/compare/3.5.6...4.0.0 [3.5.6]: https://github.com/SethMMorton/natsort/compare/3.5.5...3.5.6 [3.5.5]: https://github.com/SethMMorton/natsort/compare/3.5.4...3.5.5 [3.5.4]: https://github.com/SethMMorton/natsort/compare/3.5.3...3.5.4 [3.5.3]: https://github.com/SethMMorton/natsort/compare/3.5.2...3.5.3 [3.5.2]: https://github.com/SethMMorton/natsort/compare/3.5.1...3.5.2 [3.5.1]: https://github.com/SethMMorton/natsort/compare/3.5.0...3.5.1 [3.5.0]: https://github.com/SethMMorton/natsort/compare/3.4.1...3.5.0 [3.4.1]: https://github.com/SethMMorton/natsort/compare/3.4.0...3.4.1 [3.4.0]: https://github.com/SethMMorton/natsort/compare/3.3.0...3.4.0 [3.3.0]: https://github.com/SethMMorton/natsort/compare/3.2.1...3.3.0 [3.2.1]: 
https://github.com/SethMMorton/natsort/compare/3.2.0...3.2.1 [3.2.0]: https://github.com/SethMMorton/natsort/compare/3.1.2...3.2.0 [3.1.2]: https://github.com/SethMMorton/natsort/compare/3.1.1...3.1.2 [3.1.1]: https://github.com/SethMMorton/natsort/compare/3.1.0...3.1.1 [3.1.0]: https://github.com/SethMMorton/natsort/compare/3.0.2...3.1.0 [3.0.2]: https://github.com/SethMMorton/natsort/compare/3.0.1...3.0.2 [3.0.1]: https://github.com/SethMMorton/natsort/compare/3.0.0...3.0.1 [3.0.0]: https://github.com/SethMMorton/natsort/compare/2.2.0...3.0.0 [2.2.0]: https://github.com/SethMMorton/natsort/compare/2.1.0...2.2.0 [2.1.0]: https://github.com/SethMMorton/natsort/compare/2.0.2...2.1.0 [2.0.2]: https://github.com/SethMMorton/natsort/compare/2.0.1...2.0.2 [2.0.1]: https://github.com/SethMMorton/natsort/compare/2.0.0...2.0.1 [2.0.0]: https://github.com/SethMMorton/natsort/releases/tag/2.0.0 natsort-8.4.0/CODE_OF_CONDUCT.md000066400000000000000000000063101444422413600160200ustar00rootroot00000000000000# Contributor Covenant Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at drtuba78@gmail.com. 
The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [https://www.contributor-covenant.org/version/1/4/code-of-conduct.html][version] [homepage]: https://www.contributor-covenant.org/ [version]: https://www.contributor-covenant.org/version/1/4/code-of-conduct.html natsort-8.4.0/CONTRIBUTING.md000066400000000000000000000044701444422413600154570ustar00rootroot00000000000000# Contributing If you have an idea for how to improve `natsort`, please contribute! It can be as simple as a bug fix or documentation update, or as complicated as a more robust algorithm. Contributions that change the public API of `natsort` will have to ensure that the library does not become less usable after the contribution and is backwards-compatible (unless there is a good reason not to be). Located in the `dev/` folder is development collateral such as formatting and patching scripts. The only development collateral not in the `dev/` folder are those files that are expected to exist in the top-level directory (such as `setup.py`, `tox.ini`, and CI configuration). All of these scripts can either be run with the python standard library, or have hooks in `tox`.
I do not have strong opinions on how one should contribute, so I have copy/pasted some text verbatim from the [Contributor's Guide](http://docs.python-requests.org/en/latest/dev/contributing/) section of the [requests](https://github.com/kennethreitz/requests) library in lieu of coming up with my own. > ### Steps for Submitting Code > When contributing code, you'll want to follow this checklist: > - Fork the repository on GitHub. > - Run the tests to confirm they all pass on your system. If they don't, you'll need to investigate why they fail. If you're unable to diagnose this yourself, raise it as a bug report. > - Write tests that demonstrate your bug or feature. Ensure that they fail. > - Make your change. > - Run the entire test suite again, confirming that all tests pass including the ones you just added. > - Send a GitHub Pull Request to the main repository's main branch. GitHub Pull Requests are the expected method of code collaboration on this project. > ### Documentation Contributions > Documentation improvements are always welcome! The documentation files live in the docs/ directory of the codebase. They're written in [reStructuredText](http://docutils.sourceforge.net/rst.html), and use [Sphinx](http://sphinx-doc.org/index.html) to generate the full suite of documentation. > When contributing documentation, please do your best to follow the style of the documentation files. This means a soft-limit of 79 characters wide in your text files and a semi-formal, yet friendly and approachable, prose style. natsort-8.4.0/LICENSE000066400000000000000000000020471444422413600142310ustar00rootroot00000000000000Copyright (c) 2012-2023 Seth M. 
Morton Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. natsort-8.4.0/MANIFEST.in000066400000000000000000000003061444422413600147560ustar00rootroot00000000000000include LICENSE include CHANGELOG.md include tox.ini include RELEASING.md recursive-include mypy_stubs *.pyi graft dev graft docs graft natsort graft tests global-exclude *.py[cod] __pycache__ *.so natsort-8.4.0/README.rst000066400000000000000000000500411444422413600147100ustar00rootroot00000000000000natsort ======= .. image:: https://img.shields.io/pypi/v/natsort.svg :target: https://pypi.org/project/natsort/ .. image:: https://img.shields.io/pypi/pyversions/natsort.svg :target: https://pypi.org/project/natsort/ .. image:: https://img.shields.io/pypi/l/natsort.svg :target: https://github.com/SethMMorton/natsort/blob/main/LICENSE .. image:: https://github.com/SethMMorton/natsort/workflows/Tests/badge.svg :target: https://github.com/SethMMorton/natsort/actions .. 
image:: https://codecov.io/gh/SethMMorton/natsort/branch/main/graph/badge.svg :target: https://codecov.io/gh/SethMMorton/natsort .. image:: https://img.shields.io/pypi/dw/natsort.svg :target: https://pypi.org/project/natsort/ Simple yet flexible natural sorting in Python. - Source Code: https://github.com/SethMMorton/natsort - Downloads: https://pypi.org/project/natsort/ - Documentation: https://natsort.readthedocs.io/ - `Examples and Recipes`_ - `How Does Natsort Work?`_ - `API`_ - `Quick Description`_ - `Quick Examples`_ - `FAQ`_ - `Requirements`_ - `Optional Dependencies`_ - `Installation`_ - `How to Run Tests`_ - `How to Build Documentation`_ - `Dropped Deprecated APIs`_ - `History`_ **NOTE**: Please see the `Dropped Deprecated APIs`_ section for changes. Quick Description ----------------- When you try to sort a list of strings that contain numbers, the normal python sort algorithm sorts lexicographically, so you might not get the results that you expect: .. code-block:: pycon >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] >>> sorted(a) ['1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '2 ft 7 in', '7 ft 6 in'] Notice that it has the order ('1', '10', '2') - this is because the list is being sorted in lexicographical order, which sorts numbers like you would letters (i.e. 'b', 'ba', 'c'). `natsort`_ provides a function `natsorted()`_ that helps sort lists "naturally" ("naturally" is rather ill-defined, but in general it means sorting based on meaning and not computer code point). Using `natsorted()`_ is simple: .. code-block:: pycon >>> from natsort import natsorted >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] >>> natsorted(a) ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] `natsorted()`_ identifies numbers anywhere in a string and sorts them naturally. 
Below are some other things you can do with `natsort`_ (also see the `Examples and Recipes`_ for a quick start guide, or the `API`_ for complete details). **Note**: `natsorted()`_ is designed to be a drop-in replacement for the built-in `sorted()`_ function. Like `sorted()`_, `natsorted()`_ `does not sort in-place`. To sort a list and assign the output to the same variable, you must explicitly assign the output to a variable: .. code-block:: pycon >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] >>> natsorted(a) ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] >>> print(a) # 'a' was not sorted; "natsorted" simply returned a sorted list ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] >>> a = natsorted(a) # Now 'a' will be sorted because the sorted list was assigned to 'a' >>> print(a) ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] Please see `Generating a Reusable Sorting Key and Sorting In-Place`_ for an alternate way to sort in-place naturally. Quick Examples -------------- - `Sorting Versions`_ - `Sort Paths Like My File Browser (e.g. Windows Explorer on Windows)`_ - `Sorting by Real Numbers (i.e. Signed Floats)`_ - `Locale-Aware Sorting (or "Human Sorting")`_ - `Further Customizing Natsort`_ - `Sorting Mixed Types`_ - `Handling Bytes`_ - `Generating a Reusable Sorting Key and Sorting In-Place`_ - `Other Useful Things`_ Sorting Versions ++++++++++++++++ `natsort`_ does not actually *comprehend* version numbers. It just so happens that the most common versioning schemes are designed to work with standard natural sorting techniques; these schemes include ``MAJOR.MINOR``, ``MAJOR.MINOR.PATCH``, ``YEAR.MONTH.DAY``. If your data conforms to a scheme like this, then it will work out-of-the-box with `natsorted()`_ (as of `natsort`_ version >= 4.0.0): .. 
code-block:: pycon >>> a = ['version-1.9', 'version-2.0', 'version-1.11', 'version-1.10'] >>> natsorted(a) ['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0'] If you need to sort versions that use a more complicated scheme, please see `these version sorting examples`_. Sort Paths Like My File Browser (e.g. Windows Explorer on Windows) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Prior to `natsort`_ version 7.1.0, it was a common request to be able to sort paths like Windows Explorer. As of `natsort`_ 7.1.0, the function `os_sorted()`_ has been added to provide users the ability to sort in the order that their file browser might sort (e.g. Windows Explorer on Windows, Finder on MacOS, Dolphin/Nautilus/Thunar/etc. on Linux). .. code-block:: python import os from natsort import os_sorted print(os_sorted(os.listdir())) # The directory sorted like your file browser might show Output will be different depending on the operating system you are on. For users **not** on Windows (e.g. MacOS/Linux) it is **strongly** recommended to also install `PyICU`_, which will help `natsort`_ give results that match most file browsers. If this is not installed, it will fall back on Python's built-in `locale`_ module and will give good results for most input, but will give poor results for special characters. Sorting by Real Numbers (i.e. Signed Floats) ++++++++++++++++++++++++++++++++++++++++++++ This is useful in scientific data analysis (and was the default behavior of `natsorted()`_ for `natsort`_ version < 4.0.0). Use the `realsorted()`_ function: ..
code-block:: pycon >>> from natsort import realsorted, ns >>> # Note that when interpreting as signed floats, the below numbers are >>> # +5.10, -3.00, +5.30, +2.00 >>> a = ['position5.10.data', 'position-3.data', 'position5.3.data', 'position2.data'] >>> natsorted(a) ['position2.data', 'position5.3.data', 'position5.10.data', 'position-3.data'] >>> natsorted(a, alg=ns.REAL) ['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data'] >>> realsorted(a) # shortcut for natsorted with alg=ns.REAL ['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data'] Locale-Aware Sorting (or "Human Sorting") +++++++++++++++++++++++++++++++++++++++++ This is where the non-numeric characters are also ordered based on their meaning, not on their ordinal value, and a locale-dependent thousands separator and decimal separator is accounted for in the number. This can be achieved with the `humansorted()`_ function: .. code-block:: pycon >>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana'] >>> natsorted(a) ['Apple', 'Banana', 'apple14,689', 'apple15', 'banana'] >>> import locale >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') 'en_US.UTF-8' >>> natsorted(a, alg=ns.LOCALE) ['apple15', 'apple14,689', 'Apple', 'banana', 'Banana'] >>> from natsort import humansorted >>> humansorted(a) # shortcut for natsorted with alg=ns.LOCALE ['apple15', 'apple14,689', 'Apple', 'banana', 'Banana'] You may find you need to explicitly set the locale to get this to work (as shown in the example). Please see `locale issues`_ and the `Optional Dependencies`_ section below before using the `humansorted()`_ function. Further Customizing Natsort +++++++++++++++++++++++++++ If you need to combine multiple algorithm modifiers (such as ``ns.REAL``, ``ns.LOCALE``, and ``ns.IGNORECASE``), you can combine the options using the bitwise OR operator (``|``). For example, .. 
code-block:: pycon >>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana'] >>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) ['Apple', 'apple15', 'apple14,689', 'Banana', 'banana'] >>> # The ns enum provides long and short forms for each option. >>> ns.LOCALE == ns.L True >>> # You can also customize the convenience functions, too. >>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) == realsorted(a, alg=ns.L | ns.IC) True >>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) == humansorted(a, alg=ns.R | ns.IC) True All of the available customizations can be found in the documentation for `the ns enum`_. You can also add your own custom transformation functions with the ``key`` argument. These can be used with ``alg`` if you wish. .. code-block:: pycon >>> a = ['apple2.50', '2.3apple'] >>> natsorted(a, key=lambda x: x.replace('apple', ''), alg=ns.REAL) ['2.3apple', 'apple2.50'] Sorting Mixed Types +++++++++++++++++++ You can mix and match `int`_, `float`_, and `str`_ types when you sort: .. code-block:: pycon >>> a = ['4.5', 6, 2.0, '5', 'a'] >>> natsorted(a) [2.0, '4.5', '5', 6, 'a'] >>> # sorted(a) would raise an "unorderable types" TypeError Handling Bytes ++++++++++++++ `natsort`_ does not officially support the `bytes`_ type, but convenience functions are provided that help you decode to `str`_ first: .. code-block:: pycon >>> from natsort import as_utf8 >>> a = [b'a', 14.0, 'b'] >>> # natsorted(a) would raise a TypeError (bytes() < str()) >>> natsorted(a, key=as_utf8) == [14.0, b'a', 'b'] True >>> a = [b'a56', b'a5', b'a6', b'a40'] >>> # natsorted(a) would return the same results as sorted(a) >>> natsorted(a, key=as_utf8) == [b'a5', b'a6', b'a40', b'a56'] True Generating a Reusable Sorting Key and Sorting In-Place ++++++++++++++++++++++++++++++++++++++++++++++++++++++ Under the hood, `natsorted()`_ works by generating a custom sorting key using `natsort_keygen()`_ and then passes that to the built-in `sorted()`_. 
You can use the `natsort_keygen()`_ function yourself to generate a custom sorting key to sort in-place using the `list.sort()`_ method. .. code-block:: pycon >>> from natsort import natsort_keygen >>> natsort_key = natsort_keygen() >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] >>> natsorted(a) == sorted(a, key=natsort_key) True >>> a.sort(key=natsort_key) >>> a ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] All of the algorithm customizations mentioned in the `Further Customizing Natsort`_ section can also be applied to `natsort_keygen()`_ through the *alg* keyword option. Other Useful Things +++++++++++++++++++ - recursively descend into lists of lists - automatic unicode normalization of input data - `controlling the case-sensitivity`_ - `sorting file paths correctly`_ - `allow custom sorting keys`_ - `accounting for units`_ FAQ --- How do I debug `natsorted()`_? The best way to debug `natsorted()`_ is to generate a key using `natsort_keygen()`_ with the same options being passed to `natsorted()`_. One can take a look at exactly what is being done with their input using this key - it is highly recommended to `look at this issue describing how to debug`_ for *how* to debug, and also to review the `How Does Natsort Work?`_ page for *why* `natsort`_ is doing that to your data. If you are trying to sort custom classes and running into trouble, please take a look at https://github.com/SethMMorton/natsort/issues/60. In short, custom classes are not likely to be sorted correctly if one relies on the behavior of ``__lt__`` and the other rich comparison operators in their custom class - it is better to use a ``key`` function with `natsort`_, or use the `natsort`_ key as part of your rich comparison operator definition. `natsort`_ gave me results I didn't expect, and it's a terrible library! Did you try to debug using the above advice? If so, and you still cannot figure out the error, then please `file an issue`_. 
How *does* `natsort`_ work? If you don't want to read `How Does Natsort Work?`_, here is a quick primer. `natsort`_ provides a `key function`_ that can be passed to `list.sort()`_ or `sorted()`_ in order to modify the default sorting behavior. This key is generated on-demand with the key generator `natsort_keygen()`_. `natsorted()`_ is essentially a wrapper for the following code: .. code-block:: pycon >>> from natsort import natsort_keygen >>> natsort_key = natsort_keygen() >>> sorted(['1', '10', '2'], key=natsort_key) ['1', '2', '10'] Users can further customize `natsort`_ sorting behavior with the ``key`` and/or ``alg`` options (see details in the `Further Customizing Natsort`_ section). The key generated by `natsort_keygen()`_ *always* returns a `tuple`_. It does so in the following way (*some details omitted for clarity*): 1. Assume the input is a string, and attempt to split it into numbers and non-numbers using regular expressions. Numbers are then converted into either `int`_ or `float`_. 2. If the above fails because the input is not a string, assume the input is some other sequence (e.g. `list`_ or `tuple`_), and recursively apply the key to each element of the sequence. 3. If the above fails because the input is not iterable, assume the input is an `int`_ or `float`_, and just return the input in a `tuple`_. Because a `tuple`_ is always returned, a `TypeError`_ should not be common unless one tries to do something odd like sort an `int`_ against a `list`_. Shell script ------------ `natsort`_ comes with a shell script called `natsort`_, or can also be called from the command line with ``python -m natsort``. Check out the `shell script wiki documentation`_ for more details. Requirements ------------ `natsort`_ requires Python 3.7 or greater. Optional Dependencies --------------------- fastnumbers +++++++++++ The most efficient sorting can occur if you install the `fastnumbers`_ package (version >=2.0.0); it helps with the string to number conversions. 
`natsort`_ will still run (efficiently) without the package, but if you need to squeeze out that extra juice it is recommended you include this as a dependency. `natsort`_ will not require (or check) that `fastnumbers`_ is installed at installation. PyICU +++++ It is recommended that you install `PyICU`_ if you wish to sort in a locale-dependent manner; see this page on `locale issues`_ for an explanation why. Installation ------------ Use ``pip``! .. code-block:: console $ pip install natsort If you want to install the `Optional Dependencies`_, you can use the `"extras" notation`_ at installation time to install those dependencies as well - use ``fast`` for `fastnumbers`_ and ``icu`` for `PyICU`_. .. code-block:: console # Install both optional dependencies. $ pip install natsort[fast,icu] # Install just fastnumbers $ pip install natsort[fast] How to Run Tests ---------------- Please note that `natsort`_ is NOT set-up to support ``python setup.py test``. The recommended way to run tests is with `tox`_. After installing ``tox``, running tests is as simple as executing the following in the `natsort`_ directory: .. code-block:: console $ tox ``tox`` will create a virtual environment for your tests and install all the needed testing requirements for you. You can specify a particular python version with the ``-e`` flag, e.g. ``tox -e py36``. Static analysis is done with ``tox -e flake8``. You can see all available testing environments with ``tox --listenvs``. How to Build Documentation -------------------------- If you want to build the documentation for `natsort`_, it is recommended to use ``tox``: .. code-block:: console $ tox -e docs This will place the documentation in ``build/sphinx/html``.
Dropped Deprecated APIs ----------------------- In `natsort`_ version 6.0.0, the following APIs and functions were removed - ``number_type`` keyword argument (deprecated since 3.4.0) - ``signed`` keyword argument (deprecated since 3.4.0) - ``exp`` keyword argument (deprecated since 3.4.0) - ``as_path`` keyword argument (deprecated since 3.4.0) - ``py3_safe`` keyword argument (deprecated since 3.4.0) - ``ns.TYPESAFE`` (deprecated since version 5.0.0) - ``ns.DIGIT`` (deprecated since version 5.0.0) - ``ns.VERSION`` (deprecated since version 5.0.0) - ``versorted()`` (discouraged since version 4.0.0, officially deprecated since version 5.5.0) - ``index_versorted()`` (discouraged since version 4.0.0, officially deprecated since version 5.5.0) In general, if you want to determine if you are using deprecated APIs you can run your code with the following flag .. code-block:: console $ python -Wdefault::DeprecationWarning my-code.py By default `DeprecationWarnings`_ are not shown, but this will cause them to be shown. Alternatively, you can just set the environment variable ``PYTHONWARNINGS`` to "default::DeprecationWarning" and then run your code. Author ------ Seth M. Morton History ------- Please visit the changelog `on GitHub`_. .. _natsort: https://natsort.readthedocs.io/en/stable/index.html .. _natsorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.natsorted .. _natsort_keygen(): https://natsort.readthedocs.io/en/stable/api.html#natsort.natsort_keygen .. _realsorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.realsorted .. _humansorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.humansorted .. _os_sorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.os_sorted .. _the ns enum: https://natsort.readthedocs.io/en/stable/api.html#natsort.ns .. _fastnumbers: https://github.com/SethMMorton/fastnumbers .. _sorted(): https://docs.python.org/3/library/functions.html#sorted .. 
_list.sort(): https://docs.python.org/3/library/stdtypes.html#list.sort .. _key function: https://docs.python.org/3/howto/sorting.html#key-functions .. _locale: https://docs.python.org/3/library/locale.html .. _int: https://docs.python.org/3/library/functions.html#int .. _float: https://docs.python.org/3/library/functions.html#float .. _str: https://docs.python.org/3/library/stdtypes.html#str .. _bytes: https://docs.python.org/3/library/stdtypes.html#bytes .. _list: https://docs.python.org/3/library/stdtypes.html#list .. _tuple: https://docs.python.org/3/library/stdtypes.html#tuple .. _TypeError: https://docs.python.org/3/library/exceptions.html#TypeError .. _DeprecationWarnings: https://docs.python.org/3/library/exceptions.html#DeprecationWarning .. _"extras" notation: https://packaging.python.org/tutorials/installing-packages/#installing-setuptools-extras .. _PyICU: https://pypi.org/project/PyICU .. _tox: https://tox.readthedocs.io/en/latest/ .. _Examples and Recipes: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes .. _How Does Natsort Work?: https://github.com/SethMMorton/natsort/wiki/How-Does-Natsort-Work%3F .. _API: https://natsort.readthedocs.io/en/stable/api.html .. _on GitHub: https://github.com/SethMMorton/natsort/blob/main/CHANGELOG.md .. _file an issue: https://github.com/SethMMorton/natsort/issues/new .. _look at this issue describing how to debug: https://github.com/SethMMorton/natsort/issues/13#issuecomment-50422375 .. _controlling the case-sensitivity: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#controlling-case-when-sorting .. _sorting file paths correctly: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#sort-os-generated-paths .. _allow custom sorting keys: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#using-a-custom-sorting-key .. _accounting for units: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#accounting-for-units-when-sorting .. 
_these version sorting examples: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#sorting-more-expressive-versioning-schemes .. _locale issues: https://github.com/SethMMorton/natsort/wiki/Possible-Issues-with-natsort.humansorted-or-ns.LOCALE .. _shell script wiki documentation: https://github.com/SethMMorton/natsort/wiki/Shell-Scriptnatsort-8.4.0/RELEASING.md000066400000000000000000000022301444422413600150510ustar00rootroot00000000000000# Release Checklist - [ ] Get main to the appropriate code release state. [GitHub Actions](https://github.com/SethMMorton/natsort/actions) must be passing: [![Build Status](https://github.com/SethMMorton/natsort/workflows/Tests/badge.svg)](https://github.com/SethMMorton/natsort/actions) - [ ] Ensure that the `CHANGELOG.md` includes the changes made since the last release. Please follow the style outlined in https://keepachangelog.com/. All new entries should be added into the "Unreleased" section. - [ ] Bump the version number. Specify either "major", "minor", or "patch": ```bash tox -e bump patch ``` This will take care of updating the `CHANGELOG.md` with the correct release information. - [ ] Push the bumped commit: ```bash git push ``` - [ ] Push the tag: ```bash git push --tags ``` - [ ] Check that the tagged [GitHub Actions build](https://github.com/SethMMorton/natsort/actions) has deployed correctly to [PyPI](https://pypi.org/project/natsort/#history). - [ ] Check installation: ```bash python -m pip uninstall -y natsort && python -m pip install -U natsort ``` natsort-8.4.0/dev/000077500000000000000000000000001444422413600137775ustar00rootroot00000000000000natsort-8.4.0/dev/README.md000066400000000000000000000014151444422413600152570ustar00rootroot00000000000000# Development Collateral This directory contains some files useful for development. - `bump.py` - Executes `bumpversion`, then post-processes the CHANGELOG to handle corner-cases that `bumpversion` cannot.
Requires [`bump2version`](https://github.com/c4urself/bump2version), which is the maintained fork of [`bumpversion`](https://github.com/peritus/bumpversion). It is not really intended to be called directly, but instead through `tox -e bump`. - `clean.py` - This file cleans most files that are created during development. Run in the project home directory. It is not really intended to be called directly, but instead through `tox -e clean`. - `generate_new_unicode_numbers.py` is used to update `natsort/unicode_numeric_hex.py` when new Python versions are released. natsort-8.4.0/dev/bump.py000077500000000000000000000044161444422413600153240ustar00rootroot00000000000000#! /usr/bin/env python """ Cross-platform bump of version with special CHANGELOG modification. INTENDED TO BE CALLED FROM PROJECT ROOT, NOT FROM dev/! """ import subprocess import sys try: bump_type = sys.argv[1] except IndexError: sys.exit("Must pass 'bump_type' argument!") else: if bump_type not in ("major", "minor", "patch"): sys.exit('bump_type must be one of "major", "minor", or "patch"!') def git(cmd, *args): """Wrapper for calling git""" try: subprocess.run(["git", cmd, *args], check=True, text=True) except subprocess.CalledProcessError as e: print("Call to git failed!", file=sys.stderr) print("STDOUT:", e.stdout, file=sys.stderr) print("STDERR:", e.stderr, file=sys.stderr) sys.exit(e.returncode) def bumpversion(severity, *args, catch=False): """Wrapper for calling bumpversion""" cmd = ["bump2version", *args, severity] try: if catch: return subprocess.run( cmd, check=True, capture_output=True, text=True ).stdout else: subprocess.run(cmd, check=True, text=True) except subprocess.CalledProcessError as e: print("Call to bump2version failed!", file=sys.stderr) print("STDOUT:", e.stdout, file=sys.stderr) print("STDERR:", e.stderr, file=sys.stderr) sys.exit(e.returncode) # Do a dry run of the bump to find what the current version is and what it will become. 
data = bumpversion(bump_type, "--dry-run", "--list", catch=True) data = dict(x.split("=") for x in data.splitlines()) # Execute the bumpversion. bumpversion(bump_type) # Post-process the changelog with things that bumpversion is not good at updating. with open("CHANGELOG.md") as fl: changelog = fl.read().replace( "", "\n[{new}]: {url}/{current}...{new}".format( new=data["new_version"], current=data["current_version"], url="https://github.com/SethMMorton/natsort/compare", ), ) with open("CHANGELOG.md", "w") as fl: fl.write(changelog) # Finally, add the CHANGELOG.md changes to the previous commit. git("add", "CHANGELOG.md") git("commit", "--amend", "--no-edit") git("tag", "--force", data["new_version"], "HEAD") natsort-8.4.0/dev/clean.py000077500000000000000000000014141444422413600154360ustar00rootroot00000000000000#! /usr/bin/env python """ Cross-platform clean of working directory. INTENDED TO BE CALLED FROM PROJECT ROOT, NOT FROM dev/! """ import pathlib import shutil # Directories to obliterate dirs = [ pathlib.Path("build"), pathlib.Path("dist"), pathlib.Path(".pytest_cache"), pathlib.Path(".hypothesis"), pathlib.Path(".tox"), ] dirs += pathlib.Path.cwd().glob("*.egg-info") for d in dirs: if d.is_dir(): shutil.rmtree(d, ignore_errors=True) elif d.is_file(): d.unlink() # just in case there is a file. # Clean up any stray __pycache__. for d in pathlib.Path.cwd().rglob("__pycache__"): shutil.rmtree(d, ignore_errors=True) # Shouldn't be any .pyc left, but just in case for f in pathlib.Path.cwd().rglob("*.pyc"): f.unlink() natsort-8.4.0/dev/generate_new_unicode_numbers.py000077500000000000000000000024231444422413600222610ustar00rootroot00000000000000#! /usr/bin/env python # -*- coding: utf-8 -*- """ Generate the numeric hex list of unicode numerals """ import os import os.path import sys import unicodedata # This is intended to be called from project root. Enforce this. 
this_file = os.path.abspath(__file__) this_base = os.path.basename(this_file) cwd = os.path.abspath(os.getcwd()) desired_this_file = os.path.join(cwd, "dev", this_base) if this_file != desired_this_file: sys.exit(this_base + " must be called from project root") # We will write the new numeric hex collection to a natsort package file. target = os.path.join(cwd, "natsort", "unicode_numeric_hex.py") with open(target, "w") as fl: print( '''# -*- coding: utf-8 -*- """ Contains all possible non-ASCII unicode numbers. """ # Rather than determine what unicode characters are numeric on the fly which # would incur a startup runtime penalty, the hex values are hard-coded below. numeric_hex = (''', file=fl, ) # Write out each individual hex value. for i in range(0x110000): try: a = chr(i) except ValueError: break if a in "0123456789": continue if unicodedata.numeric(a, None) is not None: print(" 0x{:X},".format(i), file=fl) print(")", file=fl) natsort-8.4.0/docs/000077500000000000000000000000001444422413600141515ustar00rootroot00000000000000natsort-8.4.0/docs/api.rst000066400000000000000000000112001444422413600154460ustar00rootroot00000000000000.. default-domain:: py .. currentmodule:: natsort .. _api: natsort API =========== .. contents:: :local: Standard API ------------ :func:`~natsort.natsorted` ++++++++++++++++++++++++++ .. autofunction:: natsorted The :class:`~natsort.ns` enum +++++++++++++++++++++++++++++ .. autodata:: ns :annotation: :func:`~natsort.natsort_key` ++++++++++++++++++++++++++++ .. autofunction:: natsort_key :func:`~natsort.natsort_keygen` +++++++++++++++++++++++++++++++ .. autofunction:: natsort_keygen :func:`~natsort.os_sort_key` ++++++++++++++++++++++++++++ .. autofunction:: os_sort_key :func:`~natsort.os_sort_keygen` +++++++++++++++++++++++++++++++ .. autofunction:: os_sort_keygen Convenience Functions --------------------- :func:`~natsort.os_sorted` +++++++++++++++++++++++++++ .. 
autofunction:: os_sorted :func:`~natsort.realsorted` +++++++++++++++++++++++++++ .. autofunction:: realsorted :func:`~natsort.humansorted` ++++++++++++++++++++++++++++ .. autofunction:: humansorted :func:`~natsort.index_natsorted` ++++++++++++++++++++++++++++++++ .. autofunction:: index_natsorted :func:`~natsort.index_realsorted` +++++++++++++++++++++++++++++++++ .. autofunction:: index_realsorted :func:`~natsort.index_humansorted` ++++++++++++++++++++++++++++++++++ .. autofunction:: index_humansorted :func:`~natsort.order_by_index` +++++++++++++++++++++++++++++++ .. autofunction:: order_by_index .. _bytes_help: Help With Bytes +++++++++++++++ The official stance of :mod:`natsort` is to not support `bytes` for sorting; there is just too much that can go wrong when trying to automate conversion between `bytes` and `str`. But rather than completely give up on `bytes`, :mod:`natsort` provides three functions that make it easy to quickly decode `bytes` to `str` so that sorting is possible. .. autofunction:: decoder .. autofunction:: as_ascii .. autofunction:: as_utf8 .. _function_help: Help With Creating Function Keys ++++++++++++++++++++++++++++++++ If you need to create a complicated *key* argument to (for example) :func:`natsorted` that is actually multiple functions called one after the other, the following function can help you easily perform this action. It is used internally to :mod:`natsort`, and has been exposed publicly for the convenience of the user. .. autofunction:: chain_functions If you need to be able to search your input for numbers using the same definition as :mod:`natsort`, you can do so using the following function. Given your chosen algorithm (selected using the :class:`~natsort.ns` enum), the corresponding regular expression to locate numbers will be returned. .. 
autofunction:: numeric_regex_chooser Help With Type Hinting ++++++++++++++++++++++ If you need to explicitly specify the types that natsort accepts or returns in your code, the following types have been exposed for your convenience. +--------------------------------+----------------------------------------------------------------------------------------+ | Type | Purpose | +================================+========================================================================================+ |:attr:`natsort.NatsortKeyType` | Returned by :func:`natsort.natsort_keygen`, and type of :attr:`natsort.natsort_key` | +--------------------------------+----------------------------------------------------------------------------------------+ |:attr:`natsort.OSSortKeyType` | Returned by :func:`natsort.os_sort_keygen`, and type of :attr:`natsort.os_sort_key` | +--------------------------------+----------------------------------------------------------------------------------------+ |:attr:`natsort.KeyType` | Type of `key` argument to :func:`natsort.natsorted` and :func:`natsort.natsort_keygen` | +--------------------------------+----------------------------------------------------------------------------------------+ |:attr:`natsort.NatsortInType` | The input type of :attr:`natsort.NatsortKeyType` | +--------------------------------+----------------------------------------------------------------------------------------+ |:attr:`natsort.NatsortOutType` | The output type of :attr:`natsort.NatsortKeyType` | +--------------------------------+----------------------------------------------------------------------------------------+ |:attr:`natsort.NSType` | The type of the :class:`ns` enum | +--------------------------------+----------------------------------------------------------------------------------------+ natsort-8.4.0/docs/changelog.rst000066400000000000000000000001611444422413600166300ustar00rootroot00000000000000.. 
_changelog: Changelog ========= Please visit https://github.com/SethMMorton/natsort/blob/main/CHANGELOG.md. natsort-8.4.0/docs/conf.py000066400000000000000000000212061444422413600154510ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # natsort documentation build configuration file, created by # sphinx-quickstart on Thu Jul 17 21:01:29 2014. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # sys.path.insert(0, os.path.abspath('.')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.intersphinx", "sphinx.ext.mathjax", "sphinx.ext.napoleon", ] autodoc_typehints = "none" # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix of source filenames. source_suffix = [".rst"] # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. master_doc = "index" # General information about the project. project = "natsort" # noinspection PyShadowingBuiltins copyright = "2014, Seth M. Morton" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. 
# # The full version, including alpha/beta/rc tags. release = "8.4.0" # The short X.Y version. version = ".".join(release.split(".")[0:2]) # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # exclude_patterns = ['solar/*'] # The reST default role (used for this markup: `text`) to use for all # documents. # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" highlight_language = "python" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. # keep_warnings = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. on_rtd = os.environ.get("READTHEDOCS") == "True" if on_rtd: html_theme = "default" else: import sphinx_rtd_theme # noqa: F401 html_theme = "sphinx_rtd_theme" # html_theme = 'solar' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. 
# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. html_theme_path = ["."] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". # html_title = None # A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. # html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ['_static'] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. # html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. # html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. # html_additional_pages = {} # If false, no module index is generated. # html_domain_indices = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. # html_split_index = False # If true, links to the reST sources are added to the pages. # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. 
Default is True. # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = "natsortdoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # 'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ ("index", "natsort.tex", "natsort Documentation", "Seth M. Morton", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # latex_use_parts = False # If true, show page references after internal links. # latex_show_pagerefs = False # If true, show URL addresses after external links. # latex_show_urls = False # Documents to append as an appendix to all manuals. # latex_appendices = [] # If false, no module index is generated. # latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [("index", "natsort", "natsort Documentation", ["Seth M. Morton"], 1)] # If true, show URL addresses after external links. 
# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( "index", "natsort", "natsort Documentation", "Seth M. Morton", "natsort", "One line description of project.", "Miscellaneous", ), ] # Documents to append as an appendix to all manuals. # texinfo_appendices = [] # If false, no module index is generated. # texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. # texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. # texinfo_no_detailmenu = False # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} natsort-8.4.0/docs/examples.rst000066400000000000000000000003451444422413600165230ustar00rootroot00000000000000.. default-domain:: py .. currentmodule:: natsort .. _examples: Examples and Recipes ==================== This page has been moved to the `natsort wiki `_. natsort-8.4.0/docs/howitworks.rst000066400000000000000000000006461444422413600171310ustar00rootroot00000000000000.. default-domain:: py .. currentmodule:: natsort .. _howitworks: How Does Natsort Work? ====================== This page has been moved to the `natsort wiki `_. Special Cases Everywhere! ------------------------- This page has been `moved as well `_. natsort-8.4.0/docs/index.rst000066400000000000000000000020051444422413600160070ustar00rootroot00000000000000.. natsort documentation master file, created by sphinx-quickstart on Thu Jul 17 21:01:29 2014. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. natsort: Simple yet flexible natural sorting in Python. 
======================================================= - Source Code: https://github.com/SethMMorton/natsort - Downloads: https://pypi.org/project/natsort/ - Documentation: https://natsort.readthedocs.io/ Please see the `GitHub main page `_ for everything else, including - Quick description - Basic examples - FAQ - Requirements and optional dependencies - Installation instructions - Testing instructions - Deprecation schedule .. toctree:: :maxdepth: 2 :numbered: howitworks.rst examples.rst api.rst locale_issues.rst shell.rst changelog.rst Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` natsort-8.4.0/docs/locale_issues.rst000066400000000000000000000005431444422413600175370ustar00rootroot00000000000000.. default-domain:: py .. currentmodule:: natsort .. _locale_issues: Possible Issues with :func:`~natsort.humansorted` or ``ns.LOCALE`` ================================================================== This page has been moved to the `natsort wiki `_. natsort-8.4.0/docs/shell.rst000066400000000000000000000003061444422413600160110ustar00rootroot00000000000000.. default-domain:: py .. currentmodule:: natsort .. _shell: Shell Script ============ This page has been moved to the `natsort wiki `_. natsort-8.4.0/mypy_stubs/000077500000000000000000000000001444422413600154375ustar00rootroot00000000000000natsort-8.4.0/mypy_stubs/icu.pyi000066400000000000000000000011241444422413600167400ustar00rootroot00000000000000from typing import overload @overload def Locale() -> str: ... @overload def Locale(x: str) -> str: ... class UCollAttribute: NUMERIC_COLLATION: int class UCollAttributeValue: ON: int class DecimalFormatSymbols: kGroupingSeparatorSymbol: int kDecimalSeparatorSymbol: int def __init__(self, locale: str) -> None: ... def getSymbol(self, symbol: int) -> str: ... class Collator: @classmethod def createInstance(cls, locale: str) -> Collator: ... def getSortKey(self, source: str) -> bytes: ... 
def setAttribute(self, attr: int, value: int) -> None: ... natsort-8.4.0/natsort/000077500000000000000000000000001444422413600147135ustar00rootroot00000000000000natsort-8.4.0/natsort/__init__.py000066400000000000000000000021671444422413600170320ustar00rootroot00000000000000# -*- coding: utf-8 -*- from natsort.natsort import ( NatsortKeyType, OSSortKeyType, as_ascii, as_utf8, decoder, humansorted, index_humansorted, index_natsorted, index_realsorted, natsort_key, natsort_keygen, natsorted, numeric_regex_chooser, order_by_index, os_sort_key, os_sort_keygen, os_sorted, realsorted, ) from natsort.ns_enum import NSType, ns from natsort.utils import KeyType, NatsortInType, NatsortOutType, chain_functions __version__ = "8.4.0" __all__ = [ "natsort_key", "natsort_keygen", "natsorted", "humansorted", "realsorted", "index_natsorted", "index_humansorted", "index_realsorted", "order_by_index", "decoder", "as_ascii", "as_utf8", "ns", "chain_functions", "numeric_regex_chooser", "os_sort_key", "os_sort_keygen", "os_sorted", "NatsortKeyType", "OSSortKeyType", "KeyType", "NatsortInType", "NatsortOutType", "NSType", ] # Add the ns keys to this namespace for convenience. 
globals().update({name: value for name, value in ns.__members__.items()}) natsort-8.4.0/natsort/__main__.py000066400000000000000000000243521444422413600170130ustar00rootroot00000000000000# -*- coding: utf-8 -*- import argparse import sys from typing import Callable, Iterable, List, Optional, Pattern, Tuple, Union, cast import natsort from natsort.utils import regex_chooser Num = Union[float, int] NumIter = Iterable[Num] NumPair = Tuple[Num, Num] NumPairIter = Iterable[NumPair] NumConverter = Callable[[str], Num] class TypedArgs(argparse.Namespace): paths: bool filter: Optional[List[NumPair]] reverse_filter: Optional[List[NumPair]] exclude: List[Num] reverse: bool number_type: str nosign: bool sign: bool noexp: bool locale: bool entries: List[str] def __init__( self, filter: Optional[List[NumPair]] = None, reverse_filter: Optional[List[NumPair]] = None, exclude: Optional[List[Num]] = None, paths: bool = False, reverse: bool = False, ) -> None: """Used by testing only""" self.filter = filter self.reverse_filter = reverse_filter self.exclude = [] if exclude is None else exclude self.paths = paths self.reverse = reverse self.number_type = "int" self.signed = False self.exp = True self.locale = False def main(*arguments: str) -> None: """ Performs a natural sort on entries given on the command-line. Arguments are read from sys.argv. """ from argparse import ArgumentParser, RawDescriptionHelpFormatter from textwrap import dedent parser = ArgumentParser( description=dedent(cast(str, main.__doc__)), formatter_class=RawDescriptionHelpFormatter, ) parser.add_argument( "--version", action="version", version="%(prog)s {}".format(natsort.__version__), ) parser.add_argument( "-p", "--paths", default=False, action="store_true", help="Interpret the input as file paths. 
This is not " "strictly necessary to sort all file paths, but in cases " 'where there are OS-generated file paths like "Folder/" ' 'and "Folder (1)/", this option is needed to make the ' 'paths sorted in the order you expect ("Folder/" before ' '"Folder (1)/").', ) parser.add_argument( "-f", "--filter", nargs=2, type=float, metavar=("LOW", "HIGH"), action="append", help="Used for keeping only the entries that have a number " "falling in the given range.", ) parser.add_argument( "-F", "--reverse-filter", nargs=2, type=float, metavar=("LOW", "HIGH"), action="append", dest="reverse_filter", help="Used for excluding the entries that have a number " "falling in the given range.", ) parser.add_argument( "-e", "--exclude", type=float, action="append", help="Used to exclude an entry that contains a specific number.", ) parser.add_argument( "-r", "--reverse", action="store_true", default=False, help="Returns in reversed order.", ) parser.add_argument( "-t", "--number-type", "--number_type", dest="number_type", choices=("int", "float", "real", "f", "i", "r"), default="int", help='Choose the type of number to search for. "float" will search ' 'for floating-point numbers. "int" will only search for ' 'integers. "real" is a shortcut for "float" with --sign. ' '"i" is a synonym for "int", "f" is a synonym for ' '"float", and "r" is a synonym for "real".' "The default is %(default)s.", ) parser.add_argument( "--nosign", default=False, action="store_false", dest="signed", help='Do not consider "+" or "-" as part of a number, i.e. do not ' "take sign into consideration. This is the default.", ) parser.add_argument( "-s", "--sign", default=False, action="store_true", dest="signed", help='Consider "+" or "-" as part of a number, i.e. ' "take sign into consideration. The default is unsigned.", ) parser.add_argument( "--noexp", default=True, action="store_false", dest="exp", help="Do not consider an exponential as part of a number, i.e. 
1e4, " 'would be considered as 1, "e", and 4, not as 10000. This only ' "effects the --number-type=float.", ) parser.add_argument( "-l", "--locale", action="store_true", default=False, help="Causes natsort to use locale-aware sorting. You will get the " "best results if you install PyICU.", ) parser.add_argument( "entries", nargs="*", default=sys.stdin, help="The entries to sort. Taken from stdin if nothing is given on " "the command line.", ) args = parser.parse_args(arguments or None, namespace=TypedArgs()) # Make sure the filter range is given properly. Does nothing if no filter args.filter = check_filters(args.filter) args.reverse_filter = check_filters(args.reverse_filter) # Remove trailing whitespace from all the entries entries = [e.strip() for e in args.entries] # Sort by directory then by file within directory and print. sort_and_print_entries(entries, args) def range_check(low: Num, high: Num) -> NumPair: """ Verify that that given range has a low lower than the high. Parameters ---------- low : {float, int} high : {float, int} Returns ------- tuple : low, high Raises ------ ValueError Low is greater than or equal to high. """ if low >= high: raise ValueError("low >= high") else: return low, high def check_filters(filters: Optional[NumPairIter]) -> Optional[List[NumPair]]: """ Execute range_check for every element of an iterable. Parameters ---------- filters : iterable The collection of filters to check. Each element must be a two-element tuple of floats or ints. Returns ------- The input as-is, or None if it evaluates to False. Raises ------ ValueError Low is greater than or equal to high for any element. """ if not filters: return None try: return [range_check(f[0], f[1]) for f in filters] except ValueError as err: raise ValueError("Error in --filter: " + str(err)) def keep_entry_range( entry: str, lows: NumIter, highs: NumIter, converter: NumConverter, regex: Pattern[str], ) -> bool: """ Check if an entry falls into a desired range. 
Every number in the entry will be extracted using *regex*, if any are within a given low to high range the entry will be kept. Parameters ---------- entry : str lows : iterable Collection of low values against which to compare the entry. highs : iterable Collection of high values against which to compare the entry. converter : callable Function to convert a string to a number. regex : regex object Regular expression to locate numbers in a string. Returns ------- True if the entry should be kept, False otherwise. """ return any( low <= converter(num) <= high for num in regex.findall(entry) for low, high in zip(lows, highs) ) def keep_entry_value( entry: str, values: NumIter, converter: NumConverter, regex: Pattern[str] ) -> bool: """ Check if an entry does not match a given value. Every number in the entry will be extracted using *regex*, if any match a given value the entry will not be kept. Parameters ---------- entry : str values : iterable Collection of values against which to compare the entry. converter : callable Function to convert a string to a number. regex : regex object Regular expression to locate numbers in a string. Returns ------- True if the entry should be kept, False otherwise. """ return not any(converter(num) in values for num in regex.findall(entry)) def sort_and_print_entries(entries: List[str], args: TypedArgs) -> None: """Sort the entries, applying the filters first if necessary.""" # Extract the proper number type. is_float = args.number_type in ("float", "real", "f", "r") signed = args.signed or args.number_type in ("real", "r") alg: int = ( natsort.ns.FLOAT * is_float | natsort.ns.SIGNED * signed | natsort.ns.NOEXP * (not args.exp) | natsort.ns.PATH * args.paths | natsort.ns.LOCALE * args.locale ) # Pre-remove entries that don't pass the filtering criteria # Make sure we use the same searching algorithm for filtering # as for sorting. 
do_filter = args.filter is not None or args.reverse_filter is not None if do_filter or args.exclude: inp_options = ( natsort.ns.FLOAT * is_float | natsort.ns.SIGNED * signed | natsort.ns.NOEXP * (not args.exp) ) regex = regex_chooser(inp_options) if args.filter is not None: lows, highs = ([f[0] for f in args.filter], [f[1] for f in args.filter]) entries = [ entry for entry in entries if keep_entry_range(entry, lows, highs, float, regex) ] if args.reverse_filter is not None: lows, highs = ( [f[0] for f in args.reverse_filter], [f[1] for f in args.reverse_filter], ) entries = [ entry for entry in entries if not keep_entry_range(entry, lows, highs, float, regex) ] if args.exclude: exclude = set(args.exclude) entries = [ entry for entry in entries if keep_entry_value(entry, exclude, float, regex) ] # Print off the sorted results for entry in natsort.natsorted(entries, reverse=args.reverse, alg=alg): print(entry) if __name__ == "__main__": try: main() except ValueError as a: sys.exit(str(a)) except KeyboardInterrupt: sys.exit(1) natsort-8.4.0/natsort/compat/000077500000000000000000000000001444422413600161765ustar00rootroot00000000000000natsort-8.4.0/natsort/compat/__init__.py000066400000000000000000000000001444422413600202750ustar00rootroot00000000000000natsort-8.4.0/natsort/compat/fake_fastnumbers.py000066400000000000000000000061151444422413600220720ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ This module is intended to replicate some of the functionality from the fastnumbers module in the event that module is not installed. 
""" import unicodedata from typing import Callable, FrozenSet, Union from natsort.unicode_numbers import decimal_chars _NAN_INF = [ "INF", "INf", "Inf", "inF", "iNF", "InF", "inf", "iNf", "NAN", "nan", "NaN", "nAn", "naN", "NAn", "nAN", "Nan", ] _NAN_INF.extend(["+" + x[:2] for x in _NAN_INF] + ["-" + x[:2] for x in _NAN_INF]) NAN_INF = frozenset(_NAN_INF) ASCII_NUMS = "0123456789+-" POTENTIAL_FIRST_CHAR = frozenset(decimal_chars + list(ASCII_NUMS + ".")) StrOrFloat = Union[str, float] StrOrInt = Union[str, int] def fast_float( x: str, key: Callable[[str], str] = lambda x: x, nan: float = float("inf"), _uni: Callable[[str, StrOrFloat], StrOrFloat] = unicodedata.numeric, _nan_inf: FrozenSet[str] = NAN_INF, _first_char: FrozenSet[str] = POTENTIAL_FIRST_CHAR, ) -> StrOrFloat: """ Convert a string to a float quickly, return input as-is if not possible. We don't need to accept all input that the real fast_int accepts because natsort is controlling what is passed to this function. Parameters ---------- x : str String to attempt to convert to a float. key : callable Single-argument function to apply to *x* if conversion fails. nan : float Value to return instead of NaN if NaN would be returned. Returns ------- *str* or *float* """ if x[0] in _first_char or x.lstrip()[:3] in _nan_inf: try: ret = float(x) return nan if ret != ret else ret except ValueError: try: return _uni(x, key(x)) if len(x) == 1 else key(x) except TypeError: # pragma: no cover return key(x) else: try: return _uni(x, key(x)) if len(x) == 1 else key(x) except TypeError: # pragma: no cover return key(x) def fast_int( x: str, key: Callable[[str], str] = lambda x: x, _uni: Callable[[str, StrOrInt], StrOrInt] = unicodedata.digit, _first_char: FrozenSet[str] = POTENTIAL_FIRST_CHAR, ) -> StrOrInt: """ Convert a string to a int quickly, return input as-is if not possible. We don't need to accept all input that the real fast_int accepts because natsort is controlling what is passed to this function. 
Parameters ---------- x : str String to attempt to convert to an int. key : callable Single-argument function to apply to *x* if conversion fails. Returns ------- *str* or *int* """ if x[0] in _first_char: try: return int(x) except ValueError: try: return _uni(x, key(x)) if len(x) == 1 else key(x) except TypeError: # pragma: no cover return key(x) else: try: return _uni(x, key(x)) if len(x) == 1 else key(x) except TypeError: # pragma: no cover return key(x) natsort-8.4.0/natsort/compat/fastnumbers.py000066400000000000000000000044161444422413600211060ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Interface for natsort to access fastnumbers functions without having to worry if it is actually installed. """ import re from typing import Callable, Iterable, Iterator, Tuple, Union StrOrFloat = Union[str, float] StrOrInt = Union[str, int] __all__ = ["try_float", "try_int"] def is_supported_fastnumbers( fastnumbers_version: str, minimum: Tuple[int, int, int] = (2, 0, 0) ) -> bool: match = re.match( r"^(\d+)\.(\d+)(\.(\d+))?([ab](\d+))?$", fastnumbers_version, flags=re.ASCII, ) if not match: raise ValueError( "Invalid fastnumbers version number '{}'".format(fastnumbers_version) ) (major, minor, patch) = match.group(1, 2, 4) return (int(major), int(minor), int(patch)) >= minimum # If the user has fastnumbers installed, they will get great speed # benefits. If not, we use the simulated functions that come with natsort. try: # noinspection PyPackageRequirements from fastnumbers import fast_float, fast_int, __version__ as fn_ver # Require >= version 2.0.0. 
if not is_supported_fastnumbers(fn_ver): raise ImportError # pragma: no cover # For versions of fastnumbers with mapping capability, use that if is_supported_fastnumbers(fn_ver, (5, 0, 0)): del fast_float, fast_int from fastnumbers import try_float, try_int except ImportError: from natsort.compat.fake_fastnumbers import fast_float, fast_int # type: ignore # Re-map the old-or-compatibility functions fast_float/fast_int to the # newer API of try_float/try_int. If we already imported try_float/try_int # then there is nothing to do. if "try_float" not in globals(): def try_float( # type: ignore[no-redef] # noqa: F811 x: Iterable[str], map: bool, nan: float = float("inf"), on_fail: Callable[[str], str] = lambda x: x, ) -> Iterator[StrOrFloat]: assert map is True return (fast_float(y, nan=nan, key=on_fail) for y in x) if "try_int" not in globals(): def try_int( # type: ignore[no-redef] # noqa: F811 x: Iterable[str], map: bool, on_fail: Callable[[str], str] = lambda x: x, ) -> Iterator[StrOrInt]: assert map is True return (fast_int(y, key=on_fail) for y in x) natsort-8.4.0/natsort/compat/locale.py000066400000000000000000000104411444422413600200070ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Interface for natsort to access locale functionality without having to worry about if it is using PyICU or the built-in locale. """ import sys from typing import Callable, Union, cast StrOrBytes = Union[str, bytes] TrxfmFunc = Callable[[str], StrOrBytes] # This string should be sorted after any other byte string because # it contains the max unicode character repeated 20 times. # You would need some odd data to come after that. null_string = "" null_string_max = chr(sys.maxunicode) * 20 # This variable could be str or bytes depending on the locale library # being used, so give the type-checker this information. 
null_string_locale: StrOrBytes
null_string_locale_max: StrOrBytes

# strxfrm can be buggy (especially on OSX and *possibly* some other
# BSD-based systems), so prefer icu if available.
#
# Both branches below define the same set of names (null_string_locale,
# null_string_locale_max, dumb_sort, get_strxfrm, get_thousands_sep and
# get_decimal_point); only the ICU branch additionally defines get_icu_locale.
try:  # noqa: C901
    import icu
    from locale import getlocale

    null_string_locale = b""

    # This string should in theory be sorted after any other byte
    # string because it contains the max byte char repeated many times.
    # You would need some odd data to come after that.
    # NOTE(review): the literal below is the three ASCII bytes "x7f", not the
    # single byte 0x7f -- it looks like a backslash is missing. Confirm the
    # intended value before changing it, since it affects sort order.
    null_string_locale_max = b"x7f" * 50

    def dumb_sort() -> bool:
        """Report whether the locale library is broken; always `False` with ICU."""
        return False

    # If using icu, get the locale from the current global locale,
    def get_icu_locale() -> "icu.Locale":
        """
        Build an ``icu.Locale`` from the locale reported by ``getlocale()``.

        Falls back to a default-constructed ``icu.Locale()`` when either the
        language code or the encoding is not set.
        """
        language_code, encoding = getlocale()
        if language_code is None or encoding is None:  # pragma: no cover
            return icu.Locale()
        return icu.Locale(f"{language_code}.{encoding}")

    def get_strxfrm() -> TrxfmFunc:
        """Return the ``getSortKey`` method of a collator for the current locale."""
        return icu.Collator.createInstance(get_icu_locale()).getSortKey

    def get_thousands_sep() -> str:
        """Return the grouping (thousands) separator symbol for the current locale."""
        sep = icu.DecimalFormatSymbols.kGroupingSeparatorSymbol
        return icu.DecimalFormatSymbols(get_icu_locale()).getSymbol(sep)

    def get_decimal_point() -> str:
        """Return the decimal separator symbol for the current locale."""
        sep = icu.DecimalFormatSymbols.kDecimalSeparatorSymbol
        return icu.DecimalFormatSymbols(get_icu_locale()).getSymbol(sep)

except ImportError:
    import locale
    from locale import strxfrm

    # Without ICU the transformed keys are plain str, so reuse the str
    # placeholders defined earlier in this module.
    null_string_locale = null_string
    null_string_locale_max = null_string_max

    # On some systems, locale is broken and does not sort in the expected
    # order. We will try to detect this and compensate.
    def dumb_sort() -> bool:
        """Return `True` when ``strxfrm`` orders "A" before "a"."""
        return strxfrm("A") < strxfrm("a")

    def get_strxfrm() -> TrxfmFunc:
        """Return the standard library's ``locale.strxfrm``."""
        return strxfrm

    def get_thousands_sep() -> str:
        """
        Return the thousands separator reported by ``locale.localeconv()``.

        When `dumb_sort` reports a broken locale library, the value is looked
        up in a table of known corrections keyed by ``"<language>.<encoding>"``,
        falling back to the reported value for unlisted locales and to ``","``
        when no locale is loaded.
        """
        sep = cast(str, locale.localeconv()["thousands_sep"])
        # If this locale library is broken, some of the thousands separator
        # characters are incorrectly blank. Here is a lookup table of the
        # corrections I am aware of.
        if dumb_sort():
            language_code, encoding = locale.getlocale()
            if language_code is None or encoding is None:
                # No locale loaded, default to ','
                return ","
            loc = f"{language_code}.{encoding}"
            return {
                "de_DE.ISO8859-15": ".",
                "es_ES.ISO8859-1": ".",
                "de_AT.ISO8859-1": ".",
                "de_at": "\xa0",
                "nl_NL.UTF-8": ".",
                "es_es": ".",
                "fr_CH.ISO8859-15": "\xa0",
                "fr_CA.ISO8859-1": "\xa0",
                "de_CH.ISO8859-1": ".",
                "fr_FR.ISO8859-15": "\xa0",
                "nl_NL.ISO8859-1": ".",
                "ca_ES.UTF-8": ".",
                "nl_NL.ISO8859-15": ".",
                "de_ch": "'",
                "ca_es": ".",
                "de_AT.ISO8859-15": ".",
                "ca_ES.ISO8859-1": ".",
                "de_AT.UTF-8": ".",
                "es_ES.UTF-8": ".",
                "fr_fr": "\xa0",
                "es_ES.ISO8859-15": ".",
                "de_DE.ISO8859-1": ".",
                "nl_nl": ".",
                "fr_ch": "\xa0",
                "fr_ca": "\xa0",
                "de_DE.UTF-8": ".",
                "ca_ES.ISO8859-15": ".",
                "de_CH.ISO8859-15": ".",
                "fr_FR.ISO8859-1": "\xa0",
                "fr_CH.ISO8859-1": "\xa0",
                "de_de": ".",
                "fr_FR.UTF-8": "\xa0",
                "fr_CA.ISO8859-15": "\xa0",
            }.get(loc, sep)
        else:
            return sep

    def get_decimal_point() -> str:
        """Return the decimal point reported by ``locale.localeconv()``."""
        return cast(str, locale.localeconv()["decimal_point"])
def decoder(encoding: str) -> Callable[[Any], Any]:
    """
    Build a single-argument function that decodes `bytes` with *encoding*.

    Parameters
    ----------
    encoding : str
        Name of the codec used for decoding. It must be a valid
        unicode codec.

    Returns
    -------
    decode_function
        A callable taking one argument. `bytes` input is decoded with the
        given codec (any `UnicodeErrors` propagate to the caller); input of
        any other type is handed back unchanged.

    See Also
    --------
    as_ascii
    as_utf8

    Examples
    --------

        >>> f = decoder('utf8')
        >>> f(b'bytes') == 'bytes'
        True
        >>> f(12345) == 12345
        True
        >>> # On Python 3, without decoder this would return [b'a10', b'a2']
        >>> natsorted([b'a10', b'a2'], key=decoder('utf8')) == [b'a2', b'a10']
        True
        >>> # On Python 3, without decoder this would raise a TypeError.
        >>> natsorted([b'a10', 'a2'], key=decoder('utf8')) == ['a2', b'a10']
        True

    """
    # Pre-bind the codec so that the result only needs the value to decode.
    decode_with_codec = partial(utils.do_decoding, encoding=encoding)
    return decode_with_codec
def natsort_keygen(
    key: Optional[Callable[[Any], NatsortInType]] = None, alg: NSType = ns.DEFAULT
) -> Callable[[Any], NatsortOutType]:
    """
    Build a natural-sort key function.

    The returned callable is meant to be passed as the `key` argument of
    `sorted`, `list.sort` and similar functions. Its behavior is fixed by
    the arguments given here, including an optional user `key`.

    Parameters
    ----------
    key : callable, optional
        Applied to each input value before it is parsed for numbers.
        It is **not** applied recursively. It should accept a single
        argument and return a single value.
    alg : ns enum, optional
        Selects the algorithm `natsort` uses when sorting. See the
        :class:`ns` class documentation for the available options.
        The default is `ns.INT`.

    Returns
    -------
    out : function
        A key function suitable for natural sorting.

    Raises
    ------
    ValueError
        If *alg* cannot be combined with the `ns` enum.

    See Also
    --------
    natsorted
    natsort_key

    Examples
    --------
    `natsort_keygen` is a convenient way to create a custom key
    to sort lists in-place (for example).::

        >>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
        >>> a.sort(key=natsort_keygen(alg=ns.REAL))
        >>> a
        ['num-3', 'num2', 'num5.10', 'num5.3']

    """
    # Reject anything that cannot be OR'ed together with an ns member.
    try:
        ns.DEFAULT | alg
    except TypeError:
        prefix = "natsort_keygen: 'alg' argument must be from the enum 'ns'"
        raise ValueError(prefix + ", got {}".format(str(alg)))

    compat_locale = natsort.compat.locale

    # A broken locale library is compensated for with the NS_DUMB flag.
    if alg & ns.LOCALEALPHA and compat_locale.dumb_sort():
        alg |= NS_DUMB

    # Pick the placeholder strings handed to the factory functions.
    locale_alpha = alg & ns.LOCALEALPHA
    if alg & ns.NUMAFTER:
        pre_sep = compat_locale.null_string_max
        sep = compat_locale.null_string_locale_max if locale_alpha else pre_sep
    else:
        pre_sep = compat_locale.null_string
        sep = compat_locale.null_string_locale if locale_alpha else pre_sep
    number_regex = utils.regex_chooser(alg)

    # Transformations applied while splitting a string into components.
    transform_input = utils.input_string_transform_factory(alg)
    transform_component = utils.string_component_transform_factory(alg)
    transform_final = utils.final_data_transform_factory(alg, sep, pre_sep)

    # High-level parsers for strings, bytes, and numbers.
    parse_str = utils.parse_string_factory(
        alg,
        sep,
        number_regex.split,
        transform_input,
        transform_component,
        transform_final,
    )
    if alg & ns.PATH:
        parse_str = utils.parse_path_factory(parse_str)
    parse_bytes = utils.parse_bytes_factory(alg)
    parse_num = utils.parse_number_or_none_factory(alg, sep, pre_sep)

    # Bind everything so that the key only needs the value to be sorted.
    return partial(
        utils.natsort_key,
        key=key,
        string_func=parse_str,
        bytes_func=parse_bytes,
        num_func=parse_num,
    )
def humansorted(
    seq: Iterable[T],
    key: Optional[Callable[[T], NatsortInType]] = None,
    reverse: bool = False,
    alg: NSType = ns.DEFAULT,
) -> List[T]:
    """
    Sort naturally with locale-aware handling of non-numeric characters.

    Equivalent to calling ``natsorted`` with ``ns.LOCALE`` added to *alg*.

    Parameters
    ----------
    seq : iterable
        The input to sort.
    key : callable, optional
        Determines how each element of the sequence is sorted.
        It is **not** applied recursively. It should accept a single
        argument and return a single value.
    reverse : {True, False}, optional
        Return the list in reversed sorted order. The default is
        `False`.
    alg : ns enum, optional
        Selects the algorithm `natsort` uses when sorting. See the
        :class:`ns` class documentation for the available options.
        The default is `ns.LOCALE`.

    Returns
    -------
    out : list
        The sorted input.

    See Also
    --------
    index_humansorted : Returns the sorted indexes from `humansorted`.

    Notes
    -----
    Please read :ref:`locale_issues` before using `humansorted`.

    Examples
    --------
    Use `humansorted` just like the builtin `sorted`::

        >>> a = ['Apple', 'Banana', 'apple', 'banana']
        >>> natsorted(a)
        ['Apple', 'Banana', 'apple', 'banana']
        >>> humansorted(a)
        ['apple', 'Apple', 'banana', 'Banana']

    """
    locale_alg = alg | ns.LOCALE
    return natsorted(seq, key=key, reverse=reverse, alg=locale_alg)
def index_natsorted(
    seq: Iterable[T],
    key: Optional[Callable[[T], NatsortInType]] = None,
    reverse: bool = False,
    alg: NSType = ns.DEFAULT,
) -> List[int]:
    """
    Determine the list of the indexes used to sort the input sequence.

    Sorts a sequence naturally, but returns a list of the sorted
    indexes and not the sorted list itself. This list of indexes
    can be used to sort multiple lists by the sorted order of the
    given sequence.

    Parameters
    ----------
    seq : iterable
        The input to sort.
    key : callable, optional
        A key used to determine how to sort each element of the sequence.
        It is **not** applied recursively.
        It should accept a single argument and return a single value.
    reverse : {True, False}, optional
        Return the list in reversed sorted order. The default is
        `False`.
    alg : ns enum, optional
        This option is used to control which algorithm `natsort`
        uses when sorting. For details into these options, please see
        the :class:`ns` class documentation. The default is `ns.INT`.

    Returns
    -------
    out : list
        The ordered indexes of the input.

    See Also
    --------
    natsorted
    order_by_index

    Examples
    --------
    Use index_natsorted if you want to sort multiple lists by the
    sorted order of one list::

        >>> a = ['num3', 'num5', 'num2']
        >>> b = ['foo', 'bar', 'baz']
        >>> index = index_natsorted(a)
        >>> index
        [2, 0, 1]
        >>> # Sort both lists by the sort order of a
        >>> order_by_index(a, index)
        ['num2', 'num3', 'num5']
        >>> order_by_index(b, index)
        ['baz', 'foo', 'bar']

    """
    # Extracts the element from an (index, element) pair. Built once here
    # rather than on every key call.
    element_of = itemgetter(1)

    newkey: Callable[[Tuple[int, T]], NatsortInType]
    if key is None:
        newkey = element_of
    else:
        # Bind the non-None key to a local so the closure below does not
        # need a per-call cast to satisfy the type checker.
        user_key: Callable[[T], NatsortInType] = key

        def newkey(x: Tuple[int, T]) -> NatsortInType:
            return user_key(element_of(x))

    # Pair the index and sequence together, then sort by element
    index_seq_pair = list(enumerate(seq))
    if alg & ns.PRESORT:
        # Sorting by the string form first makes the order of elements with
        # equal natural keys deterministic, because list.sort is stable.
        index_seq_pair.sort(reverse=reverse, key=lambda x: str(element_of(x)))
    index_seq_pair.sort(reverse=reverse, key=natsort_keygen(newkey, alg))
    return [x for x, _ in index_seq_pair]
def order_by_index(
    seq: Sequence[Any], index: Iterable[int], iter: bool = False
) -> Iterable[Any]:
    """
    Reorder a sequence according to a sequence of indexes.

    `index_natsorted` returns the integer indexes describing how its input
    **would** be sorted. Those indexes can then be applied to several
    sequences so that they all follow the sorted order of the first one;
    this function performs that reordering for a single sequence.

    Parameters
    ----------
    seq : sequence
        The sequence to order.
    index : iterable
        The iterable that indicates how to order `seq`.
        It should be the same length as `seq` and consist
        of integers only.
    iter : {True, False}, optional
        If `True`, the ordered sequence is returned as an iterator;
        otherwise it is returned as a list. The default is `False`.

    Returns
    -------
    out : {list, iterator}
        The sequence ordered by `index`, as a `list` or as an
        iterator (depending on the value of `iter`).

    See Also
    --------
    index_natsorted
    index_humansorted
    index_realsorted

    Examples
    --------

    `order_by_index` is a convenience function that helps you apply
    the result of `index_natsorted`::

        >>> a = ['num3', 'num5', 'num2']
        >>> b = ['foo', 'bar', 'baz']
        >>> index = index_natsorted(a)
        >>> index
        [2, 0, 1]
        >>> # Sort both lists by the sort order of a
        >>> order_by_index(a, index)
        ['num2', 'num3', 'num5']
        >>> order_by_index(b, index)
        ['baz', 'foo', 'bar']

    """
    if iter:
        # Lazy form: elements are looked up as the caller consumes them.
        return (seq[position] for position in index)
    return [seq[position] for position in index]
# Choose the implementation based on the host OS
if platform.system() == "Windows":
    from ctypes import wintypes, windll  # type: ignore
    from functools import cmp_to_key

    # Declare the argument and return types of Shlwapi's StrCmpLogicalW
    # for ctypes, then adapt this comparison function into a sort key.
    _windows_sort_cmp = windll.Shlwapi.StrCmpLogicalW
    _windows_sort_cmp.argtypes = [wintypes.LPWSTR, wintypes.LPWSTR]
    _windows_sort_cmp.restype = wintypes.INT
    _winsort_key = cmp_to_key(_windows_sort_cmp)

    def os_sort_keygen(
        key: Optional[Callable[[Any], NatsortInType]] = None
    ) -> Callable[[Any], NatsortOutType]:
        # Each component produced by _split_apply is wrapped in the
        # comparison-based key; the result is a tuple of those keys.
        return cast(
            Callable[[Any], NatsortOutType],
            lambda x: tuple(map(_winsort_key, _split_apply(x, key, treat_base=False))),
        )

else:
    # For UNIX-based platforms, ICU performs MUCH better than locale
    # at replicating the file explorer's sort order. We will use
    # ICU's ability to do basic natural sorting as it also better
    # replicates than what natsort does by default.
    #
    # However, if the user does not have ICU installed then fall back
    # on natsort's default handling for paths with locale turned on
    # which will give good results in most cases (e.g. when there aren't
    # a bunch of special characters).
    try:
        import icu

    except ImportError:
        # No ICU installed
        def os_sort_keygen(
            key: Optional[Callable[[Any], NatsortInType]] = None
        ) -> Callable[[Any], NatsortOutType]:
            return natsort_keygen(key=key, alg=ns.LOCALE | ns.PATH | ns.IGNORECASE)

    else:
        # ICU installed
        def os_sort_keygen(
            key: Optional[Callable[[Any], NatsortInType]] = None
        ) -> Callable[[Any], NatsortOutType]:
            # Create a collator for the current locale with ICU's numeric
            # collation switched on; it is built once per generated key
            # function and captured by the lambda below.
            loc = natsort.compat.locale.get_icu_locale()
            collator = icu.Collator.createInstance(loc)
            collator.setAttribute(
                icu.UCollAttribute.NUMERIC_COLLATION, icu.UCollAttributeValue.ON
            )
            return lambda x: tuple(map(collator.getSortKey, _split_apply(x, key)))
class ns(enum.IntEnum):  # noqa: N801
    """
    Enum to control the `natsort` algorithm.

    This class acts like an enum to control the `natsort` algorithm. The
    user may select several options simultaneously by or'ing the options
    together.  For example, to choose ``ns.INT``, ``ns.PATH``, and
    ``ns.LOCALE``, you could do ``ns.INT | ns.LOCALE | ns.PATH``. Each
    function in the :mod:`natsort` package has an `alg` option that accepts
    this enum to allow fine control over how your input is sorted.

    Each option has a shortened 1- or 2-letter form.

    .. note:: Please read :ref:`locale_issues` before using ``ns.LOCALE``.

    Attributes
    ----------
    INT, I (default)
        The default - parse numbers as integers.
    FLOAT, F
        Tell `natsort` to parse numbers as floats.
    UNSIGNED, U (default)
        Tell `natsort` to ignore any sign (i.e. "-" or "+") to the
        immediate left of a number. This is the default.
    SIGNED, S
        Tell `natsort` to take into account any sign (i.e. "-" or "+")
        to the immediate left of a number.
    REAL, R
        This is a shortcut for ``ns.FLOAT | ns.SIGNED``, which is useful
        when attempting to sort real numbers.
    NOEXP, N
        Tell `natsort` to not search for exponents as part of a float
        number. For example, with `NOEXP` the number "5.6E5" would be
        interpreted as `5.6`, `"E"`, and `5` instead of `560000`.
    NUMAFTER, NA
        Tell `natsort` to sort numbers after non-numbers. By default
        numbers will be ordered before non-numbers.
    PATH, P
        Tell `natsort` to interpret strings as filesystem paths, so they
        will be split according to the filesystem separator
        (i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the
        file extension, if any. Without this, lists of file paths like
        ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be
        sorted properly; 'Folder/' will be placed at the end, not at the
        front. It is the same as setting the old `as_path` option to
        `True`.
    COMPATIBILITYNORMALIZE, CN
        Use the "NFKD" unicode normalization form on input rather than the
        default "NFD". This will transform characters such as '⑦' into
        '7'. Please see https://stackoverflow.com/a/7934397/1399279,
        https://stackoverflow.com/a/7931547/1399279,
        and https://unicode.org/reports/tr15/ for full details into unicode
        normalization.
    LOCALE, L
        Tell `natsort` to be locale-aware when sorting. This includes both
        proper sorting of alphabetical characters as well as proper
        handling of locale-dependent decimal separators and thousands
        separators. This is a shortcut for
        ``ns.LOCALEALPHA | ns.LOCALENUM``.
        Your sorting results will vary depending on your current locale.
    LOCALEALPHA, LA
        Tell `natsort` to be locale-aware when sorting, but only for
        alphabetical characters.
    LOCALENUM, LN
        Tell `natsort` to be locale-aware when sorting, but only for
        decimal separators and thousands separators.
    IGNORECASE, IC
        Tell `natsort` to ignore case when sorting.  For example,
        ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as
        ``['apple', 'Apple', 'Banana', 'banana']``.
    LOWERCASEFIRST, LF
        Tell `natsort` to put lowercase letters before uppercase letters
        when sorting.  For example,
        ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as
        ``['apple', 'banana', 'Apple', 'Banana']`` (the default order
        would be ``['Apple', 'Banana', 'apple', 'banana']`` which is
        the order from a purely ordinal sort).
        Useless when used with `IGNORECASE`. Please note that if used
        with ``LOCALE``, this actually has the reverse effect and will
        put uppercase first (this is because ``LOCALE`` already puts
        lowercase first); you may use this to your advantage if you
        need to modify the order returned with ``LOCALE``.
    GROUPLETTERS, G
        Tell `natsort` to group lowercase and uppercase letters together
        when sorting.  For example,
        ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as
        ``['Apple', 'apple', 'Banana', 'banana']``.
        Useless when used with `IGNORECASE`; use with `LOWERCASEFIRST`
        to reverse the order of upper and lower case. Generally not
        needed with `LOCALE`.
    CAPITALFIRST, C
        Only used when `LOCALE` is enabled. Tell `natsort` to put all
        capitalized words before non-capitalized words. This is essentially
        the inverse of `GROUPLETTERS`, and is the default Python sorting
        behavior without `LOCALE`.
    UNGROUPLETTERS, UG
        An alias for `CAPITALFIRST`.
    NANLAST, NL
        If a NaN shows up in the input, this instructs `natsort` to
        treat these as +Infinity and place them after all the other numbers.
        By default, a NaN will be treated as -Infinity and be placed first.
        Note that ``None`` is treated like NaN internally.
    PRESORT, PS
        Sort the input as strings before sorting with the `natsort`
        algorithm. This can help eliminate inconsistent sorting in cases
        where two different strings represent the same number. For example,
        "a1" and "a01" both are internally represented as ("a", 1), so
        without `PRESORT` the order of these two values would depend on
        the order they appeared in the input (because Python's `sorted`
        is a stable sorting algorithm).

    Notes
    -----
    If you prefer to use `import natsort as ns` as opposed to
    `from natsort import natsorted, ns`, the `ns` options are
    available as top-level imports.

    >>> import natsort as ns
    >>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
    >>> ns.natsorted(a, alg=ns.REAL) == ns.natsorted(a, alg=ns.ns.REAL)
    True

    """

    # The below are the base ns options. The values will be stored as powers
    # of two so bitmasks can be used to extract the user's requested options.
    # Each next(_counter) call yields the next bit position, so the order of
    # the assignments below determines the numeric value of every option.
    FLOAT = F = 1 << next(_counter)
    SIGNED = S = 1 << next(_counter)
    NOEXP = N = 1 << next(_counter)
    PATH = P = 1 << next(_counter)
    LOCALEALPHA = LA = 1 << next(_counter)
    LOCALENUM = LN = 1 << next(_counter)
    IGNORECASE = IC = 1 << next(_counter)
    LOWERCASEFIRST = LF = 1 << next(_counter)
    GROUPLETTERS = G = 1 << next(_counter)
    UNGROUPLETTERS = CAPITALFIRST = C = UG = 1 << next(_counter)
    NANLAST = NL = 1 << next(_counter)
    COMPATIBILITYNORMALIZE = CN = 1 << next(_counter)
    NUMAFTER = NA = 1 << next(_counter)
    PRESORT = PS = 1 << next(_counter)

    # Following were previously options but are now defaults.
    # They are all zero, so OR'ing them into a selection changes nothing.
    DEFAULT = 0
    INT = I = 0  # noqa: E741
    UNSIGNED = U = 0

    # The following are bitwise-OR combinations of other fields.
    REAL = R = FLOAT | SIGNED
    LOCALE = L = LOCALEALPHA | LOCALENUM


# The below is private for internal use only.
NS_DUMB = 1 << 31 # An integer can be used in place of the ns enum so make the # type to use for this enum a union of it and an inteter. NSType = typing.Union[ns, int] natsort-8.4.0/natsort/py.typed000066400000000000000000000000001444422413600164000ustar00rootroot00000000000000natsort-8.4.0/natsort/unicode_numbers.py000066400000000000000000000023751444422413600204550ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Pre-determine the collection of unicode decimals, digits, and numerals. """ import unicodedata from natsort.unicode_numeric_hex import numeric_hex # Convert each hex into the literal Unicode character. # Stop if a ValueError is raised in case of a narrow Unicode build. # The extra check with unicodedata is in case this Python version # does not support some characters. numeric_chars = [] for a in numeric_hex: try: character = chr(a) except ValueError: # pragma: no cover break if unicodedata.numeric(character, None) is None: continue # pragma: no cover numeric_chars.append(character) # The digit characters are a subset of the numerals. digit_chars = [a for a in numeric_chars if unicodedata.digit(a, None) is not None] # The decimal characters are a subset of the numerals # (probably of the digits, but let's be safe). decimal_chars = [a for a in numeric_chars if unicodedata.decimal(a, None) is not None] # Create a single string with the above data. decimals = "".join(decimal_chars) digits = "".join(digit_chars) numeric = "".join(numeric_chars) digits_no_decimals = "".join([x for x in digits if x not in decimals]) numeric_no_decimals = "".join([x for x in numeric if x not in decimals]) natsort-8.4.0/natsort/unicode_numeric_hex.py000066400000000000000000000556331444422413600213150ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Contains all possible non-ASCII unicode numbers. """ # Rather than determine what unicode characters are numeric on the fly which # would incur a startup runtime penalty, the hex values are hard-coded below. 
numeric_hex = ( 0xB2, 0xB3, 0xB9, 0xBC, 0xBD, 0xBE, 0x660, 0x661, 0x662, 0x663, 0x664, 0x665, 0x666, 0x667, 0x668, 0x669, 0x6F0, 0x6F1, 0x6F2, 0x6F3, 0x6F4, 0x6F5, 0x6F6, 0x6F7, 0x6F8, 0x6F9, 0x7C0, 0x7C1, 0x7C2, 0x7C3, 0x7C4, 0x7C5, 0x7C6, 0x7C7, 0x7C8, 0x7C9, 0x966, 0x967, 0x968, 0x969, 0x96A, 0x96B, 0x96C, 0x96D, 0x96E, 0x96F, 0x9E6, 0x9E7, 0x9E8, 0x9E9, 0x9EA, 0x9EB, 0x9EC, 0x9ED, 0x9EE, 0x9EF, 0x9F4, 0x9F5, 0x9F6, 0x9F7, 0x9F8, 0x9F9, 0xA66, 0xA67, 0xA68, 0xA69, 0xA6A, 0xA6B, 0xA6C, 0xA6D, 0xA6E, 0xA6F, 0xAE6, 0xAE7, 0xAE8, 0xAE9, 0xAEA, 0xAEB, 0xAEC, 0xAED, 0xAEE, 0xAEF, 0xB66, 0xB67, 0xB68, 0xB69, 0xB6A, 0xB6B, 0xB6C, 0xB6D, 0xB6E, 0xB6F, 0xB72, 0xB73, 0xB74, 0xB75, 0xB76, 0xB77, 0xBE6, 0xBE7, 0xBE8, 0xBE9, 0xBEA, 0xBEB, 0xBEC, 0xBED, 0xBEE, 0xBEF, 0xBF0, 0xBF1, 0xBF2, 0xC66, 0xC67, 0xC68, 0xC69, 0xC6A, 0xC6B, 0xC6C, 0xC6D, 0xC6E, 0xC6F, 0xC78, 0xC79, 0xC7A, 0xC7B, 0xC7C, 0xC7D, 0xC7E, 0xCE6, 0xCE7, 0xCE8, 0xCE9, 0xCEA, 0xCEB, 0xCEC, 0xCED, 0xCEE, 0xCEF, 0xD58, 0xD59, 0xD5A, 0xD5B, 0xD5C, 0xD5D, 0xD5E, 0xD66, 0xD67, 0xD68, 0xD69, 0xD6A, 0xD6B, 0xD6C, 0xD6D, 0xD6E, 0xD6F, 0xD70, 0xD71, 0xD72, 0xD73, 0xD74, 0xD75, 0xD76, 0xD77, 0xD78, 0xDE6, 0xDE7, 0xDE8, 0xDE9, 0xDEA, 0xDEB, 0xDEC, 0xDED, 0xDEE, 0xDEF, 0xE50, 0xE51, 0xE52, 0xE53, 0xE54, 0xE55, 0xE56, 0xE57, 0xE58, 0xE59, 0xED0, 0xED1, 0xED2, 0xED3, 0xED4, 0xED5, 0xED6, 0xED7, 0xED8, 0xED9, 0xF20, 0xF21, 0xF22, 0xF23, 0xF24, 0xF25, 0xF26, 0xF27, 0xF28, 0xF29, 0xF2A, 0xF2B, 0xF2C, 0xF2D, 0xF2E, 0xF2F, 0xF30, 0xF31, 0xF32, 0xF33, 0x1040, 0x1041, 0x1042, 0x1043, 0x1044, 0x1045, 0x1046, 0x1047, 0x1048, 0x1049, 0x1090, 0x1091, 0x1092, 0x1093, 0x1094, 0x1095, 0x1096, 0x1097, 0x1098, 0x1099, 0x1369, 0x136A, 0x136B, 0x136C, 0x136D, 0x136E, 0x136F, 0x1370, 0x1371, 0x1372, 0x1373, 0x1374, 0x1375, 0x1376, 0x1377, 0x1378, 0x1379, 0x137A, 0x137B, 0x137C, 0x16EE, 0x16EF, 0x16F0, 0x17E0, 0x17E1, 0x17E2, 0x17E3, 0x17E4, 0x17E5, 0x17E6, 0x17E7, 0x17E8, 0x17E9, 0x17F0, 0x17F1, 0x17F2, 0x17F3, 0x17F4, 0x17F5, 0x17F6, 0x17F7, 
0x17F8, 0x17F9, 0x1810, 0x1811, 0x1812, 0x1813, 0x1814, 0x1815, 0x1816, 0x1817, 0x1818, 0x1819, 0x1946, 0x1947, 0x1948, 0x1949, 0x194A, 0x194B, 0x194C, 0x194D, 0x194E, 0x194F, 0x19D0, 0x19D1, 0x19D2, 0x19D3, 0x19D4, 0x19D5, 0x19D6, 0x19D7, 0x19D8, 0x19D9, 0x19DA, 0x1A80, 0x1A81, 0x1A82, 0x1A83, 0x1A84, 0x1A85, 0x1A86, 0x1A87, 0x1A88, 0x1A89, 0x1A90, 0x1A91, 0x1A92, 0x1A93, 0x1A94, 0x1A95, 0x1A96, 0x1A97, 0x1A98, 0x1A99, 0x1B50, 0x1B51, 0x1B52, 0x1B53, 0x1B54, 0x1B55, 0x1B56, 0x1B57, 0x1B58, 0x1B59, 0x1BB0, 0x1BB1, 0x1BB2, 0x1BB3, 0x1BB4, 0x1BB5, 0x1BB6, 0x1BB7, 0x1BB8, 0x1BB9, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C47, 0x1C48, 0x1C49, 0x1C50, 0x1C51, 0x1C52, 0x1C53, 0x1C54, 0x1C55, 0x1C56, 0x1C57, 0x1C58, 0x1C59, 0x2070, 0x2074, 0x2075, 0x2076, 0x2077, 0x2078, 0x2079, 0x2080, 0x2081, 0x2082, 0x2083, 0x2084, 0x2085, 0x2086, 0x2087, 0x2088, 0x2089, 0x2150, 0x2151, 0x2152, 0x2153, 0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159, 0x215A, 0x215B, 0x215C, 0x215D, 0x215E, 0x215F, 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216A, 0x216B, 0x216C, 0x216D, 0x216E, 0x216F, 0x2170, 0x2171, 0x2172, 0x2173, 0x2174, 0x2175, 0x2176, 0x2177, 0x2178, 0x2179, 0x217A, 0x217B, 0x217C, 0x217D, 0x217E, 0x217F, 0x2180, 0x2181, 0x2182, 0x2185, 0x2186, 0x2187, 0x2188, 0x2189, 0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466, 0x2467, 0x2468, 0x2469, 0x246A, 0x246B, 0x246C, 0x246D, 0x246E, 0x246F, 0x2470, 0x2471, 0x2472, 0x2473, 0x2474, 0x2475, 0x2476, 0x2477, 0x2478, 0x2479, 0x247A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 0x2487, 0x2488, 0x2489, 0x248A, 0x248B, 0x248C, 0x248D, 0x248E, 0x248F, 0x2490, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 0x2497, 0x2498, 0x2499, 0x249A, 0x249B, 0x24EA, 0x24EB, 0x24EC, 0x24ED, 0x24EE, 0x24EF, 0x24F0, 0x24F1, 0x24F2, 0x24F3, 0x24F4, 0x24F5, 0x24F6, 0x24F7, 0x24F8, 0x24F9, 0x24FA, 0x24FB, 0x24FC, 0x24FD, 0x24FE, 0x24FF, 0x2776, 0x2777, 
0x2778, 0x2779, 0x277A, 0x277B, 0x277C, 0x277D, 0x277E, 0x277F, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786, 0x2787, 0x2788, 0x2789, 0x278A, 0x278B, 0x278C, 0x278D, 0x278E, 0x278F, 0x2790, 0x2791, 0x2792, 0x2793, 0x2CFD, 0x3007, 0x3021, 0x3022, 0x3023, 0x3024, 0x3025, 0x3026, 0x3027, 0x3028, 0x3029, 0x3038, 0x3039, 0x303A, 0x3192, 0x3193, 0x3194, 0x3195, 0x3220, 0x3221, 0x3222, 0x3223, 0x3224, 0x3225, 0x3226, 0x3227, 0x3228, 0x3229, 0x3248, 0x3249, 0x324A, 0x324B, 0x324C, 0x324D, 0x324E, 0x324F, 0x3251, 0x3252, 0x3253, 0x3254, 0x3255, 0x3256, 0x3257, 0x3258, 0x3259, 0x325A, 0x325B, 0x325C, 0x325D, 0x325E, 0x325F, 0x3280, 0x3281, 0x3282, 0x3283, 0x3284, 0x3285, 0x3286, 0x3287, 0x3288, 0x3289, 0x32B1, 0x32B2, 0x32B3, 0x32B4, 0x32B5, 0x32B6, 0x32B7, 0x32B8, 0x32B9, 0x32BA, 0x32BB, 0x32BC, 0x32BD, 0x32BE, 0x32BF, 0x3405, 0x3483, 0x382A, 0x3B4D, 0x4E00, 0x4E03, 0x4E07, 0x4E09, 0x4E5D, 0x4E8C, 0x4E94, 0x4E96, 0x4EBF, 0x4EC0, 0x4EDF, 0x4EE8, 0x4F0D, 0x4F70, 0x5104, 0x5146, 0x5169, 0x516B, 0x516D, 0x5341, 0x5343, 0x5344, 0x5345, 0x534C, 0x53C1, 0x53C2, 0x53C3, 0x53C4, 0x56DB, 0x58F1, 0x58F9, 0x5E7A, 0x5EFE, 0x5EFF, 0x5F0C, 0x5F0D, 0x5F0E, 0x5F10, 0x62FE, 0x634C, 0x67D2, 0x6F06, 0x7396, 0x767E, 0x8086, 0x842C, 0x8CAE, 0x8CB3, 0x8D30, 0x9621, 0x9646, 0x964C, 0x9678, 0x96F6, 0xA620, 0xA621, 0xA622, 0xA623, 0xA624, 0xA625, 0xA626, 0xA627, 0xA628, 0xA629, 0xA6E6, 0xA6E7, 0xA6E8, 0xA6E9, 0xA6EA, 0xA6EB, 0xA6EC, 0xA6ED, 0xA6EE, 0xA6EF, 0xA830, 0xA831, 0xA832, 0xA833, 0xA834, 0xA835, 0xA8D0, 0xA8D1, 0xA8D2, 0xA8D3, 0xA8D4, 0xA8D5, 0xA8D6, 0xA8D7, 0xA8D8, 0xA8D9, 0xA900, 0xA901, 0xA902, 0xA903, 0xA904, 0xA905, 0xA906, 0xA907, 0xA908, 0xA909, 0xA9D0, 0xA9D1, 0xA9D2, 0xA9D3, 0xA9D4, 0xA9D5, 0xA9D6, 0xA9D7, 0xA9D8, 0xA9D9, 0xA9F0, 0xA9F1, 0xA9F2, 0xA9F3, 0xA9F4, 0xA9F5, 0xA9F6, 0xA9F7, 0xA9F8, 0xA9F9, 0xAA50, 0xAA51, 0xAA52, 0xAA53, 0xAA54, 0xAA55, 0xAA56, 0xAA57, 0xAA58, 0xAA59, 0xABF0, 0xABF1, 0xABF2, 0xABF3, 0xABF4, 0xABF5, 0xABF6, 0xABF7, 0xABF8, 0xABF9, 0xF96B, 0xF973, 
0xF978, 0xF9B2, 0xF9D1, 0xF9D3, 0xF9FD, 0xFF10, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 0xFF18, 0xFF19, 0x10107, 0x10108, 0x10109, 0x1010A, 0x1010B, 0x1010C, 0x1010D, 0x1010E, 0x1010F, 0x10110, 0x10111, 0x10112, 0x10113, 0x10114, 0x10115, 0x10116, 0x10117, 0x10118, 0x10119, 0x1011A, 0x1011B, 0x1011C, 0x1011D, 0x1011E, 0x1011F, 0x10120, 0x10121, 0x10122, 0x10123, 0x10124, 0x10125, 0x10126, 0x10127, 0x10128, 0x10129, 0x1012A, 0x1012B, 0x1012C, 0x1012D, 0x1012E, 0x1012F, 0x10130, 0x10131, 0x10132, 0x10133, 0x10140, 0x10141, 0x10142, 0x10143, 0x10144, 0x10145, 0x10146, 0x10147, 0x10148, 0x10149, 0x1014A, 0x1014B, 0x1014C, 0x1014D, 0x1014E, 0x1014F, 0x10150, 0x10151, 0x10152, 0x10153, 0x10154, 0x10155, 0x10156, 0x10157, 0x10158, 0x10159, 0x1015A, 0x1015B, 0x1015C, 0x1015D, 0x1015E, 0x1015F, 0x10160, 0x10161, 0x10162, 0x10163, 0x10164, 0x10165, 0x10166, 0x10167, 0x10168, 0x10169, 0x1016A, 0x1016B, 0x1016C, 0x1016D, 0x1016E, 0x1016F, 0x10170, 0x10171, 0x10172, 0x10173, 0x10174, 0x10175, 0x10176, 0x10177, 0x10178, 0x1018A, 0x1018B, 0x102E1, 0x102E2, 0x102E3, 0x102E4, 0x102E5, 0x102E6, 0x102E7, 0x102E8, 0x102E9, 0x102EA, 0x102EB, 0x102EC, 0x102ED, 0x102EE, 0x102EF, 0x102F0, 0x102F1, 0x102F2, 0x102F3, 0x102F4, 0x102F5, 0x102F6, 0x102F7, 0x102F8, 0x102F9, 0x102FA, 0x102FB, 0x10320, 0x10321, 0x10322, 0x10323, 0x10341, 0x1034A, 0x103D1, 0x103D2, 0x103D3, 0x103D4, 0x103D5, 0x104A0, 0x104A1, 0x104A2, 0x104A3, 0x104A4, 0x104A5, 0x104A6, 0x104A7, 0x104A8, 0x104A9, 0x10858, 0x10859, 0x1085A, 0x1085B, 0x1085C, 0x1085D, 0x1085E, 0x1085F, 0x10879, 0x1087A, 0x1087B, 0x1087C, 0x1087D, 0x1087E, 0x1087F, 0x108A7, 0x108A8, 0x108A9, 0x108AA, 0x108AB, 0x108AC, 0x108AD, 0x108AE, 0x108AF, 0x108FB, 0x108FC, 0x108FD, 0x108FE, 0x108FF, 0x10916, 0x10917, 0x10918, 0x10919, 0x1091A, 0x1091B, 0x109BC, 0x109BD, 0x109C0, 0x109C1, 0x109C2, 0x109C3, 0x109C4, 0x109C5, 0x109C6, 0x109C7, 0x109C8, 0x109C9, 0x109CA, 0x109CB, 0x109CC, 0x109CD, 0x109CE, 0x109CF, 0x109D2, 0x109D3, 0x109D4, 
0x109D5, 0x109D6, 0x109D7, 0x109D8, 0x109D9, 0x109DA, 0x109DB, 0x109DC, 0x109DD, 0x109DE, 0x109DF, 0x109E0, 0x109E1, 0x109E2, 0x109E3, 0x109E4, 0x109E5, 0x109E6, 0x109E7, 0x109E8, 0x109E9, 0x109EA, 0x109EB, 0x109EC, 0x109ED, 0x109EE, 0x109EF, 0x109F0, 0x109F1, 0x109F2, 0x109F3, 0x109F4, 0x109F5, 0x109F6, 0x109F7, 0x109F8, 0x109F9, 0x109FA, 0x109FB, 0x109FC, 0x109FD, 0x109FE, 0x109FF, 0x10A40, 0x10A41, 0x10A42, 0x10A43, 0x10A44, 0x10A45, 0x10A46, 0x10A47, 0x10A48, 0x10A7D, 0x10A7E, 0x10A9D, 0x10A9E, 0x10A9F, 0x10AEB, 0x10AEC, 0x10AED, 0x10AEE, 0x10AEF, 0x10B58, 0x10B59, 0x10B5A, 0x10B5B, 0x10B5C, 0x10B5D, 0x10B5E, 0x10B5F, 0x10B78, 0x10B79, 0x10B7A, 0x10B7B, 0x10B7C, 0x10B7D, 0x10B7E, 0x10B7F, 0x10BA9, 0x10BAA, 0x10BAB, 0x10BAC, 0x10BAD, 0x10BAE, 0x10BAF, 0x10CFA, 0x10CFB, 0x10CFC, 0x10CFD, 0x10CFE, 0x10CFF, 0x10D30, 0x10D31, 0x10D32, 0x10D33, 0x10D34, 0x10D35, 0x10D36, 0x10D37, 0x10D38, 0x10D39, 0x10E60, 0x10E61, 0x10E62, 0x10E63, 0x10E64, 0x10E65, 0x10E66, 0x10E67, 0x10E68, 0x10E69, 0x10E6A, 0x10E6B, 0x10E6C, 0x10E6D, 0x10E6E, 0x10E6F, 0x10E70, 0x10E71, 0x10E72, 0x10E73, 0x10E74, 0x10E75, 0x10E76, 0x10E77, 0x10E78, 0x10E79, 0x10E7A, 0x10E7B, 0x10E7C, 0x10E7D, 0x10E7E, 0x10F1D, 0x10F1E, 0x10F1F, 0x10F20, 0x10F21, 0x10F22, 0x10F23, 0x10F24, 0x10F25, 0x10F26, 0x10F51, 0x10F52, 0x10F53, 0x10F54, 0x10FC5, 0x10FC6, 0x10FC7, 0x10FC8, 0x10FC9, 0x10FCA, 0x10FCB, 0x11052, 0x11053, 0x11054, 0x11055, 0x11056, 0x11057, 0x11058, 0x11059, 0x1105A, 0x1105B, 0x1105C, 0x1105D, 0x1105E, 0x1105F, 0x11060, 0x11061, 0x11062, 0x11063, 0x11064, 0x11065, 0x11066, 0x11067, 0x11068, 0x11069, 0x1106A, 0x1106B, 0x1106C, 0x1106D, 0x1106E, 0x1106F, 0x110F0, 0x110F1, 0x110F2, 0x110F3, 0x110F4, 0x110F5, 0x110F6, 0x110F7, 0x110F8, 0x110F9, 0x11136, 0x11137, 0x11138, 0x11139, 0x1113A, 0x1113B, 0x1113C, 0x1113D, 0x1113E, 0x1113F, 0x111D0, 0x111D1, 0x111D2, 0x111D3, 0x111D4, 0x111D5, 0x111D6, 0x111D7, 0x111D8, 0x111D9, 0x111E1, 0x111E2, 0x111E3, 0x111E4, 0x111E5, 0x111E6, 0x111E7, 0x111E8, 0x111E9, 
0x111EA, 0x111EB, 0x111EC, 0x111ED, 0x111EE, 0x111EF, 0x111F0, 0x111F1, 0x111F2, 0x111F3, 0x111F4, 0x112F0, 0x112F1, 0x112F2, 0x112F3, 0x112F4, 0x112F5, 0x112F6, 0x112F7, 0x112F8, 0x112F9, 0x11450, 0x11451, 0x11452, 0x11453, 0x11454, 0x11455, 0x11456, 0x11457, 0x11458, 0x11459, 0x114D0, 0x114D1, 0x114D2, 0x114D3, 0x114D4, 0x114D5, 0x114D6, 0x114D7, 0x114D8, 0x114D9, 0x11650, 0x11651, 0x11652, 0x11653, 0x11654, 0x11655, 0x11656, 0x11657, 0x11658, 0x11659, 0x116C0, 0x116C1, 0x116C2, 0x116C3, 0x116C4, 0x116C5, 0x116C6, 0x116C7, 0x116C8, 0x116C9, 0x11730, 0x11731, 0x11732, 0x11733, 0x11734, 0x11735, 0x11736, 0x11737, 0x11738, 0x11739, 0x1173A, 0x1173B, 0x118E0, 0x118E1, 0x118E2, 0x118E3, 0x118E4, 0x118E5, 0x118E6, 0x118E7, 0x118E8, 0x118E9, 0x118EA, 0x118EB, 0x118EC, 0x118ED, 0x118EE, 0x118EF, 0x118F0, 0x118F1, 0x118F2, 0x11950, 0x11951, 0x11952, 0x11953, 0x11954, 0x11955, 0x11956, 0x11957, 0x11958, 0x11959, 0x11C50, 0x11C51, 0x11C52, 0x11C53, 0x11C54, 0x11C55, 0x11C56, 0x11C57, 0x11C58, 0x11C59, 0x11C5A, 0x11C5B, 0x11C5C, 0x11C5D, 0x11C5E, 0x11C5F, 0x11C60, 0x11C61, 0x11C62, 0x11C63, 0x11C64, 0x11C65, 0x11C66, 0x11C67, 0x11C68, 0x11C69, 0x11C6A, 0x11C6B, 0x11C6C, 0x11D50, 0x11D51, 0x11D52, 0x11D53, 0x11D54, 0x11D55, 0x11D56, 0x11D57, 0x11D58, 0x11D59, 0x11DA0, 0x11DA1, 0x11DA2, 0x11DA3, 0x11DA4, 0x11DA5, 0x11DA6, 0x11DA7, 0x11DA8, 0x11DA9, 0x11FC0, 0x11FC1, 0x11FC2, 0x11FC3, 0x11FC4, 0x11FC5, 0x11FC6, 0x11FC7, 0x11FC8, 0x11FC9, 0x11FCA, 0x11FCB, 0x11FCC, 0x11FCD, 0x11FCE, 0x11FCF, 0x11FD0, 0x11FD1, 0x11FD2, 0x11FD3, 0x11FD4, 0x12400, 0x12401, 0x12402, 0x12403, 0x12404, 0x12405, 0x12406, 0x12407, 0x12408, 0x12409, 0x1240A, 0x1240B, 0x1240C, 0x1240D, 0x1240E, 0x1240F, 0x12410, 0x12411, 0x12412, 0x12413, 0x12414, 0x12415, 0x12416, 0x12417, 0x12418, 0x12419, 0x1241A, 0x1241B, 0x1241C, 0x1241D, 0x1241E, 0x1241F, 0x12420, 0x12421, 0x12422, 0x12423, 0x12424, 0x12425, 0x12426, 0x12427, 0x12428, 0x12429, 0x1242A, 0x1242B, 0x1242C, 0x1242D, 0x1242E, 0x1242F, 0x12430, 0x12431, 
0x12432, 0x12433, 0x12434, 0x12435, 0x12436, 0x12437, 0x12438, 0x12439, 0x1243A, 0x1243B, 0x1243C, 0x1243D, 0x1243E, 0x1243F, 0x12440, 0x12441, 0x12442, 0x12443, 0x12444, 0x12445, 0x12446, 0x12447, 0x12448, 0x12449, 0x1244A, 0x1244B, 0x1244C, 0x1244D, 0x1244E, 0x1244F, 0x12450, 0x12451, 0x12452, 0x12453, 0x12454, 0x12455, 0x12456, 0x12457, 0x12458, 0x12459, 0x1245A, 0x1245B, 0x1245C, 0x1245D, 0x1245E, 0x1245F, 0x12460, 0x12461, 0x12462, 0x12463, 0x12464, 0x12465, 0x12466, 0x12467, 0x12468, 0x12469, 0x1246A, 0x1246B, 0x1246C, 0x1246D, 0x1246E, 0x16A60, 0x16A61, 0x16A62, 0x16A63, 0x16A64, 0x16A65, 0x16A66, 0x16A67, 0x16A68, 0x16A69, 0x16AC0, 0x16AC1, 0x16AC2, 0x16AC3, 0x16AC4, 0x16AC5, 0x16AC6, 0x16AC7, 0x16AC8, 0x16AC9, 0x16B50, 0x16B51, 0x16B52, 0x16B53, 0x16B54, 0x16B55, 0x16B56, 0x16B57, 0x16B58, 0x16B59, 0x16B5B, 0x16B5C, 0x16B5D, 0x16B5E, 0x16B5F, 0x16B60, 0x16B61, 0x16E80, 0x16E81, 0x16E82, 0x16E83, 0x16E84, 0x16E85, 0x16E86, 0x16E87, 0x16E88, 0x16E89, 0x16E8A, 0x16E8B, 0x16E8C, 0x16E8D, 0x16E8E, 0x16E8F, 0x16E90, 0x16E91, 0x16E92, 0x16E93, 0x16E94, 0x16E95, 0x16E96, 0x1D2E0, 0x1D2E1, 0x1D2E2, 0x1D2E3, 0x1D2E4, 0x1D2E5, 0x1D2E6, 0x1D2E7, 0x1D2E8, 0x1D2E9, 0x1D2EA, 0x1D2EB, 0x1D2EC, 0x1D2ED, 0x1D2EE, 0x1D2EF, 0x1D2F0, 0x1D2F1, 0x1D2F2, 0x1D2F3, 0x1D360, 0x1D361, 0x1D362, 0x1D363, 0x1D364, 0x1D365, 0x1D366, 0x1D367, 0x1D368, 0x1D369, 0x1D36A, 0x1D36B, 0x1D36C, 0x1D36D, 0x1D36E, 0x1D36F, 0x1D370, 0x1D371, 0x1D372, 0x1D373, 0x1D374, 0x1D375, 0x1D376, 0x1D377, 0x1D378, 0x1D7CE, 0x1D7CF, 0x1D7D0, 0x1D7D1, 0x1D7D2, 0x1D7D3, 0x1D7D4, 0x1D7D5, 0x1D7D6, 0x1D7D7, 0x1D7D8, 0x1D7D9, 0x1D7DA, 0x1D7DB, 0x1D7DC, 0x1D7DD, 0x1D7DE, 0x1D7DF, 0x1D7E0, 0x1D7E1, 0x1D7E2, 0x1D7E3, 0x1D7E4, 0x1D7E5, 0x1D7E6, 0x1D7E7, 0x1D7E8, 0x1D7E9, 0x1D7EA, 0x1D7EB, 0x1D7EC, 0x1D7ED, 0x1D7EE, 0x1D7EF, 0x1D7F0, 0x1D7F1, 0x1D7F2, 0x1D7F3, 0x1D7F4, 0x1D7F5, 0x1D7F6, 0x1D7F7, 0x1D7F8, 0x1D7F9, 0x1D7FA, 0x1D7FB, 0x1D7FC, 0x1D7FD, 0x1D7FE, 0x1D7FF, 0x1E140, 0x1E141, 0x1E142, 0x1E143, 0x1E144, 0x1E145, 
0x1E146, 0x1E147, 0x1E148, 0x1E149, 0x1E2F0, 0x1E2F1, 0x1E2F2, 0x1E2F3, 0x1E2F4, 0x1E2F5, 0x1E2F6, 0x1E2F7, 0x1E2F8, 0x1E2F9, 0x1E8C7, 0x1E8C8, 0x1E8C9, 0x1E8CA, 0x1E8CB, 0x1E8CC, 0x1E8CD, 0x1E8CE, 0x1E8CF, 0x1E950, 0x1E951, 0x1E952, 0x1E953, 0x1E954, 0x1E955, 0x1E956, 0x1E957, 0x1E958, 0x1E959, 0x1EC71, 0x1EC72, 0x1EC73, 0x1EC74, 0x1EC75, 0x1EC76, 0x1EC77, 0x1EC78, 0x1EC79, 0x1EC7A, 0x1EC7B, 0x1EC7C, 0x1EC7D, 0x1EC7E, 0x1EC7F, 0x1EC80, 0x1EC81, 0x1EC82, 0x1EC83, 0x1EC84, 0x1EC85, 0x1EC86, 0x1EC87, 0x1EC88, 0x1EC89, 0x1EC8A, 0x1EC8B, 0x1EC8C, 0x1EC8D, 0x1EC8E, 0x1EC8F, 0x1EC90, 0x1EC91, 0x1EC92, 0x1EC93, 0x1EC94, 0x1EC95, 0x1EC96, 0x1EC97, 0x1EC98, 0x1EC99, 0x1EC9A, 0x1EC9B, 0x1EC9C, 0x1EC9D, 0x1EC9E, 0x1EC9F, 0x1ECA0, 0x1ECA1, 0x1ECA2, 0x1ECA3, 0x1ECA4, 0x1ECA5, 0x1ECA6, 0x1ECA7, 0x1ECA8, 0x1ECA9, 0x1ECAA, 0x1ECAB, 0x1ECAD, 0x1ECAE, 0x1ECAF, 0x1ECB1, 0x1ECB2, 0x1ECB3, 0x1ECB4, 0x1ED01, 0x1ED02, 0x1ED03, 0x1ED04, 0x1ED05, 0x1ED06, 0x1ED07, 0x1ED08, 0x1ED09, 0x1ED0A, 0x1ED0B, 0x1ED0C, 0x1ED0D, 0x1ED0E, 0x1ED0F, 0x1ED10, 0x1ED11, 0x1ED12, 0x1ED13, 0x1ED14, 0x1ED15, 0x1ED16, 0x1ED17, 0x1ED18, 0x1ED19, 0x1ED1A, 0x1ED1B, 0x1ED1C, 0x1ED1D, 0x1ED1E, 0x1ED1F, 0x1ED20, 0x1ED21, 0x1ED22, 0x1ED23, 0x1ED24, 0x1ED25, 0x1ED26, 0x1ED27, 0x1ED28, 0x1ED29, 0x1ED2A, 0x1ED2B, 0x1ED2C, 0x1ED2D, 0x1ED2F, 0x1ED30, 0x1ED31, 0x1ED32, 0x1ED33, 0x1ED34, 0x1ED35, 0x1ED36, 0x1ED37, 0x1ED38, 0x1ED39, 0x1ED3A, 0x1ED3B, 0x1ED3C, 0x1ED3D, 0x1F100, 0x1F101, 0x1F102, 0x1F103, 0x1F104, 0x1F105, 0x1F106, 0x1F107, 0x1F108, 0x1F109, 0x1F10A, 0x1F10B, 0x1F10C, 0x1FBF0, 0x1FBF1, 0x1FBF2, 0x1FBF3, 0x1FBF4, 0x1FBF5, 0x1FBF6, 0x1FBF7, 0x1FBF8, 0x1FBF9, 0x20001, 0x20064, 0x200E2, 0x20121, 0x2092A, 0x20983, 0x2098C, 0x2099C, 0x20AEA, 0x20AFD, 0x20B19, 0x22390, 0x22998, 0x23B1B, 0x2626D, 0x2F890, ) natsort-8.4.0/natsort/utils.py000066400000000000000000000676001444422413600164360ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Utilities and definitions for natsort, mostly all used to define the 
natsort_key function.

SOME CONVENTIONS USED IN THIS FILE.

1 - Factory Functions

Most of the logic of natsort revolves around factory functions
that create branchless transformation functions. For example, rather
than making a string transformation function that has an if
statement to determine whether or not to perform .lower() at
runtime for each element to transform, there is a string transformation
factory function that will return a function that either calls
.lower() or does nothing. In this way, all the branches and
decisions are taken care of once, up front. In addition to a slight
speed improvement, this provides a more extensible infrastructure.

Each of these factory functions will end with the suffix "_factory"
to indicate that they themselves return a function.

2 - Keyword Parameters For Local Scope

Many of the closures that are created by the factory functions
have signatures similar to the following

    >>> def factory(parameter):
    ...     val = 'yes' if parameter else 'no'
    ...     def closure(x, _val=val):
    ...          return '{} {}'.format(_val, x)
    ...     return closure
    ...

The variable value is passed as the default to a keyword argument.
This is a micro-optimization that makes the value available inside the
closure as the local variable "_val" instead of requiring a lookup of
"val" in an outer scope, and thus has a slightly improved performance
at runtime.

""" import re from functools import partial, reduce from itertools import chain as ichain from operator import methodcaller from pathlib import PurePath from typing import ( Any, Callable, Dict, Iterable, Iterator, List, Match, Optional, Pattern, TYPE_CHECKING, Tuple, Union, cast, overload, ) from unicodedata import normalize from natsort.compat.fastnumbers import try_float, try_int from natsort.compat.locale import ( StrOrBytes, get_decimal_point, get_strxfrm, get_thousands_sep, ) from natsort.ns_enum import NSType, NS_DUMB, ns from natsort.unicode_numbers import digits_no_decimals, numeric_no_decimals if TYPE_CHECKING: from typing_extensions import Protocol else: Protocol = object # # Pre-define a slew of aggregate types which makes the type hinting below easier # class SupportsDunderLT(Protocol): def __lt__(self, __other: Any) -> bool: ... class SupportsDunderGT(Protocol): def __gt__(self, __other: Any) -> bool: ... Sortable = Union[SupportsDunderLT, SupportsDunderGT] StrToStr = Callable[[str], str] AnyCall = Callable[[Any], Any] # For the bytes transform factory BytesTuple = Tuple[bytes] NestedBytesTuple = Tuple[Tuple[bytes]] BytesTransform = Union[BytesTuple, NestedBytesTuple] BytesTransformer = Callable[[bytes], BytesTransform] # For the number transform factory BasicTuple = Tuple[Any, ...] NestedAnyTuple = Tuple[BasicTuple, ...] 
class NumericalRegularExpressions:
    """
    Namespace of classmethods that build the compiled regular expressions
    used to find numbers inside strings.

    Every pattern is one capturing group with two alternatives: an ordinary
    number, or a single character taken from the ``digits`` / ``numeric``
    class attributes (unicode numbers that are not decimal characters).

    The class is not meant to be instantiated - call the classmethods
    directly on the class.
    """

    # All unicode numeric characters (minus the decimal characters).
    numeric: str = numeric_no_decimals
    # All unicode digit characters (minus the decimal characters).
    digits: str = digits_no_decimals
    # Pattern fragment for the optional exponent of a float.
    exp: str = r"(?:[eE][-+]?\d+)?"
    # Pattern fragment for the mantissa of a float.
    float_num: str = r"(?:\d+\.?\d*|\.\d+)"

    @classmethod
    def _construct_regex(cls, fmt: str) -> Pattern[str]:
        """
        Compile *fmt* after filling its ``{name}`` placeholders with the
        attributes defined directly on *cls*.
        """
        pattern = fmt.format_map(vars(cls))
        return re.compile(pattern, flags=re.UNICODE)

    @classmethod
    def int_sign(cls) -> Pattern[str]:
        """Pattern for an integer with an optional leading sign."""
        template = r"([-+]?\d+|[{digits}])"
        return cls._construct_regex(template)

    @classmethod
    def int_nosign(cls) -> Pattern[str]:
        """Pattern for an integer without a sign."""
        template = r"(\d+|[{digits}])"
        return cls._construct_regex(template)

    @classmethod
    def float_sign_exp(cls) -> Pattern[str]:
        """Pattern for a float with optional sign and optional exponent."""
        template = r"([-+]?{float_num}{exp}|[{numeric}])"
        return cls._construct_regex(template)

    @classmethod
    def float_nosign_exp(cls) -> Pattern[str]:
        """Pattern for a float without sign but with optional exponent."""
        template = r"({float_num}{exp}|[{numeric}])"
        return cls._construct_regex(template)

    @classmethod
    def float_sign_noexp(cls) -> Pattern[str]:
        """Pattern for a float with optional sign and no exponent."""
        template = r"([-+]?{float_num}|[{numeric}])"
        return cls._construct_regex(template)

    @classmethod
    def float_nosign_noexp(cls) -> Pattern[str]:
        """Pattern for a float without sign and without exponent."""
        template = r"({float_num}|[{numeric}])"
        return cls._construct_regex(template)
""" if alg & ns.FLOAT: alg &= ns.FLOAT | ns.SIGNED | ns.NOEXP else: alg &= ns.INT | ns.SIGNED return { ns.INT: NumericalRegularExpressions.int_nosign(), ns.FLOAT: NumericalRegularExpressions.float_nosign_exp(), ns.INT | ns.SIGNED: NumericalRegularExpressions.int_sign(), ns.FLOAT | ns.SIGNED: NumericalRegularExpressions.float_sign_exp(), ns.FLOAT | ns.NOEXP: NumericalRegularExpressions.float_nosign_noexp(), ns.FLOAT | ns.SIGNED | ns.NOEXP: NumericalRegularExpressions.float_sign_noexp(), }[alg] def _no_op(x: Any) -> Any: """A function that does nothing and returns the input as-is.""" return x def _normalize_input_factory(alg: NSType) -> StrToStr: """ Create a function that will normalize unicode input data. Parameters ---------- alg : ns enum Used to indicate how to normalize unicode. Returns ------- func : callable A function that accepts string (unicode) input and returns the the input normalized with the desired normalization scheme. """ normalization_form = "NFKD" if alg & ns.COMPATIBILITYNORMALIZE else "NFD" return partial(normalize, normalization_form) def _compose_input_factory(alg: NSType) -> StrToStr: """ Create a function that will compose unicode input data. Parameters ---------- alg : ns enum Used to indicate how to compose unicode. Returns ------- func : callable A function that accepts string (unicode) input and returns the the input normalized with the desired composition scheme. """ normalization_form = "NFKC" if alg & ns.COMPATIBILITYNORMALIZE else "NFC" return partial(normalize, normalization_form) @overload def natsort_key( val: NatsortInType, key: None, string_func: Union[StrParser, PathSplitter], bytes_func: BytesTransformer, num_func: NumTransformer, ) -> NatsortOutType: ... @overload def natsort_key( val: Any, key: KeyType, string_func: Union[StrParser, PathSplitter], bytes_func: BytesTransformer, num_func: NumTransformer, ) -> NatsortOutType: ... 
def natsort_key(
    val: Union[NatsortInType, Any],
    key: MaybeKeyType,
    string_func: Union[StrParser, PathSplitter],
    bytes_func: BytesTransformer,
    num_func: NumTransformer,
) -> NatsortOutType:
    """
    Turn a value into a natural-sort key by dispatching on its type.

    Parameters
    ----------
    val : str | pathlib.PurePath | bytes | int | float | iterable
        The value to create a key for.
    key : callable | None
        If not None, called on *val* first; everything below then
        operates on its result. It is applied to the top-level value
        only, never to the elements of a nested iterable.
    string_func : callable
        Called when the value is a *str* or a *pathlib.PurePath*.
    bytes_func : callable
        Called when the value is *bytes*.
    num_func : callable
        Called when the value is none of the above and is not iterable
        (numbers and None end up here).

    Returns
    -------
    out : tuple
        The result of whichever function was selected. For any other
        iterable, a tuple holding the key of each element, computed
        recursively.

    See Also
    --------
    parse_string_factory
    parse_bytes_factory
    parse_number_or_none_factory

    """
    item = val if key is None else key(val)

    # Strings (and paths) must be tested before the generic Iterable check,
    # because they are iterable themselves.
    if isinstance(item, (str, PurePath)):
        return string_func(item)
    if isinstance(item, bytes):
        return bytes_func(item)
    if not isinstance(item, Iterable):
        # Anything that is not text, bytes or a container.
        return num_func(item)

    # Containers are handled element by element; the key is deliberately
    # not passed down.
    return tuple(
        natsort_key(child, None, string_func, bytes_func, num_func)
        for child in item
    )
def parse_number_or_none_factory(
    alg: NSType, sep: StrOrBytes, pre_sep: str
) -> NumTransformer:
    """
    Create a function that will format a number (or None) into a tuple.

    Parameters
    ----------
    alg : ns enum
        Indicate how to format the number. The flags consulted are
        ``NANLAST``, ``PATH``, ``UNGROUPLETTERS`` and ``LOCALEALPHA``.
    sep : str
        Placed as the first element of the returned tuple, in front of
        the number.
    pre_sep : str
        When *alg* contains both ``UNGROUPLETTERS`` and ``LOCALEALPHA``,
        this string is wrapped in a single-element tuple and put at the
        front of the returned nested tuple.

    Returns
    -------
    func : callable
        A function that accepts numeric input (e.g. *int* or *float*) or
        None and returns a tuple containing the number with the leading
        string *sep*. Intended to be used as the *num_func* argument to
        *natsort_key*.

    See Also
    --------
    natsort_key

    """
    nan_replace = float("+inf") if alg & ns.NANLAST else float("-inf")

    def func(
        val: Any,
        _nan_replace: float = nan_replace,
        _sep: StrOrBytes = sep,
        reverse: bool = nan_replace == float("+inf"),
    ) -> BasicTuple:
        """Given a number, place it in a tuple with a leading separator."""
        # NaN, None and a value equal to _nan_replace are all emitted with
        # _nan_replace in the numeric slot; a trailing tag string keeps them
        # apart. The tags order them NaN < None < replacement value when the
        # replacement is -inf, and replacement value < None < NaN when it is
        # +inf, so NaN always sits at the outer edge.
        if val != val:  # only NaN is unequal to itself
            tag = "3" if reverse else "1"
        elif val is None:
            tag = "2"
        elif val == _nan_replace:
            tag = "1" if reverse else "3"
        else:
            return _sep, val
        return _sep, _nan_replace, tag

    # Return the function, wrapped in extra tuple layers where the selected
    # options require the nested layout.
    if alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA:
        if alg & ns.PATH:
            return lambda x: (((pre_sep,), func(x)),)
        return lambda x: ((pre_sep,), func(x))
    if alg & ns.PATH:
        return lambda x: (func(x),)
    return func
The first element is *always* a string, and then alternates number then string. Intended to be used as the *string_func* argument to *natsort_key*. See Also -------- natsort_key input_string_transform_factory string_component_transform_factory final_data_transform_factory """ # Sometimes we store the "original" input before transformation, # sometimes after. orig_after_xfrm = not (alg & NS_DUMB and alg & ns.LOCALEALPHA) original_func = input_transform if orig_after_xfrm else _no_op normalize_input = _normalize_input_factory(alg) compose_input = _compose_input_factory(alg) if alg & ns.LOCALEALPHA else _no_op def func(x: PathArg) -> FinalTransform: if isinstance(x, PurePath): # While paths are technically not strings, it is natural for them # to be treated the same. x = str(x) # Apply string input transformation function and return to x. # Original function is usually a no-op, but some algorithms require it # to also be the transformation function. a = normalize_input(x) b, original = input_transform(a), original_func(a) c = compose_input(b) # Decompose unicode if using LOCALE d = splitter(c) # Split string into components. e = filter(None, d) # Remove empty strings. f = component_transform(e) # Apply transform on components. g = sep_inserter(f, sep) # Insert '' between numbers. return final_transform(g, original) # Apply the final transform. return func def parse_path_factory(str_split: StrParser) -> PathSplitter: """ Create a function that will properly split and format a path. Parameters ---------- str_split : callable The output of the *parse_string_factory* function. Returns ------- func : callable A function that accepts a string or path-like object and splits it into its path components, then passes each component to *str_split* and returns the result as a nested tuple. Can be used as the *string_func* argument to *natsort_key*. 
def sep_inserter(iterator: Iterable[Any], sep: StrOrBytes) -> Iterator[Any]:
    """
    Insert *sep* between adjacent numbers of an iterable.

    Parameters
    ----------
    iterator : iterable
        The values to pass through. Any iterable is accepted, not only
        an iterator.
    sep : str
        The string character to be inserted between adjacent numeric
        objects.

    Yields
    ------
    The values of *iterator* in order, with *sep* inserted where
    adjacent elements are numeric. If the first element in the input
    is numeric then *sep* will be the first value yielded.

    """
    # Since we are controlling the types of the input, 'type' is used
    # instead of 'isinstance' for the small speed advantage it offers
    # (this also means that bool is not considered a number here).
    types = (int, float)

    # Pretending that a number precedes the first element makes the
    # "leading number gets a separator" rule fall out of the general
    # "two numbers in a row" rule, so one loop covers every case and
    # an empty input simply yields nothing.
    previous_is_number = True
    for element in iterator:
        current_is_number = type(element) in types
        if previous_is_number and current_is_number:
            yield sep
        yield element
        previous_is_number = current_is_number
function_chain: List[StrToStr] = [] if (dumb and not lowfirst) or (lowfirst and not dumb): function_chain.append(methodcaller("swapcase")) if alg & ns.IGNORECASE: function_chain.append(methodcaller("casefold")) if alg & ns.LOCALENUM: # Create a regular expression that will remove thousands separators. strip_thousands = r""" (?<=[0-9]{{1}}) # At least 1 number (? StrTransformer: """ Create a function to either transform a string or convert to a number. Parameters ---------- alg : ns enum Indicate how to format the *str*. Returns ------- func : callable A function to be used as the *component_transform* argument to *parse_string_factory*. See Also -------- parse_string_factory """ # Shortcuts. use_locale = alg & ns.LOCALEALPHA dumb = alg & NS_DUMB group_letters = (alg & ns.GROUPLETTERS) or (use_locale and dumb) nan_val = float("+inf") if alg & ns.NANLAST else float("-inf") # Build the chain of functions to execute in order. func_chain: List[Callable[[str], StrOrBytes]] = [] if group_letters: func_chain.append(groupletters) if use_locale: func_chain.append(get_strxfrm()) # Return the correct chained functions. kwargs: Dict[str, Union[float, Callable[[str], StrOrBytes], bool]] kwargs = {"on_fail": chain_functions(func_chain)} if func_chain else {} kwargs["map"] = True if alg & ns.FLOAT: kwargs["nan"] = nan_val return cast(StrTransformer, partial(try_float, **kwargs)) else: return cast(StrTransformer, partial(try_int, **kwargs)) def final_data_transform_factory( alg: NSType, sep: StrOrBytes, pre_sep: str ) -> FinalTransformer: """ Create a function to transform a tuple. Parameters ---------- alg : ns enum Indicate how to format the *str*. sep : str Separator that was passed to *parse_string_factory*. pre_sep : str String separator to insert at the at the front of the return tuple in the case that the first element is *sep*. Returns ------- func : callable A function to be used as the *final_transform* argument to *parse_string_factory*. 
See Also -------- parse_string_factory """ if alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA: swap = alg & NS_DUMB and alg & ns.LOWERCASEFIRST transform = cast(StrToStr, methodcaller("swapcase") if swap else _no_op) def func( split_val: Iterable[NatsortInType], val: str, _transform: StrToStr = transform, _sep: StrOrBytes = sep, _pre_sep: str = pre_sep, ) -> FinalTransform: """ Return a tuple with the first character of the first element of the return value as the first element, and the return value as the second element. This will be used to perform gross sorting by the first letter. """ split_val = tuple(split_val) if not split_val: return (), () elif split_val[0] == _sep: return (_pre_sep,), split_val else: return (_transform(val[0]),), split_val else: def func( split_val: Iterable[NatsortInType], val: str, _transform: StrToStr = _no_op, _sep: StrOrBytes = sep, _pre_sep: str = pre_sep, ) -> FinalTransform: return tuple(split_val) return func lower_function: StrToStr = cast(StrToStr, methodcaller("casefold")) # noinspection PyIncorrectDocstring def groupletters(x: str, _low: StrToStr = lower_function) -> str: """ Double all characters, making doubled letters lowercase. Parameters ---------- x : str Returns ------- str Examples -------- >>> groupletters("Apple") 'aAppppllee' """ return "".join(ichain.from_iterable((_low(y), y) for y in x)) def chain_functions(functions: Iterable[AnyCall]) -> AnyCall: """ Chain a list of single-argument functions together and return. The functions are applied in list order, and the output of the previous functions is passed to the next function. Parameters ---------- functions : list A list of single-argument functions to chain together. Returns ------- func : callable A single argument function. Examples -------- Chain several functions together! 
>>> funcs = [lambda x: x * 4, len, lambda x: x + 5] >>> func = chain_functions(funcs) >>> func('hey') 17 """ functions = list(functions) if not functions: return _no_op elif len(functions) == 1: return functions[0] else: # See https://stackoverflow.com/a/39123400/1399279 return partial(reduce, lambda res, f: f(res), functions) @overload def do_decoding(s: bytes, encoding: str) -> str: ... @overload def do_decoding(s: Any, encoding: str) -> Any: ... def do_decoding(s: Any, encoding: str) -> Any: """ Helper to decode a *bytes* object, or return the object as-is. Parameters ---------- s : bytes | object encoding : str The encoding to use to decode *s*. Returns ------- decoded *str* if *s* was *bytes* and the decoding was successful. *s* if *s* was not *bytes*. """ if isinstance(s, bytes): return s.decode(encoding) else: return s # noinspection PyIncorrectDocstring def path_splitter( s: PathArg, treat_base: bool = True, _d_match: MatchFn = re.compile(r"\.\d").match ) -> Iterator[str]: """ Split a string into its path components. Assumes a string is a path or is path-like. Parameters ---------- s : str | pathlib.Path treat_base: bool, optional If True, treat the base of component of the file path as special and split off extensions. If False, do not do this. The default is True. Returns ------- split : tuple The path split by directory components and extensions. Examples -------- >>> tuple(path_splitter("this/thing.ext")) ('this', 'thing', '.ext') """ if not isinstance(s, PurePath): s = PurePath(s) # Split the path into parts. 
try: *path_parts, base = s.parts except ValueError: path_parts = [] base = str(s) suffixes = [] if treat_base: # Now, split off the file extensions until # - we reach a decimal number at the beginning of the suffix # - more than two suffixes have been seen # - a suffix is more than five characters (including leading ".") # - there are no more extensions for i, suffix in enumerate(reversed(PurePath(base).suffixes)): if _d_match(suffix) or i > 1 or len(suffix) > 5: break suffixes.append(suffix) suffixes.reverse() # Remove the suffixes from the base component base = base.replace("".join(suffixes), "") base_component = [base] if base else [] # Join all path comonents in an iterator return filter(None, ichain(path_parts, base_component, suffixes)) natsort-8.4.0/setup.cfg000066400000000000000000000030461444422413600150450ustar00rootroot00000000000000[bumpversion] current_version = 8.4.0 commit = True tag = True tag_name = {new_version} [metadata] author = Seth M. Morton author_email = drtuba78@gmail.com url = https://github.com/SethMMorton/natsort description = Simple yet flexible natural sorting in Python. 
long_description = file: README.rst long_description_content_type = text/x-rst license = MIT license_files = LICENSE classifiers = Development Status :: 5 - Production/Stable Intended Audience :: Developers Intended Audience :: Science/Research Intended Audience :: System Administrators Intended Audience :: Information Technology Intended Audience :: Financial and Insurance Industry Operating System :: OS Independent License :: OSI Approved :: MIT License Natural Language :: English Programming Language :: Python Programming Language :: Python :: 3 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 Topic :: Scientific/Engineering :: Information Analysis Topic :: Utilities Topic :: Text Processing [bumpversion:file:setup.py] [bumpversion:file:natsort/__init__.py] [bumpversion:file:docs/conf.py] [bumpversion:file:CHANGELOG.md] search = Unreleased replace = Unreleased --- [{new_version}] - {now:%%Y-%%m-%%d} [flake8] max-line-length = 89 import-order-style = pycharm doctests = True max-complexity = 10 exclude = natsort.egg-info, .tox, .cache, .git, __pycache__, build, dist, docs, .venv [mypy] mypy_path = mypy_stubs natsort-8.4.0/setup.py000066400000000000000000000006101444422413600147300ustar00rootroot00000000000000#! /usr/bin/env python from setuptools import find_packages, setup setup( name="natsort", version="8.4.0", packages=find_packages(), entry_points={"console_scripts": ["natsort = natsort.__main__:main"]}, python_requires=">=3.7", extras_require={"fast": ["fastnumbers >= 2.0.0"], "icu": ["PyICU >= 1.0.0"]}, package_data={"": ["py.typed"]}, zip_safe=False, ) natsort-8.4.0/tests/000077500000000000000000000000001444422413600143635ustar00rootroot00000000000000natsort-8.4.0/tests/conftest.py000066400000000000000000000032501444422413600165620ustar00rootroot00000000000000""" Fixtures for pytest. 
""" import locale from typing import Iterator import hypothesis import pytest from natsort.compat.locale import dumb_sort # This disables the "too slow" hypothesis heath check globally. # For some reason it thinks that the text/binary generation is too # slow then causes the tests to fail. hypothesis.settings.register_profile( "slow-tests", suppress_health_check=[hypothesis.HealthCheck.too_slow] ) def load_locale(x: str) -> None: """Convenience to load a locale.""" locale.setlocale(locale.LC_ALL, str("{}.UTF-8".format(x))) @pytest.fixture() def with_locale_en_us() -> Iterator[None]: """Convenience to load the en_US locale - reset when complete.""" orig = locale.getlocale() load_locale("en_US") yield locale.setlocale(locale.LC_ALL, orig) @pytest.fixture() def with_locale_de_de() -> Iterator[None]: """ Convenience to load the de_DE locale - reset when complete - skip if missing. """ orig = locale.getlocale() try: load_locale("de_DE") except locale.Error: pytest.skip("requires de_DE locale to be installed") else: yield finally: locale.setlocale(locale.LC_ALL, orig) @pytest.fixture() def with_locale_cs_cz() -> Iterator[None]: """ Convenience to load the cs_CZ locale - reset when complete - skip if missing. """ orig = locale.getlocale() try: load_locale("cs_CZ") if dumb_sort(): pytest.skip("requires a functioning locale library to run") except locale.Error: pytest.skip("requires cs_CZ locale to be installed") else: yield finally: locale.setlocale(locale.LC_ALL, orig) natsort-8.4.0/tests/profile_natsorted.py000066400000000000000000000037551444422413600204720ustar00rootroot00000000000000# -*- coding: utf-8 -*- """\ This file contains functions to profile natsorted with different inputs and different settings. 
""" import cProfile import locale import sys from typing import List, Union try: from natsort import ns, natsort_keygen except ImportError: sys.path.insert(0, ".") from natsort import ns, natsort_keygen from natsort.natsort import NatsortKeyType locale.setlocale(locale.LC_ALL, "en_US.UTF-8") # Samples to parse number = 14695498 int_string = "43493" float_string = "-434.93e7" plain_string = "hello world" fancy_string = "7abba9342fdab" a_path = "/p/Folder (1)/file (1).tar.gz" some_bytes = b"these are bytes" a_list = ["hello", "goodbye", "74"] basic_key = natsort_keygen() real_key = natsort_keygen(alg=ns.REAL) path_key = natsort_keygen(alg=ns.PATH) locale_key = natsort_keygen(alg=ns.LOCALE) def prof_time_to_generate() -> None: print("*** Generate Plain Key ***") for _ in range(100000): natsort_keygen() cProfile.run("prof_time_to_generate()", sort="time") def prof_parsing( a: Union[str, int, bytes, List[str]], msg: str, key: NatsortKeyType = basic_key ) -> None: print(msg) for _ in range(100000): key(a) cProfile.run( 'prof_parsing(int_string, "*** Basic Call, Int as String ***")', sort="time" ) cProfile.run( 'prof_parsing(float_string, "*** Basic Call, Float as String ***")', sort="time" ) cProfile.run('prof_parsing(float_string, "*** Real Call ***", real_key)', sort="time") cProfile.run('prof_parsing(number, "*** Basic Call, Number ***")', sort="time") cProfile.run( 'prof_parsing(fancy_string, "*** Basic Call, Mixed String ***")', sort="time" ) cProfile.run('prof_parsing(some_bytes, "*** Basic Call, Byte String ***")', sort="time") cProfile.run('prof_parsing(a_path, "*** Path Call ***", path_key)', sort="time") cProfile.run('prof_parsing(a_list, "*** Basic Call, Recursive ***")', sort="time") cProfile.run( 'prof_parsing("434,930,000 dollars", "*** Locale Call ***", locale_key)', sort="time", ) natsort-8.4.0/tests/test_fake_fastnumbers.py000066400000000000000000000071141444422413600213160ustar00rootroot00000000000000# -*- coding: utf-8 -*- """\ Test the fake 
fastnumbers module. """ import unicodedata from math import isinf from typing import Union, cast from hypothesis import given from hypothesis.strategies import floats, integers, text from natsort.compat.fake_fastnumbers import fast_float, fast_int def is_float(x: str) -> bool: try: float(x) except ValueError: try: unicodedata.numeric(x) except (ValueError, TypeError): return False else: return True else: return True def not_a_float(x: str) -> bool: return not is_float(x) def is_int(x: Union[str, float]) -> bool: try: return cast(float, x).is_integer() except AttributeError: try: int(x) except ValueError: try: unicodedata.digit(cast(str, x)) except (ValueError, TypeError): return False else: return True else: return True def not_an_int(x: Union[str, float]) -> bool: return not is_int(x) # Each test has an "example" version for demonstrative purposes, # and a test that uses the hypothesis module. def test_fast_float_returns_nan_alternate_if_nan_option_is_given() -> None: assert fast_float("nan", nan=7) == 7 def test_fast_float_converts_float_string_to_float_example() -> None: assert fast_float("45.8") == 45.8 assert fast_float("-45") == -45.0 assert fast_float("45.8e-2", key=lambda x: x.upper()) == 45.8e-2 assert isinf(cast(float, fast_float("nan"))) assert isinf(cast(float, fast_float("+nan"))) assert isinf(cast(float, fast_float("-NaN"))) assert fast_float("۱۲.۱۲") == 12.12 assert fast_float("-۱۲.۱۲") == -12.12 @given(floats(allow_nan=False)) def test_fast_float_converts_float_string_to_float(x: float) -> None: assert fast_float(repr(x)) == x def test_fast_float_leaves_string_as_is_example() -> None: assert fast_float("invalid") == "invalid" @given(text().filter(not_a_float).filter(bool)) def test_fast_float_leaves_string_as_is(x: str) -> None: assert fast_float(x) == x def test_fast_float_with_key_applies_to_string_example() -> None: assert fast_float("invalid", key=lambda x: x.upper()) == "INVALID" @given(text().filter(not_a_float).filter(bool)) def 
test_fast_float_with_key_applies_to_string(x: str) -> None: assert fast_float(x, key=lambda x: x.upper()) == x.upper() def test_fast_int_leaves_float_string_as_is_example() -> None: assert fast_int("45.8") == "45.8" assert fast_int("nan") == "nan" assert fast_int("inf") == "inf" @given(floats().filter(not_an_int)) def test_fast_int_leaves_float_string_as_is(x: float) -> None: assert fast_int(repr(x)) == repr(x) def test_fast_int_converts_int_string_to_int_example() -> None: assert fast_int("-45") == -45 assert fast_int("+45") == 45 assert fast_int("۱۲") == 12 assert fast_int("-۱۲") == -12 @given(integers()) def test_fast_int_converts_int_string_to_int(x: int) -> None: assert fast_int(repr(x)) == x def test_fast_int_leaves_string_as_is_example() -> None: assert fast_int("invalid") == "invalid" @given(text().filter(not_an_int).filter(bool)) def test_fast_int_leaves_string_as_is(x: str) -> None: assert fast_int(x) == x def test_fast_int_with_key_applies_to_string_example() -> None: assert fast_int("invalid", key=lambda x: x.upper()) == "INVALID" @given(text().filter(not_an_int).filter(bool)) def test_fast_int_with_key_applies_to_string(x: str) -> None: assert fast_int(x, key=lambda x: x.upper()) == x.upper() natsort-8.4.0/tests/test_final_data_transform_factory.py000066400000000000000000000040531444422413600237020ustar00rootroot00000000000000# -*- coding: utf-8 -*- """These test the utils.py functions.""" from typing import Callable, Union import pytest from hypothesis import example, given from hypothesis.strategies import floats, integers, text from natsort.ns_enum import NSType, NS_DUMB, ns from natsort.utils import final_data_transform_factory @pytest.mark.parametrize("alg", [ns.DEFAULT, ns.UNGROUPLETTERS, ns.LOCALE]) @given(x=text(), y=floats(allow_nan=False, allow_infinity=False) | integers()) @pytest.mark.usefixtures("with_locale_en_us") def test_final_data_transform_factory_default( x: str, y: Union[int, float], alg: NSType ) -> None: final_data_transform_func 
= final_data_transform_factory(alg, "", "::") value = (x, y) original_value = "".join(map(str, value)) result = final_data_transform_func(value, original_value) assert result == value @pytest.mark.parametrize( "alg, func", [ (ns.UNGROUPLETTERS | ns.LOCALE, lambda x: x), (ns.LOCALE | ns.UNGROUPLETTERS | NS_DUMB, lambda x: x), (ns.LOCALE | ns.UNGROUPLETTERS | ns.LOWERCASEFIRST, lambda x: x), ( ns.LOCALE | ns.UNGROUPLETTERS | NS_DUMB | ns.LOWERCASEFIRST, lambda x: x.swapcase(), ), ], ) @given(x=text(), y=floats(allow_nan=False, allow_infinity=False) | integers()) @example(x="İ", y=0) @pytest.mark.usefixtures("with_locale_en_us") def test_final_data_transform_factory_ungroup_and_locale( x: str, y: Union[int, float], alg: NSType, func: Callable[[str], str] ) -> None: final_data_transform_func = final_data_transform_factory(alg, "", "::") value = (x, y) original_value = "".join(map(str, value)) result = final_data_transform_func(value, original_value) if x: expected = ((func(original_value[:1]),), value) else: expected = (("::",), value) assert result == expected def test_final_data_transform_factory_ungroup_and_locale_empty_tuple() -> None: final_data_transform_func = final_data_transform_factory(ns.UG | ns.L, "", "::") assert final_data_transform_func((), "") == ((), ()) natsort-8.4.0/tests/test_input_string_transform_factory.py000066400000000000000000000102161444422413600243430ustar00rootroot00000000000000# -*- coding: utf-8 -*- """These test the utils.py functions.""" from typing import Callable import pytest from hypothesis import example, given from hypothesis.strategies import integers, text from natsort.ns_enum import NSType, NS_DUMB, ns from natsort.utils import input_string_transform_factory def thousands_separated_int(n: str) -> str: """Insert thousands separators in an int.""" new_int = "" for i, y in enumerate(reversed(n), 1): new_int = y + new_int # For every third digit, insert a thousands separator. 
if i % 3 == 0 and i != len(n): new_int = "," + new_int return new_int @given(text()) def test_input_string_transform_factory_is_no_op_for_no_alg_options(x: str) -> None: input_string_transform_func = input_string_transform_factory(ns.DEFAULT) assert input_string_transform_func(x) is x @pytest.mark.parametrize( "alg, example_func", [ (ns.IGNORECASE, lambda x: x.casefold()), (NS_DUMB, lambda x: x.swapcase()), (ns.LOWERCASEFIRST, lambda x: x.swapcase()), (NS_DUMB | ns.LOWERCASEFIRST, lambda x: x), # No-op (ns.IGNORECASE | ns.LOWERCASEFIRST, lambda x: x.swapcase().casefold()), ], ) @given(x=text()) def test_input_string_transform_factory( x: str, alg: NSType, example_func: Callable[[str], str] ) -> None: input_string_transform_func = input_string_transform_factory(alg) assert input_string_transform_func(x) == example_func(x) @example(12543642642534980) # 12,543,642,642,534,980 => 12543642642534980 @given(x=integers(min_value=1000)) @pytest.mark.usefixtures("with_locale_en_us") def test_input_string_transform_factory_cleans_thousands(x: int) -> None: int_str = str(x).rstrip("lL") thousands_int_str = thousands_separated_int(int_str) assert thousands_int_str.replace(",", "") != thousands_int_str input_string_transform_func = input_string_transform_factory(ns.LOCALE) assert input_string_transform_func(thousands_int_str) == int_str # Using LOCALEALPHA does not affect numbers. input_string_transform_func_no_op = input_string_transform_factory(ns.LOCALEALPHA) assert input_string_transform_func_no_op(thousands_int_str) == thousands_int_str # These might be too much to test with hypothesis. 
@pytest.mark.parametrize( "x, expected", [ ("12,543,642642.5345,34980", "12543,642642.5345,34980"), ("12,59443,642,642.53,4534980", "12,59443,642642.53,4534980"), # No change ("12543,642,642.5,34534980", "12543,642642.5,34534980"), ], ) @pytest.mark.usefixtures("with_locale_en_us") def test_input_string_transform_factory_handles_us_locale( x: str, expected: str ) -> None: input_string_transform_func = input_string_transform_factory(ns.LOCALE) assert input_string_transform_func(x) == expected @pytest.mark.parametrize( "x, expected", [ ("12.543.642642,5345.34980", "12543.642642,5345.34980"), ("12.59443.642.642,53.4534980", "12.59443.642642,53.4534980"), # No change ("12543.642.642,5.34534980", "12543.642642,5.34534980"), ], ) @pytest.mark.usefixtures("with_locale_de_de") def test_input_string_transform_factory_handles_de_locale( x: str, expected: str ) -> None: input_string_transform_func = input_string_transform_factory(ns.LOCALE) assert input_string_transform_func(x) == expected @pytest.mark.parametrize( "alg, expected", [ (ns.LOCALE, "1543,753"), # Does nothing without FLOAT (ns.LOCALE | ns.FLOAT, "1543.753"), (ns.LOCALEALPHA, "1543,753"), # LOCALEALPHA won't do anything, need LOCALENUM ], ) @pytest.mark.usefixtures("with_locale_de_de") def test_input_string_transform_factory_handles_german_locale( alg: NSType, expected: str ) -> None: input_string_transform_func = input_string_transform_factory(alg) assert input_string_transform_func("1543,753") == expected @pytest.mark.usefixtures("with_locale_de_de") def test_input_string_transform_factory_does_nothing_with_non_num_input() -> None: input_string_transform_func = input_string_transform_factory(ns.LOCALE | ns.FLOAT) expected = "154s,t53" assert input_string_transform_func("154s,t53") == expected natsort-8.4.0/tests/test_main.py000066400000000000000000000157451444422413600167340ustar00rootroot00000000000000# -*- coding: utf-8 -*- """\ Test the natsort command-line tool functions. 
""" import re import sys from typing import Any, List, Union import pytest from hypothesis import given from hypothesis.strategies import DataObject, data, floats, integers, lists from natsort.__main__ import ( TypedArgs, check_filters, keep_entry_range, keep_entry_value, main, range_check, sort_and_print_entries, ) from pytest_mock import MockerFixture def test_main_passes_default_arguments_with_no_command_line_options( mocker: MockerFixture, ) -> None: p = mocker.patch("natsort.__main__.sort_and_print_entries") main("num-2", "num-6", "num-1") args = p.call_args[0][1] assert not args.paths assert args.filter is None assert args.reverse_filter is None assert args.exclude == [] assert not args.reverse assert args.number_type == "int" assert not args.signed assert args.exp assert not args.locale def test_main_passes_arguments_with_all_command_line_options( mocker: MockerFixture, ) -> None: arguments = ["--paths", "--reverse", "--locale"] arguments.extend(["--filter", "4", "10"]) arguments.extend(["--reverse-filter", "100", "110"]) arguments.extend(["--number-type", "float"]) arguments.extend(["--noexp", "--sign"]) arguments.extend(["--exclude", "34"]) arguments.extend(["--exclude", "35"]) arguments.extend(["num-2", "num-6", "num-1"]) p = mocker.patch("natsort.__main__.sort_and_print_entries") main(*arguments) args = p.call_args[0][1] assert args.paths assert args.filter == [(4.0, 10.0)] assert args.reverse_filter == [(100.0, 110.0)] assert args.exclude == [34, 35] assert args.reverse assert args.number_type == "float" assert args.signed assert not args.exp assert args.locale mock_print = "__builtin__.print" if sys.version[0] == "2" else "builtins.print" entries = [ "tmp/a57/path2", "tmp/a23/path1", "tmp/a1/path1", "tmp/a1 (1)/path1", "tmp/a130/path1", "tmp/a64/path1", "tmp/a64/path2", ] @pytest.mark.parametrize( "options, order", [ # Defaults, all options false # tmp/a1 (1)/path1 # tmp/a1/path1 # tmp/a23/path1 # tmp/a57/path2 # tmp/a64/path1 # tmp/a64/path2 # 
tmp/a130/path1 ([None, None, False, False, False], [3, 2, 1, 0, 5, 6, 4]), # Path option True # tmp/a1/path1 # tmp/a1 (1)/path1 # tmp/a23/path1 # tmp/a57/path2 # tmp/a64/path1 # tmp/a64/path2 # tmp/a130/path1 ([None, None, False, True, False], [2, 3, 1, 0, 5, 6, 4]), # Filter option keeps only within range # tmp/a23/path1 # tmp/a57/path2 # tmp/a64/path1 # tmp/a64/path2 ([[(20, 100)], None, False, False, False], [1, 0, 5, 6]), # Reverse filter, exclude in range # tmp/a1/path1 # tmp/a1 (1)/path1 # tmp/a130/path1 ([None, [(20, 100)], False, True, False], [2, 3, 4]), # Exclude given values with exclude list # tmp/a1/path1 # tmp/a1 (1)/path1 # tmp/a57/path2 # tmp/a64/path1 # tmp/a64/path2 ([None, None, [23, 130], True, False], [2, 3, 0, 5, 6]), # Reverse order # tmp/a130/path1 # tmp/a64/path2 # tmp/a64/path1 # tmp/a57/path2 # tmp/a23/path1 # tmp/a1 (1)/path1 # tmp/a1/path1 ([None, None, False, True, True], reversed([2, 3, 1, 0, 5, 6, 4])), ], ) def test_sort_and_print_entries( options: List[Any], order: List[int], mocker: MockerFixture ) -> None: p = mocker.patch(mock_print) sort_and_print_entries(entries, TypedArgs(*options)) e = [mocker.call(entries[i]) for i in order] p.assert_has_calls(e) # Each test has an "example" version for demonstrative purposes, # and a test that uses the hypothesis module. def test_range_check_returns_range_as_is_but_with_floats_example() -> None: assert range_check(10, 11) == (10.0, 11.0) assert range_check(6.4, 30) == (6.4, 30.0) @given(x=floats(allow_nan=False, min_value=-1e8, max_value=1e8) | integers(), d=data()) def test_range_check_returns_range_as_is_if_first_is_less_than_second( x: Union[int, float], d: DataObject ) -> None: # Pull data such that the first is less than the second. 
if isinstance(x, float): y = d.draw(floats(min_value=x + 1.0, max_value=1e9, allow_nan=False)) else: y = d.draw(integers(min_value=x + 1)) assert range_check(x, y) == (x, y) def test_range_check_raises_value_error_if_second_is_less_than_first_example() -> None: with pytest.raises(ValueError, match="low >= high"): range_check(7, 2) @given(x=floats(allow_nan=False), d=data()) def test_range_check_raises_value_error_if_second_is_less_than_first( x: float, d: DataObject ) -> None: # Pull data such that the first is greater than or equal to the second. y = d.draw(floats(max_value=x, allow_nan=False)) with pytest.raises(ValueError, match="low >= high"): range_check(x, y) def test_check_filters_returns_none_if_filter_evaluates_to_false() -> None: assert check_filters(()) is None def test_check_filters_returns_input_as_is_if_filter_is_valid_example() -> None: assert check_filters([(6, 7)]) == [(6, 7)] assert check_filters([(6, 7), (2, 8)]) == [(6, 7), (2, 8)] @given(x=lists(integers(), min_size=1), d=data()) def test_check_filters_returns_input_as_is_if_filter_is_valid( x: List[int], d: DataObject ) -> None: # ensure y is element-wise greater than x y = [d.draw(integers(min_value=val + 1)) for val in x] assert check_filters(list(zip(x, y))) == [(i, j) for i, j in zip(x, y)] def test_check_filters_raises_value_error_if_filter_is_invalid_example() -> None: with pytest.raises(ValueError, match="Error in --filter: low >= high"): check_filters([(7, 2)]) @given(x=lists(integers(), min_size=1), d=data()) def test_check_filters_raises_value_error_if_filter_is_invalid( x: List[int], d: DataObject ) -> None: # ensure y is element-wise less than or equal to x y = [d.draw(integers(max_value=val)) for val in x] with pytest.raises(ValueError, match="Error in --filter: low >= high"): check_filters(list(zip(x, y))) @pytest.mark.parametrize( "lows, highs, truth", # 1. Any portion is between the bounds => True. # 2. Any portion is between any bounds => True. # 3. 
No portion is between the bounds => False. [([0], [100], True), ([1, 88], [20, 90], True), ([1], [20], False)], ) def test_keep_entry_range(lows: List[int], highs: List[int], truth: bool) -> None: assert keep_entry_range("a56b23c89", lows, highs, int, re.compile(r"\d+")) is truth # 1. Values not in entry => True. 2. Values in entry => False. @pytest.mark.parametrize("values, truth", [([100, 45], True), ([23], False)]) def test_keep_entry_value(values: List[int], truth: bool) -> None: assert keep_entry_value("a56b23c89", values, int, re.compile(r"\d+")) is truth natsort-8.4.0/tests/test_natsort_key.py000066400000000000000000000026541444422413600203450ustar00rootroot00000000000000# -*- coding: utf-8 -*- """These test the utils.py functions.""" from typing import Any, List, NoReturn, Tuple, Union, cast from hypothesis import given from hypothesis.strategies import binary, floats, integers, lists, text from natsort.utils import natsort_key def str_func(x: Any) -> Tuple[str]: if isinstance(x, str): return (x,) else: raise TypeError("Not a str!") def fail(_: Any) -> NoReturn: raise AssertionError("This should never be reached!") @given(floats(allow_nan=False) | integers()) def test_natsort_key_with_numeric_input_takes_number_path(x: Union[float, int]) -> None: assert natsort_key(x, None, str_func, fail, lambda y: ("", y))[1] is x @given(binary().filter(bool)) def test_natsort_key_with_bytes_input_takes_bytes_path(x: bytes) -> None: assert natsort_key(x, None, str_func, lambda y: (y,), fail)[0] is x @given(text()) def test_natsort_key_with_text_input_takes_string_path(x: str) -> None: assert natsort_key(x, None, str_func, fail, fail)[0] is x @given(lists(elements=text(), min_size=1, max_size=10)) def test_natsort_key_with_nested_input_takes_nested_path(x: List[str]) -> None: assert natsort_key(x, None, str_func, fail, fail) == tuple((y,) for y in x) @given(text()) def test_natsort_key_with_key_argument_applies_key_before_processing(x: str) -> None: assert natsort_key(x, 
len, str_func, fail, lambda y: ("", cast(int, y)))[1] == len( x ) natsort-8.4.0/tests/test_natsort_keygen.py000066400000000000000000000134561444422413600210410ustar00rootroot00000000000000# -*- coding: utf-8 -*- """\ Here are a collection of examples of how this module can be used. See the README or the natsort homepage for more details. """ import os from typing import List, Tuple, Union import pytest from natsort import natsort_key, natsort_keygen, natsorted, ns from natsort.compat.locale import get_strxfrm, null_string_locale from natsort.ns_enum import NSType from natsort.utils import BytesTransform, FinalTransform from pytest_mock import MockerFixture @pytest.fixture def arbitrary_input() -> List[Union[str, float]]: return ["6A-5.034e+1", "/Folder (1)/Foo", 56.7] @pytest.fixture def bytes_input() -> bytes: return b"6A-5.034e+1" def test_natsort_keygen_demonstration() -> None: original_list = ["a50", "a51.", "a50.31", "a50.4", "a5.034e1", "a50.300"] copy_of_list = original_list[:] original_list.sort(key=natsort_keygen(alg=ns.F)) # natsorted uses the output of natsort_keygen under the hood. assert original_list == natsorted(copy_of_list, alg=ns.F) def test_natsort_key_public() -> None: assert natsort_key("a-5.034e2") == ("a-", 5, ".", 34, "e", 2) def test_natsort_keygen_with_invalid_alg_input_raises_value_error() -> None: # Invalid arguments give the correct response with pytest.raises(ValueError, match="'alg' argument"): natsort_keygen(None, "1") # type: ignore @pytest.mark.parametrize( "alg, expected", [(ns.DEFAULT, ("a-", 5, ".", 34, "e", 1)), (ns.FLOAT | ns.SIGNED, ("a", -50.34))], ) def test_natsort_keygen_returns_natsort_key_that_parses_input( alg: NSType, expected: Tuple[Union[str, int, float], ...] 
) -> None: ns_key = natsort_keygen(alg=alg) assert ns_key("a-5.034e1") == expected @pytest.mark.parametrize( "alg, expected", [ ( ns.DEFAULT, (("", 6, "A-", 5, ".", 34, "e+", 1), ("/Folder (", 1, ")/Foo"), ("", 56.7)), ), ( ns.IGNORECASE, (("", 6, "a-", 5, ".", 34, "e+", 1), ("/folder (", 1, ")/foo"), ("", 56.7)), ), (ns.REAL, (("", 6.0, "A", -50.34), ("/Folder (", 1.0, ")/Foo"), ("", 56.7))), ( ns.LOWERCASEFIRST | ns.FLOAT | ns.NOEXP, ( ("", 6.0, "a-", 5.034, "E+", 1.0), ("/fOLDER (", 1.0, ")/fOO"), ("", 56.7), ), ), ( ns.PATH | ns.GROUPLETTERS, ( (("", 6, "aA--", 5, "..", 34, "ee++", 1),), ((2 * os.sep,), ("fFoollddeerr ((", 1, "))"), ("fFoooo",)), (("", 56.7),), ), ), ], ) def test_natsort_keygen_handles_arbitrary_input( arbitrary_input: List[Union[str, float]], alg: NSType, expected: FinalTransform ) -> None: ns_key = natsort_keygen(alg=alg) assert ns_key(arbitrary_input) == expected @pytest.mark.parametrize( "alg, expected", [ (ns.DEFAULT, (b"6A-5.034e+1",)), (ns.IGNORECASE, (b"6a-5.034e+1",)), (ns.REAL, (b"6A-5.034e+1",)), (ns.LOWERCASEFIRST | ns.FLOAT | ns.NOEXP, (b"6A-5.034e+1",)), (ns.PATH | ns.GROUPLETTERS, ((b"6A-5.034e+1",),)), ], ) def test_natsort_keygen_handles_bytes_input( bytes_input: bytes, alg: NSType, expected: BytesTransform ) -> None: ns_key = natsort_keygen(alg=alg) assert ns_key(bytes_input) == expected @pytest.mark.parametrize( "alg, expected, is_dumb", [ ( ns.LOCALE, ( (null_string_locale, 6, "A-", 5, ".", 34, "e+", 1), ("/Folder (", 1, ")/Foo"), (null_string_locale, 56.7), ), False, ), ( ns.LOCALE, ( (null_string_locale, 6, "aa--", 5, "..", 34, "eE++", 1), ("//ffoOlLdDeErR ((", 1, "))//ffoOoO"), (null_string_locale, 56.7), ), True, ), ( ns.LOCALE | ns.CAPITALFIRST, ( (("",), (null_string_locale, 6, "A-", 5, ".", 34, "e+", 1)), (("/",), ("/Folder (", 1, ")/Foo")), (("",), (null_string_locale, 56.7)), ), False, ), ], ) @pytest.mark.usefixtures("with_locale_en_us") def test_natsort_keygen_with_locale( mocker: MockerFixture, arbitrary_input: 
List[Union[str, float]], alg: NSType, expected: FinalTransform, is_dumb: bool, ) -> None: # First, apply the correct strxfrm function to the string values. strxfrm = get_strxfrm() expected_tmp = [list(sub) for sub in expected] try: for i in (2, 4, 6): expected_tmp[0][i] = strxfrm(expected_tmp[0][i]) for i in (0, 2): expected_tmp[1][i] = strxfrm(expected_tmp[1][i]) expected = tuple(tuple(sub) for sub in expected_tmp) except IndexError: # ns.LOCALE | ns.CAPITALFIRST expected_tmp = [[list(subsub) for subsub in sub] for sub in expected_tmp] for i in (2, 4, 6): expected_tmp[0][1][i] = strxfrm(expected_tmp[0][1][i]) for i in (0, 2): expected_tmp[1][1][i] = strxfrm(expected_tmp[1][1][i]) expected = tuple(tuple(tuple(subsub) for subsub in sub) for sub in expected_tmp) mocker.patch("natsort.compat.locale.dumb_sort", return_value=is_dumb) ns_key = natsort_keygen(alg=alg) assert ns_key(arbitrary_input) == expected @pytest.mark.parametrize( "alg, is_dumb", [(ns.LOCALE, False), (ns.LOCALE, True), (ns.LOCALE | ns.CAPITALFIRST, False)], ) @pytest.mark.usefixtures("with_locale_en_us") def test_natsort_keygen_with_locale_bytes( mocker: MockerFixture, bytes_input: bytes, alg: NSType, is_dumb: bool ) -> None: expected = (b"6A-5.034e+1",) mocker.patch("natsort.compat.locale.dumb_sort", return_value=is_dumb) ns_key = natsort_keygen(alg=alg) assert ns_key(bytes_input) == expected natsort-8.4.0/tests/test_natsorted.py000066400000000000000000000323751444422413600200110ustar00rootroot00000000000000# -*- coding: utf-8 -*- """\ Here are a collection of examples of how this module can be used. See the README or the natsort homepage for more details. 
""" import math from operator import itemgetter from pathlib import PurePosixPath from typing import List, Tuple, Union import pytest from natsort import as_utf8, natsorted, ns from natsort.ns_enum import NSType from pytest import raises @pytest.fixture def float_list() -> List[str]: return ["a50", "a51.", "a50.31", "a-50", "a50.4", "a5.034e1", "a50.300"] @pytest.fixture def fruit_list() -> List[str]: return ["Apple", "corn", "Corn", "Banana", "apple", "banana"] @pytest.fixture def mixed_list() -> List[Union[str, int, float]]: return ["Ä", "0", "ä", 3, "b", 1.5, "2", "Z"] def test_natsorted_numbers_in_ascending_order() -> None: given = ["a2", "a5", "a9", "a1", "a4", "a10", "a6"] expected = ["a1", "a2", "a4", "a5", "a6", "a9", "a10"] assert natsorted(given) == expected def test_natsorted_can_sort_as_signed_floats_with_exponents( float_list: List[str], ) -> None: expected = ["a-50", "a50", "a50.300", "a50.31", "a5.034e1", "a50.4", "a51."] assert natsorted(float_list, alg=ns.REAL) == expected @pytest.mark.parametrize( # UNSIGNED is default "alg", [ns.NOEXP | ns.FLOAT | ns.UNSIGNED, ns.NOEXP | ns.FLOAT], ) def test_natsorted_can_sort_as_unsigned_and_ignore_exponents( float_list: List[str], alg: NSType ) -> None: expected = ["a5.034e1", "a50", "a50.300", "a50.31", "a50.4", "a51.", "a-50"] assert natsorted(float_list, alg=alg) == expected # DEFAULT and INT are all equivalent. 
@pytest.mark.parametrize("alg", [ns.DEFAULT, ns.INT]) def test_natsorted_can_sort_as_unsigned_ints_which_is_default( float_list: List[str], alg: NSType ) -> None: expected = ["a5.034e1", "a50", "a50.4", "a50.31", "a50.300", "a51.", "a-50"] assert natsorted(float_list, alg=alg) == expected def test_natsorted_can_sort_as_signed_ints(float_list: List[str]) -> None: expected = ["a-50", "a5.034e1", "a50", "a50.4", "a50.31", "a50.300", "a51."] assert natsorted(float_list, alg=ns.SIGNED) == expected @pytest.mark.parametrize( "alg, expected", [(ns.UNSIGNED, ["a7", "a+2", "a-5"]), (ns.SIGNED, ["a-5", "a+2", "a7"])], ) def test_natsorted_can_sort_with_or_without_accounting_for_sign( alg: NSType, expected: List[str] ) -> None: given = ["a-5", "a7", "a+2"] assert natsorted(given, alg=alg) == expected def test_natsorted_can_sort_as_version_numbers() -> None: given = ["1.9.9a", "1.11", "1.9.9b", "1.11.4", "1.10.1"] expected = ["1.9.9a", "1.9.9b", "1.10.1", "1.11", "1.11.4"] assert natsorted(given) == expected def test_natsorted_can_sorts_paths_same_as_strings() -> None: paths = [ PurePosixPath("a/1/something"), PurePosixPath("a/2/something"), PurePosixPath("a/10/something"), ] assert [str(p) for p in natsorted(paths)] == natsorted([str(p) for p in paths]) @pytest.mark.parametrize( "alg, expected", [ (ns.DEFAULT, ["0", 1.5, "2", 3, "Ä", "Z", "ä", "b"]), (ns.NUMAFTER, ["Ä", "Z", "ä", "b", "0", 1.5, "2", 3]), ], ) def test_natsorted_handles_mixed_types( mixed_list: List[Union[str, int, float]], alg: NSType, expected: List[Union[str, int, float]], ) -> None: assert natsorted(mixed_list, alg=alg) == expected @pytest.mark.parametrize( "alg, expected", [ (ns.DEFAULT, [float("nan"), None, float("-inf"), 5, "25", 1e40, float("inf")]), (ns.NANLAST, [float("-inf"), 5, "25", 1e40, float("inf"), None, float("nan")]), ], ) def test_natsorted_consistent_ordering_with_nan_and_friends( alg: NSType, expected: List[Union[str, float, None, int]] ) -> None: sentinel = math.pi expected = [sentinel if 
x != x else x for x in expected] given: List[Union[str, float, None, int]] = [ float("inf"), float("-inf"), "25", 5, float("nan"), 1e40, None, ] result = natsorted(given, alg=alg) result = [sentinel if x != x else x for x in result] assert result == expected def test_natsorted_with_mixed_bytes_and_str_input_raises_type_error() -> None: with raises(TypeError, match="bytes"): natsorted(["ä", b"b"]) # ...unless you use as_utf (or some other decoder). assert natsorted(["ä", b"b"], key=as_utf8) == ["ä", b"b"] def test_natsorted_raises_type_error_for_non_iterable_input() -> None: with raises(TypeError, match="'int' object is not iterable"): natsorted(100) # type: ignore def test_natsorted_recurses_into_nested_lists() -> None: given = [["a1", "a5"], ["a1", "a40"], ["a10", "a1"], ["a2", "a5"]] expected = [["a1", "a5"], ["a1", "a40"], ["a2", "a5"], ["a10", "a1"]] assert natsorted(given) == expected def test_natsorted_applies_key_to_each_list_element_before_sorting_list() -> None: given = [("a", "num3"), ("b", "num5"), ("c", "num2")] expected = [("c", "num2"), ("a", "num3"), ("b", "num5")] assert natsorted(given, key=itemgetter(1)) == expected def test_natsorted_returns_list_in_reversed_order_with_reverse_option( float_list: List[str], ) -> None: expected = natsorted(float_list)[::-1] assert natsorted(float_list, reverse=True) == expected def test_natsorted_handles_filesystem_paths() -> None: given = [ "/p/Folder (10)/file.tar.gz", "/p/Folder (1)/file (1).tar.gz", "/p/Folder/file.x1.9.tar.gz", "/p/Folder (1)/file.tar.gz", "/p/Folder/file.x1.10.tar.gz", ] expected_correct = [ "/p/Folder/file.x1.10.tar.gz", "/p/Folder/file.x1.9.tar.gz", "/p/Folder (1)/file.tar.gz", "/p/Folder (1)/file (1).tar.gz", "/p/Folder (10)/file.tar.gz", ] expected_incorrect = [ "/p/Folder (1)/file (1).tar.gz", "/p/Folder (1)/file.tar.gz", "/p/Folder (10)/file.tar.gz", "/p/Folder/file.x1.10.tar.gz", "/p/Folder/file.x1.9.tar.gz", ] # Is incorrect by default. 
assert natsorted(given, alg=ns.FLOAT) == expected_incorrect # Need ns.PATH to make it correct. assert natsorted(given, alg=ns.FLOAT | ns.PATH) == expected_correct def test_natsorted_handles_numbers_and_filesystem_paths_simultaneously() -> None: # You can sort paths and numbers, not that you'd want to given: List[Union[str, int]] = ["/Folder (9)/file.exe", 43] expected: List[Union[str, int]] = [43, "/Folder (9)/file.exe"] assert natsorted(given, alg=ns.PATH) == expected def test_natsorted_path_extensions_heuristic() -> None: # https://github.com/SethMMorton/natsort/issues/145 given = [ "Try.Me.Bug - 09 - One.Two.Three.[text].mkv", "Try.Me.Bug - 07 - One.Two.5.[text].mkv", "Try.Me.Bug - 08 - One.Two.Three[text].mkv", ] expected = [ "Try.Me.Bug - 07 - One.Two.5.[text].mkv", "Try.Me.Bug - 08 - One.Two.Three[text].mkv", "Try.Me.Bug - 09 - One.Two.Three.[text].mkv", ] assert natsorted(given, alg=ns.PATH) == expected @pytest.mark.parametrize( "alg, expected", [ (ns.DEFAULT, ["Apple", "Banana", "Corn", "apple", "banana", "corn"]), (ns.IGNORECASE, ["Apple", "apple", "Banana", "banana", "corn", "Corn"]), (ns.LOWERCASEFIRST, ["apple", "banana", "corn", "Apple", "Banana", "Corn"]), (ns.GROUPLETTERS, ["Apple", "apple", "Banana", "banana", "Corn", "corn"]), (ns.G | ns.LF, ["apple", "Apple", "banana", "Banana", "corn", "Corn"]), ], ) def test_natsorted_supports_case_handling( alg: NSType, expected: List[str], fruit_list: List[str] ) -> None: assert natsorted(fruit_list, alg=alg) == expected @pytest.mark.parametrize( "alg, expected", [ (ns.DEFAULT, [("A5", "a6"), ("a3", "a1")]), (ns.LOWERCASEFIRST, [("a3", "a1"), ("A5", "a6")]), (ns.IGNORECASE, [("a3", "a1"), ("A5", "a6")]), ], ) def test_natsorted_supports_nested_case_handling( alg: NSType, expected: List[Tuple[str, str]] ) -> None: given = [("A5", "a6"), ("a3", "a1")] assert natsorted(given, alg=alg) == expected @pytest.mark.parametrize( "alg, expected", [ (ns.DEFAULT, ["apple", "Apple", "banana", "Banana", "corn", "Corn"]), 
(ns.CAPITALFIRST, ["Apple", "Banana", "Corn", "apple", "banana", "corn"]), (ns.LOWERCASEFIRST, ["Apple", "apple", "Banana", "banana", "Corn", "corn"]), (ns.C | ns.LF, ["apple", "banana", "corn", "Apple", "Banana", "Corn"]), ], ) @pytest.mark.usefixtures("with_locale_en_us") def test_natsorted_can_sort_using_locale( fruit_list: List[str], alg: NSType, expected: List[str] ) -> None: assert natsorted(fruit_list, alg=ns.LOCALE | alg) == expected @pytest.mark.usefixtures("with_locale_en_us") def test_natsorted_can_sort_locale_specific_numbers_en() -> None: given = ["c", "a5,467.86", "ä", "b", "a5367.86", "a5,6", "a5,50"] expected = ["a5,6", "a5,50", "a5367.86", "a5,467.86", "ä", "b", "c"] assert natsorted(given, alg=ns.LOCALE | ns.F) == expected @pytest.mark.usefixtures("with_locale_de_de") def test_natsorted_can_sort_locale_specific_numbers_de() -> None: given = ["c", "a5.467,86", "ä", "b", "a5367.86", "a5,6", "a5,50"] expected = ["a5,50", "a5,6", "a5367.86", "a5.467,86", "ä", "b", "c"] assert natsorted(given, alg=ns.LOCALE | ns.F) == expected @pytest.mark.usefixtures("with_locale_de_de") def test_natsorted_locale_bug_regression_test_109() -> None: # https://github.com/SethMMorton/natsort/issues/109 given = ["462166", "461761"] expected = ["461761", "462166"] assert natsorted(given, alg=ns.LOCALE) == expected @pytest.mark.usefixtures("with_locale_cs_cz") def test_natsorted_locale_bug_regression_test_140() -> None: # https://github.com/SethMMorton/natsort/issues/140 given = ["Aš", "Cheb", "Česko", "Cibulov", "Znojmo", "Žilina"] expected = ["Aš", "Cibulov", "Česko", "Cheb", "Znojmo", "Žilina"] assert natsorted(given, alg=ns.LOCALE) == expected @pytest.mark.parametrize( "alg, expected", [ (ns.DEFAULT, ["0", 1.5, "2", 3, "ä", "Ä", "b", "Z"]), (ns.NUMAFTER, ["ä", "Ä", "b", "Z", "0", 1.5, "2", 3]), (ns.UNGROUPLETTERS, ["0", 1.5, "2", 3, "Ä", "Z", "ä", "b"]), (ns.UG | ns.NA, ["Ä", "Z", "ä", "b", "0", 1.5, "2", 3]), # Adding PATH changes nothing. 
(ns.PATH, ["0", 1.5, "2", 3, "ä", "Ä", "b", "Z"]), (ns.PATH | ns.NUMAFTER, ["ä", "Ä", "b", "Z", "0", 1.5, "2", 3]), (ns.PATH | ns.UNGROUPLETTERS, ["0", 1.5, "2", 3, "Ä", "Z", "ä", "b"]), (ns.PATH | ns.UG | ns.NA, ["Ä", "Z", "ä", "b", "0", 1.5, "2", 3]), ], ) @pytest.mark.usefixtures("with_locale_en_us") def test_natsorted_handles_mixed_types_with_locale( mixed_list: List[Union[str, int, float]], alg: NSType, expected: List[Union[str, int, float]], ) -> None: assert natsorted(mixed_list, alg=ns.LOCALE | alg) == expected @pytest.mark.parametrize( "alg, expected", [ (ns.DEFAULT, ["73", "5039", "Banana", "apple", "corn", "~~~~~~"]), (ns.NUMAFTER, ["Banana", "apple", "corn", "~~~~~~", "73", "5039"]), ], ) def test_natsorted_sorts_an_odd_collection_of_strings( alg: NSType, expected: List[str] ) -> None: given = ["apple", "Banana", "73", "5039", "corn", "~~~~~~"] assert natsorted(given, alg=alg) == expected def test_natsorted_sorts_mixed_ascii_and_non_ascii_numbers() -> None: given = [ "1st street", "10th street", "2nd street", "2 street", "1 street", "1street", "11 street", "street 2", "street 1", "Street 11", "۲ street", "۱ street", "۱street", "۱۲street", "۱۱ street", "street ۲", "street ۱", "street ۱", "street ۱۲", "street ۱۱", ] expected = [ "1 street", "۱ street", "1st street", "1street", "۱street", "2 street", "۲ street", "2nd street", "10th street", "11 street", "۱۱ street", "۱۲street", "street 1", "street ۱", "street ۱", "street 2", "street ۲", "Street 11", "street ۱۱", "street ۱۲", ] assert natsorted(given, alg=ns.IGNORECASE) == expected def test_natsort_sorts_consistently_with_presort() -> None: # Demonstrate the problem: # Sorting is order-dependent for values that have different # string representations are equiavlent numerically. 
given = ["a01", "a1.4500", "a1", "a1.45"] expected = ["a01", "a1", "a1.4500", "a1.45"] result = natsorted(given, alg=ns.FLOAT) assert result == expected given = ["a1", "a1.45", "a01", "a1.4500"] expected = ["a1", "a01", "a1.45", "a1.4500"] result = natsorted(given, alg=ns.FLOAT) assert result == expected # The solution - use "presort" which will sort the # input by its string representation before sorting # with natsorted, which gives consitent results even # if the numeric representation is identical expected = ["a01", "a1", "a1.45", "a1.4500"] given = ["a01", "a1.4500", "a1", "a1.45"] result = natsorted(given, alg=ns.FLOAT | ns.PRESORT) assert result == expected given = ["a1", "a1.45", "a01", "a1.4500"] result = natsorted(given, alg=ns.FLOAT | ns.PRESORT) assert result == expected natsort-8.4.0/tests/test_natsorted_convenience.py000066400000000000000000000072171444422413600223620ustar00rootroot00000000000000# -*- coding: utf-8 -*- """\ Here are a collection of examples of how this module can be used. See the README or the natsort homepage for more details. 
""" from operator import itemgetter from typing import List import pytest from natsort import ( as_ascii, as_utf8, decoder, humansorted, index_humansorted, index_natsorted, index_realsorted, natsorted, ns, order_by_index, realsorted, ) @pytest.fixture def version_list() -> List[str]: return ["1.9.9a", "1.11", "1.9.9b", "1.11.4", "1.10.1"] @pytest.fixture def float_list() -> List[str]: return ["a50", "a51.", "a50.31", "a-50", "a50.4", "a5.034e1", "a50.300"] @pytest.fixture def fruit_list() -> List[str]: return ["Apple", "corn", "Corn", "Banana", "apple", "banana"] def test_decoder_returns_function_that_decodes_bytes_but_returns_other_as_is() -> None: func = decoder("latin1") str_obj = "bytes" int_obj = 14 assert func(b"bytes") == str_obj assert func(int_obj) is int_obj # returns as-is, same object ID assert func(str_obj) is str_obj # same object returned b/c only bytes has decode def test_as_ascii_converts_bytes_to_ascii() -> None: assert decoder("ascii")(b"bytes") == as_ascii(b"bytes") def test_as_utf8_converts_bytes_to_utf8() -> None: assert decoder("utf8")(b"bytes") == as_utf8(b"bytes") def test_realsorted_is_identical_to_natsorted_with_real_alg( float_list: List[str], ) -> None: assert realsorted(float_list) == natsorted(float_list, alg=ns.REAL) @pytest.mark.usefixtures("with_locale_en_us") def test_humansorted_is_identical_to_natsorted_with_locale_alg( fruit_list: List[str], ) -> None: assert humansorted(fruit_list) == natsorted(fruit_list, alg=ns.LOCALE) def test_index_natsorted_returns_integer_list_of_sort_order_for_input_list() -> None: given = ["num3", "num5", "num2"] other = ["foo", "bar", "baz"] index = index_natsorted(given) assert index == [2, 0, 1] assert [given[i] for i in index] == ["num2", "num3", "num5"] assert [other[i] for i in index] == ["baz", "foo", "bar"] def test_index_natsorted_reverse() -> None: given = ["num3", "num5", "num2"] assert index_natsorted(given, reverse=True) == index_natsorted(given)[::-1] def 
test_index_natsorted_applies_key_function_before_sorting() -> None: given = [("a", "num3"), ("b", "num5"), ("c", "num2")] expected = [2, 0, 1] assert index_natsorted(given, key=itemgetter(1)) == expected def test_index_natsorted_can_presort() -> None: expected = [2, 0, 3, 1] given = ["a1", "a1.4500", "a01", "a1.45"] result = index_natsorted(given, alg=ns.FLOAT | ns.PRESORT) assert result == expected def test_index_realsorted_is_identical_to_index_natsorted_with_real_alg( float_list: List[str], ) -> None: assert index_realsorted(float_list) == index_natsorted(float_list, alg=ns.REAL) @pytest.mark.usefixtures("with_locale_en_us") def test_index_humansorted_is_identical_to_index_natsorted_with_locale_alg( fruit_list: List[str], ) -> None: assert index_humansorted(fruit_list) == index_natsorted(fruit_list, alg=ns.LOCALE) def test_order_by_index_sorts_list_according_to_order_of_integer_list() -> None: given = ["num3", "num5", "num2"] index = [2, 0, 1] expected = [given[i] for i in index] assert expected == ["num2", "num3", "num5"] assert order_by_index(given, index) == expected def test_order_by_index_returns_generator_with_iter_true() -> None: given = ["num3", "num5", "num2"] index = [2, 0, 1] assert order_by_index(given, index, True) != [given[i] for i in index] assert list(order_by_index(given, index, True)) == [given[i] for i in index] natsort-8.4.0/tests/test_ns_enum.py000066400000000000000000000023411444422413600174400ustar00rootroot00000000000000import pytest from natsort import ns @pytest.mark.parametrize( "given, expected", [ ("FLOAT", 0x0001), ("SIGNED", 0x0002), ("NOEXP", 0x0004), ("PATH", 0x0008), ("LOCALEALPHA", 0x0010), ("LOCALENUM", 0x0020), ("IGNORECASE", 0x0040), ("LOWERCASEFIRST", 0x0080), ("GROUPLETTERS", 0x0100), ("UNGROUPLETTERS", 0x0200), ("NANLAST", 0x0400), ("COMPATIBILITYNORMALIZE", 0x0800), ("NUMAFTER", 0x1000), ("PRESORT", 0x2000), ("DEFAULT", 0x0000), ("INT", 0x0000), ("UNSIGNED", 0x0000), ("REAL", 0x0003), ("LOCALE", 0x0030), ("I", 0x0000), 
("U", 0x0000), ("F", 0x0001), ("S", 0x0002), ("R", 0x0003), ("N", 0x0004), ("P", 0x0008), ("LA", 0x0010), ("LN", 0x0020), ("L", 0x0030), ("IC", 0x0040), ("LF", 0x0080), ("G", 0x0100), ("UG", 0x0200), ("C", 0x0200), ("CAPITALFIRST", 0x0200), ("NL", 0x0400), ("CN", 0x0800), ("NA", 0x1000), ("PS", 0x2000), ], ) def test_ns_enum(given: str, expected: int) -> None: assert ns[given] == expected natsort-8.4.0/tests/test_os_sorted.py000066400000000000000000000101541444422413600177760ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Testing for the OS sorting """ import platform import natsort import pytest try: import icu # noqa: F401 except ImportError: has_icu = False else: has_icu = True def test_os_sorted_compound() -> None: given = [ "/p/Folder (10)/file.tar.gz", "/p/Folder (1)/file (1).tar.gz", "/p/Folder/file.x1.9.tar.gz", "/p/Folder (2)/file.tar.gz", "/p/Folder (1)/file.tar.gz", "/p/Folder/file.x1.10.tar.gz", ] if platform.system() == "Windows": expected = [ "/p/Folder/file.x1.9.tar.gz", "/p/Folder/file.x1.10.tar.gz", "/p/Folder (1)/file (1).tar.gz", "/p/Folder (1)/file.tar.gz", "/p/Folder (2)/file.tar.gz", "/p/Folder (10)/file.tar.gz", ] else: expected = [ "/p/Folder/file.x1.9.tar.gz", "/p/Folder/file.x1.10.tar.gz", "/p/Folder (1)/file.tar.gz", "/p/Folder (1)/file (1).tar.gz", "/p/Folder (2)/file.tar.gz", "/p/Folder (10)/file.tar.gz", ] result = natsort.os_sorted(given) assert result == expected def test_os_sorted_misc_no_fail() -> None: natsort.os_sorted([9, 4.3, None, float("nan")]) def test_os_sorted_key() -> None: given = ["foo0", "foo2", "goo1"] expected = ["foo0", "goo1", "foo2"] result = natsort.os_sorted(given, key=lambda x: x.replace("g", "f")) assert result == expected def test_os_sorted_can_presort() -> None: given = ["a1", "a01"] expected = ["a01", "a1"] result = natsort.os_sorted(given, presort=True) assert result == expected # The following is a master list of things that might give trouble # when sorting like the file explorer. 
given_characters = [ "11111", "aaaaa", "foo0", "foo_0", "foo1", "foo2", "foo4", "foo10", "Foo3", ] given_special = [ "!", "#", "$", "%", "&", "'", "(", ")", "+", "+11111", "+aaaaa", ",", "-", ";", "=", "@", "[", "]", "^", "_", "`", "{", "}", "~", "§", "°", "´", "µ", "€", ] # The expceted values change based on the environment if platform.system() == "Windows": given = given_characters + given_special expected = [ "'", "-", "!", "#", "$", "%", "&", "(", ")", ",", ";", "@", "[", "]", "^", "_", "`", "{", "}", "~", "´", "€", "+", "+11111", "+aaaaa", "=", "§", "°", "µ", "11111", "aaaaa", "foo_0", "foo0", "foo1", "foo2", "Foo3", "foo4", "foo10", ] elif has_icu: given = given_characters + given_special expected = [ "_", "-", ",", ";", "!", "'", "(", ")", "[", "]", "{", "}", "§", "@", "&", "#", "%", "`", "´", "^", "°", "+", "+11111", "+aaaaa", "=", "~", "$", "€", "11111", "aaaaa", "foo_0", "foo0", "foo1", "foo2", "Foo3", "foo4", "foo10", "µ", ] else: # For non-ICU UNIX, the order is all over the place # from platform to platform, distribution to distribution. # It's not really possible to predict the order across all # the different OS. To work around this, we will exclude # the special characters from the sort. 
given = given_characters expected = [ "11111", "aaaaa", "foo0", "foo1", "foo2", "Foo3", "foo4", "foo10", "foo_0", ] @pytest.mark.usefixtures("with_locale_en_us") def test_os_sorted_corpus() -> None: result = natsort.os_sorted(given) print(result) assert result == expected natsort-8.4.0/tests/test_parse_bytes_function.py000066400000000000000000000014471444422413600222270ustar00rootroot00000000000000# -*- coding: utf-8 -*- """These test the utils.py functions.""" import pytest from hypothesis import given from hypothesis.strategies import binary from natsort.ns_enum import NSType, ns from natsort.utils import BytesTransformer, parse_bytes_factory @pytest.mark.parametrize( "alg, example_func", [ (ns.DEFAULT, lambda x: (x,)), (ns.IGNORECASE, lambda x: (x.lower(),)), # With PATH, it becomes a tested tuple. (ns.PATH, lambda x: ((x,),)), (ns.PATH | ns.IGNORECASE, lambda x: ((x.lower(),),)), ], ) @given(x=binary()) def test_parse_bytest_factory_makes_function_that_returns_tuple( x: bytes, alg: NSType, example_func: BytesTransformer ) -> None: parse_bytes_func = parse_bytes_factory(alg) assert parse_bytes_func(x) == example_func(x) natsort-8.4.0/tests/test_parse_number_function.py000066400000000000000000000036251444422413600223710ustar00rootroot00000000000000# -*- coding: utf-8 -*- """These test the utils.py functions.""" from typing import Optional, Tuple, Union import pytest from hypothesis import given from hypothesis.strategies import floats, integers from natsort.ns_enum import NSType, ns from natsort.utils import NumTransformer, parse_number_or_none_factory @pytest.mark.usefixtures("with_locale_en_us") @pytest.mark.parametrize( "alg, example_func", [ (ns.DEFAULT, lambda x: ("", x)), (ns.PATH, lambda x: (("", x),)), (ns.UNGROUPLETTERS | ns.LOCALE, lambda x: (("xx",), ("", x))), (ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, lambda x: ((("xx",), ("", x)),)), ], ) @given(x=floats(allow_nan=False, allow_infinity=False) | integers()) def 
test_parse_number_factory_makes_function_that_returns_tuple( x: Union[float, int], alg: NSType, example_func: NumTransformer ) -> None: parse_number_func = parse_number_or_none_factory(alg, "", "xx") assert parse_number_func(x) == example_func(x) @pytest.mark.parametrize( "alg, x, result", [ (ns.DEFAULT, 57, ("", 57)), ( ns.DEFAULT, float("nan"), ("", float("-inf"), "1"), ), # NaN transformed to -infinity ( ns.NANLAST, float("nan"), ("", float("+inf"), "3"), ), # NANLAST makes it +infinity (ns.DEFAULT, None, ("", float("-inf"), "2")), # None transformed to -infinity (ns.NANLAST, None, ("", float("+inf"), "2")), # NANLAST makes it +infinity (ns.DEFAULT, float("-inf"), ("", float("-inf"), "3")), (ns.NANLAST, float("+inf"), ("", float("+inf"), "1")), ], ) def test_parse_number_factory_treats_nan_and_none_special( alg: NSType, x: Optional[Union[float, int]], result: Tuple[str, Union[float, int]] ) -> None: parse_number_func = parse_number_or_none_factory(alg, "", "xx") assert parse_number_func(x) == result natsort-8.4.0/tests/test_parse_string_function.py000066400000000000000000000061761444422413600224130ustar00rootroot00000000000000# -*- coding: utf-8 -*- """These test the utils.py functions.""" import unicodedata from typing import Any, Callable, Iterable, List, Tuple, Union import pytest from hypothesis import given from hypothesis.strategies import floats, integers, lists, text from natsort.compat.fastnumbers import try_float from natsort.ns_enum import NSType, NS_DUMB, ns from natsort.utils import ( FinalTransform, NumericalRegularExpressions as NumRegex, StrParser, ) from natsort.utils import parse_string_factory class CustomTuple(Tuple[Any, ...]): """Used to ensure what is given during testing is what is returned.""" original: Any = None def input_transform(x: Any) -> Any: """Make uppercase.""" try: return x.upper() except AttributeError: return x def final_transform(x: Iterable[Any], original: str) -> FinalTransform: """Make the input a CustomTuple.""" t = 
CustomTuple(x) t.original = original return t def parse_string_func_factory(alg: NSType) -> StrParser: """A parse_string_factory result with sample arguments.""" sep = "" return parse_string_factory( alg, sep, NumRegex.int_nosign().split, input_transform, lambda x: try_float(x, map=True), final_transform, ) @given(x=floats() | integers()) def test_parse_string_factory_raises_type_error_if_given_number( x: Union[int, float] ) -> None: parse_string_func = parse_string_func_factory(ns.DEFAULT) with pytest.raises(TypeError): assert parse_string_func(x) # type: ignore # noinspection PyCallingNonCallable @pytest.mark.parametrize( "alg, orig_func", [ (ns.DEFAULT, lambda x: x.upper()), (ns.LOCALE, lambda x: x.upper()), (ns.LOCALE | NS_DUMB, lambda x: x), # This changes the "original" handling. ], ) @given( x=lists( elements=floats(allow_nan=False) | text() | integers(), min_size=1, max_size=10 ) ) @pytest.mark.usefixtures("with_locale_en_us") def test_parse_string_factory_invariance( x: List[Union[float, str, int]], alg: NSType, orig_func: Callable[[str], str] ) -> None: parse_string_func = parse_string_func_factory(alg) # parse_string_factory is the high-level combination of several dedicated # functions involved in splitting and manipulating a string. The details of # what those functions do is not relevant to testing parse_string_factory. # What is relevant is that the form of the output matches the invariant # that even elements are string and odd are numerical. That each component # function is doing what it should is tested elsewhere. value = "".join(map(str, x)) # Convert the input to a single string. result = parse_string_func(value) result_types = list(map(type, result)) expected_types = [str if i % 2 == 0 else float for i in range(len(result))] assert result_types == expected_types # The result is in our CustomTuple. assert isinstance(result, CustomTuple) # Original should have gone through the "input_transform" # which is uppercase in these tests. 
assert result.original == orig_func(unicodedata.normalize("NFD", value)) natsort-8.4.0/tests/test_regex.py000066400000000000000000000117201444422413600171070ustar00rootroot00000000000000# -*- coding: utf-8 -*- """These test the splitting regular expressions.""" from typing import List, Pattern import pytest from natsort import ns, numeric_regex_chooser from natsort.ns_enum import NSType from natsort.utils import NumericalRegularExpressions as NumRegex regex_names = { NumRegex.int_nosign(): "int_nosign", NumRegex.int_sign(): "int_sign", NumRegex.float_nosign_noexp(): "float_nosign_noexp", NumRegex.float_sign_noexp(): "float_sign_noexp", NumRegex.float_nosign_exp(): "float_nosign_exp", NumRegex.float_sign_exp(): "float_sign_exp", } # Regex Aliases (so lines stay a reasonable length. i_u = NumRegex.int_nosign() i_s = NumRegex.int_sign() f_u = NumRegex.float_nosign_noexp() f_s = NumRegex.float_sign_noexp() f_ue = NumRegex.float_nosign_exp() f_se = NumRegex.float_sign_exp() # Assemble a test suite of regular strings and their regular expression # splitting result. Organize by the input string. 
regex_tests = { "-123.45e+67": { i_u: ["-", "123", ".", "45", "e+", "67", ""], i_s: ["", "-123", ".", "45", "e", "+67", ""], f_u: ["-", "123.45", "e+", "67", ""], f_s: ["", "-123.45", "e", "+67", ""], f_ue: ["-", "123.45e+67", ""], f_se: ["", "-123.45e+67", ""], }, "a-123.45e+67b": { i_u: ["a-", "123", ".", "45", "e+", "67", "b"], i_s: ["a", "-123", ".", "45", "e", "+67", "b"], f_u: ["a-", "123.45", "e+", "67", "b"], f_s: ["a", "-123.45", "e", "+67", "b"], f_ue: ["a-", "123.45e+67", "b"], f_se: ["a", "-123.45e+67", "b"], }, "hello": { i_u: ["hello"], i_s: ["hello"], f_u: ["hello"], f_s: ["hello"], f_ue: ["hello"], f_se: ["hello"], }, "abc12.34.56-7def": { i_u: ["abc", "12", ".", "34", ".", "56", "-", "7", "def"], i_s: ["abc", "12", ".", "34", ".", "56", "", "-7", "def"], f_u: ["abc", "12.34", "", ".56", "-", "7", "def"], f_s: ["abc", "12.34", "", ".56", "", "-7", "def"], f_ue: ["abc", "12.34", "", ".56", "-", "7", "def"], f_se: ["abc", "12.34", "", ".56", "", "-7", "def"], }, "a1b2c3d4e5e6": { i_u: ["a", "1", "b", "2", "c", "3", "d", "4", "e", "5", "e", "6", ""], i_s: ["a", "1", "b", "2", "c", "3", "d", "4", "e", "5", "e", "6", ""], f_u: ["a", "1", "b", "2", "c", "3", "d", "4", "e", "5", "e", "6", ""], f_s: ["a", "1", "b", "2", "c", "3", "d", "4", "e", "5", "e", "6", ""], f_ue: ["a", "1", "b", "2", "c", "3", "d", "4e5", "e", "6", ""], f_se: ["a", "1", "b", "2", "c", "3", "d", "4e5", "e", "6", ""], }, "eleven۱۱eleven11eleven১১": { # All of these are the decimal 11 i_u: ["eleven", "۱۱", "eleven", "11", "eleven", "১১", ""], i_s: ["eleven", "۱۱", "eleven", "11", "eleven", "১১", ""], f_u: ["eleven", "۱۱", "eleven", "11", "eleven", "১১", ""], f_s: ["eleven", "۱۱", "eleven", "11", "eleven", "১১", ""], f_ue: ["eleven", "۱۱", "eleven", "11", "eleven", "১১", ""], f_se: ["eleven", "۱۱", "eleven", "11", "eleven", "১১", ""], }, "12①②ⅠⅡ⅓": { # Two decimals, Two digits, Two numerals, fraction i_u: ["", "12", "", "①", "", "②", "ⅠⅡ⅓"], i_s: ["", "12", "", "①", "", "②", "ⅠⅡ⅓"], f_u: 
["", "12", "", "①", "", "②", "", "Ⅰ", "", "Ⅱ", "", "⅓", ""], f_s: ["", "12", "", "①", "", "②", "", "Ⅰ", "", "Ⅱ", "", "⅓", ""], f_ue: ["", "12", "", "①", "", "②", "", "Ⅰ", "", "Ⅱ", "", "⅓", ""], f_se: ["", "12", "", "①", "", "②", "", "Ⅰ", "", "Ⅱ", "", "⅓", ""], }, } # From the above collections, create the parametrized tests and labels. regex_params = [ (given, expected, regex) for given, values in regex_tests.items() for regex, expected in values.items() ] labels = ["{}-{}".format(given, regex_names[regex]) for given, _, regex in regex_params] @pytest.mark.parametrize("x, expected, regex", regex_params, ids=labels) def test_regex_splits_correctly( x: str, expected: List[str], regex: Pattern[str] ) -> None: # noinspection PyUnresolvedReferences assert regex.split(x) == expected @pytest.mark.parametrize( "given, expected", [ (ns.INT, NumRegex.int_nosign()), (ns.INT | ns.UNSIGNED, NumRegex.int_nosign()), (ns.INT | ns.SIGNED, NumRegex.int_sign()), (ns.INT | ns.NOEXP, NumRegex.int_nosign()), (ns.FLOAT, NumRegex.float_nosign_exp()), (ns.FLOAT | ns.UNSIGNED, NumRegex.float_nosign_exp()), (ns.FLOAT | ns.SIGNED, NumRegex.float_sign_exp()), (ns.FLOAT | ns.NOEXP, NumRegex.float_nosign_noexp()), (ns.FLOAT | ns.SIGNED | ns.NOEXP, NumRegex.float_sign_noexp()), (ns.FLOAT | ns.UNSIGNED | ns.NOEXP, NumRegex.float_nosign_noexp()), ], ) def test_regex_chooser(given: NSType, expected: Pattern[str]) -> None: assert numeric_regex_chooser(given) == expected.pattern[1:-1] # remove parens natsort-8.4.0/tests/test_string_component_transform_factory.py000066400000000000000000000063171444422413600252150ustar00rootroot00000000000000# -*- coding: utf-8 -*- """These test the utils.py functions.""" from functools import partial from typing import Any, Callable, FrozenSet, Union import pytest from hypothesis import assume, example, given from hypothesis.strategies import floats, integers, text from natsort.compat.fastnumbers import try_float, try_int from natsort.compat.locale import get_strxfrm 
from natsort.ns_enum import NSType, NS_DUMB, ns from natsort.utils import groupletters, string_component_transform_factory # There are some unicode values that are known failures with the builtin locale # library on OSX and some other BSD-based systems that has nothing to do with # natsort (a ValueError is raised by strxfrm). Let's filter them out. try: bad_uni_chars = frozenset(chr(x) for x in range(0x10FEFD, 0x10FFFF + 1)) except ValueError: # Narrow unicode build... no worries. bad_uni_chars = frozenset() def no_bad_uni_chars(x: str, _bad_chars: FrozenSet[str] = bad_uni_chars) -> bool: """Ensure text does not contain bad unicode characters""" return not any(y in _bad_chars for y in x) def no_null(x: str) -> bool: """Ensure text does not contain a null character.""" return "\0" not in x def input_is_ok_with_locale(x: str) -> bool: """Ensure this input won't cause locale.strxfrm to barf""" # Bad input can cause an OSError if the OS doesn't support the value try: get_strxfrm()(x) except OSError: return False else: return True @pytest.mark.parametrize( "alg, example_func", [ (ns.INT, partial(try_int, map=True)), (ns.DEFAULT, partial(try_int, map=True)), (ns.FLOAT, partial(try_float, map=True, nan=float("-inf"))), (ns.FLOAT | ns.NANLAST, partial(try_float, map=True, nan=float("+inf"))), (ns.GROUPLETTERS, partial(try_int, map=True, on_fail=groupletters)), (ns.LOCALE, partial(try_int, map=True, on_fail=lambda x: get_strxfrm()(x))), ( ns.GROUPLETTERS | ns.LOCALE, partial( try_int, map=True, on_fail=lambda x: get_strxfrm()(groupletters(x)) ), ), ( NS_DUMB | ns.LOCALE, partial( try_int, map=True, on_fail=lambda x: get_strxfrm()(groupletters(x)) ), ), ( ns.GROUPLETTERS | ns.LOCALE | ns.FLOAT | ns.NANLAST, partial( try_float, map=True, on_fail=lambda x: get_strxfrm()(groupletters(x)), nan=float("+inf"), ), ), ], ) @example(x=float("nan")) @example(x="Å") @given( x=integers() | floats() | text().filter(bool).filter(no_bad_uni_chars).filter(no_null) ) 
@pytest.mark.usefixtures("with_locale_en_us") def test_string_component_transform_factory( x: Union[str, float, int], alg: NSType, example_func: Callable[[str], Any] ) -> None: string_component_transform_func = string_component_transform_factory(alg) x = str(x) assume(input_is_ok_with_locale(x)) try: assert list(string_component_transform_func(x)) == list(example_func(x)) except ValueError as e: # handle broken locale lib on OSX. if "is not in range" not in str(e): raise natsort-8.4.0/tests/test_unicode_numbers.py000066400000000000000000000044461444422413600211650ustar00rootroot00000000000000# -*- coding: utf-8 -*- """\ Test the Unicode numbers module. """ import unicodedata import warnings from natsort.unicode_numbers import ( decimal_chars, decimals, digit_chars, digits, digits_no_decimals, numeric, numeric_chars, numeric_no_decimals, ) from natsort.unicode_numeric_hex import numeric_hex def test_numeric_chars_contains_only_valid_unicode_numeric_characters() -> None: for a in numeric_chars: assert unicodedata.numeric(a, None) is not None def test_digit_chars_contains_only_valid_unicode_digit_characters() -> None: for a in digit_chars: assert unicodedata.digit(a, None) is not None def test_decimal_chars_contains_only_valid_unicode_decimal_characters() -> None: for a in decimal_chars: assert unicodedata.decimal(a, None) is not None def test_numeric_chars_contains_all_valid_unicode_numeric_and_digit_characters() -> ( None ): set_numeric_chars = set(numeric_chars) set_digit_chars = set(digit_chars) set_decimal_chars = set(decimal_chars) assert set_decimal_chars.isdisjoint(digits_no_decimals) assert set_digit_chars.issuperset(digits_no_decimals) assert set_decimal_chars.isdisjoint(numeric_no_decimals) assert set_numeric_chars.issuperset(numeric_no_decimals) def test_missing_unicode_number_in_collection() -> None: ok = True set_numeric_hex = set(numeric_hex) for i in range(0x110000): try: a = chr(i) except ValueError: break if a in "0123456789": continue if 
unicodedata.numeric(a, None) is not None: if i not in set_numeric_hex: ok = False if not ok: warnings.warn( """\ Not all numeric unicode characters are represented in natsort/unicode_numeric_hex.py This can be addressed by running dev/generate_new_unicode_numbers.py with the current \ version of Python. It would be much appreciated if you would submit a Pull Request to the natsort repository (https://github.com/SethMMorton/natsort) with the resulting change. """, stacklevel=2, ) def test_combined_string_contains_all_characters_in_list() -> None: assert numeric == "".join(numeric_chars) assert digits == "".join(digit_chars) assert decimals == "".join(decimal_chars) natsort-8.4.0/tests/test_utils.py000066400000000000000000000145211444422413600171370ustar00rootroot00000000000000# -*- coding: utf-8 -*- """These test the utils.py functions.""" import os import pathlib import string from itertools import chain from operator import neg as op_neg from typing import List, Pattern, Tuple, Union import pytest from hypothesis import given from hypothesis.strategies import integers, lists, sampled_from, text from natsort import utils from natsort.ns_enum import NSType, ns def test_do_decoding_decodes_bytes_string_to_unicode() -> None: assert type(utils.do_decoding(b"bytes", "ascii")) is str assert utils.do_decoding(b"bytes", "ascii") == "bytes" assert utils.do_decoding(b"bytes", "ascii") == b"bytes".decode("ascii") @pytest.mark.parametrize( "alg, expected", [ (ns.I, utils.NumericalRegularExpressions.int_nosign()), (ns.I | ns.N, utils.NumericalRegularExpressions.int_nosign()), (ns.I | ns.S, utils.NumericalRegularExpressions.int_sign()), (ns.I | ns.S | ns.N, utils.NumericalRegularExpressions.int_sign()), (ns.F, utils.NumericalRegularExpressions.float_nosign_exp()), (ns.F | ns.N, utils.NumericalRegularExpressions.float_nosign_noexp()), (ns.F | ns.S, utils.NumericalRegularExpressions.float_sign_exp()), (ns.F | ns.S | ns.N, utils.NumericalRegularExpressions.float_sign_noexp()), ], ) 
def test_regex_chooser_returns_correct_regular_expression_object( alg: NSType, expected: Pattern[str] ) -> None: assert utils.regex_chooser(alg).pattern == expected.pattern @pytest.mark.parametrize( "alg, value_or_alias", [ # Defaults (ns.DEFAULT, 0), (ns.INT, 0), (ns.UNSIGNED, 0), # Aliases (ns.INT, ns.I), (ns.UNSIGNED, ns.U), (ns.FLOAT, ns.F), (ns.SIGNED, ns.S), (ns.NOEXP, ns.N), (ns.PATH, ns.P), (ns.LOCALEALPHA, ns.LA), (ns.LOCALENUM, ns.LN), (ns.LOCALE, ns.L), (ns.IGNORECASE, ns.IC), (ns.LOWERCASEFIRST, ns.LF), (ns.GROUPLETTERS, ns.G), (ns.UNGROUPLETTERS, ns.UG), (ns.CAPITALFIRST, ns.C), (ns.UNGROUPLETTERS, ns.CAPITALFIRST), (ns.NANLAST, ns.NL), (ns.COMPATIBILITYNORMALIZE, ns.CN), (ns.NUMAFTER, ns.NA), # Convenience (ns.LOCALE, ns.LOCALEALPHA | ns.LOCALENUM), (ns.REAL, ns.FLOAT | ns.SIGNED), ], ) def test_ns_enum_values_and_aliases(alg: NSType, value_or_alias: NSType) -> None: assert alg == value_or_alias def test_chain_functions_is_a_no_op_if_no_functions_are_given() -> None: x = 2345 assert utils.chain_functions([])(x) is x def test_chain_functions_does_one_function_if_one_function_is_given() -> None: x = "2345" assert utils.chain_functions([len])(x) == 4 def test_chain_functions_combines_functions_in_given_order() -> None: x = 2345 assert utils.chain_functions([str, len, op_neg])(x) == -len(str(x)) # Each test has an "example" version for demonstrative purposes, # and a test that uses the hypothesis module. 
def test_groupletters_gives_letters_with_lowercase_letter_transform_example() -> None: assert utils.groupletters("HELLO") == "hHeElLlLoO" assert utils.groupletters("hello") == "hheelllloo" @given(text().filter(bool)) def test_groupletters_gives_letters_with_lowercase_letter_transform( x: str, ) -> None: assert utils.groupletters(x) == "".join( chain.from_iterable([y.casefold(), y] for y in x) ) def test_sep_inserter_does_nothing_if_no_numbers_example() -> None: assert list(utils.sep_inserter(iter(["a", "b", "c"]), "")) == ["a", "b", "c"] assert list(utils.sep_inserter(iter(["a"]), "")) == ["a"] def test_sep_inserter_does_nothing_if_only_one_number_example() -> None: assert list(utils.sep_inserter(iter(["a", 5]), "")) == ["a", 5] def test_sep_inserter_inserts_separator_string_between_two_numbers_example() -> None: assert list(utils.sep_inserter(iter([5, 9]), "")) == ["", 5, "", 9] @given(lists(elements=text().filter(bool) | integers(), min_size=3)) def test_sep_inserter_inserts_separator_between_two_numbers( x: List[Union[str, int]] ) -> None: # Rather than just replicating the results in a different algorithm, # validate that the "shape" of the output is as expected. 
result = list(utils.sep_inserter(iter(x), "")) for i, pos in enumerate(result[1:-1], 1): if pos == "": assert isinstance(result[i - 1], int) assert isinstance(result[i + 1], int) def test_path_splitter_splits_path_string_by_sep_example() -> None: given = "/this/is/a/path" expected = (os.sep, "this", "is", "a", "path") assert tuple(utils.path_splitter(given)) == tuple(expected) assert tuple(utils.path_splitter(pathlib.Path(given))) == tuple(expected) @pytest.mark.parametrize("given", [".", "./", "./././", ".\\"]) def test_path_splitter_handles_dot_properly(given: str) -> None: # https://github.com/SethMMorton/natsort/issues/142 expected = (os.path.normpath(given),) assert tuple(utils.path_splitter(given)) == expected assert tuple(utils.path_splitter(pathlib.Path(given))) == expected @given(lists(sampled_from(string.ascii_letters), min_size=2).filter(all)) def test_path_splitter_splits_path_string_by_sep(x: List[str]) -> None: z = str(pathlib.Path(*x)) assert tuple(utils.path_splitter(z)) == tuple(pathlib.Path(z).parts) @pytest.mark.parametrize( "given, expected", [ ( "/this/is/a/path/file.x1.10.tar.gz", (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar", ".gz"), ), ( "/this/is/a/path/file.x1.10.tar", (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar"), ), ( "/this/is/a/path/file.x1.threethousand.tar", (os.sep, "this", "is", "a", "path", "file.x1.threethousand", ".tar"), ), ], ) def test_path_splitter_splits_path_string_by_sep_and_removes_extension_example( given: str, expected: Tuple[str, ...] ) -> None: assert tuple(utils.path_splitter(given)) == tuple(expected) @given(lists(sampled_from(string.ascii_letters), min_size=3).filter(all)) def test_path_splitter_splits_path_string_by_sep_and_removes_extension( x: List[str], ) -> None: z = str(pathlib.Path(*x[:-2])) + "." 
+ x[-1] y = tuple(pathlib.Path(z).parts) assert tuple(utils.path_splitter(z)) == y[:-1] + ( pathlib.Path(z).stem, pathlib.Path(z).suffix, ) natsort-8.4.0/tox.ini000066400000000000000000000045471444422413600145460ustar00rootroot00000000000000# Tox (http://tox.testrun.org/) is a tool for running tests # in multiple virtualenvs. This configuration file will run the # test suite on all supported python versions. To use it, "pip install tox" # and then run "tox" from this directory. [tox] envlist = flake8, mypy, py37, py38, py39, py310, py311 # Other valid environments are: # docs # release # clean # bump # Don't error out if a user hasn't installed all python versions. skip_missing_interpreters = true [testenv] passenv = WITH_EXTRAS deps = coverage pytest pytest-cov pytest-mock hypothesis extras = {env:WITH_EXTRAS:} commands = # Doctests {envpython} -m doctest -o IGNORE_EXCEPTION_DETAIL docs/howitworks.rst pytest README.rst docs/examples.rst pytest --doctest-modules {envsitepackagesdir}/natsort # Full test suite. Allow the user to pass command-line objects. pytest --hypothesis-profile=slow-tests --tb=short --cov {envsitepackagesdir}/natsort --cov-report term-missing {posargs:} # Check code quality. [testenv:flake8] deps = flake8 flake8-import-order flake8-bugbear pep8-naming check-manifest twine commands = {envpython} setup.py sdist pip wheel . -w dist flake8 check-manifest --ignore ".github*,*.md,.coveragerc" twine check dist/* skip_install = true # Type checking [testenv:mypy] deps = mypy hypothesis pytest pytest-mock fastnumbers>=5.0.1 typing_extensions commands = mypy --strict natsort tests skip_install = true # Build documentation. # sphinx and sphinx_rtd_theme not in docs/requirements.txt because they # will already be installed on readthedocs. 
[testenv:docs] deps = sphinx sphinx_rtd_theme commands = {envpython} setup.py build_sphinx skip_install = true # Bump version [testenv:bump] passenv = HOME deps = bump2version commands = {envpython} dev/bump.py {posargs:} skip_install = true # Release the code to PyPI [testenv:release] deps = twine commands = {envpython} setup.py sdist --format=gztar bdist_wheel twine upload --skip-existing dist/* skip_install = true # Clean up the working directory [testenv:clean] deps = commands = {envpython} dev/clean.py skip_install = true # Get GitHub actions to run the correct tox environment [gh-actions] python = 3.7: py37 3.8: py38 3.9: py39 3.10: py310 3.11: py311