trollsift-1.0.0/.gitattributes
trollsift/version.py export-subst

trollsift-1.0.0/.github/dependabot.yml
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates

version: 2
updates:
  - package-ecosystem: "github-actions" # See documentation for possible values
    directory: "/" # Location of package manifests
    schedule:
      interval: "weekly"

trollsift-1.0.0/.github/workflows/ci.yaml
name: CI

on: [push, pull_request]

jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: true
      matrix:
        os: ["windows-latest", "ubuntu-latest", "macos-latest"]
        python-version: ["3.10", "3.11", "3.13"]
    env:
      PYTHON_VERSION: ${{ matrix.python-version }}
      OS: ${{ matrix.os }}
      ACTIONS_ALLOW_UNSECURE_COMMANDS: true
    steps:
      - name: Checkout source
        uses: actions/checkout@v5
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          pip install -U codecov pytest pytest-cov mypy
      - name: Install trollsift
        run: |
          pip install --no-deps -e .
      - name: Run unit tests
        run: |
          pytest --cov=trollsift trollsift/tests --cov-report=xml
      - name: Run mypy
        run: |
          mypy trollsift
      - name: Upload unittest coverage to Codecov
        uses: codecov/codecov-action@v5
        with:
          flags: unittests
          files: ./coverage.xml
          env_vars: OS,PYTHON_VERSION

trollsift-1.0.0/.github/workflows/deploy-sdist.yaml
name: Deploy sdist

on:
  release:
    types:
      - published

jobs:
  sdist:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout source
        uses: actions/checkout@v5

      - name: Create sdist
        shell: bash -l {0}
        run: |
          python -m pip install -q build
          python -m build

      - name: Publish package to PyPI
        if: github.event.action == 'published'
        uses: pypa/gh-action-pypi-publish@v1.13.0
        with:
          user: __token__
          password: ${{ secrets.pypi_password }}

trollsift-1.0.0/.gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
trollsift/version.py

# Jupyter Notebook
.ipynb_checkpoints

doc/build/*

.coverage

trollsift-1.0.0/.pre-commit-config.yaml
exclude: '^$'
fail_fast: false
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: 'v0.13.3'
    hooks:
      - id: ruff
        args: ["--fix"]
      - id: ruff-format
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v6.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
        args: [--unsafe]
ci:
  # To trigger manually, comment on a pull request with "pre-commit.ci autofix"
  autofix_prs: false
  autoupdate_schedule: "monthly"

trollsift-1.0.0/.readthedocs.yaml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

version: 2

# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: doc/source/conf.py
  fail_on_warning: true

# Optionally build your docs in additional formats such as PDF and ePub
formats: all

build:
  os: "ubuntu-20.04"
  tools:
    python: "mambaforge-4.10"

conda:
  environment: doc/rtd_environment.yaml

trollsift-1.0.0/AUTHORS.md
# Trollsift developers

The following people have made contributions to this project:

- [David Hoese (djhoese)](https://github.com/djhoese)
- [Panu Lahtinen (pnuu)](https://github.com/pnuu)
- [Martin Raspaud (mraspaud)](https://github.com/mraspaud)
- [Hrobjartur Thorsteinsson (thorsteinssonh)](https://github.com/thorsteinssonh)
- [Stephan Finkensieper (sfinkens)](https://github.com/sfinkens)
- [Paulo Medeiros (paulovcmedeiros)](https://github.com/paulovcmedeiros)
- [Regan Koopmans (Regan-Koopmans)](https://github.com/Regan-Koopmans)

trollsift-1.0.0/CHANGELOG.md
## Version 1.0.0 (2025/10/07)

### Issues Closed

* [Issue 85](https://github.com/pytroll/trollsift/issues/85) - Relicense to Apache Version 2 ([PR 87](https://github.com/pytroll/trollsift/pull/87) by [@mraspaud](https://github.com/mraspaud))
* [Issue 84](https://github.com/pytroll/trollsift/issues/84) - Missing `trollsift.formatter` in v0.6.0

In this release 2 issues were closed.
### Pull Requests Merged #### Features added * [PR 87](https://github.com/pytroll/trollsift/pull/87) - Relicense to apache v2.0 ([85](https://github.com/pytroll/trollsift/issues/85)) In this release 1 pull request was closed. ## Version 0.6.0 (2025/09/03) ### Issues Closed * [Issue 7](https://github.com/pytroll/trollsift/issues/7) - Switch accepted arguments for parser methods to args and kwargs In this release 1 issue was closed. ### Pull Requests Merged #### Bugs fixed * [PR 81](https://github.com/pytroll/trollsift/pull/81) - Add pre-commit with ruff and ruff format #### Features added * [PR 80](https://github.com/pytroll/trollsift/pull/80) - Add type annotations In this release 2 pull requests were closed. ## Version 0.5.3 (2024/12/03) ### Pull Requests Merged #### Bugs fixed * [PR 76](https://github.com/pytroll/trollsift/pull/76) - Drop support for Python 3.8 and below In this release 1 pull request was closed. ## Version 0.5.2 (2024/12/02) ### Pull Requests Merged #### Features added * [PR 75](https://github.com/pytroll/trollsift/pull/75) - Switch to pyproject.toml In this release 1 pull request was closed. ## Version 0.5.1 (2023/10/09) ### Pull Requests Merged #### Bugs fixed * [PR 49](https://github.com/pytroll/trollsift/pull/49) - Update versioneer to stop using deprecated distutils module. #### Features added * [PR 56](https://github.com/pytroll/trollsift/pull/56) - Add readthedocs configuration file In this release 2 pull requests were closed. ## Version 0.5.0 (2022/11/21) ### Issues Closed * [Issue 45](https://github.com/pytroll/trollsift/issues/45) - Provide simple access to defined keys of a parser instance ([PR 46](https://github.com/pytroll/trollsift/pull/46) by [@carloshorn](https://github.com/carloshorn)) * [Issue 37](https://github.com/pytroll/trollsift/issues/37) - Global instances of formatters ([PR 38](https://github.com/pytroll/trollsift/pull/38) by [@Regan-Koopmans](https://github.com/Regan-Koopmans)) * [Issue 36](https://github.com/pytroll/trollsift/issues/36) - Alignment marker is not optional for numbers when it should * [Issue 34](https://github.com/pytroll/trollsift/issues/34) - Trollsift doesn't parse hex numbers ([PR 35](https://github.com/pytroll/trollsift/pull/35) by [@mraspaud](https://github.com/mraspaud)) In this release 4 issues were closed. ### Pull Requests Merged #### Bugs fixed * [PR 38](https://github.com/pytroll/trollsift/pull/38) - Replace global RegexFormatter with memoized function ([37](https://github.com/pytroll/trollsift/issues/37)) * [PR 35](https://github.com/pytroll/trollsift/pull/35) - Add hex, octal, and binary parsing ([34](https://github.com/pytroll/trollsift/issues/34)) #### Features added * [PR 46](https://github.com/pytroll/trollsift/pull/46) - Add keys method to Parser class ([45](https://github.com/pytroll/trollsift/issues/45)) In this release 3 pull requests were closed. ## Version 0.4.0 (2022/02/03) ### Issues Closed * [Issue 30](https://github.com/pytroll/trollsift/issues/30) - Problems with padding syntax ([PR 33](https://github.com/pytroll/trollsift/pull/33) by [@paulovcmedeiros](https://github.com/paulovcmedeiros)) In this release 1 issue was closed. 
### Pull Requests Merged #### Bugs fixed * [PR 33](https://github.com/pytroll/trollsift/pull/33) - Fix problems with type='' in string padding syntax ([30](https://github.com/pytroll/trollsift/issues/30)) #### Features added * [PR 32](https://github.com/pytroll/trollsift/pull/32) - Add 'allow_partial' keyword to compose * [PR 31](https://github.com/pytroll/trollsift/pull/31) - Change tested Python versions to 3.8, 3.9 and 3.10 * [PR 24](https://github.com/pytroll/trollsift/pull/24) - Skip Python2 support and require python 3.6 or higher ## Version 0.3.5 (2021/02/15) ### Issues Closed * [Issue 27](https://github.com/pytroll/trollsift/issues/27) - Parsing zero padded floats * [Issue 26](https://github.com/pytroll/trollsift/issues/26) - MNT: Stop using ci-helpers in appveyor.yml * [Issue 23](https://github.com/pytroll/trollsift/issues/23) - Bug when parsing leap day when you dont have year * [Issue 20](https://github.com/pytroll/trollsift/issues/20) - Special conversion specifiers do not work ([PR 21](https://github.com/pytroll/trollsift/pull/21)) In this release 4 issues were closed. ### Pull Requests Merged #### Bugs fixed * [PR 21](https://github.com/pytroll/trollsift/pull/21) - Fix typo in string formatting usage example and drop Python 2.7 tests ([20](https://github.com/pytroll/trollsift/issues/20)) #### Features added * [PR 29](https://github.com/pytroll/trollsift/pull/29) - GitHub actions * [PR 25](https://github.com/pytroll/trollsift/pull/25) - Add lru_cache to parsing for improved performance In this release 3 pull requests were closed. ## Version 0.3.4 (2019/12/18) ### Issues Closed * [Issue 18](https://github.com/pytroll/trollsift/issues/18) - Different parsing allignment behaviour between 0.2.* and 0.3.* ([PR 19](https://github.com/pytroll/trollsift/pull/19)) In this release 1 issue was closed. ### Pull Requests Merged #### Bugs fixed * [PR 19](https://github.com/pytroll/trollsift/pull/19) - Fix regex parser being too greedy with partial string patterns ([18](https://github.com/pytroll/trollsift/issues/18)) In this release 1 pull request was closed. ## Version 0.3.3 (2019/10/09) ### Pull Requests Merged #### Bugs fixed * [PR 15](https://github.com/pytroll/trollsift/pull/15) - Fix parse accepting strings with trailing characters #### Features added * [PR 14](https://github.com/pytroll/trollsift/pull/14) - Adding .stickler.yml configuration file In this release 2 pull requests were closed. ## Version 0.3.2 (2019/01/14) ### Pull Requests Merged #### Bugs fixed * [PR 13](https://github.com/pytroll/trollsift/pull/13) - Fix backslashes in regex patterns on Windows In this release 1 pull request was closed. ## Version 0.3.1 (2018/11/02) ### Issues Closed * [Issue 11](https://github.com/pytroll/trollsift/issues/11) - Using the same information in two places in the template is fails with 0.3.0 ([PR 12](https://github.com/pytroll/trollsift/pull/12)) In this release 1 issue was closed. ### Pull Requests Merged #### Bugs fixed * [PR 12](https://github.com/pytroll/trollsift/pull/12) - Fix fields being specified multiple times in one pattern ([11](https://github.com/pytroll/trollsift/issues/11)) In this release 1 pull request was closed. ## Version 0.3.0 (2018/09/29) ### Issues Closed * [Issue 5](https://github.com/pytroll/trollsift/issues/5) - Add custom string formatter for lower/upper support In this release 1 issue was closed. 
### Pull Requests Merged #### Features added * [PR 6](https://github.com/pytroll/trollsift/pull/6) - Add additional string formatting conversion options In this release 1 pull request was closed. ## Version 0.2.1 (2018/05/22) ### Issues Closed * [Issue 3](https://github.com/pytroll/trollsift/issues/3) - Packaging license file ([PR 4](https://github.com/pytroll/trollsift/pull/4)) In this release 1 issues were closed. ### Pull Requests Merged #### Features added * [PR 4](https://github.com/pytroll/trollsift/pull/4) - Update travis tests and add appveyor tests ([3](https://github.com/pytroll/trollsift/issues/3)) In this release 1 pull request was closed. ## Version 0.2.0 (2017/12/08) ### Issues Closed * [Issue 2](https://github.com/pytroll/trollsift/issues/2) - Another timestring issue * [Issue 1](https://github.com/pytroll/trollsift/issues/1) - problem when parsing time strings In this release 2 issues were closed. trollsift-1.0.0/LICENSE.txt000066400000000000000000000261351507123665600153770ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) 
The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. trollsift-1.0.0/MANIFEST.in000066400000000000000000000002041507123665600152770ustar00rootroot00000000000000include doc/Makefile include doc/source/* include LICENSE.txt include README.rst include versioneer.py include trollsift/version.py trollsift-1.0.0/README.rst000066400000000000000000000014061507123665600152350ustar00rootroot00000000000000Trollsift ========= Trollsift is a collection of modules that assist with formatting, parsing and filtering satellite granule file names. For documentation, see http://trollsift.readthedocs.org/ License ======= Copyright 2014 Trollsift developers Licensed under the Apache License, Version 2.0 (the "License"); you may not use these files except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. trollsift-1.0.0/RELEASING.md000066400000000000000000000023521507123665600154020ustar00rootroot00000000000000# Releasing trollsift 1. checkout main branch 2. pull from repo 3. run the unittests 4. run `loghub` and update the `CHANGELOG.md` file: ``` loghub pytroll/trollsift --token $LOGHUB_GITHUB_TOKEN -st $(git tag --sort=-version:refname --list 'v*' | head -n 1) -plg bug "Bugs fixed" -plg enhancement "Features added" -plg documentation "Documentation changes" -plg backwards-incompatibility "Backwards incompatible changes" ``` Don't forget to commit! 5. Create a tag with the new version number, starting with a 'v', eg: ``` git tag -a v0.22.45 -m "Version 0.22.45" ``` See [semver.org](http://semver.org/) on how to write a version number. 6. push changes to github `git push --follow-tags` 7. Verify tests pass on GitHub Actions 8. Create a "Release" on GitHub by going to https://github.com/pytroll/trollsift/releases and clicking "Draft a new release". On the next page enter the newly created tag in the "Tag version" field, "Version X.Y.Z" in the "Release title" field, and paste the markdown from the changelog (the portion under the version section header) in the "Describe this release" box. Finally click "Publish release". 9. Verify the GitHub actions for deployment succeed and the release is on PyPI. 
trollsift-1.0.0/doc/000077500000000000000000000000001507123665600143125ustar00rootroot00000000000000trollsift-1.0.0/doc/Makefile000066400000000000000000000127251507123665600157610ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Pydecorate.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Pydecorate.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." 
@echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/Pydecorate" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Pydecorate" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." trollsift-1.0.0/doc/rtd_environment.yaml000066400000000000000000000003351507123665600204140ustar00rootroot00000000000000name: readthedocs channels: - conda-forge dependencies: - python=3.11 - pip - pytest - sphinx - sphinx_rtd_theme - sphinxcontrib-apidoc - pip: - graphviz - .. # relative path to the satpy project trollsift-1.0.0/doc/source/000077500000000000000000000000001507123665600156125ustar00rootroot00000000000000trollsift-1.0.0/doc/source/_static/000077500000000000000000000000001507123665600172405ustar00rootroot00000000000000trollsift-1.0.0/doc/source/_static/.gitkeep000066400000000000000000000000001507123665600206570ustar00rootroot00000000000000trollsift-1.0.0/doc/source/api.rst000066400000000000000000000002511507123665600171130ustar00rootroot00000000000000The :mod:`trollsift` API =============================== trollsift parser --------------------------- .. automodule:: trollsift.parser :members: :undoc-members: trollsift-1.0.0/doc/source/conf.py000066400000000000000000000174651507123665600171260ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Build configuration file for trollsift's documentation.""" import sys import os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. 
If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath("../../")) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [ "sphinx.ext.autodoc", "sphinx.ext.doctest", "sphinx.ext.intersphinx", "sphinx.ext.napoleon", "sphinx.ext.viewcode", ] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix of source filenames. source_suffix = ".rst" # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. master_doc = "index" # General information about the project. project = "trollsift" copyright = "2014, Panu Lahtinen, Hrobjartur Thorsteinsson" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = "0.1" # The full version, including alpha/beta/rc tags. release = "0.1.0" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [] # The reST default role (used for this markup: `text`) to use for all documents. # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = "default" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". # html_title = None # A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. # html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. 
# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. # html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. # html_additional_pages = {} # If false, no module index is generated. # html_domain_indices = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. # html_split_index = False # If true, links to the reST sources are added to the pages. # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = "trollsiftdoc" # -- Options for LaTeX output -------------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # 'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ( "index", "trollsift.tex", "Trollsift Documentation", "Hrobjartur Thorsteinsson", "manual", ), ] # The name of an image file (relative to this directory) to place at the top of # the title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # latex_use_parts = False # If true, show page references after internal links. # latex_show_pagerefs = False # If true, show URL addresses after external links. # latex_show_urls = False # Documents to append as an appendix to all manuals. # latex_appendices = [] # If false, no module index is generated. # latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ( "index", "trollsift", "Trollsift Documentation", ["Panu Lahtinen", "Hrobjartur Thorsteinsson"], 1, ) ] # If true, show URL addresses after external links. # man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. 
List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (
        "index",
        "trollsift",
        "Trollsift Documentation",
        "Panu Lahtinen and Hrobjartur Thorsteinsson",
        "trollsift",
        "String parser/formatter",
        "Miscellaneous",
    ),
]

# Documents to append as an appendix to all manuals.
# texinfo_appendices = []

# If false, no module index is generated.
# texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
# texinfo_show_urls = 'footnote'

# How intersphinx should find links to other packages
intersphinx_mapping = {
    "python": ("https://docs.python.org/3", None),
}

trollsift-1.0.0/doc/source/index.rst
.. Trollsift documentation master file, created by
   sphinx-quickstart on Wed Nov 27 13:05:45 2013.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

.. meta::
   :description: Trollsift project, modules for formatting, parsing and filtering satellite granule file names
   :keywords: Python, pytroll, format, parse, filter, string

Welcome to the trollsift documentation!
=========================================

Trollsift is a collection of modules that assist with formatting, parsing and
filtering satellite granule file names. These modules are useful and necessary
for writing higher-level applications and APIs for satellite batch processing.

The source code of the package can be found on github_.

.. _github: https://github.com/pytroll/trollsift

Contents
+++++++++

.. toctree::
   :maxdepth: 3

   installation
   usage
   api

Indices and tables
+++++++++++++++++++

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

trollsift-1.0.0/doc/source/installation.rst
.. .. sectnum::
..    :depth: 4
..    :start: 1
..    :suffix: .

Installation
------------

Trollsift is available from PyPI::

    $ pip install trollsift

Alternatively, you can install it into a conda environment by using the
conda-forge channel::

    $ conda install -c conda-forge trollsift

Or you can install it directly from the GitHub repository::

    $ pip install git+https://github.com/pytroll/trollsift.git

Developer Installation
++++++++++++++++++++++

You can download the trollsift source code from github::

    $ git clone https://github.com/pytroll/trollsift.git

and then run::

    $ pip install -e .

Testing
++++++++

To check if your Python setup is compatible with trollsift, you can run the
test suite using pytest::

    $ pytest trollsift/tests

trollsift-1.0.0/doc/source/usage.rst
Usage
=====

Trollsift includes a collection of modules that assist with formatting, parsing
and filtering satellite granule file names. These modules are useful and
necessary for writing higher-level applications and APIs for satellite batch
processing. Currently the string parsing and composing functionality is
implemented; further modules for various kinds of filtering of satellite data
granules are planned.

Parser
------

The trollsift string parser module is useful for composing (formatting) and
parsing strings compatible with the Python :ref:`python:formatstrings`. In
satellite data file name filtering, the library is useful for extracting
typical information from granule filenames, such as observation time, platform
and instrument names.
The trollsift Parser can also verify that the string formatting is invertible,
i.e. specific enough to ensure that parsing and composing of strings are
bijective mappings (a.k.a. one-to-one correspondence), which may be essential
for some applications, such as predicting granule filenames.

parsing
^^^^^^^

The Parser object holds a format string, allowing us to parse and compose
strings:

>>> from trollsift import Parser
>>>
>>> p = Parser("/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}.l1b")
>>> data = p.parse("/somedir/otherdir/hrpt_noaa16_20140210_1004_69022.l1b")
>>> print(data) # doctest: +NORMALIZE_WHITESPACE
{'directory': 'otherdir', 'platform': 'noaa', 'platnum': '16',
 'time': datetime.datetime(2014, 2, 10, 10, 4), 'orbit': 69022}

Parsing in trollsift is not "greedy". This means that in the case of ambiguous
patterns it will match the shortest portion of the string possible. For
example:

>>> from trollsift import Parser
>>>
>>> p = Parser("{field_one}_{field_two}")
>>> data = p.parse("abc_def_ghi")
>>> print(data)
{'field_one': 'abc', 'field_two': 'def_ghi'}

So even though the first field could have matched "abc_def", the non-greedy
parsing chose the shorter possible match of "abc".

composing
^^^^^^^^^

The reverse operation is called 'compose', and is equivalent to the Python
string class format method. Here we take the filename pattern from earlier,
change the time stamp of the data, and write out a new file name:

>>> from datetime import datetime
>>>
>>> p = Parser("/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}.l1b")
>>> data = {'directory': 'otherdir', 'platform': 'noaa', 'platnum': '16', 'time': datetime(2012, 1, 1, 1, 1), 'orbit': 69022}
>>> p.compose(data)
'/somedir/otherdir/hrpt_noaa16_20120101_0101_69022.l1b'

It is also possible to compose only partially, i.e., compose by specifying
values for only a subset of the parameters in the format string. Example:

>>> p = Parser("/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}.l1b")
>>> data = {'directory': 'my_dir'}
>>> p.compose(data, allow_partial=True)
'/somedir/my_dir/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}.l1b'

In addition to Python's built-in string formatting functionality, trollsift
also provides extra conversion options, such as making all characters
lowercase:

>>> my_parser = Parser("{platform_name!l}")
>>> my_parser.compose({'platform_name': 'NPP'})
'npp'

For all of the options see :class:`~trollsift.parser.StringFormatter`.

standalone parse and compose
----------------------------

The parse and compose methods also exist as standalone functions; depending on
your requirements you can call:

>>> from trollsift import parse, compose
>>> fmt = "/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}.l1b"
>>> data = parse(fmt, "/somedir/otherdir/hrpt_noaa16_20140210_1004_69022.l1b")
>>> data['time'] = datetime(2012, 1, 1, 1, 1)
>>> compose(fmt, data)
'/somedir/otherdir/hrpt_noaa16_20120101_0101_69022.l1b'

and achieve the exact same result as in the Parser object example above.
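
validating strings
------------------

As mentioned above, a Parser can check whether a given string matches its
format definition at all. The :class:`~trollsift.parser.Parser` class exposes
this as the ``validate`` method, and a standalone ``validate`` function is
exported alongside ``parse`` and ``compose``. A quick sketch, reusing the hrpt
pattern from the parsing examples above (the file names are illustrative):

>>> p = Parser("/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}.l1b")
>>> p.validate("/somedir/otherdir/hrpt_noaa16_20140210_1004_69022.l1b")
True
>>> p.validate("/somedir/otherdir/hrpt_noaa16_20140210_1004.l1b")
False

This is useful for filtering candidate file names before handing them to
:func:`~trollsift.parser.parse`.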
trollsift-1.0.0/pyproject.toml000066400000000000000000000034451507123665600164670ustar00rootroot00000000000000[project] name = "trollsift" description = "String parser/formatter" readme = "README.rst" authors = [ { name = "The Pytroll Team", email = "pytroll@googlegroups.com" } ] license = "Apache-2.0" license-files = ["LICENSE.txt"] classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Science/Research", "Operating System :: OS Independent", "Programming Language :: Python", "Topic :: Scientific/Engineering", "Typing :: Typed", ] keywords = ["string parsing", "string formatting", "pytroll"] requires-python = ">=3.9" dependencies = [] dynamic = ["version"] [build-system] requires = ["hatchling", "hatch-vcs"] build-backend = "hatchling.build" [tool.hatch.metadata] allow-direct-references = true [tool.hatch.build.targets.wheel] packages = ["trollsift"] [tool.hatch.version] source = "vcs" [tool.hatch.build.hooks.vcs] version-file = "trollsift/version.py" [tool.coverage.run] relative_files = true omit = ["trollsift/version.py"] [tool.pytest.ini_options] minversion = 6.0 addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] xfail_strict = true log_cli_level = "info" testpaths = ["trollsift/tests"] filterwarnings = [ "error", "ignore:numpy.ndarray size changed, may indicate binary incompatibility:RuntimeWarning", ] [tool.ruff] line-length = 120 [tool.ruff.lint] # See https://docs.astral.sh/ruff/rules/ select = ["E", "W", "B", "D", "T10", "C90"] ignore = ["D101", "D102", "D103", "D104", "D105", "D106", "D107", "E203"] [tool.ruff.lint.per-file-ignores] "doc/source/conf.py" = ["E501"] "trollsift/tests/*.py" = ["D205", "D400", "D415", "S101"] # assert allowed in tests [tool.ruff.lint.pydocstyle] convention = "google" [tool.ruff.lint.mccabe] max-complexity = 10 [tool.ruff.lint.isort] known-first-party = ["trollsift"] trollsift-1.0.0/trollsift/000077500000000000000000000000001507123665600155675ustar00rootroot00000000000000trollsift-1.0.0/trollsift/__init__.py000066400000000000000000000010261507123665600176770ustar00rootroot00000000000000from .parser import Parser, StringFormatter, parse, compose, globify, purge, validate try: from trollsift.version import version as __version__ # noqa except ModuleNotFoundError: # pragma: no cover raise ModuleNotFoundError( "No module named trollsift.version. This could mean " "you didn't install 'trollsift' properly. Try reinstalling ('pip " "install')." 
) from None __all__ = [ "Parser", "StringFormatter", "parse", "compose", "globify", "purge", "validate", ] trollsift-1.0.0/trollsift/parser.py000066400000000000000000000641371507123665600174500ustar00rootroot00000000000000"""Main parsing and formatting functionality.""" from __future__ import annotations import re import datetime as dt import random import string from functools import lru_cache import typing if typing.TYPE_CHECKING: from _typeshed import StrOrLiteralStr from typing import Any from collections.abc import Iterable, Sequence, Mapping class Parser: """Class-based interface to parsing and formatting functionality.""" def __init__(self, fmt: str): self.fmt = fmt def __str__(self): return self.fmt def keys(self): """Get parameter names defined in the format string.""" convert_dict = get_convert_dict(self.fmt) return convert_dict.keys() def parse(self, stri: str, full_match: bool = True) -> dict[str, Any]: """Parse keys and values from ``stri`` using parser's format.""" return parse(self.fmt, stri, full_match=full_match) def compose(self, keyvals: Mapping[str, Any], allow_partial: bool = False) -> str: """Compose format string ``self.fmt`` with parameters given in the ``keyvals`` dict. Args: keyvals: "Parameter --> parameter value" map allow_partial: If True, then partial composition is allowed, i.e., not all parameters present in `fmt` need to be specified in `keyvals`. Unspecified parameters will, in this case, be left unchanged. (Default value = False). Returns: Result of formatting the *self.fmt* string with parameter values extracted from the corresponding items in the *keyvals* dictionary. """ return compose(fmt=self.fmt, keyvals=keyvals, allow_partial=allow_partial) format = compose def globify(self, keyvals: Mapping[str, Any] | None = None) -> str: """Generate a string usable with glob.glob() from format string.""" return globify(self.fmt, keyvals) def validate(self, stri: str) -> bool: """Validate that string ``stri`` conforms to the parser's format definition. Checks that the provided string is parsable and therefore complies with this parser's string format definition. Useful for filtering strings, or to check if a string is compatible before passing it to the parser function. """ return validate(self.fmt, stri) def is_one2one(self): """Check if this parser's format string has a one to one correspondence. That is, that successive composing and parsing operations will result in the original data. In other words, that input data maps to a string, which then maps back to the original data without any change or loss in information. Note: This test only applies to sensible usage of the format string. If string or numeric data causes overflow, e.g. if composing "abcd" into ``{3s}``, one to one correspondence will always be broken in such cases. This of course also applies to precision losses when using datetime data. """ return is_one2one(self.fmt) class StringFormatter(string.Formatter): """Custom string formatter class for basic strings. This formatter adds a few special conversions for assisting with common trollsift situations like making a parameter lowercase or removing hyphens. The added conversions are listed below and can be used in a format string by prefixing them with an `!` like so: >>> fstr = "{!u}_{!l}" >>> formatter = StringFormatter() >>> formatter.format(fstr, "to_upper", "To_LowerCase") "TO_UPPER_to_lowercase" - c: Make capitalized version of string (first character upper case, all lowercase after that) by executing the parameter's `.capitalize()` method. 
    - l: Make all characters lowercase by executing the parameter's `.lower()` method.
    - R: Remove all separators from the parameter including '-', '_', ' ', and ':'.
    - t: Title case the string by executing the parameter's `.title()` method.
    - u: Make all characters uppercase by executing the parameter's `.upper()` method.
    - h: A combination of 'R' and 'l'.
    - H: A combination of 'R' and 'u'.

    """

    CONV_FUNCS = {
        "c": "capitalize",
        "h": "lower",
        "H": "upper",
        "l": "lower",
        "t": "title",
        "u": "upper",
    }

    def convert_field(self, value: str, conversion: str | None) -> str:
        """Apply conversions mentioned in `StringFormatter.CONV_FUNCS`."""
        if conversion is None:
            func = None
        else:
            func = self.CONV_FUNCS.get(conversion)
        if func is not None:
            value = getattr(value, func)()
        elif conversion not in ["R"]:
            # default conversion ('r', 's')
            return super(StringFormatter, self).convert_field(value, conversion)

        if conversion in ["h", "H", "R"]:
            value = value.replace("-", "").replace("_", "").replace(":", "").replace(" ", "")
        return value


formatter = StringFormatter()

# taken from https://docs.python.org/3/library/re.html#simulating-scanf
spec_regexes = {
    "b": r"[-+]?[0-1]",
    "c": r".",
    "d": r"[-+]?\d",
    # Naive fixed point format specifier (e.g. {foo:f})
    "f": r"[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?",
    # Fixed point format specifier including width and precision
    # (e.g. {foo:4.2f}). The lookahead (?=.{width}) makes sure that the
    # subsequent pattern is only matched if the string has the required
    # (minimum) width.
    "f_with_precision": r"(?=.{{{width}}})([-+]?([\d ]+(\.\d{{{decimals}}})+|\.\d{{{decimals}}})([eE][-+]?\d+)?)",
    "i": r"[-+]?(0[xX][\dA-Fa-f]+|0[0-7]*|\d+)",
    "o": r"[-+]?[0-7]",
    "s": r"\S",
    "x": r"[-+]?(0[xX])?[\dA-Fa-f]",
}
spec_regexes["e"] = spec_regexes["f"]
spec_regexes["E"] = spec_regexes["f"]
spec_regexes["g"] = spec_regexes["f"]
spec_regexes["X"] = spec_regexes["x"]
spec_regexes[""] = spec_regexes["s"]
allow_multiple = ["b", "c", "d", "o", "s", "", "x", "X"]
fixed_point_types = ["f", "e", "E", "g"]
# format_spec ::= [[fill]align][sign][#][0][width][,][.precision][type]
# https://docs.python.org/3.4/library/string.html#format-specification-mini-language
fmt_spec_regex = re.compile(
    r"(?P<align>(?P<fill>.)?[<>=^])?(?P<sign>[\+\-\s])?(?P<pound>#)?(?P<zero>0)?(?P<width>\d+)?"
    r"(?P<comma>,)?(?P<precision>.\d+)?(?P<type>[bcdeEfFgGnosxX%]?)"
)


def _get_fixed_point_regex(width: str | None, precision: str | None) -> str:
    """Get regular expression for fixed point numbers.

    Args:
        width: Total width of the string representation.
        precision: Number of decimals.

    """
    if width or precision:
        if precision is None:
            precision = "0,"
        else:
            precision = precision.strip(".")
        if width is None:
            width = "1,"
        return spec_regexes["f_with_precision"].format(width=width, decimals=precision)
    else:
        return spec_regexes["f"]


class RegexFormatter(string.Formatter):
    """String formatter that converts a format string to a regular expression.

    >>> regex_formatter = RegexFormatter()
    >>> regex_str = regex_formatter.format('{field_one:5d}_{field_two}')

    Can also be used to extract values from a string given the format spec
    for that string:

    >>> regex_formatter.extract_values('{field_one:5d}_{field_two}', '12345_sometext')
    {'field_one': '12345', 'field_two': 'sometext'}

    Note that the regular expressions generated by this class are specially
    generated to reduce "greediness" of the matches found. For ambiguous
    patterns where a single field could match shorter or longer portions of
    the provided string, this class will prefer the shorter version of the
    string in order to make the rest of the pattern match.
    For example:

    >>> regex_formatter.extract_values('{field_one}_{field_two}', 'abc_def_ghi')
    {'field_one': 'abc', 'field_two': 'def_ghi'}

    Note how `field_one` could have matched "abc_def", but the lower
    greediness of this parser caused it to only match against "abc".

    """

    # special string to mark a parameter not being specified
    UNPROVIDED_VALUE = "<trollsift unprovided value>"
    ESCAPE_CHARACTERS = ["\\"] + [x for x in string.punctuation if x not in "\\%"]
    ESCAPE_SETS = [(c, "\\" + c) for c in ESCAPE_CHARACTERS]

    def __init__(self):
        # hold on to fields we've seen already so we can reuse their
        # definitions in the regex
        self._cached_fields = {}
        self.format = lru_cache()(self._uncached_format)
        super(RegexFormatter, self).__init__()

    def _uncached_format(*args, **kwargs):
        try:
            # super() doesn't seem to work here
            ret_val = string.Formatter.format(*args, **kwargs)
        finally:
            self = args[0]  # just matching the parent class
            self._cached_fields.clear()
        return ret_val

    def _escape(self, s: str) -> str:
        """Escape bad characters for regular expressions.

        Similar to `re.escape` but allows '%' to pass through.

        """
        for ch, r_ch in self.ESCAPE_SETS:
            s = s.replace(ch, r_ch)
        return s

    def parse(
        self, format_string: StrOrLiteralStr
    ) -> Iterable[
        tuple[
            StrOrLiteralStr,
            StrOrLiteralStr | None,
            StrOrLiteralStr | None,
            StrOrLiteralStr | None,
        ]
    ]:
        parse_ret = super(RegexFormatter, self).parse(format_string)
        for literal_text, field_name, format_spec, conversion in parse_ret:
            # the parent class will call parse multiple times moving
            # 'format_spec' to 'literal_text'. We only escape 'literal_text'
            # so we don't escape things twice.
            literal_text = self._escape(literal_text)
            yield literal_text, field_name, format_spec, conversion

    def get_value(self, key: int | str, args: Sequence[Any], kwargs: Mapping[str, Any]) -> Any:
        try:
            return super(RegexFormatter, self).get_value(key, args, kwargs)
        except (IndexError, KeyError):
            return key, self.UNPROVIDED_VALUE

    def _regex_datetime(self, format_spec: str) -> str:
        replace_str = format_spec
        for fmt_key, fmt_val in DT_FMT.items():
            if fmt_key == "%%":
                # special case: unescape the literal percent sign
                replace_str = replace_str.replace("%%", "%")
                continue
            count = fmt_val.count("?")
            # either a series of numbers or letters/numbers
            regex = r"\d{{{:d}}}".format(count) if count else r"[^ \t\n\r\f\v\-_:]+"
            replace_str = replace_str.replace(fmt_key, regex)
        return replace_str

    def regex_field(self, field_name: str, value: Any, format_spec: str) -> str:
        if value != self.UNPROVIDED_VALUE:
            return super(RegexFormatter, self).format_field(value, format_spec)

        if self._cached_fields.get(field_name, format_spec) != format_spec:
            raise ValueError("Can't specify the same field_name with different formats: {}".format(field_name))
        elif field_name in self._cached_fields:
            return r"(?P={})".format(field_name)
        else:
            self._cached_fields[field_name] = format_spec

        # Replace format spec with glob patterns (*, ?, etc)
        if not format_spec:
            return r"(?P<{}>.*?)".format(field_name)
        if "%" in format_spec:
            return r"(?P<{}>{})".format(field_name, self._regex_datetime(format_spec))
        return format_spec_to_regex(field_name, format_spec)

    def format_field(self, value: Any, format_spec: str) -> str:
        if not isinstance(value, tuple) or value[1] != self.UNPROVIDED_VALUE:
            return super(RegexFormatter, self).format_field(value, format_spec)
        field_name, value = value
        return self.regex_field(field_name, value, format_spec)


def format_spec_to_regex(field_name: str, format_spec: str) -> str:
    """Make an attempt at converting a format spec to a regular expression."""
    # NOTE: remove escaped backslashes so regex matches
regex_match = fmt_spec_regex.match(format_spec.replace("\\", ""))
    if regex_match is None:
        raise ValueError("Invalid format specification: '{}'".format(format_spec))
    regex_dict = regex_match.groupdict()
    ftype = regex_dict["type"]
    width = regex_dict["width"]
    align = regex_dict["align"]
    precision = regex_dict["precision"]
    fill = _get_fill(regex_dict["fill"], width, ftype)
    char_type = spec_regexes[ftype]
    if ftype in fixed_point_types:
        char_type = _get_fixed_point_regex(width=width, precision=precision)
    if ftype in ("s", "") and align and align.endswith("="):
        raise ValueError("Invalid format specification: '{}'".format(format_spec))
    final_regex = char_type
    if ftype in allow_multiple and (not width or width == "0"):
        final_regex += r"*?"
    elif width and width != "0":
        if not fill and ftype not in fixed_point_types:
            # we know we have exactly this many characters
            final_regex += r"{{{}}}".format(int(width))
        elif fill:
            # we don't know how many fill characters we have compared to
            # field characters so just match all characters and sort it out
            # later during type conversion.
            final_regex = r".{{{}}}".format(int(width))
        elif ftype in allow_multiple:
            final_regex += r"*?"

    return r"(?P<{}>{})".format(field_name, final_regex)


def _get_fill(fill: str | None, width: str | None, ftype: str | None) -> str | None:
    # NOTE: does not properly handle `=` alignment
    if fill is None:
        if width is not None and width[0] == "0":
            fill = "0"
        elif ftype in ["s", "", "d", "x", "X", "o", "b"]:
            fill = " "
    return fill


@lru_cache()
def regex_format(fmt: str) -> str:
    # We create a new instance of RegexFormatter here to prevent concurrent calls to
    # format interfering with one another.
    return RegexFormatter().format(fmt)


def extract_values(fmt: str, stri: str, full_match: bool = True) -> dict[str, Any]:
    """Extract information from string matching format.

    Args:
        fmt: Python format string to match against
        stri: String to extract information from
        full_match: Force the match of the whole string. Defaults to ``True``.
    """
    regex = regex_format(fmt)
    if full_match:
        regex = "^" + regex + "$"
    match = re.match(regex, stri)
    if match is None:
        raise ValueError("String does not match pattern.")
    return match.groupdict()


def _get_number_from_fmt(fmt: str) -> int:
    """Helper function for extract_values.

    Figures out string length from format string.
    """
    if "%" in fmt:
        # it's a datetime
        return len(("{0:" + fmt + "}").format(dt.datetime.now()))
    else:
        # it's something else
        fmt = fmt.lstrip("0")
        fmt_digits_match = re.search("[0-9]+", fmt)
        if fmt_digits_match is None:
            raise ValueError(f"No number specified in format string: {fmt}")
        return int(fmt_digits_match.group(0))


def _convert(convdef: str, stri: str) -> Any:
    """Convert the string *stri* to the given conversion definition *convdef*."""
    result: Any  # force mypy type
    if "%" in convdef:
        result = dt.datetime.strptime(stri, convdef)
    else:
        result = _strip_padding(convdef, stri)
        if "d" in convdef:
            result = int(result)
        elif "x" in convdef or "X" in convdef:
            result = int(result, 16)
        elif "o" in convdef:
            result = int(result, 8)
        elif "b" in convdef:
            result = int(result, 2)
        elif any(float_type_marker in convdef for float_type_marker in fixed_point_types):
            result = float(result)
    return result


def _strip_padding(convdef: str, stri: str) -> str:
    """Strip padding from the given string.
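
    For example (an illustrative sketch mirroring the unit tests):

        >>> _strip_padding("_>10d", "_____69022")
        '69022'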
Args:
        convdef: Conversion definition (indicates the padding)
        stri: String to be modified
    """
    regex_match = fmt_spec_regex.match(convdef)
    match_dict = regex_match.groupdict() if regex_match else {}
    align = match_dict.get("align")
    pad = match_dict.get("fill")
    if align:
        # align character is the last one
        align = align[-1]
    if align and align in "<>^" and not pad:
        pad = " "

    if align == ">":
        stri = stri.lstrip(pad)
    elif align == "<":
        stri = stri.rstrip(pad)
    elif align == "^":
        stri = stri.strip(pad)
    return stri


@lru_cache()
def get_convert_dict(fmt: str) -> dict[str, str]:
    """Retrieve parse definition from the format string `fmt`."""
    convdef = {}
    for _literal_text, field_name, format_spec, _conversion in formatter.parse(fmt):
        if field_name is None or format_spec is None:
            continue
        # XXX: Do I need to include 'conversion'?
        convdef[field_name] = format_spec
    return convdef


def parse(fmt: str, stri: str, full_match: bool = True) -> dict[str, Any]:
    """Parse keys and corresponding values from *stri* using format described in *fmt* string.

    Args:
        fmt: Python format string to match against
        stri: String to extract information from
        full_match: Force the match of the whole string. Defaults to ``True``.
    """
    convdef = get_convert_dict(fmt)
    keyvals = extract_values(fmt, stri, full_match=full_match)
    for key in convdef.keys():
        keyvals[key] = _convert(convdef[key], keyvals[key])
    return keyvals


def compose(fmt: str, keyvals: Mapping[str, Any], allow_partial: bool = False) -> str:
    """Compose the format string *fmt* with parameters given in the *keyvals* dict.

    Args:
        fmt: Python format string to compose
        keyvals: "Parameter --> parameter value" map
        allow_partial: If True, then partial composition is allowed, i.e.,
            not all parameters present in `fmt` need to be specified in
            `keyvals`. Unspecified parameters will, in this case, be left
            unchanged. (Default value = False).

    Returns:
        Result of formatting the *fmt* string with parameter values
        extracted from the corresponding items in the *keyvals* dictionary.
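
    Example (illustrative):

        >>> compose("{platform:4s}_{orbit:05d}.l1b", {"platform": "noaa", "orbit": 123})
        'noaa_00123.l1b'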
""" if allow_partial: return _partial_compose(fmt=fmt, keyvals=keyvals) return _strict_compose(fmt=fmt, keyvals=keyvals) DT_FMT = { "%a": "*", "%A": "*", "%w": "?", "%d": "??", "%b": "*", "%B": "*", "%m": "??", "%y": "??", "%Y": "????", "%H": "??", "%I": "??", "%p": "*", "%M": "??", "%S": "??", "%f": "*", "%z": "*", "%Z": "*", "%j": "???", "%U": "??", "%W": "??", "%c": "*", "%x": "*", "%X": "*", "%%": "?", } class GlobifyFormatter(string.Formatter): # special string to mark a parameter not being specified UNPROVIDED_VALUE = "" def get_value(self, key: str | int, args: Sequence[Any], kwargs: Mapping[str, Any]) -> Any: try: return super(GlobifyFormatter, self).get_value(key, args, kwargs) except (IndexError, KeyError): # assumes that return self.UNPROVIDED_VALUE def format_field(self, value: Any, format_spec: str) -> str: if not isinstance(value, (list, tuple)) and value != self.UNPROVIDED_VALUE: return super(GlobifyFormatter, self).format_field(value, format_spec) elif value != self.UNPROVIDED_VALUE: # partial provided date/time fields # specified with a tuple/list of 2 elements # (value, partial format string) value, dt_fmt = value for fmt_letter in dt_fmt: fmt = "%" + fmt_letter format_spec = format_spec.replace(fmt, value.strftime(fmt)) # Replace format spec with glob patterns (*, ?, etc) if not format_spec: return "*" if "%" in format_spec: replace_str = format_spec for fmt_key, fmt_val in DT_FMT.items(): replace_str = replace_str.replace(fmt_key, fmt_val) return replace_str if not re.search("[0-9]+", format_spec): # non-integer type return "*" return "?" * _get_number_from_fmt(format_spec) globify_formatter = GlobifyFormatter() def globify(fmt: str, keyvals: Mapping[str, Any] | None = None) -> Any: """Generate a string usable with glob.glob() from format string and provided information.""" if keyvals is None: keyvals = {} return globify_formatter.format(fmt, **keyvals) def validate(fmt: str, stri: str) -> bool: """Validates that string ``stri`` conforms to ``fmt``. Useful for filtering string, or to check if string is compatible before passing the string to the parser function. """ try: parse(fmt, stri) return True except ValueError: return False def _generate_data_for_format(fmt: str) -> dict[str, Any]: """Generate a fake data dictionary to fill in the provided format string.""" # finally try some data, create some random data for the fmt. data = {} # keep track of how many "free_size" (wildcard) parameters we have # if we get two in a row then we know the pattern is invalid, meaning # we'll never be able to match the second wildcard field free_size_start = False for literal_text, field_name, format_spec, _conversion in formatter.parse(fmt): if literal_text: free_size_start = False if not field_name: free_size_start = False continue # encapsulating free size keys, # e.g. {:s}{:s} or {:s}{:4s}{:d} if not format_spec or format_spec == "s" or format_spec == "d": if free_size_start: raise ValueError("Can't generate data for spec with two or more fields with no size specifier.") else: free_size_start = True # make some data for this key and format data[field_name] = _gen_data_for_spec(format_spec) return data def _gen_data_for_spec(format_spec: str | None) -> int | str | dt.datetime: if format_spec and "%" in format_spec: # some datetime t = dt.datetime.now() # run once through format to limit precision t = parse("{t:" + format_spec + "}", compose("{t:" + format_spec + "}", {"t": t}))["t"] return t if format_spec and "d" in format_spec: # random number (with n sign. 
        if not format_spec.isalpha():
            n = _get_number_from_fmt(format_spec)
        else:
            # clearly bad
            raise ValueError(f"Bad format specification: {format_spec!r}")
        return random.randint(0, 99999999999999999) % (10**n)

    # string type
    if format_spec is None:
        n = 4
    elif format_spec.isalnum():
        n = _get_number_from_fmt(format_spec)
    else:
        n = 4
    randstri = ""
    for _ in range(n):
        randstri += random.choice(string.ascii_letters)
    return randstri


def is_one2one(fmt: str) -> bool:
    """Check if the format string has a one to one correspondence.

    That is, that successive composing and parsing operations will result in
    the original data. In other words, that input data maps to a string,
    which then maps back to the original data without any change or loss in
    information.

    Note: This test only applies to sensible usage of the format string. If
    string or numeric data causes overflow, e.g. if composing "abcd" into
    {3s}, one to one correspondence will always be broken in such cases. This
    of course also applies to precision losses when using datetime data.
    """
    try:
        data = _generate_data_for_format(fmt)
    except ValueError:
        return False

    # run data forward once and back to data
    stri = compose(fmt, data)
    data2 = parse(fmt, stri)
    # check if data2 equal to original data
    if len(data) != len(data2):
        return False
    for key in data:
        if key not in data2:
            return False
        if data2[key] != data[key]:
            return False
    # all checks passed, so just return True
    return True


def purge() -> None:
    """Clear internal caches.

    Not needed normally, but can be used to force cache clear when memory
    is very limited.
    """
    regex_format.cache_clear()
    get_convert_dict.cache_clear()


def _strict_compose(fmt: str, keyvals: Mapping[str, Any]) -> str:
    """Convert parameters in `keyvals` to a string based on `fmt` string."""
    return formatter.format(fmt, **keyvals)


def _partial_compose(fmt: str, keyvals: Mapping[str, Any]) -> str:
    """Convert parameters in `keyvals` to a string based on `fmt` string.

    Similar to _strict_compose, but accepts partial composing, i.e., not all
    parameters in `fmt` need to be specified in `keyvals`. Unspecified
    parameters are left unchanged.
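
    Example (an illustrative sketch):

        >>> _partial_compose("{a}/{b:2d}.end", {"a": "x"})
        'x/{b:2d}.end'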
Args:
        fmt (str): Python format string to compose
        keyvals (dict): "Parameter --> parameter value" map
    """
    fmt, undefined_vars = _replace_undefined_params_with_placeholders(fmt, keyvals)
    composed_string = _strict_compose(fmt=fmt, keyvals=keyvals)
    for fmt_placeholder, fmt_specification in undefined_vars.items():
        composed_string = composed_string.replace(fmt_placeholder, fmt_specification)
    return composed_string


def _replace_undefined_params_with_placeholders(
    fmt: str, keyvals: Mapping[str, Any] | None = None
) -> tuple[str, dict[str, Any]]:
    """Replace any params in `fmt` that are not specified in `keyvals` with placeholders."""
    vars_left_undefined = set(get_convert_dict(fmt).keys())
    if keyvals is not None:
        vars_left_undefined -= keyvals.keys()

    undefined_vars_placeholders_dict = {}
    new_fmt = fmt
    for var in sorted(vars_left_undefined):
        matches = set(match.group() for match in re.finditer(rf"{{{re.escape(var)}([^\w{{}}].*?)*}}", new_fmt))
        if len(matches) == 0:
            raise ValueError(f"Could not capture definitions for {var} from {fmt}")
        for var_specification in matches:
            fmt_placeholder = f"({hex(hash(var_specification))})"
            undefined_vars_placeholders_dict[fmt_placeholder] = var_specification
            new_fmt = new_fmt.replace(var_specification, fmt_placeholder)

    return new_fmt, undefined_vars_placeholders_dict
trollsift-1.0.0/trollsift/py.typed000066400000000000000000000000001507123665600172540ustar00rootroot00000000000000trollsift-1.0.0/trollsift/tests/000077500000000000000000000000001507123665600167315ustar00rootroot00000000000000trollsift-1.0.0/trollsift/tests/__init__.py000066400000000000000000000000231507123665600210350ustar00rootroot00000000000000"""Test module."""
trollsift-1.0.0/trollsift/tests/integrationtests/000077500000000000000000000000001507123665600223375ustar00rootroot00000000000000trollsift-1.0.0/trollsift/tests/integrationtests/__init__.py000066400000000000000000000000631507123665600244470ustar00rootroot00000000000000"""Integration tests for the trollsift package."""
trollsift-1.0.0/trollsift/tests/integrationtests/test_parser.py000066400000000000000000000126231507123665600252500ustar00rootroot00000000000000"""Parser integration tests."""

import os
import unittest
import datetime as dt

from trollsift.parser import Parser


class TestParser(unittest.TestCase):
    def setUp(self):
        self.fmt = "/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}" + "_{time:%Y%m%d_%H%M}_{orbit:05d}.l1b"
        self.string = "/somedir/otherdir/hrpt_noaa16_20140210_1004_69022.l1b"
        self.data = {
            "directory": "otherdir",
            "platform": "noaa",
            "platnum": "16",
            "time": dt.datetime(2014, 2, 10, 10, 4),
            "orbit": 69022,
        }
        self.p = Parser(self.fmt)

    def test_parse(self):
        # Run
        result = self.p.parse(self.string)
        # Assert
        self.assertDictEqual(result, self.data)

    def test_cache_clear(self):
        """Test we can clear the internal cache properly."""
        from trollsift.parser import purge, regex_format

        # Run
        result = self.p.parse(self.string)
        # Assert
        self.assertDictEqual(result, self.data)
        assert regex_format.cache_info()[-1] != 0
        purge()
        assert regex_format.cache_info()[-1] == 0

    def test_compose(self):
        # Run
        result = self.p.compose(self.data)
        # Assert
        self.assertEqual(result, self.string)

    def test_validate(self):
        # These cases are True
        self.assertTrue(self.p.validate("/somedir/avhrr/2014/hrpt_noaa19_20140212_1412_12345.l1b"))
        # These cases are False
        self.assertFalse(self.p.validate("/somedir/bla/bla/hrpt_noaa19_20140212__1412_00000.l1b"))

    def assertDictEqual(self, a, b):
        for key in a:
            self.assertTrue(key in b)
            self.assertEqual(a[key], b[key])
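        # together with the per-key checks above, the length check below
        # enforces that both dictionaries have identical key sets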
self.assertEqual(len(a), len(b)) def assertItemsEqual(self, a, b): for i in range(len(a)): if isinstance(a[i], dict): self.assertDictEqual(a[i], b[i]) else: self.assertEqual(a[i], b[i]) self.assertEqual(len(a), len(b)) class TestParserVariousFormats(unittest.TestCase): def test_parse_viirs_sdr(self): fmt = ( "SVI01_{platform_shortname}_d{start_time:%Y%m%d_t%H%M%S%f}_" "e{end_time:%H%M%S%f}_b{orbit:5d}_c{creation_time:%Y%m%d%H%M%S%f}_{source}.h5" ) filename = "SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5" data = { "platform_shortname": "npp", "start_time": dt.datetime(2012, 2, 25, 18, 1, 24, 500000), "orbit": 1708, "end_time": dt.datetime(1900, 1, 1, 18, 2, 48, 700000), "source": "noaa_ops", "creation_time": dt.datetime(2012, 2, 26, 0, 21, 30, 255476), } p = Parser(fmt) result = p.parse(filename) self.assertDictEqual(result, data) def test_parse_iasi_l2(self): fmt = ( "W_XX-EUMETSAT-{reception_location},{instrument},{long_platform_id}+{processing_location}_" "C_EUMS_{processing_time:%Y%m%d%H%M%S}_IASI_PW3_02_{platform_id}_{start_time:%Y%m%d-%H%M%S}Z_" "{end_time:%Y%m%d.%H%M%S}Z.hdf" ) filename = ( "W_XX-EUMETSAT-kan,iasi,metopb+kan_C_EUMS_20170920103559_IASI_PW3_02_" "M01_20170920-102217Z_20170920.102912Z.hdf" ) data = { "reception_location": "kan", "instrument": "iasi", "long_platform_id": "metopb", "processing_location": "kan", "processing_time": dt.datetime(2017, 9, 20, 10, 35, 59), "platform_id": "M01", "start_time": dt.datetime(2017, 9, 20, 10, 22, 17), "end_time": dt.datetime(2017, 9, 20, 10, 29, 12), } p = Parser(fmt) result = p.parse(filename) self.assertDictEqual(result, data) def test_parse_olci_l1b(self): fmt = os.path.join( "{mission_id:3s}_OL_1_{datatype_id:_<6s}_{start_time:%Y%m%dT%H%M%S}_" "{end_time:%Y%m%dT%H%M%S}_{creation_time:%Y%m%dT%H%M%S}_{duration:4d}_" "{cycle:3d}_{relative_orbit:3d}_{frame:4d}_{centre:3s}_{platform_mode:1s}_" "{timeliness:2s}_{collection:3s}.SEN3", "{dataset_name}_radiance.nc", ) # made up: filename = os.path.join( "S3A_OL_1_EFR____20180916T090539_20180916T090839_20180916T090539_0001_001_001_0001_CEN_M_AA_AAA.SEN3", "Oa21_radiance.nc", ) data = { "mission_id": "S3A", "datatype_id": "EFR", "start_time": dt.datetime(2018, 9, 16, 9, 5, 39), "end_time": dt.datetime(2018, 9, 16, 9, 8, 39), "creation_time": dt.datetime(2018, 9, 16, 9, 5, 39), "duration": 1, "cycle": 1, "relative_orbit": 1, "frame": 1, "centre": "CEN", "platform_mode": "M", "timeliness": "AA", "collection": "AAA", "dataset_name": "Oa21", } p = Parser(fmt) result = p.parse(filename) self.assertDictEqual(result, data) def test_parse_duplicate_fields(self): """Test parsing a pattern that has duplicate fields.""" fmt = "{version_number:1s}/filename_with_version_number_{version_number:1s}.tif" filename = "1/filename_with_version_number_1.tif" p = Parser(fmt) result = p.parse(filename) self.assertEqual(result["version_number"], "1") trollsift-1.0.0/trollsift/tests/regressiontests/000077500000000000000000000000001507123665600221745ustar00rootroot00000000000000trollsift-1.0.0/trollsift/tests/regressiontests/__init__.py000066400000000000000000000000621507123665600243030ustar00rootroot00000000000000"""Regression tests for the trollsift package.""" trollsift-1.0.0/trollsift/tests/regressiontests/test_parser.py000066400000000000000000000011011507123665600250720ustar00rootroot00000000000000"""Parser regression tests.""" import unittest import datetime as dt from trollsift.parser import parse class TestParser(unittest.TestCase): def test_002(self): res = parse( 
"hrpt16_{satellite:7s}_{start_time:%d-%b-%Y_%H:%M:%S.000}_{orbit_number:5d}", "hrpt16_NOAA-19_26-NOV-2014_10:12:00.000_29889", ) self.assertEqual( res, { "orbit_number": 29889, "satellite": "NOAA-19", "start_time": dt.datetime(2014, 11, 26, 10, 12), }, ) trollsift-1.0.0/trollsift/tests/unittests/000077500000000000000000000000001507123665600207735ustar00rootroot00000000000000trollsift-1.0.0/trollsift/tests/unittests/__init__.py000066400000000000000000000000541507123665600231030ustar00rootroot00000000000000"""Unit tests for the trollsift package.""" trollsift-1.0.0/trollsift/tests/unittests/test_parser.py000066400000000000000000000530131507123665600237020ustar00rootroot00000000000000"""Basic unit tests for the parser module.""" import unittest import datetime as dt import pytest from trollsift.parser import get_convert_dict, extract_values from trollsift.parser import _convert from trollsift.parser import parse, globify, validate, is_one2one, compose, Parser class TestParser(unittest.TestCase): def setUp(self): self.fmt = "/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}" + "_{time:%Y%m%d_%H%M}_{orbit:05d}.l1b" self.string = "/somedir/otherdir/hrpt_noaa16_20140210_1004_69022.l1b" self.string2 = "/somedir/otherdir/hrpt_noaa16_20140210_1004_00022.l1b" self.string3 = "/somedir/otherdir/hrpt_noaa16_20140210_1004_69022" self.string4 = "/somedir/otherdir/hrpt_noaa16_20140210_1004_69022" def test_parser_keys(self): parser = Parser(self.fmt) keys = {"directory", "platform", "platnum", "time", "orbit"} self.assertTrue(keys.issubset(parser.keys()) and keys.issuperset(parser.keys())) def test_get_convert_dict(self): # Run result = get_convert_dict(self.fmt) # Assert self.assertDictEqual( result, { "directory": "", "platform": "4s", "platnum": "2s", "time": "%Y%m%d_%H%M", "orbit": "05d", }, ) def test_extract_values(self): fmt = "/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:d}.l1b" result = extract_values(fmt, self.string) self.assertDictEqual( result, { "directory": "otherdir", "platform": "noaa", "platnum": "16", "time": "20140210_1004", "orbit": "69022", }, ) def test_extract_values_end(self): fmt = "/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:d}" result = extract_values(fmt, self.string3) self.assertDictEqual( result, { "directory": "otherdir", "platform": "noaa", "platnum": "16", "time": "20140210_1004", "orbit": "69022", }, ) def test_extract_values_beginning(self): fmt = "{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:d}" result = extract_values(fmt, self.string4) self.assertDictEqual( result, { "directory": "/somedir/otherdir", "platform": "noaa", "platnum": "16", "time": "20140210_1004", "orbit": "69022", }, ) def test_extract_values_s4spair(self): fmt = "{directory}/hrpt_{platform:4s}{platnum:s}_{time:%Y%m%d_%H%M}_{orbit:d}" result = extract_values(fmt, self.string4) self.assertDictEqual( result, { "directory": "/somedir/otherdir", "platform": "noaa", "platnum": "16", "time": "20140210_1004", "orbit": "69022", }, ) def test_extract_values_ss2pair(self): fmt = "{directory}/hrpt_{platform:s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:d}" result = extract_values(fmt, self.string4) self.assertDictEqual( result, { "directory": "/somedir/otherdir", "platform": "noaa", "platnum": "16", "time": "20140210_1004", "orbit": "69022", }, ) def test_extract_values_ss2pair_end(self): fmt = "{directory}/hrpt_{platform:s}{platnum:2s}" result = extract_values(fmt, "/somedir/otherdir/hrpt_noaa16") self.assertDictEqual( result, 
{"directory": "/somedir/otherdir", "platform": "noaa", "platnum": "16"}, ) def test_extract_values_sdatetimepair_end(self): fmt = "{directory}/hrpt_{platform:s}{date:%Y%m%d}" result = extract_values(fmt, "/somedir/otherdir/hrpt_noaa20140212") self.assertDictEqual( result, {"directory": "/somedir/otherdir", "platform": "noaa", "date": "20140212"}, ) def test_extract_values_everything(self): fmt = "{everything}" result = extract_values(fmt, self.string) self.assertDictEqual( result, {"everything": "/somedir/otherdir/hrpt_noaa16_20140210_1004_69022.l1b"}, ) def test_extract_values_padding2(self): fmt = "/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:0>5d}.l1b" # parsedef = ['/somedir/', {'directory': None}, '/hrpt_', # {'platform': '4s'}, {'platnum': '2s'}, # '_', {'time': '%Y%m%d_%H%M'}, '_', # {'orbit': '0>5d'}, '.l1b'] result = extract_values(fmt, self.string2) # Assert self.assertDictEqual( result, { "directory": "otherdir", "platform": "noaa", "platnum": "16", "time": "20140210_1004", "orbit": "00022", }, ) def test_extract_values_fails(self): fmt = "/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:4d}.l1b" self.assertRaises(ValueError, extract_values, fmt, self.string) def test_extract_values_full_match(self): """Test that a string must completely match.""" fmt = "{orbit:05d}" val = extract_values(fmt, "12345") self.assertEqual(val, {"orbit": "12345"}) self.assertRaises(ValueError, extract_values, fmt, "12345abc") val = extract_values(fmt, "12345abc", full_match=False) self.assertEqual(val, {"orbit": "12345"}) def test_convert_digits(self): self.assertEqual(_convert("d", "69022"), 69022) self.assertRaises(ValueError, _convert, "d", "69dsf") self.assertEqual(_convert("d", "00022"), 22) self.assertEqual(_convert("4d", "69022"), 69022) self.assertEqual(_convert("_>10d", "_____69022"), 69022) self.assertEqual(_convert("%Y%m%d_%H%M", "20140210_1004"), dt.datetime(2014, 2, 10, 10, 4)) def test_parse(self): # Run result = parse(self.fmt, "/somedir/avhrr/2014/hrpt_noaa19_20140212_1412_12345.l1b") # Assert self.assertDictEqual( result, { "directory": "avhrr/2014", "platform": "noaa", "platnum": "19", "time": dt.datetime(2014, 2, 12, 14, 12), "orbit": 12345, }, ) def test_parse_string_padding_syntax_with_and_without_s(self): """Test that, in string padding syntax, '' is equivalent to 's'. From : * Type 's': String format. This is the default type for strings and may be omitted. * Type None: The same as 's'. 
""" result = parse("{foo}/{bar:_<8}", "baz/qux_____") expected_result = parse("{foo}/{bar:_<8s}", "baz/qux_____") self.assertEqual(expected_result["foo"], "baz") self.assertEqual(expected_result["bar"], "qux") self.assertEqual(result, expected_result) def test_parse_wildcards(self): # Run result = parse( "hrpt_{platform}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}{ext}", "hrpt_noaa19_20140212_1412_12345.l1b", ) # Assert self.assertDictEqual( result, { "platform": "noaa", "platnum": "19", "time": dt.datetime(2014, 2, 12, 14, 12), "orbit": 12345, "ext": ".l1b", }, ) def test_parse_align(self): filepattern = ( "H-000-{hrit_format:4s}__-{platform_name:4s}________-" "{channel_name:_<9s}-{segment:_<9s}-{start_time:%Y%m%d%H%M}-__" ) result = parse(filepattern, "H-000-MSG3__-MSG3________-IR_039___-000007___-201506051700-__") self.assertDictEqual( result, { "channel_name": "IR_039", "hrit_format": "MSG3", "platform_name": "MSG3", "segment": "000007", "start_time": dt.datetime(2015, 6, 5, 17, 0), }, ) def test_parse_digits(self): """Test when a digit field is shorter than the format spec.""" result = parse( "hrpt_{platform}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}{ext}", "hrpt_noaa19_20140212_1412_02345.l1b", ) self.assertDictEqual( result, { "platform": "noaa", "platnum": "19", "time": dt.datetime(2014, 2, 12, 14, 12), "orbit": 2345, "ext": ".l1b", }, ) result = parse( "hrpt_{platform}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:5d}{ext}", "hrpt_noaa19_20140212_1412_ 2345.l1b", ) self.assertDictEqual( result, { "platform": "noaa", "platnum": "19", "time": dt.datetime(2014, 2, 12, 14, 12), "orbit": 2345, "ext": ".l1b", }, ) result = parse( "hrpt_{platform}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:_>5d}{ext}", "hrpt_noaa19_20140212_1412___345.l1b", ) self.assertDictEqual( result, { "platform": "noaa", "platnum": "19", "time": dt.datetime(2014, 2, 12, 14, 12), "orbit": 345, "ext": ".l1b", }, ) def test_parse_bad_pattern(self): """Test when a digit field is shorter than the format spec.""" self.assertRaises( ValueError, parse, "hrpt_{platform}{platnum:-=2s}_{time:%Y%m%d_%H%M}_{orbit:05d}{ext}", "hrpt_noaa19_20140212_1412_02345.l1b", ) def test_globify_simple(self): # Run result = globify("{a}_{b}.end", {"a": "a", "b": "b"}) # Assert self.assertEqual(result, "a_b.end") def test_globify_empty(self): # Run result = globify("{a}_{b:4d}.end", {}) # Assert self.assertEqual(result, "*_????.end") def test_globify_noarg(self): # Run result = globify("{a}_{b:4d}.end") # Assert self.assertEqual(result, "*_????.end") def test_globify_known_lengths(self): # Run result = globify( "{directory}/{platform:4s}{satnum:2d}/{orbit:05d}", {"directory": "otherdir", "platform": "noaa"}, ) # Assert self.assertEqual(result, "otherdir/noaa??/?????") def test_globify_unknown_lengths(self): # Run result = globify( "hrpt_{platform_and_num}_" + "{date}_{time}_{orbit}.l1b", {"platform_and_num": "noaa16"}, ) # Assert self.assertEqual(result, "hrpt_noaa16_*_*_*.l1b") def test_globify_datetime(self): # Run result = globify( "hrpt_{platform}{satnum}_" + "{time:%Y%m%d_%H%M}_{orbit}.l1b", {"platform": "noaa", "time": dt.datetime(2014, 2, 10, 12, 12)}, ) # Assert self.assertEqual(result, "hrpt_noaa*_20140210_1212_*.l1b") def test_globify_partial_datetime(self): # Run result = globify( "hrpt_{platform:4s}{satnum:2d}_" + "{time:%Y%m%d_%H%M}_{orbit}.l1b", {"platform": "noaa", "time": (dt.datetime(2014, 2, 10, 12, 12), "Ymd")}, ) # Assert self.assertEqual(result, "hrpt_noaa??_20140210_????_*.l1b") def test_globify_datetime_nosub(self): # Run result = 
globify( "hrpt_{platform:4s}{satnum:2d}_" + "{time:%Y%m%d_%H%M}_{orbit}.l1b", {"platform": "noaa"}, ) # Assert self.assertEqual(result, "hrpt_noaa??_????????_????_*.l1b") def test_validate(self): # These cases are True self.assertTrue(validate(self.fmt, "/somedir/avhrr/2014/hrpt_noaa19_20140212_1412_12345.l1b")) self.assertTrue(validate(self.fmt, "/somedir/avhrr/2014/hrpt_noaa01_19790530_0705_00000.l1b")) self.assertTrue(validate(self.fmt, "/somedir/funny-char$dir/hrpt_noaa19_20140212_1412_12345.l1b")) self.assertTrue(validate(self.fmt, "/somedir//hrpt_noaa19_20140212_1412_12345.l1b")) # These cases are False self.assertFalse(validate(self.fmt, "/somedir/bla/bla/hrpt_noaa19_20140212_1412_1A345.l1b")) self.assertFalse(validate(self.fmt, "/somedir/bla/bla/hrpt_noaa19_2014021_1412_00000.l1b")) self.assertFalse(validate(self.fmt, "/somedir/bla/bla/hrpt_noaa19_20140212__412_00000.l1b")) self.assertFalse(validate(self.fmt, "/somedir/bla/bla/hrpt_noaa19_20140212__1412_00000.l1b")) self.assertFalse(validate(self.fmt, "/somedir/bla/bla/hrpt_noaa19_20140212_1412_00000.l1")) self.assertFalse(validate(self.fmt, "/somedir/bla/bla/hrpt_noaa19_20140212_1412_00000")) self.assertFalse(validate(self.fmt, "{}/somedir/bla/bla/hrpt_noaa19_20140212_1412_00000.l1b")) def test_is_one2one(self): # These cases are True self.assertTrue(is_one2one("/somedir/{directory}/somedata_{platform:4s}_{time:%Y%d%m-%H%M}_{orbit:5d}.l1b")) # These cases are False self.assertFalse(is_one2one("/somedir/{directory}/somedata_{platform:4s}_{time:%Y%d%m-%H%M}_{orbit:d}.l1b")) def test_greediness(self): """Test that the minimum match is parsed out. See GH #18. """ from trollsift import parse template = "{band_type}_{polarization_extracted}_{unit}_{s1_fname}" fname = "Amplitude_VH_db_S1A_IW_GRDH_1SDV_20160528T171628_20160528T171653_011462_011752_0EED.tif" res_dict = parse(template, fname) exp = { "band_type": "Amplitude", "polarization_extracted": "VH", "unit": "db", "s1_fname": "S1A_IW_GRDH_1SDV_20160528T171628_20160528T171653_011462_011752_0EED.tif", } self.assertEqual(exp, res_dict) template = "{band_type:s}_{polarization_extracted}_{unit}_{s1_fname}" res_dict = parse(template, fname) self.assertEqual(exp, res_dict) class TestCompose: """Test routines related to `compose` methods.""" @pytest.mark.parametrize("allow_partial", [False, True]) def test_compose(self, allow_partial): """Test the compose method's custom conversion options.""" key_vals = {"a": "this Is A-Test b_test c test"} new_str = compose("{a!c}", key_vals, allow_partial=allow_partial) assert new_str == "This is a-test b_test c test" new_str = compose("{a!h}", key_vals, allow_partial=allow_partial) assert new_str == "thisisatestbtestctest" new_str = compose("{a!H}", key_vals, allow_partial=allow_partial) assert new_str == "THISISATESTBTESTCTEST" new_str = compose("{a!l}", key_vals, allow_partial=allow_partial) assert new_str == "this is a-test b_test c test" new_str = compose("{a!R}", key_vals, allow_partial=allow_partial) assert new_str == "thisIsATestbtestctest" new_str = compose("{a!t}", key_vals, allow_partial=allow_partial) assert new_str == "This Is A-Test B_Test C Test" new_str = compose("{a!u}", key_vals, allow_partial=allow_partial) assert new_str == "THIS IS A-TEST B_TEST C TEST" # builtin repr new_str = compose("{a!r}", key_vals, allow_partial=allow_partial) assert new_str == "'this Is A-Test b_test c test'" # no formatting new_str = compose("{a}", key_vals, allow_partial=allow_partial) assert new_str == "this Is A-Test b_test c test" # bad formatter with 
pytest.raises(ValueError): new_str = compose("{a!X}", key_vals, allow_partial=allow_partial) assert new_str == "this Is A-Test b_test c test" def test_default_compose_is_strict(self): """Make sure the default compose call does not accept partial composition.""" fmt = "{foo}_{bar}.qux" with pytest.raises(KeyError): _ = compose(fmt, {"foo": "foo"}) def test_partial_compose_simple(self): """Test partial compose with a simple use case.""" fmt = "{variant:s}/{platform_name}_{start_time:%Y%m%d_%H%M}_{product}.{format}" composed = compose( fmt=fmt, keyvals={"platform_name": "foo", "format": "bar"}, allow_partial=True, ) assert composed == "{variant:s}/foo_{start_time:%Y%m%d_%H%M}_{product}.bar" def test_partial_compose_with_similarly_named_params(self): """Test that partial compose handles well vars with common substrings in name.""" original_fmt = "{foo}{afooo}{fooo}.{bar}/{baz:%Y}/{baz:%Y%m%d_%H}/{baz:%Y}/{bar:d}" composed = compose(fmt=original_fmt, keyvals={"afooo": "qux"}, allow_partial=True) assert composed == "{foo}qux{fooo}.{bar}/{baz:%Y}/{baz:%Y%m%d_%H}/{baz:%Y}/{bar:d}" def test_partial_compose_repeated_vars_with_different_formatting(self): """Test partial compose with a fmt with repeated vars with different formatting.""" fmt = "/foo/{start_time:%Y%m}/bar/{baz}_{start_time:%Y%m%d_%H%M}.{format}" composed = compose(fmt=fmt, keyvals={"format": "qux"}, allow_partial=True) assert composed == "/foo/{start_time:%Y%m}/bar/{baz}_{start_time:%Y%m%d_%H%M}.qux" @pytest.mark.parametrize( "original_fmt", ["{}_{}", "{foo}{afooo}{fooo}.{bar}/{baz:%Y}/{baz:%Y%m%d_%H}/{baz:%Y}/{bar:d}"], ) def test_partial_compose_is_identity_with_empty_keyvals(self, original_fmt): """Test that partial compose leaves the input untouched if no keyvals at all.""" assert compose(fmt=original_fmt, keyvals={}, allow_partial=True) == original_fmt def test_that_some_invalid_fmt_can_confuse_partial_compose(self): """Test that a fmt with a weird char can confuse partial compose.""" fmt = "{foo?}_{bar}_{foo}.qux" with pytest.raises(ValueError): _ = compose(fmt=fmt, keyvals={}, allow_partial=True) class TestParserFixedPoint: """Test parsing of fixed point numbers.""" @pytest.mark.parametrize("allow_partial_compose", [False, True]) @pytest.mark.parametrize( ("fmt", "string", "expected"), [ # Naive ("{foo:f}", "12.34", 12.34), # Including width and precision ("{foo:5.2f}", "12.34", 12.34), ("{foo:5.2f}", "-1.23", -1.23), ("{foo:5.2f}", "12.34", 12.34), ("{foo:5.2f}", "123.45", 123.45), # Whitespace padded ("{foo:5.2f}", " 1.23", 1.23), ("{foo:5.2f}", " 12.34", 12.34), # Zero padded ("{foo:05.2f}", "01.23", 1.23), ("{foo:05.2f}", "012.34", 12.34), # Only precision, no width ("{foo:.2f}", "12.34", 12.34), # Only width, no precision ("{foo:16f}", " 1.12", 1.12), # No digits before decimal point ("{foo:3.2f}", ".12", 0.12), ("{foo:4.2f}", "-.12", -0.12), ("{foo:4.2f}", " .12", 0.12), ("{foo:4.2f}", " .12", 0.12), ("{foo:16f}", " .12", 0.12), # Exponential format ("{foo:7.2e}", "-1.23e4", -1.23e4), ], ) def test_match(self, allow_partial_compose, fmt, string, expected): """Test cases expected to be matched.""" # Test parsed value parsed = parse(fmt, string) assert parsed["foo"] == expected # Test round trip composed = compose(fmt, {"foo": expected}, allow_partial=allow_partial_compose) parsed = parse(fmt, composed) assert parsed["foo"] == expected @pytest.mark.parametrize( ("fmt", "string"), [ # Decimals incorrect ("{foo:5.2f}", "12345"), ("{foo:5.2f}", "1234."), ("{foo:5.2f}", "1.234"), ("{foo:5.2f}", "123.4"), ("{foo:.2f}", "12.345"), 
# Decimals correct, but width too short ("{foo:5.2f}", "1.23"), ("{foo:5.2f}", ".23"), ("{foo:10.2e}", "1.23e4"), # Invalid ("{foo:5.2f}", "12_34"), ("{foo:5.2f}", "aBcD"), ], ) def test_no_match(self, fmt, string): """Test cases expected to not be matched.""" with pytest.raises(ValueError): parse(fmt, string) @pytest.mark.parametrize( ("fmt", "string", "expected"), [ # Decimal ("{foo:d}", "123", 123), # Hex with small letter ("{foo:x}", "7b", 123), # Hex with big letter ("{foo:X}", "7B", 123), # Fixed length hex ("{foo:03x}", "07b", 123), ("{foo:3x}", " 7b", 123), ("{foo:3X}", " 7B", 123), # Octal ("{foo:o}", "173", 123), # Free size with octal ("{bar:s}{foo:o}", "something173", 123), # Fixed length with octal ("{foo:_>4o}", "_173", 123), ("{foo:4o}", " 173", 123), # Binary ("{foo:b}", "1111011", 123), # Fixed length with binary ("{foo:8b}", " 1111011", 123), ("{foo:_>8b}", "_1111011", 123), ], ) def test_parse_integers(fmt, string, expected): assert parse(fmt, string)["foo"] == expected
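

# Illustrative round-trip sketch (not part of the test suite; it only uses
# the public trollsift API exercised by the tests above):
#
#     >>> from trollsift import compose, parse
#     >>> fmt = "{platform:4s}_{start_time:%Y%m%d}"
#     >>> parse(fmt, compose(fmt, {"platform": "noaa", "start_time": dt.datetime(2014, 2, 12)}))
#     {'platform': 'noaa', 'start_time': datetime.datetime(2014, 2, 12, 0, 0)}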