flexparser-0.4/.coveragerc

[run]
source = flexparser
omit = flexparser/testsuite/*

[report]
exclude_lines =
    @abstractmethod
    @abc.abstractmethod
    # Have to re-enable the standard pragma
    pragma: no cover

flexparser-0.4/.github/pull_request_template.md

- [ ] Closes # (insert issue number)
- [ ] Executed `pre-commit run --all-files` with no errors
- [ ] The change is fully covered by automated unit tests
- [ ] Added an entry to the CHANGES file

flexparser-0.4/.github/workflows/ci.yml

name: CI

on: [push, pull_request]

jobs:
  test-linux:
    strategy:
      matrix:
        python-version: [3.9, '3.10', '3.11', '3.12', '3.13']
    runs-on: ubuntu-latest
    env:
      TEST_OPTS: "-rfsxEX -s --cov=flexparser --cov-config=.coveragerc"
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 100
      - name: Get tags
        run: git fetch --depth=1 origin +refs/tags/*:refs/tags/*
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Get pip cache dir
        id: pip-cache
        run: echo "::set-output name=dir::$(pip cache dir)"
      - name: Setup caching
        uses: actions/cache@v2
        with:
          path: ${{ steps.pip-cache.outputs.dir }}
          key: pip-${{ matrix.python-version }}
          restore-keys: |
            pip-${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          pip install .[test]
      - name: Run Tests
        run: |
          pytest $TEST_OPTS
      - name: Coverage report
        run: coverage report -m
      - name: Coveralls Parallel
        env:
          COVERALLS_FLAG_NAME: ${{ matrix.test-number }}
          COVERALLS_PARALLEL: true
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COVERALLS_SERVICE_NAME: github
        run: |
          pip install coveralls
          coveralls

  coveralls:
    needs: test-linux
    runs-on: ubuntu-latest
    steps:
      - uses: actions/setup-python@v2
        with:
          python-version: 3.x
      - name: Coveralls Finished
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COVERALLS_SERVICE_NAME: github
        run: |
          pip install coveralls
          coveralls --finish

flexparser-0.4/.github/workflows/lint-autoupdate.yml

name: pre-commit

on:
  schedule:
    - cron: "0 0 * * 0"  # every Sunday at 00:00 UTC
  workflow_dispatch:

jobs:
  autoupdate:
    name: autoupdate
    runs-on: ubuntu-latest
    if: github.repository == 'hgrecco/flexparser'
    steps:
      - name: checkout
        uses: actions/checkout@v2
      - name: Cache pip and pre-commit
        uses: actions/cache@v2
        with:
          path: |
            ~/.cache/pre-commit
            ~/.cache/pip
          key: ${{ runner.os }}-pre-commit-autoupdate
      - name: setup python
        uses: actions/setup-python@v2
        with:
          python-version: 3.x
      - name: upgrade pip
        run: python -m pip install --upgrade pip
      - name: install dependencies
        run: python -m pip install --upgrade pre-commit
      - name: version info
        run: python -m pip list
      - name: autoupdate
        uses: technote-space/create-pr-action@bfd4392c80dbeb54e0bacbcf4750540aecae6ed4
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          EXECUTE_COMMANDS: |
            python -m pre_commit autoupdate
            python -m pre_commit run --all-files
          COMMIT_MESSAGE: 'pre-commit: autoupdate hook versions'
          COMMIT_NAME: 'github-actions[bot]'
          COMMIT_EMAIL: 'github-actions[bot]@users.noreply.github.com'
          PR_TITLE: 'pre-commit: autoupdate hook versions'
          PR_BRANCH_PREFIX: 'pre-commit/'
          PR_BRANCH_NAME: 'autoupdate-${PR_ID}'

flexparser-0.4/.github/workflows/lint.yml

name: Lint

on: [push, pull_request]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: 3.x
      - name: Lint
        uses: pre-commit/action@v2.0.0
        with:
          extra_args: --all-files --show-diff-on-failure

flexparser-0.4/.github/workflows/publish.yml

name: Build and publish to PyPI

on:
  push:
    tags:
      - '*'

jobs:
  publish:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: python -m pip install build
      - name: Build package
        run: python -m build
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}

flexparser-0.4/.gitignore

*~
__pycache__
*egg-info*
*.pyc
.DS_Store
docs/_build/
.idea
.vscode
build/
dist/
MANIFEST
*pytest_cache*
.eggs

# WebDAV file system cache files
.DAV/

# tags files (from ctags)
tags

.coverage*

# notebook stuff
*.ipynb_checkpoints*

flexparser-0.4/.pre-commit-config.yaml

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: check-yaml
      - id: end-of-file-fixer
      - id: trailing-whitespace
  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
    rev: v0.7.2
    hooks:
      # Run the linter.
      - id: ruff
      # Run the formatter.
      - id: ruff-format
  - repo: https://github.com/executablebooks/mdformat
    rev: 0.7.16
    hooks:
      - id: mdformat
        additional_dependencies:
          - mdformat-gfm  # GitHub-flavored Markdown
          - mdformat-black
  - repo: https://github.com/kynan/nbstripout
    rev: 0.6.1
    hooks:
      - id: nbstripout
        args: [--extra-keys=metadata.kernelspec metadata.language_info.version]

flexparser-0.4/AUTHORS

flexparser is written and maintained by Hernan E. Grecco.

flexparser-0.4/BADGES.rst

.. image:: https://img.shields.io/pypi/v/flexparser.svg
    :target: https://pypi.python.org/pypi/flexparser
    :alt: Latest Version

.. image:: https://img.shields.io/pypi/l/flexparser.svg
    :target: https://pypi.python.org/pypi/flexparser
    :alt: License

.. image:: https://img.shields.io/pypi/pyversions/flexparser.svg
    :target: https://pypi.python.org/pypi/flexparser
    :alt: Python Versions

.. image:: https://github.com/hgrecco/flexparser/workflows/CI/badge.svg
    :target: https://github.com/hgrecco/flexparser/actions?query=workflow%3ACI
    :alt: CI

.. image:: https://github.com/hgrecco/flexparser/workflows/Lint/badge.svg
    :target: https://github.com/hgrecco/flexparser/actions?query=workflow%3ALint
    :alt: LINTER
.. image:: https://coveralls.io/repos/github/hgrecco/flexparser/badge.svg?branch=main
    :target: https://coveralls.io/github/hgrecco/flexparser?branch=main
    :alt: Coverage

flexparser-0.4/CHANGES

flexparser Changelog
====================

0.4 (2024-11-06)
----------------

- Exceptions are no longer dataclasses.

0.3.1 (2024-06-05)
------------------

- Fix flexparser FIPS mode.

0.3 (2024-03-08)
----------------

- export multiple symbols.
- improve typing support.

0.2.1 (2024-03-08)
------------------

- fix when FIPS mode enabled. (Issue #7, thanks MattTheCuber)

0.2 (2023-11-26)
----------------

- UnexpectedEOF has been renamed UnexpectedEOS.
- Typing related improvements.
- flexparser now requires Python 3.9+.
- Dropped class variables for types in favour of introspecting the generic hierarchy.

0.1 (2022-06-04)
----------------

- first public release.

flexparser-0.4/LICENSE

Copyright (c) 2022 by Hernan E. Grecco and contributors.  See AUTHORS
for more details.

Some rights reserved.

Redistribution and use in source and binary forms of the software as
well as documentation, with or without modification, are permitted
provided that the following conditions are met:

* Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in the
  documentation and/or other materials provided with the distribution.

* The names of the contributors may not be used to endorse or promote
  products derived from this software without specific prior written
  permission.

THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE AND DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.

flexparser-0.4/MANIFEST.in

include AUTHORS CHANGES LICENSE README.rst BADGES.rst version.py
include *.yaml
recursive-include flexparser *.py
recursive-exclude examples *.py
recursive-exclude examples *.txt
recursive-include flexparser *.txt
exclude pull_request_template.md version.py .coveragerc
global-exclude *.pyc *~ .DS_Store *__pycache__* *.pyo

flexparser-0.4/README.rst

.. image:: https://img.shields.io/pypi/v/flexparser.svg
    :target: https://pypi.python.org/pypi/flexparser
    :alt: Latest Version

.. image:: https://img.shields.io/pypi/l/flexparser.svg
    :target: https://pypi.python.org/pypi/flexparser
    :alt: License

.. image:: https://img.shields.io/pypi/pyversions/flexparser.svg
    :target: https://pypi.python.org/pypi/flexparser
    :alt: Python Versions

.. image:: https://github.com/hgrecco/flexparser/workflows/CI/badge.svg
    :target: https://github.com/hgrecco/flexparser/actions?query=workflow%3ACI
    :alt: CI

.. image:: https://github.com/hgrecco/flexparser/workflows/Lint/badge.svg
    :target: https://github.com/hgrecco/flexparser/actions?query=workflow%3ALint
    :alt: LINTER

.. image:: https://coveralls.io/repos/github/hgrecco/flexparser/badge.svg?branch=main
    :target: https://coveralls.io/github/hgrecco/flexparser?branch=main
    :alt: Coverage


flexparser
==========

Why write another parser? I have asked myself the same question while
working on this project. It is clear that there are excellent parsers out
there, but I wanted to experiment with another way of writing them.

The idea is quite simple. You write a class for every type of content
(called here ``ParsedStatement``) you need to parse. Each class should
have a ``from_string`` constructor. We use the ``typing`` module
extensively to make the output structure easy to use and less error prone.

For example:

.. code-block:: python

    from dataclasses import dataclass

    import flexparser as fp

    @dataclass(frozen=True)
    class Assigment(fp.ParsedStatement):
        """Parses the following `this <- other`"""

        lhs: str
        rhs: str

        @classmethod
        def from_string(cls, s):
            lhs, rhs = s.split("<-")
            return cls(lhs.strip(), rhs.strip())

(Using a frozen dataclass is not necessary, but it is convenient. Being a
dataclass you get the init, str, repr, etc. for free. Being frozen, sort
of immutable, makes the objects easier to reason about.)

In certain cases you might want to signal the parser that this class is
not appropriate to parse the statement.

.. code-block:: python

    @dataclass(frozen=True)
    class Assigment(fp.ParsedStatement):
        """Parses the following `this <- other`"""

        lhs: str
        rhs: str

        @classmethod
        def from_string(cls, s):
            if "<-" not in s:
                # This means: I do not know how to parse it
                # try with another ParsedStatement class.
                return None

            lhs, rhs = s.split("<-")
            return cls(lhs.strip(), rhs.strip())

You might also want to indicate that this is the right ``ParsedStatement``
but something is not right:

.. code-block:: python

    @dataclass(frozen=True)
    class InvalidIdentifier(fp.ParsingError):
        value: str

    @dataclass(frozen=True)
    class Assigment(fp.ParsedStatement):
        """Parses the following `this <- other`"""

        lhs: str
        rhs: str

        @classmethod
        def from_string(cls, s):
            if "<-" not in s:
                # This means: I do not know how to parse it
                # try with another ParsedStatement class.
                return None

            lhs, rhs = (p.strip() for p in s.split("<-"))

            if not str.isidentifier(lhs):
                return InvalidIdentifier(lhs)

            return cls(lhs, rhs)

Put this into ``source.txt``:

.. code-block:: text

    one <- other
    2two <- new
    three <- newvalue
    one == three

and then run the following code:

.. code-block:: python

    parsed = fp.parse("source.txt", Assigment)
    for el in parsed.iter_statements():
        print(repr(el))

It will produce the following output:

.. code-block:: text

    BOF(start_line=0, start_col=0, end_line=0, end_col=0, raw=None, content_hash=Hash(algorithm_name='blake2b', hexdigest='37bc23cde7cad3ece96b7abf64906c84decc116de1e0486679eb6ca696f233a403f756e2e431063c82abed4f0e342294c2fe71af69111faea3765b78cb90c03f'), path=PosixPath('/Users/grecco/Documents/code/flexparser/examples/in_readme/source1.txt'), mtime=1658550284.9419456)
    Assigment(start_line=1, start_col=0, end_line=1, end_col=12, raw='one <- other', lhs='one', rhs='other')
    InvalidIdentifier(start_line=2, start_col=0, end_line=2, end_col=11, raw='2two <- new', value='2two')
    Assigment(start_line=3, start_col=0, end_line=3, end_col=17, raw='three <- newvalue', lhs='three', rhs='newvalue')
    UnknownStatement(start_line=4, start_col=0, end_line=4, end_col=12, raw='one == three')
    EOS(start_line=5, start_col=0, end_line=5, end_col=0, raw=None)

The result is a collection of ``ParsedStatement`` or ``ParsingError``
objects, flanked by ``BOF`` and ``EOS``, indicating beginning of file and
end of stream respectively. (Alternatively, a parsed source can begin with
``BOR``, which means beginning of resource and is used when parsing a
Python resource provided with a package.) Notice that there are two
correctly parsed statements (``Assigment``), one error found
(``InvalidIdentifier``) and one unknown statement (``UnknownStatement``).

Cool, right? Just writing a ``from_string`` method that outputs a
datastructure produces a usable structure of parsed objects.

Now what? Let's say we want to support equality comparison. Simply do:

.. code-block:: python

    @dataclass(frozen=True)
    class EqualityComparison(fp.ParsedStatement):
        """Parses the following `this == other`"""

        lhs: str
        rhs: str

        @classmethod
        def from_string(cls, s):
            if "==" not in s:
                return None

            lhs, rhs = (p.strip() for p in s.split("=="))
            return cls(lhs, rhs)

    parsed = fp.parse("source.txt", (Assigment, EqualityComparison))
    for el in parsed.iter_statements():
        print(repr(el))

and run it again:

.. code-block:: text

    BOF(start_line=0, start_col=0, end_line=0, end_col=0, raw=None, content_hash=Hash(algorithm_name='blake2b', hexdigest='37bc23cde7cad3ece96b7abf64906c84decc116de1e0486679eb6ca696f233a403f756e2e431063c82abed4f0e342294c2fe71af69111faea3765b78cb90c03f'), path=PosixPath('/Users/grecco/Documents/code/flexparser/examples/in_readme/source1.txt'), mtime=1658550284.9419456)
    Assigment(start_line=1, start_col=0, end_line=1, end_col=12, raw='one <- other', lhs='one', rhs='other')
    InvalidIdentifier(start_line=2, start_col=0, end_line=2, end_col=11, raw='2two <- new', value='2two')
    Assigment(start_line=3, start_col=0, end_line=3, end_col=17, raw='three <- newvalue', lhs='three', rhs='newvalue')
    EqualityComparison(start_line=4, start_col=0, end_line=4, end_col=12, raw='one == three', lhs='one', rhs='three')
    EOS(start_line=5, start_col=0, end_line=5, end_col=0, raw=None)

Need to group certain statements together? Welcome to ``Block``. This
construct allows you to group statements between an opening and a closing
statement:

.. code-block:: python

    class Begin(fp.ParsedStatement):

        @classmethod
        def from_string(cls, s):
            if s == "begin":
                return cls()
            return None

    class End(fp.ParsedStatement):

        @classmethod
        def from_string(cls, s):
            if s == "end":
                return cls()
            return None

    class ParserConfig:
        pass

    class AssigmentBlock(fp.Block[Begin, Assigment, End, ParserConfig]):
        pass

    parsed = fp.parse("source.txt", (AssigmentBlock, EqualityComparison))

Run the code:

.. code-block:: text

    BOF(start_line=0, start_col=0, end_line=0, end_col=0, raw=None, content_hash=Hash(algorithm_name='blake2b', hexdigest='37bc23cde7cad3ece96b7abf64906c84decc116de1e0486679eb6ca696f233a403f756e2e431063c82abed4f0e342294c2fe71af69111faea3765b78cb90c03f'), path=PosixPath('/Users/grecco/Documents/code/flexparser/examples/in_readme/source1.txt'), mtime=1658550284.9419456)
    UnknownStatement(start_line=1, start_col=0, end_line=1, end_col=12, raw='one <- other')
    UnknownStatement(start_line=2, start_col=0, end_line=2, end_col=11, raw='2two <- new')
    UnknownStatement(start_line=3, start_col=0, end_line=3, end_col=17, raw='three <- newvalue')
    UnknownStatement(start_line=4, start_col=0, end_line=4, end_col=12, raw='one == three')
    EOS(start_line=5, start_col=0, end_line=5, end_col=0, raw=None)

Notice that there are a lot of ``UnknownStatement`` now, because we
instructed the parser to only look for assignments within a block. So
change your text file to:

.. code-block:: text

    begin
    one <- other
    2two <- new
    three <- newvalue
    end
    one == three

and try again:

.. code-block:: text

    BOF(start_line=0, start_col=0, end_line=0, end_col=0, raw=None, content_hash=Hash(algorithm_name='blake2b', hexdigest='3d8ce0051dcdd6f0f80ef789a0df179509d927874f242005ac41ed886ae0b71a30b845b9bfcb30194461c0ef6a3ca324c36f411dfafc7e588611f1eb0269bb5a'), path=PosixPath('/Users/grecco/Documents/code/flexparser/examples/in_readme/source2.txt'), mtime=1658550707.1248093)
    Begin(start_line=1, start_col=0, end_line=1, end_col=5, raw='begin')
    Assigment(start_line=2, start_col=0, end_line=2, end_col=12, raw='one <- other', lhs='one', rhs='other')
    InvalidIdentifier(start_line=3, start_col=0, end_line=3, end_col=11, raw='2two <- new', value='2two')
    Assigment(start_line=4, start_col=0, end_line=4, end_col=17, raw='three <- newvalue', lhs='three', rhs='newvalue')
    End(start_line=5, start_col=0, end_line=5, end_col=3, raw='end')
    EqualityComparison(start_line=6, start_col=0, end_line=6, end_col=12, raw='one == three', lhs='one', rhs='three')
    EOS(start_line=7, start_col=0, end_line=7, end_col=0, raw=None)

Until now we have used ``parsed.iter_statements`` to iterate over all
parsed statements. But let's look inside ``parsed``, an object of
``ParsedProject`` type. It is a thin wrapper over a dictionary mapping
files to parsed content. Because we have provided a single file and it
does not contain a link to another file, our ``parsed`` object contains a
single element. The key is ``None``, indicating that the file 'source.txt'
was loaded from the root location (None). The content is a
``ParsedSource`` object with the following attributes:

- **path**: full path of the source file
- **mtime**: modification time of the source file
- **content_hash**: hash of the pickled content
- **config**: extra parameters that can be given to the parser (see below).

.. code-block:: text

    ParsedSource(
        parsed_source=parse.<locals>.CustomRootBlock(
            opening=BOF(start_line=0, start_col=0, end_line=0, end_col=0, raw=None, content_hash=Hash(algorithm_name='blake2b', hexdigest='3d8ce0051dcdd6f0f80ef789a0df179509d927874f242005ac41ed886ae0b71a30b845b9bfcb30194461c0ef6a3ca324c36f411dfafc7e588611f1eb0269bb5a'), path=PosixPath('/Users/grecco/Documents/code/flexparser/examples/in_readme/source2.txt'), mtime=1658550707.1248093),
            body=(
                Block.subclass_with.<locals>.CustomBlock(
                    opening=Begin(start_line=1, start_col=0, end_line=1, end_col=5, raw='begin'),
                    body=(
                        Assigment(start_line=2, start_col=0, end_line=2, end_col=12, raw='one <- other', lhs='one', rhs='other'),
                        InvalidIdentifier(start_line=3, start_col=0, end_line=3, end_col=11, raw='2two <- new', value='2two'),
                        Assigment(start_line=4, start_col=0, end_line=4, end_col=17, raw='three <- newvalue', lhs='three', rhs='newvalue')
                    ),
                    closing=End(start_line=5, start_col=0, end_line=5, end_col=3, raw='end')),
                EqualityComparison(start_line=6, start_col=0, end_line=6, end_col=12, raw='one == three', lhs='one', rhs='three')),
            closing=EOS(start_line=7, start_col=0, end_line=7, end_col=0, raw=None)),
        config=None
    )

A few things to notice:

1. We were using a block before without knowing it. The ``RootBlock`` is a
   special type of ``Block`` that starts and ends automatically with the
   file.
2. ``opening``, ``body``, ``closing`` are automatically annotated with the
   possible ``ParsedStatement`` (plus ``ParsingError``), therefore
   autocomplete works in most IDEs.
3. The same is true for the defined ``ParsedStatement`` (we have used
   ``dataclass`` for a reason). This makes using the actual result of the
   parsing a charm!
4. That annoying ``subclass_with.<locals>`` is because we have built a
   class on the fly when we used ``Block.subclass_with``. You can get rid
   of it (which is actually useful for pickling) by explicitly subclassing
   ``Block`` in your code (see below).
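If you want to poke around the result yourself, the ``ParsedProject`` API
used in the bundled ``examples/pint/parse-pint.py`` script works here too.
A minimal sketch (it assumes the ``AssigmentBlock`` and
``EqualityComparison`` classes defined above):

.. code-block:: python

    parsed = fp.parse("source.txt", (AssigmentBlock, EqualityComparison))

    # One entry per parsed source; the root file is stored under the key None.
    for key in parsed.keys():
        print(key)

    # Walk the parsed tree block by block instead of statement by statement.
    for block in parsed.iter_blocks():
        print(block)

    # Collect every parsing error found anywhere in the project.
    for error in parsed.errors():
        print(error)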
Multiple source files
---------------------

Most projects have more than one source file, internally connected. A file
might refer to another that also needs to be parsed (e.g. an `#include`
statement in C). **flexparser** provides the ``IncludeStatement`` base
class specifically for this purpose.

.. code-block:: python

    @dataclass(frozen=True)
    class Include(fp.IncludeStatement):
        """A naive implementation of #include "file" """

        value: str

        @classmethod
        def from_string(cls, s):
            if not s.startswith("#include "):
                return None

            value = s[len("#include "):].strip().strip('"')

            return cls(value)

        @property
        def target(self):
            return self.value

The only difference is that you need to implement a ``target`` property
that returns the file name or resource that this statement refers to.

Customizing statementization
----------------------------

statementi ... what? **flexparser** works by trying to parse each
statement with one of the known classes. So it is fair to ask what a
statement is in this context and how you can configure it to your needs.

A text file is split into non-overlapping strings called **statements**.
Parsing works as follows:

1. each file is split into statements (which can be single or multi line).
2. each statement is parsed with the first of the contextually available
   ``ParsedStatement`` or ``Block`` subclasses that returns a
   ``ParsedStatement`` or ``ParsingError``.

You can customize how each line is split into statements with two
arguments provided to ``parse``:

- **strip_spaces** (`bool`): indicates that leading and trailing spaces
  must be removed before attempting to parse. (default: True)
- **delimiters** (`dict`): indicates how each line must be subsplit.
  (default: do not divide)

A delimiter example might be
``{";": (fp.DelimiterInclude.SKIP, fp.DelimiterAction.CONTINUE)}``, which
tells the statementizer (sorry) that when a ";" is found a new statement
should begin. ``DelimiterInclude.SKIP`` says that ";" should be added
neither to the previous statement nor to the next. Other valid values are
``SPLIT_AFTER`` and ``SPLIT_BEFORE``, to append or prepend the delimiter
character to the previous or next statement. The second element tells the
statementizer (sorry again) what to do next; valid values are
``CONTINUE``, ``CAPTURE_NEXT_TIL_EOL``, ``STOP_PARSING_LINE``, and
``STOP_PARSING``. This is useful with comments. For example,
``{"#": (fp.DelimiterInclude.SPLIT_BEFORE, fp.DelimiterAction.CAPTURE_NEXT_TIL_EOL)}``
tells the statementizer (it is not funny anymore) that after the first "#"
it should stop splitting and capture all. This allows:

.. code-block:: text

    ## This will work as a single statement

    # This will work as a single statement

    # # This will work as # a single statement

    # a = 3 # this will produce two statements (a=3, and the rest)
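As a sketch of how this looks in a full ``parse`` call (it mirrors the
delimiter configuration used in ``examples/pint/parse-pint.py`` later in
this repository, including ``fp.SPLIT_EOL``, which re-adds the standard
end-of-line splitting):

.. code-block:: python

    parsed = fp.parse(
        "source.txt",
        (AssigmentBlock, EqualityComparison),
        delimiters={
            # Start a new statement at "#" and capture the rest of the line.
            "#": (
                fp.DelimiterInclude.SPLIT_BEFORE,
                fp.DelimiterAction.CAPTURE_NEXT_TIL_EOL,
            ),
            # Keep splitting statements at end-of-line as well.
            **fp.SPLIT_EOL,
        },
    )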
Explicit Block classes
----------------------

.. code-block:: python

    from typing import Union

    class AssigmentBlock(fp.Block[Begin, Assigment, End, ParserConfig]):
        pass

    class EntryBlock(fp.RootBlock[Union[AssigmentBlock, EqualityComparison]]):
        pass

    parsed = fp.parse("source.txt", EntryBlock)

Customizing parsing
-------------------

In certain cases you might want to leave some configuration details to the
user. We have a method for that! Instead of overriding ``from_string``,
override ``from_string_and_config``. The second argument is an object that
can be given to the parser, which in turn will be passed to each
``ParsedStatement`` class.

.. code-block:: python

    @dataclass(frozen=True)
    class NumericAssigment(fp.ParsedStatement):
        """Parses the following `this <- other`"""

        lhs: str
        rhs: numbers.Number

        @classmethod
        def from_string_and_config(cls, s, config):
            if "<-" not in s:
                # This means: I do not know how to parse it
                # try with another ParsedStatement class.
                return None

            lhs, rhs = s.split("<-")
            return cls(lhs.strip(), config.numeric_type(rhs.strip()))

    class Config:
        numeric_type = float

    parsed = fp.parse("source.txt", NumericAssigment, Config)

----

This project was started as a part of Pint_, the python units package.

See AUTHORS_ for a list of the maintainers.

To review an ordered list of notable changes for each version of this
project, see CHANGES_.

.. _`AUTHORS`: https://github.com/hgrecco/flexparser/blob/main/AUTHORS
.. _`CHANGES`: https://github.com/hgrecco/flexparser/blob/main/CHANGES
.. _`Pint`: https://github.com/hgrecco/pint

flexparser-0.4/examples/pint/files/constants_en.txt

# Default Pint constants definition file
# Based on the International System of Units
# Language: english
# Source: https://physics.nist.gov/cuu/Constants/
#         https://physics.nist.gov/PhysRefData/XrayTrans/Html/search.html
# :copyright: 2013,2019 by Pint Authors, see AUTHORS for more details.
#### MATHEMATICAL CONSTANTS #### # As computed by Maxima with fpprec:50 pi = 3.1415926535897932384626433832795028841971693993751 = π # pi tansec = 4.8481368111333441675396429478852851658848753880815e-6 # tangent of 1 arc-second ~ arc_second/radian ln10 = 2.3025850929940456840179914546843642076011014886288 # natural logarithm of 10 wien_x = 4.9651142317442763036987591313228939440555849867973 # solution to (x-5)*exp(x)+5 = 0 => x = W(5/exp(5))+5 wien_u = 2.8214393721220788934031913302944851953458817440731 # solution to (u-3)*exp(u)+3 = 0 => u = W(3/exp(3))+3 eulers_number = 2.71828182845904523536028747135266249775724709369995 #### DEFINED EXACT CONSTANTS #### speed_of_light = 299792458 m/s = c = c_0 # since 1983 planck_constant = 6.62607015e-34 J s = ℎ # since May 2019 elementary_charge = 1.602176634e-19 C = e # since May 2019 avogadro_number = 6.02214076e23 # since May 2019 boltzmann_constant = 1.380649e-23 J K^-1 = k = k_B # since May 2019 standard_gravity = 9.80665 m/s^2 = g_0 = g0 = g_n = gravity # since 1901 standard_atmosphere = 1.01325e5 Pa = atm = atmosphere # since 1954 conventional_josephson_constant = 4.835979e14 Hz / V = K_J90 # since Jan 1990 conventional_von_klitzing_constant = 2.5812807e4 ohm = R_K90 # since Jan 1990 #### DERIVED EXACT CONSTANTS #### # Floating-point conversion may introduce inaccuracies zeta = c / (cm/s) = ζ dirac_constant = ℎ / (2 * π) = ħ = hbar = atomic_unit_of_action = a_u_action avogadro_constant = avogadro_number * mol^-1 = N_A molar_gas_constant = k * N_A = R faraday_constant = e * N_A conductance_quantum = 2 * e ** 2 / ℎ = G_0 magnetic_flux_quantum = ℎ / (2 * e) = Φ_0 = Phi_0 josephson_constant = 2 * e / ℎ = K_J von_klitzing_constant = ℎ / e ** 2 = R_K stefan_boltzmann_constant = 2 / 15 * π ** 5 * k ** 4 / (ℎ ** 3 * c ** 2) = σ = sigma first_radiation_constant = 2 * π * ℎ * c ** 2 = c_1 second_radiation_constant = ℎ * c / k = c_2 wien_wavelength_displacement_law_constant = ℎ * c / (k * wien_x) wien_frequency_displacement_law_constant = wien_u * k / ℎ #### MEASURED CONSTANTS #### # Recommended CODATA-2018 values # To some extent, what is measured and what is derived is a bit arbitrary. # The choice of measured constants is based on convenience and on available uncertainty. # The uncertainty in the last significant digits is given in parentheses as a comment. 
newtonian_constant_of_gravitation = 6.67430e-11 m^3/(kg s^2) = _ = gravitational_constant # (15) rydberg_constant = 1.0973731568160e7 * m^-1 = R_∞ = R_inf # (21) electron_g_factor = -2.00231930436256 = g_e # (35) atomic_mass_constant = 1.66053906660e-27 kg = m_u # (50) electron_mass = 9.1093837015e-31 kg = m_e = atomic_unit_of_mass = a_u_mass # (28) proton_mass = 1.67262192369e-27 kg = m_p # (51) neutron_mass = 1.67492749804e-27 kg = m_n # (95) lattice_spacing_of_Si = 1.920155716e-10 m = d_220 # (32) K_alpha_Cu_d_220 = 0.80232719 # (22) K_alpha_Mo_d_220 = 0.36940604 # (19) K_alpha_W_d_220 = 0.108852175 # (98) #### DERIVED CONSTANTS #### fine_structure_constant = (2 * ℎ * R_inf / (m_e * c)) ** 0.5 = α = alpha vacuum_permeability = 2 * α * ℎ / (e ** 2 * c) = µ_0 = mu_0 = mu0 = magnetic_constant vacuum_permittivity = e ** 2 / (2 * α * ℎ * c) = ε_0 = epsilon_0 = eps_0 = eps0 = electric_constant impedance_of_free_space = 2 * α * ℎ / e ** 2 = Z_0 = characteristic_impedance_of_vacuum coulomb_constant = α * hbar * c / e ** 2 = k_C classical_electron_radius = α * hbar / (m_e * c) = r_e thomson_cross_section = 8 / 3 * π * r_e ** 2 = σ_e = sigma_e flexparser-0.4/examples/pint/files/default_en.txt000066400000000000000000000733451471301744300223110ustar00rootroot00000000000000# Default Pint units definition file # Based on the International System of Units # Language: english # :copyright: 2013,2019 by Pint Authors, see AUTHORS for more details. # Syntax # ====== # Units # ----- # = [= ] [= ] [ = ] [...] # # The canonical name and aliases should be expressed in singular form. # Pint automatically deals with plurals built by adding 's' to the singular form; plural # forms that don't follow this rule should be instead explicitly listed as aliases. # # If a unit has no symbol and one wants to define aliases, then the symbol should be # conventionally set to _. # # Example: # millennium = 1e3 * year = _ = millennia # # # Prefixes # -------- # - = [= ] [= ] [ = ] [...] # # Example: # deca- = 1e+1 = da- = deka- # # # Derived dimensions # ------------------ # [dimension name] = # # Example: # [density] = [mass] / [volume] # # Note that primary dimensions don't need to be declared; they can be # defined for the first time in a unit definition. # E.g. see below `meter = [length]` # # # Additional aliases # ------------------ # @alias = [ = ] [...] # # Used to add aliases to already existing unit definitions. # Particularly useful when one wants to enrich definitions # from defaults_en.txt with custom aliases. 
# # Example: # @alias meter = my_meter # See also: https://pint.readthedocs.io/en/latest/defining.html @defaults group = international system = mks @end #### PREFIXES #### # decimal prefixes yocto- = 1e-24 = y- zepto- = 1e-21 = z- atto- = 1e-18 = a- femto- = 1e-15 = f- pico- = 1e-12 = p- nano- = 1e-9 = n- micro- = 1e-6 = µ- = u- milli- = 1e-3 = m- centi- = 1e-2 = c- deci- = 1e-1 = d- deca- = 1e+1 = da- = deka- hecto- = 1e2 = h- kilo- = 1e3 = k- mega- = 1e6 = M- giga- = 1e9 = G- tera- = 1e12 = T- peta- = 1e15 = P- exa- = 1e18 = E- zetta- = 1e21 = Z- yotta- = 1e24 = Y- # binary_prefixes kibi- = 2**10 = Ki- mebi- = 2**20 = Mi- gibi- = 2**30 = Gi- tebi- = 2**40 = Ti- pebi- = 2**50 = Pi- exbi- = 2**60 = Ei- zebi- = 2**70 = Zi- yobi- = 2**80 = Yi- # extra_prefixes semi- = 0.5 = _ = demi- sesqui- = 1.5 #### BASE UNITS #### meter = [length] = m = metre second = [time] = s = sec ampere = [current] = A = amp candela = [luminosity] = cd = candle gram = [mass] = g mole = [substance] = mol kelvin = [temperature]; offset: 0 = K = degK = °K = degree_Kelvin = degreeK # older names supported for compatibility radian = [] = rad bit = [] count = [] #### CONSTANTS #### @import constants_en.txt #### UNITS #### # Common and less common, grouped by quantity. # Conversion factors are exact (except when noted), # although floating-point conversion may introduce inaccuracies # Angle turn = 2 * π * radian = _ = revolution = cycle = circle degree = π / 180 * radian = deg = arcdeg = arcdegree = angular_degree arcminute = degree / 60 = arcmin = arc_minute = angular_minute arcsecond = arcminute / 60 = arcsec = arc_second = angular_second milliarcsecond = 1e-3 * arcsecond = mas grade = π / 200 * radian = grad = gon mil = π / 32000 * radian # Solid angle steradian = radian ** 2 = sr square_degree = (π / 180) ** 2 * sr = sq_deg = sqdeg # Information baud = bit / second = Bd = bps byte = 8 * bit = B = octet # byte = 8 * bit = _ = octet ## NOTE: B (byte) symbol can conflict with Bell # Length angstrom = 1e-10 * meter = Å = ångström = Å micron = micrometer = µ fermi = femtometer = fm light_year = speed_of_light * julian_year = ly = lightyear astronomical_unit = 149597870700 * meter = au # since Aug 2012 parsec = 1 / tansec * astronomical_unit = pc nautical_mile = 1852 * meter = nmi bohr = hbar / (alpha * m_e * c) = a_0 = a0 = bohr_radius = atomic_unit_of_length = a_u_length x_unit_Cu = K_alpha_Cu_d_220 * d_220 / 1537.4 = Xu_Cu x_unit_Mo = K_alpha_Mo_d_220 * d_220 / 707.831 = Xu_Mo angstrom_star = K_alpha_W_d_220 * d_220 / 0.2090100 = Å_star planck_length = (hbar * gravitational_constant / c ** 3) ** 0.5 # Mass metric_ton = 1e3 * kilogram = t = tonne unified_atomic_mass_unit = atomic_mass_constant = u = amu dalton = atomic_mass_constant = Da grain = 64.79891 * milligram = gr gamma_mass = microgram carat = 200 * milligram = ct = karat planck_mass = (hbar * c / gravitational_constant) ** 0.5 # Time minute = 60 * second = min hour = 60 * minute = h = hr day = 24 * hour = d week = 7 * day fortnight = 2 * week year = 365.25 * day = a = yr = julian_year month = year / 12 # decade = 10 * year ## NOTE: decade [time] can conflict with decade [dimensionless] century = 100 * year = _ = centuries millennium = 1e3 * year = _ = millennia eon = 1e9 * year shake = 1e-8 * second svedberg = 1e-13 * second atomic_unit_of_time = hbar / E_h = a_u_time gregorian_year = 365.2425 * day sidereal_year = 365.256363004 * day # approximate, as of J2000 epoch tropical_year = 365.242190402 * day # approximate, as of J2000 epoch common_year = 365 * day 
leap_year = 366 * day sidereal_day = day / 1.00273790935079524 # approximate sidereal_month = 27.32166155 * day # approximate tropical_month = 27.321582 * day # approximate synodic_month = 29.530589 * day = _ = lunar_month # approximate planck_time = (hbar * gravitational_constant / c ** 5) ** 0.5 # Temperature degree_Celsius = kelvin; offset: 273.15 = °C = celsius = degC = degreeC degree_Rankine = 5 / 9 * kelvin; offset: 0 = °R = rankine = degR = degreeR degree_Fahrenheit = 5 / 9 * kelvin; offset: 233.15 + 200 / 9 = °F = fahrenheit = degF = degreeF degree_Reaumur = 4 / 5 * kelvin; offset: 273.15 = °Re = reaumur = degRe = degreeRe = degree_Réaumur = réaumur atomic_unit_of_temperature = E_h / k = a_u_temp planck_temperature = (hbar * c ** 5 / gravitational_constant / k ** 2) ** 0.5 # Area [area] = [length] ** 2 are = 100 * meter ** 2 barn = 1e-28 * meter ** 2 = b darcy = centipoise * centimeter ** 2 / (second * atmosphere) hectare = 100 * are = ha # Volume [volume] = [length] ** 3 liter = decimeter ** 3 = l = L = litre cubic_centimeter = centimeter ** 3 = cc lambda = microliter = λ stere = meter ** 3 # Frequency [frequency] = 1 / [time] hertz = 1 / second = Hz revolutions_per_minute = revolution / minute = rpm revolutions_per_second = revolution / second = rps counts_per_second = count / second = cps # Wavenumber [wavenumber] = 1 / [length] reciprocal_centimeter = 1 / cm = cm_1 = kayser # Velocity [velocity] = [length] / [time] = [speed] knot = nautical_mile / hour = kt = knot_international = international_knot mile_per_hour = mile / hour = mph = MPH kilometer_per_hour = kilometer / hour = kph = KPH kilometer_per_second = kilometer / second = kps meter_per_second = meter / second = mps foot_per_second = foot / second = fps # Volumetric Flow Rate [volumetric_flow_rate] = [volume] / [time] sverdrup = 1e6 * meter ** 3 / second = sv # Acceleration [acceleration] = [velocity] / [time] galileo = centimeter / second ** 2 = Gal # Force [force] = [mass] * [acceleration] newton = kilogram * meter / second ** 2 = N dyne = gram * centimeter / second ** 2 = dyn force_kilogram = g_0 * kilogram = kgf = kilogram_force = pond force_gram = g_0 * gram = gf = gram_force force_metric_ton = g_0 * metric_ton = tf = metric_ton_force = force_t = t_force atomic_unit_of_force = E_h / a_0 = a_u_force # Energy [energy] = [force] * [length] joule = newton * meter = J erg = dyne * centimeter watt_hour = watt * hour = Wh = watthour electron_volt = e * volt = eV rydberg = ℎ * c * R_inf = Ry hartree = 2 * rydberg = E_h = Eh = hartree_energy = atomic_unit_of_energy = a_u_energy calorie = 4.184 * joule = cal = thermochemical_calorie = cal_th international_calorie = 4.1868 * joule = cal_it = international_steam_table_calorie fifteen_degree_calorie = 4.1855 * joule = cal_15 british_thermal_unit = 1055.056 * joule = Btu = BTU = Btu_iso international_british_thermal_unit = 1e3 * pound / kilogram * degR / kelvin * international_calorie = Btu_it thermochemical_british_thermal_unit = 1e3 * pound / kilogram * degR / kelvin * calorie = Btu_th quadrillion_Btu = 1e15 * Btu = quad therm = 1e5 * Btu = thm = EC_therm US_therm = 1.054804e8 * joule # approximate, no exact definition ton_TNT = 1e9 * calorie = tTNT tonne_of_oil_equivalent = 1e10 * international_calorie = toe atmosphere_liter = atmosphere * liter = atm_l # Power [power] = [energy] / [time] watt = joule / second = W volt_ampere = volt * ampere = VA horsepower = 550 * foot * force_pound / second = hp = UK_horsepower = hydraulic_horsepower boiler_horsepower = 33475 * Btu / hour # 
unclear which Btu metric_horsepower = 75 * force_kilogram * meter / second electrical_horsepower = 746 * watt refrigeration_ton = 12e3 * Btu / hour = _ = ton_of_refrigeration # approximate, no exact definition cooling_tower_ton = 1.25 * refrigeration_ton # approximate, no exact definition standard_liter_per_minute = atmosphere * liter / minute = slpm = slm conventional_watt_90 = K_J90 ** 2 * R_K90 / (K_J ** 2 * R_K) * watt = W_90 # Momentum [momentum] = [length] * [mass] / [time] # Density (as auxiliary for pressure) [density] = [mass] / [volume] mercury = 13.5951 * kilogram / liter = Hg = Hg_0C = Hg_32F = conventional_mercury water = 1.0 * kilogram / liter = H2O = conventional_water mercury_60F = 13.5568 * kilogram / liter = Hg_60F # approximate water_39F = 0.999972 * kilogram / liter = water_4C # approximate water_60F = 0.999001 * kilogram / liter # approximate # Pressure [pressure] = [force] / [area] pascal = newton / meter ** 2 = Pa barye = dyne / centimeter ** 2 = Ba = barie = barad = barrie = baryd bar = 1e5 * pascal technical_atmosphere = kilogram * g_0 / centimeter ** 2 = at torr = atm / 760 pound_force_per_square_inch = force_pound / inch ** 2 = psi kip_per_square_inch = kip / inch ** 2 = ksi millimeter_Hg = millimeter * Hg * g_0 = mmHg = mm_Hg = millimeter_Hg_0C centimeter_Hg = centimeter * Hg * g_0 = cmHg = cm_Hg = centimeter_Hg_0C inch_Hg = inch * Hg * g_0 = inHg = in_Hg = inch_Hg_32F inch_Hg_60F = inch * Hg_60F * g_0 inch_H2O_39F = inch * water_39F * g_0 inch_H2O_60F = inch * water_60F * g_0 foot_H2O = foot * water * g_0 = ftH2O = feet_H2O centimeter_H2O = centimeter * water * g_0 = cmH2O = cm_H2O sound_pressure_level = 20e-6 * pascal = SPL # Torque [torque] = [force] * [length] foot_pound = foot * force_pound = ft_lb = footpound # Viscosity [viscosity] = [pressure] * [time] poise = 0.1 * Pa * second = P reyn = psi * second # Kinematic viscosity [kinematic_viscosity] = [area] / [time] stokes = centimeter ** 2 / second = St # Fluidity [fluidity] = 1 / [viscosity] rhe = 1 / poise # Amount of substance particle = 1 / N_A = _ = molec = molecule # Concentration [concentration] = [substance] / [volume] molar = mole / liter = M # Catalytic activity [activity] = [substance] / [time] katal = mole / second = kat enzyme_unit = micromole / minute = U = enzymeunit # Entropy [entropy] = [energy] / [temperature] clausius = calorie / kelvin = Cl # Molar entropy [molar_entropy] = [entropy] / [substance] entropy_unit = calorie / kelvin / mole = eu # Radiation becquerel = counts_per_second = Bq curie = 3.7e10 * becquerel = Ci rutherford = 1e6 * becquerel = Rd gray = joule / kilogram = Gy sievert = joule / kilogram = Sv rads = 0.01 * gray rem = 0.01 * sievert roentgen = 2.58e-4 * coulomb / kilogram = _ = röntgen # approximate, depends on medium # Heat transimission [heat_transmission] = [energy] / [area] peak_sun_hour = 1e3 * watt_hour / meter ** 2 = PSH langley = thermochemical_calorie / centimeter ** 2 = Ly # Luminance [luminance] = [luminosity] / [area] nit = candela / meter ** 2 stilb = candela / centimeter ** 2 lambert = 1 / π * candela / centimeter ** 2 # Luminous flux [luminous_flux] = [luminosity] lumen = candela * steradian = lm # Illuminance [illuminance] = [luminous_flux] / [area] lux = lumen / meter ** 2 = lx # Intensity [intensity] = [power] / [area] atomic_unit_of_intensity = 0.5 * ε_0 * c * atomic_unit_of_electric_field ** 2 = a_u_intensity # Current biot = 10 * ampere = Bi abampere = biot = abA atomic_unit_of_current = e / atomic_unit_of_time = a_u_current 
mean_international_ampere = mean_international_volt / mean_international_ohm = A_it US_international_ampere = US_international_volt / US_international_ohm = A_US conventional_ampere_90 = K_J90 * R_K90 / (K_J * R_K) * ampere = A_90 planck_current = (c ** 6 / gravitational_constant / k_C) ** 0.5 # Charge [charge] = [current] * [time] coulomb = ampere * second = C abcoulomb = 10 * C = abC faraday = e * N_A * mole conventional_coulomb_90 = K_J90 * R_K90 / (K_J * R_K) * coulomb = C_90 ampere_hour = ampere * hour = Ah # Electric potential [electric_potential] = [energy] / [charge] volt = joule / coulomb = V abvolt = 1e-8 * volt = abV mean_international_volt = 1.00034 * volt = V_it # approximate US_international_volt = 1.00033 * volt = V_US # approximate conventional_volt_90 = K_J90 / K_J * volt = V_90 # Electric field [electric_field] = [electric_potential] / [length] atomic_unit_of_electric_field = e * k_C / a_0 ** 2 = a_u_electric_field # Electric displacement field [electric_displacement_field] = [charge] / [area] # Resistance [resistance] = [electric_potential] / [current] ohm = volt / ampere = Ω abohm = 1e-9 * ohm = abΩ mean_international_ohm = 1.00049 * ohm = Ω_it = ohm_it # approximate US_international_ohm = 1.000495 * ohm = Ω_US = ohm_US # approximate conventional_ohm_90 = R_K / R_K90 * ohm = Ω_90 = ohm_90 # Resistivity [resistivity] = [resistance] * [length] # Conductance [conductance] = [current] / [electric_potential] siemens = ampere / volt = S = mho absiemens = 1e9 * siemens = abS = abmho # Capacitance [capacitance] = [charge] / [electric_potential] farad = coulomb / volt = F abfarad = 1e9 * farad = abF conventional_farad_90 = R_K90 / R_K * farad = F_90 # Inductance [inductance] = [magnetic_flux] / [current] henry = weber / ampere = H abhenry = 1e-9 * henry = abH conventional_henry_90 = R_K / R_K90 * henry = H_90 # Magnetic flux [magnetic_flux] = [electric_potential] * [time] weber = volt * second = Wb unit_pole = µ_0 * biot * centimeter # Magnetic field [magnetic_field] = [magnetic_flux] / [area] tesla = weber / meter ** 2 = T gamma = 1e-9 * tesla = γ # Magnetomotive force [magnetomotive_force] = [current] ampere_turn = ampere = At biot_turn = biot gilbert = 1 / (4 * π) * biot_turn = Gb # Magnetic field strength [magnetic_field_strength] = [current] / [length] # Electric dipole moment [electric_dipole] = [charge] * [length] debye = 1e-9 / ζ * coulomb * angstrom = D # formally 1 D = 1e-10 Fr*Å, but we generally want to use it outside the Gaussian context # Electric quadrupole moment [electric_quadrupole] = [charge] * [area] buckingham = debye * angstrom # Magnetic dipole moment [magnetic_dipole] = [current] * [area] bohr_magneton = e * hbar / (2 * m_e) = µ_B = mu_B nuclear_magneton = e * hbar / (2 * m_p) = µ_N = mu_N # Logaritmic Unit Definition # Unit = scale; logbase; logfactor # x_dB = [logfactor] * log( x_lin / [scale] ) / log( [logbase] ) # Logaritmic Units of dimensionless quantity: [ https://en.wikipedia.org/wiki/Level_(logarithmic_quantity) ] decibelmilliwatt = 1e-3 watt; logbase: 10; logfactor: 10 = dBm decibelmicrowatt = 1e-6 watt; logbase: 10; logfactor: 10 = dBu decibel = 1 ; logbase: 10; logfactor: 10 = dB # bell = 1 ; logbase: 10; logfactor: = B ## NOTE: B (Bell) symbol conflicts with byte decade = 1 ; logbase: 10; logfactor: 1 ## NOTE: decade [time] can conflict with decade [dimensionless] octave = 1 ; logbase: 2; logfactor: 1 = oct neper = 1 ; logbase: 2.71828182845904523536028747135266249775724709369995; logfactor: 0.5 = Np # neper = 1 ; logbase: eulers_number; 
logfactor: 0.5 = Np #### UNIT GROUPS #### # Mostly for length, area, volume, mass, force # (customary or specialized units) @group USCSLengthInternational thou = 1e-3 * inch = th = mil_length inch = yard / 36 = in = international_inch = inches = international_inches hand = 4 * inch foot = yard / 3 = ft = international_foot = feet = international_feet yard = 0.9144 * meter = yd = international_yard # since Jul 1959 mile = 1760 * yard = mi = international_mile circular_mil = π / 4 * mil_length ** 2 = cmil square_inch = inch ** 2 = sq_in = square_inches square_foot = foot ** 2 = sq_ft = square_feet square_yard = yard ** 2 = sq_yd square_mile = mile ** 2 = sq_mi cubic_inch = in ** 3 = cu_in cubic_foot = ft ** 3 = cu_ft = cubic_feet cubic_yard = yd ** 3 = cu_yd @end @group USCSLengthSurvey link = 1e-2 * chain = li = survey_link survey_foot = 1200 / 3937 * meter = sft fathom = 6 * survey_foot rod = 16.5 * survey_foot = rd = pole = perch chain = 4 * rod furlong = 40 * rod = fur cables_length = 120 * fathom survey_mile = 5280 * survey_foot = smi = us_statute_mile league = 3 * survey_mile square_rod = rod ** 2 = sq_rod = sq_pole = sq_perch acre = 10 * chain ** 2 square_survey_mile = survey_mile ** 2 = _ = section square_league = league ** 2 acre_foot = acre * survey_foot = _ = acre_feet @end @group USCSDryVolume dry_pint = bushel / 64 = dpi = US_dry_pint dry_quart = bushel / 32 = dqt = US_dry_quart dry_gallon = bushel / 8 = dgal = US_dry_gallon peck = bushel / 4 = pk bushel = 2150.42 cubic_inch = bu dry_barrel = 7056 cubic_inch = _ = US_dry_barrel board_foot = ft * ft * in = FBM = board_feet = BF = BDFT = super_foot = superficial_foot = super_feet = superficial_feet @end @group USCSLiquidVolume minim = pint / 7680 fluid_dram = pint / 128 = fldr = fluidram = US_fluid_dram = US_liquid_dram fluid_ounce = pint / 16 = floz = US_fluid_ounce = US_liquid_ounce gill = pint / 4 = gi = liquid_gill = US_liquid_gill pint = quart / 2 = pt = liquid_pint = US_pint fifth = gallon / 5 = _ = US_liquid_fifth quart = gallon / 4 = qt = liquid_quart = US_liquid_quart gallon = 231 * cubic_inch = gal = liquid_gallon = US_liquid_gallon @end @group USCSVolumeOther teaspoon = fluid_ounce / 6 = tsp tablespoon = fluid_ounce / 2 = tbsp shot = 3 * tablespoon = jig = US_shot cup = pint / 2 = cp = liquid_cup = US_liquid_cup barrel = 31.5 * gallon = bbl oil_barrel = 42 * gallon = oil_bbl beer_barrel = 31 * gallon = beer_bbl hogshead = 63 * gallon @end @group Avoirdupois dram = pound / 256 = dr = avoirdupois_dram = avdp_dram = drachm ounce = pound / 16 = oz = avoirdupois_ounce = avdp_ounce pound = 7e3 * grain = lb = avoirdupois_pound = avdp_pound stone = 14 * pound quarter = 28 * stone bag = 94 * pound hundredweight = 100 * pound = cwt = short_hundredweight long_hundredweight = 112 * pound ton = 2e3 * pound = _ = short_ton long_ton = 2240 * pound slug = g_0 * pound * second ** 2 / foot slinch = g_0 * pound * second ** 2 / inch = blob = slugette force_ounce = g_0 * ounce = ozf = ounce_force force_pound = g_0 * pound = lbf = pound_force force_ton = g_0 * ton = _ = ton_force = force_short_ton = short_ton_force force_long_ton = g_0 * long_ton = _ = long_ton_force kip = 1e3 * force_pound poundal = pound * foot / second ** 2 = pdl @end @group AvoirdupoisUK using Avoirdupois UK_hundredweight = long_hundredweight = UK_cwt UK_ton = long_ton UK_force_ton = force_long_ton = _ = UK_ton_force @end @group AvoirdupoisUS using Avoirdupois US_hundredweight = hundredweight = US_cwt US_ton = ton US_force_ton = force_ton = _ = US_ton_force @end @group 
Troy pennyweight = 24 * grain = dwt troy_ounce = 480 * grain = toz = ozt troy_pound = 12 * troy_ounce = tlb = lbt @end @group Apothecary scruple = 20 * grain apothecary_dram = 3 * scruple = ap_dr apothecary_ounce = 8 * apothecary_dram = ap_oz apothecary_pound = 12 * apothecary_ounce = ap_lb @end @group ImperialVolume imperial_minim = imperial_fluid_ounce / 480 imperial_fluid_scruple = imperial_fluid_ounce / 24 imperial_fluid_drachm = imperial_fluid_ounce / 8 = imperial_fldr = imperial_fluid_dram imperial_fluid_ounce = imperial_pint / 20 = imperial_floz = UK_fluid_ounce imperial_gill = imperial_pint / 4 = imperial_gi = UK_gill imperial_cup = imperial_pint / 2 = imperial_cp = UK_cup imperial_pint = imperial_gallon / 8 = imperial_pt = UK_pint imperial_quart = imperial_gallon / 4 = imperial_qt = UK_quart imperial_gallon = 4.54609 * liter = imperial_gal = UK_gallon imperial_peck = 2 * imperial_gallon = imperial_pk = UK_pk imperial_bushel = 8 * imperial_gallon = imperial_bu = UK_bushel imperial_barrel = 36 * imperial_gallon = imperial_bbl = UK_bbl @end @group Printer pica = inch / 6 = _ = printers_pica point = pica / 12 = pp = printers_point = big_point = bp didot = 1 / 2660 * m cicero = 12 * didot tex_point = inch / 72.27 tex_pica = 12 * tex_point tex_didot = 1238 / 1157 * tex_point tex_cicero = 12 * tex_didot scaled_point = tex_point / 65536 css_pixel = inch / 96 = px pixel = [printing_unit] = _ = dot = pel = picture_element pixels_per_centimeter = pixel / cm = PPCM pixels_per_inch = pixel / inch = dots_per_inch = PPI = ppi = DPI = printers_dpi bits_per_pixel = bit / pixel = bpp @end @group Textile tex = gram / kilometer = Tt dtex = decitex denier = gram / (9 * kilometer) = den = Td jute = pound / (14400 * yard) = Tj aberdeen = jute = Ta RKM = gf / tex number_english = 840 * yard / pound = Ne = NeC = ECC number_meter = kilometer / kilogram = Nm @end #### CGS ELECTROMAGNETIC UNITS #### # === Gaussian system of units === @group Gaussian franklin = erg ** 0.5 * centimeter ** 0.5 = Fr = statcoulomb = statC = esu statvolt = erg / franklin = statV statampere = franklin / second = statA gauss = dyne / franklin = G maxwell = gauss * centimeter ** 2 = Mx oersted = dyne / maxwell = Oe = ørsted statohm = statvolt / statampere = statΩ statfarad = franklin / statvolt = statF statmho = statampere / statvolt @end # Note this system is not commensurate with SI, as ε_0 and µ_0 disappear; # some quantities with different dimensions in SI have the same # dimensions in the Gaussian system (e.g. 
[Mx] = [Fr], but [Wb] != [C]), # and therefore the conversion factors depend on the context (not in pint sense) [gaussian_charge] = [length] ** 1.5 * [mass] ** 0.5 / [time] [gaussian_current] = [gaussian_charge] / [time] [gaussian_electric_potential] = [gaussian_charge] / [length] [gaussian_electric_field] = [gaussian_electric_potential] / [length] [gaussian_electric_displacement_field] = [gaussian_charge] / [area] [gaussian_electric_flux] = [gaussian_charge] [gaussian_electric_dipole] = [gaussian_charge] * [length] [gaussian_electric_quadrupole] = [gaussian_charge] * [area] [gaussian_magnetic_field] = [force] / [gaussian_charge] [gaussian_magnetic_field_strength] = [gaussian_magnetic_field] [gaussian_magnetic_flux] = [gaussian_magnetic_field] * [area] [gaussian_magnetic_dipole] = [energy] / [gaussian_magnetic_field] [gaussian_resistance] = [gaussian_electric_potential] / [gaussian_current] [gaussian_resistivity] = [gaussian_resistance] * [length] [gaussian_capacitance] = [gaussian_charge] / [gaussian_electric_potential] [gaussian_inductance] = [gaussian_electric_potential] * [time] / [gaussian_current] [gaussian_conductance] = [gaussian_current] / [gaussian_electric_potential] @context Gaussian = Gau [gaussian_charge] -> [charge]: value / k_C ** 0.5 [charge] -> [gaussian_charge]: value * k_C ** 0.5 [gaussian_current] -> [current]: value / k_C ** 0.5 [current] -> [gaussian_current]: value * k_C ** 0.5 [gaussian_electric_potential] -> [electric_potential]: value * k_C ** 0.5 [electric_potential] -> [gaussian_electric_potential]: value / k_C ** 0.5 [gaussian_electric_field] -> [electric_field]: value * k_C ** 0.5 [electric_field] -> [gaussian_electric_field]: value / k_C ** 0.5 [gaussian_electric_displacement_field] -> [electric_displacement_field]: value / (4 * π / ε_0) ** 0.5 [electric_displacement_field] -> [gaussian_electric_displacement_field]: value * (4 * π / ε_0) ** 0.5 [gaussian_electric_dipole] -> [electric_dipole]: value / k_C ** 0.5 [electric_dipole] -> [gaussian_electric_dipole]: value * k_C ** 0.5 [gaussian_electric_quadrupole] -> [electric_quadrupole]: value / k_C ** 0.5 [electric_quadrupole] -> [gaussian_electric_quadrupole]: value * k_C ** 0.5 [gaussian_magnetic_field] -> [magnetic_field]: value / (4 * π / µ_0) ** 0.5 [magnetic_field] -> [gaussian_magnetic_field]: value * (4 * π / µ_0) ** 0.5 [gaussian_magnetic_flux] -> [magnetic_flux]: value / (4 * π / µ_0) ** 0.5 [magnetic_flux] -> [gaussian_magnetic_flux]: value * (4 * π / µ_0) ** 0.5 [gaussian_magnetic_field_strength] -> [magnetic_field_strength]: value / (4 * π * µ_0) ** 0.5 [magnetic_field_strength] -> [gaussian_magnetic_field_strength]: value * (4 * π * µ_0) ** 0.5 [gaussian_magnetic_dipole] -> [magnetic_dipole]: value * (4 * π / µ_0) ** 0.5 [magnetic_dipole] -> [gaussian_magnetic_dipole]: value / (4 * π / µ_0) ** 0.5 [gaussian_resistance] -> [resistance]: value * k_C [resistance] -> [gaussian_resistance]: value / k_C [gaussian_resistivity] -> [resistivity]: value * k_C [resistivity] -> [gaussian_resistivity]: value / k_C [gaussian_capacitance] -> [capacitance]: value / k_C [capacitance] -> [gaussian_capacitance]: value * k_C [gaussian_inductance] -> [inductance]: value * k_C [inductance] -> [gaussian_inductance]: value / k_C [gaussian_conductance] -> [conductance]: value / k_C [conductance] -> [gaussian_conductance]: value * k_C @end # === ESU system of units === # (where different from Gaussian) # See note for Gaussian system too @group ESU using Gaussian statweber = statvolt * second = statWb stattesla = statweber 
/ centimeter ** 2 = statT stathenry = statweber / statampere = statH @end [esu_charge] = [length] ** 1.5 * [mass] ** 0.5 / [time] [esu_current] = [esu_charge] / [time] [esu_electric_potential] = [esu_charge] / [length] [esu_magnetic_flux] = [esu_electric_potential] * [time] [esu_magnetic_field] = [esu_magnetic_flux] / [area] [esu_magnetic_field_strength] = [esu_current] / [length] [esu_magnetic_dipole] = [esu_current] * [area] @context ESU = esu [esu_magnetic_field] -> [magnetic_field]: value * k_C ** 0.5 [magnetic_field] -> [esu_magnetic_field]: value / k_C ** 0.5 [esu_magnetic_flux] -> [magnetic_flux]: value * k_C ** 0.5 [magnetic_flux] -> [esu_magnetic_flux]: value / k_C ** 0.5 [esu_magnetic_field_strength] -> [magnetic_field_strength]: value / (4 * π / ε_0) ** 0.5 [magnetic_field_strength] -> [esu_magnetic_field_strength]: value * (4 * π / ε_0) ** 0.5 [esu_magnetic_dipole] -> [magnetic_dipole]: value / k_C ** 0.5 [magnetic_dipole] -> [esu_magnetic_dipole]: value * k_C ** 0.5 @end #### CONVERSION CONTEXTS #### @context(n=1) spectroscopy = sp # n index of refraction of the medium. [length] <-> [frequency]: speed_of_light / n / value [frequency] -> [energy]: planck_constant * value [energy] -> [frequency]: value / planck_constant # allow wavenumber / kayser [wavenumber] <-> [length]: 1 / value @end @context boltzmann [temperature] -> [energy]: boltzmann_constant * value [energy] -> [temperature]: value / boltzmann_constant @end @context energy [energy] -> [energy] / [substance]: value * N_A [energy] / [substance] -> [energy]: value / N_A [energy] -> [mass]: value / c ** 2 [mass] -> [energy]: value * c ** 2 @end @context(mw=0,volume=0,solvent_mass=0) chemistry = chem # mw is the molecular weight of the species # volume is the volume of the solution # solvent_mass is the mass of solvent in the solution # moles -> mass require the molecular weight [substance] -> [mass]: value * mw [mass] -> [substance]: value / mw # moles/volume -> mass/volume and moles/mass -> mass/mass # require the molecular weight [substance] / [volume] -> [mass] / [volume]: value * mw [mass] / [volume] -> [substance] / [volume]: value / mw [substance] / [mass] -> [mass] / [mass]: value * mw [mass] / [mass] -> [substance] / [mass]: value / mw # moles/volume -> moles requires the solution volume [substance] / [volume] -> [substance]: value * volume [substance] -> [substance] / [volume]: value / volume # moles/mass -> moles requires the solvent (usually water) mass [substance] / [mass] -> [substance]: value * solvent_mass [substance] -> [substance] / [mass]: value / solvent_mass # moles/mass -> moles/volume require the solvent mass and the volume [substance] / [mass] -> [substance]/[volume]: value * solvent_mass / volume [substance] / [volume] -> [substance] / [mass]: value / solvent_mass * volume @end @context textile # Allow switching between Direct count system (i.e. tex) and # Indirect count system (i.e. 
Ne, Nm) [mass] / [length] <-> [length] / [mass]: 1 / value @end #### SYSTEMS OF UNITS #### @system SI second meter kilogram ampere kelvin mole candela @end @system mks using international meter kilogram second @end @system cgs using international, Gaussian, ESU centimeter gram second @end @system atomic using international # based on unit m_e, e, hbar, k_C, k bohr: meter electron_mass: gram atomic_unit_of_time: second atomic_unit_of_current: ampere atomic_unit_of_temperature: kelvin @end @system Planck using international # based on unit c, gravitational_constant, hbar, k_C, k planck_length: meter planck_mass: gram planck_time: second planck_current: ampere planck_temperature: kelvin @end @system imperial using ImperialVolume, USCSLengthInternational, AvoirdupoisUK yard pound @end @system US using USCSLiquidVolume, USCSDryVolume, USCSVolumeOther, USCSLengthInternational, USCSLengthSurvey, AvoirdupoisUS yard pound @end flexparser-0.4/examples/pint/parse-pint.py000066400000000000000000000034031471301744300207600ustar00rootroot00000000000000from __future__ import annotations import pathlib import typing as ty from dataclasses import dataclass from pint_parser import common, context, defaults, group, plain, system from flexparser import flexparser as fp @dataclass(frozen=True) class ImportDefinition(fp.IncludeStatement): value: str @property def target(self): return self.value @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[ImportDefinition]: if s.startswith("@import"): return ImportDefinition(s[len("@import") :].strip()) return None @dataclass(frozen=True) class EntryBlock( fp.RootBlock[ ty.Union[ common.Comment, ImportDefinition, defaults.DefaultsDefinition, context.ContextDefinition, group.GroupDefinition, system.SystemDefinition, plain.DimensionDefinition, plain.PrefixDefinition, plain.UnitDefinition, ] ] ): pass cfg = common.Config() p = pathlib.Path("files/default_en.txt") parsed = fp.parse( p, EntryBlock, cfg, delimiters={ "#": ( fp.DelimiterInclude.SPLIT_BEFORE, fp.DelimiterAction.CAPTURE_NEXT_TIL_EOL, ), **fp.SPLIT_EOL, }, ) def pprint(objs, indent=1): TT = " " print(TT * (indent - 1), objs.opening) for p in objs.body: if isinstance(p, fp.Block): pprint(p, indent + 1) else: print(TT * indent, p) print(TT * (indent - 1), objs.closing) print("Keys") print("----") for k in parsed.keys(): print(k) print("\n\n----\n\n") for x in parsed.iter_blocks(): print(x) print("\n") print("Errors") print("------") for p in parsed.errors(): print(p) flexparser-0.4/examples/pint/pint_parser/000077500000000000000000000000001471301744300206525ustar00rootroot00000000000000flexparser-0.4/examples/pint/pint_parser/__init__.py000066400000000000000000000000001471301744300227510ustar00rootroot00000000000000flexparser-0.4/examples/pint/pint_parser/common.py000066400000000000000000000071461471301744300225240ustar00rootroot00000000000000from __future__ import annotations import numbers import typing as ty from dataclasses import dataclass from flexparser import flexparser as fp from . import errors from .pintimports import ParserHelper, UnitsContainer @dataclass(frozen=True) class Config: """Configuration used by the parser.""" #: Indicates the output type of non integer numbers. 
non_int_type: ty.Type[numbers.Number] = float def to_scaled_units_container(self, s: str): return ParserHelper.from_string(s, self.non_int_type) def to_units_container(self, s: str): v = self.to_scaled_units_container(s) if v.scale != 1: raise errors.UnexpectedScaleInContainer(str(v.scale)) return UnitsContainer(v) def to_dimension_container(self, s: str): v = self.to_units_container(s) _ = [check_dim(el) for el in v.keys()] return v def to_number(self, s: str) -> numbers.Number: """Try to parse a string into a number (without using eval). The string can contain a number or a simple equation (3 + 4). Raises ------ NotNumeric If the string cannot be parsed as a number. """ val = self.to_scaled_units_container(s) if len(val): raise NotNumeric(s) return val.scale @dataclass(frozen=True) class Equality(fp.ParsedStatement): """An equality statement contains a left and a right hand side separated by an equal (=) sign. lhs = rhs lhs and rhs are space stripped. """ lhs: str rhs: str @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[Equality]: if "=" not in s: return None parts = [p.strip() for p in s.split("=")] if len(parts) != 2: return errors.DefinitionSyntaxError( f"Exactly two terms expected, not {len(parts)} (`{s}`)" ) return cls(*parts) @dataclass(frozen=True) class Comment(fp.ParsedStatement): """Comments start with a # character. # This is a comment. ## This is also a comment. The captured value does not include the leading # character and is space stripped. """ comment: str @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[fp.ParsedStatement]: if not s.startswith("#"): return None return cls(s[1:].strip()) @dataclass(frozen=True) class EndDirectiveBlock(fp.ParsedStatement): """An EndDirectiveBlock is simply an "@end" statement.""" @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[EndDirectiveBlock]: if s == "@end": return cls() return None @dataclass(frozen=True) class DirectiveBlock(fp.Block): """Directive blocks have an opening statement starting with a @ character and end with an "@end" statement (captured using an EndDirectiveBlock). Subclass this class for convenience. """ closing: EndDirectiveBlock class NotNumeric(Exception): """Internal exception. Do not expose outside Pint""" def __init__(self, value): self.value = value def is_dim(name: str) -> bool: return name[0] == "[" and name[-1] == "]" def check_dim(name: str) -> ty.Union[errors.DefinitionSyntaxError, str]: name = name.strip() if not is_dim(name): raise errors.DefinitionSyntaxError( f"Dimension definition `{name}` must be enclosed by []." ) if not str.isidentifier(name[1:-1]): raise errors.DefinitionSyntaxError( f"`{name[1:-1]}` is not a valid dimension name (must follow Python identifier rules)." ) return name flexparser-0.4/examples/pint/pint_parser/context.py000066400000000000000000000150141471301744300227110ustar00rootroot00000000000000from __future__ import annotations import numbers import re from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Callable from flexparser import flexparser as fp from .
import common, errors if TYPE_CHECKING: from pint import Quantity, UnitsContainer class ParserHelper: @classmethod def from_string(cls, s, *args): return s @dataclass(frozen=True) class _Relation: _varname_re = re.compile(r"[A-Za-z_][A-Za-z0-9_]*") src: UnitsContainer dst: UnitsContainer equation: str @classmethod def _from_string_and_context_sep( cls, s: str, config: common.Config, separator: str ) -> fp.NullableParsedResult[_Relation]: if separator not in s: return None if ":" not in s: return None rel, eq = s.split(":") parts = rel.split(separator) try: src, dst = (config.to_dimension_container(s) for s in parts) except errors.DefinitionSyntaxError as ex: return ex return cls(src, dst, eq.strip()) @property def variables(self) -> set[str, ...]: """Find all variable names in the equation.""" return set(self._varname_re.findall(self.equation)) @property def transformation(self) -> Callable[..., Quantity[Any]]: """Return a transformation callable that uses the registry to parse the transformation equation. """ return lambda ureg, value, **kwargs: ureg.parse_expression( self.equation, value=value, **kwargs ) @dataclass(frozen=True) class ForwardRelation(fp.ParsedStatement, _Relation): """A relation connecting a dimension to another via a transformation function. <dimension 1> -> <dimension 2>: <equation> """ @property def bidirectional(self): return False @classmethod def from_string_and_config( cls, s: str, config: common.Config ) -> fp.NullableParsedResult[ForwardRelation]: return super()._from_string_and_context_sep(s, config, "->") @dataclass(frozen=True) class BidirectionalRelation(fp.ParsedStatement, _Relation): """A bidirectional relation connecting a dimension to another via a simple transformation function. <dimension 1> <-> <dimension 2>: <equation> """ @property def bidirectional(self): return True @classmethod def from_string_and_config( cls, s: str, config: common.Config ) -> fp.NullableParsedResult[BidirectionalRelation]: return super()._from_string_and_context_sep(s, config, "<->") @dataclass(frozen=True) class BeginContext(fp.ParsedStatement): """Beginning of a context directive. @context[(defaults)] <name> [= <alias>] [= <alias>] """ _header_re = re.compile( r"@context\s*(?P<defaults>\(.*\))?\s+(?P<name>\w+)\s*(=(?P<aliases>.*))*" ) name: str aliases: tuple[str, ...]
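    # Illustrative note (not in the original source): with the header regex above,
    # a line such as "@context(n=1) spectroscopy = sp" parses into
    # name="spectroscopy", aliases=("sp",) and defaults={"n": 1} (default values
    # go through config.to_number in from_string_and_config below).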
defaults: dict[str, numbers.Number] @classmethod def from_string_and_config( cls, s: str, config: common.Config ) -> fp.NullableParsedResult[BeginContext]: try: r = cls._header_re.search(s) if r is None: return None name = r.groupdict()["name"].strip() aliases = r.groupdict()["aliases"] if aliases: aliases = tuple(a.strip() for a in r.groupdict()["aliases"].split("=")) else: aliases = () defaults = r.groupdict()["defaults"] except Exception as ex: return errors.DefinitionSyntaxError( "Could not parse the Context header", ex ) if defaults: # TODO: Use config non_int_type txt = defaults try: defaults = (part.split("=") for part in defaults.strip("()").split(",")) defaults = {str(k).strip(): config.to_number(v) for k, v in defaults} except (ValueError, TypeError) as exc: return errors.DefinitionSyntaxError( f"Could not parse Context definition defaults: '{txt}'", exc ) else: defaults = {} return cls(name, tuple(aliases), defaults) @dataclass(frozen=True) class ContextDefinition(common.DirectiveBlock): """Definition of a Context @context[(defaults)] <name> [= <alias>] [= <alias>] # units can be redefined within the context <redefined unit> = <relation to other units> # can establish unidirectional relationships between dimensions <dimension 1> -> <dimension 2>: <transformation function> # can establish bidirectional relationships between dimensions <dimension 3> <-> <dimension 4>: <transformation function> @end See BeginContext, Equality, ForwardRelation, BidirectionalRelation and Comment for more parsing related information. Example:: @context(n=1) spectroscopy = sp # n index of refraction of the medium. [length] <-> [frequency]: speed_of_light / n / value [frequency] -> [energy]: planck_constant * value [energy] -> [frequency]: value / planck_constant # allow wavenumber / kayser [wavenumber] <-> [length]: 1 / value @end """ @property def variables(self) -> set[str, ...]: """Return all variable names in all transformations.""" return set.union(*(r.variables for r in self.body if isinstance(r, _Relation))) # TODO: some checks are missing # @staticmethod # def parse_definition(line, non_int_type) -> UnitDefinition: # definition = Definition.from_string(line, non_int_type) # if not isinstance(definition, UnitDefinition): # raise DefinitionSyntaxError( # "Expected <unit> = <converter>; got %s" % line.strip() # ) # if definition.symbol != definition.name or definition.aliases: # raise DefinitionSyntaxError( # "Can't change a unit's symbol or aliases within a context" # ) # if definition.is_base: # raise DefinitionSyntaxError("Can't define plain units within a context") # return definition # def __post_init__(self): # missing_pars = self.opening.defaults.keys() - self.variables # if missing_pars: # raise DefinitionSyntaxError( # f"Context parameters {missing_pars} not found in any equation" # ) flexparser-0.4/examples/pint/pint_parser/defaults.py000066400000000000000000000012421471301744300230320ustar00rootroot00000000000000from __future__ import annotations from dataclasses import dataclass import flexparser.flexparser as fp from . import common @dataclass(frozen=True) class BeginDefaults(fp.ParsedStatement): """Beginning of a defaults directive. @defaults """ @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[BeginDefaults]: if s.strip() == "@defaults": return cls() return None @dataclass(frozen=True) class DefaultsDefinition(common.DirectiveBlock): """Directive to store values. @defaults system = mks @end See Equality and Comment for more parsing related information.
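    Illustrative note (not in the original docstring): the ``system = mks`` line
    in the example above would be captured by Equality as
    ``Equality(lhs="system", rhs="mks")`` in the block body.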
""" flexparser-0.4/examples/pint/pint_parser/errors.py000066400000000000000000000005441471301744300225430ustar00rootroot00000000000000from __future__ import annotations import typing as ty from dataclasses import dataclass from flexparser import flexparser as fp @dataclass(frozen=True) class DefinitionSyntaxError(fp.ParsingError): msg: str base_exception: ty.Optional[Exception] = None @dataclass(frozen=True) class UnexpectedScaleInContainer(fp.ParsingError): msg: str flexparser-0.4/examples/pint/pint_parser/group.py000066400000000000000000000034161471301744300223640ustar00rootroot00000000000000from __future__ import annotations import re import typing as ty from dataclasses import dataclass import flexparser.flexparser as fp from . import common, plain @dataclass(frozen=True) class BeginGroup(fp.ParsedStatement): """Being of a group directive. @group [using , ..., ] """ #: Regex to match the header parts of a definition. _header_re = re.compile(r"@group\s+(?P\w+)\s*(using\s(?P.*))*") name: str using_group_names: ty.Tuple[str, ...] @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[BeginGroup]: if not s.startswith("@group"): return None r = cls._header_re.search(s) if r is None: raise ValueError("Invalid Group header syntax: '%s'" % s) name = r.groupdict()["name"].strip() groups = r.groupdict()["used_groups"] if groups: parent_group_names = tuple(a.strip() for a in groups.split(",")) else: parent_group_names = () return cls(name, parent_group_names) @dataclass(frozen=True) class GroupDefinition(common.DirectiveBlock): """Definition of a group. @group [using , ..., ] ... @end See UnitDefinition and Comment for more parsing related information. Example:: @group AvoirdupoisUS using Avoirdupois US_hundredweight = hundredweight = US_cwt US_ton = ton US_force_ton = force_ton = _ = US_ton_force @end """ @property def unit_names(self) -> ty.Tuple[str, ...]: return tuple( el.name for el in self.body if isinstance(el, plain.UnitDefinition) ) flexparser-0.4/examples/pint/pint_parser/pintimports.py000066400000000000000000000002701471301744300236130ustar00rootroot00000000000000from __future__ import annotations from pint.converters import Converter from pint.util import ParserHelper, UnitsContainer __all__ = ["Converter", "ParserHelper", "UnitsContainer"] flexparser-0.4/examples/pint/pint_parser/plain.py000066400000000000000000000177641471301744300223460ustar00rootroot00000000000000from __future__ import annotations import numbers import typing as ty from dataclasses import dataclass from functools import cached_property from typing import Callable from flexparser import flexparser as fp from . import common, errors from .pintimports import Converter, UnitsContainer @dataclass(frozen=True) class PrefixDefinition(fp.ParsedStatement): """Definition of a prefix:: - = [= ] [= ] [ = ] [...] Example:: deca- = 1e+1 = da- = deka- """ name: str value: numbers.Number defined_symbol: str | None aliases: ty.Tuple[str, ...] 
@classmethod def from_string_and_config( cls, s: str, config: common.Config ) -> fp.NullableParsedResult[PrefixDefinition]: if "=" not in s: return None name, value, *aliases = s.split("=") name = name.strip() if not name.endswith("-"): return None aliases = tuple(alias.strip().rstrip("-") for alias in aliases) if aliases and aliases[0].strip() != "_": defined_symbol, *aliases = aliases else: defined_symbol = None try: value = config.to_number(value) except common.NotNumeric as ex: return errors.DefinitionSyntaxError( f"Prefix definition ('{name}') must contain only numbers, not {ex.value}" ) return cls(name, value, defined_symbol, aliases) @property def symbol(self) -> str: return self.defined_symbol or self.name @property def has_symbol(self) -> bool: return bool(self.defined_symbol) @cached_property def converter(self): return Converter.from_arguments(scale=self.value) @dataclass(frozen=True) class UnitDefinition(fp.ParsedStatement): """Definition of a unit:: <canonical name> = <relation to other units or dimensions> [= <symbol>] [= <alias>] [= <alias>] [...] Example:: millennium = 1e3 * year = _ = millennia Parameters ---------- reference : UnitsContainer Reference units. is_base : bool Indicates if it is a base unit. """ name: str defined_symbol: ty.Optional[str] aliases: ty.Tuple[str, ...] converter: ty.Optional[ty.Union[Callable, Converter]] reference: ty.Optional[UnitsContainer] is_base: bool @classmethod def from_string_and_config( cls, s: str, config: common.Config ) -> fp.NullableParsedResult[UnitDefinition]: if "=" not in s: return None name, value, *aliases = (p.strip() for p in s.split("=")) if aliases and aliases[0].strip() == "_": defined_symbol, *aliases = aliases else: defined_symbol = None if ";" in value: [converter, modifiers] = value.split(";", 1) try: modifiers = { key.strip(): config.to_number(value) for key, value in (part.split(":") for part in modifiers.split(";")) } except common.NotNumeric as ex: return errors.DefinitionSyntaxError( f"Unit definition ('{name}') must contain only numbers in modifier, not {ex.value}" ) else: converter = value modifiers = {} converter = config.to_scaled_units_container(converter) if not any(common.is_dim(key) for key in converter.keys()): is_base = False elif all(common.is_dim(key) for key in converter.keys()): is_base = True if converter.scale != 1: return errors.DefinitionSyntaxError( "Base unit definitions cannot have a scale different from 1. " f"(`{converter.scale}` found)" ) else: return errors.DefinitionSyntaxError( "Cannot mix dimensions and units in the same definition. " "Base units must be referenced only to dimensions. " "Derived units must be referenced only to units."
) try: from pint.util import UnitsContainer reference = UnitsContainer(converter) # reference = converter.to_units_container() except errors.DefinitionSyntaxError as ex: return errors.DefinitionSyntaxError(f"While defining {name}", ex) try: converter = Converter.from_arguments(scale=converter.scale, **modifiers) except Exception as ex: return errors.DefinitionSyntaxError( "Unable to assign a converter to the unit", ex ) return cls( name, defined_symbol, aliases, converter, reference, is_base, ) @property def is_multiplicative(self) -> bool: return self.converter.is_multiplicative @property def is_logarithmic(self) -> bool: return self.converter.is_logarithmic @property def symbol(self) -> str: return self.defined_symbol or self.name @property def has_symbol(self) -> bool: return bool(self.defined_symbol) @dataclass(frozen=True) class DimensionDefinition(fp.ParsedStatement): """Definition of a root dimension:: [dimension name] Example:: [volume] """ name: str @property def is_base(self): return False @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[DimensionDefinition]: s = s.strip() if not (s.startswith("[") and "=" not in s): return None try: s = common.check_dim(s) except errors.DefinitionSyntaxError as ex: return ex return cls(s) @dataclass(frozen=True) class DerivedDimensionDefinition(fp.ParsedStatement): """Definition of a derived dimension:: [dimension name] = <relation to other dimensions> Example:: [density] = [mass] / [volume] """ name: str reference: UnitsContainer @property def is_base(self): return False @classmethod def from_string_and_config( cls, s: str, config: common.Config ) -> fp.NullableParsedResult[DerivedDimensionDefinition]: if not (s.startswith("[") and "=" in s): return None name, value, *aliases = s.split("=") try: name = common.check_dim(name) except errors.DefinitionSyntaxError as ex: return ex if aliases: return errors.DefinitionSyntaxError( "Derived dimensions cannot have aliases." ) try: reference = config.to_dimension_container(value) except errors.DefinitionSyntaxError as ex: return errors.DefinitionSyntaxError( f"In {name} derived dimensions must only be referenced " "to dimensions.", ex, ) return cls(name.strip(), reference) @dataclass(frozen=True) class AliasDefinition(fp.ParsedStatement): """Additional alias(es) for an already existing unit:: @alias <canonical name> = <alias> [= <alias>] [...] Example:: @alias meter = my_meter """ name: str aliases: ty.Tuple[str, ...] @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[AliasDefinition]: if not s.startswith("@alias "): return None name, *aliases = s[len("@alias ") :].split("=") name = name.strip() if name.startswith("["): return errors.DefinitionSyntaxError( "Derived dimensions cannot have aliases." ) if name.endswith("-"): return errors.DefinitionSyntaxError( "Prefix aliases cannot be added after the initial definition." ) return cls(name.strip(), tuple(alias.strip() for alias in aliases)) flexparser-0.4/examples/pint/pint_parser/system.py000066400000000000000000000046731471301744300225600ustar00rootroot00000000000000from __future__ import annotations import re import typing as ty from dataclasses import dataclass import flexparser.flexparser as fp from .
import common, errors @dataclass(frozen=True) class Rule(fp.ParsedStatement): new_unit_name: str old_unit_name: ty.Optional[str] = None @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[Rule]: if ":" not in s: return cls(s.strip()) parts = [p.strip() for p in s.split(":")] if len(parts) != 2: return errors.DefinitionSyntaxError( f"Exactly two terms expected for rule, not {len(parts)} (`{s}`)" ) return cls(*parts) @dataclass(frozen=True) class BeginSystem(fp.ParsedStatement): """Beginning of a system directive. @system <name> [using <group 1>, ..., <group N>] """ #: Regex to match the header parts of a system. _header_re = re.compile(r"@system\s+(?P<name>\w+)\s*(using\s(?P<used_groups>.*))*") name: str using_group_names: ty.Tuple[str, ...] @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[BeginSystem]: if not s.startswith("@system"): return None r = cls._header_re.search(s) if r is None: raise ValueError("Invalid System header syntax '%s'" % s) name = r.groupdict()["name"].strip() groups = r.groupdict()["used_groups"] # If the system has no group, it automatically uses the root group. if groups: group_names = tuple(a.strip() for a in groups.split(",")) else: group_names = ("root",) return cls(name, group_names) @dataclass(frozen=True) class SystemDefinition(common.DirectiveBlock): """Definition of a System: @system <name> [using <group 1>, ..., <group N>] <rule 1> ... <rule N> @end See Rule and Comment for more parsing related information. The syntax for the rule is: new_unit_name : old_unit_name where: - old_unit_name: a root unit that is going to be removed from the system. - new_unit_name: a non root unit that is going to replace the old_unit. If the new_unit_name and the old_unit_name are the same, the latter and the colon can be omitted. """ @property def unit_replacements(self) -> ty.Tuple[ty.Tuple[str, str], ...]: return tuple((el.new_unit_name, el.old_unit_name) for el in self.body) flexparser-0.4/flexparser/000077500000000000000000000000001471301744300157075ustar00rootroot00000000000000flexparser-0.4/flexparser/__init__.py000066400000000000000000000032011471301744300200140ustar00rootroot00000000000000""" flexparser ~~~~~~~~~ Classes and functions to create parsers. The idea is quite simple. You write a class for every type of content (called here ``ParsedStatement``) you need to parse. Each class should have a ``from_string`` constructor. We use the ``typing`` module extensively to make the output structure easy to use and less error prone. :copyright: 2022 by flexparser Authors, see AUTHORS for more details. :license: BSD, see LICENSE for more details.
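Example (an illustrative sketch, not part of the original docstring)::

    from dataclasses import dataclass
    import flexparser as fp

    @dataclass(frozen=True)
    class Assignment(fp.ParsedStatement):
        lhs: str
        rhs: str

        @classmethod
        def from_string(cls, s):
            if "=" not in s:
                return None  # this class does not handle the statement
            lhs, _, rhs = s.partition("=")
            return cls(lhs.strip(), rhs.strip())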
""" from __future__ import annotations from importlib.metadata import version try: # pragma: no cover __version__ = version("flexparser") except Exception: # pragma: no cover # we seem to have a local copy not installed without setuptools # so the reported version will be unknown __version__ = "unknown" from .flexparser import ( BOF, BOR, BOS, EOS, SPLIT_EOL, Block, DelimiterAction, DelimiterInclude, IncludeStatement, NullableParsedResult, ParsedProject, ParsedSource, ParsedStatement, Parser, ParsingError, RootBlock, StatementIterator, UnexpectedEOS, UnknownStatement, parse, parse_bytes, ) # Deprecate in 0.3 UnexpectedEOF = UnexpectedEOS __all__ = ( "__version__", "Block", "DelimiterAction", "DelimiterInclude", "IncludeStatement", "ParsedStatement", "Parser", "ParsingError", "RootBlock", "StatementIterator", "UnexpectedEOF", "UnexpectedEOS", "UnknownStatement", "parse", "ParsedSource", "ParsedProject", "SPLIT_EOL", "BOF", "BOR", "BOS", "EOS", "parse_bytes", "NullableParsedResult", ) flexparser-0.4/flexparser/flexparser.py000066400000000000000000001545171471301744300204510ustar00rootroot00000000000000""" flexparser.flexparser ~~~~~~~~~~~~~~~~~~~~~ Classes and functions to create parsers. The idea is quite simple. You write a class for every type of content (called here ``ParsedStatement``) you need to parse. Each class should have a ``from_string`` constructor. We used extensively the ``typing`` module to make the output structure easy to use and less error prone. For more information, take a look at https://github.com/hgrecco/flexparser :copyright: 2022 by flexparser Authors, see AUTHORS for more details. :license: BSD, see LICENSE for more details. """ from __future__ import annotations import collections import dataclasses import enum import functools import hashlib import hmac import inspect import logging import pathlib import re import sys import typing as ty import warnings from dataclasses import dataclass from functools import cached_property from importlib import resources from typing import Any, Optional, Union, no_type_check if sys.version_info >= (3, 10): from typing import TypeAlias # noqa else: from typing_extensions import TypeAlias # noqa if sys.version_info >= (3, 11): from typing import Self # noqa else: from typing_extensions import Self # noqa _LOGGER = logging.getLogger("flexparser") _SENTINEL = object() _HASH_ALGORITHMS = "blake2b", "blake2s", "sha3_512", "sha512", "sha1" for _algo_name in _HASH_ALGORITHMS: try: _DEFAULT_HASHER = getattr(hashlib, _algo_name) _DEFAULT_HASHER(b"Always look at the bright side of life!").hexdigest() break except Exception: pass else: msg = ( f"Could not use any of the predefined hash algorithms {_HASH_ALGORITHMS}.\n" "Your Python distribution's hashlib module is incomplete or not properly installed.\n" "If you can't fix it, set the algorithm manually in the parser by:\n" ">>> parser._hasher = hasher\n" ) def _internal(*args: Any, **kwargs: Any): raise ValueError(msg) _DEFAULT_HASHER = _internal # type: ignore warnings.warn(msg) class HasherAlgorithm(ty.Protocol): def __call__(self, b: bytes, usedforsecurity: bool) -> HasherProtocol: ... class HasherProtocol(ty.Protocol): @property def name(self) -> str: ... def hexdigest(self) -> str: ... 
class GenericInfo: _specialized: Optional[ dict[type, Optional[list[tuple[type, dict[ty.TypeVar, type]]]]] ] = None @staticmethod def _summarize(d: dict[ty.TypeVar, type]) -> dict[ty.TypeVar, type]: d = d.copy() while True: for k, v in d.items(): if isinstance(v, ty.TypeVar): d[k] = d[v] break else: return d del d[v] @classmethod def _specialization(cls) -> dict[ty.TypeVar, type]: if cls._specialized is None: return dict() out: dict[ty.TypeVar, type] = {} specialized = cls._specialized[cls] if specialized is None: return {} for parent, content in specialized: for tvar, typ in content.items(): out[tvar] = typ origin = getattr(parent, "__origin__", None) if origin is not None and origin in cls._specialized: out = {**origin._specialization(), **out} return out @classmethod def specialization(cls) -> dict[ty.TypeVar, type]: return GenericInfo._summarize(cls._specialization()) def __init_subclass__(cls) -> None: if cls._specialized is None: cls._specialized = {GenericInfo: None} tv: list[ty.TypeVar] = [] entries: list[tuple[type, dict[ty.TypeVar, type]]] = [] for par in getattr(cls, "__parameters__", ()): if isinstance(par, ty.TypeVar): tv.append(par) for b in getattr(cls, "__orig_bases__", ()): for k in cls._specialized.keys(): if getattr(b, "__origin__", None) is k: entries.append((b, {k: v for k, v in zip(tv, b.__args__)})) break cls._specialized[cls] = entries return super().__init_subclass__() ################ # Exceptions ################ @dataclass(frozen=True) class Statement: """Base class for parsed elements within a source file.""" is_position_set: bool = dataclasses.field(init=False, default=False, repr=False) start_line: int = dataclasses.field(init=False, default=0) start_col: int = dataclasses.field(init=False, default=0) end_line: int = dataclasses.field(init=False, default=0) end_col: int = dataclasses.field(init=False, default=0) raw: Optional[str] = dataclasses.field(init=False, default=None) @classmethod def from_statement(cls, statement: Statement) -> Self: out = cls() if statement.is_position_set: out.set_position(*statement.get_position()) if statement.raw is not None: out.set_raw(statement.raw) return out @classmethod def from_statement_iterator_element( cls, values: tuple[int, int, int, int, str] ) -> Self: out = cls() out.set_position(*values[:-1]) out.set_raw(values[-1]) return out @property def format_position(self) -> str: if not self.is_position_set: return "N/A" return "%d,%d-%d,%d" % self.get_position() @property def raw_strip(self) -> Optional[str]: if self.raw is None: return None return self.raw.strip() def get_position(self) -> tuple[int, int, int, int]: if self.is_position_set: return self.start_line, self.start_col, self.end_line, self.end_col return 0, 0, 0, 0 def set_position( self: Self, start_line: int, start_col: int, end_line: int, end_col: int ) -> Self: object.__setattr__(self, "is_position_set", True) object.__setattr__(self, "start_line", start_line) object.__setattr__(self, "start_col", start_col) object.__setattr__(self, "end_line", end_line) object.__setattr__(self, "end_col", end_col) return self def set_raw(self: Self, raw: str) -> Self: object.__setattr__(self, "raw", raw) return self def set_simple_position(self: Self, line: int, col: int, width: int) -> Self: return self.set_position(line, col, line, col + width) class ParsingError(Exception): """Base class for all parsing exceptions in this package.""" _statement: Statement | None = None def __str__(self) -> str: cv: str = self.custom_values_str() if cv: cv = ", " + cv return 
self.__class__.__name__ + str(self.statement)[9:-1] + cv + ")" def custom_values_str(self) -> str: return "" def __eq__(self, other: Any) -> bool: if not isinstance(other, ParsingError): return False return self.statement == other.statement @property def statement(self) -> Statement: if self._statement is None: self._statement = Statement() return self._statement @statement.setter def statement(self, value: Statement): self._statement = value @property def is_position_set(self) -> bool: return self.statement.is_position_set @property def start_line(self) -> int: return self.statement.start_line @property def start_col(self) -> int: return self.statement.start_col @property def end_line(self) -> int: return self.statement.end_line @property def end_col(self) -> int: return self.statement.end_col @property def raw(self) -> Optional[str]: return self.statement.raw @property def format_position(self) -> str: return self.statement.format_position @property def raw_strip(self) -> Optional[str]: return self.statement.raw_strip def get_position(self) -> tuple[int, int, int, int]: return self.statement.get_position() def set_position( self: Self, start_line: int, start_col: int, end_line: int, end_col: int ) -> Self: self.statement.set_position(start_line, start_col, end_line, end_col) return self def set_raw(self: Self, raw: str) -> Self: self.statement.set_raw(raw) return self def set_simple_position(self: Self, line: int, col: int, width: int) -> Self: return self.set_position(line, col, line, col + width) class UnknownStatement(ParsingError): """A string statement could not be parsed.""" def __str__(self) -> str: return ( f"Could not parse '{self.statement.raw}' ({self.statement.format_position})" ) class UnhandledParsingError(ParsingError): """A parsing error produced by an unhandled exception while parsing a statement.""" exception: Exception def __init__(self, statement: Statement, exception: Exception) -> None: super().__init__(statement) self.exception = exception def __str__(self) -> str: return f"Unhandled exception while parsing '{self.statement.raw}' ({self.statement.format_position}): {self.exception}" class UnexpectedEOS(ParsingError): """End of file was found within an open block.""" ############################# # Useful methods and classes ############################# @dataclass(frozen=True) class Hash: algorithm_name: str hexdigest: str def __eq__(self, other: Any) -> bool: return ( isinstance(other, Hash) and self.algorithm_name != "" and self.algorithm_name == other.algorithm_name and hmac.compare_digest(self.hexdigest, other.hexdigest) ) @classmethod def from_bytes( cls, algorithm: HasherAlgorithm, b: bytes, ) -> Self: hasher = algorithm(b, usedforsecurity=False) return cls(hasher.name, hasher.hexdigest()) @classmethod def from_file_pointer( cls, algorithm: HasherAlgorithm, fp: ty.BinaryIO, ) -> Self: return cls.from_bytes(algorithm, fp.read()) @classmethod def nullhash(cls) -> Self: return cls("", "") def _yield_types( obj: type, valid_subclasses: tuple[type, ...] = (object,), recurse_origin: tuple[Any, ...] = (tuple, list, Union), ) -> ty.Generator[type, None, None]: """Recursively traverse a type annotation if the origin is any of the types in `recurse_origin` and yield those types which are subclasses of `valid_subclasses`.
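    Example (illustrative)::

        list(_yield_types(ty.Union[int, tuple[str, ...]]))
        # -> [<class 'int'>, <class 'str'>]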
""" if ty.get_origin(obj) in recurse_origin: for el in ty.get_args(obj): yield from _yield_types(el, valid_subclasses, recurse_origin) else: if inspect.isclass(obj) and issubclass(obj, valid_subclasses): yield obj class classproperty: # noqa N801 """Decorator for a class property In Python 3.9+ can be replaced by @classmethod @property def myprop(self): return 42 """ def __init__(self, fget): # type: ignore self.fget = fget def __get__(self, owner_self, owner_cls): # type: ignore return self.fget(owner_cls) # type: ignore class DelimiterInclude(enum.IntEnum): """Specifies how to deal with delimiters while parsing.""" #: Split at delimiter, not including in any string SPLIT = enum.auto() #: Split after, keeping the delimiter with previous string. SPLIT_AFTER = enum.auto() #: Split before, keeping the delimiter with next string. SPLIT_BEFORE = enum.auto() #: Do not split at delimiter. DO_NOT_SPLIT = enum.auto() class DelimiterAction(enum.IntEnum): """Specifies how to deal with delimiters while parsing.""" #: Continue parsing normally. CONTINUE = enum.auto() #: Capture everything til end of line as a whole. CAPTURE_NEXT_TIL_EOL = enum.auto() #: Stop parsing line and move to next. STOP_PARSING_LINE = enum.auto() #: Stop parsing content. STOP_PARSING = enum.auto() DO_NOT_SPLIT_EOL = { "\r\n": (DelimiterInclude.DO_NOT_SPLIT, DelimiterAction.CONTINUE), "\n": (DelimiterInclude.DO_NOT_SPLIT, DelimiterAction.CONTINUE), "\r": (DelimiterInclude.DO_NOT_SPLIT, DelimiterAction.CONTINUE), } SPLIT_EOL = { "\r\n": (DelimiterInclude.SPLIT, DelimiterAction.CONTINUE), "\n": (DelimiterInclude.SPLIT, DelimiterAction.CONTINUE), "\r": (DelimiterInclude.SPLIT, DelimiterAction.CONTINUE), } _EOLs_set = set(DO_NOT_SPLIT_EOL.keys()) @functools.lru_cache def _build_delimiter_pattern(delimiters: tuple[str, ...]) -> re.Pattern[str]: """Compile a tuple of delimiters into a regex expression with a capture group around the delimiter. """ return re.compile("|".join(f"({re.escape(el)})" for el in delimiters)) ############ # Iterators ############ DelimiterDictT = ty.Mapping[str, tuple[DelimiterInclude, DelimiterAction]] class Spliter: """Content iterator splitting according to given delimiters. The pattern can be changed dynamically sending a new pattern to the ty.Generator, see DelimiterInclude and DelimiterAction for more information. The current scanning position can be changed at any time. Parameters ---------- content : str delimiters : dict[str, tuple[DelimiterInclude, DelimiterAction]] Yields ------ start_line : int line number of the start of the content (zero-based numbering). start_col : int column number of the start of the content (zero-based numbering). end_line : int line number of the end of the content (zero-based numbering). end_col : int column number of the end of the content (zero-based numbering). part : str part of the text between delimiters. 
""" _pattern: Optional[re.Pattern[str]] _delimiters: DelimiterDictT __stop_searching_in_line: bool = False __pending: str = "" __first_line_col: Optional[tuple[int, int]] = None __lines: list[str] __lineno: int = 0 __colno: int = 0 def __init__(self, content: str, delimiters: DelimiterDictT): self.set_delimiters(delimiters) self.__lines = content.splitlines(keepends=True) def set_position(self, lineno: int, colno: int) -> None: self.__lineno, self.__colno = lineno, colno def set_delimiters(self, delimiters: DelimiterDictT) -> None: for k, v in delimiters.items(): if v == (DelimiterInclude.DO_NOT_SPLIT, DelimiterAction.STOP_PARSING): raise ValueError( f"The delimiter action for {k} is not a valid combination ({v})" ) # Build a pattern but removing eols _pat_dlm = tuple(set(delimiters.keys()) - _EOLs_set) if _pat_dlm: self._pattern = _build_delimiter_pattern(_pat_dlm) else: self._pattern = None # We add the end of line as delimiters if not present. self._delimiters = {**DO_NOT_SPLIT_EOL, **delimiters} def __iter__(self) -> Spliter: return self def __next__(self) -> tuple[int, int, int, int, str]: if self.__lineno >= len(self.__lines): raise StopIteration while True: if self.__stop_searching_in_line: # There must be part of a line pending to parse # due to stop line = self.__lines[self.__lineno] mo = None self.__stop_searching_in_line = False else: # We get the current line and the find the first delimiter. line = self.__lines[self.__lineno] if self._pattern is None: mo = None else: mo = self._pattern.search(line, self.__colno) if mo is None: # No delimiter was found, # which should happen at end of the content or end of line for k in DO_NOT_SPLIT_EOL.keys(): if line.endswith(k): dlm = line[-len(k) :] end_col, next_col = len(line) - len(k), 0 break else: # No EOL found, this is end of content dlm = None end_col, next_col = len(line), 0 next_line = self.__lineno + 1 else: next_line = self.__lineno end_col, next_col = mo.span() dlm = mo.group() part = line[self.__colno : end_col] if dlm is None: include, action = DelimiterInclude.SPLIT, DelimiterAction.STOP_PARSING else: include, action = self._delimiters[dlm] if include == DelimiterInclude.SPLIT: next_pending = "" else: # When dlm is None, DelimiterInclude.SPLIT assert isinstance(dlm, str) if include == DelimiterInclude.SPLIT_AFTER: end_col += len(dlm) part = part + dlm next_pending = "" elif include == DelimiterInclude.SPLIT_BEFORE: next_pending = dlm elif include == DelimiterInclude.DO_NOT_SPLIT: self.__pending += line[self.__colno : end_col] + dlm next_pending = "" else: raise ValueError(f"Unknown action {include}.") if action == DelimiterAction.STOP_PARSING: # this will raise a StopIteration in the next call. next_line = len(self.__lines) elif action == DelimiterAction.STOP_PARSING_LINE: next_line = self.__lineno + 1 next_col = 0 start_line = self.__lineno start_col = self.__colno end_line = self.__lineno self.__lineno = next_line self.__colno = next_col if action == DelimiterAction.CAPTURE_NEXT_TIL_EOL: self.__stop_searching_in_line = True if include == DelimiterInclude.DO_NOT_SPLIT: self.__first_line_col = start_line, start_col else: if self.__first_line_col is None: out = ( start_line, start_col - len(self.__pending), end_line, end_col, self.__pending + part, ) else: out = ( *self.__first_line_col, end_line, end_col, self.__pending + part, ) self.__first_line_col = None self.__pending = next_pending return out class StatementIterator: """Content peekable iterator splitting according to given delimiters. 
The pattern can be changed dynamically by sending a new pattern to the generator; see DelimiterInclude and DelimiterAction for more information. Parameters ---------- content : str delimiters : dict[str, tuple[DelimiterInclude, DelimiterAction]] Yields ------ Statement """ _cache: ty.Deque[Statement] def __init__( self, content: str, delimiters: DelimiterDictT, strip_spaces: bool = True ): self._cache = collections.deque() self._spliter = Spliter(content, delimiters) self._strip_spaces = strip_spaces def __iter__(self): return self def set_delimiters(self, delimiters: DelimiterDictT) -> None: self._spliter.set_delimiters(delimiters) if self._cache: value = self.peek() # Elements are 1 based indexing, while splitter is 0 based. self._spliter.set_position(value.start_line - 1, value.start_col) self._cache.clear() def _get_next_strip(self) -> Statement: part = "" while not part: start_line, start_col, end_line, end_col, part = next(self._spliter) lo = len(part) part = part.lstrip() start_col += lo - len(part) lo = len(part) part = part.rstrip() end_col -= lo - len(part) return Statement.from_statement_iterator_element( (start_line + 1, start_col, end_line + 1, end_col, part) # type: ignore ) def _get_next(self) -> Statement: if self._strip_spaces: return self._get_next_strip() part = "" while not part: start_line, start_col, end_line, end_col, part = next(self._spliter) return Statement.from_statement_iterator_element( (start_line + 1, start_col, end_line + 1, end_col, part) # type: ignore ) def peek(self, default: Any = _SENTINEL) -> Statement: """Return the item that will be next returned from ``next()``. Return ``default`` if there are no items left. If ``default`` is not provided, raise ``StopIteration``. """ if not self._cache: try: self._cache.append(self._get_next()) except StopIteration: if default is _SENTINEL: raise return default return self._cache[0] def __next__(self) -> Statement: if self._cache: return self._cache.popleft() return self._get_next() ########### # Parsing ########### # Configuration type T = ty.TypeVar("T") CT = ty.TypeVar("CT") PST = ty.TypeVar("PST", bound="ParsedStatement[Any]") LineColStr: TypeAlias = tuple[int, int, str] ParsedResult: TypeAlias = Union[T, ParsingError] NullableParsedResult: TypeAlias = Union[T, ParsingError, None] class ConsumeProtocol(ty.Protocol): @property def is_position_set(self) -> bool: ... @property def start_line(self) -> int: ... @property def start_col(self) -> int: ... @property def end_line(self) -> int: ... @property def end_col(self) -> int: ... @classmethod def consume( cls, statement_iterator: StatementIterator, config: Any ) -> NullableParsedResult[Self]: ... @dataclass(frozen=True) class ParsedStatement(ty.Generic[CT], Statement): """A single parsed statement. In order to write your own, you need to subclass it as a frozen dataclass and implement the parsing logic by overriding the `from_string` classmethod (or `from_string_and_config`, which takes two arguments: the string to parse and a configuration object given by the parser). It should return an instance of this class if parsing was successful, or None otherwise. """ @classmethod def from_string(cls, s: str) -> NullableParsedResult[Self]: """Parse a string into a ParsedStatement. Return values and their meaning: 1. None: the string cannot be parsed with this class. 2. A subclass of ParsedStatement: the string was parsed successfully. 3. A subclass of ParsingError: the string could be parsed with this class but there is an error.
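        Example (an illustrative sketch; mirrors ImportDefinition in
        examples/pint/parse-pint.py)::

            @classmethod
            def from_string(cls, s):
                if not s.startswith("@import"):
                    return None  # case 1: not handled by this class
                return cls(s[len("@import"):].strip())  # case 2: parsed successfully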
""" raise NotImplementedError( "ParsedStatement subclasses must implement " "'from_string' or 'from_string_and_config'" ) @classmethod def from_string_and_config(cls, s: str, config: CT) -> NullableParsedResult[Self]: """Parse a string into a ParsedStatement. Return files and their meaning: 1. None: the string cannot be parsed with this class. 2. A subclass of ParsedStatement: the string was parsed successfully 3. A subclass of ParsingError the string could be parsed with this class but there is an error. """ return cls.from_string(s) @classmethod def from_statement_and_config( cls, statement: Statement, config: CT ) -> NullableParsedResult[Self]: raw = statement.raw if raw is None: return None try: out = cls.from_string_and_config(raw, config) except Exception as ex: out = UnhandledParsingError(statement, ex) if out is None: return None out.set_position(*statement.get_position()) out.set_raw(raw) return out @classmethod def consume( cls, statement_iterator: StatementIterator, config: CT ) -> NullableParsedResult[Self]: """Peek into the iterator and try to parse. Return files and their meaning: 1. None: the string cannot be parsed with this class, the iterator is kept an the current place. 2. a subclass of ParsedStatement: the string was parsed successfully, advance the iterator. 3. a subclass of ParsingError: the string could be parsed with this class but there is an error, advance the iterator. """ statement = statement_iterator.peek() parsed_statement = cls.from_statement_and_config(statement, config) if parsed_statement is None: return None next(statement_iterator) return parsed_statement OPST = ty.TypeVar("OPST", bound="ParsedStatement[Any]") BPST = ty.TypeVar( "BPST", bound="Union[ParsedStatement[Any], Block[Any, Any, Any, Any]]" ) CPST = ty.TypeVar("CPST", bound="ParsedStatement[Any]") RBT = ty.TypeVar("RBT", bound="RootBlock[Any, Any]") @dataclass(frozen=True) class Block(ty.Generic[OPST, BPST, CPST, CT], GenericInfo): """A sequence of statements with an opening, body and closing.""" opening: ParsedResult[OPST] body: tuple[ParsedResult[BPST], ...] 
closing: Union[ParsedResult[CPST], EOS[CT]] delimiters: DelimiterDictT = dataclasses.field(default_factory=dict, init=False) def is_closed(self) -> bool: return not isinstance(self.closing, EOS) @property def is_position_set(self) -> bool: return self.opening.is_position_set @property def start_line(self) -> int: return self.opening.start_line @property def start_col(self) -> int: return self.opening.start_col @property def end_line(self) -> int: return self.closing.end_line @property def end_col(self) -> int: return self.closing.end_col def get_position(self) -> tuple[int, int, int, int]: return self.start_line, self.start_col, self.end_line, self.end_col @property def format_position(self) -> str: if not self.is_position_set: return "N/A" return "%d,%d-%d,%d" % self.get_position() def __iter__( self, ) -> ty.Generator[ ParsedResult[Union[OPST, BPST, Union[CPST, EOS[CT]]]], None, None ]: yield self.opening for el in self.body: if isinstance(el, Block): yield from el else: yield el yield self.closing def iter_blocks( self, ) -> ty.Generator[ParsedResult[Union[OPST, BPST, CPST]], None, None]: # raise RuntimeError("Is this used?") yield self.opening yield from self.body yield self.closing ################################################### # Convenience methods to iterate parsed statements ################################################### _ElementT = ty.TypeVar("_ElementT", bound=Statement) def filter_by( self, klass1: type[_ElementT], *klass: type[_ElementT] ) -> ty.Generator[_ElementT, None, None]: """Yield elements of a given class or classes.""" yield from (el for el in self if isinstance(el, (klass1,) + klass)) # type: ignore[misc] @cached_property def errors(self) -> tuple[ParsingError, ...]: """Tuple of errors found.""" return tuple(self.filter_by(ParsingError)) @property def has_errors(self) -> bool: """True if errors were found during parsing.""" return bool(self.errors) #################### # Statement classes #################### @classmethod def opening_classes(cls) -> ty.Generator[type[OPST], None, None]: """Classes representing any of the parsed statement that can open this block.""" try: opening = cls.specialization()[OPST] # type: ignore[misc] except KeyError: opening: type = ty.get_type_hints(cls)["opening"] # type: ignore[no-redef] yield from _yield_types(opening, ParsedStatement) # type: ignore @classmethod def body_classes(cls) -> ty.Generator[type[BPST], None, None]: """Classes representing any of the parsed statement that can be in the body.""" try: body = cls.specialization()[BPST] # type: ignore[misc] except KeyError: body: type = ty.get_type_hints(cls)["body"] # type: ignore[no-redef] yield from _yield_types(body, (ParsedStatement, Block)) # type: ignore @classmethod def closing_classes(cls) -> ty.Generator[type[CPST], None, None]: """Classes representing any of the parsed statement that can close this block.""" try: closing = cls.specialization()[CPST] # type: ignore[misc] except KeyError: closing: type = ty.get_type_hints(cls)["closing"] # type: ignore[no-redef] yield from _yield_types(closing, ParsedStatement) # type: ignore ########## # ParsedResult ########## @classmethod def consume_opening( cls, statement_iterator: StatementIterator, config: CT ) -> NullableParsedResult[OPST]: """Peek into the iterator and try to parse with any of the opening classes. See `ParsedStatement.consume` for more details. 
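        Example (illustrative; ``MyBlock`` is a hypothetical Block subclass)::

            opening = MyBlock.consume_opening(statement_iterator, config)
            if opening is None:
                ...  # no opening class matched; the iterator was not advanced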
""" for c in cls.opening_classes(): el = c.consume(statement_iterator, config) if el is not None: return el return None @classmethod def consume_body( cls, statement_iterator: StatementIterator, config: CT ) -> ParsedResult[BPST]: """Peek into the iterator and try to parse with any of the body classes. If the statement cannot be parsed, a UnknownStatement is returned. """ for c in cls.body_classes(): el = c.consume(statement_iterator, config) if el is not None: return el parsing_error = UnknownStatement() parsing_error.statement = next(statement_iterator) return parsing_error @classmethod def consume_closing( cls, statement_iterator: StatementIterator, config: CT ) -> NullableParsedResult[CPST]: """Peek into the iterator and try to parse with any of the opening classes. See `ParsedStatement.consume` for more details. """ for c in cls.closing_classes(): el = c.consume(statement_iterator, config) if el is not None: return el return None @classmethod def consume_body_closing( cls, opening: OPST, statement_iterator: StatementIterator, config: CT ) -> Self: body: list[ParsedResult[BPST]] = [] closing: ty.Union[CPST, ParsingError, None] = None last_line = opening.end_line while closing is None: try: closing = cls.consume_closing(statement_iterator, config) if closing is not None: continue el = cls.consume_body(statement_iterator, config) body.append(el) last_line = el.end_line except StopIteration: unexpected_end = cls.on_stop_iteration(config) unexpected_end.set_position(last_line + 1, 0, last_line + 1, 0) return cls(opening, tuple(body), unexpected_end) return cls(opening, tuple(body), closing) @classmethod def consume( cls, statement_iterator: StatementIterator, config: CT ) -> Union[Self, None]: """Try consume the block. Possible outcomes: 1. The opening was not matched, return None. 2. A subclass of Block, where body and closing migh contain errors. """ opening = cls.consume_opening(statement_iterator, config) if opening is None: return None if isinstance(opening, ParsingError): return None return cls.consume_body_closing(opening, statement_iterator, config) @classmethod def on_stop_iteration(cls, config: CT) -> ParsedResult[EOS[CT]]: unexpected_eos = UnexpectedEOS() unexpected_eos.statement = Statement() return unexpected_eos @dataclass(frozen=True) class BOS(ty.Generic[CT], ParsedStatement[CT]): """Beginning of source.""" # Hasher algorithm name and hexdigest content_hash: Hash @classmethod def from_string_and_config(cls, s: str, config: CT) -> NullableParsedResult[Self]: raise RuntimeError("BOS cannot be constructed from_string_and_config") @property def location(self) -> SourceLocationT: return "" @dataclass(frozen=True) class BOF(ty.Generic[CT], BOS[CT]): """Beginning of file.""" path: pathlib.Path # Modification time of the file. 
mtime: float @property def location(self) -> SourceLocationT: return self.path @dataclass(frozen=True) class BOR(ty.Generic[CT], BOS[CT]): """Beginning of resource.""" package: str resource_name: str @property def location(self) -> SourceLocationT: return self.package, self.resource_name @dataclass(frozen=True) class EOS(ty.Generic[CT], ParsedStatement[CT]): """End of sequence.""" @classmethod def from_string_and_config( cls: type[PST], s: str, config: CT ) -> NullableParsedResult[PST]: return cls() class RootBlock(ty.Generic[BPST, CT], Block[BOS[CT], BPST, EOS[CT], CT]): """A sequence of statements flanked by the beginning and end of the stream.""" @classmethod def consume_opening( cls, statement_iterator: StatementIterator, config: CT ) -> NullableParsedResult[BOS[CT]]: raise RuntimeError( "Implementation error, 'RootBlock.consume_opening' should never be called" ) @classmethod def consume(cls, statement_iterator: StatementIterator, config: CT) -> Self: block = super().consume(statement_iterator, config) if block is None: raise RuntimeError( "Implementation error, 'RootBlock.consume' should never return None" ) return block @classmethod def consume_closing( cls, statement_iterator: StatementIterator, config: CT ) -> NullableParsedResult[EOS[CT]]: return None @classmethod def on_stop_iteration(cls, config: CT) -> ParsedResult[EOS[CT]]: return EOS[CT]() ################# # Source parsing ################# ResourceT: TypeAlias = tuple[str, str] # package name, resource name StrictLocationT: TypeAlias = Union[pathlib.Path, ResourceT] SourceLocationT: TypeAlias = Union[str, StrictLocationT] @dataclass(frozen=True) class ParsedSource(ty.Generic[RBT, CT]): parsed_source: RBT # Parser configuration. config: CT @property def location(self) -> SourceLocationT: if isinstance(self.parsed_source.opening, ParsingError): raise self.parsed_source.opening return self.parsed_source.opening.location @cached_property def has_errors(self) -> bool: return self.parsed_source.has_errors def errors(self) -> ty.Generator[ParsingError, None, None]: yield from self.parsed_source.errors @dataclass(frozen=True) class CannotParseResourceAsFile(Exception): """The requested Python package resource cannot be located as a file in the file system. """ package: str resource_name: str class Parser(ty.Generic[RBT, CT], GenericInfo): """Parser class.""" #: class to iterate through statements in a source unit. _statement_iterator_class: type[StatementIterator] = StatementIterator #: Delimiters. _delimiters: DelimiterDictT = SPLIT_EOL _strip_spaces: bool = True #: source file text encoding. _encoding: str = "utf-8" #: configuration passed to from_string functions. _config: CT #: try to open resources as files. _prefer_resource_as_file: bool #: hash algorithm to use. Must be a callable member of hashlib _hasher: HasherAlgorithm = _DEFAULT_HASHER def __init__(self, config: CT, prefer_resource_as_file: bool = True): self._config = config self._prefer_resource_as_file = prefer_resource_as_file @classmethod def root_boot_class(cls) -> type[RBT]: """Class representing the root block class.""" try: return cls.specialization()[RBT] # type: ignore[misc] except KeyError: return ty.get_type_hints(cls)["root_boot_class"] # type: ignore[no-redef] def parse(self, source_location: SourceLocationT) -> ParsedSource[RBT, CT]: """Parse a file into a ParsedSourceFile or ParsedResource. Parameters ---------- source_location: if str or pathlib.Path, it is interpreted as a file.
if (str, str), it is interpreted as (package, resource) using the Python resources API. """ if isinstance(source_location, tuple) and len(source_location) == 2: if self._prefer_resource_as_file: try: return self.parse_resource_from_file(*source_location) except CannotParseResourceAsFile: pass return self.parse_resource(*source_location) if isinstance(source_location, str): return self.parse_file(pathlib.Path(source_location)) if isinstance(source_location, pathlib.Path): # type: ignore I still want to test it. return self.parse_file(source_location) raise TypeError( f"Unknown type {type(source_location)}, " "use str or pathlib.Path for files or " "(package: str, resource_name: str) tuple " "for a resource." ) def parse_bytes( self, b: bytes, bos: Optional[BOS[CT]] = None ) -> ParsedSource[RBT, CT]: if bos is None: bos = BOS[CT](Hash.from_bytes(self._hasher, b)).set_simple_position(0, 0, 0) sic = self._statement_iterator_class( b.decode(self._encoding), self._delimiters, self._strip_spaces ) parsed = self.root_boot_class().consume_body_closing(bos, sic, self._config) return ParsedSource( parsed, self._config, ) def parse_file(self, path: pathlib.Path) -> ParsedSource[RBT, CT]: """Parse a file into a ParsedSourceFile. Parameters ---------- path path of the file. """ with path.open(mode="rb") as fi: content = fi.read() bos = BOF[CT]( Hash.from_bytes(self._hasher, content), path, path.stat().st_mtime ).set_simple_position(0, 0, 0) return self.parse_bytes(content, bos) def parse_resource_from_file( self, package: str, resource_name: str ) -> ParsedSource[RBT, CT]: """Parse a resource into a ParsedSourceFile, opening as a file. Parameters ---------- package package name where the resource is located. resource_name name of the resource """ with resources.as_file(resources.files(package).joinpath(resource_name)) as p: path = p.resolve() if path.exists(): return self.parse_file(path) raise CannotParseResourceAsFile(package, resource_name) def parse_resource(self, package: str, resource_name: str) -> ParsedSource[RBT, CT]: """Parse a resource into a ParsedResource. Parameters ---------- package package name where the resource is located. resource_name name of the resource """ with resources.files(package).joinpath(resource_name).open("rb") as fi: content = fi.read() bos = BOR[CT]( Hash.from_bytes(self._hasher, content), package, resource_name ).set_simple_position(0, 0, 0) return self.parse_bytes(content, bos) ########## # Project ########## class IncludeStatement(ty.Generic[CT], ParsedStatement[CT]): """Include statements allow merging files.""" @property def target(self) -> str: raise NotImplementedError( "IncludeStatement subclasses must implement target property." ) class ParsedProject( ty.Generic[RBT, CT], dict[ Optional[tuple[StrictLocationT, str]], ParsedSource[RBT, CT], ], ): """Collection of files, independent or connected via IncludeStatement. Keys are either an absolute pathname or a (package name, resource name) tuple. None is the key of the root source. """ @cached_property def has_errors(self) -> bool: return any(el.has_errors for el in self.values()) def errors(self) -> ty.Generator[ParsingError, None, None]: for el in self.values(): yield from el.errors() def _iter_statements( self, items: ty.Iterable[tuple[Any, Any]], seen: set[Any], include_only_once: bool, ) -> ty.Generator[ParsedStatement[CT], None, None]: """Iter all definitions in the order they appear, going into the included files.
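        Illustrative usage (through the public ``iter_statements`` below;
        ``parsed_project`` is a hypothetical ParsedProject instance)::

            for stmt in parsed_project.iter_statements():
                ...  # statements from the root source and its includes, in order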
""" for source_location, parsed in items: seen.add(source_location) for parsed_statement in parsed.parsed_source: if isinstance(parsed_statement, IncludeStatement): location = parsed.location, parsed_statement.target if location in seen and include_only_once: raise ValueError(f"{location} was already included.") yield from self._iter_statements( ((location, self[location]),), seen, include_only_once ) else: yield parsed_statement def iter_statements( self, include_only_once: bool = True ) -> ty.Generator[ParsedStatement[CT], None, None]: """Iter all definitions in the order they appear, going into the included files. Parameters ---------- include_only_once if true, each file cannot be included more than once. """ yield from self._iter_statements([(None, self[None])], set(), include_only_once) def _iter_blocks( self, items: ty.Iterable[tuple[Any, Any]], seen: set[Any], include_only_once: bool, ) -> ty.Generator[ParsedStatement[CT], None, None]: """Iter all definitions in the order they appear, going into the included files. """ for source_location, parsed in items: seen.add(source_location) for parsed_statement in parsed.parsed_source.iter_blocks(): if isinstance(parsed_statement, IncludeStatement): location = parsed.location, parsed_statement.target if location in seen and include_only_once: raise ValueError(f"{location} was already included.") yield from self._iter_blocks( ((location, self[location]),), seen, include_only_once ) else: yield parsed_statement def iter_blocks( self, include_only_once: bool = True ) -> ty.Generator[ParsedStatement[CT], None, None]: """Iter all definitions in the order they appear, going into the included files. Parameters ---------- include_only_once if true, each file cannot be included more than once. """ yield from self._iter_blocks([(None, self[None])], set(), include_only_once) def default_locator(source_location: StrictLocationT, target: str) -> StrictLocationT: """Return a new location from current_location and target.""" if isinstance(source_location, pathlib.Path): current_location = pathlib.Path(source_location).resolve() if current_location.is_file(): current_path = current_location.parent else: current_path = current_location target_path = pathlib.Path(target) if target_path.is_absolute(): raise ValueError( f"Cannot refer to absolute paths in import statements ({source_location}, {target})." ) tmp = (current_path / target_path).resolve() if not tmp.is_relative_to(current_path): raise ValueError( f"Cannot refer to locations above the current location ({source_location}, {target})" ) return tmp.absolute() elif isinstance(source_location, tuple) and len(source_location) == 2: # type: ignore I still want to test it. return source_location[0], target raise TypeError( f"Cannot handle type {type(source_location)}, " "use str or pathlib.Path for files or " "(package: str, resource_name: str) tuple " "for a resource." 
def default_locator(source_location: StrictLocationT, target: str) -> StrictLocationT:
    """Return a new location from the current location and target."""

    if isinstance(source_location, pathlib.Path):
        current_location = pathlib.Path(source_location).resolve()

        if current_location.is_file():
            current_path = current_location.parent
        else:
            current_path = current_location

        target_path = pathlib.Path(target)
        if target_path.is_absolute():
            raise ValueError(
                f"Cannot refer to absolute paths in import statements ({source_location}, {target})."
            )

        tmp = (current_path / target_path).resolve()
        if not tmp.is_relative_to(current_path):
            raise ValueError(
                f"Cannot refer to locations above the current location ({source_location}, {target})."
            )

        return tmp.absolute()

    elif isinstance(source_location, tuple) and len(source_location) == 2:  # type: ignore  # runtime check kept on purpose.
        return source_location[0], target

    raise TypeError(
        f"Cannot handle type {type(source_location)}, "
        "use str or pathlib.Path for files or "
        "(package: str, resource_name: str) tuple "
        "for a resource."
    )
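
# Behavior sketch, mirroring the test suite (``/pkg/defs/base.txt`` is an
# illustrative, existing file):
#
#     default_locator(pathlib.Path("/pkg/defs/base.txt"), "extra.txt")
#     # -> pathlib.Path("/pkg/defs/extra.txt")
#
#     default_locator(("package", "name"), "other")
#     # -> ("package", "other")
#
# Absolute targets and targets that resolve above the current location
# raise ValueError; plain ``str`` locations raise TypeError.
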
""" if isinstance(spec, type): if issubclass(spec, Parser): CustomParser = spec elif issubclass(spec, RootBlock): CustomParser = _build_parser_class_root_block( spec, strip_spaces=strip_spaces, delimiters=delimiters ) elif issubclass(spec, Block): CustomRootBlock = _build_root_block_class_block(spec, config.__class__) CustomParser = _build_parser_class_root_block( CustomRootBlock, strip_spaces=strip_spaces, delimiters=delimiters ) elif issubclass(spec, ParsedStatement): CustomRootBlock = _build_root_block_class_parsed_statement( spec, config.__class__ ) CustomParser = _build_parser_class_root_block( CustomRootBlock, strip_spaces=strip_spaces, delimiters=delimiters ) else: raise TypeError( "`spec` must be of type Parser, Block, RootBlock or tuple of type Block or ParsedStatement, " f"not {type(spec)}" ) elif isinstance(spec, (tuple, list)): CustomRootBlock = _build_root_block_class_parsed_statement_it( spec, config.__class__ ) CustomParser = _build_parser_class_root_block( CustomRootBlock, strip_spaces=strip_spaces, delimiters=delimiters ) else: raise return CustomParser @no_type_check def parse( entry_point: SourceLocationT, spec: Union[ type[ Union[ Parser[RBT, CT], RootBlock[BPST, CT], Block[OPST, BPST, CPST, CT], ParsedStatement[CT], ] ], ty.Iterable[type[ParsedStatement[CT]]], ], config: CT = None, *, strip_spaces: bool = True, delimiters: Optional[DelimiterDictT] = None, locator: ty.Callable[[SourceLocationT, str], StrictLocationT] = default_locator, prefer_resource_as_file: bool = True, **extra_parser_kwargs: Any, ) -> Union[ParsedProject[RBT, CT], ParsedProject[RootBlock[BPST, CT], CT]]: """Parse sources into a ParsedProject dictionary. Parameters ---------- entry_point file or resource, given as (package_name, resource_name). spec specification of the content to parse. Can be one of the following things: - Parser class. - Block or ParsedStatement derived class. - ty.Iterable of Block or ParsedStatement derived class. - RootBlock derived class. config a configuration object that will be passed to `from_string_and_config` classmethod. strip_spaces : bool if True, spaces will be stripped for each statement before calling ``from_string_and_config``. delimiters : dict Specify how the source file is split into statements (See below). locator : Callable function that takes the current location and a target of an IncludeStatement and returns a new location. prefer_resource_as_file : bool if True, resources will try to be located in the filesystem if available. extra_parser_kwargs extra keyword arguments to be given to the parser. Delimiters dictionary --------------------- The delimiters are specified with the keys of the delimiters dict. The dict files can be used to further customize the iterator. Each consist of a tuple of two elements: 1. A value of the DelimiterMode to indicate what to do with the delimiter string: skip it, attach keep it with previous or next string 2. A boolean indicating if parsing should stop after fiSBT encountering this delimiter. 
""" CustomParser = build_parser_class(spec, config, strip_spaces, delimiters) parser = CustomParser( config, prefer_resource_as_file=prefer_resource_as_file, **extra_parser_kwargs ) pp = ParsedProject() pending: list[tuple[SourceLocationT, str]] = [] if isinstance(entry_point, (str, pathlib.Path)): entry_point = pathlib.Path(entry_point) if not entry_point.is_absolute(): entry_point = pathlib.Path.cwd() / entry_point elif not (isinstance(entry_point, tuple) and len(entry_point) == 2): raise TypeError( f"Cannot handle type {type(entry_point)}, " "use str or pathlib.Path for files or " "(package: str, resource_name: str) tuple " "for a resource." ) pp[None] = parsed = parser.parse(entry_point) pending.extend( (parsed.location, el.target) for el in parsed.parsed_source.filter_by(IncludeStatement) ) while pending: source_location, target = pending.pop(0) pp[(source_location, target)] = parsed = parser.parse( locator(source_location, target) ) pending.extend( (parsed.location, el.target) for el in parsed.parsed_source.filter_by(IncludeStatement) ) return pp @no_type_check def parse_bytes( content: bytes, spec: Union[ type[ Union[ Parser[RBT, CT], RootBlock[BPST, CT], Block[OPST, BPST, CPST, CT], ParsedStatement[CT], ] ], ty.Iterable[type[ParsedStatement[CT]]], ], config: Optional[CT] = None, *, strip_spaces: bool, delimiters: Optional[DelimiterDictT], **extra_parser_kwargs: Any, ) -> ParsedProject[ Union[RBT, RootBlock[BPST, CT], RootBlock[ParsedStatement[CT], CT]], CT ]: """Parse sources into a ParsedProject dictionary. Parameters ---------- content bytes. spec specification of the content to parse. Can be one of the following things: - Parser class. - Block or ParsedStatement derived class. - ty.Iterable of Block or ParsedStatement derived class. - RootBlock derived class. config a configuration object that will be passed to `from_string_and_config` classmethod. strip_spaces : bool if True, spaces will be stripped for each statement before calling ``from_string_and_config``. delimiters : dict Specify how the source file is split into statements (See below). 
""" CustomParser = build_parser_class(spec, config, strip_spaces, delimiters) parser = CustomParser(config, prefer_resource_as_file=False, **extra_parser_kwargs) pp = ParsedProject() pp[None] = parsed = parser.parse_bytes(content) if any(parsed.parsed_source.filter_by(IncludeStatement)): raise ValueError("parse_bytes does not support using an IncludeStatement") return pp flexparser-0.4/flexparser/py.typed000066400000000000000000000000001471301744300173740ustar00rootroot00000000000000flexparser-0.4/flexparser/testsuite/000077500000000000000000000000001471301744300177405ustar00rootroot00000000000000flexparser-0.4/flexparser/testsuite/__init__.py000066400000000000000000000000001471301744300220370ustar00rootroot00000000000000flexparser-0.4/flexparser/testsuite/bla1.txt000066400000000000000000000000301471301744300213110ustar00rootroot00000000000000include bla2.txt # chau flexparser-0.4/flexparser/testsuite/bla2.txt000066400000000000000000000000151471301744300213150ustar00rootroot00000000000000# hola x=1.0 flexparser-0.4/flexparser/testsuite/common.py000066400000000000000000000045201471301744300216030ustar00rootroot00000000000000from __future__ import annotations import sys from dataclasses import dataclass from typing import Union if sys.version_info >= (3, 10): from typing import TypeAlias # noqa else: from typing_extensions import TypeAlias # noqa if sys.version_info >= (3, 11): from typing import Self # noqa else: from typing_extensions import Self # noqa from flexparser import flexparser as fp class NotAValidIdentifier(fp.ParsingError): value: str def __init__(self, value: str) -> None: self.value = value def custom_values_str(self) -> str: return f"value='{self.value}'" class CannotParseToFloat(fp.ParsingError): value: str def __init__(self, value: str) -> None: self.value = value def custom_values_str(self) -> str: return f"value='{self.value}'" @dataclass(frozen=True) class Open(fp.ParsedStatement[None]): @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[Self]: if s == "@begin": return cls() return None @dataclass(frozen=True) class Close(fp.ParsedStatement[None]): @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[Self]: if s == "@end": return cls() return None @dataclass(frozen=True) class Comment(fp.ParsedStatement[None]): s: str @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[Self]: if s.startswith("#"): return cls(s) return None @dataclass(frozen=True) class EqualFloat(fp.ParsedStatement[None]): a: str b: float @classmethod def from_string(cls, s: str) -> fp.NullableParsedResult[Self]: if "=" not in s: return None a, b = s.split("=") a = a.strip() b = b.strip() if not str.isidentifier(a): return NotAValidIdentifier(a) try: b = float(b) except Exception: return CannotParseToFloat(b) return cls(a, b) class MyBlock(fp.Block[Open, Union[Comment, EqualFloat], Close, None]): pass class MyRoot(fp.RootBlock[Union[Comment, EqualFloat], None]): pass class MyParser(fp.Parser[MyRoot, None]): pass class MyRootWithBlock(fp.RootBlock[Union[Comment, EqualFloat, MyBlock], None]): pass class MyParserWithBlock(fp.Parser[MyRootWithBlock, None]): pass flexparser-0.4/flexparser/testsuite/test_element_block.py000066400000000000000000000141521471301744300241570ustar00rootroot00000000000000from __future__ import annotations from typing import Union import pytest from flexparser import flexparser as fp from flexparser.testsuite.common import ( CannotParseToFloat, Close, Comment, EqualFloat, MyBlock, MyRoot, NotAValidIdentifier, Open, ) class 
MyBlock2(fp.Block[Open, Union[Comment, EqualFloat], Close, None]): pass class MyRoot2(fp.RootBlock[Union[Comment, EqualFloat], None]): pass FIRST_NUMBER = 1 def test_block_classes(): assert tuple(MyBlock.opening_classes()) == (Open,) assert tuple(MyBlock.body_classes()) == ( Comment, EqualFloat, ) assert tuple(MyBlock.closing_classes()) == (Close,) assert tuple(MyRoot.opening_classes()) == () assert tuple(MyRoot.body_classes()) == ( Comment, EqualFloat, ) assert tuple(MyRoot.closing_classes()) == () assert tuple(MyBlock2.opening_classes()) == (Open,) assert tuple(MyBlock2.body_classes()) == ( Comment, EqualFloat, ) assert tuple(MyBlock2.closing_classes()) == (Close,) assert tuple(MyRoot2.opening_classes()) == () assert tuple(MyRoot2.body_classes()) == ( Comment, EqualFloat, ) assert tuple(MyRoot2.closing_classes()) == () def test_formatting(): obj = EqualFloat.from_string("a = 3.1") assert obj is not None assert obj.format_position == "N/A" obj.set_simple_position(10, 3, 7) assert obj.format_position == "10,3-10,10" assert ( str(obj) == "EqualFloat(start_line=10, start_col=3, end_line=10, end_col=10, raw=None, a='a', b=3.1)" ) obj = EqualFloat.from_string("%a = 3.1") assert obj is not None assert obj.format_position == "N/A" obj.set_simple_position(10, 3, 8) assert obj.format_position == "10,3-10,11" def test_parse_equal_float(): assert EqualFloat.from_string("a = 3.1") == EqualFloat("a", 3.1) assert EqualFloat.from_string("a") is None assert EqualFloat.from_string("%a = 3.1") == NotAValidIdentifier("%a") assert EqualFloat.from_string("a = 3f1") == CannotParseToFloat("3f1") obj = EqualFloat.from_string("a = 3f1") assert ( str(obj) == "CannotParseToFloat(start_line=0, start_col=0, end_line=0, end_col=0, raw=None, value='3f1')" ) def test_consume_equal_float(): f = lambda s: fp.StatementIterator(s, fp.SPLIT_EOL) assert EqualFloat.consume(f("a = 3.1"), None) == EqualFloat("a", 3.1).set_position( 1, 0, 1, 7 ).set_raw("a = 3.1") assert EqualFloat.consume(f("a"), None) is None assert EqualFloat.consume(f("%a = 3.1"), None) == NotAValidIdentifier( "%a" ).set_position(1, 0, 1, 8).set_raw("%a = 3.1") assert EqualFloat.consume(f("a = 3f1"), None) == CannotParseToFloat( "3f1" ).set_position(1, 0, 1, 7).set_raw("a = 3f1") @pytest.mark.parametrize("klass", (MyRoot, MyRoot2)) def test_stream_block(klass): lines = "# hola\nx=1.0" si = fp.StatementIterator(lines, fp.SPLIT_EOL) mb = klass.consume_body_closing(fp.BOS(fp.Hash.nullhash()), si, None) assert isinstance(mb.opening, fp.BOS) assert isinstance(mb.closing, fp.EOS) body = tuple(mb.body) assert len(body) == 2 assert body == ( Comment("# hola") .set_position(FIRST_NUMBER + 0, 0, FIRST_NUMBER + 0, 6) .set_raw("# hola"), EqualFloat("x", 1.0) .set_position(FIRST_NUMBER + 1, 0, FIRST_NUMBER + 1, 5) .set_raw("x=1.0"), ) assert tuple(mb) == (mb.opening, *body, mb.closing) assert not mb.has_errors @pytest.mark.parametrize("klass", (MyRoot, MyRoot2)) def test_stream_block_error(klass): lines = "# hola\nx=1f0" si = fp.StatementIterator(lines, fp.SPLIT_EOL) mb = klass.consume_body_closing(fp.BOS(fp.Hash.nullhash()), si, None) assert isinstance(mb.opening, fp.BOS) assert isinstance(mb.closing, fp.EOS) body = tuple(mb.body) assert len(body) == 2 assert body == ( Comment("# hola").set_simple_position(FIRST_NUMBER + 0, 0, 6).set_raw("# hola"), CannotParseToFloat("1f0") .set_simple_position(FIRST_NUMBER + 1, 0, 5) .set_raw("x=1f0"), ) assert tuple(mb) == (mb.opening, *body, mb.closing) assert mb.has_errors assert mb.errors == ( CannotParseToFloat("1f0") 
.set_simple_position(FIRST_NUMBER + 1, 0, 5) .set_raw("x=1f0"), ) @pytest.mark.parametrize("klass", (MyBlock, MyBlock2)) def test_block(klass): lines = "@begin\n# hola\nx=1.0\n@end" si = fp.StatementIterator(lines, fp.SPLIT_EOL) mb = klass.consume(si, None) assert mb.opening == Open().set_simple_position(FIRST_NUMBER + 0, 0, 6).set_raw( "@begin" ) assert mb.closing == Close().set_simple_position(FIRST_NUMBER + 3, 0, 4).set_raw( "@end" ) body = tuple(mb.body) assert len(body) == 2 assert mb.body == ( Comment("# hola").set_simple_position(FIRST_NUMBER + 1, 0, 6).set_raw("# hola"), EqualFloat("x", 1.0) .set_simple_position(FIRST_NUMBER + 2, 0, 5) .set_raw("x=1.0"), ) assert tuple(mb) == (mb.opening, *mb.body, mb.closing) assert not mb.has_errors @pytest.mark.parametrize("klass", (MyBlock, MyBlock2)) def test_unfinished_block(klass): lines = "@begin\n# hola\nx=1.0" si = fp.StatementIterator(lines, fp.SPLIT_EOL) mb = klass.consume(si, None) assert mb.opening == Open().set_simple_position(FIRST_NUMBER + 0, 0, 6).set_raw( "@begin" ) assert mb.closing == fp.UnexpectedEOS().set_simple_position(FIRST_NUMBER + 3, 0, 0) body = tuple(mb.body) assert len(body) == 2 assert mb.body == ( Comment("# hola").set_simple_position(FIRST_NUMBER + 1, 0, 6).set_raw("# hola"), EqualFloat("x", 1.0) .set_simple_position(FIRST_NUMBER + 2, 0, 5) .set_raw("x=1.0"), ) assert tuple(mb) == (mb.opening, *mb.body, mb.closing) assert mb.has_errors def test_not_proper_statement(): class MySt(fp.ParsedStatement): pass with pytest.raises(NotImplementedError): MySt.from_string("a = 1") with pytest.raises(NotImplementedError): MySt.from_string_and_config("a = 1", None) flexparser-0.4/flexparser/testsuite/test_iterators.py000066400000000000000000000204111471301744300233630ustar00rootroot00000000000000from __future__ import annotations import pytest import flexparser.flexparser as fp @pytest.mark.parametrize( "delimiters,content,expected", [ # ### 0 ( {}, "Testing # 123", ((0, 0, 0, 13, "Testing # 123"),), ), # ### 1 ( {"#": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.STOP_PARSING)}, "Testing # 123", ((0, 0, 0, 8, "Testing "),), ), # ### 2 ( {"#": (fp.DelimiterInclude.SPLIT_AFTER, fp.DelimiterAction.STOP_PARSING)}, "Testing # 123", ((0, 0, 0, 9, "Testing #"),), ), # ### 3 ( {"#": (fp.DelimiterInclude.SPLIT_BEFORE, fp.DelimiterAction.STOP_PARSING)}, "Testing # 123", ((0, 0, 0, 8, "Testing "),), ), ], ) def test_split_single_line(delimiters, content, expected): out = tuple(fp.Spliter(content, delimiters)) assert out == expected @pytest.mark.parametrize( "delimiters,content,expected", [ # ### 0 ( {}, "Testing # 123\nCaption # 456", ((0, 0, 1, 13, "Testing # 123\nCaption # 456"),), ), # ### 1 ( {"#": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.STOP_PARSING)}, "Testing # 123\nCaption # 456", ((0, 0, 0, 8, "Testing "),), ), # ### 2 ( {"#": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.STOP_PARSING_LINE)}, "Testing # 123\nCaption # 456", ( (0, 0, 0, 8, "Testing "), (1, 0, 1, 8, "Caption "), ), ), # ### 3 ( {"#": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CAPTURE_NEXT_TIL_EOL)}, "Testing # 123\nCaption # 456", ( (0, 0, 0, 8, "Testing "), (0, 9, 1, 8, " 123\nCaption "), (1, 9, 1, 13, " 456"), ), ), # ### 4 ( { "#": ( fp.DelimiterInclude.SPLIT_BEFORE, fp.DelimiterAction.CAPTURE_NEXT_TIL_EOL, ), "\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), }, "Testing # 123\nCaption # 456", ( (0, 0, 0, 
8, "Testing "), (0, 8, 0, 13, "# 123"), (1, 0, 1, 8, "Caption "), (1, 8, 1, 13, "# 456"), ), ), # ### 5 ( { "#": ( fp.DelimiterInclude.SPLIT_BEFORE, fp.DelimiterAction.CAPTURE_NEXT_TIL_EOL, ), "\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), }, "Testing ## 123\nCaption ## 456", ( (0, 0, 0, 8, "Testing "), (0, 8, 0, 14, "## 123"), (1, 0, 1, 8, "Caption "), (1, 8, 1, 14, "## 456"), ), ), # ### 6 ( { "#": (fp.DelimiterInclude.SPLIT_BEFORE, fp.DelimiterAction.CONTINUE), "\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), }, "Testing ## 123\nCaption ## 456", ( (0, 0, 0, 8, "Testing "), (0, 8, 0, 9, "#"), (0, 9, 0, 14, "# 123"), (1, 0, 1, 8, "Caption "), (1, 8, 1, 9, "#"), (1, 9, 1, 14, "# 456"), ), ), # ### 7 ( { "#": ( fp.DelimiterInclude.SPLIT_BEFORE, fp.DelimiterAction.CAPTURE_NEXT_TIL_EOL, ), "\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), }, "Testing ## 123\nCaption ## 456", ( (0, 0, 0, 8, "Testing "), (0, 8, 0, 14, "## 123"), (1, 0, 1, 8, "Caption "), (1, 8, 1, 14, "## 456"), ), ), ], ) def test_split_multi_line(delimiters, content, expected): out = tuple(fp.Spliter(content, delimiters)) assert out == expected def test_statement(): dlm = { "#": ( fp.DelimiterInclude.SPLIT_BEFORE, fp.DelimiterAction.CAPTURE_NEXT_TIL_EOL, ), "\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), } content = "Testing ## 123\nCaption ## 456" bi = fp.StatementIterator(content, dlm) assert bi.peek().raw_strip == "Testing" assert next(bi).raw_strip == "Testing" assert bi.peek().raw_strip == "## 123" assert next(bi).raw_strip == "## 123" el = next(bi) # strip spaces now changes the element # not the parser. assert el.raw == "Caption" assert el.raw_strip == "Caption" assert el.start_line == 2 assert el.start_col == 0 assert el.end_line == 2 assert el.end_col == 7 assert next(bi).raw_strip == "## 456" assert bi.peek("blip") == "blip" with pytest.raises(StopIteration): bi.peek() with pytest.raises(StopIteration): next(bi) def test_statement2(): dlm = { "#": ( fp.DelimiterInclude.SPLIT_BEFORE, fp.DelimiterAction.CAPTURE_NEXT_TIL_EOL, ), "\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), } content = "Testing ## 123\nCaption ## 456" bi = fp.StatementIterator(content, dlm) assert bi.peek().raw_strip == "Testing" assert next(bi).raw_strip == "Testing" assert bi.peek().raw_strip == "## 123" assert next(bi).raw_strip == "## 123" el = next(bi) # strip spaces now changes the element # not the parser. 
assert el.raw == "Caption" assert el.raw_strip == "Caption" assert el.start_line == 2 assert el.start_col == 0 assert el.end_line == 2 assert el.end_col == 7 assert next(bi).raw_strip == "## 456" assert bi.peek("blip") == "blip" with pytest.raises(StopIteration): bi.peek() with pytest.raises(StopIteration): next(bi) def test_statement_change_dlm(): dlm = { "#": ( fp.DelimiterInclude.SPLIT_BEFORE, fp.DelimiterAction.CAPTURE_NEXT_TIL_EOL, ), "\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), } dlm_new = { "!": ( fp.DelimiterInclude.SPLIT_BEFORE, fp.DelimiterAction.CAPTURE_NEXT_TIL_EOL, ), "\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), "\r\n": (fp.DelimiterInclude.SPLIT, fp.DelimiterAction.CONTINUE), } content = "Testing ## 123\nCaption !! 456" bi = fp.StatementIterator(content, dlm) assert bi.peek().raw_strip == "Testing" assert next(bi).raw_strip == "Testing" assert bi.peek().raw_strip == "## 123" assert next(bi).raw_strip == "## 123" assert bi.peek().raw_strip == "Caption !! 456" bi.set_delimiters(dlm_new) assert bi.peek().raw_strip == "Caption" assert next(bi).raw_strip == "Caption" assert next(bi).raw_strip == "!! 456" flexparser-0.4/flexparser/testsuite/test_parser.py000066400000000000000000000117161471301744300226530ustar00rootroot00000000000000from __future__ import annotations import pytest from flexparser import flexparser as fp from flexparser.testsuite.common import ( Comment, EqualFloat, MyBlock, MyParser, MyParserWithBlock, ) FIRST_NUMBER = 1 @pytest.mark.parametrize( "content", (b"# hola\nx<>1.0", b"# hola\r\nx<>1.0", b"# hola\rx<>1.0") ) def test_consume_err(content): myparser = MyParser(None) pf = myparser.parse_bytes(content).parsed_source assert isinstance(pf.opening, fp.BOS) assert isinstance(pf.closing, fp.EOS) body = tuple(pf.body) assert len(body) == 2 assert body == ( Comment("# hola").set_simple_position(FIRST_NUMBER + 0, 0, 6).set_raw("# hola"), fp.UnknownStatement() .set_simple_position(FIRST_NUMBER + 1, 0, 6) .set_raw("x<>1.0"), ) assert tuple(pf) == (pf.opening, *body, pf.closing) assert pf.has_errors assert str(body[-1]) == "Could not parse 'x<>1.0' (2,0-2,6)" @pytest.mark.parametrize( "content", (b"# hola\nx=1.0", b"# hola\r\nx=1.0", b"# hola\rx=1.0") ) def test_consume(content): myparser = MyParser(None) pf = myparser.parse_bytes(content).parsed_source assert pf.start_line == 0 assert pf.start_col == 0 assert pf.end_line == 3 assert pf.end_col == 0 assert pf.format_position == "0,0-3,0" assert isinstance(pf.opening, fp.BOS) assert isinstance(pf.closing, fp.EOS) body = tuple(pf.body) assert len(body) == 2 assert body == ( Comment("# hola").set_simple_position(FIRST_NUMBER + 0, 0, 6).set_raw("# hola"), EqualFloat("x", 1.0) .set_simple_position(FIRST_NUMBER + 1, 0, 5) .set_raw("x=1.0"), ) assert tuple(pf) == (pf.opening, *body, pf.closing) assert not pf.has_errors @pytest.mark.parametrize("use_string", (True, False)) def test_parse(tmp_path, use_string): content = "# hola\nx=1.0" tmp_file = tmp_path / "bla.txt" tmp_file.write_text(content) myparser = MyParser(None) if use_string: psf = myparser.parse(str(tmp_file)) else: psf = myparser.parse(tmp_file) assert not psf.has_errors assert psf.config is None assert psf.parsed_source.opening.mtime == tmp_file.stat().st_mtime assert psf.parsed_source.opening.path == tmp_file assert tuple(psf.errors()) 
== () assert psf.location == tmp_file # TODO: # assert psf.content_hash == hashlib.sha1(content.encode("utf-8")).hexdigest() mb = psf.parsed_source assert isinstance(mb.opening, fp.BOS) assert isinstance(mb.closing, fp.EOS) body = tuple(mb.body) assert len(body) == 2 assert body == ( Comment("# hola").set_simple_position(FIRST_NUMBER + 0, 0, 6).set_raw("# hola"), EqualFloat("x", 1.0) .set_simple_position(FIRST_NUMBER + 1, 0, 5) .set_raw("x=1.0"), ) assert tuple(mb) == (mb.opening, *body, mb.closing) assert not mb.has_errors def test_unfinished_block(tmp_path): content = "@begin\n# hola\nx=1.0" tmp_file = tmp_path / "bla.txt" tmp_file.write_text(content) myparser = MyParserWithBlock(None) psf = myparser.parse(tmp_file) assert psf.has_errors assert psf.config is None assert psf.parsed_source.opening.mtime == tmp_file.stat().st_mtime assert psf.parsed_source.opening.path == tmp_file assert tuple(psf.errors()) == ( fp.UnexpectedEOS().set_simple_position(FIRST_NUMBER + 3, 0, 0), ) assert psf.location == tmp_file mb = psf.parsed_source assert isinstance(mb.opening, fp.BOS) assert isinstance(mb.closing, fp.EOS) body = tuple(mb.body) assert len(body) == 1 assert isinstance(body[0], MyBlock) assert body[0].closing == fp.UnexpectedEOS().set_simple_position( FIRST_NUMBER + 3, 0, 0 ) assert body[0].body == ( Comment("# hola").set_simple_position(FIRST_NUMBER + 1, 0, 6).set_raw("# hola"), EqualFloat("x", 1.0) .set_simple_position(FIRST_NUMBER + 2, 0, 5) .set_raw("x=1.0"), ) @pytest.mark.parametrize("use_resource", (True, False)) def test_parse_resource(use_resource): myparser = MyParser(None) location = ("flexparser.testsuite", "bla2.txt") if use_resource: psf = myparser.parse_resource(*location) else: psf = myparser.parse(location) assert not psf.has_errors assert psf.config is None assert tuple(psf.errors()) == () if use_resource: assert psf.location == location # TODO: # assert psf.content_hash == hashlib.sha1(content.encode("utf-8")).hexdigest() mb = psf.parsed_source assert isinstance(mb.opening, fp.BOS) assert isinstance(mb.closing, fp.EOS) body = tuple(mb.body) assert len(body) == 2 assert body == ( Comment("# hola").set_simple_position(FIRST_NUMBER + 0, 0, 6).set_raw("# hola"), EqualFloat("x", 1.0) .set_simple_position(FIRST_NUMBER + 1, 0, 5) .set_raw("x=1.0"), ) assert tuple(mb) == (mb.opening, *body, mb.closing) assert not mb.has_errors flexparser-0.4/flexparser/testsuite/test_project.py000066400000000000000000000231521471301744300230220ustar00rootroot00000000000000from __future__ import annotations import hashlib import pathlib from dataclasses import dataclass import pytest from flexparser import flexparser as fp from flexparser.testsuite.common import ( Close, Comment, EqualFloat, MyBlock, MyParser, MyRoot, Open, ) FIRST_NUMBER = 1 def _bosser(content: bytes): return fp.BOS(fp.Hash.from_bytes(hashlib.blake2b, content)) def _compare(arr1, arr2): assert len(arr1) == len(arr2) for a1, a2 in zip(arr1, arr2): if isinstance(a1, fp.BOS) and isinstance(a2, fp.BOS): assert a1.content_hash == a2.content_hash else: assert a1 == a2, str(a1) + " == " + str(a2) def test_locator(): this_file = pathlib.Path(__file__) with pytest.raises(ValueError): # Cannot use absolute path as target. 
assert fp.default_locator(this_file, "/temp/bla.txt") with pytest.raises(TypeError): assert fp.default_locator(str(this_file), "bla.txt") with pytest.raises(TypeError): assert fp.default_locator(str(this_file), "/temp/bla.txt") assert fp.default_locator(this_file, "bla.txt") == this_file.parent / "bla.txt" assert ( fp.default_locator(this_file.parent, "bla.txt") == this_file.parent / "bla.txt" ) with pytest.raises(ValueError): assert ( fp.default_locator(this_file.parent, "../bla.txt") == this_file.parent / "bla.txt" ) assert fp.default_locator(("pack", "nam"), "bla") == ("pack", "bla") @pytest.mark.parametrize("definition", [MyRoot, (Comment, EqualFloat), MyParser]) def test_parse1(tmp_path, definition): content = b"# hola\nx=1.0" tmp_file = tmp_path / "bla.txt" tmp_file.write_bytes(content) pp = fp.parse(tmp_file, definition) assert len(pp) == 1 psf = pp[list(pp.keys())[0]] assert not psf.has_errors assert psf.config is None assert psf.parsed_source.opening.mtime == tmp_file.stat().st_mtime assert psf.parsed_source.opening.path == tmp_file assert tuple(psf.errors()) == () assert psf.location == tmp_file # TODO: # assert psf.content_hash == hashlib.sha1(content.encode("utf-8")).hexdigest() mb = psf.parsed_source assert isinstance(mb.opening, fp.BOS) assert isinstance(mb.closing, fp.EOS) body = tuple(mb.body) assert len(body) == 2 assert body == ( Comment("# hola").set_simple_position(FIRST_NUMBER + 0, 0, 6).set_raw("# hola"), EqualFloat("x", 1.0) .set_simple_position(FIRST_NUMBER + 1, 0, 5) .set_raw("x=1.0"), ) assert tuple(mb) == (mb.opening, *body, mb.closing) assert not mb.has_errors _compare( tuple(pp.iter_statements()), ( _bosser(content).set_simple_position(FIRST_NUMBER + 0, 0, 0), Comment("# hola") .set_simple_position(FIRST_NUMBER + 0, 0, 6) .set_raw("# hola"), EqualFloat("x", 1.0) .set_simple_position(FIRST_NUMBER + 1, 0, 5) .set_raw("x=1.0"), fp.EOS().set_simple_position(FIRST_NUMBER + 2, 0, 0), ), ) @pytest.mark.parametrize("definition", [MyRoot, EqualFloat, MyParser]) def test_parse2(tmp_path, definition): content = b"y = 2.0\nx=1.0" tmp_file = tmp_path / "bla.txt" tmp_file.write_bytes(content) pp = fp.parse(tmp_file, definition) assert len(pp) == 1 assert None in pp psf = pp[None] assert not psf.has_errors assert psf.config is None assert psf.parsed_source.opening.mtime == tmp_file.stat().st_mtime assert psf.parsed_source.opening.path == tmp_file assert tuple(psf.errors()) == () assert psf.location == tmp_file # TODO: # assert psf.content_hash == hashlib.sha1(content.encode("utf-8")).hexdigest() mb = psf.parsed_source assert isinstance(mb.opening, fp.BOS) assert isinstance(mb.closing, fp.EOS) body = tuple(mb.body) assert len(body) == 2 assert body == ( EqualFloat("y", 2.0) .set_simple_position(FIRST_NUMBER + 0, 0, 7) .set_raw("y = 2.0"), EqualFloat("x", 1.0) .set_simple_position(FIRST_NUMBER + 1, 0, 5) .set_raw("x=1.0"), ) assert tuple(mb) == (mb.opening, *body, mb.closing) assert not mb.has_errors _compare( tuple(pp.iter_statements()), ( _bosser(content).set_simple_position(FIRST_NUMBER + 0, 0, 0), EqualFloat("y", 2.0) .set_simple_position(FIRST_NUMBER + 0, 0, 7) .set_raw("y = 2.0"), EqualFloat("x", 1.0) .set_simple_position(FIRST_NUMBER + 1, 0, 5) .set_raw("x=1.0"), fp.EOS().set_simple_position(FIRST_NUMBER + 2, 0, 0), ), ) @pytest.mark.parametrize( "definition", [ MyBlock, ], ) def test_parse3(tmp_path, definition): content = b"@begin\ny = 2.0\nx=1.0\n@end" tmp_file = tmp_path / "bla.txt" tmp_file.write_bytes(content) pp = fp.parse(tmp_file, definition) assert not 
pp.has_errors
    assert len(pp) == 1
    assert tuple(pp.errors()) == ()

    assert None in pp
    psf = pp[None]

    assert not psf.has_errors
    assert psf.config is None
    assert psf.parsed_source.opening.mtime == tmp_file.stat().st_mtime
    assert psf.parsed_source.opening.path == tmp_file
    assert tuple(psf.errors()) == ()
    assert psf.location == tmp_file
    # TODO:
    # assert psf.content_hash == hashlib.sha1(content.encode("utf-8")).hexdigest()

    mb = psf.parsed_source
    assert isinstance(mb.opening, fp.BOS)
    assert isinstance(mb.closing, fp.EOS)
    body = tuple(mb.body)
    assert len(body) == 1

    mb = body[0]
    assert mb.start_line == 1
    assert mb.start_col == 0
    assert mb.end_line == 4
    assert mb.end_col == 4
    assert mb.format_position == "1,0-4,4"

    assert mb.opening == Open().set_simple_position(FIRST_NUMBER + 0, 0, 6).set_raw(
        "@begin"
    )
    assert tuple(mb.body) == (
        EqualFloat("y", 2.0)
        .set_simple_position(FIRST_NUMBER + 1, 0, 7)
        .set_raw("y = 2.0"),
        EqualFloat("x", 1.0)
        .set_simple_position(FIRST_NUMBER + 2, 0, 5)
        .set_raw("x=1.0"),
    )
    assert mb.closing == Close().set_simple_position(FIRST_NUMBER + 3, 0, 4).set_raw(
        "@end"
    )
    assert not mb.has_errors

    _compare(
        tuple(pp.iter_statements()),
        (
            _bosser(content).set_simple_position(FIRST_NUMBER + 0, 0, 0),
            Open().set_simple_position(FIRST_NUMBER + 0, 0, 6).set_raw("@begin"),
            EqualFloat("y", 2.0)
            .set_simple_position(FIRST_NUMBER + 1, 0, 7)
            .set_raw("y = 2.0"),
            EqualFloat("x", 1.0)
            .set_simple_position(FIRST_NUMBER + 2, 0, 5)
            .set_raw("x=1.0"),
            Close().set_simple_position(FIRST_NUMBER + 3, 0, 4).set_raw("@end"),
            fp.EOS().set_simple_position(FIRST_NUMBER + 4, 0, 0),
        ),
    )


def test_include_file(tmp_path):
    @dataclass(frozen=True)
    class Include(fp.IncludeStatement[None]):
        value: str

        @property
        def target(self) -> str:
            return "bla2.txt"

        @classmethod
        def from_string(cls, s: str):
            if s.startswith("include"):
                return cls(s[len("include ") :].strip())

    content1 = b"include bla2.txt\n# chau"
    content2 = b"# hola\nx=1.0"

    tmp_file1 = tmp_path / "bla1.txt"
    tmp_file2 = tmp_path / "bla2.txt"
    tmp_file1.write_bytes(content1)
    tmp_file2.write_bytes(content2)

    pp = fp.parse(tmp_file1, (Include, Comment, EqualFloat), None)
    assert None in pp
    assert (tmp_file1, "bla2.txt") in pp
    assert len(pp) == 2

    _compare(
        tuple(pp.iter_statements()),
        (
            _bosser(content1).set_simple_position(FIRST_NUMBER + 0, 0, 0),
            # Include
            _bosser(content2).set_simple_position(FIRST_NUMBER + 0, 0, 6),
            Comment("# hola")
            .set_simple_position(FIRST_NUMBER + 0, 0, 6)
            .set_raw("# hola"),
            EqualFloat("x", 1.0)
            .set_simple_position(FIRST_NUMBER + 1, 0, 5)
            .set_raw("x=1.0"),
            fp.EOS().set_simple_position(FIRST_NUMBER + 2, 0, 0),
            Comment("# chau")
            .set_simple_position(FIRST_NUMBER + 1, 0, 6)
            .set_raw("# chau"),
            fp.EOS().set_simple_position(FIRST_NUMBER + 2, 0, 0),
        ),
    )


def test_resources(tmp_path):
    @dataclass(frozen=True)
    class Include(fp.IncludeStatement[None]):
        value: str

        @property
        def target(self) -> str:
            return "bla2.txt"

        @classmethod
        def from_string(cls, s: str):
            if s.startswith("include"):
                return cls(s[len("include ") :].strip())

    # see files included in the testsuite.
content1 = b"include bla2.txt\n# chau\n" content2 = b"# hola\nx=1.0\n" pp = fp.parse(("flexparser.testsuite", "bla1.txt"), (Include, Comment, EqualFloat)) assert len(pp) == 2 _compare( tuple(pp.iter_statements()), ( _bosser(content1).set_simple_position(FIRST_NUMBER + 0, 0, 0), # include _bosser(content2).set_simple_position(FIRST_NUMBER + 0, 0, 0), Comment("# hola") .set_simple_position(FIRST_NUMBER + 0, 0, 6) .set_raw("# hola"), EqualFloat("x", 1.0) .set_simple_position(FIRST_NUMBER + 1, 0, 5) .set_raw("x=1.0"), fp.EOS().set_simple_position(FIRST_NUMBER + 2, 0, 0), Comment("# chau") .set_simple_position(FIRST_NUMBER + 1, 0, 6) .set_raw("# chau"), fp.EOS().set_simple_position(FIRST_NUMBER + 2, 0, 0), ), ) flexparser-0.4/flexparser/testsuite/test_util.py000066400000000000000000000032221471301744300223250ustar00rootroot00000000000000from __future__ import annotations import hashlib import typing import pytest import flexparser.flexparser as fp from flexparser.flexparser import _HASH_ALGORITHMS def test_yield_types(): class X: pass assert tuple(fp._yield_types(float)) == (float,) assert tuple(fp._yield_types(X)) == (X,) assert tuple(fp._yield_types(X())) == () def test_yield_types_container(): class X: pass o = tuple[float, X] assert tuple(fp._yield_types(o)) == (float, X) o = tuple[float, ...] assert tuple(fp._yield_types(o)) == (float,) o = tuple[typing.Union[float, X], ...] assert tuple(fp._yield_types(o)) == (float, X) def test_yield_types_union(): class X: pass o = typing.Union[float, X] assert tuple(fp._yield_types(o)) == (float, X) def test_yield_types_list(): o = list[float] assert tuple(fp._yield_types(o)) == (float,) def test_hash_object(): content = b"spam \n ham" hasher = hashlib.sha1 ho = fp.Hash.from_bytes(hashlib.sha1, content) hd = hasher(content).hexdigest() assert ho.algorithm_name == "sha1" assert ho.hexdigest == hd assert ho != hd assert ho != fp.Hash.from_bytes(hashlib.md5, content) assert ho == fp.Hash.from_bytes(hashlib.sha1, content) @pytest.mark.parametrize("algo_name", _HASH_ALGORITHMS) def test_hash_items(algo_name: str): content = b"spam \n ham" hasher = getattr(hashlib, algo_name) ho = fp.Hash.from_bytes(hasher, content) hd = hasher(content).hexdigest() assert ho.algorithm_name == algo_name assert ho.hexdigest == hd assert ho != hd assert ho == fp.Hash.from_bytes(hasher, content) flexparser-0.4/pyproject.toml000066400000000000000000000037441471301744300164600ustar00rootroot00000000000000[project] name = "flexparser" authors = [ {name="Hernan E. Grecco", email="hernan.grecco@gmail.com"} ] license = {text = "BSD-3-Clause"} description = "Parsing made fun ... using typing." readme = "README.rst" maintainers = [ {name="Hernan E. 
Grecco", email="hernan.grecco@gmail.com"}, ] keywords = ["parser", "code", "parsing", "source"] classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "License :: OSI Approved :: BSD License", "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", "Operating System :: POSIX", "Programming Language :: Python", "Topic :: System :: Filesystems", "Topic :: Software Development :: Libraries", "Topic :: Utilities", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] requires-python = ">=3.9" dynamic = ["dependencies", "optional-dependencies", "version"] [tool.setuptools.package-data] flexparser = ["py.typed"] [project.urls] Homepage = "https://github.com/hgrecco/flexparser" [tool.setuptools] packages = ["flexparser"] [tool.setuptools.dynamic] dependencies = {file = "requirements.txt"} optional-dependencies.test = {file = "requirements.test.txt"} [build-system] requires = ["setuptools>=61", "wheel", "setuptools_scm[toml]>=3.4.3"] build-backend = "setuptools.build_meta" [tool.setuptools_scm] [tool.ruff.lint.isort] required-imports = ["from __future__ import annotations"] known-first-party= ["flexparser"] [tool.ruff.lint] extend-select = [ "I", # isort ] ignore = [ # whitespace before ':' - doesn't work well with black # "E203", "E402", # line too long - let black worry about that "E501", # do not assign a lambda expression, use a def "E731", # line break before binary operator # "W503" ] [tool.ruff] extend-exclude = ["build"] line-length=88 flexparser-0.4/requirements.test.txt000066400000000000000000000000551471301744300177760ustar00rootroot00000000000000pytest pytest-mpl pytest-cov pytest-subtests flexparser-0.4/requirements.txt000066400000000000000000000000221471301744300170120ustar00rootroot00000000000000typing_extensions