--- lark-1.2.2/CHANGELOG.md ---

v1.0

- `maybe_placeholders` is now True by default
- Renamed TraditionalLexer to BasicLexer, and 'standard' lexer option to 'basic'
- Default priority is now 0, for both terminals and rules (used to be 1 for terminals)
- Discard mechanism is now done by returning Discard, instead of raising it as an exception.
- `use_accepts` in `UnexpectedInput.match_examples()` is now True by default
- `v_args(meta=True)` now gives meta as the first argument. i.e. `(meta, children)`

--- lark-1.2.2/LICENSE ---

Copyright © 2017 Erez Shinan

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--- lark-1.2.2/MANIFEST.in ---

include README.md LICENSE docs/* examples/*.py examples/*.png examples/*.lark tests/*.py tests/*.lark tests/grammars/* tests/test_nearley/*.py tests/test_nearley/grammars/*

--- lark-1.2.2/README.md ---

# Lark - a parsing toolkit for Python

Lark is a parsing toolkit for Python, built with a focus on ergonomics, performance and modularity.

Lark can parse all context-free languages. To put it simply, it means that it is capable of parsing almost any programming language out there, and to some degree most natural languages too.

**Who is it for?**

- **Beginners**: Lark is very friendly for experimentation. It can parse any grammar you throw at it, no matter how complicated or ambiguous, and do so efficiently. It also constructs an annotated parse-tree for you, using only the grammar and an input, and it gives you convenient and flexible tools to process that parse-tree.
- **Experts**: Lark implements both Earley(SPPF) and LALR(1), and several different lexers, so you can trade-off power and speed, according to your requirements. It also provides a variety of sophisticated features and utilities.

**What can it do?**

- Parse all context-free grammars, and handle any ambiguity gracefully
- Build an annotated parse-tree automagically, no construction code required.
- Provide first-rate performance in terms of both Big-O complexity and measured run-time (considering that this is Python ;)
- Run on every Python interpreter (it's pure-python)
- Generate a stand-alone parser (for LALR(1) grammars)

And many more features. Read ahead and find out!

Most importantly, Lark will save you time and prevent you from getting parsing headaches.
### Quick links

- [Documentation @readthedocs](https://lark-parser.readthedocs.io/)
- [Cheatsheet (PDF)](/docs/_static/lark_cheatsheet.pdf)
- [Online IDE](https://lark-parser.org/ide)
- [Tutorial](/docs/json_tutorial.md) for writing a JSON parser.
- Blog post: [How to write a DSL with Lark](http://blog.erezsh.com/how-to-write-a-dsl-in-python-with-lark/)
- [Gitter chat](https://gitter.im/lark-parser/Lobby)

### Install Lark

    $ pip install lark --upgrade

Lark has no dependencies.

[![Tests](https://github.com/lark-parser/lark/actions/workflows/tests.yml/badge.svg)](https://github.com/lark-parser/lark/actions/workflows/tests.yml)

### Syntax Highlighting

Lark provides syntax highlighting for its grammar files (\*.lark):

- [Sublime Text & TextMate](https://github.com/lark-parser/lark_syntax)
- [vscode](https://github.com/lark-parser/vscode-lark)
- [Intellij & PyCharm](https://github.com/lark-parser/intellij-syntax-highlighting)
- [Vim](https://github.com/lark-parser/vim-lark-syntax)
- [Atom](https://github.com/Alhadis/language-grammars)

### Clones

These are implementations of Lark in other languages. They accept Lark grammars, and provide similar utilities.

- [Lerche (Julia)](https://github.com/jamesrhester/Lerche.jl) - an unofficial clone, written entirely in Julia.
- [Lark.js (Javascript)](https://github.com/lark-parser/lark.js) - a port of the stand-alone LALR(1) parser generator to Javascript.

### Hello World

Here is a little program to parse "Hello, World!" (Or any other similar phrase):

```python
from lark import Lark

l = Lark('''start: WORD "," WORD "!"

            %import common.WORD   // imports from terminal library
            %ignore " "           // Disregard spaces in text
         ''')

print( l.parse("Hello, World!") )
```

And the output is:

```python
Tree(start, [Token(WORD, 'Hello'), Token(WORD, 'World')])
```

Notice punctuation doesn't appear in the resulting tree. It's automatically filtered away by Lark.

### Fruit flies like bananas

Lark is great at handling ambiguity. Here is the result of parsing the phrase "fruit flies like bananas":

![fruitflies.png](examples/fruitflies.png)

[Read the code here](https://github.com/lark-parser/lark/tree/master/examples/fruitflies.py), and see [more examples here](https://lark-parser.readthedocs.io/en/latest/examples/index.html).

## List of main features

- Builds a parse-tree (AST) automagically, based on the structure of the grammar
- **Earley** parser
    - Can parse all context-free grammars
    - Full support for ambiguous grammars
- **LALR(1)** parser
    - Fast and light, competitive with PLY
    - Can generate a stand-alone parser ([read more](docs/tools.md#stand-alone-parser))
- **EBNF** grammar
- **Unicode** fully supported
- Automatic line & column tracking
- Interactive parser for advanced parsing flows and debugging
- Grammar composition - Import terminals and rules from other grammars
- Standard library of terminals (strings, numbers, names, etc.)
- Import grammars from Nearley.js ([read more](/docs/tools.md#importing-grammars-from-nearleyjs))
- Extensive test suite [![codecov](https://codecov.io/gh/lark-parser/lark/branch/master/graph/badge.svg?token=lPxgVhCVPK)](https://codecov.io/gh/lark-parser/lark)
- Type annotations (MyPy support)
- And much more!
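
Two of the features listed above, the LALR(1) parser and the standard terminal library, can be combined in a few lines. The following sketch is not taken from the Lark documentation; the grammar and the `csv_numbers` name are only illustrative:

```python
from lark import Lark

# Comma-separated numbers, parsed with the faster LALR(1) parser.
csv_numbers = Lark(r"""
    start: NUMBER ("," NUMBER)*

    %import common.NUMBER   // from the standard terminal library
    %ignore " "
""", parser="lalr")

# Prints a small "start" tree containing the three number tokens.
print(csv_numbers.parse("1, 2.5, 300").pretty())
```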
See the full list of [features here](https://lark-parser.readthedocs.io/en/latest/features.html)

### Comparison to other libraries

#### Performance comparison

Lark is fast and light (lower is better)

![Run-time Comparison](docs/_static/comparison_runtime.png)

![Memory Usage Comparison](docs/_static/comparison_memory.png)

Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more details on how the comparison was made.

For thorough 3rd-party benchmarks, check out the [Python Parsing Benchmarks](https://github.com/goodmami/python-parsing-benchmarks) repo.

#### Feature comparison

| Library | Algorithm | Grammar | Builds tree? | Supports ambiguity? | Can handle every CFG? | Line/Column tracking | Generates Stand-alone |
|:--------|:----------|:--------|:-------------|:--------------------|:----------------------|:---------------------|:----------------------|
| **Lark** | Earley/LALR(1) | EBNF | Yes! | Yes! | Yes! | Yes! | Yes! (LALR only) |
| [PLY](http://www.dabeaz.com/ply/) | LALR(1) | BNF | No | No | No | No | No |
| [PyParsing](https://github.com/pyparsing/pyparsing) | PEG | Combinators | No | No | No\* | No | No |
| [Parsley](https://pypi.python.org/pypi/Parsley) | PEG | EBNF | No | No | No\* | No | No |
| [Parsimonious](https://github.com/erikrose/parsimonious) | PEG | EBNF | Yes | No | No\* | No | No |
| [ANTLR](https://github.com/antlr/antlr4) | LL(*) | EBNF | Yes | No | Yes? | Yes | No |

(\* *PEGs cannot handle non-deterministic grammars. Also, according to Wikipedia, it remains unanswered whether PEGs can really parse all deterministic CFGs*)

### Projects using Lark

- [Poetry](https://github.com/python-poetry/poetry-core) - A utility for dependency management and packaging
- [Vyper](https://github.com/vyperlang/vyper) - Pythonic Smart Contract Language for the EVM
- [PyQuil](https://github.com/rigetti/pyquil) - Python library for quantum programming using Quil
- [Preql](https://github.com/erezsh/preql) - An interpreted relational query language that compiles to SQL
- [Hypothesis](https://github.com/HypothesisWorks/hypothesis) - Library for property-based testing
- [mappyfile](https://github.com/geographika/mappyfile) - a MapFile parser for working with MapServer configuration
- [tartiflette](https://github.com/dailymotion/tartiflette) - GraphQL server by Dailymotion
- [synapse](https://github.com/vertexproject/synapse) - an intelligence analysis platform
- [Datacube-core](https://github.com/opendatacube/datacube-core) - Open Data Cube analyses continental scale Earth Observation data through time
- [SPFlow](https://github.com/SPFlow/SPFlow) - Library for Sum-Product Networks
- [Torchani](https://github.com/aiqm/torchani) - Accurate Neural Network Potential on PyTorch
- [Command-Block-Assembly](https://github.com/simon816/Command-Block-Assembly) - An assembly language, and C compiler, for Minecraft commands
- [EQL](https://github.com/endgameinc/eql) - Event Query Language
- [Fabric-SDK-Py](https://github.com/hyperledger/fabric-sdk-py) - Hyperledger fabric SDK with Python 3.x
- [required](https://github.com/shezadkhan137/required) - multi-field validation using docstrings
- [miniwdl](https://github.com/chanzuckerberg/miniwdl) - A static analysis toolkit for the Workflow Description Language
- [pytreeview](https://gitlab.com/parmenti/pytreeview) - a lightweight tree-based grammar explorer
- [harmalysis](https://github.com/napulen/harmalysis) - A language for harmonic analysis and music theory
- [gersemi](https://github.com/BlankSpruce/gersemi) - A CMake code formatter
- [MistQL](https://github.com/evinism/mistql) - A query language for JSON-like structures
- [Outlines](https://github.com/outlines-dev/outlines) - Structured generation with Large Language Models

[Full list](https://github.com/lark-parser/lark/network/dependents?package_id=UGFja2FnZS01MjI1OTE0NQ%3D%3D)

## License

Lark uses the [MIT license](LICENSE). (The standalone tool is under MPL2)

## Contributors

Lark accepts pull-requests. See [How to develop Lark](/docs/how_to_develop.md)

Big thanks to everyone who contributed so far:

## Sponsor

If you like Lark, and want to see us grow, please consider [sponsoring us!](https://github.com/sponsors/lark-parser)

## Contact the author

Questions about code are best asked on [gitter](https://gitter.im/lark-parser/Lobby) or in the issues.

For anything else, I can be reached by email at erezshin at gmail com. -- [Erez](https://github.com/erezsh)

--- lark-1.2.2/docs/Makefile ---

# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = Lark
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- lark-1.2.2/docs/_static/ ---

[Binary assets omitted: comparison_memory.png, comparison_runtime.png, lark_cheatsheet.pdf ("Lark Cheat Sheet by erezsh - Cheatography.com")]

--- lark-1.2.2/docs/_static/sppf/sppf.html ---

Shared Packed Parse Forest (SPPF)

In the last decade there has been a lot of interest in generalized parsing techniques. These techniques can be used to generate a working parser for any context-free grammar. This means that we no longer have to massage our grammar to fit into restricted classes such as LL(k) or LR(k). Supporting all context-free grammars means that grammars can be written in a natural way, and grammars can be combined, since the class of context-free grammars is closed under composition.

One consequence of supporting the whole class of context-free grammars is that ambiguous grammars are supported as well. In an ambiguous grammar there are sentences in the language that can be derived in multiple ways, and each derivation results in a distinct parse tree. With each additional ambiguity in the input sentence, the number of derivations may grow exponentially. Therefore generalized parsers output a parse forest rather than a set of parse trees. In this parse forest, sharing is used to reduce the total space required to represent all derivation trees: nodes that have the same subtree are shared, and nodes that correspond to different derivations of the same substring are combined. A parse forest in which sharing is employed is called a shared packed parse forest (SPPF).
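
Because this page ships with Lark's documentation, one concrete way to look at a tree-shaped view of such a forest is Lark's Earley parser with `ambiguity='explicit'`, which keeps every derivation and groups the alternatives under `_ambig` nodes. A minimal sketch; the grammar below is only an illustration:

```python
from lark import Lark

# "a+a+a" can be bracketed as (a+a)+a or a+(a+a), so there are two derivations.
parser = Lark(r"""
    e: e "+" e
     | "a"
""", start="e", ambiguity="explicit")   # Earley is the default parser

forest_view = parser.parse("a+a+a")
print(forest_view.pretty())   # both derivations show up under an "_ambig" node
```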

This article will describe the SPPF data structure in more detail. More information about the generation of the SPPF using the GLL algorithm can be found in the paper GLL parse-tree generation by E. Scott and A. Johnstone. Right Nulled GLR parsers can also be used to generate an SPPF, which is described in the paper Right Nulled GLR Parsers by E. Scott and A. Johnstone.

There are three types of nodes in an SPPF associated with a GLL parser: symbol nodes, packed nodes, and intermediate nodes. In the visualizations symbol nodes are shown as rectangles with rounded corners, packed nodes are shown as circles, or ovals when the label is visualized, and intermediate nodes are shown as rectangles.

Symbol nodes have labels of the form $(x,j,i)$ where $x$ is a terminal, nonterminal, or $\varepsilon$ (i.e. $x \in T \cup N \cup \lbrace \varepsilon \rbrace$), and $0 \leq j \leq i \leq m$ with $m$ being the length of the input sentence. The tuple $(j,i)$ is called the extent, and denotes that the symbol $x$ has been matched on the substring from position $j$ up to position $i$. Here $j$ is called the left extent, and $i$ is called the right extent.

Packed nodes have labels of the form $(t,k)$, where $0 \leq k \leq m$. Here $k$ is called the pivot, and $t$ is of the form $X ::= \alpha \cdot \beta$. The value of $k$ represents that the last symbol of $\alpha$ ends at position $k$ of the input string. Packed nodes are used to represent multiple derivation trees. When multiple derivations are possible with the same extent, starting from the same nonterminal symbol node, a separate packed node is added to the symbol node for each derivation.

Intermediate nodes are used to binarize the SPPF. They are introduced from the left, and group the children of packed nodes in pairs from the left. The binarization ensures that the size of the SPPF is worst-case cubic in the size of the input sentence. The fact that the SPPF is binarized does not mean that each node in the SPPF has at most two children. A symbol node or intermediate node can still have as many packed node children as there are ambiguities starting from it. Intermediate nodes have labels of the form $(t,j,i)$ where $t$ is a grammar slot, and $(j,i)$ is the extent. There are no intermediate nodes of the shape $(A ::= \alpha \cdot, j,i)$, where the grammar pointer of the grammar slot is at the end of the alternate. These grammar slots are present in the form of symbol nodes.

Consider the following grammar:

$\quad S ::= ABCD \quad A ::= a \quad B ::= b \quad C ::= c \quad D ::= d. $

Then given input sentence $abcd$, the following SPPF will be the result:

SPPF with intermediate nodes

Suppose that the intermediate nodes had not been added to the SPPF. Then the nonterminal symbol nodes for $A$, $B$, $C$, and $D$ would have been attached to the nonterminal symbol node $S$:

SPPF without intermediate nodes

This example shows how intermediate nodes ensure that the tree is binarized.
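
For readers experimenting with Lark, the same example can be reproduced against its Earley parser, which can hand back the SPPF directly through the ``ambiguity='forest'`` option. The snippet below is only a sketch: the rule names are lowercased because Lark requires lowercase rule names, and the exact textual representation of the returned forest node is not guaranteed.

```python
from lark import Lark

# Rough Lark transcription of the grammar S ::= ABCD, A ::= a, ..., D ::= d
# (Lark rule names must be lowercase, so S, A, B, C, D become start, a, b, c, d).
grammar = r"""
    start: a b c d
    a: "a"
    b: "b"
    c: "c"
    d: "d"
"""

# With the Earley parser, ambiguity='forest' returns the root symbol node of
# the SPPF instead of an already-built tree.
parser = Lark(grammar, parser="earley", ambiguity="forest")
forest = parser.parse("abcd")
print(forest)   # a symbol node spanning the whole input, i.e. extent (0, 4)
```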

Adding cycles

Grammars that contain cycles can generate sentences which have infinitely many derivation trees. A context-free grammar is cyclic if there exists a nonterminal $A \in N$ and a derivation $A \overset{+}\Rightarrow A$. Note that a cyclic context-free grammar implies that the context-free grammar is left-recursive, but the converse does not hold. The derivation trees for a cyclic grammar are represented in the finite SPPF by introducing cycles in the graph.

Consider the following cyclic grammar: $S ::= SS \mid a \mid \varepsilon$.

Given input sentence $a$, there are infinitely many derivations. All these derivations are present in the following SPPF:

SPPF containing an infinite number of derivations

Ambiguities

A parse forest is ambiguous if and only if it contains at least one ambiguity. An ambiguity arises when a symbol node or intermediate node has at least two packed nodes as its children. Such nodes are called ambiguous. Consider for instance the following grammar with input sentence $1+1+1$: $ E ::= E + E \mid 1 $.

This gives the following SPPF:

SPPF containing an ambiguous root node

In this SPPF, symbol node $(E,0,5)$ has two packed nodes as children. This means that there are at least two different parse trees starting at this node, the parse trees representing derivations $(E+(E+E))$ and $((E+E)+E)$ respectively.
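
The same ambiguity can be observed from Lark. The sketch below transliterates the grammar into Lark syntax (``expr`` stands in for $E$); with ``ambiguity='explicit'`` the Earley parser keeps both derivations and groups them under an ``_ambig`` tree node, much like the two packed nodes below $(E,0,5)$ in the forest.

```python
from lark import Lark

# 'expr' plays the role of E; the '!' prefix keeps the literal tokens in the tree.
parser = Lark(r"""
    start: expr
    !expr: expr "+" expr
         | "1"
""", parser="earley", ambiguity="explicit")

# Both derivations of 1+1+1 appear under a single '_ambig' node.
print(parser.parse("1+1+1").pretty())
```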

The set of all parse trees present in the SPPF is defined in the following way:

Start at the root node of the SPPF, and walk the tree by choosing one packed node below each visited node, and choosing all the children of a visited packed node in a recursive manner.
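
The following sketch spells out that definition in code. It uses two toy node classes rather than Lark's actual ``SymbolNode``/``PackedNode`` API, and it assumes an acyclic SPPF (for cyclic forests the enumeration would not terminate).

```python
from dataclasses import dataclass, field
from itertools import product

@dataclass
class Packed:
    children: list                              # symbol nodes of one derivation

@dataclass
class Symbol:
    label: str
    packs: list = field(default_factory=list)   # one Packed node per derivation

def trees(node):
    """Yield every parse tree below a symbol node, per the definition above."""
    if not node.packs:                          # terminal leaf
        yield node.label
        return
    for pack in node.packs:                     # choose one packed node...
        # ...and take all of its children, in every possible combination
        for combo in product(*(trees(child) for child in pack.children)):
            yield (node.label, list(combo))

# Toy ambiguity: X is derived either as the two tokens a, b or as the single token ab.
a, b, ab = Symbol("a"), Symbol("b"), Symbol("ab")
x = Symbol("X", packs=[Packed([a, b]), Packed([ab])])
print(list(trees(x)))   # [('X', ['a', 'b']), ('X', ['ab'])]
```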

Structural Properties

There are various structural properties that are useful when reasoning about SPPFs in general. First, note that each symbol node $(x,j,i)$ with $x \in T \cup N \cup \lbrace \varepsilon \rbrace$ is unique: an SPPF does not contain two symbol nodes $(A,k,l)$ and $(B,m,n)$ with $A = B$, $k = m$, and $l = n$.

Terminal symbol nodes have no children. These nodes represent the leaves of the parse forest. Nonterminal symbol nodes $(A,j,i)$ have packed node children of the form $(A ::= \gamma \cdot, k)$ with $j \leq k \leq i$, and the number of children is not limited to two.

Intermediate nodes $(t,j,i)$ have packed node children with labels of the form $(t,k)$, where $j \leq k \leq i$.

Packed nodes $(t,k)$ have one or two children. The right child is a symbol node $(x,k,i)$ and the left child (if it exists) is a symbol or intermediate node with label $(s,j,k)$, where $j \leq k \leq i$. Packed nodes always have exactly one parent, which is a symbol node or an intermediate node.

It is useful to observe that the SPPF is a bipartite graph, with the set of intermediate and symbol nodes on one side and the set of packed nodes on the other. Edges therefore always go from a node of one type to a node of the other type. As a consequence, cycles in the SPPF are always of even length.
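
Lark exposes this bipartite structure programmatically through the ``ForestVisitor`` class (documented in ``docs/forest.rst``). The sketch below counts the ambiguous nodes of a forest; the traversal contract assumed here (the ``*_in`` callbacks return an iterator over child nodes to keep descending) mirrors Lark's own visitors, but should be checked against the current API before relying on it.

```python
from lark import Lark
from lark.parsers.earley_forest import ForestVisitor

class AmbiguityCounter(ForestVisitor):
    """Counts symbol/intermediate nodes that have more than one packed child."""
    def __init__(self):
        super().__init__()
        self.count = 0

    def visit_symbol_node_in(self, node):
        if node.is_ambiguous:
            self.count += 1
        return iter(node.children)      # descend into the packed children

    def visit_packed_node_in(self, node):
        return iter(node.children)      # descend into the packed node's children

parser = Lark(r"""
    start: expr
    expr: expr "+" expr | "1"
""", parser="earley", ambiguity="forest")

counter = AmbiguityCounter()
counter.visit(parser.parse("1+1+1"))
print(counter.count)                    # number of ambiguous nodes in the forest
```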

Transformation to an abstract syntax tree

In the end, we often want a single abstract syntax tree (AST) when parsing an input sentence. In order to arrive at this AST, we need disambiguation techniques that remove undesired parse trees from the SPPF, or that avoid generating undesired parse trees in the first place. Van der Sanden's 2014 thesis describes several SPPF disambiguation filters that remove ambiguities arising in expression grammars, as well as a method to integrate parse-time filtering into GLL that tries to avoid embedding undesired parse trees in the SPPF.
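
Lark's default behaviour is exactly such a forest-to-tree reduction: with ``ambiguity='resolve'`` (the default) the Earley parser collapses the SPPF into a single tree using its internal prioritization, so most users never see the forest at all. A minimal sketch, reusing the toy grammar from above:

```python
from lark import Lark

grammar = r"""
    start: expr
    expr: expr "+" expr | "1"
"""

# ambiguity='resolve' is the default: instead of exposing the forest, Lark
# picks one derivation and returns a single parse tree.
parser = Lark(grammar, parser="earley", ambiguity="resolve")
print(parser.parse("1+1+1").pretty())
```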

Of course, other transformations might be needed as well, such as the removal of whitespace and comments from the parse forest.

© 2016 Bram van der Sanden

Source: Wayback Machine copy of the page that used to be at http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/.

lark-1.2.2/docs/_static/sppf/sppf_111.svg
lark-1.2.2/docs/_static/sppf/sppf_abcd.svg
lark-1.2.2/docs/_static/sppf/sppf_abcd_noint.svg
lark-1.2.2/docs/_static/sppf/sppf_cycle.svg
lark-1.2.2/docs/classes.rst
API Reference
=============

Lark
----

.. autoclass:: lark.Lark
   :members: open, parse, parse_interactive, lex, save, load, get_terminal, open_from_package

Using Unicode character classes with ``regex``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Python's builtin ``re`` module has a few persistent known bugs and also won't parse advanced regex features such as character classes. With ``pip install lark[regex]``, the ``regex`` module will be installed alongside lark and can act as a drop-in replacement to ``re``.

Any instance of Lark instantiated with ``regex=True`` will use the ``regex`` module instead of ``re``.

For example, we can use character classes to match PEP-3131 compliant Python identifiers:

::

    from lark import Lark
    >>> g = Lark(r"""
                    ?start: NAME
                    NAME: ID_START ID_CONTINUE*
                    ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/
                    ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}·]+/
                """, regex=True)

    >>> g.parse('வணக்கம்')
    'வணக்கம்'

Tree
----

.. autoclass:: lark.Tree
   :members: pretty, find_pred, find_data, iter_subtrees, scan_values, iter_subtrees_topdown, __rich__

Token
-----

.. autoclass:: lark.Token

Transformer, Visitor & Interpreter
----------------------------------

See :doc:`visitors`.

ForestVisitor, ForestTransformer, & TreeForestTransformer
-----------------------------------------------------------

See :doc:`forest`.

UnexpectedInput
---------------

.. autoclass:: lark.exceptions.UnexpectedInput
   :members: get_context, match_examples

.. autoclass:: lark.exceptions.UnexpectedToken

.. autoclass:: lark.exceptions.UnexpectedCharacters

.. autoclass:: lark.exceptions.UnexpectedEOF

InteractiveParser
-----------------

.. autoclass:: lark.parsers.lalr_interactive_parser.InteractiveParser
   :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts, as_immutable

.. autoclass:: lark.parsers.lalr_interactive_parser.ImmutableInteractiveParser
   :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts, as_mutable

ast_utils
---------

For an example of using ``ast_utils``, see `/examples/advanced/create_ast.py`_

.. autoclass:: lark.ast_utils.Ast

.. autoclass:: lark.ast_utils.AsList

.. autofunction:: lark.ast_utils.create_transformer

..
_/examples/advanced/create_ast.py: examples/advanced/create_ast.html Indenter -------- .. autoclass:: lark.indenter.Indenter .. autoclass:: lark.indenter.PythonIndenter lark-1.2.2/docs/conf.py000066400000000000000000000125731465673407200147220ustar00rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # Lark documentation build configuration file, created by # sphinx-quickstart on Sun Aug 16 13:09:41 2020. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys sys.path.insert(0, os.path.abspath('..')) autodoc_member_order = 'bysource' # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx.ext.coverage', 'recommonmark', 'sphinx_markdown_tables', 'sphinx_gallery.gen_gallery' ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = { '.rst': 'restructuredtext', '.md': 'markdown' } # The master toctree document. master_doc = 'index' # General information about the project. project = 'Lark' copyright = '2020, Erez Shinan' author = 'Erez Shinan' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = '' # The full version, including alpha/beta/rc tags. release = '' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = { 'prev_next_buttons_location': 'both' } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # This is required for the alabaster theme # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars html_sidebars = { '**': [ 'relations.html', # needs 'show_related': True theme option to display 'searchbox.html', ] } # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. htmlhelp_basename = 'Larkdoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'Lark.tex', 'Lark Documentation', 'Erez Shinan', 'manual'), ] # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'lark', 'Lark Documentation', [author], 7) ] # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'Lark', 'Lark Documentation', author, 'Lark', 'One line description of project.', 'Miscellaneous'), ] # -- Sphinx gallery config ------------------------------------------- sphinx_gallery_conf = { 'examples_dirs': ['../examples'], 'gallery_dirs': ['examples'], } lark-1.2.2/docs/features.md000066400000000000000000000041721465673407200155570ustar00rootroot00000000000000# Features ## Main Features - Earley parser, capable of parsing any context-free grammar - Implements SPPF, for efficient parsing and storing of ambiguous grammars. - LALR(1) parser, limited in power of expression, but very efficient in space and performance (O(n)). - Implements a parse-aware lexer that provides a better power of expression than traditional LALR implementations (such as ply). - EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) - Builds a parse-tree (AST) automagically based on the grammar - Stand-alone parser generator - create a small independent parser to embed in your project. ([read more](tools.html#stand-alone-parser)) - Flexible error handling by using an interactive parser interface (LALR only) - Automatic line & column tracking (for both tokens and matched rules) - Automatic terminal collision resolution - Warns on regex collisions using the optional `interegular` library. ([read more](how_to_use.html#regex-collisions)) - Grammar composition - Import terminals and rules from other grammars (see [example](https://github.com/lark-parser/lark/tree/master/examples/composition)). - Standard library of terminals (strings, numbers, names, etc.) 
- Unicode fully supported - Extensive test suite - Type annotations (MyPy support) - Pure-Python implementation [Read more about the parsers](parsers.md) ## Extra features - Support for external regex module ([see here](classes.html#using-unicode-character-classes-with-regex)) - Import grammars from Nearley.js ([read more](tools.html#importing-grammars-from-nearleyjs)) - CYK parser - Visualize your parse trees as dot or png files ([see_example](https://github.com/lark-parser/lark/blob/master/examples/fruitflies.py)) - Automatic reconstruction of input from parse-tree (see [example](https://github.com/lark-parser/lark/blob/master/examples/advanced/reconstruct_json.py) and [another example](https://github.com/lark-parser/lark/blob/master/examples/advanced/reconstruct_python.py)) - Use Lark grammars in [Julia](https://github.com/jamesrhester/Lerche.jl) and [Javascript](https://github.com/lark-parser/Lark.js). lark-1.2.2/docs/forest.rst000066400000000000000000000036041465673407200154520ustar00rootroot00000000000000Working with the SPPF ===================== When parsing with Earley, Lark provides the ``ambiguity='forest'`` option to obtain the shared packed parse forest (SPPF) produced by the parser as an alternative to it being automatically converted to a tree. Lark provides a few tools to facilitate working with the SPPF. Here are some things to consider when deciding whether or not to use the SPPF. **Pros** - Efficient storage of highly ambiguous parses - Precise handling of ambiguities - Custom rule prioritizers - Ability to handle infinite ambiguities - Directly transform forest -> object instead of forest -> tree -> object **Cons** - More complex than working with a tree - SPPF may contain nodes corresponding to rules generated internally - Loss of Lark grammar features: - Rules starting with '_' are not inlined in the SPPF - Rules starting with '?' are never inlined in the SPPF - All tokens will appear in the SPPF SymbolNode ---------- .. autoclass:: lark.parsers.earley_forest.SymbolNode :members: is_ambiguous, children PackedNode ---------- .. autoclass:: lark.parsers.earley_forest.PackedNode :members: children ForestVisitor ------------- .. autoclass:: lark.parsers.earley_forest.ForestVisitor :members: visit, visit_symbol_node_in, visit_symbol_node_out, visit_packed_node_in, visit_packed_node_out, visit_token_node, on_cycle, get_cycle_in_path ForestTransformer ----------------- .. autoclass:: lark.parsers.earley_forest.ForestTransformer :members: transform, transform_symbol_node, transform_intermediate_node, transform_packed_node, transform_token_node TreeForestTransformer --------------------- .. autoclass:: lark.parsers.earley_forest.TreeForestTransformer :members: __default__, __default_token__, __default_ambig__ handles_ambiguity ----------------- .. autofunction:: lark.parsers.earley_forest.handles_ambiguity lark-1.2.2/docs/grammar.md000066400000000000000000000224441465673407200153710ustar00rootroot00000000000000# Grammar Reference ## Definitions A **grammar** is a list of rules and terminals, that together define a language. Terminals define the alphabet of the language, while rules define its structure. In Lark, a terminal may be a string, a regular expression, or a concatenation of these and other terminals. Each rule is a list of terminals and rules, whose location and nesting define the structure of the resulting parse-tree. 
A **parsing algorithm** is an algorithm that takes a grammar definition and a sequence of symbols (members of the alphabet), and matches the entirety of the sequence by searching for a structure that is allowed by the grammar. ### General Syntax and notes Grammars in Lark are based on [EBNF](https://en.wikipedia.org/wiki/Extended_Backus–Naur_form) syntax, with several enhancements. EBNF is basically a short-hand for common BNF patterns. Optionals are expanded: ```ebnf a b? c -> (a c | a b c) ``` Repetition is extracted into a recursion: ```ebnf a: b* -> a: _b_tag _b_tag: (_b_tag b)? ``` And so on. Lark grammars are composed of a list of definitions and directives, each on its own line. A definition is either a named rule, or a named terminal, with the following syntax, respectively: ```c rule: | etc. TERM: // Rules aren't allowed ``` **Comments** start with either `//` (C++ style) or `#` (Python style, since version 1.1.6) and last to the end of the line. Lark begins the parse with the rule 'start', unless specified otherwise in the options. Names of rules are always in lowercase, while names of terminals are always in uppercase. This distinction has practical effects, for the shape of the generated parse-tree, and the automatic construction of the lexer (aka tokenizer, or scanner). ## Terminals Terminals are used to match text into symbols. They can be defined as a combination of literals and other terminals. **Syntax:** ```html [. ] : ``` Terminal names must be uppercase. Literals can be one of: * `"string"` * `/regular expression+/` * `"case-insensitive string"i` * `/re with flags/imulx` * Literal range: `"a".."z"`, `"1".."9"`, etc. Terminals also support grammar operators, such as `|`, `+`, `*` and `?`. Terminals are a linear construct, and therefore may not contain themselves (recursion isn't allowed). ### Templates Templates are expanded when preprocessing the grammar. Definition syntax: ```ebnf my_template{param1, param2, ...}: ``` Use syntax: ```ebnf some_rule: my_template{arg1, arg2, ...} ``` Example: ```ebnf _separated{x, sep}: x (sep x)* // Define a sequence of 'x sep x sep x ...' num_list: "[" _separated{NUMBER, ","} "]" // Will match "[1, 2, 3]" etc. ``` ### Priority Terminals can be assigned a priority to influence lexing. Terminal priorities are signed integers with a default value of 0. When using a lexer, the highest priority terminals are always matched first. When using Earley's dynamic lexing, terminal priorities are used to prefer certain lexings and resolve ambiguity. ### Regexp Flags You can use flags on regexps and strings. For example: ```perl SELECT: "select"i //# Will ignore case, and match SELECT or Select, etc. MULTILINE_TEXT: /.+/s SIGNED_INTEGER: / [+-]? # the sign (0|[1-9][0-9]*) # the digits /x ``` Supported flags are one of: `imslux`. See Python's regex documentation for more details on each one. Regexps/strings of different flags can only be concatenated in Python 3.6+ #### Notes for when using a lexer: When using a lexer (basic or contextual), it is the grammar-author's responsibility to make sure the literals don't collide, or that if they do, they are matched in the desired order. Literals are matched according to the following precedence: 1. Highest priority first (priority is specified as: TERM.number: ...) 2. Length of match (for regexps, the longest theoretical match is used) 3. Length of literal / pattern definition 4. Name **Examples:** ```perl IF: "if" INTEGER : /[0-9]+/ INTEGER2 : ("0".."9")+ //# Same as INTEGER DECIMAL.2: INTEGER? "." 
INTEGER //# Will be matched before INTEGER WHITESPACE: (" " | /\t/ )+ SQL_SELECT: "select"i ``` ### Regular expressions & Ambiguity Each terminal is eventually compiled to a regular expression. All the operators and references inside it are mapped to their respective expressions. For example, in the following grammar, `A1` and `A2`, are equivalent: ```perl A1: "a" | "b" A2: /a|b/ ``` This means that inside terminals, Lark cannot detect or resolve ambiguity, even when using Earley. For example, for this grammar: ```perl start : (A | B)+ A : "a" | "ab" B : "b" ``` We get only one possible derivation, instead of two: ```bash >>> p = Lark(g, ambiguity="explicit") >>> p.parse("ab") Tree('start', [Token('A', 'ab')]) ``` This is happening because Python's regex engine always returns the best matching option. There is no way to access the alternatives. If you find yourself in this situation, the recommended solution is to use rules instead. Example: ```python >>> p = Lark("""start: (a | b)+ ... !a: "a" | "ab" ... !b: "b" ... """, ambiguity="explicit") >>> print(p.parse("ab").pretty()) _ambig start a ab start a a b b ``` ## Rules **Syntax:** ```html : [-> ] | ... ``` Names of rules and aliases are always in lowercase. Rule definitions can be extended to the next line by using the OR operator (signified by a pipe: `|` ). An alias is a name for the specific rule alternative. It affects tree construction. Each item is one of: * `rule` * `TERMINAL` * `"string literal"` or `/regexp literal/` * `(item item ..)` - Group items * `[item item ..]` - Maybe. Same as `(item item ..)?`, but when `maybe_placeholders=True`, generates `None` if there is no match. * `item?` - Zero or one instances of item ("maybe") * `item*` - Zero or more instances of item * `item+` - One or more instances of item * `item ~ n` - Exactly *n* instances of item * `item ~ n..m` - Between *n* to *m* instances of item (not recommended for wide ranges, due to performance issues) **Examples:** ```perl hello_world: "hello" "world" mul: (mul "*")? number //# Left-recursion is allowed and encouraged! expr: expr operator expr | value //# Multi-line, belongs to expr four_words: word ~ 4 ``` ### Priority Like terminals, rules can be assigned a priority. Rule priorities are signed integers with a default value of 0. When using LALR, the highest priority rules are used to resolve collision errors. When using Earley, rule priorities are used to resolve ambiguity. ## Directives ### %ignore All occurrences of the terminal will be ignored, and won't be part of the parse. Using the `%ignore` directive results in a cleaner grammar. It's especially important for the LALR(1) algorithm, because adding whitespace (or comments, or other extraneous elements) explicitly in the grammar, harms its predictive abilities, which are based on a lookahead of 1. **Syntax:** ```html %ignore ``` **Examples:** ```perl %ignore " " COMMENT: "#" /[^\n]/* %ignore COMMENT ``` ### %import Allows one to import terminals and rules from lark grammars. When importing rules, all their dependencies will be imported into a namespace, to avoid collisions. It's not possible to override their dependencies (e.g. like you would when inheriting a class). **Syntax:** ```html %import . %import . %import . -> %import . -> %import (, , , ) ``` If the module path is absolute, Lark will attempt to load it from the built-in directory (which currently contains `common.lark`, `python.lark`, and `unicode.lark`). 
If the module path is relative, such as `.path.to.file`, Lark will attempt to load it from the current working directory. Grammars must have the `.lark` extension. The rule or terminal can be imported under another name with the `->` syntax. **Example:** ```perl %import common.NUMBER %import .terminals_file (A, B, C) %import .rules_file.rulea -> ruleb ``` Note that `%ignore` directives cannot be imported. Imported rules will abide by the `%ignore` directives declared in the main grammar. ### %declare Declare a terminal without defining it. Useful for plugins. ### %override Override a rule or terminals, affecting all references to it, even in imported grammars. Useful for implementing an inheritance pattern when importing grammars. **Example:** ```perl %import my_grammar (start, number, NUMBER) // Add hex support to my_grammar %override number: NUMBER | /0x\w+/ ``` ### %extend Extend the definition of a rule or terminal, e.g. add a new option on what it can match, like when separated with `|`. Useful for splitting up a definition of a complex rule with many different options over multiple files. Can also be used to implement a plugin system where a core grammar is extended by others. **Example:** ```perl %import my_grammar (start, NUMBER) // Add hex support to my_grammar %extend NUMBER: /0x\w+/ ``` For both `%extend` and `%override`, there is not requirement for a rule/terminal to come from another file, but that is probably the most common usecase lark-1.2.2/docs/how_to_develop.md000066400000000000000000000033441465673407200167560ustar00rootroot00000000000000# How to develop Lark - Guide There are many ways you can help the project: * Help solve issues * Improve the documentation * Write new grammars for Lark's library * Write a blog post introducing Lark to your audience * Port Lark to another language * Help with code development If you're interested in taking one of these on, contact us on [Gitter](https://gitter.im/lark-parser/Lobby) or [Github Discussion](https://github.com/lark-parser/lark/discussions), and we will provide more details and assist you in the process. ## Code Style Lark does not follow a predefined code style. We accept any code style that makes sense, as long as it's Pythonic and easy to read. ## Unit Tests Lark comes with an extensive set of tests. Many of the tests will run several times, once for each parser configuration. To run the tests, just go to the lark project root, and run the command: ```bash python -m tests ``` or ```bash pypy -m tests ``` For a list of supported interpreters, you can consult the `tox.ini` file. You can also run a single unittest using its class and method name, for example: ```bash ## test_package test_class_name.test_function_name python -m tests TestLalrBasic.test_keep_all_tokens ``` ### tox To run all Unit Tests with tox, install tox and Python 2.7 up to the latest python interpreter supported (consult the file tox.ini). Then, run the command `tox` on the root of this project (where the main setup.py file is on). And, for example, if you would like to only run the Unit Tests for Python version 2.7, you can run the command `tox -e py27` ### pytest You can also run the tests using pytest: ```bash pytest tests ``` ### Using setup.py Another way to run the tests is using setup.py: ```bash python setup.py test ``` lark-1.2.2/docs/how_to_use.md000066400000000000000000000134571465673407200161220ustar00rootroot00000000000000# How To Use Lark - Guide ## Work process This is the recommended process for working with Lark: 1. 
Collect or create input samples, that demonstrate key features or behaviors in the language you're trying to parse. 2. Write a grammar. Try to aim for a structure that is intuitive, and in a way that imitates how you would explain your language to a fellow human. 3. Try your grammar in Lark against each input sample. Make sure the resulting parse-trees make sense. 4. Use Lark's grammar features to [shape the tree](tree_construction.md): Get rid of superfluous rules by inlining them, and use aliases when specific cases need clarification. You can perform steps 1-4 repeatedly, gradually growing your grammar to include more sentences. 5. Create a transformer to evaluate the parse-tree into a structure you'll be comfortable to work with. This may include evaluating literals, merging branches, or even converting the entire tree into your own set of AST classes. Of course, some specific use-cases may deviate from this process. Feel free to suggest these cases, and I'll add them to this page. ## Getting started Browse the [Examples](https://github.com/lark-parser/lark/tree/master/examples) to find a template that suits your purposes. Read the tutorials to get a better understanding of how everything works. (links in the [main page](/index)) Use the [Cheatsheet (PDF)](https://lark-parser.readthedocs.io/en/latest/_static/lark_cheatsheet.pdf) for quick reference. Use the reference pages for more in-depth explanations. (links in the [main page](/index)) ## Debug Grammars may contain non-obvious bugs, usually caused by rules or terminals interfering with each other in subtle ways. When trying to debug a misbehaving grammar, the following methodology is recommended: 1. Create a copy of the grammar, so you can change the parser/grammar without any worries 2. Find the minimal input that creates the error 3. Slowly remove rules from the grammar, while making sure the error still occurs. Usually, by the time you get to a minimal grammar, the problem becomes clear. But if it doesn't, feel free to ask us on gitter, or even open an issue. Post a reproducing code, with the minimal grammar and input, and we'll do our best to help. ### Regex collisions A likely source of bugs occurs when two regexes in a grammar can match the same input. If both terminals have the same priority, most lexers would arbitrarily choose the first one that matches, which isn't always the desired one. (a notable exception is the `dynamic_complete` lexer, which always tries all variations. But its users pay for that with performance.) These collisions can be hard to notice, and their effects can be difficult to debug, as they are subtle and sometimes hard to reproduce. To help with these situations, Lark can utilize a new external library called `interegular`. If it is installed, Lark uses it to check for collisions, and warn about any conflicts that it can find: ``` import logging from lark import Lark, logger logger.setLevel(logging.WARN) collision_grammar = ''' start: A | B A: /a+/ B: /[ab]+/ ''' p = Lark(collision_grammar, parser='lalr') # Output: # Collision between Terminals B and A. The lexer will choose between them arbitrarily # Example Collision: a ``` You can install interegular for Lark using `pip install 'lark[interegular]'`. Note 1: Interegular currently only runs when the lexer is `basic` or `contextual`. Note 2: Some advanced regex features, such as lookahead and lookbehind, may prevent interegular from detecting existing collisions. 
### Shift/Reduce collisions By default Lark automatically resolves Shift/Reduce conflicts as Shift. It produces notifications as debug messages. when users pass `debug=True`, those notifications are written as warnings. Either way, to get the messages printed you have to configure the `logger` beforehand. For example: ```python import logging from lark import Lark, logger logger.setLevel(logging.DEBUG) collision_grammar = ''' start: as as as: a* a: "a" ''' p = Lark(collision_grammar, parser='lalr', debug=True) # Shift/Reduce conflict for terminal A: (resolving as shift) # * # Shift/Reduce conflict for terminal A: (resolving as shift) # * ``` ### Strict-Mode Lark, by default, accepts grammars with unresolved Shift/Reduce collisions (which it always resolves to shift), and regex collisions. Strict-mode allows users to validate that their grammars don't contain these collisions. When Lark is initialized with `strict=True`, it raises an exception on any Shift/Reduce or regex collision. If `interegular` isn't installed, an exception is thrown. When using strict-mode, users will be expected to resolve their collisions manually: - To resolve Shift/Reduce collisions, adjust the priority weights of the rules involved, until there are no more collisions. - To resolve regex collisions, change the involved regexes so that they can no longer both match the same input (Lark provides an example). Strict-mode only applies to LALR for now. ```python from lark import Lark collision_grammar = ''' start: as as as: a* a: "a" ''' p = Lark(collision_grammar, parser='lalr', strict=True) # Traceback (most recent call last): # ... # lark.exceptions.GrammarError: Shift/Reduce conflict for terminal A. [strict-mode] ``` ## Tools ### Stand-alone parser Lark can generate a stand-alone LALR(1) parser from a grammar. The resulting module provides the same interface as Lark, but with a fixed grammar, and reduced functionality. Run using: ```bash python -m lark.tools.standalone ``` For a play-by-play, read the [tutorial](http://blog.erezsh.com/create-a-stand-alone-lalr1-parser-in-python/) ### Import Nearley.js grammars It is possible to import Nearley grammars into Lark. The Javascript code is translated using Js2Py. See the [tools page](tools.md) for more information. lark-1.2.2/docs/ide/000077500000000000000000000000001465673407200141545ustar00rootroot00000000000000lark-1.2.2/docs/ide/app.html000066400000000000000000000043051465673407200156240ustar00rootroot00000000000000 lark-1.2.2/docs/ide/app.js000066400000000000000000000051331465673407200152740ustar00rootroot00000000000000class app { constructor(modules, invocation){ languagePluginLoader.then(() => { // If you don't require for pre-loaded Python packages, remove this promise below. window.pyodide.runPythonAsync("import setuptools, micropip").then(()=>{ window.pyodide.runPythonAsync("micropip.install('lark-parser')").then(()=>{ this.fetchSources(modules).then(() => { window.pyodide.runPythonAsync("import " + Object.keys(modules).join("\nimport ") + "\n" + invocation + "\n").then(() => this.initializingComplete()); }); }); }); }); } loadSources(module, baseURL, files) { let promises = []; for (let f in files) { promises.push( new Promise((resolve, reject) => { let file = files[f]; let url = (baseURL ? 
baseURL + "/" : "") + file; fetch(url, {}).then((response) => { if (response.status === 200) return response.text().then((code) => { let path = ("/lib/python3.7/site-packages/" + module + "/" + file).split("/"); let lookup = ""; for (let i in path) { if (!path[i]) { continue; } lookup += (lookup ? "/" : "") + path[i]; if (parseInt(i) === path.length - 1) { window.pyodide._module.FS.writeFile(lookup, code); console.debug(`fetched ${lookup}`); } else { try { window.pyodide._module.FS.lookupPath(lookup); } catch { window.pyodide._module.FS.mkdir(lookup); console.debug(`created ${lookup}`); } } } resolve(); }); else reject(); }); }) ); } return Promise.all(promises); } fetchSources(modules) { let promises = []; for( let module of Object.keys(modules) ) { promises.push( new Promise((resolve, reject) => { fetch(`${modules[module]}/files.json`, {}).then((response) => { if (response.status === 200) { response.text().then((list) => { let files = JSON.parse(list); this.loadSources(module, modules[module], files).then(() => { resolve(); }) }) } else { reject(); } }) })); } return Promise.all(promises).then(() => { for( let module of Object.keys(modules) ) { window.pyodide.loadedPackages[module] = "default channel"; } window.pyodide.runPython( 'import importlib as _importlib\n' + '_importlib.invalidate_caches()\n' ); }); } initializingComplete() { document.body.classList.remove("is-loading") } } (function () { window.top.app = new app({"app": "app"}, "import app.app; app.app.start()"); })(); lark-1.2.2/docs/ide/app/000077500000000000000000000000001465673407200147345ustar00rootroot00000000000000lark-1.2.2/docs/ide/app/app.py000066400000000000000000000040131465673407200160640ustar00rootroot00000000000000from . import html5 from .examples import examples from lark import Lark from lark.tree import Tree class App(html5.Div): def __init__(self): super().__init__("""

IDE

Grammar:
Input:
""") self.sinkEvent("onKeyUp", "onChange") self.parser = "earley" # Pre-load examples for name, (grammar, input) in examples.items(): option = html5.Option(name) option.grammar = grammar option.input = input self.examples.appendChild(option) def onChange(self, e): if html5.utils.doesEventHitWidgetOrChildren(e, self.examples): example = self.examples.children(self.examples["selectedIndex"]) self.grammar["value"] = example.grammar.strip() self.input["value"] = example.input.strip() self.onKeyUp() elif html5.utils.doesEventHitWidgetOrChildren(e, self.parser): self.parser = self.parser.children(self.parser["selectedIndex"])["value"] self.onKeyUp() def onKeyUp(self, e=None): l = Lark(self.grammar["value"], parser=self.parser) try: ast = l.parse(self.input["value"]) except Exception as e: self.ast.appendChild( html5.Li(str(e)), replace=True ) print(ast) traverse = lambda node: html5.Li([node.data, html5.Ul([traverse(c) for c in node.children])] if isinstance(node, Tree) else node) self.ast.appendChild(traverse(ast), replace=True) def start(): html5.Body().appendChild( App() ) lark-1.2.2/docs/ide/app/core.py000066400000000000000000002123601465673407200162420ustar00rootroot00000000000000# -*- coding: utf-8 -* ######################################################################################################################## # DOM-access functions and variables ######################################################################################################################## try: # Pyodide from js import window, eval as jseval document = window.document except: print("Emulation mode") from xml.dom.minidom import parseString jseval = None window = None document = parseString("") def domCreateAttribute(tag, ns=None): """ Creates a new HTML/SVG/... attribute :param ns: the namespace. Default: HTML. Possible values: HTML, SVG, XBL, XUL """ uri = None if ns == "SVG": uri = "http://www.w3.org/2000/svg" elif ns == "XBL": uri = "http://www.mozilla.org/xbl" elif ns == "XUL": uri = "http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul" if uri: return document.createAttribute(uri, tag) return document.createAttribute(tag) def domCreateElement(tag, ns=None): """ Creates a new HTML/SVG/... tag :param ns: the namespace. Default: HTML. Possible values: HTML, SVG, XBL, XUL """ uri = None if ns == "SVG": uri = "http://www.w3.org/2000/svg" elif ns == "XBL": uri = "http://www.mozilla.org/xbl" elif ns == "XUL": uri = "http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul" if uri: return document.createElementNS(uri, tag) return document.createElement(tag) def domCreateTextNode(txt=""): return document.createTextNode(txt) def domGetElementById(idTag): return document.getElementById(idTag) def domElementFromPoint(x, y): return document.elementFromPoint(x, y) def domGetElementsByTagName(tag): items = document.getElementsByTagName(tag) return [items.item(i) for i in range(0, int(items.length))] #pyodide interprets items.length as float, so convert to int ######################################################################################################################## # HTML Widgets ######################################################################################################################## # TextNode ------------------------------------------------------------------------------------------------------------- class TextNode(object): """ Represents a piece of text inside the DOM. This is the *only* object not deriving from "Widget", as it does not support any of its properties. 
""" def __init__(self, txt=None, *args, **kwargs): super().__init__() self._parent = None self._children = [] self.element = domCreateTextNode(txt or "") self._isAttached = False def _setText(self, txt): self.element.data = txt def _getText(self): return self.element.data def __str__(self): return self.element.data def onAttach(self): self._isAttached = True def onDetach(self): self._isAttached = False def _setDisabled(self, disabled): return def _getDisabled(self): return False def children(self): return [] # _WidgetClassWrapper ------------------------------------------------------------------------------------------------- class _WidgetClassWrapper(list): def __init__(self, targetWidget): super().__init__() self.targetWidget = targetWidget def _updateElem(self): if len(self) == 0: self.targetWidget.element.removeAttribute("class") else: self.targetWidget.element.setAttribute("class", " ".join(self)) def append(self, p_object): list.append(self, p_object) self._updateElem() def clear(self): list.clear(self) self._updateElem() def remove(self, value): try: list.remove(self, value) except: pass self._updateElem() def extend(self, iterable): list.extend(self, iterable) self._updateElem() def insert(self, index, p_object): list.insert(self, index, p_object) self._updateElem() def pop(self, index=None): list.pop(self, index) self._updateElem() # _WidgetDataWrapper --------------------------------------------------------------------------------------------------- class _WidgetDataWrapper(dict): def __init__(self, targetWidget): super().__init__() self.targetWidget = targetWidget alldata = targetWidget.element for data in dir(alldata.dataset): dict.__setitem__(self, data, getattr(alldata.dataset, data)) def __setitem__(self, key, value): dict.__setitem__(self, key, value) self.targetWidget.element.setAttribute(str("data-" + key), value) def update(self, E=None, **F): dict.update(self, E, **F) if E is not None and "keys" in dir(E): for key in E: self.targetWidget.element.setAttribute(str("data-" + key), E["data-" + key]) elif E: for (key, val) in E: self.targetWidget.element.setAttribute(str("data-" + key), "data-" + val) for key in F: self.targetWidget.element.setAttribute(str("data-" + key), F["data-" + key]) # _WidgetStyleWrapper -------------------------------------------------------------------------------------------------- class _WidgetStyleWrapper(dict): def __init__(self, targetWidget): super().__init__() self.targetWidget = targetWidget style = targetWidget.element.style for key in dir(style): # Convert JS-Style-Syntax to CSS Syntax (ie borderTop -> border-top) realKey = "" for currChar in key: if currChar.isupper(): realKey += "-" realKey += currChar.lower() val = style.getPropertyValue(realKey) if val: dict.__setitem__(self, realKey, val) def __setitem__(self, key, value): dict.__setitem__(self, key, value) self.targetWidget.element.style.setProperty(key, value) def update(self, E=None, **F): dict.update(self, E, **F) if E is not None and "keys" in dir(E): for key in E: self.targetWidget.element.style.setProperty(key, E[key]) elif E: for (key, val) in E: self.targetWidget.element.style.setProperty(key, val) for key in F: self.targetWidget.element.style.setProperty(key, F[key]) # Widget --------------------------------------------------------------------------------------------------------------- class Widget(object): _tagName = None _namespace = None _parserTagName = None style = [] def __init__(self, *args, appendTo=None, style=None, **kwargs): if "_wrapElem" in kwargs.keys(): 
self.element = kwargs["_wrapElem"] del kwargs["_wrapElem"] else: assert self._tagName is not None self.element = domCreateElement(self._tagName, ns=self._namespace) super().__init__() self._widgetClassWrapper = _WidgetClassWrapper(self) self.addClass(self.style) if style: self.addClass(style) self._children = [] self._catchedEvents = {} self._disabledState = 0 self._isAttached = False self._parent = None self._lastDisplayState = None if args: self.appendChild(*args, **kwargs) if appendTo: appendTo.appendChild(self) def sinkEvent(self, *args): for event_attrName in args: event = event_attrName.lower() if event_attrName in self._catchedEvents or event in ["onattach", "ondetach"]: continue eventFn = getattr(self, event_attrName, None) assert eventFn and callable(eventFn), "{} must provide a {} method".format(str(self), event_attrName) self._catchedEvents[event_attrName] = eventFn if event.startswith("on"): event = event[2:] self.element.addEventListener(event, eventFn) def unsinkEvent(self, *args): for event_attrName in args: event = event_attrName.lower() if event_attrName not in self._catchedEvents: continue eventFn = self._catchedEvents[event_attrName] del self._catchedEvents[event_attrName] if event.startswith("on"): event = event[2:] self.element.removeEventListener(event, eventFn) def disable(self): if not self["disabled"]: self["disabled"] = True def enable(self): if self["disabled"]: self["disabled"] = False def _getDisabled(self): return bool(self._disabledState) def _setDisabled(self, disable): for child in self._children: child._setDisabled(disable) if disable: self._disabledState += 1 self.addClass("is-disabled") if isinstance(self, _attrDisabled): self.element.disabled = True elif self._disabledState: self._disabledState -= 1 if not self._disabledState: self.removeClass("is-disabled") if isinstance(self, _attrDisabled): self.element.disabled = False def _getTargetfuncName(self, key, type): assert type in ["get", "set"] return "_{}{}{}".format(type, key[0].upper(), key[1:]) def __getitem__(self, key): funcName = self._getTargetfuncName(key, "get") if funcName in dir(self): return getattr(self, funcName)() return None def __setitem__(self, key, value): funcName = self._getTargetfuncName(key, "set") if funcName in dir(self): return getattr(self, funcName)(value) raise ValueError("{} is no valid attribute for {}".format(key, (self._tagName or str(self)))) def __str__(self): return str(self.__class__.__name__) def __iter__(self): return self._children.__iter__() def _getData(self): """ Custom data attributes are intended to store custom data private to the page or application, for which there are no more appropriate attributes or elements. :param name: :returns: """ return _WidgetDataWrapper(self) def _getTranslate(self): """ Specifies whether an elements attribute values and contents of its children are to be translated when the page is localized, or whether to leave them unchanged. :returns: True | False """ return True if self.element.translate == "yes" else False def _setTranslate(self, val): """ Specifies whether an elements attribute values and contents of its children are to be translated when the page is localized, or whether to leave them unchanged. :param val: True | False """ self.element.translate = "yes" if val == True else "no" def _getTitle(self): """ Advisory information associated with the element. :returns: str """ return self.element.title def _setTitle(self, val): """ Advisory information associated with the element. 
:param val: str """ self.element.title = val def _getTabindex(self): """ Specifies whether the element represents an element that is is focusable (that is, an element which is part of the sequence of focusable elements in the document), and the relative order of the element in the sequence of focusable elements in the document. :returns: number """ return self.element.getAttribute("tabindex") def _setTabindex(self, val): """ Specifies whether the element represents an element that is is focusable (that is, an element which is part of the sequence of focusable elements in the document), and the relative order of the element in the sequence of focusable elements in the document. :param val: number """ self.element.setAttribute("tabindex", val) def _getSpellcheck(self): """ Specifies whether the element represents an element whose contents are subject to spell checking and grammar checking. :returns: True | False """ return True if self.element.spellcheck == "true" else False def _setSpellcheck(self, val): """ Specifies whether the element represents an element whose contents are subject to spell checking and grammar checking. :param val: True | False """ self.element.spellcheck = str(val).lower() def _getLang(self): """ Specifies the primary language for the contents of the element and for any of the elements attributes that contain text. :returns: language tag e.g. de|en|fr|es|it|ru| """ return self.element.lang def _setLang(self, val): """ Specifies the primary language for the contents of the element and for any of the elements attributes that contain text. :param val: language tag """ self.element.lang = val def _getHidden(self): """ Specifies that the element represents an element that is not yet, or is no longer, relevant. :returns: True | False """ return True if self.element.hasAttribute("hidden") else False def _setHidden(self, val): """ Specifies that the element represents an element that is not yet, or is no longer, relevant. :param val: True | False """ if val: self.element.setAttribute("hidden", "") else: self.element.removeAttribute("hidden") def _getDropzone(self): """ Specifies what types of content can be dropped on the element, and instructs the UA about which actions to take with content when it is dropped on the element. :returns: "copy" | "move" | "link" """ return self.element.dropzone def _setDropzone(self, val): """ Specifies what types of content can be dropped on the element, and instructs the UA about which actions to take with content when it is dropped on the element. :param val: "copy" | "move" | "link" """ self.element.dropzone = val def _getDraggable(self): """ Specifies whether the element is draggable. :returns: True | False | "auto" """ return (self.element.draggable if str(self.element.draggable) == "auto" else ( True if str(self.element.draggable).lower() == "true" else False)) def _setDraggable(self, val): """ Specifies whether the element is draggable. :param val: True | False | "auto" """ self.element.draggable = str(val).lower() def _getDir(self): """ Specifies the elements text directionality. :returns: ltr | rtl | auto """ return self.element.dir def _setDir(self, val): """ Specifies the elements text directionality. :param val: ltr | rtl | auto """ self.element.dir = val def _getContextmenu(self): """ The value of the id attribute on the menu with which to associate the element as a context menu. 
:returns: """ return self.element.contextmenu def _setContextmenu(self, val): """ The value of the id attribute on the menu with which to associate the element as a context menu. :param val: """ self.element.contextmenu = val def _getContenteditable(self): """ Specifies whether the contents of the element are editable. :returns: True | False """ v = self.element.getAttribute("contenteditable") return str(v).lower() == "true" def _setContenteditable(self, val): """ Specifies whether the contents of the element are editable. :param val: True | False """ self.element.setAttribute("contenteditable", str(val).lower()) def _getAccesskey(self): """ A key label or list of key labels with which to associate the element; each key label represents a keyboard shortcut which UAs can use to activate the element or give focus to the element. :param self: :returns: """ return self.element.accesskey def _setAccesskey(self, val): """ A key label or list of key labels with which to associate the element; each key label represents a keyboard shortcut which UAs can use to activate the element or give focus to the element. :param self: :param val: """ self.element.accesskey = val def _getId(self): """ Specifies a unique id for an element :param self: :returns: """ return self.element.id def _setId(self, val): """ Specifies a unique id for an element :param self: :param val: """ self.element.id = val def _getClass(self): """ The class attribute specifies one or more classnames for an element. :returns: """ return self._widgetClassWrapper def _setClass(self, value): """ The class attribute specifies one or more classnames for an element. :param self: :param value: @raise ValueError: """ if value is None: self.element.setAttribute("class", " ") elif isinstance(value, str): self.element.setAttribute("class", value) elif isinstance(value, list): self.element.setAttribute("class", " ".join(value)) else: raise ValueError("Class must be a str, a List or None") def _getStyle(self): """ The style attribute specifies an inline style for an element. :param self: :returns: """ return _WidgetStyleWrapper(self) def _getRole(self): """ Specifies a role for an element @param self: @return: """ return self.element.getAttribute("role") def _setRole(self, val): """ Specifies a role for an element @param self: @param val: """ self.element.setAttribute("role", val) def hide(self): """ Hide element, if shown. :return: """ state = self["style"].get("display", "") if state != "none": self._lastDisplayState = state self["style"]["display"] = "none" def show(self): """ Show element, if hidden. :return: """ if self._lastDisplayState is not None: self["style"]["display"] = self._lastDisplayState self._lastDisplayState = None def isHidden(self): """ Checks if a widget is hidden. :return: True if hidden, False otherwise. """ return self["style"].get("display", "") == "none" def isVisible(self): """ Checks if a widget is visible. :return: True if visible, False otherwise. """ return not self.isHidden() def onBind(self, widget, name): """ Event function that is called on the widget when it is bound to another widget with a name. This is only done by the HTML parser, a manual binding by the user is not triggered. """ return def onAttach(self): self._isAttached = True for c in self._children: c.onAttach() def onDetach(self): self._isAttached = False for c in self._children: c.onDetach() def __collectChildren(self, *args, **kwargs): assert not isinstance(self, _isVoid), "<%s> can't have children!" 
% self._tagName if kwargs.get("bindTo") is None: kwargs["bindTo"] = self widgets = [] for arg in args: if isinstance(arg, (str, HtmlAst)): widgets.extend(fromHTML(arg, **kwargs)) elif isinstance(arg, (list, tuple)): for subarg in arg: widgets.extend(self.__collectChildren(subarg, **kwargs)) elif not isinstance(arg, (Widget, TextNode)): widgets.append(TextNode(str(arg))) else: widgets.append(arg) return widgets def insertBefore(self, insert, child, **kwargs): if not child: return self.appendChild(insert) assert child in self._children, "{} is not a child of {}".format(child, self) toInsert = self.__collectChildren(insert, **kwargs) for insert in toInsert: if insert._parent: insert._parent.removeChild(insert) self.element.insertBefore(insert.element, child.element) self._children.insert(self._children.index(child), insert) insert._parent = self if self._isAttached: insert.onAttach() return toInsert def prependChild(self, *args, **kwargs): if kwargs.get("replace", False): self.removeAllChildren() del kwargs["replace"] toPrepend = self.__collectChildren(*args, **kwargs) for child in toPrepend: if child._parent: child._parent._children.remove(child) child._parent = None if not self._children: self.appendChild(child) else: self.insertBefore(child, self.children(0)) return toPrepend def appendChild(self, *args, **kwargs): if kwargs.get("replace", False): self.removeAllChildren() del kwargs["replace"] toAppend = self.__collectChildren(*args, **kwargs) for child in toAppend: if child._parent: child._parent._children.remove(child) self._children.append(child) self.element.appendChild(child.element) child._parent = self if self._isAttached: child.onAttach() return toAppend def removeChild(self, child): assert child in self._children, "{} is not a child of {}".format(child, self) if child._isAttached: child.onDetach() self.element.removeChild(child.element) self._children.remove(child) child._parent = None def removeAllChildren(self): """ Removes all child widgets of the current widget. """ for child in self._children[:]: self.removeChild(child) def isParentOf(self, widget): """ Checks if an object is the parent of widget. :type widget: Widget :param widget: The widget to check for. :return: True, if widget is a child of the object, else False. """ # You cannot be your own child! if self == widget: return False for child in self._children: if child == widget: return True if child.isParentOf(widget): return True return False def isChildOf(self, widget): """ Checks if an object is the child of widget. :type widget: Widget :param widget: The widget to check for. :return: True, if object is a child of widget, else False. """ # You cannot be your own parent! if self == widget: return False parent = self.parent() while parent: if parent == widget: return True parent = widget.parent() return False def hasClass(self, className): """ Determine whether the current widget is assigned the given class :param className: The class name to search for. :type className: str """ if isinstance(className, str) or isinstance(className, unicode): return className in self["class"] else: raise TypeError() def addClass(self, *args): """ Adds a class or a list of classes to the current widget. If the widget already has the class, it is ignored. :param args: A list of class names. This can also be a list. 
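        Illustrative usage (the class names here are arbitrary examples, not part of the API):
        widget.addClass("btn btn--primary") and widget.addClass(["btn", "btn--primary"])
        behave the same; each adds "btn" and "btn--primary" unless the widget already has them.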
:type args: list of str | list of list of str """ for item in args: if isinstance(item, list): self.addClass(*item) elif isinstance(item, str): for sitem in item.split(" "): if not self.hasClass(sitem): self["class"].append(sitem) else: raise TypeError() def removeClass(self, *args): """ Removes a class or a list of classes from the current widget. :param args: A list of class names. This can also be a list. :type args: list of str | list of list of str """ for item in args: if isinstance(item, list): self.removeClass(item) elif isinstance(item, str): for sitem in item.split(" "): if self.hasClass(sitem): self["class"].remove(sitem) else: raise TypeError() def toggleClass(self, on, off=None): """ Toggles the class ``on``. If the widget contains a class ``on``, it is toggled by ``off``. ``off`` can either be a class name that is substituted, or nothing. :param on: Classname to test for. If ``on`` does not exist, but ``off``, ``off`` is replaced by ``on``. :type on: str :param off: Classname to replace if ``on`` existed. :type off: str :return: Returns True, if ``on`` was switched, else False. :rtype: bool """ if self.hasClass(on): self.removeClass(on) if off and not self.hasClass(off): self.addClass(off) return False if off and self.hasClass(off): self.removeClass(off) self.addClass(on) return True def onBlur(self, event): pass def onChange(self, event): pass def onContextMenu(self, event): pass def onFocus(self, event): pass def onFocusIn(self, event): pass def onFocusOut(self, event): pass def onFormChange(self, event): pass def onFormInput(self, event): pass def onInput(self, event): pass def onInvalid(self, event): pass def onReset(self, event): pass def onSelect(self, event): pass def onSubmit(self, event): pass def onKeyDown(self, event): pass def onKeyPress(self, event): pass def onKeyUp(self, event): pass def onClick(self, event): pass def onDblClick(self, event): pass def onDrag(self, event): pass def onDragEnd(self, event): pass def onDragEnter(self, event): pass def onDragLeave(self, event): pass def onDragOver(self, event): pass def onDragStart(self, event): pass def onDrop(self, event): pass def onMouseDown(self, event): pass def onMouseMove(self, event): pass def onMouseOut(self, event): pass def onMouseOver(self, event): pass def onMouseUp(self, event): pass def onMouseWheel(self, event): pass def onScroll(self, event): pass def onTouchStart(self, event): pass def onTouchEnd(self, event): pass def onTouchMove(self, event): pass def onTouchCancel(self, event): pass def focus(self): self.element.focus() def blur(self): self.element.blur() def parent(self): return self._parent def children(self, n=None): """ Access children of widget. If ``n`` is omitted, it returns a list of all child-widgets; Else, it returns the N'th child, or None if its out of bounds. :param n: Optional offset of child widget to return. :type n: int :return: Returns all children or only the requested one. :rtype: list | Widget | None """ if n is None: return self._children[:] try: return self._children[n] except IndexError: return None def sortChildren(self, key): """ Sorts our direct children. They are rearranged on DOM level. Key must be a function accepting one widget as parameter and must return the key used to sort these widgets. 
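        Illustrative example (assumes every child has an id set; the attribute used as sort key is arbitrary):
        container.sortChildren(key=lambda w: w["id"])
        sorts the children by their id, both in the internal child list and on DOM level.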
""" self._children.sort(key=key) tmpl = self._children[:] tmpl.reverse() for c in tmpl: self.element.removeChild(c.element) self.element.insertBefore(c.element, self.element.children.item(0)) def fromHTML(self, html, appendTo=None, bindTo=None, replace=False, vars=None, **kwargs): """ Parses html and constructs its elements as part of self. :param html: HTML code. :param appendTo: The entity where the HTML code is constructed below. This defaults to self in usual case. :param bindTo: The entity where the named objects are bound to. This defaults to self in usual case. :param replace: Clear entire content of appendTo before appending. :param vars: Deprecated; Same as kwargs. :param **kwargs: Additional variables provided as a dict for {{placeholders}} inside the HTML :return: """ if appendTo is None: appendTo = self if bindTo is None: bindTo = self if replace: appendTo.removeAllChildren() # use of vars is deprecated! if isinstance(vars, dict): kwargs.update(vars) return fromHTML(html, appendTo=appendTo, bindTo=bindTo, **kwargs) ######################################################################################################################## # Attribute Collectors ######################################################################################################################## # _attrLabel --------------------------------------------------------------------------------------------------------------- class _attrLabel(object): def _getLabel(self): return self.element.getAttribute("label") def _setLabel(self, val): self.element.setAttribute("label", val) # _attrCharset -------------------------------------------------------------------------------------------------------------- class _attrCharset(object): def _getCharset(self): return self.element._attrCharset def _setCharset(self, val): self.element._attrCharset = val # _attrCite ----------------------------------------------------------------------------------------------------------------- class _attrCite(object): def _getCite(self): return self.element._attrCite def _setCite(self, val): self.element._attrCite = val class _attrDatetime(object): def _getDatetime(self): return self.element.datetime def _setDatetime(self, val): self.element.datetime = val # Form ----------------------------------------------------------------------------------------------------------------- class _attrForm(object): def _getForm(self): return self.element.form def _setForm(self, val): self.element.form = val class _attrAlt(object): def _getAlt(self): return self.element.alt def _setAlt(self, val): self.element.alt = val class _attrAutofocus(object): def _getAutofocus(self): return True if self.element.hasAttribute("autofocus") else False def _setAutofocus(self, val): if val: self.element.setAttribute("autofocus", "") else: self.element.removeAttribute("autofocus") class _attrDisabled(object): pass class _attrChecked(object): def _getChecked(self): return self.element.checked def _setChecked(self, val): self.element.checked = val class _attrIndeterminate(object): def _getIndeterminate(self): return self.element.indeterminate def _setIndeterminate(self, val): self.element.indeterminate = val class _attrName(object): def _getName(self): return self.element.getAttribute("name") def _setName(self, val): self.element.setAttribute("name", val) class _attrValue(object): def _getValue(self): return self.element.value def _setValue(self, val): self.element.value = val class _attrAutocomplete(object): def _getAutocomplete(self): return True if 
self.element.autocomplete == "on" else False def _setAutocomplete(self, val): self.element.autocomplete = "on" if val == True else "off" class _attrRequired(object): def _getRequired(self): return True if self.element.hasAttribute("required") else False def _setRequired(self, val): if val: self.element.setAttribute("required", "") else: self.element.removeAttribute("required") class _attrMultiple(object): def _getMultiple(self): return True if self.element.hasAttribute("multiple") else False def _setMultiple(self, val): if val: self.element.setAttribute("multiple", "") else: self.element.removeAttribute("multiple") class _attrSize(object): def _getSize(self): return self.element.size def _setSize(self, val): self.element.size = val class _attrFor(object): def _getFor(self): return self.element.getAttribute("for") def _setFor(self, val): self.element.setAttribute("for", val) class _attrInputs(_attrRequired): def _getMaxlength(self): return self.element.maxlength def _setMaxlength(self, val): self.element.maxlength = val def _getPlaceholder(self): return self.element.placeholder def _setPlaceholder(self, val): self.element.placeholder = val def _getReadonly(self): return True if self.element.hasAttribute("readonly") else False def _setReadonly(self, val): if val: self.element.setAttribute("readonly", "") else: self.element.removeAttribute("readonly") class _attrFormhead(object): def _getFormaction(self): return self.element.formaction def _setFormaction(self, val): self.element.formaction = val def _getFormenctype(self): return self.element.formenctype def _setFormenctype(self, val): self.element.formenctype = val def _getFormmethod(self): return self.element.formmethod def _setFormmethod(self, val): self.element.formmethod = val def _getFormtarget(self): return self.element.formtarget def _setFormtarget(self, val): self.element.formtarget = val def _getFormnovalidate(self): return True if self.element.hasAttribute("formnovalidate") else False def _setFormnovalidate(self, val): if val: self.element.setAttribute("formnovalidate", "") else: self.element.removeAttribute("formnovalidate") # _attrHref ----------------------------------------------------------------------------------------------------------------- class _attrHref(object): def _getHref(self): """ Url of a Page :param self: """ return self.element.href def _setHref(self, val): """ Url of a Page :param val: URL """ self.element.href = val def _getHreflang(self): return self.element.hreflang def _setHreflang(self, val): self.element.hreflang = val class _attrTarget(object): def _getTarget(self): return self.element.target def _setTarget(self, val): self.element.target = val # _attrMedia ---------------------------------------------------------------------------------------------------------------- class _attrType(object): def _getType(self): return self.element.type def _setType(self, val): self.element.type = val class _attrMedia(_attrType): def _getMedia(self): return self.element.media def _setMedia(self, val): self.element.media = val class _attrDimensions(object): def _getWidth(self): return self.element.width def _setWidth(self, val): self.element.width = val def _getHeight(self): return self.element.height def _setHeight(self, val): self.element.height = val class _attrUsemap(object): def _getUsemap(self): return self.element.usemap def _setUsemap(self, val): self.element.usemap = val class _attrMultimedia(object): def _getAutoplay(self): return True if self.element.hasAttribute("autoplay") else False def _setAutoplay(self, 
val): if val: self.element.setAttribute("autoplay", "") else: self.element.removeAttribute("autoplay") def _getPlaysinline(self): return True if self.element.hasAttribute("playsinline") else False def _setPlaysinline(self, val): if val: self.element.setAttribute("playsinline", "") else: self.element.removeAttribute("playsinline") def _getControls(self): return True if self.element.hasAttribute("controls") else False def _setControls(self, val): if val: self.element.setAttribute("controls", "") else: self.element.removeAttribute("controls") def _getLoop(self): return True if self.element.hasAttribute("loop") else False def _setLoop(self, val): if val: self.element.setAttribute("loop", "") else: self.element.removeAttribute("loop") def _getMuted(self): return True if self.element.hasAttribute("muted") else False def _setMuted(self, val): if val: self.element.setAttribute("muted", "") else: self.element.removeAttribute("muted") def _getPreload(self): return self.element.preload def _setPreload(self, val): self.element.preload = val # _attrRel ------------------------------------------------------------------------------------------------------------------ class _attrRel(object): def _getRel(self): return self.element.rel def _setRel(self, val): self.element.rel = val # _attrSrc ------------------------------------------------------------------------------------------------------------------ class _attrSrc(object): def _getSrc(self): return self.element.src def _setSrc(self, val): self.element.src = val # Svg ------------------------------------------------------------------------------------------------------------------ class _attrSvgViewBox(object): def _getViewbox(self): viewBox = self.element.viewBox try: return " ".join([str(x) for x in [viewBox.baseVal.x, viewBox.baseVal.y, viewBox.baseVal.width, viewBox.baseVal.height]]) except: return "" def _setViewbox(self, val): self.element.setAttribute("viewBox", val) def _getPreserveaspectratio(self): return self.element.preserveAspectRatio def _setPreserveaspectratio(self, val): self.element.setAttribute("preserveAspectRatio", val) class _attrSvgDimensions(object): def _getWidth(self): return self.element.width def _setWidth(self, val): self.element.setAttribute("width", val) def _getHeight(self): return self.element.height def _setHeight(self, val): self.element.setAttribute("height", val) def _getX(self): return self.element.x def _setX(self, val): self.element.setAttribute("x", val) def _getY(self): return self.element.y def _setY(self, val): self.element.setAttribute("y", val) def _getR(self): return self.element.r def _setR(self, val): self.element.setAttribute("r", val) def _getRx(self): return self.element.rx def _setRx(self, val): self.element.setAttribute("rx", val) def _getRy(self): return self.element.ry def _setRy(self, val): self.element.setAttribute("ry", val) def _getCx(self): return self.element.cx def _setCx(self, val): self.element.setAttribute("cx", val) def _getCy(self): return self.element.cy def _setCy(self, val): self.element.setAttribute("cy", val) class _attrSvgPoints(object): def _getPoints(self): return self.element.points def _setPoints(self, val): self.element.setAttribute("points", val) def _getX1(self): return self.element.x1 def _setX1(self, val): self.element.setAttribute("x1", val) def _getY1(self): return self.element.y1 def _setY1(self, val): self.element.setAttribute("y1", val) def _getX2(self): return self.element.x2 def _setX2(self, val): self.element.setAttribute("x2", val) def _getY2(self): return 
self.element.y2 def _setY2(self, val): self.element.setAttribute("y2", val) class _attrSvgTransform(object): def _getTransform(self): return self.element.transform def _setTransform(self, val): self.element.setAttribute("transform", val) class _attrSvgXlink(object): def _getXlinkhref(self): return self.element.getAttribute("xlink:href") def _setXlinkhref(self, val): self.element.setAttribute("xlink:href", val) class _attrSvgStyles(object): def _getFill(self): return self.element.fill def _setFill(self, val): self.element.setAttribute("fill", val) def _getStroke(self): return self.element.stroke def _setStroke(self, val): self.element.setAttribute("stroke", val) class _isVoid(object): pass ######################################################################################################################## # HTML Elements ######################################################################################################################## # A -------------------------------------------------------------------------------------------------------------------- class A(Widget, _attrHref, _attrTarget, _attrMedia, _attrRel, _attrName): _tagName = "a" def _getDownload(self): """ The download attribute specifies the path to a download :returns: filename """ return self.element.download def _setDownload(self, val): """ The download attribute specifies the path to a download :param val: filename """ self.element.download = val # Area ----------------------------------------------------------------------------------------------------------------- class Area(A, _attrAlt, _isVoid): _tagName = "area" def _getCoords(self): return self.element.coords def _setCoords(self, val): self.element.coords = val def _getShape(self): return self.element.shape def _setShape(self, val): self.element.shape = val # Audio ---------------------------------------------------------------------------------------------------------------- class Audio(Widget, _attrSrc, _attrMultimedia): _tagName = "audio" class Bdo(Widget): _tagName = "bdo" # Blockquote ----------------------------------------------------------------------------------------------------------- class Blockquote(Widget): _tagName = "blockquote" def _getBlockquote(self): return self.element.blockquote def _setBlockquote(self, val): self.element.blockquote = val # Body ----------------------------------------------------------------------------------------------------------------- class BodyCls(Widget): def __init__(self, *args, **kwargs): super().__init__(_wrapElem=domGetElementsByTagName("body")[0], *args, **kwargs) self._isAttached = True _body = None def Body(): global _body if _body is None: _body = BodyCls() return _body # Canvas --------------------------------------------------------------------------------------------------------------- class Canvas(Widget, _attrDimensions): _tagName = "canvas" # Command -------------------------------------------------------------------------------------------------------------- class Command(Widget, _attrLabel, _attrType, _attrDisabled, _attrChecked): _tagName = "command" def _getIcon(self): return self.element.icon def _setIcon(self, val): self.element.icon = val def _getRadiogroup(self): return self.element.radiogroup def _setRadiogroup(self, val): self.element.radiogroup = val # _Del ----------------------------------------------------------------------------------------------------------------- class _Del(Widget, _attrCite, _attrDatetime): _tagName = "_del" # Dialog 
-------------------------------------------------------------------------------------------------------------- class Dialog(Widget): _tagName = "dialog" def _getOpen(self): return True if self.element.hasAttribute("open") else False def _setOpen(self, val): if val: self.element.setAttribute("open", "") else: self.element.removeAttribute("open") # Elements ------------------------------------------------------------------------------------------------------------- class Abbr(Widget): _tagName = "abbr" class Address(Widget): _tagName = "address" class Article(Widget): _tagName = "article" class Aside(Widget): _tagName = "aside" class B(Widget): _tagName = "b" class Bdi(Widget): _tagName = "bdi" class Br(Widget, _isVoid): _tagName = "br" class Caption(Widget): _tagName = "caption" class Cite(Widget): _tagName = "cite" class Code(Widget): _tagName = "code" class Datalist(Widget): _tagName = "datalist" class Dfn(Widget): _tagName = "dfn" class Div(Widget): _tagName = "div" class Em(Widget): _tagName = "em" class Embed(Widget, _attrSrc, _attrType, _attrDimensions, _isVoid): _tagName = "embed" class Figcaption(Widget): _tagName = "figcaption" class Figure(Widget): _tagName = "figure" class Footer(Widget): _tagName = "footer" class Header(Widget): _tagName = "header" class H1(Widget): _tagName = "h1" class H2(Widget): _tagName = "h2" class H3(Widget): _tagName = "h3" class H4(Widget): _tagName = "h4" class H5(Widget): _tagName = "h5" class H6(Widget): _tagName = "h6" class Hr(Widget, _isVoid): _tagName = "hr" class I(Widget): _tagName = "i" class Kdb(Widget): _tagName = "kdb" class Legend(Widget): _tagName = "legend" class Mark(Widget): _tagName = "mark" class Noscript(Widget): _tagName = "noscript" class P(Widget): _tagName = "p" class Rq(Widget): _tagName = "rq" class Rt(Widget): _tagName = "rt" class Ruby(Widget): _tagName = "ruby" class S(Widget): _tagName = "s" class Samp(Widget): _tagName = "samp" class Section(Widget): _tagName = "section" class Small(Widget): _tagName = "small" class Strong(Widget): _tagName = "strong" class Sub(Widget): _tagName = "sub" class Summery(Widget): _tagName = "summery" class Sup(Widget): _tagName = "sup" class U(Widget): _tagName = "u" class Var(Widget): _tagName = "var" class Wbr(Widget): _tagName = "wbr" # Form ----------------------------------------------------------------------------------------------------------------- class Button(Widget, _attrDisabled, _attrType, _attrForm, _attrAutofocus, _attrName, _attrValue, _attrFormhead): _tagName = "button" class Fieldset(Widget, _attrDisabled, _attrForm, _attrName): _tagName = "fieldset" class Form(Widget, _attrDisabled, _attrName, _attrTarget, _attrAutocomplete): _tagName = "form" def _getNovalidate(self): return True if self.element.hasAttribute("novalidate") else False def _setNovalidate(self, val): if val: self.element.setAttribute("novalidate", "") else: self.element.removeAttribute("novalidate") def _getAction(self): return self.element.action def _setAction(self, val): self.element.action = val def _getMethod(self): return self.element.method def _setMethod(self, val): self.element.method = val def _getEnctype(self): return self.element.enctype def _setEnctype(self, val): self.element.enctype = val def _getAccept_attrCharset(self): return getattr(self.element, "accept-charset") def _setAccept_attrCharset(self, val): self.element.setAttribute("accept-charset", val) class Input(Widget, _attrDisabled, _attrType, _attrForm, _attrAlt, _attrAutofocus, _attrChecked, _attrIndeterminate, _attrName, 
_attrDimensions, _attrValue, _attrFormhead, _attrAutocomplete, _attrInputs, _attrMultiple, _attrSize, _attrSrc, _isVoid): _tagName = "input" def _getAccept(self): return self.element.accept def _setAccept(self, val): self.element.accept = val def _getList(self): return self.element.list def _setList(self, val): self.element.list = val def _getMax(self): return self.element.max def _setMax(self, val): self.element.max = val def _getMin(self): return self.element.min def _setMin(self, val): self.element.min = val def _getPattern(self): return self.element.pattern def _setPattern(self, val): self.element.pattern = val def _getStep(self): return self.element.step def _setStep(self, val): self.element.step = val class Label(Widget, _attrForm, _attrFor): _tagName = "label" autoIdCounter = 0 def __init__(self, *args, forElem=None, **kwargs): super().__init__(*args, **kwargs) if forElem: if not forElem["id"]: idx = Label.autoIdCounter Label.autoIdCounter += 1 forElem["id"] = "label-autoid-for-{}".format(idx) self["for"] = forElem["id"] class Optgroup(Widget, _attrDisabled, _attrLabel): _tagName = "optgroup" class Option(Widget, _attrDisabled, _attrLabel, _attrValue): _tagName = "option" def _getSelected(self): return True if self.element.selected else False def _setSelected(self, val): if val: self.element.selected = True else: self.element.selected = False class Output(Widget, _attrForm, _attrName, _attrFor): _tagName = "output" class Select(Widget, _attrDisabled, _attrForm, _attrAutofocus, _attrName, _attrRequired, _attrMultiple, _attrSize): _tagName = "select" def _getSelectedIndex(self): return self.element.selectedIndex def _getOptions(self): return self.element.options class Textarea(Widget, _attrDisabled, _attrForm, _attrAutofocus, _attrName, _attrInputs, _attrValue): _tagName = "textarea" def _getCols(self): return self.element.cols def _setCols(self, val): self.element.cols = val def _getRows(self): return self.element.rows def _setRows(self, val): self.element.rows = val def _getWrap(self): return self.element.wrap def _setWrap(self, val): self.element.wrap = val # Head ----------------------------------------------------------------------------------------------------------------- class HeadCls(Widget): def __init__(self, *args, **kwargs): super().__init__(_wrapElem=domGetElementsByTagName("head")[0], *args, **kwargs) self._isAttached = True _head = None def Head(): global _head if _head is None: _head = HeadCls() return _head # Iframe --------------------------------------------------------------------------------------------------------------- class Iframe(Widget, _attrSrc, _attrName, _attrDimensions): _tagName = "iframe" def _getSandbox(self): return self.element.sandbox def _setSandbox(self, val): self.element.sandbox = val def _getSrcdoc(self): return self.element.src def _setSrcdoc(self, val): self.element.src = val def _getSeamless(self): return True if self.element.hasAttribute("seamless") else False def _setSeamless(self, val): if val: self.element.setAttribute("seamless", "") else: self.element.removeAttribute("seamless") # Img ------------------------------------------------------------------------------------------------------------------ class Img(Widget, _attrSrc, _attrDimensions, _attrUsemap, _attrAlt, _isVoid): _tagName = "img" def __init__(self, src=None, *args, **kwargs): super().__init__() if src: self["src"] = src def _getCrossorigin(self): return self.element.crossorigin def _setCrossorigin(self, val): self.element.crossorigin = val def _getIsmap(self): return 
self.element.ismap def _setIsmap(self, val): self.element.ismap = val # Ins ------------------------------------------------------------------------------------------------------------------ class Ins(Widget, _attrCite, _attrDatetime): _tagName = "ins" # Keygen --------------------------------------------------------------------------------------------------------------- class Keygen(Form, _attrAutofocus, _attrDisabled): _tagName = "keygen" def _getChallenge(self): return True if self.element.hasAttribute("challenge") else False def _setChallenge(self, val): if val: self.element.setAttribute("challenge", "") else: self.element.removeAttribute("challenge") def _getKeytype(self): return self.element.keytype def _setKeytype(self, val): self.element.keytype = val # Link ----------------------------------------------------------------------------------------------------------------- class Link(Widget, _attrHref, _attrMedia, _attrRel, _isVoid): _tagName = "link" def _getSizes(self): return self.element.sizes def _setSizes(self, val): self.element.sizes = val # List ----------------------------------------------------------------------------------------------------------------- class Ul(Widget): _tagName = "ul" class Ol(Widget): _tagName = "ol" class Li(Widget): _tagName = "li" class Dl(Widget): _tagName = "dl" class Dt(Widget): _tagName = "dt" class Dd(Widget): _tagName = "dd" # Map ------------------------------------------------------------------------------------------------------------------ class Map(Label, _attrType): _tagName = "map" # Menu ----------------------------------------------------------------------------------------------------------------- class Menu(Widget): _tagName = "menu" # Meta ----------------------------------------------------------------------------------------------------------------- class Meta(Widget, _attrName, _attrCharset, _isVoid): _tagName = "meta" def _getContent(self): return self.element.content def _setContent(self, val): self.element.content = val # Meter ---------------------------------------------------------------------------------------------------------------- class Meter(Form, _attrValue): _tagName = "meter" def _getHigh(self): return self.element.high def _setHigh(self, val): self.element.high = val def _getLow(self): return self.element.low def _setLow(self, val): self.element.low = val def _getMax(self): return self.element.max def _setMax(self, val): self.element.max = val def _getMin(self): return self.element.min def _setMin(self, val): self.element.min = val def _getOptimum(self): return self.element.optimum def _setOptimum(self, val): self.element.optimum = val # Nav ------------------------------------------------------------------------------------------------------------------ class Nav(Widget): _tagName = "nav" # Object ----------------------------------------------------------------------------------------------------------------- class Object(Form, _attrType, _attrName, _attrDimensions, _attrUsemap): _tagName = "object" # Param ----------------------------------------------------------------------------------------------------------------- class Param(Widget, _attrName, _attrValue, _isVoid): _tagName = "param" # Progress ------------------------------------------------------------------------------------------------------------- class Progress(Widget, _attrValue): _tagName = "progress" def _getMax(self): return self.element.max def _setMax(self, val): self.element.max = val # Q 
-------------------------------------------------------------------------------------------------------------------- class Q(Widget, _attrCite): _tagName = "q" # Script ---------------------------------------------------------------------------------------------------------------- class Script(Widget, _attrSrc, _attrCharset): _tagName = "script" def _getAsync(self): return True if self.element.hasAttribute("async") else False def _setAsync(self, val): if val: self.element.setAttribute("async", "") else: self.element.removeAttribute("async") def _getDefer(self): return True if self.element.hasAttribute("defer") else False def _setDefer(self, val): if val: self.element.setAttribute("defer", "") else: self.element.removeAttribute("defer") # Source --------------------------------------------------------------------------------------------------------------- class Source(Widget, _attrMedia, _attrSrc, _isVoid): _tagName = "source" # Span ----------------------------------------------------------------------------------------------------------------- class Span(Widget): _tagName = "span" # Style ---------------------------------------------------------------------------------------------------------------- class Style(Widget, _attrMedia): _tagName = "style" def _getScoped(self): return True if self.element.hasAttribute("scoped") else False def _setScoped(self, val): if val: self.element.setAttribute("scoped", "") else: self.element.removeAttribute("scoped") # SVG ------------------------------------------------------------------------------------------------------------------ class Svg(Widget, _attrSvgViewBox, _attrSvgDimensions, _attrSvgTransform): _tagName = "svg" _namespace = "SVG" def _getVersion(self): return self.element.version def _setVersion(self, val): self.element.setAttribute("version", val) def _getXmlns(self): return self.element.xmlns def _setXmlns(self, val): self.element.setAttribute("xmlns", val) class SvgCircle(Widget, _attrSvgTransform, _attrSvgDimensions): _tagName = "circle" _namespace = "SVG" class SvgEllipse(Widget, _attrSvgTransform, _attrSvgDimensions): _tagName = "ellipse" _namespace = "SVG" class SvgG(Widget, _attrSvgTransform, _attrSvgStyles): _tagName = "g" _namespace = "SVG" def _getSvgTransform(self): return self.element.transform def _setSvgTransform(self, val): self.element.setAttribute("transform", val) class SvgImage(Widget, _attrSvgViewBox, _attrSvgDimensions, _attrSvgTransform, _attrSvgXlink): _tagName = "image" _namespace = "SVG" class SvgLine(Widget, _attrSvgTransform, _attrSvgPoints): _tagName = "line" _namespace = "SVG" class SvgPath(Widget, _attrSvgTransform): _tagName = "path" _namespace = "SVG" def _getD(self): return self.element.d def _setD(self, val): self.element.setAttribute("d", val) def _getPathLength(self): return self.element.pathLength def _setPathLength(self, val): self.element.setAttribute("pathLength", val) class SvgPolygon(Widget, _attrSvgTransform, _attrSvgPoints): _tagName = "polygon" _namespace = "SVG" class SvgPolyline(Widget, _attrSvgTransform, _attrSvgPoints): _tagName = "polyline" _namespace = "SVG" class SvgRect(Widget, _attrSvgDimensions, _attrSvgTransform, _attrSvgStyles): _tagName = "rect" _namespace = "SVG" class SvgText(Widget, _attrSvgDimensions, _attrSvgTransform, _attrSvgStyles): _tagName = "text" _namespace = "SVG" # Table ---------------------------------------------------------------------------------------------------------------- class Tr(Widget): _tagName = "tr" def _getRowspan(self): span = 
self.element.getAttribute("rowspan") return span if span else 1 def _setRowspan(self, span): assert span >= 1, "span may not be negative" self.element.setAttribute("rowspan", span) return self class Td(Widget): _tagName = "td" def _getColspan(self): span = self.element.getAttribute("colspan") return span if span else 1 def _setColspan(self, span): assert span >= 1, "span may not be negative" self.element.setAttribute("colspan", span) return self def _getRowspan(self): span = self.element.getAttribute("rowspan") return span if span else 1 def _setRowspan(self, span): assert span >= 1, "span may not be negative" self.element.setAttribute("rowspan", span) return self class Th(Td): _tagName = "th" class Thead(Widget): _tagName = "thead" class Tbody(Widget): _tagName = "tbody" class ColWrapper(object): def __init__(self, parentElem, *args, **kwargs): super().__init__(*args, **kwargs) self.parentElem = parentElem def __getitem__(self, item): assert isinstance(item, int), "Invalid col-number. Expected int, got {}".format(str(type(item))) if item < 0 or item > len(self.parentElem._children): return None return self.parentElem._children[item] def __setitem__(self, key, value): col = self[key] assert col is not None, "Cannot assign widget to invalid column" col.removeAllChildren() if isinstance(value, list) or isinstance(value, tuple): for el in value: if isinstance(el, Widget) or isinstance(el, TextNode): col.appendChild(value) elif isinstance(value, Widget) or isinstance(value, TextNode): col.appendChild(value) class RowWrapper(object): def __init__(self, parentElem, *args, **kwargs): super().__init__(*args, **kwargs) self.parentElem = parentElem def __getitem__(self, item): assert isinstance(item, int), "Invalid row-number. Expected int, got {}".format(str(type(item))) if item < 0 or item > len(self.parentElem._children): return None return ColWrapper(self.parentElem._children[item]) class Table(Widget): _tagName = "table" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.head = Thead() self.body = Tbody() self.appendChild(self.head) self.appendChild(self.body) def prepareRow(self, row): assert row >= 0, "Cannot create rows with negative index" for child in self.body._children: row -= child["rowspan"] if row < 0: return while row >= 0: self.body.appendChild(Tr()) row -= 1 def prepareCol(self, row, col): assert col >= 0, "Cannot create cols with negative index" self.prepareRow(row) for rowChild in self.body._children: row -= rowChild["rowspan"] if row < 0: for colChild in rowChild._children: col -= colChild["colspan"] if col < 0: return while col >= 0: rowChild.appendChild(Td()) col -= 1 return def prepareGrid(self, rows, cols): for row in range(self.getRowCount(), self.getRowCount() + rows): self.prepareCol(row, cols) def clear(self): for row in self.body._children[:]: for col in row._children[:]: row.removeChild(col) self.body.removeChild(row) def _getCell(self): return RowWrapper(self.body) def getRowCount(self): cnt = 0 for tr in self.body._children: cnt += tr["rowspan"] return cnt # Time ----------------------------------------------------------------------------------------------------------------- class Time(Widget, _attrDatetime): _tagName = "time" # Track ---------------------------------------------------------------------------------------------------------------- class Track(Label, _attrSrc, _isVoid): _tagName = "track" def _getKind(self): return self.element.kind def _setKind(self, val): self.element.kind = val def _getSrclang(self): return 
self.element.srclang def _setSrclang(self, val): self.element.srclang = val def _getDefault(self): return True if self.element.hasAttribute("default") else False def _setDefault(self, val): if val: self.element.setAttribute("default", "") else: self.element.removeAttribute("default") # Video ---------------------------------------------------------------------------------------------------------------- class Video(Widget, _attrSrc, _attrDimensions, _attrMultimedia): _tagName = "video" def _getPoster(self): return self.element.poster def _setPoster(self, val): self.element.poster = val ######################################################################################################################## # Utilities ######################################################################################################################## def unescape(val, maxLength=0): """ Unquotes several HTML-quoted characters in a string. :param val: The value to be unescaped. :type val: str :param maxLength: Cut-off after maxLength characters. A value of 0 means "unlimited". (default) :type maxLength: int :returns: The unquoted string. :rtype: str """ val = val \ .replace("<", "<") \ .replace(">", ">") \ .replace(""", "\"") \ .replace("'", "'") if maxLength > 0: return val[0:maxLength] return val def doesEventHitWidgetOrParents(event, widget): """ Test if event 'event' hits widget 'widget' (or *any* of its parents) """ while widget: if event.target == widget.element: return True widget = widget.parent() return False def doesEventHitWidgetOrChildren(event, widget): """ Test if event 'event' hits widget 'widget' (or *any* of its children) """ if event.target == widget.element: return True for child in widget._children: if doesEventHitWidgetOrChildren(event, child): return True return False def textToHtml(node, text): """ Generates html nodes from text by splitting text into content and into line breaks html5.Br. :param node: The node where the nodes are appended to. :param text: The text to be inserted. """ for (i, part) in enumerate(text.split("\n")): if i > 0: node.appendChild(Br()) node.appendChild(TextNode(part)) def parseInt(s, ret=0): """ Parses a value as int """ if not isinstance(s, str): return int(s) elif s: if s[0] in "+-": ts = s[1:] else: ts = s if ts and all([_ in "0123456789" for _ in ts]): return int(s) return ret def parseFloat(s, ret=0.0): """ Parses a value as float. 
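    Illustrative behaviour (examples only): parseFloat("3.14") returns 3.14,
    parseFloat("x", ret=1.0) returns the fallback 1.0, and a non-string argument
    is passed to float() directly.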
""" if not isinstance(s, str): return float(s) elif s: if s[0] in "+-": ts = s[1:] else: ts = s if ts and ts.count(".") <= 1 and all([_ in ".0123456789" for _ in ts]): return float(s) return ret ######################################################################################################################## # Keycodes ######################################################################################################################## def getKey(event): """ Returns the Key Identifier of the given event Available Codes: https://www.w3.org/TR/2006/WD-DOM-Level-3-Events-20060413/keyset.html#KeySet-Set """ if hasattr(event, "key"): return event.key elif hasattr(event, "keyIdentifier"): if event.keyIdentifier in ["Esc", "U+001B"]: return "Escape" else: return event.keyIdentifier return None def isArrowLeft(event): return getKey(event) in ["ArrowLeft", "Left"] def isArrowUp(event): return getKey(event) in ["ArrowUp", "Up"] def isArrowRight(event): return getKey(event) in ["ArrowRight", "Right"] def isArrowDown(event): return getKey(event) in ["ArrowDown", "Down"] def isEscape(event): return getKey(event) == "Escape" def isReturn(event): return getKey(event) == "Enter" def isControl(event): # The Control (Ctrl) key. return getKey(event) == "Control" def isShift(event): return getKey(event) == "Shift" ######################################################################################################################## # HTML parser ######################################################################################################################## # Global variables required by HTML parser __tags = None __domParser = None def registerTag(tagName, widgetClass, override=True): assert issubclass(widgetClass, Widget), "widgetClass must be a sub-class of Widget!" global __tags if __tags is None: _buildTags() if not override and tagName.lower() in __tags: return attr = [] for fname in dir(widgetClass): if fname.startswith("_set"): attr.append(fname[4:].lower()) __tags[tagName.lower()] = (widgetClass, attr) def tag(cls): assert issubclass(cls, Widget) registerTag(cls._parserTagName or cls.__name__, cls) # do NOT check for cls._tagName here!!! return cls def _buildTags(debug=False): """ Generates a dictionary of all to the html5-library known tags and their associated objects and attributes. """ global __tags if __tags is not None: return if __tags is None: __tags = {} for cname in globals().keys(): if cname.startswith("_"): continue cls = globals()[cname] try: if not issubclass(cls, Widget): continue except: continue registerTag(cls._parserTagName or cls._tagName or cls.__name__, cls, override=False) if debug: for tag in sorted(__tags.keys()): print("{}: {}".format(tag, ", ".join(sorted(__tags[tag][1])))) class HtmlAst(list): pass def parseHTML(html, debug=False): """ Parses the provided HTML-code according to the objects defined in the html5-library. """ def convertEncodedText(txt): """ Convert HTML-encoded text into decoded string. The reason for this function is the handling of HTML entities, which is not properly supported by native JavaScript. We use the browser's DOM parser to to this, according to https://stackoverflow.com/questions/3700326/decode-amp-back-to-in-javascript :param txt: The encoded text. :return: The decoded text. """ global __domParser if jseval is None: return txt if __domParser is None: __domParser = jseval("new DOMParser") dom = __domParser.parseFromString("" + str(txt), "text/html") return dom.body.textContent def scanWhite(l): """ Scan and return whitespace. 
""" ret = "" while l and l[0] in " \t\r\n": ret += l.pop(0) return ret def scanWord(l): """ Scan and return a word. """ ret = "" while l and l[0] not in " \t\r\n" + "<>=\"'": ret += l.pop(0) return ret stack = [] # Obtain tag descriptions, if not already done! global __tags if __tags is None: _buildTags(debug=debug) # Prepare stack and input stack.append((None, None, HtmlAst())) html = [ch for ch in html] # Parse while html: tag = None text = "" # Auto-close void elements (_isVoid), e.g.
<br>, <hr>
, etc. while stack and stack[-1][0] and issubclass(__tags[stack[-1][0]][0], _isVoid): stack.pop() if not stack: break parent = stack[-1][2] while html: ch = html.pop(0) # Comment if html and ch == "<" and "".join(html[:3]) == "!--": html = html[3:] while html and "".join(html[:3]) != "-->": html.pop(0) html = html[3:] # Opening tag elif html and ch == "<" and html[0] != "/": tag = scanWord(html) if tag.lower() in __tags: break text += ch + tag # Closing tag elif html and stack[-1][0] and ch == "<" and html[0] == "/": junk = ch junk += html.pop(0) tag = scanWord(html) junk += tag if stack[-1][0] == tag.lower(): junk += scanWhite(html) if html and html[0] == ">": html.pop(0) stack.pop() tag = None break text += junk tag = None else: text += ch # Append plain text (if not only whitespace) if (text and ((len(text) == 1 and text in ["\t "]) or not all([ch in " \t\r\n" for ch in text]))): # print("text", text) parent.append(convertEncodedText(text)) # Create tag if tag: tag = tag.lower() # print("tag", tag) elem = (tag, {}, HtmlAst()) stack.append(elem) parent.append(elem) while html: scanWhite(html) if not html: break # End of tag > if html[0] == ">": html.pop(0) break # Closing tag at end /> elif html[0] == "/": html.pop(0) scanWhite(html) if html[0] == ">": stack.pop() html.pop(0) break val = att = scanWord(html).lower() if not att: html.pop(0) continue if att in __tags[tag][1] or att in ["[name]", "style", "disabled", "hidden"] or att.startswith("data-"): scanWhite(html) if html[0] == "=": html.pop(0) scanWhite(html) if html[0] in "\"'": ch = html.pop(0) val = "" while html and html[0] != ch: val += html.pop(0) html.pop(0) if att not in elem[1]: elem[1][att] = val else: elem[1][att] += " " + val continue while stack and stack[-1][0]: stack.pop() return stack[0][2] def fromHTML(html, appendTo=None, bindTo=None, debug=False, vars=None, **kwargs): """ Parses the provided HTML code according to the objects defined in the html5-library. html can also be pre-compiled by `parseHTML()` so that it executes faster. Constructs all objects as DOM nodes. The first level is chained into appendTo. If no appendTo is provided, appendTo will be set to html5.Body(). If bindTo is provided, objects are bound to this widget. ```python from vi import html5 div = html5.Div() html5.parse.fromHTML(''' ''', div) div.myLink.appendChild("appended!") ``` """ # Handle defaults if bindTo is None: bindTo = appendTo if isinstance(html, str): html = parseHTML(html, debug=debug) assert isinstance(html, HtmlAst) if isinstance(vars, dict): kwargs.update(vars) def replaceVars(txt): for var, val in kwargs.items(): txt = txt.replace("{{%s}}" % var, str(val) if val is not None else "") return txt def interpret(parent, items): ret = [] for item in items: if isinstance(item, str): txt = TextNode(replaceVars(item)) if parent: parent.appendChild(txt) ret.append(txt) continue tag = item[0] atts = item[1] children = item[2] # Special handling for tables: A "thead" and "tbody" are already part of table! if tag in ["thead", "tbody"] and isinstance(parent, Table): wdg = getattr(parent, tag[1:]) # Usual way: Construct new element and chain it into the parent. else: wdg = __tags[tag][0]() for att, val in atts.items(): val = replaceVars(val) if att == "[name]": # Allow disable binding! 
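					# Sketch of what "[name]" does (the tag and attribute names below are
					# illustrative, not prescribed): parsing '<a [name]="myLink" href="#">x</a>'
					# with bindTo=div creates the A-widget, runs setattr(div, "myLink", widget)
					# and widget.onBind(div, "myLink"), so the link is reachable as div.myLink.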
if not bindTo: continue if getattr(bindTo, val, None): print("Cannot assign name '{}' because it already exists in {}".format(val, bindTo)) elif not (any([val.startswith(x) for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "_"]) and all( [x in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789" + "_" for x in val[1:]])): print("Cannot assign name '{}' because it contains invalid characters".format(val)) else: setattr(bindTo, val, wdg) wdg.onBind(bindTo, val) if debug: print("name '{}' assigned to {}".format(val, bindTo)) elif att == "class": # print(tag, att, val.split()) wdg.addClass(*val.split()) elif att == "disabled": # print(tag, att, val) if val == "disabled": wdg.disable() elif att == "hidden": # print(tag, att, val) if val == "hidden": wdg.hide() elif att == "style": for dfn in val.split(";"): if ":" not in dfn: continue att, val = dfn.split(":", 1) # print(tag, "style", att.strip(), val.strip()) wdg["style"][att.strip()] = val.strip() elif att.startswith("data-"): wdg["data"][att[5:]] = val else: wdg[att] = parseInt(val, val) interpret(wdg, children) if parent and not wdg.parent(): parent.appendChild(wdg) ret.append(wdg) return ret return interpret(appendTo, html) if __name__ == '__main__': print(globals()) lark-1.2.2/docs/ide/app/examples.py000066400000000000000000000100271465673407200171240ustar00rootroot00000000000000 # Examples formattet this way: # "name": ("grammar", "demo-input") examples = { # --- hello.lark --- "hello.lark": (""" start: WORD "," WORD "!" %import common.WORD // imports from terminal library %ignore " " // Disregard spaces in text """, "Hello, World!"), # --- calc.lark --- "calc.lark": (""" ?start: sum | NAME "=" sum -> assign_var ?sum: product | sum "+" product -> add | sum "-" product -> sub ?product: atom | product "*" atom -> mul | product "/" atom -> div ?atom: NUMBER -> number | "-" atom -> neg | NAME -> var | "(" sum ")" %import common.CNAME -> NAME %import common.NUMBER %import common.WS_INLINE %ignore WS_INLINE""", "1 + 2 * 3 + 4"), # --- json.lark --- "json.lark": (""" ?start: value ?value: object | array | string | SIGNED_NUMBER -> number | "true" -> true | "false" -> false | "null" -> null array : "[" [value ("," value)*] "]" object : "{" [pair ("," pair)*] "}" pair : string ":" value string : ESCAPED_STRING %import common.ESCAPED_STRING %import common.SIGNED_NUMBER %import common.WS %ignore WS""", """ [ { "_id": "5edb875cf3d764da55602437", "index": 0, "guid": "3dae2206-5d4d-41fe-b81d-dc8cdba7acaa", "isActive": false, "balance": "$2,872.54", "picture": "http://placehold.it/32x32", "age": 24, "eyeColor": "blue", "name": "Theresa Vargas", "gender": "female", "company": "GEEKOL", "email": "theresavargas@geekol.com", "phone": "+1 (930) 450-3445", "address": "418 Herbert Street, Sexton, Florida, 1375", "about": "Id minim deserunt laborum enim. Veniam commodo incididunt amet aute esse duis veniam occaecat nulla esse aute et deserunt eiusmod. Anim elit ullamco minim magna sint laboris. Est consequat quis deserunt excepteur in magna pariatur laborum quis eu. Ex quis tempor elit qui qui et culpa sunt sit esse mollit cupidatat. Fugiat cillum deserunt enim minim irure reprehenderit est. 
Voluptate nisi quis amet quis incididunt pariatur nostrud Lorem consectetur adipisicing voluptate.\\r\\n", "registered": "2016-11-19T01:02:42 -01:00", "latitude": -25.65267, "longitude": 104.19531, "tags": [ "eiusmod", "reprehenderit", "anim", "sunt", "esse", "proident", "esse" ], "friends": [ { "id": 0, "name": "Roth Herrera" }, { "id": 1, "name": "Callie Christian" }, { "id": 2, "name": "Gracie Whitfield" } ], "greeting": "Hello, Theresa Vargas! You have 6 unread messages.", "favoriteFruit": "banana" }, { "_id": "5edb875c845eb08161a83e64", "index": 1, "guid": "a8ada2c1-e2c7-40d3-96b4-52c93baff7f0", "isActive": false, "balance": "$2,717.04", "picture": "http://placehold.it/32x32", "age": 23, "eyeColor": "green", "name": "Lily Ross", "gender": "female", "company": "RODEOMAD", "email": "lilyross@rodeomad.com", "phone": "+1 (941) 465-3561", "address": "525 Beekman Place, Blodgett, Marshall Islands, 3173", "about": "Aliquip duis proident excepteur eiusmod in quis officia consequat culpa eu et ut. Occaecat reprehenderit tempor mollit do eu magna qui et magna exercitation aliqua. Incididunt exercitation dolor proident eiusmod minim occaecat. Sunt et minim mollit et veniam sint ex. Duis ullamco elit aute eu excepteur reprehenderit officia.\\r\\n", "registered": "2019-11-02T04:06:42 -01:00", "latitude": 17.031701, "longitude": -42.657106, "tags": [ "id", "non", "culpa", "reprehenderit", "esse", "elit", "sit" ], "friends": [ { "id": 0, "name": "Ursula Maldonado" }, { "id": 1, "name": "Traci Huff" }, { "id": 2, "name": "Taylor Holt" } ], "greeting": "Hello, Lily Ross! You have 3 unread messages.", "favoriteFruit": "strawberry" } ]""") } lark-1.2.2/docs/ide/app/ext.py000066400000000000000000000276121465673407200161160ustar00rootroot00000000000000# -*- coding: utf-8 -*- from . import core as html5 from . import utils class Button(html5.Button): def __init__(self, txt=None, callback=None, className=None, *args, **kwargs): super().__init__(*args, **kwargs) self["class"] = "btn" if className: self.addClass(className) self["type"] = "button" if txt is not None: self.setText(txt) self.callback = callback self.sinkEvent("onClick") def setText(self, txt): if txt is not None: self.element.innerHTML = txt self["title"] = txt else: self.element.innerHTML = "" self["title"] = "" def onClick(self, event): event.stopPropagation() event.preventDefault() if self.callback is not None: self.callback(self) class Input(html5.Input): def __init__(self, type="text", placeholder=None, callback=None, id=None, focusCallback=None, *args, **kwargs): """ :param type: Input type. Default: "text :param placeholder: Placeholder text. Default: None :param callback: Function to be called onChanged: callback(id, value) :param id: Optional id of the input element. 
Will be passed to callback :return: """ super().__init__(*args, **kwargs) self["class"] = "input" self["type"] = type if placeholder is not None: self["placeholder"] = placeholder self.callback = callback if id is not None: self["id"] = id self.sinkEvent("onChange") self.focusCallback = focusCallback if focusCallback: self.sinkEvent("onFocus") def onChange(self, event): event.stopPropagation() event.preventDefault() if self.callback is not None: self.callback(self, self["id"], self["value"]) def onFocus(self, event): event.stopPropagation() event.preventDefault() if self.focusCallback is not None: self.focusCallback(self, self["id"], self["value"]) def onDetach(self): super().onDetach() self.callback = None class Popup(html5.Div): def __init__(self, title=None, id=None, className=None, icon=None, enableShortcuts=True, closeable=True, *args, **kwargs): super().__init__("""
""") self.appendChild = self.popupBody.appendChild self.fromHTML = lambda *args, **kwargs: self.popupBody.fromHTML(*args, **kwargs) if kwargs.get("bindTo") else self.popupBody.fromHTML(bindTo=self, *args, **kwargs) self["class"] = "popup popup--center is-active" if className: self.addClass(className) if closeable: closeBtn = Button("×", self.close, className="item-action") closeBtn.removeClass("btn") self.popupHeadItem.appendChild(closeBtn) if title: self.popupHeadline.appendChild(title) if icon: self.popupIcon.appendChild(icon[0]) elif title: self.popupIcon.appendChild(title[0]) else: self.popupIcon.appendChild("Vi") #fixme!!! this _LIBRARY_ is not only used in the Vi... # id can be used to pass information to callbacks self.id = id #FIXME: Implement a global overlay! One popupOverlay next to a list of popups. self.popupOverlay = html5.Div() self.popupOverlay["class"] = "popup-overlay is-active" self.enableShortcuts = enableShortcuts self.onDocumentKeyDownMethod = None self.popupOverlay.appendChild(self) html5.Body().appendChild(self.popupOverlay) #FIXME: Close/Cancel every popup with click on popupCloseBtn without removing the global overlay. def onAttach(self): super(Popup, self).onAttach() if self.enableShortcuts: self.onDocumentKeyDownMethod = self.onDocumentKeyDown # safe reference to method html5.document.addEventListener("keydown", self.onDocumentKeyDownMethod) def onDetach(self): super(Popup, self).onDetach() if self.enableShortcuts: html5.document.removeEventListener("keydown", self.onDocumentKeyDownMethod) def onDocumentKeyDown(self, event): if html5.isEscape(event): self.close() def close(self, *args, **kwargs): html5.Body().removeChild(self.popupOverlay) self.popupOverlay = None class InputDialog(Popup): def __init__(self, text, value="", successHandler=None, abortHandler=None, successLbl="OK", abortLbl="Cancel", placeholder="", *args, **kwargs): super().__init__(*args, **kwargs) self.addClass("popup--inputdialog") self.sinkEvent("onKeyDown", "onKeyUp") self.successHandler = successHandler self.abortHandler = abortHandler self.fromHTML( """
""", vars={ "text": text, "value": value, "placeholder": placeholder } ) # Cancel self.popupFoot.appendChild(Button(abortLbl, self.onCancel, className="btn--cancel btn--danger")) # Okay self.okayBtn = Button(successLbl, self.onOkay, className="btn--okay btn--primary") if not value: self.okayBtn.disable() self.popupFoot.appendChild(self.okayBtn) self.inputElem.focus() def onKeyDown(self, event): if html5.isReturn(event) and self.inputElem["value"]: event.stopPropagation() event.preventDefault() self.onOkay() def onKeyUp(self, event): if self.inputElem["value"]: self.okayBtn.enable() else: self.okayBtn.disable() def onDocumentKeyDown(self, event): if html5.isEscape(event): event.stopPropagation() event.preventDefault() self.onCancel() def onOkay(self, *args, **kwargs): if self.successHandler: self.successHandler(self, self.inputElem["value"]) self.close() def onCancel(self, *args, **kwargs): if self.abortHandler: self.abortHandler(self, self.inputElem["value"]) self.close() class Alert(Popup): """ Just displaying an alerting message box with OK-button. """ def __init__(self, msg, title=None, className=None, okCallback=None, okLabel="OK", icon="!", closeable=True, *args, **kwargs): super().__init__(title, className=None, icon=icon, closeable=closeable, *args, **kwargs) self.addClass("popup--alert") if className: self.addClass(className) self.okCallback = okCallback message = html5.Span() message.addClass("alert-msg") self.popupBody.appendChild(message) if isinstance(msg, str): msg = msg.replace("\n", "
") message.appendChild(msg, bindTo=False) self.sinkEvent("onKeyDown") if closeable: okBtn = Button(okLabel, callback=self.onOkBtnClick) okBtn.addClass("btn--okay btn--primary") self.popupFoot.appendChild(okBtn) okBtn.focus() def drop(self): self.okCallback = None self.close() def onOkBtnClick(self, sender=None): if self.okCallback: self.okCallback(self) self.drop() def onKeyDown(self, event): if html5.isReturn(event): event.stopPropagation() event.preventDefault() self.onOkBtnClick() class YesNoDialog(Popup): def __init__(self, question, title=None, yesCallback=None, noCallback=None, yesLabel="Yes", noLabel="No", icon="?", closeable=False, *args, **kwargs): super().__init__(title, closeable=closeable, icon=icon, *args, **kwargs) self.addClass("popup--yesnodialog") self.yesCallback = yesCallback self.noCallback = noCallback lbl = html5.Span() lbl["class"].append("question") self.popupBody.appendChild(lbl) if isinstance(question, html5.Widget): lbl.appendChild(question) else: utils.textToHtml(lbl, question) if len(noLabel): btnNo = Button(noLabel, className="btn--no", callback=self.onNoClicked) #btnNo["class"].append("btn--no") self.popupFoot.appendChild(btnNo) btnYes = Button(yesLabel, callback=self.onYesClicked) btnYes["class"].append("btn--yes") self.popupFoot.appendChild(btnYes) self.sinkEvent("onKeyDown") btnYes.focus() def onKeyDown(self, event): if html5.isReturn(event): event.stopPropagation() event.preventDefault() self.onYesClicked() def onDocumentKeyDown(self, event): if html5.isEscape(event): event.stopPropagation() event.preventDefault() self.onNoClicked() def drop(self): self.yesCallback = None self.noCallback = None self.close() def onYesClicked(self, *args, **kwargs): if self.yesCallback: self.yesCallback(self) self.drop() def onNoClicked(self, *args, **kwargs): if self.noCallback: self.noCallback(self) self.drop() class SelectDialog(Popup): def __init__(self, prompt, items=None, title=None, okBtn="OK", cancelBtn="Cancel", forceSelect=False, callback=None, *args, **kwargs): super().__init__(title, *args, **kwargs) self["class"].append("popup--selectdialog") self.callback = callback self.items = items assert isinstance(self.items, list) # Prompt if prompt: lbl = html5.Span() lbl["class"].append("prompt") if isinstance(prompt, html5.Widget): lbl.appendChild(prompt) else: utils.textToHtml(lbl, prompt) self.popupBody.appendChild(lbl) # Items if not forceSelect and len(items) <= 3: for idx, item in enumerate(items): if isinstance(item, dict): title = item.get("title") cssc = item.get("class") elif isinstance(item, tuple): title = item[1] cssc = None else: title = item btn = Button(title, callback=self.onAnyBtnClick) btn.idx = idx if cssc: btn.addClass(cssc) self.popupBody.appendChild(btn) else: self.select = html5.Select() self.popupBody.appendChild(self.select) for idx, item in enumerate(items): if isinstance(item, dict): title = item.get("title") elif isinstance(item, tuple): title = item[1] else: title = item opt = html5.Option(title) opt["value"] = str(idx) self.select.appendChild(opt) if okBtn: self.popupFoot.appendChild(Button(okBtn, callback=self.onOkClick)) if cancelBtn: self.popupFoot.appendChild(Button(cancelBtn, callback=self.onCancelClick)) def onAnyBtnClick(self, sender): item = self.items[sender.idx] if isinstance(item, dict) and item.get("callback") and callable(item["callback"]): item["callback"](item) if self.callback: self.callback(item) self.items = None self.close() def onCancelClick(self, sender=None): self.close() def onOkClick(self, sender=None): assert 
self.select["selectedIndex"] >= 0 item = self.items[int(self.select.children(self.select["selectedIndex"])["value"])] if isinstance(item, dict) and item.get("callback") and callable(item["callback"]): item["callback"](item) if self.callback: self.callback(item) self.items = None self.select = None self.close() class TextareaDialog(Popup): def __init__(self, text, value="", successHandler=None, abortHandler=None, successLbl="OK", abortLbl="Cancel", *args, **kwargs): super().__init__(*args, **kwargs) self["class"].append("popup--textareadialog") self.successHandler = successHandler self.abortHandler = abortHandler span = html5.Span() span.element.innerHTML = text self.popupBody.appendChild(span) self.inputElem = html5.Textarea() self.inputElem["value"] = value self.popupBody.appendChild(self.inputElem) okayBtn = Button(successLbl, self.onOkay) okayBtn["class"].append("btn--okay") self.popupFoot.appendChild(okayBtn) cancelBtn = Button(abortLbl, self.onCancel) cancelBtn["class"].append("btn--cancel") self.popupFoot.appendChild(cancelBtn) self.sinkEvent("onKeyDown") self.inputElem.focus() def onDocumentKeyDown(self, event): if html5.isEscape(event): event.stopPropagation() event.preventDefault() self.onCancel() def onOkay(self, *args, **kwargs): if self.successHandler: self.successHandler(self, self.inputElem["value"]) self.close() def onCancel(self, *args, **kwargs): if self.abortHandler: self.abortHandler(self, self.inputElem["value"]) self.close() lark-1.2.2/docs/ide/app/files.json000066400000000000000000000001441465673407200167300ustar00rootroot00000000000000[ "app.py", "examples.py", "html5.py", "core.py", "ext.py", "ignite.py", "utils.py" ] lark-1.2.2/docs/ide/app/html5.py000066400000000000000000000001151465673407200163340ustar00rootroot00000000000000#-*- coding: utf-8 -*- from .core import * from . import ext, utils, ignite lark-1.2.2/docs/ide/app/ignite.py000066400000000000000000000112401465673407200165630ustar00rootroot00000000000000# -*- coding: utf-8 -*- from . 
import core as html5 @html5.tag class Label(html5.Label): _parserTagName = "ignite-label" def __init__(self, *args, **kwargs): super(Label, self).__init__(style="label ignt-label", *args, **kwargs) @html5.tag class Input(html5.Input): _parserTagName = "ignite-input" def __init__(self, *args, **kwargs): super(Input, self).__init__(style="input ignt-input", *args, **kwargs) @html5.tag class Switch(html5.Div): _parserTagName = "ignite-switch" def __init__(self, *args, **kwargs): super(Switch, self).__init__(style="switch ignt-switch", *args, **kwargs) self.input = html5.Input(style="switch-input") self.appendChild(self.input) self.input["type"] = "checkbox" switchLabel = html5.Label(forElem=self.input) switchLabel.addClass("switch-label") self.appendChild(switchLabel) def _setChecked(self, value): self.input["checked"] = bool(value) def _getChecked(self): return self.input["checked"] @html5.tag class Check(html5.Input): _parserTagName = "ignite-check" def __init__(self, *args, **kwargs): super(Check, self).__init__(style="check ignt-check", *args, **kwargs) checkInput = html5.Input() checkInput.addClass("check-input") checkInput["type"] = "checkbox" self.appendChild(checkInput) checkLabel = html5.Label(forElem=checkInput) checkLabel.addClass("check-label") self.appendChild(checkLabel) @html5.tag class Radio(html5.Div): _parserTagName = "ignite-radio" def __init__(self, *args, **kwargs): super(Radio, self).__init__(style="radio ignt-radio", *args, **kwargs) radioInput = html5.Input() radioInput.addClass("radio-input") radioInput["type"] = "radio" self.appendChild(radioInput) radioLabel = html5.Label(forElem=radioInput) radioLabel.addClass("radio-label") self.appendChild(radioLabel) @html5.tag class Select(html5.Select): _parserTagName = "ignite-select" def __init__(self, *args, **kwargs): super(Select, self).__init__(style="select ignt-select", *args, **kwargs) defaultOpt = html5.Option() defaultOpt["selected"] = True defaultOpt["disabled"] = True defaultOpt.element.innerHTML = "" self.appendChild(defaultOpt) @html5.tag class Textarea(html5.Textarea): _parserTagName = "ignite-textarea" def __init__(self, *args, **kwargs): super(Textarea, self).__init__(style="textarea ignt-textarea", *args, **kwargs) @html5.tag class Progress(html5.Progress): _parserTagName = "ignite-progress" def __init__(self, *args, **kwargs): super(Progress, self).__init__(style="progress ignt-progress", *args, **kwargs) @html5.tag class Item(html5.Div): _parserTagName = "ignite-item" def __init__(self, title=None, descr=None, className=None, *args, **kwargs): super(Item, self).__init__(style="item ignt-item", *args, **kwargs) if className: self.addClass(className) self.fromHTML("""
""") if title: self.itemHeadline.appendChild(html5.TextNode(title)) if descr: self.itemSubline = html5.Div() self.addClass("item-subline ignt-item-subline") self.itemSubline.appendChild(html5.TextNode(descr)) self.appendChild(self.itemSubline) @html5.tag class Table(html5.Table): _parserTagName = "ignite-table" def __init__(self, *args, **kwargs): super(Table, self).__init__(*args, **kwargs) self.head.addClass("ignt-table-head") self.body.addClass("ignt-table-body") def prepareRow(self, row): assert row >= 0, "Cannot create rows with negative index" for child in self.body._children: row -= child["rowspan"] if row < 0: return while row >= 0: tableRow = html5.Tr() tableRow.addClass("ignt-table-body-row") self.body.appendChild(tableRow) row -= 1 def prepareCol(self, row, col): assert col >= 0, "Cannot create cols with negative index" self.prepareRow(row) for rowChild in self.body._children: row -= rowChild["rowspan"] if row < 0: for colChild in rowChild._children: col -= colChild["colspan"] if col < 0: return while col >= 0: tableCell = html5.Td() tableCell.addClass("ignt-table-body-cell") rowChild.appendChild(tableCell) col -= 1 return def fastGrid( self, rows, cols, createHidden=False ): colsstr = "".join(['' for i in range(0, cols)]) tblstr = '' for r in range(0, rows): tblstr += '%s' %("is-hidden" if createHidden else "",colsstr) tblstr +="" self.fromHTML(tblstr) lark-1.2.2/docs/ide/app/utils.py000066400000000000000000000037121465673407200164510ustar00rootroot00000000000000# -*- coding: utf-8 -*- from . import core as html5 def unescape(val, maxLength = 0): """ Unquotes several HTML-quoted characters in a string. :param val: The value to be unescaped. :type val: str :param maxLength: Cut-off after maxLength characters. A value of 0 means "unlimited". (default) :type maxLength: int :returns: The unquoted string. :rtype: str """ val = val \ .replace("<", "<") \ .replace(">", ">") \ .replace(""", "\"") \ .replace("'", "'") if maxLength > 0: return val[0:maxLength] return val def doesEventHitWidgetOrParents(event, widget): """ Test if event 'event' hits widget 'widget' (or *any* of its parents) """ while widget: if event.target == widget.element: return widget widget = widget.parent() return None def doesEventHitWidgetOrChildren(event, widget): """ Test if event 'event' hits widget 'widget' (or *any* of its children) """ if event.target == widget.element: return widget for child in widget.children(): if doesEventHitWidgetOrChildren(event, child): return child return None def textToHtml(node, text): """ Generates html nodes from text by splitting text into content and into line breaks html5.Br. :param node: The node where the nodes are appended to. :param text: The text to be inserted. """ for (i, part) in enumerate(text.split("\n")): if i > 0: node.appendChild(html5.Br()) node.appendChild(html5.TextNode(part)) def parseInt(s, ret = 0): """ Parses a value as int """ if not isinstance(s, str): return int(s) elif s: if s[0] in "+-": ts = s[1:] else: ts = s if ts and all([_ in "0123456789" for _ in ts]): return int(s) return ret def parseFloat(s, ret = 0.0): """ Parses a value as float. """ if not isinstance(s, str): return float(s) elif s: if s[0] in "+-": ts = s[1:] else: ts = s if ts and ts.count(".") <= 1 and all([_ in ".0123456789" for _ in ts]): return float(s) return ret lark-1.2.2/docs/ide/is-loading.gif000066400000000000000000000006241465673407200166730ustar00rootroot00000000000000GIF89a+ 1e}1! NETSCAPE2.0!Created with ajaxload.info! ,+ 2˖؃ V˅6XYf'}ޚ g-Iׇ.ij> 2L! 
,+ =Ď acǬ|adʡJCJp,kͱ$eoX}N,bRǯ! ,+ =0acj[o yyԠ Lc6R50G,bR! ,+ @˖ؘ~eL4GQ*t'Ů6dC]+JvE`";lark-1.2.2/docs/ide/lark-logo.png000066400000000000000000000327121465673407200165560ustar00rootroot00000000000000PNG  IHDRb zTXtRaw profile type exifxڭmr9D{$8e؎uXݪb@"3Q?j5V{K_?by~??KU_2H㡽^(}.dqyOP?76^hO|By+a#K[Q99n_}~+\,y/FB{&$| _oɗ>#廊?"gD1vynJFQO28Iy~.g{^W#.J㊓J= U!H7}EE"Kse.++]΍-9,J;ĩX,q߾?}'p%O&O0r(H׻`U*OqV~9OyP "25eM5Ec@%T &H)9W &M\c9WTaB(eԦAJQcYV5mAkZ+D$7,[1jfͺ[ijZoKpڭ758pd̳Lulϱ*KW]klyCnm=N 8N; nnϪUR~}Vct^3*&%Qq hŖJ,v)TR6a'%,'Y*[u ~n/ݿQ@~O]SWzNcq̬kKZlN)P}G!<^sbȣ(ٳRjݢƺ}G'wtsҤZ Di*{%ه TiTfmɷ#HOl̴G&u417hI={T8*ː&q(+srYm,-H@Zwk'D$E~`y~UL -a$15u3 !Nu7Njz.p֐bvHJc_u*E gU<@;8P%ApUW7J;8Bd\ 4^@hWMoD8wSh)Un]wƥdlvB!DB9CɜEL`#ym[hr&)$OBNtpz]}TIoy=W];܉\I`KAgpSizS5ZW`Tz\NE :}F߅@2]g6%'ϝm aWGZPa^j>tQ@ #0~c¦b1۞# og9{Awq›` U6mUWK; I964ܺ$y~&`9wm%gM392'YqVsEW%'b@r#O!M=ϦCN-J{=Q *KIP Zp+d|*,Acd-Z*laey׀M?[Fdowj˅F HZ*t eHĶ\S0AU<>[P iFw!'ȩ]|¦6/ (_ϲyWN6H#wNgF7t'Z3,.xx*=QQ 4Aa},gNe1>F=-~XVvƉV/ Q3EсI/mwfH"@n8m#uRVe봙nyO F@pEgRLMEl-g2USK }C׳KEjoUVSzc.JЄmaI2;g) k{{~<ҋ-b pV'[bBj@aH*m%T_1*" Q$"fRmS4 Cњ5adx eדv=rpҹToF z\|&ۃQ>>'5PEAe\EvZqv${ ѳ(`ء% nt(}&-a{+Xֲg(EV](&)H3l2ZsobWikm܅"(8!3)x;pJ9Cj^pd N2wr!:\OSރNQ{L{ϞpрڨtqyP*Zm`jp@uvCC]z/k%HgrJ.ƌ_0#u3z`Zqb13 sX+6-s.gU]]0z7JGaGt0!ݐ6(.|d,Efd.$9?b +G5,b\3*(iy (Gwc~V:ۏ3]/zzxk18WX 9tq!:&n`6vp>ge]n'h@CbI |Ѯ{B -i79)&Ox uϸGKqwsX>r DWd&Y 7B&QRgI/M3<$4]f aYݯc`E=u r_u3,.327v\?cYkUv"V|q%gGWفCE_lH?L8Јd 6Q8`GVf|:b2:8)r 6çIf\3ck®m>EC)yng2FI ez.2ўX05AQ+=z`<}M: Tf(p0г =Cl7f,2/z^3i9v~TiG` 8Hx͙|{`<;в3y3>*w;9_suAm\|dL*B9\%V}rUY0>neGX0\d#1L0`ֽ: (0)k*ء ۟ GiEL0Z4({şd!x7L}*%1@ݟ݀Q@4z7(۞6/g 3{ e!|k:ߗ\Ą9Cc&AqKvAZǾMR0lrVAu×l)F- 5.7|Ly GSaUwMu4lCjdtw9LdՂݹM}G %cX Z^ϒp_ƿ P}yMh *(;Խ{P9o0mtyGPyۍt*&5RlewGeüe6՚q[ï$h= y R`Bc^lAQI?҅ |g!o7 xUgiCCPICC profilex}=H@_SkEAD!Cu *U(BP+`rФ!Iqq\ ~,V\uup'G'E)IExwqEYmcf2ә1vЍ!2f%)>EyVs.5k1'0ô׉6m>qdxԤ ?r]segTr8L,XibV05I∪/=V9oq֊eV'a(//q X"$PPU'Bc-_"B 0ṛ Zq/)/1 wZqv+/UO+ -rl M.w'C6eWr9)^o}>)*q#y^k=S rXCBbKGD pHYs  tIME!@ IDATx}ypי8Fw")Ycl\kLɬ4夦"ʌSĵ#v3JʶRey&fٍ̮r;wc(%MH@ 4nC"MDJ(KRՍ{^#p0fWT=apxHHtz˲,$S)(ːΤ$ph!-q!Jϖ@.LF6SJ?@,XER b4-n_;>j EmsB>yE^lnL}X ǎbRh)W*@$,/cz2'{sn럛9^)ybAxN A Ÿ@0г㝚9t7 h"not>k-R2t-LM\艱m[^D(x"0.eARjڗoֽѽz-1*՗b}\R8Ӡy岇 P$܁G@RF#={GX)5X(IRIQS`Wc +㯍_s) 6#[N $ܐ4o/Asc }޸\kq%Wi6ץIl69Z u5po/׃D&0ᙙ3tsZr9Pjo1 &3rJˍ/CI<)adN+OgRGzyKrO4:W)d=LMuôœtna~m}sbƞHmmi63( p'b:F}dӉa45TR7ɶf+'sYou Pm@ \&+Z9&DM10Lv2JYIb6")džĢi EBE^gx!CwG3D ?quf%)JJhα725~M7|#I\C1RI}cST*{&_{N(B8|H&SS?" ,\dr>հ*ULUUf  7x&IJgUjS kwy}o"\2DfbIWr\MtZ-]=fk Z޹9Kz귿ݡM]^^ tl4±K.F V}9FFcQ?n%04Y 1rl*RMD:yt/c/bh$zoD0ޭ#G"3pxًIjɩi_{$mX?gH(g+͎p"vT~w?m}z3N lDܻiH3LË77!mj(vU5Ԩ Adb`(A@TWd2_26]evtZT.{dAb幋/+c8OOL_@(\2~wg;8po7~t&!DufPRt;"w K]Ukl25uYLzT`M '''NLeoa~IœL.dso$E~$f]l>b3) Q쬢VXJr\kZ' >7:#W7s<*)} %"48i(1*9ZIekgb67|vE Cӵ'*Cј#ј}屷vF̱D^w(|4D:>W"N~t](A=8뇹Ņ{.^v;d2ƐPwiAZ Do1Ĩ TJ _,g˥h0<p#5'N5vuz]"<AOOvd|fv_u,σk~iF3 <@ L:3ʕ*z4]bO V, v4;rmb@ҩhG~tN&o `g_E"]yػ0o3lz/~8mu|w:;6/\1:?pWaZ wYDRY|)5:mFc nNs"S,a^[Kb!*Zϸ 4 /"v %(b =Ql2 ]N0qFajf~BɞPF}4X71Őwv]=8o/".6Fo1 ,5clVFj|L :&H,0L2ԬM w:lzxjnnh.8E Rzv |wrL{}O:rui28NJPtve;}7r:`2Wr9,u 8@"RzkEjwl=גٱKWbu^L]M)G RP(bkǢUK֊r4;21Xj x9Lli|(\4&uN[(3[([b[tp}c8xilLJ&kJ$QDgs/+LX:$[S̜&sse gG﫛nXюygBhU|@IRxl~S)g6nXHrN^ {f(.bʥ'2fD)%3vRP7 !1]|Xk$ՐU&~p#α&q,HK,wD(̺F 5aZ효9S(e&xtSBiSY2 v /,TUR}xUya-Fk H|QBt5Co;ёF3N:`0 ғ/#2 BfG]ǸPZ]1iIyμCB˨T0,B!g47;t Ok|}O*S{T\] u|uCr[,a5>kJ&EbtC,+H_ t  &A Hu;]31tFOkWUE2뽹n{eg !6m@^ G3Zj"FqSZ? 
W,N~ϣڤZk-F2zշ56Ƿzlkop Uݷg`|lTuҜ=k(zd# s<&S5f՝w;]ΣZw4o_J b%EюH8Fwm}If˩+zbGIh9q}8NLBs\p J #۶u!H()":8=5jȃu߬RN} ~k[SJnݛ e2ag/l>GA{oIIŰ"<D@#(oJLƒf2T M+WFFxT Z1N{3Y!~Ôr6B3bz=r,zrlPx?6SpJ3 WPudzj>IRIeP,Ee PA!h6*^j.x??gOg&k:195=7 Aҿ `tupvqsa B[@#y+) HB$IŸm2}} X"Ng3LVn{qhڟʃJ~: P8kimol4h4:~ Z\E^He2 !C`gGpe@x^`wd6'sg!Y%ò뾎H$W<"$aj@UU cT*"qrr1 L^u%5 N#0 3`XH̞i]*ڣ8lVw0bkT*ZM'oȤӫ7{[\~<Щ8nekȜ!0/,[KR0ju9cj2wH(gnLMGrۢoF,+I̦陷Db4OlE8Z/Ex}md7MR $S7v,) w4o& :rRwg荆 8+;߽[o~~ُ/|`0tO!cеi2@RoP( ,BR^ Bа-ER G JvevZb7w\WQ@("Gv9y<,Î*) B!$hD"e:`e!8Low8h\q J b8~!@ D"AQ)(pP]=,F2P8&J _|eCv(&ʴkYZq[9g>H!_8wmޗ5 E"G51x<Xʁ Gq,="Ah9GVCXBu9ݽۇgkFvWYxHTRZA5ϷfiAٳGJ%h*\0JxdǷWZ?<6ry<|$s)p<O$&S;HPbz(g [j~x1>`kWjww<3-0y^HHe˥]dҾ:9*DBӑH*nY $ ~p<۶ud'hNTbBtj9L" f#2<1x|>P-濜dt_1J b x ;*D @ p'{9>\c[\.&wyMF3S+ D%!hxl%2LJ,CNq cҩ^*5KDBm!\Cq=}=t[ZZWr\%)Zwy<( I&C8Deh-A(le q܍wA07f)Kvg븆vsvfGc?6qΞ9Ƕ8V}ynt}/%gnqfi/|x@MդFQЪԣ8хbrZdd>= _7o Mk1b ǃ plv',m˟c}lvn'xw8.]@(Y 'sU%jFPēTJfQ ENR)/I-܋8?)Zc{'k(㾂fbr.o~ !˰gybx(Ƽ2Td :u+`PkGybx(0LI?wÏ>H9^:<j zgGwg⩧rDߎüTx"ql`o:IENDB`lark-1.2.2/docs/index.rst000066400000000000000000000056321465673407200152620ustar00rootroot00000000000000.. Lark documentation master file, created by sphinx-quickstart on Sun Aug 16 13:09:41 2020. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Welcome to Lark's documentation! ================================ .. toctree:: :maxdepth: 2 :caption: Overview :hidden: philosophy features parsers .. toctree:: :maxdepth: 2 :caption: Tutorials & Guides :hidden: json_tutorial how_to_use how_to_develop recipes examples/index .. toctree:: :maxdepth: 2 :caption: Reference :hidden: grammar tree_construction classes visitors forest tools Lark is a modern parsing library for Python. Lark can parse any context-free grammar. Lark provides: - Advanced grammar language, based on EBNF - Three parsing algorithms to choose from: Earley, LALR(1) and CYK - Automatic tree construction, inferred from your grammar - Fast unicode lexer with regexp support, and automatic line-counting Install Lark -------------- .. code:: bash $ pip install lark Syntax Highlighting ------------------- - `Sublime Text & TextMate`_ - `Visual Studio Code`_ (Or install through the vscode plugin system) - `Intellij & PyCharm`_ - `Vim`_ - `Atom`_ .. _Sublime Text & TextMate: https://github.com/lark-parser/lark_syntax .. _Visual Studio Code: https://github.com/lark-parser/vscode-lark .. _Intellij & PyCharm: https://github.com/lark-parser/intellij-syntax-highlighting .. _Vim: https://github.com/lark-parser/vim-lark-syntax .. _Atom: https://github.com/Alhadis/language-grammars Resources --------- - :doc:`philosophy` - :doc:`features` - `Examples`_ - `Third-party examples`_ - `Online IDE`_ - Tutorials - `How to write a DSL`_ - Implements a toy LOGO-like language with an interpreter - :doc:`json_tutorial` - Teaches you how to use Lark - Unofficial - `Program Synthesis is Possible`_ - Creates a DSL for Z3 - `Using Lark to Parse Text - Robin Reynolds-Haertle (PyCascades 2023) `_ (video presentation) - Guides - :doc:`how_to_use` - :doc:`how_to_develop` - Reference - :doc:`grammar` - :doc:`tree_construction` - :doc:`visitors` - :doc:`forest` - :doc:`classes` - :doc:`tools` - `Cheatsheet (PDF)`_ - Discussion - `Gitter`_ - `Forum (Google Groups)`_ .. _Examples: https://github.com/lark-parser/lark/tree/master/examples .. _Third-party examples: https://github.com/ligurio/lark-grammars .. _Online IDE: https://lark-parser.org/ide .. 
_How to write a DSL: http://blog.erezsh.com/how-to-write-a-dsl-in-python-with-lark/ .. _Program Synthesis is Possible: https://www.cs.cornell.edu/~asampson/blog/minisynth.html .. _Cheatsheet (PDF): _static/lark_cheatsheet.pdf .. _Gitter: https://gitter.im/lark-parser/Lobby .. _Forum (Google Groups): https://groups.google.com/forum/#!forum/lark-parser lark-1.2.2/docs/json_tutorial.md000066400000000000000000000352471465673407200166440ustar00rootroot00000000000000# JSON parser - Tutorial Lark is a parser - a program that accepts a grammar and text, and produces a structured tree that represents that text. In this tutorial we will write a JSON parser in Lark, and explore Lark's various features in the process. It has 5 parts. 1. Writing the grammar 2. Creating the parser 3. Shaping the tree 4. Evaluating the tree 5. Optimizing Knowledge assumed: - Using Python - A basic understanding of how to use regular expressions ## Part 1 - The Grammar Lark accepts its grammars in a format called [EBNF](https://www.wikiwand.com/en/Extended_Backus%E2%80%93Naur_form). It basically looks like this: rule_name : list of rules and TERMINALS to match | another possible list of items | etc. TERMINAL: "some text to match" (*a terminal is a string or a regular expression*) The parser will try to match each rule (left-part) by matching its items (right-part) sequentially, trying each alternative (In practice, the parser is predictive so we don't have to try every alternative). How to structure those rules is beyond the scope of this tutorial, but often it's enough to follow one's intuition. In the case of JSON, the structure is simple: A json document is either a list, or a dictionary, or a string/number/etc. The dictionaries and lists are recursive, and contain other json documents (or "values"). Let's write this structure in EBNF form: ```lark value: dict | list | STRING | NUMBER | "true" | "false" | "null" list : "[" [value ("," value)*] "]" dict : "{" [pair ("," pair)*] "}" pair : STRING ":" value ``` A quick explanation of the syntax: - Parenthesis let us group rules together. - rule\* means *any amount*. That means, zero or more instances of that rule. - [rule] means *optional*. That means zero or one instance of that rule. Lark also supports the rule+ operator, meaning one or more instances. It also supports the rule? operator which is another way to say *optional*. Of course, we still haven't defined "STRING" and "NUMBER". Luckily, both these literals are already defined in Lark's common library: ```lark %import common.ESCAPED_STRING -> STRING %import common.SIGNED_NUMBER -> NUMBER ``` The arrow (->) renames the terminals. But that only adds obscurity in this case, so going forward we'll just use their original names. We'll also take care of the white-space, which is part of the text, by simply matching and then throwing it away. ```lark %import common.WS %ignore WS ``` We tell our parser to ignore whitespace. Otherwise, we'd have to fill our grammar with WS terminals. 
By the way, if you're curious what these terminals signify, they are roughly equivalent to this: ```lark NUMBER : /-?\d+(\.\d+)?([eE][+-]?\d+)?/ STRING : /".*?(?<!\\)"/ %ignore /\s+/ ``` ## Part 2 - Creating the Parser Once we have our grammar, creating the parser is very simple: we instantiate `Lark`, pass it the grammar as a string, and tell it which rule to start from (`start='value'`). That gives us a `json_parser` object with a `parse()` method (the complete script is shown in Part 5). Let's test it out: ```python >>> text = '{"key": ["item0", "item1", 3.14]}' >>> json_parser.parse(text) Tree(value, [Tree(dict, [Tree(pair, [Token(STRING, "key"), Tree(value, [Tree(list, [Tree(value, [Token(STRING, "item0")]), Tree(value, [Token(STRING, "item1")]), Tree(value, [Token(NUMBER, 3.14)])])])])])]) >>> print( _.pretty() ) value dict pair "key" value list value "item0" value "item1" value 3.14 ``` As promised, Lark automagically creates a tree that represents the parsed text. But something is suspiciously missing from the tree. Where are the curly braces, the commas and all the other punctuation literals? Lark automatically filters out literals from the tree, based on the following criteria: - Filter out string literals without a name, or with a name that starts with an underscore. - Keep regexps, even unnamed ones, unless their name starts with an underscore. Unfortunately, this means that it will also filter out literals like "true" and "false", and we will lose that information. The next section, "Shaping the tree" deals with this issue, and others. ## Part 3 - Shaping the Tree We now have a parser that can create a parse tree (or: AST), but the tree has some issues: 1. "true", "false" and "null" are filtered out (test it out yourself!) 2. It has useless branches, like *value*, that clutter up our view. I'll present the solution, and then explain it: ```lark ?value: dict | list | string | SIGNED_NUMBER -> number | "true" -> true | "false" -> false | "null" -> null ... string : ESCAPED_STRING ``` 1. Those little arrows signify *aliases*. An alias is a name for a specific part of the rule. In this case, we will name the *true/false/null* matches, and this way we won't lose the information. We also alias *SIGNED_NUMBER* to mark it for later processing. 2. The question-mark prefixing *value* ("?value") tells the tree-builder to inline this branch if it has only one member. In this case, *value* will always have only one member, and will always be inlined. 3. We turned the *ESCAPED_STRING* terminal into a rule. This way it will appear in the tree as a branch. This is equivalent to aliasing (like we did for the number), but now *string* can also be used elsewhere in the grammar (namely, in the *pair* rule). Here is the new grammar: ```python from lark import Lark json_parser = Lark(r""" ?value: dict | list | string | SIGNED_NUMBER -> number | "true" -> true | "false" -> false | "null" -> null list : "[" [value ("," value)*] "]" dict : "{" [pair ("," pair)*] "}" pair : string ":" value string : ESCAPED_STRING %import common.ESCAPED_STRING %import common.SIGNED_NUMBER %import common.WS %ignore WS """, start='value') ``` And let's test it out: ```python >>> text = '{"key": ["item0", "item1", 3.14, true]}' >>> print( json_parser.parse(text).pretty() ) dict pair string "key" list string "item0" string "item1" number 3.14 true ``` Ah! That is much much nicer. ## Part 4 - Evaluating the tree It's nice to have a tree, but what we really want is a JSON object. The way to do it is to evaluate the tree, using a Transformer. A transformer is a class with methods corresponding to branch names. For each branch, the appropriate method will be called with the children of the branch as its argument, and its return value will replace the branch in the tree.
So let's write a partial transformer, that handles lists and dictionaries: ```python from lark import Transformer class MyTransformer(Transformer): def list(self, items): return list(items) def pair(self, key_value): k, v = key_value return k, v def dict(self, items): return dict(items) ``` And when we run it, we get this: ```python >>> tree = json_parser.parse(text) >>> MyTransformer().transform(tree) {Tree(string, [Token(ANONRE_1, "key")]): [Tree(string, [Token(ANONRE_1, "item0")]), Tree(string, [Token(ANONRE_1, "item1")]), Tree(number, [Token(ANONRE_0, 3.14)]), Tree(true, [])]} ``` This is pretty close. Let's write a full transformer that can handle the terminals too. Also, our definitions of list and dict are a bit verbose. We can do better: ```python from lark import Transformer class TreeToJson(Transformer): def string(self, s): (s,) = s return s[1:-1] def number(self, n): (n,) = n return float(n) list = list pair = tuple dict = dict null = lambda self, _: None true = lambda self, _: True false = lambda self, _: False ``` And when we run it: ```python >>> tree = json_parser.parse(text) >>> TreeToJson().transform(tree) {u'key': [u'item0', u'item1', 3.14, True]} ``` Magic! ## Part 5 - Optimizing ### Step 1 - Benchmark By now, we have a fully working JSON parser, that can accept a string of JSON, and return its Pythonic representation. But how fast is it? Now, of course there are JSON libraries for Python written in C, and we can never compete with them. But since this is applicable to any parser you would write in Lark, let's see how far we can take this. The first step for optimizing is to have a benchmark. For this benchmark I'm going to take data from [json-generator.com/](http://www.json-generator.com/). I took their default suggestion and changed it to 5000 objects. The result is a 6.6MB sparse JSON file. Our first program is going to be just a concatenation of everything we've done so far: ```python import sys from lark import Lark, Transformer json_grammar = r""" ?value: dict | list | string | SIGNED_NUMBER -> number | "true" -> true | "false" -> false | "null" -> null list : "[" [value ("," value)*] "]" dict : "{" [pair ("," pair)*] "}" pair : string ":" value string : ESCAPED_STRING %import common.ESCAPED_STRING %import common.SIGNED_NUMBER %import common.WS %ignore WS """ class TreeToJson(Transformer): def string(self, s): (s,) = s return s[1:-1] def number(self, n): (n,) = n return float(n) list = list pair = tuple dict = dict null = lambda self, _: None true = lambda self, _: True false = lambda self, _: False json_parser = Lark(json_grammar, start='value', lexer='basic') if __name__ == '__main__': with open(sys.argv[1]) as f: tree = json_parser.parse(f.read()) print(TreeToJson().transform(tree)) ``` We run it and get this: $ time python tutorial_json.py json_data > /dev/null real 0m36.257s user 0m34.735s sys 0m1.361s That's unsatisfactory time for a 6MB file. Maybe if we were parsing configuration or a small DSL, but we're trying to handle large amount of data here. Well, turns out there's quite a bit we can do about it! ### Step 2 - LALR(1) So far we've been using the Earley algorithm, which is the default in Lark. Earley is powerful but slow. But it just so happens that our grammar is LR-compatible, and specifically LALR(1) compatible. 
So let's switch to LALR(1) and see what happens: ```python json_parser = Lark(json_grammar, start='value', parser='lalr') ``` $ time python tutorial_json.py json_data > /dev/null real 0m7.554s user 0m7.352s sys 0m0.148s Ah, that's much better. The resulting JSON is of course exactly the same. You can run it for yourself and see. It's important to note that not all grammars are LR-compatible, and so you can't always switch to LALR(1). But there's no harm in trying! If Lark lets you build the grammar, it means you're good to go. ### Step 3 - Tree-less LALR(1) So far, we've built a full parse tree for our JSON, and then transformed it. It's a convenient method, but it's not the most efficient in terms of speed and memory. Luckily, Lark lets us avoid building the tree when parsing with LALR(1). Here's the way to do it: ```python json_parser = Lark(json_grammar, start='value', parser='lalr', transformer=TreeToJson()) if __name__ == '__main__': with open(sys.argv[1]) as f: print( json_parser.parse(f.read()) ) ``` We've used the transformer we've already written, but this time we plug it straight into the parser. Now it can avoid building the parse tree, and just send the data straight into our transformer. The *parse()* method now returns the transformed JSON, instead of a tree. Let's benchmark it: real 0m4.866s user 0m4.722s sys 0m0.121s That's a measurable improvement! Also, this way is more memory efficient. Check out the benchmark table at the end to see just how much. As a general practice, it's recommended to work with parse trees, and only skip the tree-builder when your transformer is already working. ### Step 4 - PyPy PyPy is a JIT engine for running Python, and it's designed to be a drop-in replacement. Lark is written purely in Python, which makes it very suitable for PyPy. Let's get some free performance: $ time pypy tutorial_json.py json_data > /dev/null real 0m1.397s user 0m1.296s sys 0m0.083s PyPy is awesome! ### Conclusion We've brought the run-time down from 36 seconds to 1.1 seconds, in a series of small and simple steps. Now let's compare the benchmarks in a nicely organized table. I measured memory consumption using a little script called [memusg](https://gist.github.com/netj/526585) | Code | CPython Time | PyPy Time | CPython Mem | PyPy Mem |:-----|:-------------|:------------|:----------|:--------- | Lark - Earley *(with lexer)* | 42s | 4s | 1167M | 608M | | Lark - LALR(1) | 8s | 1.53s | 453M | 266M | | Lark - LALR(1) tree-less | 4.76s | 1.23s | 70M | 134M | | PyParsing ([Parser](https://github.com/pyparsing/pyparsing/blob/master/examples/jsonParser.py)) | 32s | 3.53s | 443M | 225M | | funcparserlib ([Parser](https://github.com/vlasovskikh/funcparserlib/blob/master/tests/json.py)) | 8.5s | 1.3s | 483M | 293M | | Parsimonious ([Parser](https://gist.github.com/reclosedev/5222560)) | ? | 5.7s | ? | 1545M | I added a few other parsers for comparison. PyParsing and funcparserlib fare pretty well in their memory usage (they don't build a tree), but they can't compete with the run-time speed of LALR(1). These benchmarks are for Lark's alpha version. I already have several optimizations planned that will significantly improve run-time speed. Once again, shout-out to PyPy for being so effective. ## Afterword This is the end of the tutorial. I hope you liked it and learned a little about Lark. To see what else you can do with Lark, check out the [examples](/examples).
Read the documentation here: https://lark-parser.readthedocs.io/en/latest/ lark-1.2.2/docs/make.bat000066400000000000000000000014501465673407200150200ustar00rootroot00000000000000@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build set SPHINXPROJ=Lark if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd lark-1.2.2/docs/parsers.md000066400000000000000000000121311465673407200154120ustar00rootroot00000000000000# Parsers Lark implements the following parsing algorithms: Earley, LALR(1), and CYK ## Earley An [Earley Parser](https://www.wikiwand.com/en/Earley_parser) is a chart parser capable of parsing any context-free grammar at O(n^3), and O(n^2) when the grammar is unambiguous. It can parse most LR grammars at O(n). Most programming languages are LR, and can be parsed at a linear time. Lark's Earley implementation runs on top of a skipping chart parser, which allows it to use regular expressions, instead of matching characters one-by-one. This is a huge improvement to Earley that is unique to Lark. This feature is used by default, but can also be requested explicitly using `lexer='dynamic'`. It's possible to bypass the dynamic lexing, and use the regular Earley parser with a basic lexer, that tokenizes as an independent first step. Doing so will provide a speed benefit, but will tokenize without using Earley's ambiguity-resolution ability. So choose this only if you know why! Activate with `lexer='basic'` **SPPF & Ambiguity resolution** Lark implements the Shared Packed Parse Forest data-structure for the Earley parser, in order to reduce the space and computation required to handle ambiguous grammars. You can read more about SPPF [here](https://web.archive.org/web/20191229100607/www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest) As a result, Lark can efficiently parse and store every ambiguity in the grammar, when using Earley. Lark provides the following options to combat ambiguity: 1) Lark will choose the best derivation for you (default). Users can choose between different disambiguation strategies, and can prioritize (or demote) individual rules over others, using the rule-priority syntax. 2) Users may choose to receive the set of all possible parse-trees (using ambiguity='explicit'), and choose the best derivation themselves. While simple and flexible, it comes at the cost of space and performance, and so it isn't recommended for highly ambiguous grammars, or very long inputs. 3) As an advanced feature, users may use specialized visitors to iterate the SPPF themselves. There is also [a 3rd party utility for iterating over the SPPF](https://github.com/chanicpanic/lark-ambig-tools). **lexer="dynamic_complete"** Earley's "dynamic" lexer uses regular expressions in order to tokenize the text. It tries every possible combination of terminals, but it matches each terminal exactly once, returning the longest possible match. 
That means, for example, that when `lexer="dynamic"` (which is the default), the terminal `/a+/`, when given the text `"aa"`, will return one result, `aa`, even though `a` would also be correct. This behavior was chosen because it is much faster, and it is usually what you would expect. Setting `lexer="dynamic_complete"` instructs the lexer to consider every possible regexp match. This ensures that the parser will consider and resolve every ambiguity, even inside the terminals themselves. This lexer provides the same capabilities as scannerless Earley, but with different performance tradeoffs. Warning: This lexer can be much slower, especially for open-ended terminals such as `/.*/` ## LALR(1) [LALR(1)](https://www.wikiwand.com/en/LALR_parser) is a very efficient, tried-and-tested parsing algorithm. It's incredibly fast and requires very little memory. It can parse most programming languages (For example: Python and Java). LALR(1) stands for: - Left-to-right parsing order - Rightmost derivation, bottom-up - Lookahead of 1 token Lark comes with an efficient implementation that outperforms every other parsing library for Python (including PLY). Lark extends the traditional YACC-based architecture with a *contextual lexer*, which processes feedback from the parser, making the LALR(1) algorithm stronger than ever. The contextual lexer communicates with the parser, and uses the parser's lookahead prediction to narrow its choice of terminals. So at each point, the lexer only matches the subgroup of terminals that are legal at that parser state, instead of all of the terminals. It’s surprisingly effective at resolving common terminal collisions, and allows one to parse languages that LALR(1) was previously incapable of parsing. (If you're familiar with YACC, you can think of it as automatic lexer-states) This is an improvement to LALR(1) that is unique to Lark. ### Grammar constraints in LALR(1) Due to having only a lookahead of one token, LALR is limited in its ability to choose between rules, when they both match the input. Tips for writing a conforming grammar: - Try to avoid writing different rules that can match the same sequence of characters. - For the best performance, prefer left-recursion over right-recursion. - Consider setting terminal priority only as a last resort. For a better understanding of these constraints, it's recommended to learn how an SLR parser works. SLR is very similar to LALR but much simpler. ## CYK Parser A [CYK parser](https://www.wikiwand.com/en/CYK_algorithm) can parse any context-free grammar at O(n^3*|G|). It's too slow to be practical for simple grammars, but it offers good performance for highly ambiguous grammars. lark-1.2.2/docs/philosophy.md000066400000000000000000000052561465673407200161430ustar00rootroot00000000000000# Philosophy Parsers are innately complicated and confusing. They're difficult to understand, difficult to write, and difficult to use. Even experts on the subject can become baffled by the nuances of these complicated state-machines. Lark's mission is to make the process of writing them as simple and abstract as possible, by following these design principles: ## Design Principles 1. Readability matters 2. Keep the grammar clean and simple 3. Don't force the user to decide on things that the parser can figure out on its own 4. Usability is more important than performance 5. Performance is still very important 6.
Follow the Zen of Python, whenever possible and applicable In accordance with these principles, I arrived at the following design choices: ----------- ## Design Choices ### 1. Separation of code and grammar Grammars are the de-facto reference for your language, and for the structure of your parse-tree. For any non-trivial language, the conflation of code and grammar always turns out convoluted and difficult to read. The grammars in Lark are EBNF-inspired, so they are especially easy to read & work with. ### 2. Always build a parse-tree (unless told not to) Trees are always simpler to work with than state-machines. 1. Trees allow you to see the "state-machine" visually 2. Trees allow your computation to be aware of previous and future states 3. Trees allow you to process the parse in steps, instead of forcing you to do it all at once. And anyway, every parse-tree can be replayed as a state-machine, so there is no loss of information. See this answer in more detail [here](https://github.com/erezsh/lark/issues/4). To improve performance, you can skip building the tree for LALR(1), by providing Lark with a transformer (see the [JSON example](https://github.com/erezsh/lark/blob/master/examples/json_parser.py)). ### 3. Earley is the default The Earley algorithm can accept *any* context-free grammar you throw at it (i.e. any grammar you can write in EBNF, it can parse). That makes it extremely friendly to beginners, who are not aware of the strange and arbitrary restrictions that LALR(1) places on its grammars. As the users grow to understand the structure of their grammar, the scope of their target language, and their performance requirements, they may choose to switch over to LALR(1) to gain a huge performance boost, possibly at the cost of some language features. Both Earley and LALR(1) can use the same grammar, as long as all constraints are satisfied. In short, "Premature optimization is the root of all evil." ### Other design features - Automatically resolve terminal collisions whenever possible - Automatically keep track of line & column numbers lark-1.2.2/docs/recipes.md000066400000000000000000000112721465673407200153720ustar00rootroot00000000000000# Recipes A collection of recipes to use Lark and its various features ## Use a transformer to parse integer tokens Transformers are the common interface for processing matched rules and tokens. They can be used during parsing for better performance. ```python from lark import Lark, Transformer class T(Transformer): def INT(self, tok): "Convert the value of `tok` from string to int, while maintaining line number & column." return tok.update(value=int(tok)) parser = Lark(""" start: INT* %import common.INT %ignore " " """, parser="lalr", transformer=T()) print(parser.parse('3 14 159')) ``` Prints out: ```python Tree(start, [Token(INT, 3), Token(INT, 14), Token(INT, 159)]) ``` ## Collect all comments with lexer_callbacks `lexer_callbacks` can be used to interface with the lexer as it generates tokens. It accepts a dictionary of the form {TOKEN_TYPE: callback} Where callback is of type `f(Token) -> Token` It only works with the basic and contextual lexers. This has the same effect of using a transformer, but can also process ignored tokens. 
```python from lark import Lark comments = [] parser = Lark(""" start: INT* COMMENT: /#.*/ %import common (INT, WS) %ignore COMMENT %ignore WS """, parser="lalr", lexer_callbacks={'COMMENT': comments.append}) parser.parse(""" 1 2 3 # hello # world 4 5 6 """) print(comments) ``` Prints out: ```python [Token(COMMENT, '# hello'), Token(COMMENT, '# world')] ``` *Note: We don't have to return a token, because comments are ignored* ## CollapseAmbiguities Parsing ambiguous texts with earley and `ambiguity='explicit'` produces a single tree with `_ambig` nodes to mark where the ambiguity occurred. However, it's sometimes more convenient instead to work with a list of all possible unambiguous trees. Lark provides a utility transformer for that purpose: ```python from lark import Lark, Tree, Transformer from lark.visitors import CollapseAmbiguities grammar = """ !start: x y !x: "a" "b" | "ab" | "abc" !y: "c" "d" | "cd" | "d" """ parser = Lark(grammar, ambiguity='explicit') t = parser.parse('abcd') for x in CollapseAmbiguities().transform(t): print(x.pretty()) ``` This prints out: start x a b y c d start x ab y cd start x abc y d While convenient, this should be used carefully, as highly ambiguous trees will soon create an exponential explosion of such unambiguous derivations. ## Keeping track of parents when visiting The following visitor assigns a `parent` attribute for every node in the tree. If your tree nodes aren't unique (if there is a shared Tree instance), the assert will fail. ```python class Parent(Visitor): def __default__(self, tree): for subtree in tree.children: if isinstance(subtree, Tree): assert not hasattr(subtree, 'parent') subtree.parent = proxy(tree) ``` ## Unwinding VisitError after a transformer/visitor exception Errors that happen inside visitors and transformers get wrapped inside a `VisitError` exception. This can often be inconvenient, if you wish the actual error to propagate upwards, or if you want to catch it. But, it's easy to unwrap it at the point of calling the transformer, by catching it and raising the `VisitError.orig_exc` attribute. For example: ```python from lark import Lark, Transformer from lark.visitors import VisitError tree = Lark('start: "a"').parse('a') class T(Transformer): def start(self, x): raise KeyError("Original Exception") t = T() try: print( t.transform(tree)) except VisitError as e: raise e.orig_exc ``` ## Adding a Progress Bar to Parsing with tqdm Parsing large files can take a long time, even with the `parser='lalr'` option. To make this process more user-friendly, it's useful to add a progress bar. One way to achieve this is to use the `InteractiveParser` to display each token as it is processed. In this example, we use [tqdm](https://github.com/tqdm/tqdm), but a similar approach should work with GUIs. ```python from tqdm import tqdm def parse_with_progress(parser: Lark, text: str, start=None): last = 0 progress = tqdm(total=len(text)) pi = parser.parse_interactive(text, start=start) for token in pi.iter_parse(): if token.end_pos is not None: progress.update(token.end_pos - last) last = token.end_pos return pi.result ``` Note that we don't simply wrap the iterable because tqdm would not be able to determine the total. Additionally, keep in mind that this implementation relies on the `InteractiveParser` and, therefore, only works with the `LALR(1)` parser, not `earley`. 
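For illustration, here is a small usage sketch of the helper above. The grammar and the input file name are placeholders for this example; any LALR-compatible grammar and any large input text will do:

```python
from lark import Lark

# A tiny LALR grammar, used here only to demonstrate parse_with_progress().
parser = Lark(r"""
    start: WORD+

    %import common.WORD
    %import common.WS
    %ignore WS
""", parser="lalr")

# 'big_input.txt' is a hypothetical large file you want to parse with a progress bar.
with open("big_input.txt") as f:
    result = parse_with_progress(parser, f.read())
```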
lark-1.2.2/docs/requirements.txt000066400000000000000000000003141465673407200166750ustar00rootroot00000000000000# https://docs.readthedocs.io/en/stable/guides/specifying-dependencies.html#specifying-a-requirements-file pillow recommonmark requests==2.28.1 sphinx-gallery sphinx_markdown_tables sphinx_rtd_theme>=1.2 lark-1.2.2/docs/tools.md000066400000000000000000000035551465673407200151050ustar00rootroot00000000000000# Tools (Stand-alone, Nearley) ## Stand-alone parser Lark can generate a stand-alone LALR(1) parser from a grammar. The resulting module provides the same interface as Lark, but with a fixed grammar, and reduced functionality. Run using: ```bash python -m lark.tools.standalone ``` For a play-by-play, read the [tutorial](http://blog.erezsh.com/create-a-stand-alone-lalr1-parser-in-python/) ## Importing grammars from Nearley.js Lark comes with a tool to convert grammars from [Nearley](https://github.com/Hardmath123/nearley), a popular Earley library for Javascript. It uses [Js2Py](https://github.com/PiotrDabkowski/Js2Py) to convert and run the Javascript postprocessing code segments. #### Requirements 1. Install Lark with the `nearley` component: ```bash pip install lark[nearley] ``` 2. Acquire a copy of the Nearley codebase. This can be done using: ```bash git clone https://github.com/Hardmath123/nearley ``` #### Usage The tool can be run using: ```bash python -m lark.tools.nearley ``` Here's an example of how to import nearley's calculator example into Lark: ```bash git clone https://github.com/Hardmath123/nearley python -m lark.tools.nearley nearley/examples/calculator/arithmetic.ne main ./nearley > ncalc.py ``` You can use the output as a regular python module: ```python >>> import ncalc >>> ncalc.parse('sin(pi/4) ^ e') 0.38981434460254655 ``` The Nearley converter also supports an experimental converter for newer JavaScript (ES6+), using the `--es6` flag: ```bash git clone https://github.com/Hardmath123/nearley python -m lark.tools.nearley nearley/examples/calculator/arithmetic.ne main nearley --es6 > ncalc.py ``` #### Notes - Lark currently cannot import templates from Nearley - Lark currently cannot export grammars to Nearley These might get added in the future, if enough users ask for them. lark-1.2.2/docs/tree_construction.md000066400000000000000000000070341465673407200175120ustar00rootroot00000000000000# Tree Construction Reference Lark builds a tree automatically based on the structure of the grammar, where each rule that is matched becomes a branch (node) in the tree, and its children are its matches, in the order of matching. For example, the rule `node: child1 child2` will create a tree node with two children. If it is matched as part of another rule (i.e. if it isn't the root), the new rule's tree node will become its parent. Using `item+` or `item*` will result in a list of items, equivalent to writing `item item item ..`. Using `item?` will return the item if it matched, or nothing. If `maybe_placeholders=True` (the default), then using `[item]` will return the item if it matched, or the value `None`, if it didn't. If `maybe_placeholders=False`, then `[]` behaves like `()?`. ## Terminals Terminals are always values in the tree, never branches. 
Lark filters out certain types of terminals by default, considering them punctuation: - Terminals that won't appear in the tree are: - Unnamed literals (like `"keyword"` or `"+"`) - Terminals whose name starts with an underscore (like `_DIGIT`) - Terminals that *will* appear in the tree are: - Unnamed regular expressions (like `/[0-9]/`) - Named terminals whose name starts with a letter (like `DIGIT`) Note: Terminals composed of literals and other terminals always include the entire match without filtering any part. **Example:** ``` start: PNAME pname PNAME: "(" NAME ")" pname: "(" NAME ")" NAME: /\w+/ %ignore /\s+/ ``` Lark will parse "(Hello) (World)" as: start (Hello) pname World Rules prefixed with `!` will retain all their literals regardless. **Example:** ```perl expr: "(" expr ")" | NAME+ NAME: /\w+/ %ignore " " ``` Lark will parse "((hello world))" as: expr expr expr "hello" "world" The brackets do not appear in the tree by design. The words appear because they are matched by a named terminal. ## Shaping the tree Users can alter the automatic construction of the tree using a collection of grammar features. ### Inlining rules with `_` Rules whose name begins with an underscore will be inlined into their containing rule. **Example:** ```perl start: "(" _greet ")" _greet: /\w+/ /\w+/ ``` Lark will parse "(hello world)" as: start "hello" "world" ### Conditionally inlining rules with `?` Rules that receive a question mark (?) at the beginning of their definition, will be inlined if they have a single child, after filtering. **Example:** ```ruby start: greet greet ?greet: "(" /\w+/ ")" | /\w+/ /\w+/ ``` Lark will parse "hello world (planet)" as: start greet "hello" "world" "planet" ### Pinning rule terminals with `!` Rules that begin with an exclamation mark will keep all their terminals (they won't get filtered). ```perl !expr: "(" expr ")" | NAME+ NAME: /\w+/ %ignore " " ``` Will parse "((hello world))" as: expr ( expr ( expr hello world ) ) Using the `!` prefix is usually a "code smell", and may point to a flaw in your grammar design. ### Aliasing rules Aliases - options in a rule can receive an alias. It will be then used as the branch name for the option, instead of the rule name. **Example:** ```ruby start: greet greet greet: "hello" | "world" -> planet ``` Lark will parse "hello world" as: start greet planet lark-1.2.2/docs/visitors.rst000066400000000000000000000056621465673407200160400ustar00rootroot00000000000000Transformers & Visitors ======================= Transformers & Visitors provide a convenient interface to process the parse-trees that Lark returns. They are used by inheriting from the correct class (visitor or transformer), and implementing methods corresponding to the rule you wish to process. Each method accepts the children as an argument. That can be modified using the ``v_args`` decorator, which allows one to inline the arguments (akin to ``*args``), or add the tree ``meta`` property as an argument. See: `visitors.py`_ .. _visitors.py: https://github.com/lark-parser/lark/blob/master/lark/visitors.py Visitor ------- Visitors visit each node of the tree, and run the appropriate method on it according to the node's data. They work bottom-up, starting with the leaves and ending at the root of the tree. There are two classes that implement the visitor interface: - ``Visitor``: Visit every node (without recursion) - ``Visitor_Recursive``: Visit every node using recursion. Slightly faster. 
Example: :: class IncreaseAllNumbers(Visitor): def number(self, tree): assert tree.data == "number" tree.children[0] += 1 IncreaseAllNumbers().visit(parse_tree) .. autoclass:: lark.visitors.Visitor :members: visit, visit_topdown, __default__ .. autoclass:: lark.visitors.Visitor_Recursive :members: visit, visit_topdown, __default__ Interpreter ----------- .. autoclass:: lark.visitors.Interpreter Example: :: class IncreaseSomeOfTheNumbers(Interpreter): def number(self, tree): tree.children[0] += 1 def skip(self, tree): # skip this subtree. don't change any number node inside it. pass IncreaseSomeOfTheNumbers().visit(parse_tree) Transformer ----------- .. autoclass:: lark.visitors.Transformer :members: transform, __default__, __default_token__, __mul__ Example: :: from lark import Tree, Transformer class EvalExpressions(Transformer): def expr(self, args): return eval(args[0]) t = Tree('a', [Tree('expr', ['1+2'])]) print(EvalExpressions().transform( t )) # Prints: Tree(a, [3]) Example: :: class T(Transformer): INT = int NUMBER = float def NAME(self, name): return lookup_dict.get(name, name) T(visit_tokens=True).transform(tree) .. autoclass:: lark.visitors.Transformer_NonRecursive .. autoclass:: lark.visitors.Transformer_InPlace .. autoclass:: lark.visitors.Transformer_InPlaceRecursive v_args ------ .. autofunction:: lark.visitors.v_args merge_transformers ------------------ .. autofunction:: lark.visitors.merge_transformers Discard ------- ``Discard`` is the singleton instance of ``_DiscardType``. .. autoclass:: lark.visitors._DiscardType VisitError ---------- .. autoclass:: lark.exceptions.VisitError lark-1.2.2/examples/000077500000000000000000000000001465673407200143015ustar00rootroot00000000000000lark-1.2.2/examples/README.rst000066400000000000000000000007101465673407200157660ustar00rootroot00000000000000Examples for Lark ================= **How to run the examples**: After cloning the repo, open the terminal into the root directory of the project, and run the following: .. code:: bash [lark]$ python -m examples. For example, the following will parse all the Python files in the standard library of your local installation: .. code:: bash [lark]$ python -m examples.advanced.python_parser Beginner Examples ~~~~~~~~~~~~~~~~~ lark-1.2.2/examples/__init__.py000066400000000000000000000000001465673407200164000ustar00rootroot00000000000000lark-1.2.2/examples/advanced/000077500000000000000000000000001465673407200160465ustar00rootroot00000000000000lark-1.2.2/examples/advanced/README.rst000066400000000000000000000000441465673407200175330ustar00rootroot00000000000000Advanced Examples ~~~~~~~~~~~~~~~~~ lark-1.2.2/examples/advanced/_json_parser.py000066400000000000000000000033451465673407200211110ustar00rootroot00000000000000""" Simple JSON Parser ================== The code is short and clear, and outperforms every other parser (that's written in Python). 
For an explanation, check out the JSON parser tutorial at /docs/json_tutorial.md (this is here for use by the other examples) """ from lark import Lark, Transformer, v_args json_grammar = r""" ?start: value ?value: object | array | string | SIGNED_NUMBER -> number | "true" -> true | "false" -> false | "null" -> null array : "[" [value ("," value)*] "]" object : "{" [pair ("," pair)*] "}" pair : string ":" value string : ESCAPED_STRING %import common.ESCAPED_STRING %import common.SIGNED_NUMBER %import common.WS %ignore WS """ class TreeToJson(Transformer): @v_args(inline=True) def string(self, s): return s[1:-1].replace('\\"', '"') array = list pair = tuple object = dict number = v_args(inline=True)(float) null = lambda self, _: None true = lambda self, _: True false = lambda self, _: False ### Create the JSON parser with Lark, using the LALR algorithm json_parser = Lark(json_grammar, parser='lalr', # Using the basic lexer isn't required, and isn't usually recommended. # But, it's good enough for JSON, and it's slightly faster. lexer='basic', # Disabling propagate_positions and placeholders slightly improves speed propagate_positions=False, maybe_placeholders=False, # Using an internal transformer is faster and more memory efficient transformer=TreeToJson()) lark-1.2.2/examples/advanced/conf_earley.py000066400000000000000000000020751465673407200207120ustar00rootroot00000000000000""" Earley’s dynamic lexer ====================== Demonstrates the power of Earley’s dynamic lexer on a toy configuration language Using a lexer for configuration files is tricky, because values don't have to be surrounded by delimiters. Using a basic lexer for this just won't work. In this example we use a dynamic lexer and let the Earley parser resolve the ambiguity. Another approach is to use the contextual lexer with LALR. It is less powerful than Earley, but it can handle some ambiguity when lexing and it's much faster. See examples/conf_lalr.py for an example of that approach. """ from lark import Lark parser = Lark(r""" start: _NL? section+ section: "[" NAME "]" _NL item+ item: NAME "=" VALUE? _NL NAME: /\w/+ VALUE: /./+ %import common.NEWLINE -> _NL %import common.WS_INLINE %ignore WS_INLINE """, parser="earley") def test(): sample_conf = """ [bla] a=Hello this="that",4 empty= """ r = parser.parse(sample_conf) print (r.pretty()) if __name__ == '__main__': test() lark-1.2.2/examples/advanced/conf_lalr.py000066400000000000000000000023601465673407200203600ustar00rootroot00000000000000""" LALR’s contextual lexer ======================= This example demonstrates the power of LALR's contextual lexer, by parsing a toy configuration language. The terminals `NAME` and `VALUE` overlap. They can match the same input. A basic lexer would arbitrarily choose one over the other, based on priority, which would lead to a (confusing) parse error. However, due to the unambiguous structure of the grammar, Lark's LALR(1) algorithm knows which one of them to expect at each point during the parse. The lexer then only matches the tokens that the parser expects. The result is a correct parse, something that is impossible with a regular lexer. Another approach is to use the Earley algorithm. It will handle more cases than the contextual lexer, but at the cost of performance. See examples/conf_earley.py for an example of that approach. """ from lark import Lark parser = Lark(r""" start: _NL? section+ section: "[" NAME "]" _NL item+ item: NAME "=" VALUE? 
_NL NAME: /\w/+ VALUE: /./+ %import common.NEWLINE -> _NL %import common.WS_INLINE %ignore WS_INLINE """, parser="lalr") sample_conf = """ [bla] a=Hello this="that",4 empty= """ print(parser.parse(sample_conf).pretty()) lark-1.2.2/examples/advanced/create_ast.py000066400000000000000000000046141465673407200205370ustar00rootroot00000000000000""" Creating an AST from the parse tree =================================== This example demonstrates how to transform a parse-tree into an AST using `lark.ast_utils`. create_transformer() collects every subclass of `Ast` subclass from the module, and creates a Lark transformer that builds the AST with no extra code. This example only works with Python 3. """ import sys from typing import List from dataclasses import dataclass from lark import Lark, ast_utils, Transformer, v_args from lark.tree import Meta this_module = sys.modules[__name__] # # Define AST # class _Ast(ast_utils.Ast): # This will be skipped by create_transformer(), because it starts with an underscore pass class _Statement(_Ast): # This will be skipped by create_transformer(), because it starts with an underscore pass @dataclass class Value(_Ast, ast_utils.WithMeta): "Uses WithMeta to include line-number metadata in the meta attribute" meta: Meta value: object @dataclass class Name(_Ast): name: str @dataclass class CodeBlock(_Ast, ast_utils.AsList): # Corresponds to code_block in the grammar statements: List[_Statement] @dataclass class If(_Statement): cond: Value then: CodeBlock @dataclass class SetVar(_Statement): # Corresponds to set_var in the grammar name: str value: Value @dataclass class Print(_Statement): value: Value class ToAst(Transformer): # Define extra transformation functions, for rules that don't correspond to an AST class. def STRING(self, s): # Remove quotation marks return s[1:-1] def DEC_NUMBER(self, n): return int(n) @v_args(inline=True) def start(self, x): return x # # Define Parser # parser = Lark(""" start: code_block code_block: statement+ ?statement: if | set_var | print if: "if" value "{" code_block "}" set_var: NAME "=" value ";" print: "print" value ";" value: name | STRING | DEC_NUMBER name: NAME %import python (NAME, STRING, DEC_NUMBER) %import common.WS %ignore WS """, parser="lalr", ) transformer = ast_utils.create_transformer(this_module, ToAst()) def parse(text): tree = parser.parse(text) return transformer.transform(tree) # # Test # if __name__ == '__main__': print(parse(""" a = 1; if a { print "a is 1"; a = 2; } """)) lark-1.2.2/examples/advanced/custom_lexer.py000066400000000000000000000025221465673407200211320ustar00rootroot00000000000000""" Custom lexer ============ Demonstrates using a custom lexer to parse a non-textual stream of data You can use a custom lexer to tokenize text when the lexers offered by Lark are too slow, or not flexible enough. You can also use it (as shown in this example) to tokenize streams of objects. 
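For instance, feeding the object stream ['alice', 1, 27, 3, 'bob', 4] through the parser defined below and transforming it with ParseToDict would yield {'alice': [1, 27, 3], 'bob': [4]} (an illustrative subset of the test() run at the bottom of this file).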
""" from lark import Lark, Transformer, v_args from lark.lexer import Lexer, Token class TypeLexer(Lexer): def __init__(self, lexer_conf): pass def lex(self, data): for obj in data: if isinstance(obj, int): yield Token('INT', obj) elif isinstance(obj, (type(''), type(u''))): yield Token('STR', obj) else: raise TypeError(obj) parser = Lark(""" start: data_item+ data_item: STR INT* %declare STR INT """, parser='lalr', lexer=TypeLexer) class ParseToDict(Transformer): @v_args(inline=True) def data_item(self, name, *numbers): return name.value, [n.value for n in numbers] start = dict def test(): data = ['alice', 1, 27, 3, 'bob', 4, 'carrie', 'dan', 8, 6] print(data) tree = parser.parse(data) res = ParseToDict().transform(tree) print('-->') print(res) # prints {'alice': [1, 27, 3], 'bob': [4], 'carrie': [], 'dan': [8, 6]} if __name__ == '__main__': test() lark-1.2.2/examples/advanced/dynamic_complete.py000066400000000000000000000076241465673407200217450ustar00rootroot00000000000000""" Using lexer dynamic_complete ============================ Demonstrates how to use ``lexer='dynamic_complete'`` and ``ambiguity='explicit'`` Sometimes you have data that is highly ambiguous or 'broken' in some sense. When using ``parser='earley'`` and ``lexer='dynamic_complete'``, Lark will be able parse just about anything as long as there is a valid way to generate it from the Grammar, including looking 'into' the Regexes. This examples shows how to parse a json input where the quotes have been replaced by underscores: ``{_foo_:{}, _bar_: [], _baz_: __}`` Notice that underscores might still appear inside strings, so a potentially valid reading of the above is: ``{"foo_:{}, _bar": [], "baz": ""}`` """ from pprint import pprint from lark import Lark, Tree, Transformer, v_args from lark.visitors import Transformer_InPlace GRAMMAR = r""" %import common.SIGNED_NUMBER %import common.WS_INLINE %import common.NEWLINE %ignore WS_INLINE ?start: value ?value: object | array | string | SIGNED_NUMBER -> number | "true" -> true | "false" -> false | "null" -> null array : "[" (value ("," value)*)? "]" object : "{" (pair ("," pair)*)? "}" pair : string ":" value string: STRING STRING : ESCAPED_STRING ESCAPED_STRING: QUOTE_CHAR _STRING_ESC_INNER QUOTE_CHAR QUOTE_CHAR: "_" _STRING_INNER: /.*/ _STRING_ESC_INNER: _STRING_INNER /(? var TEMPLATE_NAME: "$" NAME ?template_start: (stmt | testlist_star_expr _NEWLINE) %ignore /[\t \f]+/ // WS %ignore /\\[\t \f]*\r?\n/ // LINE_CONT %ignore COMMENT """ parser = Lark(TEMPLATED_PYTHON, parser='lalr', start=['single_input', 'file_input', 'eval_input', 'template_start'], postlex=PythonIndenter(), maybe_placeholders=False) def parse_template(s): return parser.parse(s + '\n', start='template_start') def parse_code(s): return parser.parse(s + '\n', start='file_input') # # 2. Define translations using templates (each template code is parsed to a template tree) # pytemplate = TemplateConf(parse=parse_template) translations_3to2 = { 'yield from $a': 'for _tmp in $a: yield _tmp', 'raise $e from $x': 'raise $e', '$a / $b': 'float($a) / $b', } translations_3to2 = {pytemplate(k): pytemplate(v) for k, v in translations_3to2.items()} # # 3. 
Translate and reconstruct Python 3 code into valid Python 2 code # python_reconstruct = PythonReconstructor(parser) def translate_py3to2(code): tree = parse_code(code) tree = TemplateTranslator(translations_3to2).translate(tree) return python_reconstruct.reconstruct(tree) # # Test Code # _TEST_CODE = ''' if a / 2 > 1: yield from [1,2,3] else: raise ValueError(a) from e ''' def test(): print(_TEST_CODE) print(' -----> ') print(translate_py3to2(_TEST_CODE)) if __name__ == '__main__': test() lark-1.2.2/examples/advanced/python2.lark000066400000000000000000000144501465673407200203300ustar00rootroot00000000000000// Python 2 grammar for Lark // NOTE: Work in progress!!! (XXX TODO) // This grammar should parse all python 2.x code successfully, // but the resulting parse-tree is still not well-organized. // Adapted from: https://docs.python.org/2/reference/grammar.html // Adapted by: Erez Shinan // Start symbols for the grammar: // single_input is a single interactive statement; // file_input is a module or sequence of commands read from an input file; // eval_input is the input for the eval() and input() functions. // NB: compound_stmt in single_input is followed by extra _NEWLINE! single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE ?file_input: (_NEWLINE | stmt)* eval_input: testlist _NEWLINE? decorator: "@" dotted_name [ "(" [arglist] ")" ] _NEWLINE decorators: decorator+ decorated: decorators (classdef | funcdef) funcdef: "def" NAME "(" parameters ")" ":" suite parameters: [paramlist] paramlist: param ("," param)* ["," [star_params ["," kw_params] | kw_params]] | star_params ["," kw_params] | kw_params star_params: "*" NAME kw_params: "**" NAME param: fpdef ["=" test] fpdef: NAME | "(" fplist ")" fplist: fpdef ("," fpdef)* [","] ?stmt: simple_stmt | compound_stmt ?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE ?small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | exec_stmt | assert_stmt) expr_stmt: testlist augassign (yield_expr|testlist) -> augassign2 | testlist ("=" (yield_expr|testlist))+ -> assign | testlist augassign: ("+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=") // For normal assignments, additional restrictions enforced by the interpreter print_stmt: "print" ( [ test ("," test)* [","] ] | ">>" test [ ("," test)+ [","] ] ) del_stmt: "del" exprlist pass_stmt: "pass" ?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt break_stmt: "break" continue_stmt: "continue" return_stmt: "return" [testlist] yield_stmt: yield_expr raise_stmt: "raise" [test ["," test ["," test]]] import_stmt: import_name | import_from import_name: "import" dotted_as_names import_from: "from" ("."* dotted_name | "."+) "import" ("*" | "(" import_as_names ")" | import_as_names) ?import_as_name: NAME ["as" NAME] ?dotted_as_name: dotted_name ["as" NAME] import_as_names: import_as_name ("," import_as_name)* [","] dotted_as_names: dotted_as_name ("," dotted_as_name)* dotted_name: NAME ("." 
NAME)* global_stmt: "global" NAME ("," NAME)* exec_stmt: "exec" expr ["in" test ["," test]] assert_stmt: "assert" test ["," test] ?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated if_stmt: "if" test ":" suite ("elif" test ":" suite)* ["else" ":" suite] while_stmt: "while" test ":" suite ["else" ":" suite] for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite] try_stmt: ("try" ":" suite ((except_clause ":" suite)+ ["else" ":" suite] ["finally" ":" suite] | "finally" ":" suite)) with_stmt: "with" with_item ("," with_item)* ":" suite with_item: test ["as" expr] // NB compile.c makes sure that the default except clause is last except_clause: "except" [test [("as" | ",") test]] suite: simple_stmt | _NEWLINE _INDENT _NEWLINE? stmt+ _DEDENT _NEWLINE? // Backward compatibility cruft to support: // [ x for x in lambda: True, lambda: False if x() ] // even while also allowing: // lambda x: 5 if x else 2 // (But not a mix of the two) testlist_safe: old_test [("," old_test)+ [","]] old_test: or_test | old_lambdef old_lambdef: "lambda" [paramlist] ":" old_test ?test: or_test ["if" or_test "else" test] | lambdef ?or_test: and_test ("or" and_test)* ?and_test: not_test ("and" not_test)* ?not_test: "not" not_test | comparison ?comparison: expr (comp_op expr)* comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not" ?expr: xor_expr ("|" xor_expr)* ?xor_expr: and_expr ("^" and_expr)* ?and_expr: shift_expr ("&" shift_expr)* ?shift_expr: arith_expr (("<<"|">>") arith_expr)* ?arith_expr: term (("+"|"-") term)* ?term: factor (("*"|"/"|"%"|"//") factor)* ?factor: ("+"|"-"|"~") factor | power ?power: molecule ["**" factor] // _trailer: "(" [arglist] ")" | "[" subscriptlist "]" | "." NAME ?molecule: molecule "(" [arglist] ")" -> func_call | molecule "[" [subscriptlist] "]" -> getitem | molecule "." NAME -> getattr | atom ?atom: "(" [yield_expr|testlist_comp] ")" -> tuple | "[" [listmaker] "]" | "{" [dictorsetmaker] "}" | "`" testlist1 "`" | "(" test ")" | NAME | number | string+ listmaker: test ( list_for | ("," test)* [","] ) ?testlist_comp: test ( comp_for | ("," test)+ [","] | ",") lambdef: "lambda" [paramlist] ":" test ?subscriptlist: subscript ("," subscript)* [","] subscript: "." "." "." | test | [test] ":" [test] [sliceop] sliceop: ":" [test] ?exprlist: expr ("," expr)* [","] ?testlist: test ("," test)* [","] dictorsetmaker: ( (test ":" test (comp_for | ("," test ":" test)* [","])) | (test (comp_for | ("," test)* [","])) ) classdef: "class" NAME ["(" [testlist] ")"] ":" suite arglist: (argument ",")* (argument [","] | star_args ["," kw_args] | kw_args) star_args: "*" test kw_args: "**" test // The reason that keywords are test nodes instead of NAME is that using NAME // results in an ambiguity. ast.c makes sure it's a NAME. argument: test [comp_for] | test "=" test list_iter: list_for | list_if list_for: "for" exprlist "in" testlist_safe [list_iter] list_if: "if" old_test [list_iter] comp_iter: comp_for | comp_if comp_for: "for" exprlist "in" or_test [comp_iter] comp_if: "if" old_test [comp_iter] testlist1: test ("," test)* yield_expr: "yield" [testlist] number: DEC_NUMBER | HEX_NUMBER | OCT_NUMBER | FLOAT | IMAG_NUMBER string: STRING | LONG_STRING // Tokens COMMENT: /#[^\n]*/ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ STRING : /[ubf]?r?("(?!"").*?(? 
FLOAT %import common.INT -> _INT %import common.CNAME -> NAME IMAG_NUMBER: (_INT | FLOAT) ("j"|"J") %ignore /[\t \f]+/ // WS %ignore /\\[\t \f]*\r?\n/ // LINE_CONT %ignore COMMENT %declare _INDENT _DEDENT lark-1.2.2/examples/advanced/python_parser.py000066400000000000000000000042511465673407200213170ustar00rootroot00000000000000""" Grammar-complete Python Parser ============================== A fully-working Python 2 & 3 parser (but not production ready yet!) This example demonstrates usage of the included Python grammars """ import sys import os, os.path from io import open import glob, time from lark import Lark from lark.indenter import PythonIndenter kwargs = dict(postlex=PythonIndenter(), start='file_input') # Official Python grammar by Lark python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'], parser='lalr', **kwargs) # Local Python2 grammar python_parser2 = Lark.open('python2.lark', rel_to=__file__, parser='lalr', **kwargs) python_parser2_earley = Lark.open('python2.lark', rel_to=__file__, parser='earley', lexer='basic', **kwargs) try: xrange except NameError: chosen_parser = python_parser3 else: chosen_parser = python_parser2 def _read(fn, *args): kwargs = {'encoding': 'iso-8859-1'} with open(fn, *args, **kwargs) as f: return f.read() def _get_lib_path(): if os.name == 'nt': if 'PyPy' in sys.version: return os.path.join(sys.base_prefix, 'lib-python', sys.winver) else: return os.path.join(sys.base_prefix, 'Lib') else: return [x for x in sys.path if x.endswith('%s.%s' % sys.version_info[:2])][0] def test_python_lib(): path = _get_lib_path() start = time.time() files = glob.glob(path+'/*.py') total_kb = 0 for f in files: r = _read(os.path.join(path, f)) kb = len(r) / 1024 print( '%s -\t%.1f kb' % (f, kb)) chosen_parser.parse(r + '\n') total_kb += kb end = time.time() print( "test_python_lib (%d files, %.1f kb), time: %.2f secs"%(len(files), total_kb, end-start) ) def test_earley_equals_lalr(): path = _get_lib_path() files = glob.glob(path+'/*.py') for f in files: print( f ) tree1 = python_parser2.parse(_read(os.path.join(path, f)) + '\n') tree2 = python_parser2_earley.parse(_read(os.path.join(path, f)) + '\n') assert tree1 == tree2 if __name__ == '__main__': test_python_lib() # test_earley_equals_lalr() # python_parser3.parse(_read(sys.argv[1]) + '\n') lark-1.2.2/examples/advanced/qscintilla_json.py000066400000000000000000000137341465673407200216240ustar00rootroot00000000000000""" Syntax Highlighting =================== This example shows how to write a syntax-highlighted editor with Qt and Lark Requirements: PyQt5==5.15.8 QScintilla==2.13.4 """ import sys import textwrap from PyQt5.QtWidgets import QApplication from PyQt5.QtGui import QColor, QFont, QFontMetrics from PyQt5.Qsci import QsciScintilla from PyQt5.Qsci import QsciLexerCustom from lark import Lark class LexerJson(QsciLexerCustom): def __init__(self, parent=None): super().__init__(parent) self.create_parser() self.create_styles() def create_styles(self): deeppink = QColor(249, 38, 114) khaki = QColor(230, 219, 116) mediumpurple = QColor(174, 129, 255) mediumturquoise = QColor(81, 217, 205) yellowgreen = QColor(166, 226, 46) lightcyan = QColor(213, 248, 232) darkslategrey = QColor(39, 40, 34) styles = { 0: mediumturquoise, 1: mediumpurple, 2: yellowgreen, 3: deeppink, 4: khaki, 5: lightcyan } for style, color in styles.items(): self.setColor(color, style) self.setPaper(darkslategrey, style) self.setFont(self.parent().font(), style) self.token_styles = { "COLON": 5, "COMMA": 5, "LBRACE": 5, "LSQB": 5, 
"RBRACE": 5, "RSQB": 5, "FALSE": 0, "NULL": 0, "TRUE": 0, "STRING": 4, "NUMBER": 1, } def create_parser(self): grammar = ''' anons: ":" "{" "}" "," "[" "]" TRUE: "true" FALSE: "false" NULL: "NULL" %import common.ESCAPED_STRING -> STRING %import common.SIGNED_NUMBER -> NUMBER %import common.WS %ignore WS ''' self.lark = Lark(grammar, parser=None, lexer='basic') # All tokens: print([t.name for t in self.lark.parser.lexer.tokens]) def defaultPaper(self, style): return QColor(39, 40, 34) def language(self): return "Json" def description(self, style): return {v: k for k, v in self.token_styles.items()}.get(style, "") def styleText(self, start, end): self.startStyling(start) text = self.parent().text()[start:end] last_pos = 0 try: for token in self.lark.lex(text): ws_len = token.start_pos - last_pos if ws_len: self.setStyling(ws_len, 0) # whitespace token_len = len(bytearray(token, "utf-8")) self.setStyling( token_len, self.token_styles.get(token.type, 0)) last_pos = token.start_pos + token_len except Exception as e: print(e) class EditorAll(QsciScintilla): def __init__(self, parent=None): super().__init__(parent) # Set font defaults font = QFont() font.setFamily('Consolas') font.setFixedPitch(True) font.setPointSize(8) font.setBold(True) self.setFont(font) # Set margin defaults fontmetrics = QFontMetrics(font) self.setMarginsFont(font) self.setMarginWidth(0, fontmetrics.width("000") + 6) self.setMarginLineNumbers(0, True) self.setMarginsForegroundColor(QColor(128, 128, 128)) self.setMarginsBackgroundColor(QColor(39, 40, 34)) self.setMarginType(1, self.SymbolMargin) self.setMarginWidth(1, 12) # Set indentation defaults self.setIndentationsUseTabs(False) self.setIndentationWidth(4) self.setBackspaceUnindents(True) self.setIndentationGuides(True) # self.setFolding(QsciScintilla.CircledFoldStyle) # Set caret defaults self.setCaretForegroundColor(QColor(247, 247, 241)) self.setCaretWidth(2) # Set selection color defaults self.setSelectionBackgroundColor(QColor(61, 61, 52)) self.resetSelectionForegroundColor() # Set multiselection defaults self.SendScintilla(QsciScintilla.SCI_SETMULTIPLESELECTION, True) self.SendScintilla(QsciScintilla.SCI_SETMULTIPASTE, 1) self.SendScintilla( QsciScintilla.SCI_SETADDITIONALSELECTIONTYPING, True) lexer = LexerJson(self) self.setLexer(lexer) EXAMPLE_TEXT = textwrap.dedent("""\ { "_id": "5b05ffcbcf8e597939b3f5ca", "about": "Excepteur consequat commodo esse voluptate aute aliquip ad sint deserunt commodo eiusmod irure. Sint aliquip sit magna duis eu est culpa aliqua excepteur ut tempor nulla. Aliqua ex pariatur id labore sit. Quis sit ex aliqua veniam exercitation laboris anim adipisicing. 
Lorem nisi reprehenderit ullamco labore qui sit ut aliqua tempor consequat pariatur proident.", "address": "665 Malbone Street, Thornport, Louisiana, 243", "age": 23, "balance": "$3,216.91", "company": "BULLJUICE", "email": "elisekelley@bulljuice.com", "eyeColor": "brown", "gender": "female", "guid": "d3a6d865-0f64-4042-8a78-4f53de9b0707", "index": 0, "isActive": false, "isActive2": true, "latitude": -18.660714, "longitude": -85.378048, "name": "Elise Kelley", "phone": "+1 (808) 543-3966", "picture": "http://placehold.it/32x32", "registered": "2017-09-30T03:47:40 -02:00", "tags": [ "et", "nostrud", "in", "fugiat", "incididunt", "labore", "nostrud" ] }\ """) def main(): app = QApplication(sys.argv) ex = EditorAll() ex.setWindowTitle(__file__) ex.setText(EXAMPLE_TEXT) ex.resize(800, 600) ex.show() sys.exit(app.exec_()) if __name__ == "__main__": main() lark-1.2.2/examples/advanced/reconstruct_json.py000066400000000000000000000025651465673407200220340ustar00rootroot00000000000000""" Reconstruct a JSON ================== Demonstrates the experimental text-reconstruction feature The Reconstructor takes a parse tree (already filtered from punctuation, of course), and reconstructs it into correct text, that can be parsed correctly. It can be useful for creating "hooks" to alter data before handing it to other parsers. You can also use it to generate samples from scratch. """ import json from lark import Lark from lark.reconstruct import Reconstructor from _json_parser import json_grammar test_json = ''' { "empty_object" : {}, "empty_array" : [], "booleans" : { "YES" : true, "NO" : false }, "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ], "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ], "nothing" : null } ''' def test_earley(): json_parser = Lark(json_grammar, maybe_placeholders=False) tree = json_parser.parse(test_json) new_json = Reconstructor(json_parser).reconstruct(tree) print (new_json) print (json.loads(new_json) == json.loads(test_json)) def test_lalr(): json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False) tree = json_parser.parse(test_json) new_json = Reconstructor(json_parser).reconstruct(tree) print (new_json) print (json.loads(new_json) == json.loads(test_json)) test_earley() test_lalr() lark-1.2.2/examples/advanced/reconstruct_python.py000066400000000000000000000050701465673407200223760ustar00rootroot00000000000000""" Reconstruct Python ================== Demonstrates how Lark's experimental text-reconstruction feature can recreate functional Python code from its parse-tree, using just the correct grammar and a small formatter. 
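As a rough sanity check, the test() function below parses this very file, reconstructs its source from the parse tree, and asserts that re-parsing the reconstructed text yields an equal tree.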
""" from lark import Token, Lark from lark.reconstruct import Reconstructor from lark.indenter import PythonIndenter # Official Python grammar by Lark python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'], parser='lalr', postlex=PythonIndenter(), start='file_input', maybe_placeholders=False # Necessary for reconstructor ) SPACE_AFTER = set(',+-*/~@<>="|:') SPACE_BEFORE = (SPACE_AFTER - set(',:')) | set('\'') def special(sym): return Token('SPECIAL', sym.name) def postproc(items): stack = ['\n'] actions = [] last_was_whitespace = True for item in items: if isinstance(item, Token) and item.type == 'SPECIAL': actions.append(item.value) else: if actions: assert actions[0] == '_NEWLINE' and '_NEWLINE' not in actions[1:], actions for a in actions[1:]: if a == '_INDENT': stack.append(stack[-1] + ' ' * 4) else: assert a == '_DEDENT' stack.pop() actions.clear() yield stack[-1] last_was_whitespace = True if not last_was_whitespace: if item[0] in SPACE_BEFORE: yield ' ' yield item last_was_whitespace = item[-1].isspace() if not last_was_whitespace: if item[-1] in SPACE_AFTER: yield ' ' last_was_whitespace = True yield "\n" class PythonReconstructor: def __init__(self, parser): self._recons = Reconstructor(parser, {'_NEWLINE': special, '_DEDENT': special, '_INDENT': special}) def reconstruct(self, tree): return self._recons.reconstruct(tree, postproc) def test(): python_reconstructor = PythonReconstructor(python_parser3) self_contents = open(__file__).read() tree = python_parser3.parse(self_contents+'\n') output = python_reconstructor.reconstruct(tree) tree_new = python_parser3.parse(output) print(tree.pretty()) print(tree_new.pretty()) # assert tree.pretty() == tree_new.pretty() assert tree == tree_new print(output) if __name__ == '__main__': test() lark-1.2.2/examples/advanced/template_lark.lark000066400000000000000000000027601465673407200215520ustar00rootroot00000000000000start: (_item | _NL)* _item: rule | token | statement _rule_or_token: RULE | TOKEN rule: RULE rule_params priority? ":" expansions{_rule_or_token} _NL token: TOKEN priority? ":" expansions{TOKEN} _NL rule_params: ["{" RULE ("," RULE)* "}"] priority: "." NUMBER statement: "%ignore" expansions{TOKEN} _NL -> ignore | "%import" import_path{_rule_or_token} ["->" _rule_or_token] _NL -> import | "%import" import_path{_rule_or_token} name_list{_rule_or_token} _NL -> multi_import | "%declare" TOKEN+ -> declare !import_path{name}: "."? name ("." name)* name_list{name}: "(" name ("," name)* ")" ?expansions{name}: alias{name} (_VBAR alias{name})* ?alias{name}: expansion{name} ["->" RULE] ?expansion{name}: expr{name}* ?expr{name}: atom{name} [OP | "~" NUMBER [".." NUMBER]] ?atom{name}: "(" expansions{name} ")" | "[" expansions{name} "]" -> maybe | value{name} ?value{name}: STRING ".." STRING -> literal_range | name | (REGEXP | STRING) -> literal | name "{" value{name} ("," value{name})* "}" -> template_usage _VBAR: _NL? "|" OP: /[+*]|[?](?![a-z])/ RULE: /!?[_?]?[a-z][_a-z0-9]*/ TOKEN: /_?[A-Z][_A-Z0-9]*/ STRING: _STRING "i"? 
REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/\n])*?\/[imslux]*/ _NL: /(\r?\n)+\s*/ %import common.ESCAPED_STRING -> _STRING %import common.INT -> NUMBER %import common.WS_INLINE COMMENT: /\s*/ "//" /[^\n]/* %ignore WS_INLINE %ignore COMMENT lark-1.2.2/examples/advanced/templates.py000066400000000000000000000010261465673407200204150ustar00rootroot00000000000000""" Templates ========= This example shows how to use Lark's templates to achieve cleaner grammars """ from lark import Lark grammar = r""" start: list | dict list: "[" _seperated{atom, ","} "]" dict: "{" _seperated{key_value, ","} "}" key_value: atom ":" atom _seperated{x, sep}: x (sep x)* // Define a sequence of 'x sep x sep x ...' atom: NUMBER | ESCAPED_STRING %import common (NUMBER, ESCAPED_STRING, WS) %ignore WS """ parser = Lark(grammar) print(parser.parse('[1, "a", 2]')) print(parser.parse('{"a": 2, "b": 6}')) lark-1.2.2/examples/advanced/tree_forest_transformer.py000066400000000000000000000023541465673407200233670ustar00rootroot00000000000000""" Transform a Forest ================== This example demonstrates how to subclass ``TreeForestTransformer`` to directly transform a SPPF. """ from lark import Lark from lark.parsers.earley_forest import TreeForestTransformer, handles_ambiguity, Discard class CustomTransformer(TreeForestTransformer): @handles_ambiguity def sentence(self, trees): return next(tree for tree in trees if tree.data == 'simple') def simple(self, children): children.append('.') return self.tree_class('simple', children) def adj(self, children): return Discard def __default_token__(self, token): return token.capitalize() grammar = """ sentence: noun verb noun -> simple | noun verb "like" noun -> comparative noun: adj? NOUN verb: VERB adj: ADJ NOUN: "flies" | "bananas" | "fruit" VERB: "like" | "flies" ADJ: "fruit" %import common.WS %ignore WS """ parser = Lark(grammar, start='sentence', ambiguity='forest') sentence = 'fruit flies like bananas' forest = parser.parse(sentence) tree = CustomTransformer(resolve_ambiguity=False).transform(forest) print(tree.pretty()) # Output: # # simple # noun Flies # verb Like # noun Bananas # . # lark-1.2.2/examples/calc.py000066400000000000000000000031311465673407200155530ustar00rootroot00000000000000""" Basic calculator ================ A simple example of a REPL calculator This example shows how to write a basic calculator with variables. 
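For example (mirroring the test() function below), calc("a = 1+2") evaluates to 3.0, after which calc("1+a*-3") evaluates to -8.0.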
""" from lark import Lark, Transformer, v_args try: input = raw_input # For Python2 compatibility except NameError: pass calc_grammar = """ ?start: sum | NAME "=" sum -> assign_var ?sum: product | sum "+" product -> add | sum "-" product -> sub ?product: atom | product "*" atom -> mul | product "/" atom -> div ?atom: NUMBER -> number | "-" atom -> neg | NAME -> var | "(" sum ")" %import common.CNAME -> NAME %import common.NUMBER %import common.WS_INLINE %ignore WS_INLINE """ @v_args(inline=True) # Affects the signatures of the methods class CalculateTree(Transformer): from operator import add, sub, mul, truediv as div, neg number = float def __init__(self): self.vars = {} def assign_var(self, name, value): self.vars[name] = value return value def var(self, name): try: return self.vars[name] except KeyError: raise Exception("Variable not found: %s" % name) calc_parser = Lark(calc_grammar, parser='lalr', transformer=CalculateTree()) calc = calc_parser.parse def main(): while True: try: s = input('> ') except EOFError: break print(calc(s)) def test(): print(calc("a = 1+2")) print(calc("1+a*-3")) if __name__ == '__main__': # test() main() lark-1.2.2/examples/composition/000077500000000000000000000000001465673407200166445ustar00rootroot00000000000000lark-1.2.2/examples/composition/README.rst000066400000000000000000000007641465673407200203420ustar00rootroot00000000000000Grammar Composition =================== This example shows how to do grammar composition in Lark, by creating a new file format that allows both CSV and JSON to co-exist. We show how, by using namespaces, Lark grammars and their transformers can be fully reused - they don't need to care if their grammar is used directly, or being imported, or who is doing the importing. See `main.py`_ for more details. .. _main.py: https://github.com/lark-parser/lark/blob/master/examples/composition/main.py lark-1.2.2/examples/composition/combined_csv_and_json.txt000066400000000000000000000002301465673407200237060ustar00rootroot00000000000000{"header": ["this", "is", "json", 1111]} # file lines author data.json 12 Robin data.csv 30 erezsh compiler.py 123123 Megalng {"footer": "done"} lark-1.2.2/examples/composition/csv.lark000066400000000000000000000005661465673407200203210ustar00rootroot00000000000000start: header _NL row+ header: "#" " "? 
(WORD _SEPARATOR?)+ row: (_anything _SEPARATOR?)+ _NL _anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT NON_SEPARATOR_STRING: /[a-zA-z.;\\\/]+/ _SEPARATOR: /[ ]+/ | "\t" | "," %import common.NEWLINE -> _NL %import common.WORD %import common.INT %import common.FLOAT %import common.SIGNED_FLOAT lark-1.2.2/examples/composition/eval_csv.py000066400000000000000000000011201465673407200210120ustar00rootroot00000000000000"Transformer for evaluating csv.lark" from lark import Transformer class CsvTreeToPandasDict(Transformer): INT = int FLOAT = float SIGNED_FLOAT = float WORD = str NON_SEPARATOR_STRING = str def row(self, children): return children def start(self, children): data = {} header = children[0].children for heading in header: data[heading] = [] for row in children[1:]: for i, element in enumerate(row): data[header[i]].append(element) return data lark-1.2.2/examples/composition/eval_json.py000066400000000000000000000006371465673407200212040ustar00rootroot00000000000000"Transformer for evaluating json.lark" from lark import Transformer, v_args class JsonTreeToJson(Transformer): @v_args(inline=True) def string(self, s): return s[1:-1].replace('\\"', '"') array = list pair = tuple object = dict number = v_args(inline=True)(float) null = lambda self, _: None true = lambda self, _: True false = lambda self, _: False lark-1.2.2/examples/composition/json.lark000066400000000000000000000006651465673407200204770ustar00rootroot00000000000000?start: value ?value: object | array | string | SIGNED_NUMBER -> number | "true" -> true | "false" -> false | "null" -> null array : "[" _WS? [value ("," _WS? value)*] "]" object : "{" _WS? [pair ("," _WS? pair)*] "}" pair : string ":" _WS value string : ESCAPED_STRING %import common.ESCAPED_STRING %import common.SIGNED_NUMBER %import common.WS -> _WS lark-1.2.2/examples/composition/main.py000066400000000000000000000034501465673407200201440ustar00rootroot00000000000000""" Grammar Composition =================== This example shows how to do grammar composition in Lark, by creating a new file format that allows both CSV and JSON to co-exist. 1) We define ``storage.lark``, which imports both ``csv.lark`` and ``json.lark``, and allows them to be used one after the other. In the generated tree, each imported rule/terminal is automatically prefixed (with ``json__`` or ``csv__), which creates an implicit namespace and allows them to coexist without collisions. 2) We merge their respective transformers (unaware of each other) into a new base transformer. The resulting transformer can evaluate both JSON and CSV in the parse tree. The methods of each transformer are renamed into their appropriate namespace, using the given prefix. This approach allows full re-use: the transformers don't need to care if their grammar is used directly, or being imported, or who is doing the importing. 
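For example, because ``storage.lark`` imports the CSV grammar under the ``csv`` namespace, its ``start`` rule appears in the parse tree as ``csv__start``, and ``merge_transformers`` (used below) exposes ``CsvTreeToPandasDict.start`` under that same ``csv__start`` name on the merged transformer.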
""" from pathlib import Path from lark import Lark from json import dumps from lark.visitors import Transformer, merge_transformers from eval_csv import CsvTreeToPandasDict from eval_json import JsonTreeToJson __dir__ = Path(__file__).parent class Storage(Transformer): def start(self, children): return children storage_transformer = merge_transformers(Storage(), csv=CsvTreeToPandasDict(), json=JsonTreeToJson()) parser = Lark.open("storage.lark", rel_to=__file__) def main(): json_tree = parser.parse(dumps({"test": "a", "dict": { "list": [1, 1.2] }})) res = storage_transformer.transform(json_tree) print("Just JSON: ", res) csv_json_tree = parser.parse(open(__dir__ / 'combined_csv_and_json.txt').read()) res = storage_transformer.transform(csv_json_tree) print("JSON + CSV: ", dumps(res, indent=2)) if __name__ == "__main__": main() lark-1.2.2/examples/composition/storage.lark000066400000000000000000000004571465673407200211710ustar00rootroot00000000000000start: (csv__start | json__start _NL?)+ // Renaming of the import variables is required, as they receive the namespace of this file. // See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565 %import .csv.start -> csv__start %import .json.start -> json__start %import .csv._NL -> _NL lark-1.2.2/examples/fruitflies.png000066400000000000000000002674271465673407200172050ustar00rootroot00000000000000PNG  IHDRX]g^bKGD IDATxw\בLAEq57Z)jeYiάLӴoj6ܚ#P@MȞ$q%p3><羯}羮UFB!B!MN B!B'B!BQH.B!B#JBQ&::j5ddd * sss*WJHJJBVBff&iiiAjjj実}}}}|```@ LMM ਄BWR !D)r]Ο?ύ7vׯs=bcb^-VNS~}pss QRSSyORR$%%@BByDy! (|4=`nnfffall &&&ajj}Rx !PɬBQrݾ}={p N"""x>ZMRROHIIyby ʕ+SR%,--Tڇ ˗ߛBdBQ\ruֱcNRFm莃{c]00,j5_#.tџժE7ۛ-[~EOXXaaaܹsp_#""׮RE%+'S~%66<# ̰jժTV +++lmmY&vvvQbER!J4)ԅ$HLLdڵX XUQ4jӞ[W2irry2 C aTR3)33nݺEXXoaaayUbcc6wYnQXJiii4z={zҴ*Jxyܻ‰];8} i)xԩSWDy!G ~]bEիG!^Eff,PmA<,,L3omm3899iUV&EL u!(oYOT06stG?sfF:vV΀5s&JG$##W}\vM; ԫWk[nQ6BPPAAAܸqC}bb"'777pwwE; BR !DqhXbS,5vBߠ}@on6?R2e ƍ%LJJ ϟ_[_~Z 4͍ ժUS:("##+W$''xoԨM6XBQPBΝ; <'O=zi3I$\ڵY4lPX)"""ԩSF˖-iѢT^]Bh營O… deeammM-h߾=^^^ԩSGB_H.Jh4|W̜9xJpMN?lRU$''sߏ/!!!kѲeK<==iҤ\K.J4Ο?Nѣ$%%aoo;wC2YB]!V0p [lawTdNghԈݻwu,::[eNc\\\&Ds .\Ț5kPT 6/KKK !.E0WoapgD\z` xVYl_5ǏgذaXXX(L/!!_~9s搑_~ѣeE!DQBQT6mɓ'4K)޸IDEg޼yJG)!!!L0陙p?tb'++ӹsgׯO`` =EèT*LMMiذ!͚5CRahhHfpqqJETT3ۊcTX֭[<7nJ3y;x ^^^T*<<<ظqc߿G}[oŻKϞ=3f yۿ?CRR8p ׏?N=PTiӆ۷rgINNf͚ݻWLѰ`&NHvhݺ5AAA/f~5k_(ZfĉܻwYo Ãnݺt4!D C߅ɷkR۹qćq|Ն+V0`(TgNIIw;vE1hРNrvލvJ7nyboo4 VVVPTK=ʲeXz5$$$hOz >e˖Y?,,Zjq ӧOƎNrf~zڴi]?==cffF\\Hlll?g{ *TMٳ'/ >>d ¤Ihڴ o}nݺ̘1Cz戋cذaѝ[/'0&W&'߳~|ǬQw$>;u#}׭i)^JeUP{4%-- .xJ%++ x4Ty֬Y8::R^=Mqww/OUBtuuvX 4icǎsΌ;>u[ 7nv4̙СCcǎ\rFÎ;1b<|Aaii ϟ׶sN.^Z[[i&RSS~ ǏArVfM -%/D|y~9>G|pSMjR"zzSŶFƼ1pHe/de[===n߾]m'E͛75kuԡYf̟?_; ˱gȐ!JF}%III4i҄ *0{l͛ή]hܸ1OlǥK8wsΥUV3=zrJΟ?Orر#4nܘu?ә?>׮]cڶs{xu~;;AR>mM-,y{,rW,g]*H* ߿zmKȧhEsgܸqtԉvZC ٳ aȑeUTaԩ 4Yfk_ e̛̙7ʕ+k={+VbŊ'q uFj/F7OL;\i>SN>ܹsi׮oh4n _:" -[z3rH,,,ر#@?/˝0ȽNĉLNx9w(_oTT)sիO"iŊ&-** kkVT\+W0x<~sIM^VVߧe˖y꒓~jsOPHs/W9ɔ3n0YZZt !D)%C߅YXXhHIL,(~fLU8ARC,YF)_ǬRѡ\r4k֌KҦM\\\Jl> ˳<4wF\53gN< 44'yttt^x}߿mY_V̳.L{$V$7nܠf͚JBRR !D!pz'Y * p?SirrHHEuOeefBJi;TA̝;pN>͈#xwرcG{$|yaԹx L”)Sسgvv2O?pVkk|-]ׯ_UV;!R +Ww_?3UVeĉy7nܘ|a8z([CVr ۔hO@C/^̌3h۶SP^=&O:HTTF 0]C077g$''ömۀG\Nذa֭ۛPϡCeѢEXXXh"mQ+K,N'*Ubմmۖڵ݉?`ڴiܼy WĐ:p'fෳl۶+WelBff&7n剟%z… |w,\XVX$￳o>N>B'z\ٳ/}vQlkkƿQF_ر+Wh'kӦ 5j֖9spI155ٙ8_NNNOw^,YR.Xx1ӦMcժU4mT8BJĸF!(cfΜǂ7Uo{кQ ̜9so߾4o_`~)_|ӧO/w:T:J b={V(ɛoIժUY|QG#5;N̙3LBRcBQ"##Q&ƳJ)n^ėzqTЖfVbԨQtЁիWk'c/nݚǏ BJJ 'Ndȑ4h@8/̙3 4sajjtbcc:t(wf=ZHBm\.Eښ=zו e͟&Ez> 7oޤN:L6M{x9VVVlݺ>,YKPf͚U"fΜ/ҳXlNNN\xÇK.(R !Dn,R,] ϜٳRlyxxp%Nʼyppp`͚5_5hЀ3gh" (^YYYYkRzuE|8|ժUS:%FTTK,aҥ[o1vXZht4!D$B@6mJ.o0t2?UKټGGǎSbEFFj 
uB<-IԅKRRcǎe͌5W^cǎ1h@srZx:gy6AJ_VO>I|04 ^^^H@@:\Qj._ӧIyXX޴mۖ@:v숓B2#bZ~=ƍښ_~RmΨ#ٸiiRa=|mE:,ӵIRHH'OsssiҤ[z2S)**"<<3ghΝ;Gvv6K/D6md!De&⊏gرW_all̖-[x?!=-{n+(,b,vB9s2bdʢ/H.\R___A.x ΢\tpùt/^+W~ !#H.V\qss_% @!\͛ǂhrtlU.g+r4?wppVVVXZZjW&YYYdff}Fff&$''MʓGGG\]]qvv6ԨQU:[!t$QBTN*u IDAT f̘A&MXbuXMLL >t+WV116½#6V8X[P 3L040؈B /($[OrF $ed};\%\iۮ=m۶m۶F!!!Ad&''T",>MRVcaa!affVVV6 ZZZ7ZRy߹jDj ɡlʢ@=ޞիk55wrrB}B!$B-,,!CΌ34iRX}<''pBCC|ȘnINΝ?sJL111+K+pus |}}Z׷%DHOO/\u>>;v3f0}tڶmիWx5kزe P!BT$ FuBQY\pCży5j Bzll,^^^( BCCqssaB!Z'=BQ6lȱc7nݻw'&&FoM^^J^{"G,B!QBaС$&&2w\ƎK]}}}>C ])B!ʗu !sɓYx1x]P{nvB!D9#BFFF?} !(=!eo%44Ej5 0CLuݼySNAXXÉ!9)˲f͚x{y具75b,)JHKK#==,R$;; J%R>׮<+NMLL055&֘bff5昚bee%bkk fff-!G]!H6lxߐ{3c MDGGGsN  vl,zzz8tٽ5a\jêVadb"rs$#5ҒIIvtEEӺukھݻw!DRRIxq"~yyygii} jV=i^[[[,,,044|}y|LHdD$Y-%X*(JiRnۅRJ7[ZMU$P"bK/ЙkX9 q̜33~ΝBh4Q\\w.mYYY5NCAAEEE766օvmڢVGVV122u !#.JEEݺuѣU ػw/={N>}c--[7.^xĴ.C+gp2 Q?q|1СC֭[RRRHJJ"11k׮322HMM%++K8###j5[G ,,,-??jEnPIzz:%%%z粲Z.7mڔ͛䄓vvvzKX !$ !ăܹsٱcNvB9-lll8{,zj*,]J99٫7=h*+tQ{9/\@[WW^3QFѴiZIԮt\B||< $$$H||<$%%8]`iӦҴiSu=vvvXYY)xe^ꍄHMM50JFF8###h֬8::ꂼ3...Q$A]!jC^^ᄆr PTzk7lؐ^zsNT*,\y~KQQ=Ҿչ^9o@QaC amV}*++#>>*ۥKt8::Rf͚aiiՈׯL\\IIIU~vFcccpqqdzBu!P•+WسgwfdeeaddDYY3g 037@̢ҒbHOJdD` gA-\~gKll,111?*++6(|W$&&VHWT899榛 jW!MB('OΎ;F /`dlr1Ds)fڴiL4I}SPvv6QQQ7Vѭe(Krssu˜?^ؤacc\ၧ' W/w$A]!?QFq!<{ƭ sCUiI1+e?Ҫ VcǎJ̙3?~\ƢhhҤ ...kݟ-[ BYYY(PnҥB5ԅ.Xv-*VMym<-yS|>'*]CGr~7HHHyxzzΝ;pBԾTW舧']vGtU4B(@B(I2k, i TV5d eĈ}СC}pufff_|޽{9r={$##C鲄uu!{c\_u?_GTR!.$$3i$;vc*++y爏'22Z׏~M6mjWYY/ѣn:7nqqqn%W_ɓ}cǒNPP]ק(ծ];Zn}gN:ѪU[~udgg3`ʔ.IQHPB,--K/ֹKOpl_8?~)9{na~~ŚoX9_͊)Ƚ3g:=ޖa tj{U/̟?oooxtCo6o<"## m۶]]333311 +2e 'Nߟ'uc066fҤII把/yWҥ O=OF/ ggg9r$666o^d˖-8q988~z :tm~"""ܹskg޷omߧu/ݭ7xC˯uE @&M_=vԩS1b~~~:u>yGo3044Ww}a\|oZ˖-پ};O/P!D# !oO0:8i4RlJ; \1~0V; 0mst7@nu>5ʊr>_3 KMZB?Kyy9QQQXYYh4dff2~x&OLǎDqܸqL4 7779y$.\ //777 x9p#Fk׮o 6kRZZ뉾_oΝĉ4iGj5*ʤq7AzϹ{U][~Ǐ' 8&NHqq1'NcǎiӆJ._LYYjgggN>Mbb]}ghl۶-\~JСC?-8qOOOfϞ2sL;4<!YB`$%%ۿ|rFɓꫯt!̚5/چlݺʅ tU`ooOvv6%%%l:t(?۶m# J.3EEE\~UuuomC&*o0a7aèM6\vM~7AmCVyx뭷y淜-)) '''KXXzk˕+Wpqqȑ#(]nXnB!BT*F5z^JU斖fO:nb?Q7N;VM315#88r{h48y$p~xhH?tz3GFFSN?z(T֤I `ps-]T$^ttt縻sAu߬Tu릷 񶂻9>L0Aĉg…dffRRRZ9{F̈́ Xb ,,/55ޣ@;L*'ܣ.5Z}]LIk 7s-Ss*y9Y,ZFU0߱cGݰ)S<4!tUڐ\K_~QPPP5***aaaM(**_x^rw﮷e˖z3,33J#VSŚԡC\\\1c}HPח9s(\ύ9 SO=? iӦ1m4!nnnn Vjk_C;˖-vBɓ'ӴiS.]Zk?ޞS"--+WT9f߾}Un}js߿?ѷo_Ȁ{7_iժ;wdՔ3}[O;]נ9sйsgݜ BԅsuuĄk%뛾KYQ^΀QcqnՆkV{ήu+iɓ>/ ?/)׮}u.<@ee_&]=FSN5zޚ k?~/-5{lvͷ~TE]XXX)SINN_p!ݺu7{\YY'|B^>|8'N`РAɑ]a}-Ξ2_wd\Yj|DZcزe 6663y1w\{{[h1'OFV]eߗ;w_Mnn.)))ӲeK#Fu㶄hك/&?? ~giݺ57fڵ^СCc׮]ٳggg.\5 .ԅRCCC:uĢEذt[[[իnM㏹tϟYfhB:{޽{3f~L?3K.eÆ lذR֭[Gg666 DEE3o<ϟOFFK,klV\Ɏ;8|0&MՕ,VXAdd$ḻ_na:u^zI7ٵkHMMew 9ٳٵkYYYٱl24iRoڴ۷h"RJpp0׿>}:cǎU!DrVf}BZ0k,igw*] W&<=K'K~ˇpR%{{{/_ίJaa!>>>ӇpK:tYXpҥ<:t.)++cZf͚)]ӿ:tիWٵk[n-'"C߅M4%~{? 
[binary PNG image data omitted]
lark-1.2.2/examples/fruitflies.py000066400000000000000000000021371465673407200170320ustar00rootroot00000000000000""" Handling Ambiguity ================== A demonstration of ambiguity This example shows how to get explicit ambiguity from Lark's Earley parser. """ import sys from lark import Lark, tree grammar = """ sentence: noun verb noun -> simple | noun verb "like" noun -> comparative noun: adj? NOUN verb: VERB adj: ADJ NOUN: "flies" | "bananas" | "fruit" VERB: "like" | "flies" ADJ: "fruit" %import common.WS %ignore WS """ parser = Lark(grammar, start='sentence', ambiguity='explicit') sentence = 'fruit flies like bananas' def make_png(filename): tree.pydot__tree_to_png( parser.parse(sentence), filename) def make_dot(filename): tree.pydot__tree_to_dot( parser.parse(sentence), filename) if __name__ == '__main__': print(parser.parse(sentence).pretty()) # make_png(sys.argv[1]) # make_dot(sys.argv[1]) # Output: # # _ambig # comparative # noun fruit # verb flies # noun bananas # simple # noun # fruit # flies # verb like # noun bananas # # (or view a nicer version at "./fruitflies.png") lark-1.2.2/examples/grammars/000077500000000000000000000000001465673407200161125ustar00rootroot00000000000000lark-1.2.2/examples/grammars/README.rst000066400000000000000000000005051465673407200176010ustar00rootroot00000000000000Example Grammars ================ This directory is a collection of lark grammars, taken from real world projects. - `Verilog`_ - Taken from https://github.com/circuitgraph/circuitgraph/blob/main/circuitgraph/parsing/verilog.lark .. _Verilog: https://github.com/lark-parser/lark/blob/master/examples/grammars/verilog.lark lark-1.2.2/examples/grammars/verilog.lark000066400000000000000000000050131465673407200204330ustar00rootroot00000000000000// Taken from https://github.com/circuitgraph/circuitgraph/blob/master/circuitgraph/parsing/verilog.lark // Following https://www.verilog.com/VerilogBNF.html // 1. Source Text start: description* ?description: module module: "module" name_of_module list_of_ports? ";" module_item* "endmodule" ?name_of_module: IDENTIFIER list_of_ports: "(" port ("," port)* ")" ?port: IDENTIFIER ?module_item: input_declaration | output_declaration | net_declaration | module_instantiation | continuous_assign // 2. Declarations input_declaration: "input" list_of_variables ";" output_declaration: "output" list_of_variables ";" net_declaration: "wire" list_of_variables ";" continuous_assign: "assign" list_of_assignments ";" list_of_variables: IDENTIFIER ("," IDENTIFIER)* list_of_assignments: assignment ("," assignment)* // 3.
Primitive Instances // These are merged with module instantiations // 4. Module Instantiations module_instantiation: name_of_module module_instance ("," module_instance)* ";" module_instance: name_of_instance "(" list_of_module_connections ")" ?name_of_instance: IDENTIFIER list_of_module_connections: module_port_connection ("," module_port_connection)* | named_port_connection ("," named_port_connection)* module_port_connection: expression named_port_connection: "." IDENTIFIER "(" expression ")" // 5. Behavioral Statements assignment: lvalue "=" expression // 6. Specify Section // 7. Expressions ?lvalue: identifier expression: condition ?constant_value: constant_zero | constant_one | constant_x constant_zero: "1'b0" | "1'h0" constant_one: "1'b1" | "1'h1" constant_x: "1'bx" | "1'hx" ?condition : or | ternary ?ternary: or "?" or ":" or ?or : xor | or_gate ?or_gate: or "|" xor ?xor : and | xor_gate | xnor_gate ?xor_gate: xor "^" and ?xnor_gate: xor "~^" and | xor "^~" and ?and : unary | and_gate ?and_gate: and "&" unary ?unary : primary | not_gate not_gate: ( "!" | "~" ) primary ?primary : IDENTIFIER | constant_value | "(" or ")" // 8. General ?identifier: IDENTIFIER IDENTIFIER: CNAME | ESCAPED_IDENTIFIER // Lark ESCAPED_IDENTIFIER: /\\([^\s]+)/ COMMENT: "//" /[^\n]*/ NEWLINE NEWLINE: "\n" MULTILINE_COMMENT: /\/\*(\*(?!\/)|[^*])*\*\// %import common.CNAME %import common.ESCAPED_STRING %import common.WS %ignore WS %ignore COMMENT %ignore MULTILINE_COMMENT %ignore NEWLINE lark-1.2.2/examples/indented_tree.py000066400000000000000000000020571465673407200174700ustar00rootroot00000000000000""" Parsing Indentation =================== A demonstration of parsing indentation (“whitespace significant” language) and the usage of the Indenter class. Since indentation is context-sensitive, a postlex stage is introduced to manufacture INDENT/DEDENT tokens. It is crucial for the indenter that the NL_type matches the spaces (and tabs) after the newline. """ from lark import Lark from lark.indenter import Indenter tree_grammar = r""" ?start: _NL* tree tree: NAME _NL [_INDENT tree+ _DEDENT] %import common.CNAME -> NAME %import common.WS_INLINE %declare _INDENT _DEDENT %ignore WS_INLINE _NL: /(\r?\n[\t ]*)+/ """ class TreeIndenter(Indenter): NL_type = '_NL' OPEN_PAREN_types = [] CLOSE_PAREN_types = [] INDENT_type = '_INDENT' DEDENT_type = '_DEDENT' tab_len = 8 parser = Lark(tree_grammar, parser='lalr', postlex=TreeIndenter()) test_tree = """ a b c d e f g """ def test(): print(parser.parse(test_tree).pretty()) if __name__ == '__main__': test() lark-1.2.2/examples/json_parser.py000066400000000000000000000047341465673407200172100ustar00rootroot00000000000000""" Simple JSON Parser ================== The code is short and clear, and outperforms every other parser (that's written in Python). 
For an explanation, check out the JSON parser tutorial at /docs/json_tutorial.md """ import sys from lark import Lark, Transformer, v_args json_grammar = r""" ?start: value ?value: object | array | string | SIGNED_NUMBER -> number | "true" -> true | "false" -> false | "null" -> null array : "[" [value ("," value)*] "]" object : "{" [pair ("," pair)*] "}" pair : string ":" value string : ESCAPED_STRING %import common.ESCAPED_STRING %import common.SIGNED_NUMBER %import common.WS %ignore WS """ class TreeToJson(Transformer): @v_args(inline=True) def string(self, s): return s[1:-1].replace('\\"', '"') array = list pair = tuple object = dict number = v_args(inline=True)(float) null = lambda self, _: None true = lambda self, _: True false = lambda self, _: False ### Create the JSON parser with Lark, using the Earley algorithm # json_parser = Lark(json_grammar, parser='earley', lexer='basic') # def parse(x): # return TreeToJson().transform(json_parser.parse(x)) ### Create the JSON parser with Lark, using the LALR algorithm json_parser = Lark(json_grammar, parser='lalr', # Using the basic lexer isn't required, and isn't usually recommended. # But, it's good enough for JSON, and it's slightly faster. lexer='basic', # Disabling propagate_positions and placeholders slightly improves speed propagate_positions=False, maybe_placeholders=False, # Using an internal transformer is faster and more memory efficient transformer=TreeToJson()) parse = json_parser.parse def test(): test_json = ''' { "empty_object" : {}, "empty_array" : [], "booleans" : { "YES" : true, "NO" : false }, "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ], "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ], "nothing" : null } ''' j = parse(test_json) print(j) import json assert j == json.loads(test_json) if __name__ == '__main__': # test() with open(sys.argv[1]) as f: print(parse(f.read())) lark-1.2.2/examples/lark_grammar.py000066400000000000000000000020131465673407200173060ustar00rootroot00000000000000""" Lark Grammar ============ A reference implementation of the Lark grammar (using LALR(1)) """ import lark from pathlib import Path examples_path = Path(__file__).parent lark_path = Path(lark.__file__).parent parser = lark.Lark.open(lark_path / 'grammars/lark.lark', rel_to=__file__, parser="lalr") grammar_files = [ examples_path / 'advanced/python2.lark', examples_path / 'relative-imports/multiples.lark', examples_path / 'relative-imports/multiple2.lark', examples_path / 'relative-imports/multiple3.lark', examples_path / 'tests/no_newline_at_end.lark', examples_path / 'tests/negative_priority.lark', examples_path / 'standalone/json.lark', lark_path / 'grammars/common.lark', lark_path / 'grammars/lark.lark', lark_path / 'grammars/unicode.lark', lark_path / 'grammars/python.lark', ] def test(): for grammar_file in grammar_files: tree = parser.parse(open(grammar_file).read()) print("All grammars parsed successfully") if __name__ == '__main__': test() lark-1.2.2/examples/relative-imports/000077500000000000000000000000001465673407200176075ustar00rootroot00000000000000lark-1.2.2/examples/relative-imports/multiple2.lark000066400000000000000000000000301465673407200223700ustar00rootroot00000000000000start: ("0" | "1")* "0" lark-1.2.2/examples/relative-imports/multiple3.lark000066400000000000000000000001621465673407200223770ustar00rootroot00000000000000start: mod0mod0+ mod0mod0: "0" | "1" mod1mod0 mod1mod0: "1" | "0" mod2mod1 mod1mod0 mod2mod1: "0" | "1" mod2mod1 
lark-1.2.2/examples/relative-imports/multiples.lark000066400000000000000000000001711465673407200224770ustar00rootroot00000000000000start: "2:" multiple2 | "3:" multiple3 %import .multiple2.start -> multiple2 %import .multiple3.start -> multiple3 lark-1.2.2/examples/relative-imports/multiples.py000066400000000000000000000013161465673407200222000ustar00rootroot00000000000000# # This example demonstrates relative imports with rule rewrite # see multiples.lark # # # if b is a number written in binary, and m is either 2 or 3, # the grammar aims to recognise m:b iif b is a multiple of m # # for example, 3:1001 is recognised # because 9 (0b1001) is a multiple of 3 # from lark import Lark, UnexpectedInput parser = Lark.open('multiples.lark', rel_to=__file__, parser='lalr') def is_in_grammar(data): try: parser.parse(data) except UnexpectedInput: return False return True for n_dec in range(100): n_bin = bin(n_dec)[2:] assert is_in_grammar('2:{}'.format(n_bin)) == (n_dec % 2 == 0) assert is_in_grammar('3:{}'.format(n_bin)) == (n_dec % 3 == 0) lark-1.2.2/examples/standalone/000077500000000000000000000000001465673407200164315ustar00rootroot00000000000000lark-1.2.2/examples/standalone/README.rst000066400000000000000000000004621465673407200201220ustar00rootroot00000000000000Standalone example ================== To initialize, cd to this folder, and run: .. code-block:: bash ./create_standalone.sh Or: .. code-block:: bash python -m lark.tools.standalone json.lark > json_parser.py Then run using: .. code-block:: bash python json_parser_main.py lark-1.2.2/examples/standalone/create_standalone.sh000077500000000000000000000001261465673407200224420ustar00rootroot00000000000000#!/bin/sh PYTHONPATH=../.. python -m lark.tools.standalone json.lark > json_parser.py lark-1.2.2/examples/standalone/json.lark000066400000000000000000000006561465673407200202640ustar00rootroot00000000000000?start: value ?value: object | array | string | SIGNED_NUMBER -> number | "true" -> true | "false" -> false | "null" -> null array : "[" [value ("," value)*] "]" object : "{" [pair ("," pair)*] "}" pair : string ":" value string : ESCAPED_STRING %import common.ESCAPED_STRING %import common.SIGNED_NUMBER %import common.WS %ignore WS lark-1.2.2/examples/standalone/json_parser_main.py000066400000000000000000000013771465673407200223440ustar00rootroot00000000000000""" Standalone Parser =================================== This example demonstrates how to generate and use the standalone parser, using the JSON example. See README.md for more details. 
""" import sys from json_parser import Lark_StandAlone, Transformer, v_args inline_args = v_args(inline=True) class TreeToJson(Transformer): @inline_args def string(self, s): return s[1:-1].replace('\\"', '"') array = list pair = tuple object = dict number = inline_args(float) null = lambda self, _: None true = lambda self, _: True false = lambda self, _: False parser = Lark_StandAlone(transformer=TreeToJson()) if __name__ == '__main__': with open(sys.argv[1]) as f: print(parser.parse(f.read())) lark-1.2.2/examples/tests/000077500000000000000000000000001465673407200154435ustar00rootroot00000000000000lark-1.2.2/examples/tests/negative_priority.lark000066400000000000000000000000231465673407200220540ustar00rootroot00000000000000start: r r.-1: "a" lark-1.2.2/examples/tests/no_newline_at_end.lark000066400000000000000000000000131465673407200217570ustar00rootroot00000000000000start: "a" lark-1.2.2/examples/turtle_dsl.py000066400000000000000000000036251465673407200170420ustar00rootroot00000000000000""" Turtle DSL ========== Implements a LOGO-like toy language for Python’s turtle, with interpreter. """ try: input = raw_input # For Python2 compatibility except NameError: pass import turtle from lark import Lark turtle_grammar = """ start: instruction+ instruction: MOVEMENT NUMBER -> movement | "c" COLOR [COLOR] -> change_color | "fill" code_block -> fill | "repeat" NUMBER code_block -> repeat code_block: "{" instruction+ "}" MOVEMENT: "f"|"b"|"l"|"r" COLOR: LETTER+ %import common.LETTER %import common.INT -> NUMBER %import common.WS %ignore WS """ parser = Lark(turtle_grammar) def run_instruction(t): if t.data == 'change_color': turtle.color(*t.children) # We just pass the color names as-is elif t.data == 'movement': name, number = t.children { 'f': turtle.fd, 'b': turtle.bk, 'l': turtle.lt, 'r': turtle.rt, }[name](int(number)) elif t.data == 'repeat': count, block = t.children for i in range(int(count)): run_instruction(block) elif t.data == 'fill': turtle.begin_fill() run_instruction(t.children[0]) turtle.end_fill() elif t.data == 'code_block': for cmd in t.children: run_instruction(cmd) else: raise SyntaxError('Unknown instruction: %s' % t.data) def run_turtle(program): parse_tree = parser.parse(program) for inst in parse_tree.children: run_instruction(inst) def main(): while True: code = input('> ') try: run_turtle(code) except Exception as e: print(e) def test(): text = """ c red yellow fill { repeat 36 { f200 l170 }} """ run_turtle(text) if __name__ == '__main__': # test() main() lark-1.2.2/lark/000077500000000000000000000000001465673407200134145ustar00rootroot00000000000000lark-1.2.2/lark/__init__.py000066400000000000000000000013501465673407200155240ustar00rootroot00000000000000from .exceptions import ( GrammarError, LarkError, LexError, ParseError, UnexpectedCharacters, UnexpectedEOF, UnexpectedInput, UnexpectedToken, ) from .lark import Lark from .lexer import Token from .tree import ParseTree, Tree from .utils import logger from .visitors import Discard, Transformer, Transformer_NonRecursive, Visitor, v_args __version__: str = "1.2.2" __all__ = ( "GrammarError", "LarkError", "LexError", "ParseError", "UnexpectedCharacters", "UnexpectedEOF", "UnexpectedInput", "UnexpectedToken", "Lark", "Token", "ParseTree", "Tree", "logger", "Discard", "Transformer", "Transformer_NonRecursive", "Visitor", "v_args", ) 
lark-1.2.2/lark/__pyinstaller/000077500000000000000000000000001465673407200162605ustar00rootroot00000000000000lark-1.2.2/lark/__pyinstaller/__init__.py000066400000000000000000000002661465673407200203750ustar00rootroot00000000000000# For usage of lark with PyInstaller. See https://pyinstaller-sample-hook.readthedocs.io/en/latest/index.html import os def get_hook_dirs(): return [os.path.dirname(__file__)] lark-1.2.2/lark/__pyinstaller/hook-lark.py000066400000000000000000000011271465673407200205220ustar00rootroot00000000000000#----------------------------------------------------------------------------- # Copyright (c) 2017-2020, PyInstaller Development Team. # # Distributed under the terms of the GNU General Public License (version 2 # or later) with exception for distributing the bootloader. # # The full license is in the file COPYING.txt, distributed with this software. # # SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception) #----------------------------------------------------------------------------- from PyInstaller.utils.hooks import collect_data_files datas = collect_data_files('lark') lark-1.2.2/lark/ast_utils.py000066400000000000000000000041051465673407200157750ustar00rootroot00000000000000""" Module of utilities for transforming a lark.Tree into a custom Abstract Syntax Tree (AST defined in classes) """ import inspect, re import types from typing import Optional, Callable from lark import Transformer, v_args class Ast: """Abstract class Subclasses will be collected by `create_transformer()` """ pass class AsList: """Abstract class Subclasses will be instantiated with the parse results as a single list, instead of as arguments. """ class WithMeta: """Abstract class Subclasses will be instantiated with the Meta instance of the tree. (see ``v_args`` for more detail) """ pass def camel_to_snake(name): return re.sub(r'(? Transformer: """Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST. For each class, we create a corresponding rule in the transformer, with a matching name. CamelCase names will be converted into snake_case. Example: "CodeBlock" -> "code_block". Classes starting with an underscore (`_`) will be skipped. Parameters: ast_module: A Python module containing all the subclasses of ``ast_utils.Ast`` transformer (Optional[Transformer]): An initial transformer. Its attributes may be overwritten. decorator_factory (Callable): An optional callable accepting two booleans, inline, and meta, and returning a decorator for the methods of ``transformer``. (default: ``v_args``). 
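    Example:
        A minimal sketch of the intended usage; the grammar, the ``Assignment``
        class and the parsed text below are illustrative, not part of the API.
        It collects the ``Ast`` subclasses defined in the current module and uses
        the resulting transformer to turn a parse tree into AST instances::

            import sys
            from dataclasses import dataclass

            from lark import Lark, ast_utils

            this_module = sys.modules[__name__]

            @dataclass
            class Assignment(ast_utils.Ast):
                # Collected by create_transformer() and attached to the transformer
                # as the handler for the 'assignment' rule (CamelCase -> snake_case).
                name: str
                value: str

            parser = Lark('''
                start: assignment+
                assignment: NAME "=" NAME ";"
                %import common.CNAME -> NAME
                %import common.WS
                %ignore WS
            ''')

            transformer = ast_utils.create_transformer(this_module)
            ast = transformer.transform(parser.parse("x = y;"))
            # `ast` is a Tree whose children are Assignment instances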
""" t = transformer or Transformer() for name, obj in inspect.getmembers(ast_module): if not name.startswith('_') and inspect.isclass(obj): if issubclass(obj, Ast): wrapper = decorator_factory(inline=not issubclass(obj, AsList), meta=issubclass(obj, WithMeta)) obj = wrapper(obj).__get__(t) setattr(t, camel_to_snake(name), obj) return t lark-1.2.2/lark/common.py000066400000000000000000000057001465673407200152600ustar00rootroot00000000000000from copy import deepcopy import sys from types import ModuleType from typing import Callable, Collection, Dict, Optional, TYPE_CHECKING, List if TYPE_CHECKING: from .lark import PostLex from .lexer import Lexer from .grammar import Rule from typing import Union, Type from typing import Literal if sys.version_info >= (3, 10): from typing import TypeAlias else: from typing_extensions import TypeAlias from .utils import Serialize from .lexer import TerminalDef, Token ###{standalone _ParserArgType: 'TypeAlias' = 'Literal["earley", "lalr", "cyk", "auto"]' _LexerArgType: 'TypeAlias' = 'Union[Literal["auto", "basic", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]' _LexerCallback = Callable[[Token], Token] ParserCallbacks = Dict[str, Callable] class LexerConf(Serialize): __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type' __serialize_namespace__ = TerminalDef, terminals: Collection[TerminalDef] re_module: ModuleType ignore: Collection[str] postlex: 'Optional[PostLex]' callbacks: Dict[str, _LexerCallback] g_regex_flags: int skip_validation: bool use_bytes: bool lexer_type: Optional[_LexerArgType] strict: bool def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _LexerCallback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False, strict: bool=False): self.terminals = terminals self.terminals_by_name = {t.name: t for t in self.terminals} assert len(self.terminals) == len(self.terminals_by_name) self.ignore = ignore self.postlex = postlex self.callbacks = callbacks or {} self.g_regex_flags = g_regex_flags self.re_module = re_module self.skip_validation = skip_validation self.use_bytes = use_bytes self.strict = strict self.lexer_type = None def _deserialize(self): self.terminals_by_name = {t.name: t for t in self.terminals} def __deepcopy__(self, memo=None): return type(self)( deepcopy(self.terminals, memo), self.re_module, deepcopy(self.ignore, memo), deepcopy(self.postlex, memo), deepcopy(self.callbacks, memo), deepcopy(self.g_regex_flags, memo), deepcopy(self.skip_validation, memo), deepcopy(self.use_bytes, memo), ) class ParserConf(Serialize): __serialize_fields__ = 'rules', 'start', 'parser_type' rules: List['Rule'] callbacks: ParserCallbacks start: List[str] parser_type: _ParserArgType def __init__(self, rules: List['Rule'], callbacks: ParserCallbacks, start: List[str]): assert isinstance(start, list) self.rules = rules self.callbacks = callbacks self.start = start ###} lark-1.2.2/lark/exceptions.py000066400000000000000000000252731465673407200161600ustar00rootroot00000000000000from .utils import logger, NO_VALUE from typing import Mapping, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, Collection, TYPE_CHECKING if TYPE_CHECKING: from .lexer import Token from .parsers.lalr_interactive_parser import InteractiveParser from .tree import Tree ###{standalone class LarkError(Exception): pass class ConfigurationError(LarkError, ValueError): pass def 
assert_config(value, options: Collection, msg='Got %r, expected one of %s'): if value not in options: raise ConfigurationError(msg % (value, options)) class GrammarError(LarkError): pass class ParseError(LarkError): pass class LexError(LarkError): pass T = TypeVar('T') class UnexpectedInput(LarkError): """UnexpectedInput Error. Used as a base class for the following exceptions: - ``UnexpectedCharacters``: The lexer encountered an unexpected string - ``UnexpectedToken``: The parser received an unexpected token - ``UnexpectedEOF``: The parser expected a token, but the input ended After catching one of these exceptions, you may call the following helper methods to create a nicer error message. """ line: int column: int pos_in_stream = None state: Any _terminals_by_name = None interactive_parser: 'InteractiveParser' def get_context(self, text: str, span: int=40) -> str: """Returns a pretty string pinpointing the error in the text, with span amount of context characters around it. Note: The parser doesn't hold a copy of the text it has to parse, so you have to provide it again """ assert self.pos_in_stream is not None, self pos = self.pos_in_stream start = max(pos - span, 0) end = pos + span if not isinstance(text, bytes): before = text[start:pos].rsplit('\n', 1)[-1] after = text[pos:end].split('\n', 1)[0] return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n' else: before = text[start:pos].rsplit(b'\n', 1)[-1] after = text[pos:end].split(b'\n', 1)[0] return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace") def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Mapping[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=True ) -> Optional[T]: """Allows you to detect what's wrong in the input text by matching against example errors. Given a parser instance and a dictionary mapping some label with some malformed syntax examples, it'll return the label for the example that bests matches the current error. The function will iterate the dictionary until it finds a matching error, and return the corresponding value. For an example usage, see `examples/error_reporting_lalr.py` Parameters: parse_fn: parse function (usually ``lark_instance.parse``) examples: dictionary of ``{'example_string': value}``. use_accepts: Recommended to keep this as ``use_accepts=True``. 
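        Example:
            A minimal sketch; the grammar, the labels and the inputs are
            illustrative, not part of the API. Each label maps to a list of
            malformed inputs that reproduce the same kind of error::

                from lark import Lark, UnexpectedInput

                parser = Lark('start: "a" "b" "c"', parser='lalr')

                error_examples = {
                    'second token missing': ['ac'],
                    'third token missing': ['ab'],
                }

                try:
                    parser.parse('ab')
                except UnexpectedInput as u:
                    label = u.match_examples(parser.parse, error_examples)
                    # `label` is expected to be 'third token missing' here,
                    # because the input reproduces that example's error state.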
""" assert self.state is not None, "Not supported for this exception" if isinstance(examples, Mapping): examples = examples.items() candidate = (None, False) for i, (label, example) in enumerate(examples): assert not isinstance(example, str), "Expecting a list" for j, malformed in enumerate(example): try: parse_fn(malformed) except UnexpectedInput as ut: if ut.state == self.state: if ( use_accepts and isinstance(self, UnexpectedToken) and isinstance(ut, UnexpectedToken) and ut.accepts != self.accepts ): logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % (self.state, self.accepts, ut.accepts, i, j)) continue if ( isinstance(self, (UnexpectedToken, UnexpectedEOF)) and isinstance(ut, (UnexpectedToken, UnexpectedEOF)) ): if ut.token == self.token: # Try exact match first logger.debug("Exact Match at example [%s][%s]" % (i, j)) return label if token_type_match_fallback: # Fallback to token types match if (ut.token.type == self.token.type) and not candidate[-1]: logger.debug("Token Type Fallback at example [%s][%s]" % (i, j)) candidate = label, True if candidate[0] is None: logger.debug("Same State match at example [%s][%s]" % (i, j)) candidate = label, False return candidate[0] def _format_expected(self, expected): if self._terminals_by_name: d = self._terminals_by_name expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected] return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected) class UnexpectedEOF(ParseError, UnexpectedInput): """An exception that is raised by the parser, when the input ends while it still expects a token. """ expected: 'List[Token]' def __init__(self, expected, state=None, terminals_by_name=None): super(UnexpectedEOF, self).__init__() self.expected = expected self.state = state from .lexer import Token self.token = Token("", "") # , line=-1, column=-1, pos_in_stream=-1) self.pos_in_stream = -1 self.line = -1 self.column = -1 self._terminals_by_name = terminals_by_name def __str__(self): message = "Unexpected end-of-input. " message += self._format_expected(self.expected) return message class UnexpectedCharacters(LexError, UnexpectedInput): """An exception that is raised by the lexer, when it cannot match the next string of characters to any of its terminals. 
""" allowed: Set[str] considered_tokens: Set[Any] def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None, terminals_by_name=None, considered_rules=None): super(UnexpectedCharacters, self).__init__() # TODO considered_tokens and allowed can be figured out using state self.line = line self.column = column self.pos_in_stream = lex_pos self.state = state self._terminals_by_name = terminals_by_name self.allowed = allowed self.considered_tokens = considered_tokens self.considered_rules = considered_rules self.token_history = token_history if isinstance(seq, bytes): self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace") else: self.char = seq[lex_pos] self._context = self.get_context(seq) def __str__(self): message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column) message += '\n\n' + self._context if self.allowed: message += self._format_expected(self.allowed) if self.token_history: message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history) return message class UnexpectedToken(ParseError, UnexpectedInput): """An exception that is raised by the parser, when the token it received doesn't match any valid step forward. Parameters: token: The mismatched token expected: The set of expected tokens considered_rules: Which rules were considered, to deduce the expected tokens state: A value representing the parser state. Do not rely on its value or type. interactive_parser: An instance of ``InteractiveParser``, that is initialized to the point of failure, and can be used for debugging and error handling. Note: These parameters are available as attributes of the instance. """ expected: Set[str] considered_rules: Set[str] def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None): super(UnexpectedToken, self).__init__() # TODO considered_rules and expected can be figured out using state self.line = getattr(token, 'line', '?') self.column = getattr(token, 'column', '?') self.pos_in_stream = getattr(token, 'start_pos', None) self.state = state self.token = token self.expected = expected # XXX deprecate? 
`accepts` is better self._accepts = NO_VALUE self.considered_rules = considered_rules self.interactive_parser = interactive_parser self._terminals_by_name = terminals_by_name self.token_history = token_history @property def accepts(self) -> Set[str]: if self._accepts is NO_VALUE: self._accepts = self.interactive_parser and self.interactive_parser.accepts() return self._accepts def __str__(self): message = ("Unexpected token %r at line %s, column %s.\n%s" % (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected))) if self.token_history: message += "Previous tokens: %r\n" % self.token_history return message class VisitError(LarkError): """VisitError is raised when visitors are interrupted by an exception It provides the following attributes for inspection: Parameters: rule: the name of the visit rule that failed obj: the tree-node or token that was being processed orig_exc: the exception that cause it to fail Note: These parameters are available as attributes """ obj: 'Union[Tree, Token]' orig_exc: Exception def __init__(self, rule, obj, orig_exc): message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc) super(VisitError, self).__init__(message) self.rule = rule self.obj = obj self.orig_exc = orig_exc class MissingVariableError(LarkError): pass ###} lark-1.2.2/lark/grammar.py000066400000000000000000000071211465673407200154150ustar00rootroot00000000000000from typing import Optional, Tuple, ClassVar, Sequence from .utils import Serialize ###{standalone TOKEN_DEFAULT_PRIORITY = 0 class Symbol(Serialize): __slots__ = ('name',) name: str is_term: ClassVar[bool] = NotImplemented def __init__(self, name: str) -> None: self.name = name def __eq__(self, other): assert isinstance(other, Symbol), other return self.is_term == other.is_term and self.name == other.name def __ne__(self, other): return not (self == other) def __hash__(self): return hash(self.name) def __repr__(self): return '%s(%r)' % (type(self).__name__, self.name) fullrepr = property(__repr__) def renamed(self, f): return type(self)(f(self.name)) class Terminal(Symbol): __serialize_fields__ = 'name', 'filter_out' is_term: ClassVar[bool] = True def __init__(self, name, filter_out=False): self.name = name self.filter_out = filter_out @property def fullrepr(self): return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out) def renamed(self, f): return type(self)(f(self.name), self.filter_out) class NonTerminal(Symbol): __serialize_fields__ = 'name', is_term: ClassVar[bool] = False class RuleOptions(Serialize): __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' keep_all_tokens: bool expand1: bool priority: Optional[int] template_source: Optional[str] empty_indices: Tuple[bool, ...] 
def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None: self.keep_all_tokens = keep_all_tokens self.expand1 = expand1 self.priority = priority self.template_source = template_source self.empty_indices = empty_indices def __repr__(self): return 'RuleOptions(%r, %r, %r, %r)' % ( self.keep_all_tokens, self.expand1, self.priority, self.template_source ) class Rule(Serialize): """ origin : a symbol expansion : a list of symbols order : index of this expansion amongst all rules of the same name """ __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash') __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options' __serialize_namespace__ = Terminal, NonTerminal, RuleOptions origin: NonTerminal expansion: Sequence[Symbol] order: int alias: Optional[str] options: RuleOptions _hash: int def __init__(self, origin: NonTerminal, expansion: Sequence[Symbol], order: int=0, alias: Optional[str]=None, options: Optional[RuleOptions]=None): self.origin = origin self.expansion = expansion self.alias = alias self.order = order self.options = options or RuleOptions() self._hash = hash((self.origin, tuple(self.expansion))) def _deserialize(self): self._hash = hash((self.origin, tuple(self.expansion))) def __str__(self): return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion)) def __repr__(self): return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options) def __hash__(self): return self._hash def __eq__(self, other): if not isinstance(other, Rule): return False return self.origin == other.origin and self.expansion == other.expansion ###} lark-1.2.2/lark/grammars/000077500000000000000000000000001465673407200152255ustar00rootroot00000000000000lark-1.2.2/lark/grammars/__init__.py000066400000000000000000000000001465673407200173240ustar00rootroot00000000000000lark-1.2.2/lark/grammars/common.lark000066400000000000000000000015651465673407200173770ustar00rootroot00000000000000// Basic terminals for common use // // Numbers // DIGIT: "0".."9" HEXDIGIT: "a".."f"|"A".."F"|DIGIT INT: DIGIT+ SIGNED_INT: ["+"|"-"] INT DECIMAL: INT "." INT? | "." INT // float = /-?\d+(\.\d+)?([eE][+-]?\d+)?/ _EXP: ("e"|"E") SIGNED_INT FLOAT: INT _EXP | DECIMAL _EXP? SIGNED_FLOAT: ["+"|"-"] FLOAT NUMBER: FLOAT | INT SIGNED_NUMBER: ["+"|"-"] NUMBER // // Strings // _STRING_INNER: /.*?/ _STRING_ESC_INNER: _STRING_INNER /(? ignore | "%import" import_path ["->" name] -> import | "%import" import_path name_list -> multi_import | "%override" rule -> override_rule | "%declare" name+ -> declare !import_path: "."? name ("." name)* name_list: "(" name ("," name)* ")" ?expansions: alias (_VBAR alias)* ?alias: expansion ["->" RULE] ?expansion: expr* ?expr: atom [OP | "~" NUMBER [".." NUMBER]] ?atom: "(" expansions ")" | "[" expansions "]" -> maybe | value ?value: STRING ".." STRING -> literal_range | name | (REGEXP | STRING) -> literal | name "{" value ("," value)* "}" -> template_usage name: RULE | TOKEN _VBAR: _NL? "|" OP: /[+*]|[?](?![a-z])/ RULE: /!?[_?]?[a-z][_a-z0-9]*/ TOKEN: /_?[A-Z][_A-Z0-9]*/ STRING: _STRING "i"? 
REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/])*?\/[imslux]*/ _NL: /(\r?\n)+\s*/ %import common.ESCAPED_STRING -> _STRING %import common.SIGNED_INT -> NUMBER %import common.WS_INLINE COMMENT: /\s*/ "//" /[^\n]/* | /\s*/ "#" /[^\n]/* %ignore WS_INLINE %ignore COMMENT lark-1.2.2/lark/grammars/python.lark000066400000000000000000000246211465673407200174260ustar00rootroot00000000000000// Python 3 grammar for Lark // This grammar should parse all python 3.x code successfully. // Adapted from: https://docs.python.org/3/reference/grammar.html // Start symbols for the grammar: // single_input is a single interactive statement; // file_input is a module or sequence of commands read from an input file; // eval_input is the input for the eval() functions. // NB: compound_stmt in single_input is followed by extra NEWLINE! // single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE file_input: (_NEWLINE | stmt)* eval_input: testlist _NEWLINE* decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE decorators: decorator+ decorated: decorators (classdef | funcdef | async_funcdef) async_funcdef: "async" funcdef funcdef: "def" name "(" [parameters] ")" ["->" test] ":" suite parameters: paramvalue ("," paramvalue)* ["," SLASH ("," paramvalue)*] ["," [starparams | kwparams]] | starparams | kwparams SLASH: "/" // Otherwise the it will completely disappear and it will be undisguisable in the result starparams: (starparam | starguard) poststarparams starparam: "*" typedparam starguard: "*" poststarparams: ("," paramvalue)* ["," kwparams] kwparams: "**" typedparam ","? ?paramvalue: typedparam ("=" test)? ?typedparam: name (":" test)? lambdef: "lambda" [lambda_params] ":" test lambdef_nocond: "lambda" [lambda_params] ":" test_nocond lambda_params: lambda_paramvalue ("," lambda_paramvalue)* ["," [lambda_starparams | lambda_kwparams]] | lambda_starparams | lambda_kwparams ?lambda_paramvalue: name ("=" test)? lambda_starparams: "*" [name] ("," lambda_paramvalue)* ["," [lambda_kwparams]] lambda_kwparams: "**" name ","? ?stmt: simple_stmt | compound_stmt ?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE ?small_stmt: (expr_stmt | assign_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) expr_stmt: testlist_star_expr assign_stmt: annassign | augassign | assign annassign: testlist_star_expr ":" test ["=" test] assign: testlist_star_expr ("=" (yield_expr|testlist_star_expr))+ augassign: testlist_star_expr augassign_op (yield_expr|testlist) !augassign_op: "+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=" ?testlist_star_expr: test_or_star_expr | test_or_star_expr ("," test_or_star_expr)+ ","? -> tuple | test_or_star_expr "," -> tuple // For normal and annotated assignments, additional restrictions enforced by the interpreter del_stmt: "del" exprlist pass_stmt: "pass" ?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt break_stmt: "break" continue_stmt: "continue" return_stmt: "return" [testlist] yield_stmt: yield_expr raise_stmt: "raise" [test ["from" test]] import_stmt: import_name | import_from import_name: "import" dotted_as_names // note below: the ("." | "...") is necessary because "..." is tokenized as ELLIPSIS import_from: "from" (dots? 
dotted_name | dots) "import" ("*" | "(" import_as_names ")" | import_as_names) !dots: "."+ import_as_name: name ["as" name] dotted_as_name: dotted_name ["as" name] import_as_names: import_as_name ("," import_as_name)* [","] dotted_as_names: dotted_as_name ("," dotted_as_name)* dotted_name: name ("." name)* global_stmt: "global" name ("," name)* nonlocal_stmt: "nonlocal" name ("," name)* assert_stmt: "assert" test ["," test] ?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | match_stmt | with_stmt | funcdef | classdef | decorated | async_stmt async_stmt: "async" (funcdef | with_stmt | for_stmt) if_stmt: "if" test ":" suite elifs ["else" ":" suite] elifs: elif_* elif_: "elif" test ":" suite while_stmt: "while" test ":" suite ["else" ":" suite] for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite] try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally] | "try" ":" suite finally -> try_finally finally: "finally" ":" suite except_clauses: except_clause+ except_clause: "except" [test ["as" name]] ":" suite // NB compile.c makes sure that the default except clause is last with_stmt: "with" with_items ":" suite with_items: with_item ("," with_item)* with_item: test ["as" name] match_stmt: "match" test ":" _NEWLINE _INDENT case+ _DEDENT case: "case" pattern ["if" test] ":" suite ?pattern: sequence_item_pattern "," _sequence_pattern -> sequence_pattern | as_pattern ?as_pattern: or_pattern ("as" NAME)? ?or_pattern: closed_pattern ("|" closed_pattern)* ?closed_pattern: literal_pattern | NAME -> capture_pattern | "_" -> any_pattern | attr_pattern | "(" as_pattern ")" | "[" _sequence_pattern "]" -> sequence_pattern | "(" (sequence_item_pattern "," _sequence_pattern)? ")" -> sequence_pattern | "{" (mapping_item_pattern ("," mapping_item_pattern)* ","?)?"}" -> mapping_pattern | "{" (mapping_item_pattern ("," mapping_item_pattern)* ",")? "**" NAME ","? "}" -> mapping_star_pattern | class_pattern literal_pattern: inner_literal_pattern ?inner_literal_pattern: "None" -> const_none | "True" -> const_true | "False" -> const_false | STRING -> string | number attr_pattern: NAME ("." NAME)+ -> value name_or_attr_pattern: NAME ("." NAME)* -> value mapping_item_pattern: (literal_pattern|attr_pattern) ":" as_pattern _sequence_pattern: (sequence_item_pattern ("," sequence_item_pattern)* ","?)? ?sequence_item_pattern: as_pattern | "*" NAME -> star_pattern class_pattern: name_or_attr_pattern "(" [arguments_pattern ","?] ")" arguments_pattern: pos_arg_pattern ["," keyws_arg_pattern] | keyws_arg_pattern -> no_pos_arguments pos_arg_pattern: as_pattern ("," as_pattern)* keyws_arg_pattern: keyw_arg_pattern ("," keyw_arg_pattern)* keyw_arg_pattern: NAME "=" as_pattern suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT ?test: or_test ("if" or_test "else" test)? | lambdef | assign_expr assign_expr: name ":=" test ?test_nocond: or_test | lambdef_nocond ?or_test: and_test ("or" and_test)* ?and_test: not_test_ ("and" not_test_)* ?not_test_: "not" not_test_ -> not_test | comparison ?comparison: expr (comp_op expr)* star_expr: "*" expr ?expr: or_expr ?or_expr: xor_expr ("|" xor_expr)* ?xor_expr: and_expr ("^" and_expr)* ?and_expr: shift_expr ("&" shift_expr)* ?shift_expr: arith_expr (_shift_op arith_expr)* ?arith_expr: term (_add_op term)* ?term: factor (_mul_op factor)* ?factor: _unary_op factor | power !_unary_op: "+"|"-"|"~" !_add_op: "+"|"-" !_shift_op: "<<"|">>" !_mul_op: "*"|"@"|"/"|"%"|"//" // <> isn't actually a valid comparison operator in Python. 
It's here for the // sake of a __future__ import described in PEP 401 (which really works :-) !comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not" ?power: await_expr ("**" factor)? ?await_expr: AWAIT? atom_expr AWAIT: "await" ?atom_expr: atom_expr "(" [arguments] ")" -> funccall | atom_expr "[" subscriptlist "]" -> getitem | atom_expr "." name -> getattr | atom ?atom: "(" yield_expr ")" | "(" _tuple_inner? ")" -> tuple | "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension | "[" _exprlist? "]" -> list | "[" comprehension{test_or_star_expr} "]" -> list_comprehension | "{" _dict_exprlist? "}" -> dict | "{" comprehension{key_value} "}" -> dict_comprehension | "{" _exprlist "}" -> set | "{" comprehension{test} "}" -> set_comprehension | name -> var | number | string_concat | "(" test ")" | "..." -> ellipsis | "None" -> const_none | "True" -> const_true | "False" -> const_false ?string_concat: string+ _tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",") ?test_or_star_expr: test | star_expr ?subscriptlist: subscript | subscript (("," subscript)+ [","] | ",") -> subscript_tuple ?subscript: test | ([test] ":" [test] [sliceop]) -> slice sliceop: ":" [test] ?exprlist: (expr|star_expr) | (expr|star_expr) (("," (expr|star_expr))+ [","]|",") ?testlist: test | testlist_tuple testlist_tuple: test (("," test)+ [","] | ",") _dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","] key_value: test ":" test _exprlist: test_or_star_expr ("," test_or_star_expr)* [","] classdef: "class" name ["(" [arguments] ")"] ":" suite arguments: argvalue ("," argvalue)* ("," [ starargs | kwargs])? | starargs | kwargs | comprehension{test} starargs: stararg ("," stararg)* ("," argvalue)* ["," kwargs] stararg: "*" test kwargs: "**" test ("," argvalue)* ?argvalue: test ("=" test)? comprehension{comp_result}: comp_result comp_fors [comp_if] comp_fors: comp_for+ comp_for: [ASYNC] "for" exprlist "in" or_test ASYNC: "async" ?comp_if: "if" test_nocond // not used in grammar, but may appear in "node" passed from Parser to Compiler encoding_decl: name yield_expr: "yield" [testlist] | "yield" "from" test -> yield_from number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER string: STRING | LONG_STRING // Other terminals _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ %ignore /[\t \f]+/ // WS %ignore /\\[\t \f]*\r?\n/ // LINE_CONT %ignore COMMENT %declare _INDENT _DEDENT // Python terminals !name: NAME | "match" | "case" NAME: /[^\W\d]\w*/ COMMENT: /#[^\n]*/ STRING: /([ubf]?r?|r[ubf])("(?!"").*?(? None: self.paren_level = 0 self.indent_level = [0] assert self.tab_len > 0 def handle_NL(self, token: Token) -> Iterator[Token]: if self.paren_level > 0: return yield token indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len if indent > self.indent_level[-1]: self.indent_level.append(indent) yield Token.new_borrow_pos(self.INDENT_type, indent_str, token) else: while indent < self.indent_level[-1]: self.indent_level.pop() yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token) if indent != self.indent_level[-1]: raise DedentError('Unexpected dedent to column %s. 
Expected dedent to %s' % (indent, self.indent_level[-1])) def _process(self, stream): for token in stream: if token.type == self.NL_type: yield from self.handle_NL(token) else: yield token if token.type in self.OPEN_PAREN_types: self.paren_level += 1 elif token.type in self.CLOSE_PAREN_types: self.paren_level -= 1 assert self.paren_level >= 0 while len(self.indent_level) > 1: self.indent_level.pop() yield Token(self.DEDENT_type, '') assert self.indent_level == [0], self.indent_level def process(self, stream): self.paren_level = 0 self.indent_level = [0] return self._process(stream) # XXX Hack for ContextualLexer. Maybe there's a more elegant solution? @property def always_accept(self): return (self.NL_type,) @property @abstractmethod def NL_type(self) -> str: "The name of the newline token" raise NotImplementedError() @property @abstractmethod def OPEN_PAREN_types(self) -> List[str]: "The names of the tokens that open a parenthesis" raise NotImplementedError() @property @abstractmethod def CLOSE_PAREN_types(self) -> List[str]: """The names of the tokens that close a parenthesis """ raise NotImplementedError() @property @abstractmethod def INDENT_type(self) -> str: """The name of the token that starts an indentation in the grammar. See also: %declare """ raise NotImplementedError() @property @abstractmethod def DEDENT_type(self) -> str: """The name of the token that end an indentation in the grammar. See also: %declare """ raise NotImplementedError() @property @abstractmethod def tab_len(self) -> int: """How many spaces does a tab equal""" raise NotImplementedError() class PythonIndenter(Indenter): """A postlexer that "injects" _INDENT/_DEDENT tokens based on indentation, according to the Python syntax. See also: the ``postlex`` option in `Lark`. """ NL_type = '_NEWLINE' OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE'] CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE'] INDENT_type = '_INDENT' DEDENT_type = '_DEDENT' tab_len = 8 ###} lark-1.2.2/lark/lark.py000066400000000000000000000670301465673407200147250ustar00rootroot00000000000000from abc import ABC, abstractmethod import getpass import sys, os, pickle import tempfile import types import re from typing import ( TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional, Sequence, Tuple, Iterable, IO, Any, TYPE_CHECKING, Collection ) if TYPE_CHECKING: from .parsers.lalr_interactive_parser import InteractiveParser from .tree import ParseTree from .visitors import Transformer from typing import Literal from .parser_frontends import ParsingFrontend from .exceptions import ConfigurationError, assert_config, UnexpectedInput from .utils import Serialize, SerializeMemoizer, FS, logger from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource, sha256_digest from .tree import Tree from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType from .lexer import Lexer, BasicLexer, TerminalDef, LexerThread, Token from .parse_tree_builder import ParseTreeBuilder from .parser_frontends import _validate_frontend_args, _get_lexer_callbacks, _deserialize_parsing_frontend, _construct_parsing_frontend from .grammar import Rule try: import regex _has_regex = True except ImportError: _has_regex = False ###{standalone class PostLex(ABC): @abstractmethod def process(self, stream: Iterator[Token]) -> Iterator[Token]: return stream always_accept: Iterable[str] = () class LarkOptions(Serialize): """Specifies the options for Lark """ start: List[str] debug: bool strict: bool transformer: 'Optional[Transformer]' 
propagate_positions: Union[bool, str] maybe_placeholders: bool cache: Union[bool, str] regex: bool g_regex_flags: int keep_all_tokens: bool tree_class: Optional[Callable[[str, List], Any]] parser: _ParserArgType lexer: _LexerArgType ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]' postlex: Optional[PostLex] priority: 'Optional[Literal["auto", "normal", "invert"]]' lexer_callbacks: Dict[str, Callable[[Token], Token]] use_bytes: bool ordered_sets: bool edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]] import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]' source_path: Optional[str] OPTIONS_DOC = r""" **=== General Options ===** start The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start") debug Display debug information and extra warnings. Use only when debugging (Default: ``False``) When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed. strict Throw an exception on any potential ambiguity, including shift/reduce conflicts, and regex collisions. transformer Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) propagate_positions Propagates positional attributes into the 'meta' attribute of all tree branches. Sets attributes: (line, column, end_line, end_column, start_pos, end_pos, container_line, container_column, container_end_line, container_end_column) Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating. maybe_placeholders When ``True``, the ``[]`` operator returns ``None`` when not matched. When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all. (default= ``True``) cache Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now. - When ``False``, does nothing (default) - When ``True``, caches to a temporary file in the local directory - When given a string, caches to the path pointed by the string regex When True, uses the ``regex`` module instead of the stdlib ``re``. g_regex_flags Flags that are applied to all terminals (both regex and strings) keep_all_tokens Prevent the tree builder from automagically removing "punctuation" tokens (Default: ``False``) tree_class Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``. **=== Algorithm Options ===** parser Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley"). (there is also a "cyk" option for legacy) lexer Decides whether or not to use a lexer stage - "auto" (default): Choose for me based on the parser - "basic": Use a basic lexer - "contextual": Stronger lexer (only works with parser="lalr") - "dynamic": Flexible and powerful (only with parser="earley") - "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible. ambiguity Decides how to handle ambiguity in the parse. Only relevant if parser="earley" - "resolve": The parser will automatically choose the simplest derivation (it chooses consistently: greedy for tokens, non-greedy for rules) - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest). - "forest": The parser will return the root of the shared packed parse forest. **=== Misc. / Domain Specific Options ===** postlex Lexer post-processing (Default: ``None``) Only works with the basic and contextual lexers. 
priority How priorities should be evaluated - "auto", ``None``, "normal", "invert" (Default: "auto") lexer_callbacks Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. use_bytes Accept an input of type ``bytes`` instead of ``str``. ordered_sets Should Earley use ordered-sets to achieve stable output (~10% slower than regular sets. Default: True) edit_terminals A callback for editing the terminals before parse. import_paths A List of either paths or loader functions to specify from where grammars are imported source_path Override the source of from where the grammar was loaded. Useful for relative imports and unconventional grammar loading **=== End of Options ===** """ if __doc__: __doc__ += OPTIONS_DOC # Adding a new option needs to be done in multiple places: # - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts # - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs # - As an attribute of `LarkOptions` above # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument _defaults: Dict[str, Any] = { 'debug': False, 'strict': False, 'keep_all_tokens': False, 'tree_class': None, 'cache': False, 'postlex': None, 'parser': 'earley', 'lexer': 'auto', 'transformer': None, 'start': 'start', 'priority': 'auto', 'ambiguity': 'auto', 'regex': False, 'propagate_positions': False, 'lexer_callbacks': {}, 'maybe_placeholders': True, 'edit_terminals': None, 'g_regex_flags': 0, 'use_bytes': False, 'ordered_sets': True, 'import_paths': [], 'source_path': None, '_plugins': {}, } def __init__(self, options_dict: Dict[str, Any]) -> None: o = dict(options_dict) options = {} for name, default in self._defaults.items(): if name in o: value = o.pop(name) if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'): value = bool(value) else: value = default options[name] = value if isinstance(options['start'], str): options['start'] = [options['start']] self.__dict__['options'] = options assert_config(self.parser, ('earley', 'lalr', 'cyk', None)) if self.parser == 'earley' and self.transformer: raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. ' 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') if o: raise ConfigurationError("Unknown options: %s" % o.keys()) def __getattr__(self, name: str) -> Any: try: return self.__dict__['options'][name] except KeyError as e: raise AttributeError(e) def __setattr__(self, name: str, value: str) -> None: assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s") self.options[name] = value def serialize(self, memo = None) -> Dict[str, Any]: return self.options @classmethod def deserialize(cls, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]]) -> "LarkOptions": return cls(data) # Options that can be passed to the Lark parser, even when it was loaded from cache/standalone. # These options are only used outside of `load_grammar`. 
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class', '_plugins'} _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) _VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') _T = TypeVar('_T', bound="Lark") class Lark(Serialize): """Main interface for the library. It's mostly a thin wrapper for the many different parsers, and for the tree constructor. Parameters: grammar: a string or file-object containing the grammar spec (using Lark's ebnf syntax) options: a dictionary controlling various aspects of Lark. Example: >>> Lark(r'''start: "foo" ''') Lark(...) """ source_path: str source_grammar: str grammar: 'Grammar' options: LarkOptions lexer: Lexer parser: 'ParsingFrontend' terminals: Collection[TerminalDef] def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None: self.options = LarkOptions(options) re_module: types.ModuleType # Set regex or re module use_regex = self.options.regex if use_regex: if _has_regex: re_module = regex else: raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.') else: re_module = re # Some, but not all file-like objects have a 'name' attribute if self.options.source_path is None: try: self.source_path = grammar.name # type: ignore[union-attr] except AttributeError: self.source_path = '' else: self.source_path = self.options.source_path # Drain file-like objects to get their contents try: read = grammar.read # type: ignore[union-attr] except AttributeError: pass else: grammar = read() cache_fn = None cache_sha256 = None if isinstance(grammar, str): self.source_grammar = grammar if self.options.use_bytes: if not grammar.isascii(): raise ConfigurationError("Grammar must be ascii only, when use_bytes=True") if self.options.cache: if self.options.parser != 'lalr': raise ConfigurationError("cache only works with parser='lalr' for now") unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals', '_plugins') options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) from . import __version__ s = grammar + options_str + __version__ + str(sys.version_info[:2]) cache_sha256 = sha256_digest(s) if isinstance(self.options.cache, str): cache_fn = self.options.cache else: if self.options.cache is not True: raise ConfigurationError("cache argument must be bool or str") try: username = getpass.getuser() except Exception: # The exception raised may be ImportError or OSError in # the future. For the cache, we don't care about the # specific reason - we just want a username. username = "unknown" cache_fn = tempfile.gettempdir() + "/.lark_cache_%s_%s_%s_%s.tmp" % (username, cache_sha256, *sys.version_info[:2]) old_options = self.options try: with FS.open(cache_fn, 'rb') as f: logger.debug('Loading grammar from cache: %s', cache_fn) # Remove options that aren't relevant for loading from cache for name in (set(options) - _LOAD_ALLOWED_OPTIONS): del options[name] file_sha256 = f.readline().rstrip(b'\n') cached_used_files = pickle.load(f) if file_sha256 == cache_sha256.encode('utf8') and verify_used_files(cached_used_files): cached_parser_data = pickle.load(f) self._load(cached_parser_data, **options) return except FileNotFoundError: # The cache file doesn't exist; parse and compose the grammar as normal pass except Exception: # We should probably narrow done which errors we catch here. logger.exception("Failed to load Lark from cache: %r. 
We will try to carry on.", cache_fn) # In theory, the Lark instance might have been messed up by the call to `_load`. # In practice the only relevant thing that might have been overwritten should be `options` self.options = old_options # Parse the grammar file and compose the grammars self.grammar, used_files = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens) else: assert isinstance(grammar, Grammar) self.grammar = grammar if self.options.lexer == 'auto': if self.options.parser == 'lalr': self.options.lexer = 'contextual' elif self.options.parser == 'earley': if self.options.postlex is not None: logger.info("postlex can't be used with the dynamic lexer, so we use 'basic' instead. " "Consider using lalr with contextual instead of earley") self.options.lexer = 'basic' else: self.options.lexer = 'dynamic' elif self.options.parser == 'cyk': self.options.lexer = 'basic' else: assert False, self.options.parser lexer = self.options.lexer if isinstance(lexer, type): assert issubclass(lexer, Lexer) # XXX Is this really important? Maybe just ensure interface compliance else: assert_config(lexer, ('basic', 'contextual', 'dynamic', 'dynamic_complete')) if self.options.postlex is not None and 'dynamic' in lexer: raise ConfigurationError("Can't use postlex with a dynamic lexer. Use basic or contextual instead") if self.options.ambiguity == 'auto': if self.options.parser == 'earley': self.options.ambiguity = 'resolve' else: assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s") if self.options.priority == 'auto': self.options.priority = 'normal' if self.options.priority not in _VALID_PRIORITY_OPTIONS: raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS)) if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS: raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)) if self.options.parser is None: terminals_to_keep = '*' elif self.options.postlex is not None: terminals_to_keep = set(self.options.postlex.always_accept) else: terminals_to_keep = set() # Compile the EBNF grammar into BNF self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep) if self.options.edit_terminals: for t in self.terminals: self.options.edit_terminals(t) self._terminals_dict = {t.name: t for t in self.terminals} # If the user asked to invert the priorities, negate them all here. if self.options.priority == 'invert': for rule in self.rules: if rule.options.priority is not None: rule.options.priority = -rule.options.priority for term in self.terminals: term.priority = -term.priority # Else, if the user asked to disable priorities, strip them from the # rules and terminals. This allows the Earley parsers to skip an extra forest walk # for improved performance, if you don't need them (or didn't specify any). elif self.options.priority is None: for rule in self.rules: if rule.options.priority is not None: rule.options.priority = None for term in self.terminals: term.priority = 0 # TODO Deprecate lexer_callbacks? 
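        # Bundle everything the lexer stage needs (terminal definitions, the chosen regex
        # module, ignored terminals, the post-lexer, per-terminal callbacks and the global
        # regex flags) into a single LexerConf, used by whichever front-end is built below.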
self.lexer_conf = LexerConf( self.terminals, re_module, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes, strict=self.options.strict ) if self.options.parser: self.parser = self._build_parser() elif lexer: self.lexer = self._build_lexer() if cache_fn: logger.debug('Saving grammar to cache: %s', cache_fn) try: with FS.open(cache_fn, 'wb') as f: assert cache_sha256 is not None f.write(cache_sha256.encode('utf8') + b'\n') pickle.dump(used_files, f) self.save(f, _LOAD_ALLOWED_OPTIONS) except IOError as e: logger.exception("Failed to save Lark to cache: %r.", cache_fn, e) if __doc__: __doc__ += "\n\n" + LarkOptions.OPTIONS_DOC __serialize_fields__ = 'parser', 'rules', 'options' def _build_lexer(self, dont_ignore: bool=False) -> BasicLexer: lexer_conf = self.lexer_conf if dont_ignore: from copy import copy lexer_conf = copy(lexer_conf) lexer_conf.ignore = () return BasicLexer(lexer_conf) def _prepare_callbacks(self) -> None: self._callbacks = {} # we don't need these callbacks if we aren't building a tree if self.options.ambiguity != 'forest': self._parse_tree_builder = ParseTreeBuilder( self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.parser != 'lalr' and self.options.ambiguity == 'explicit', self.options.maybe_placeholders ) self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals)) def _build_parser(self) -> "ParsingFrontend": self._prepare_callbacks() _validate_frontend_args(self.options.parser, self.options.lexer) parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) return _construct_parsing_frontend( self.options.parser, self.options.lexer, self.lexer_conf, parser_conf, options=self.options ) def save(self, f, exclude_options: Collection[str] = ()) -> None: """Saves the instance into the given file object Useful for caching and multiprocessing. """ if self.options.parser != 'lalr': raise NotImplementedError("Lark.save() is only implemented for the LALR(1) parser.") data, m = self.memo_serialize([TerminalDef, Rule]) if exclude_options: data["options"] = {n: v for n, v in data["options"].items() if n not in exclude_options} pickle.dump({'data': data, 'memo': m}, f, protocol=pickle.HIGHEST_PROTOCOL) @classmethod def load(cls: Type[_T], f) -> _T: """Loads an instance from the given file object Useful for caching and multiprocessing. 
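        Example (illustrative sketch; ``grammar_text`` and the file name are placeholders):

            >>> parser = Lark(grammar_text, parser="lalr")   # save() only supports LALR(1)
            >>> with open("my_parser.pickle", "wb") as f:
            ...     parser.save(f)
            >>> with open("my_parser.pickle", "rb") as f:
            ...     parser2 = Lark.load(f)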
""" inst = cls.__new__(cls) return inst._load(f) def _deserialize_lexer_conf(self, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]], options: LarkOptions) -> LexerConf: lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo) lexer_conf.callbacks = options.lexer_callbacks or {} lexer_conf.re_module = regex if options.regex else re lexer_conf.use_bytes = options.use_bytes lexer_conf.g_regex_flags = options.g_regex_flags lexer_conf.skip_validation = True lexer_conf.postlex = options.postlex return lexer_conf def _load(self: _T, f: Any, **kwargs) -> _T: if isinstance(f, dict): d = f else: d = pickle.load(f) memo_json = d['memo'] data = d['data'] assert memo_json memo = SerializeMemoizer.deserialize(memo_json, {'Rule': Rule, 'TerminalDef': TerminalDef}, {}) options = dict(data['options']) if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults): raise ConfigurationError("Some options are not allowed when loading a Parser: {}" .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS)) options.update(kwargs) self.options = LarkOptions.deserialize(options, memo) self.rules = [Rule.deserialize(r, memo) for r in data['rules']] self.source_path = '' _validate_frontend_args(self.options.parser, self.options.lexer) self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options) self.terminals = self.lexer_conf.terminals self._prepare_callbacks() self._terminals_dict = {t.name: t for t in self.terminals} self.parser = _deserialize_parsing_frontend( data['parser'], memo, self.lexer_conf, self._callbacks, self.options, # Not all, but multiple attributes are used ) return self @classmethod def _load_from_dict(cls, data, memo, **kwargs): inst = cls.__new__(cls) return inst._load({'data': data, 'memo': memo}, **kwargs) @classmethod def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T: """Create an instance of Lark with the grammar given by its filename If ``rel_to`` is provided, the function will find the grammar filename in relation to it. Example: >>> Lark.open("grammar_file.lark", rel_to=__file__, parser="lalr") Lark(...) """ if rel_to: basepath = os.path.dirname(rel_to) grammar_filename = os.path.join(basepath, grammar_filename) with open(grammar_filename, encoding='utf8') as f: return cls(f, **options) @classmethod def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: 'Sequence[str]'=[""], **options) -> _T: """Create an instance of Lark with the grammar loaded from within the package `package`. This allows grammar loading from zipapps. Imports in the grammar will use the `package` and `search_paths` provided, through `FromPackageLoader` Example: Lark.open_from_package(__name__, "example.lark", ("grammars",), parser=...) """ package_loader = FromPackageLoader(package, search_paths) full_path, text = package_loader(None, grammar_path) options.setdefault('source_path', full_path) options.setdefault('import_paths', []) options['import_paths'].append(package_loader) return cls(text, **options) def __repr__(self): return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer) def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]: """Only lex (and postlex) the text, without parsing it. Only relevant when lexer='basic' When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore. :raises UnexpectedCharacters: In case the lexer cannot find a suitable match. 
""" lexer: Lexer if not hasattr(self, 'lexer') or dont_ignore: lexer = self._build_lexer(dont_ignore) else: lexer = self.lexer lexer_thread = LexerThread.from_text(lexer, text) stream = lexer_thread.lex(None) if self.options.postlex: return self.options.postlex.process(stream) return stream def get_terminal(self, name: str) -> TerminalDef: """Get information about a terminal""" return self._terminals_dict[name] def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser': """Start an interactive parsing session. Parameters: text (str, optional): Text to be parsed. Required for ``resume_parse()``. start (str, optional): Start symbol Returns: A new InteractiveParser instance. See Also: ``Lark.parse()`` """ return self.parser.parse_interactive(text, start=start) def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> 'ParseTree': """Parse the given text, according to the options provided. Parameters: text (str): Text to be parsed. start (str, optional): Required if Lark was given multiple possible start symbols (using the start option). on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing. LALR only. See examples/advanced/error_handling.py for an example of how to use on_error. Returns: If a transformer is supplied to ``__init__``, returns whatever is the result of the transformation. Otherwise, returns a Tree instance. :raises UnexpectedInput: On a parse error, one of these sub-exceptions will rise: ``UnexpectedCharacters``, ``UnexpectedToken``, or ``UnexpectedEOF``. For convenience, these sub-exceptions also inherit from ``ParserError`` and ``LexerError``. """ return self.parser.parse(text, start=start, on_error=on_error) ###} lark-1.2.2/lark/lexer.py000066400000000000000000000567451465673407200151260ustar00rootroot00000000000000# Lexer Implementation from abc import abstractmethod, ABC import re from contextlib import suppress from typing import ( TypeVar, Type, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, ClassVar, TYPE_CHECKING, overload ) from types import ModuleType import warnings try: import interegular except ImportError: pass if TYPE_CHECKING: from .common import LexerConf from .parsers.lalr_parser_state import ParserState from .utils import classify, get_regexp_width, Serialize, logger from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken from .grammar import TOKEN_DEFAULT_PRIORITY ###{standalone from copy import copy try: # For the standalone parser, we need to make sure that has_interegular is False to avoid NameErrors later on has_interegular = bool(interegular) except NameError: has_interegular = False class Pattern(Serialize, ABC): "An abstraction over regular expressions." value: str flags: Collection[str] raw: Optional[str] type: ClassVar[str] def __init__(self, value: str, flags: Collection[str] = (), raw: Optional[str] = None) -> None: self.value = value self.flags = frozenset(flags) self.raw = raw def __repr__(self): return repr(self.to_regexp()) # Pattern Hashing assumes all subclasses have a different priority! 
def __hash__(self): return hash((type(self), self.value, self.flags)) def __eq__(self, other): return type(self) == type(other) and self.value == other.value and self.flags == other.flags @abstractmethod def to_regexp(self) -> str: raise NotImplementedError() @property @abstractmethod def min_width(self) -> int: raise NotImplementedError() @property @abstractmethod def max_width(self) -> int: raise NotImplementedError() def _get_flags(self, value): for f in self.flags: value = ('(?%s:%s)' % (f, value)) return value class PatternStr(Pattern): __serialize_fields__ = 'value', 'flags', 'raw' type: ClassVar[str] = "str" def to_regexp(self) -> str: return self._get_flags(re.escape(self.value)) @property def min_width(self) -> int: return len(self.value) @property def max_width(self) -> int: return len(self.value) class PatternRE(Pattern): __serialize_fields__ = 'value', 'flags', 'raw', '_width' type: ClassVar[str] = "re" def to_regexp(self) -> str: return self._get_flags(self.value) _width = None def _get_width(self): if self._width is None: self._width = get_regexp_width(self.to_regexp()) return self._width @property def min_width(self) -> int: return self._get_width()[0] @property def max_width(self) -> int: return self._get_width()[1] class TerminalDef(Serialize): "A definition of a terminal" __serialize_fields__ = 'name', 'pattern', 'priority' __serialize_namespace__ = PatternStr, PatternRE name: str pattern: Pattern priority: int def __init__(self, name: str, pattern: Pattern, priority: int = TOKEN_DEFAULT_PRIORITY) -> None: assert isinstance(pattern, Pattern), pattern self.name = name self.pattern = pattern self.priority = priority def __repr__(self): return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern) def user_repr(self) -> str: if self.name.startswith('__'): # We represent a generated terminal return self.pattern.raw or self.name else: return self.name _T = TypeVar('_T', bound="Token") class Token(str): """A string with meta-information, that is produced by the lexer. When parsing text, the resulting chunks of the input that haven't been discarded, will end up in the tree as Token instances. The Token class inherits from Python's ``str``, so normal string comparisons and operations will work as expected. Attributes: type: Name of the token (as specified in grammar) value: Value of the token (redundant, as ``token.value == token`` will always be true) start_pos: The index of the token in the text line: The line of the token in the text (starting with 1) column: The column of the token in the text (starting with 1) end_line: The line where the token ends end_column: The next column after the end of the token. For example, if the token is a single character with a column value of 4, end_column will be 5. end_pos: the index where the token ends (basically ``start_pos + len(token)``) """ __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') __match_args__ = ('type', 'value') type: str start_pos: Optional[int] value: Any line: Optional[int] column: Optional[int] end_line: Optional[int] end_column: Optional[int] end_pos: Optional[int] @overload def __new__( cls, type: str, value: Any, start_pos: Optional[int] = None, line: Optional[int] = None, column: Optional[int] = None, end_line: Optional[int] = None, end_column: Optional[int] = None, end_pos: Optional[int] = None ) -> 'Token': ... 
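    # Second overload: accepts the deprecated ``type_`` keyword, which __new__ below maps
    # onto ``type`` while emitting a DeprecationWarning.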
@overload def __new__( cls, type_: str, value: Any, start_pos: Optional[int] = None, line: Optional[int] = None, column: Optional[int] = None, end_line: Optional[int] = None, end_column: Optional[int] = None, end_pos: Optional[int] = None ) -> 'Token': ... def __new__(cls, *args, **kwargs): if "type_" in kwargs: warnings.warn("`type_` is deprecated use `type` instead", DeprecationWarning) if "type" in kwargs: raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.") kwargs["type"] = kwargs.pop("type_") return cls._future_new(*args, **kwargs) @classmethod def _future_new(cls, type, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None): inst = super(Token, cls).__new__(cls, value) inst.type = type inst.start_pos = start_pos inst.value = value inst.line = line inst.column = column inst.end_line = end_line inst.end_column = end_column inst.end_pos = end_pos return inst @overload def update(self, type: Optional[str] = None, value: Optional[Any] = None) -> 'Token': ... @overload def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> 'Token': ... def update(self, *args, **kwargs): if "type_" in kwargs: warnings.warn("`type_` is deprecated use `type` instead", DeprecationWarning) if "type" in kwargs: raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.") kwargs["type"] = kwargs.pop("type_") return self._future_update(*args, **kwargs) def _future_update(self, type: Optional[str] = None, value: Optional[Any] = None) -> 'Token': return Token.new_borrow_pos( type if type is not None else self.type, value if value is not None else self.value, self ) @classmethod def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T: return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) def __reduce__(self): return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column)) def __repr__(self): return 'Token(%r, %r)' % (self.type, self.value) def __deepcopy__(self, memo): return Token(self.type, self.value, self.start_pos, self.line, self.column) def __eq__(self, other): if isinstance(other, Token) and self.type != other.type: return False return str.__eq__(self, other) __hash__ = str.__hash__ class LineCounter: "A utility class for keeping track of line & column information" __slots__ = 'char_pos', 'line', 'column', 'line_start_pos', 'newline_char' def __init__(self, newline_char): self.newline_char = newline_char self.char_pos = 0 self.line = 1 self.column = 1 self.line_start_pos = 0 def __eq__(self, other): if not isinstance(other, LineCounter): return NotImplemented return self.char_pos == other.char_pos and self.newline_char == other.newline_char def feed(self, token: Token, test_newline=True): """Consume a token and calculate the new line & column. As an optional optimization, set test_newline=False if token doesn't contain a newline. 
""" if test_newline: newlines = token.count(self.newline_char) if newlines: self.line += newlines self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1 self.char_pos += len(token) self.column = self.char_pos - self.line_start_pos + 1 class UnlessCallback: def __init__(self, scanner): self.scanner = scanner def __call__(self, t): res = self.scanner.match(t.value, 0) if res: _value, t.type = res return t class CallChain: def __init__(self, callback1, callback2, cond): self.callback1 = callback1 self.callback2 = callback2 self.cond = cond def __call__(self, t): t2 = self.callback1(t) return self.callback2(t) if self.cond(t2) else t2 def _get_match(re_, regexp, s, flags): m = re_.match(regexp, s, flags) if m: return m.group(0) def _create_unless(terminals, g_regex_flags, re_, use_bytes): tokens_by_type = classify(terminals, lambda t: type(t.pattern)) assert len(tokens_by_type) <= 2, tokens_by_type.keys() embedded_strs = set() callback = {} for retok in tokens_by_type.get(PatternRE, []): unless = [] for strtok in tokens_by_type.get(PatternStr, []): if strtok.priority != retok.priority: continue s = strtok.pattern.value if s == _get_match(re_, retok.pattern.to_regexp(), s, g_regex_flags): unless.append(strtok) if strtok.pattern.flags <= retok.pattern.flags: embedded_strs.add(strtok) if unless: callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes)) new_terminals = [t for t in terminals if t not in embedded_strs] return new_terminals, callback class Scanner: def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False): self.terminals = terminals self.g_regex_flags = g_regex_flags self.re_ = re_ self.use_bytes = use_bytes self.match_whole = match_whole self.allowed_types = {t.name for t in self.terminals} self._mres = self._build_mres(terminals, len(terminals)) def _build_mres(self, terminals, max_size): # Python sets an unreasonable group limit (currently 100) in its re module # Worse, the only way to know we reached it is by catching an AssertionError! # This function recursively tries less and less groups until it's successful. postfix = '$' if self.match_whole else '' mres = [] while terminals: pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp() + postfix) for t in terminals[:max_size]) if self.use_bytes: pattern = pattern.encode('latin-1') try: mre = self.re_.compile(pattern, self.g_regex_flags) except AssertionError: # Yes, this is what Python provides us.. :/ return self._build_mres(terminals, max_size // 2) mres.append(mre) terminals = terminals[max_size:] return mres def match(self, text, pos): for mre in self._mres: m = mre.match(text, pos) if m: return m.group(0), m.lastgroup def _regexp_has_newline(r: str): r"""Expressions that may indicate newlines in a regexp: - newlines (\n) - escaped newline (\\n) - anything but ([^...]) - any-char (.) when the flag (?s) exists - spaces (\s) """ return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' 
in r) class LexerState: """Represents the current state of the lexer as it scans the text (Lexer objects are only instantiated per grammar, not per text) """ __slots__ = 'text', 'line_ctr', 'last_token' text: str line_ctr: LineCounter last_token: Optional[Token] def __init__(self, text: str, line_ctr: Optional[LineCounter]=None, last_token: Optional[Token]=None): self.text = text self.line_ctr = line_ctr or LineCounter(b'\n' if isinstance(text, bytes) else '\n') self.last_token = last_token def __eq__(self, other): if not isinstance(other, LexerState): return NotImplemented return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token def __copy__(self): return type(self)(self.text, copy(self.line_ctr), self.last_token) class LexerThread: """A thread that ties a lexer instance and a lexer state, to be used by the parser """ def __init__(self, lexer: 'Lexer', lexer_state: LexerState): self.lexer = lexer self.state = lexer_state @classmethod def from_text(cls, lexer: 'Lexer', text: str) -> 'LexerThread': return cls(lexer, LexerState(text)) def lex(self, parser_state): return self.lexer.lex(self.state, parser_state) def __copy__(self): return type(self)(self.lexer, copy(self.state)) _Token = Token _Callback = Callable[[Token], Token] class Lexer(ABC): """Lexer interface Method Signatures: lex(self, lexer_state, parser_state) -> Iterator[Token] """ @abstractmethod def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]: return NotImplemented def make_lexer_state(self, text): "Deprecated" return LexerState(text) def _check_regex_collisions(terminal_to_regexp: Dict[TerminalDef, str], comparator, strict_mode, max_collisions_to_show=8): if not comparator: comparator = interegular.Comparator.from_regexes(terminal_to_regexp) # When in strict mode, we only ever try to provide one example, so taking # a long time for that should be fine max_time = 2 if strict_mode else 0.2 # We don't want to show too many collisions. if comparator.count_marked_pairs() >= max_collisions_to_show: return for group in classify(terminal_to_regexp, lambda t: t.priority).values(): for a, b in comparator.check(group, skip_marked=True): assert a.priority == b.priority # Mark this pair to not repeat warnings when multiple different BasicLexers see the same collision comparator.mark(a, b) # Notify the user message = f"Collision between Terminals {a.name} and {b.name}. " try: example = comparator.get_example_overlap(a, b, max_time).format_multiline() except ValueError: # Couldn't find an example within max_time steps. example = "No example could be found fast enough. However, the collision does still exists" if strict_mode: raise LexError(f"{message}\n{example}") logger.warning("%s The lexer will choose between them arbitrarily.\n%s", message, example) if comparator.count_marked_pairs() >= max_collisions_to_show: logger.warning("Found 8 regex collisions, will not check for more.") return class AbstractBasicLexer(Lexer): terminals_by_name: Dict[str, TerminalDef] @abstractmethod def __init__(self, conf: 'LexerConf', comparator=None) -> None: ... @abstractmethod def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token: ... 
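    # Shared driver for all basic lexers: keep delegating to next_token() until it raises
    # EOFError, which signals that the whole input has been consumed.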
def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]: with suppress(EOFError): while True: yield self.next_token(state, parser_state) class BasicLexer(AbstractBasicLexer): terminals: Collection[TerminalDef] ignore_types: FrozenSet[str] newline_types: FrozenSet[str] user_callbacks: Dict[str, _Callback] callback: Dict[str, _Callback] re: ModuleType def __init__(self, conf: 'LexerConf', comparator=None) -> None: terminals = list(conf.terminals) assert all(isinstance(t, TerminalDef) for t in terminals), terminals self.re = conf.re_module if not conf.skip_validation: # Sanitization terminal_to_regexp = {} for t in terminals: regexp = t.pattern.to_regexp() try: self.re.compile(regexp, conf.g_regex_flags) except self.re.error: raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern)) if t.pattern.min_width == 0: raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern)) if t.pattern.type == "re": terminal_to_regexp[t] = regexp if not (set(conf.ignore) <= {t.name for t in terminals}): raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals})) if has_interegular: _check_regex_collisions(terminal_to_regexp, comparator, conf.strict) elif conf.strict: raise LexError("interegular must be installed for strict mode. Use `pip install 'lark[interegular]'`.") # Init self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())) self.ignore_types = frozenset(conf.ignore) terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) self.terminals = terminals self.user_callbacks = conf.callbacks self.g_regex_flags = conf.g_regex_flags self.use_bytes = conf.use_bytes self.terminals_by_name = conf.terminals_by_name self._scanner = None def _build_scanner(self): terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes) assert all(self.callback.values()) for type_, f in self.user_callbacks.items(): if type_ in self.callback: # Already a callback there, probably UnlessCallback self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_) else: self.callback[type_] = f self._scanner = Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes) @property def scanner(self): if self._scanner is None: self._build_scanner() return self._scanner def match(self, text, pos): return self.scanner.match(text, pos) def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token: line_ctr = lex_state.line_ctr while line_ctr.char_pos < len(lex_state.text): res = self.match(lex_state.text, line_ctr.char_pos) if not res: allowed = self.scanner.allowed_types - self.ignore_types if not allowed: allowed = {""} raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token], state=parser_state, terminals_by_name=self.terminals_by_name) value, type_ = res ignored = type_ in self.ignore_types t = None if not ignored or type_ in self.callback: t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) line_ctr.feed(value, type_ in self.newline_types) if t is not None: t.end_line = line_ctr.line t.end_column = line_ctr.column t.end_pos = line_ctr.char_pos if t.type in self.callback: t = self.callback[t.type](t) if not ignored: if not isinstance(t, Token): raise LexError("Callbacks must return a token (returned %r)" % t) lex_state.last_token = t 
return t # EOF raise EOFError(self) class ContextualLexer(Lexer): lexers: Dict[int, AbstractBasicLexer] root_lexer: AbstractBasicLexer BasicLexer: Type[AbstractBasicLexer] = BasicLexer def __init__(self, conf: 'LexerConf', states: Dict[int, Collection[str]], always_accept: Collection[str]=()) -> None: terminals = list(conf.terminals) terminals_by_name = conf.terminals_by_name trad_conf = copy(conf) trad_conf.terminals = terminals if has_interegular and not conf.skip_validation: comparator = interegular.Comparator.from_regexes({t: t.pattern.to_regexp() for t in terminals}) else: comparator = None lexer_by_tokens: Dict[FrozenSet[str], AbstractBasicLexer] = {} self.lexers = {} for state, accepts in states.items(): key = frozenset(accepts) try: lexer = lexer_by_tokens[key] except KeyError: accepts = set(accepts) | set(conf.ignore) | set(always_accept) lexer_conf = copy(trad_conf) lexer_conf.terminals = [terminals_by_name[n] for n in accepts if n in terminals_by_name] lexer = self.BasicLexer(lexer_conf, comparator) lexer_by_tokens[key] = lexer self.lexers[state] = lexer assert trad_conf.terminals is terminals trad_conf.skip_validation = True # We don't need to verify all terminals again self.root_lexer = self.BasicLexer(trad_conf, comparator) def lex(self, lexer_state: LexerState, parser_state: 'ParserState') -> Iterator[Token]: try: while True: lexer = self.lexers[parser_state.position] yield lexer.next_token(lexer_state, parser_state) except EOFError: pass except UnexpectedCharacters as e: # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context. # This tests the input against the global context, to provide a nicer error. try: last_token = lexer_state.last_token # Save last_token. Calling root_lexer.next_token will change this to the wrong token token = self.root_lexer.next_token(lexer_state, parser_state) raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_name) except UnexpectedCharacters: raise e # Raise the original UnexpectedCharacters. The root lexer raises it with the wrong expected set. ###} lark-1.2.2/lark/load_grammar.py000066400000000000000000001512721465673407200164230ustar00rootroot00000000000000"""Parses and compiles Lark grammars into an internal representation. """ import hashlib import os.path import sys from collections import namedtuple from copy import copy, deepcopy import pkgutil from ast import literal_eval from contextlib import suppress from typing import List, Tuple, Union, Callable, Dict, Optional, Sequence, Generator from .utils import bfs, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors, OrderedSet from .lexer import Token, TerminalDef, PatternStr, PatternRE, Pattern from .parse_tree_builder import ParseTreeBuilder from .parser_frontends import ParsingFrontend from .common import LexerConf, ParserConf from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, TOKEN_DEFAULT_PRIORITY from .utils import classify, dedup_list from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError, UnexpectedInput from .tree import Tree, SlottedTree as ST from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive inline_args = v_args(inline=True) IMPORT_PATHS = ['grammars'] EXT = '.lark' _RE_FLAGS = 'imslux' _EMPTY = Symbol('__empty__') _TERMINAL_NAMES = { '.' 
: 'DOT', ',' : 'COMMA', ':' : 'COLON', ';' : 'SEMICOLON', '+' : 'PLUS', '-' : 'MINUS', '*' : 'STAR', '/' : 'SLASH', '\\' : 'BACKSLASH', '|' : 'VBAR', '?' : 'QMARK', '!' : 'BANG', '@' : 'AT', '#' : 'HASH', '$' : 'DOLLAR', '%' : 'PERCENT', '^' : 'CIRCUMFLEX', '&' : 'AMPERSAND', '_' : 'UNDERSCORE', '<' : 'LESSTHAN', '>' : 'MORETHAN', '=' : 'EQUAL', '"' : 'DBLQUOTE', '\'' : 'QUOTE', '`' : 'BACKQUOTE', '~' : 'TILDE', '(' : 'LPAR', ')' : 'RPAR', '{' : 'LBRACE', '}' : 'RBRACE', '[' : 'LSQB', ']' : 'RSQB', '\n' : 'NEWLINE', '\r\n' : 'CRLF', '\t' : 'TAB', ' ' : 'SPACE', } # Grammar Parser TERMINALS = { '_LPAR': r'\(', '_RPAR': r'\)', '_LBRA': r'\[', '_RBRA': r'\]', '_LBRACE': r'\{', '_RBRACE': r'\}', 'OP': '[+*]|[?](?![a-z_])', '_COLON': ':', '_COMMA': ',', '_OR': r'\|', '_DOT': r'\.(?!\.)', '_DOTDOT': r'\.\.', 'TILDE': '~', 'RULE_MODIFIERS': '(!|![?]?|[?]!?)(?=[_a-z])', 'RULE': '_?[a-z][_a-z0-9]*', 'TERMINAL': '_?[A-Z][_A-Z0-9]*', 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', 'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS, '_NL': r'(\r?\n)+\s*', '_NL_OR': r'(\r?\n)+\s*\|', 'WS': r'[ \t]+', 'COMMENT': r'\s*//[^\n]*|\s*#[^\n]*', 'BACKSLASH': r'\\[ ]*\n', '_TO': '->', '_IGNORE': r'%ignore', '_OVERRIDE': r'%override', '_DECLARE': r'%declare', '_EXTEND': r'%extend', '_IMPORT': r'%import', 'NUMBER': r'[+-]?\d+', } RULES = { 'start': ['_list'], '_list': ['_item', '_list _item'], '_item': ['rule', 'term', 'ignore', 'import', 'declare', 'override', 'extend', '_NL'], 'rule': ['rule_modifiers RULE template_params priority _COLON expansions _NL'], 'rule_modifiers': ['RULE_MODIFIERS', ''], 'priority': ['_DOT NUMBER', ''], 'template_params': ['_LBRACE _template_params _RBRACE', ''], '_template_params': ['RULE', '_template_params _COMMA RULE'], 'expansions': ['_expansions'], '_expansions': ['alias', '_expansions _OR alias', '_expansions _NL_OR alias'], '?alias': ['expansion _TO nonterminal', 'expansion'], 'expansion': ['_expansion'], '_expansion': ['', '_expansion expr'], '?expr': ['atom', 'atom OP', 'atom TILDE NUMBER', 'atom TILDE NUMBER _DOTDOT NUMBER', ], '?atom': ['_LPAR expansions _RPAR', 'maybe', 'value'], 'value': ['terminal', 'nonterminal', 'literal', 'range', 'template_usage'], 'terminal': ['TERMINAL'], 'nonterminal': ['RULE'], '?name': ['RULE', 'TERMINAL'], '?symbol': ['terminal', 'nonterminal'], 'maybe': ['_LBRA expansions _RBRA'], 'range': ['STRING _DOTDOT STRING'], 'template_usage': ['nonterminal _LBRACE _template_args _RBRACE'], '_template_args': ['value', '_template_args _COMMA value'], 'term': ['TERMINAL _COLON expansions _NL', 'TERMINAL _DOT NUMBER _COLON expansions _NL'], 'override': ['_OVERRIDE rule', '_OVERRIDE term'], 'extend': ['_EXTEND rule', '_EXTEND term'], 'ignore': ['_IGNORE expansions _NL'], 'declare': ['_DECLARE _declare_args _NL'], 'import': ['_IMPORT _import_path _NL', '_IMPORT _import_path _LPAR name_list _RPAR _NL', '_IMPORT _import_path _TO name _NL'], '_import_path': ['import_lib', 'import_rel'], 'import_lib': ['_import_args'], 'import_rel': ['_DOT _import_args'], '_import_args': ['name', '_import_args _DOT name'], 'name_list': ['_name_list'], '_name_list': ['name', '_name_list _COMMA name'], '_declare_args': ['symbol', '_declare_args symbol'], 'literal': ['REGEXP', 'STRING'], } # Value 5 keeps the number of states in the lalr parser somewhat minimal # It isn't optimal, but close to it. 
See PR #949 SMALL_FACTOR_THRESHOLD = 5 # The Threshold whether repeat via ~ are split up into different rules # 50 is chosen since it keeps the number of states low and therefore lalr analysis time low, # while not being to overaggressive and unnecessarily creating rules that might create shift/reduce conflicts. # (See PR #949) REPEAT_BREAK_THRESHOLD = 50 class FindRuleSize(Transformer): def __init__(self, keep_all_tokens: bool): self.keep_all_tokens = keep_all_tokens def _will_not_get_removed(self, sym: Symbol) -> bool: if isinstance(sym, NonTerminal): return not sym.name.startswith('_') if isinstance(sym, Terminal): return self.keep_all_tokens or not sym.filter_out if sym is _EMPTY: return False assert False, sym def _args_as_int(self, args: List[Union[int, Symbol]]) -> Generator[int, None, None]: for a in args: if isinstance(a, int): yield a elif isinstance(a, Symbol): yield 1 if self._will_not_get_removed(a) else 0 else: assert False def expansion(self, args) -> int: return sum(self._args_as_int(args)) def expansions(self, args) -> int: return max(self._args_as_int(args)) @inline_args class EBNF_to_BNF(Transformer_InPlace): def __init__(self): self.new_rules = [] self.rules_cache = {} self.prefix = 'anon' self.i = 0 self.rule_options = None def _name_rule(self, inner: str): new_name = '__%s_%s_%d' % (self.prefix, inner, self.i) self.i += 1 return new_name def _add_rule(self, key, name, expansions): t = NonTerminal(name) self.new_rules.append((name, expansions, self.rule_options)) self.rules_cache[key] = t return t def _add_recurse_rule(self, type_: str, expr: Tree): try: return self.rules_cache[expr] except KeyError: new_name = self._name_rule(type_) t = NonTerminal(new_name) tree = ST('expansions', [ ST('expansion', [expr]), ST('expansion', [t, expr]) ]) return self._add_rule(expr, new_name, tree) def _add_repeat_rule(self, a, b, target, atom): """Generate a rule that repeats target ``a`` times, and repeats atom ``b`` times. When called recursively (into target), it repeats atom for x(n) times, where: x(0) = 1 x(n) = a(n) * x(n-1) + b Example rule when a=3, b=4: new_rule: target target target atom atom atom atom """ key = (a, b, target, atom) try: return self.rules_cache[key] except KeyError: new_name = self._name_rule('repeat_a%d_b%d' % (a, b)) tree = ST('expansions', [ST('expansion', [target] * a + [atom] * b)]) return self._add_rule(key, new_name, tree) def _add_repeat_opt_rule(self, a, b, target, target_opt, atom): """Creates a rule that matches atom 0 to (a*n+b)-1 times. 
When target matches n times atom, and target_opt 0 to n-1 times target_opt, First we generate target * i followed by target_opt, for i from 0 to a-1 These match 0 to n*a - 1 times atom Then we generate target * a followed by atom * i, for i from 0 to b-1 These match n*a to n*a + b-1 times atom The created rule will not have any shift/reduce conflicts so that it can be used with lalr Example rule when a=3, b=4: new_rule: target_opt | target target_opt | target target target_opt | target target target | target target target atom | target target target atom atom | target target target atom atom atom """ key = (a, b, target, atom, "opt") try: return self.rules_cache[key] except KeyError: new_name = self._name_rule('repeat_a%d_b%d_opt' % (a, b)) tree = ST('expansions', [ ST('expansion', [target]*i + [target_opt]) for i in range(a) ] + [ ST('expansion', [target]*a + [atom]*i) for i in range(b) ]) return self._add_rule(key, new_name, tree) def _generate_repeats(self, rule: Tree, mn: int, mx: int): """Generates a rule tree that repeats ``rule`` exactly between ``mn`` to ``mx`` times. """ # For a small number of repeats, we can take the naive approach if mx < REPEAT_BREAK_THRESHOLD: return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx + 1)]) # For large repeat values, we break the repetition into sub-rules. # We treat ``rule~mn..mx`` as ``rule~mn rule~0..(diff=mx-mn)``. # We then use small_factors to split up mn and diff up into values [(a, b), ...] # This values are used with the help of _add_repeat_rule and _add_repeat_rule_opt # to generate a complete rule/expression that matches the corresponding number of repeats mn_target = rule for a, b in small_factors(mn, SMALL_FACTOR_THRESHOLD): mn_target = self._add_repeat_rule(a, b, mn_target, rule) if mx == mn: return mn_target diff = mx - mn + 1 # We add one because _add_repeat_opt_rule generates rules that match one less diff_factors = small_factors(diff, SMALL_FACTOR_THRESHOLD) diff_target = rule # Match rule 1 times diff_opt_target = ST('expansion', []) # match rule 0 times (e.g. up to 1 -1 times) for a, b in diff_factors[:-1]: diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule) diff_target = self._add_repeat_rule(a, b, diff_target, rule) a, b = diff_factors[-1] diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule) return ST('expansions', [ST('expansion', [mn_target] + [diff_opt_target])]) def expr(self, rule: Tree, op: Token, *args): if op.value == '?': empty = ST('expansion', []) return ST('expansions', [rule, empty]) elif op.value == '+': # a : b c+ d # --> # a : b _c d # _c : _c c | c; return self._add_recurse_rule('plus', rule) elif op.value == '*': # a : b c* d # --> # a : b _c? 
d # _c : _c c | c; new_name = self._add_recurse_rule('star', rule) return ST('expansions', [new_name, ST('expansion', [])]) elif op.value == '~': if len(args) == 1: mn = mx = int(args[0]) else: mn, mx = map(int, args) if mx < mn or mn < 0: raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx)) return self._generate_repeats(rule, mn, mx) assert False, op def maybe(self, rule: Tree): keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens rule_size = FindRuleSize(keep_all_tokens).transform(rule) empty = ST('expansion', [_EMPTY] * rule_size) return ST('expansions', [rule, empty]) class SimplifyRule_Visitor(Visitor): @staticmethod def _flatten(tree: Tree): while tree.expand_kids_by_data(tree.data): pass def expansion(self, tree: Tree): # rules_list unpacking # a : b (c|d) e # --> # a : b c e | b d e # # In AST terms: # expansion(b, expansions(c, d), e) # --> # expansions( expansion(b, c, e), expansion(b, d, e) ) self._flatten(tree) for i, child in enumerate(tree.children): if isinstance(child, Tree) and child.data == 'expansions': tree.data = 'expansions' tree.children = [self.visit(ST('expansion', [option if i == j else other for j, other in enumerate(tree.children)])) for option in dedup_list(child.children)] self._flatten(tree) break def alias(self, tree): rule, alias_name = tree.children if rule.data == 'expansions': aliases = [] for child in tree.children[0].children: aliases.append(ST('alias', [child, alias_name])) tree.data = 'expansions' tree.children = aliases def expansions(self, tree: Tree): self._flatten(tree) # Ensure all children are unique if len(set(tree.children)) != len(tree.children): tree.children = dedup_list(tree.children) # dedup is expensive, so try to minimize its use class RuleTreeToText(Transformer): def expansions(self, x): return x def expansion(self, symbols): return symbols, None def alias(self, x): (expansion, _alias), alias = x assert _alias is None, (alias, expansion, '-', _alias) # Double alias not allowed return expansion, alias.name class PrepareAnonTerminals(Transformer_InPlace): """Create a unique list of anonymous terminals. 
Attempt to give meaningful names to them when we add them""" def __init__(self, terminals): self.terminals = terminals self.term_set = {td.name for td in self.terminals} self.term_reverse = {td.pattern: td for td in terminals} self.i = 0 self.rule_options = None @inline_args def pattern(self, p): value = p.value if p in self.term_reverse and p.flags != self.term_reverse[p].pattern.flags: raise GrammarError(u'Conflicting flags for the same terminal: %s' % p) term_name = None if isinstance(p, PatternStr): try: # If already defined, use the user-defined terminal name term_name = self.term_reverse[p].name except KeyError: # Try to assign an indicative anon-terminal name try: term_name = _TERMINAL_NAMES[value] except KeyError: if value and is_id_continue(value) and is_id_start(value[0]) and value.upper() not in self.term_set: term_name = value.upper() if term_name in self.term_set: term_name = None elif isinstance(p, PatternRE): if p in self.term_reverse: # Kind of a weird placement.name term_name = self.term_reverse[p].name else: assert False, p if term_name is None: term_name = '__ANON_%d' % self.i self.i += 1 if term_name not in self.term_set: assert p not in self.term_reverse self.term_set.add(term_name) termdef = TerminalDef(term_name, p) self.term_reverse[p] = termdef self.terminals.append(termdef) filter_out = False if self.rule_options and self.rule_options.keep_all_tokens else isinstance(p, PatternStr) return Terminal(term_name, filter_out=filter_out) class _ReplaceSymbols(Transformer_InPlace): """Helper for ApplyTemplates""" def __init__(self): self.names = {} def value(self, c): if len(c) == 1 and isinstance(c[0], Symbol) and c[0].name in self.names: return self.names[c[0].name] return self.__default__('value', c, None) def template_usage(self, c): name = c[0].name if name in self.names: return self.__default__('template_usage', [self.names[name]] + c[1:], None) return self.__default__('template_usage', c, None) class ApplyTemplates(Transformer_InPlace): """Apply the templates, creating new rules that represent the used templates""" def __init__(self, rule_defs): self.rule_defs = rule_defs self.replacer = _ReplaceSymbols() self.created_templates = set() def template_usage(self, c): name = c[0].name args = c[1:] result_name = "%s{%s}" % (name, ",".join(a.name for a in args)) if result_name not in self.created_templates: self.created_templates.add(result_name) (_n, params, tree, options) ,= (t for t in self.rule_defs if t[0] == name) assert len(params) == len(args), args result_tree = deepcopy(tree) self.replacer.names = dict(zip(params, args)) self.replacer.transform(result_tree) self.rule_defs.append((result_name, [], result_tree, deepcopy(options))) return NonTerminal(result_name) def _rfind(s, choices): return max(s.rfind(c) for c in choices) def eval_escaping(s): w = '' i = iter(s) for n in i: w += n if n == '\\': try: n2 = next(i) except StopIteration: raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s) if n2 == '\\': w += '\\\\' elif n2 not in 'Uuxnftr': w += '\\' w += n2 w = w.replace('\\"', '"').replace("'", "\\'") to_eval = "u'''%s'''" % w try: s = literal_eval(to_eval) except SyntaxError as e: raise GrammarError(s, e) return s def _literal_to_pattern(literal): assert isinstance(literal, Token) v = literal.value flag_start = _rfind(v, '/"')+1 assert flag_start > 0 flags = v[flag_start:] assert all(f in _RE_FLAGS for f in flags), flags if literal.type == 'STRING' and '\n' in v: raise GrammarError('You cannot put newlines in string literals') if 
literal.type == 'REGEXP' and '\n' in v and 'x' not in flags: raise GrammarError('You can only use newlines in regular expressions ' 'with the `x` (verbose) flag') v = v[:flag_start] assert v[0] == v[-1] and v[0] in '"/' x = v[1:-1] s = eval_escaping(x) if s == "": raise GrammarError("Empty terminals are not allowed (%s)" % literal) if literal.type == 'STRING': s = s.replace('\\\\', '\\') return PatternStr(s, flags, raw=literal.value) elif literal.type == 'REGEXP': return PatternRE(s, flags, raw=literal.value) else: assert False, 'Invariant failed: literal.type not in ["STRING", "REGEXP"]' @inline_args class PrepareLiterals(Transformer_InPlace): def literal(self, literal): return ST('pattern', [_literal_to_pattern(literal)]) def range(self, start, end): assert start.type == end.type == 'STRING' start = start.value[1:-1] end = end.value[1:-1] assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1 regexp = '[%s-%s]' % (start, end) return ST('pattern', [PatternRE(regexp)]) def _make_joined_pattern(regexp, flags_set) -> PatternRE: return PatternRE(regexp, ()) class TerminalTreeToPattern(Transformer_NonRecursive): def pattern(self, ps): p ,= ps return p def expansion(self, items: List[Pattern]) -> Pattern: if not items: return PatternStr('') if len(items) == 1: return items[0] pattern = ''.join(i.to_regexp() for i in items) return _make_joined_pattern(pattern, {i.flags for i in items}) def expansions(self, exps: List[Pattern]) -> Pattern: if len(exps) == 1: return exps[0] # Do a bit of sorting to make sure that the longest option is returned # (Python's re module otherwise prefers just 'l' when given (l|ll) and both could match) exps.sort(key=lambda x: (-x.max_width, -x.min_width, -len(x.value))) pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps)) return _make_joined_pattern(pattern, {i.flags for i in exps}) def expr(self, args) -> Pattern: inner: Pattern inner, op = args[:2] if op == '~': if len(args) == 3: op = "{%d}" % int(args[2]) else: mn, mx = map(int, args[2:]) if mx < mn: raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (inner, mn, mx)) op = "{%d,%d}" % (mn, mx) else: assert len(args) == 2 return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags) def maybe(self, expr): return self.expr(expr + ['?']) def alias(self, t): raise GrammarError("Aliasing not allowed in terminals (You used -> in the wrong place)") def value(self, v): return v[0] class ValidateSymbols(Transformer_InPlace): def value(self, v): v ,= v assert isinstance(v, (Tree, Symbol)) return v def nr_deepcopy_tree(t): """Deepcopy tree `t` without recursion""" return Transformer_NonRecursive(False).transform(t) class Grammar: term_defs: List[Tuple[str, Tuple[Tree, int]]] rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]] ignore: List[str] def __init__(self, rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]], term_defs: List[Tuple[str, Tuple[Tree, int]]], ignore: List[str]) -> None: self.term_defs = term_defs self.rule_defs = rule_defs self.ignore = ignore def compile(self, start, terminals_to_keep) -> Tuple[List[TerminalDef], List[Rule], List[str]]: # We change the trees in-place (to support huge grammars) # So deepcopy allows calling compile more than once. 
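        # Compilation proceeds in stages: deep-copy the definition trees, turn terminal
        # trees into patterns, expand templates and EBNF constructs into plain BNF, build
        # Rule objects, and finally prune unused rules and terminals (see the section
        # comments below).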
term_defs = [(n, (nr_deepcopy_tree(t), p)) for n, (t, p) in self.term_defs] rule_defs = [(n, p, nr_deepcopy_tree(t), o) for n, p, t, o in self.rule_defs] # =================== # Compile Terminals # =================== # Convert terminal-trees to strings/regexps for name, (term_tree, priority) in term_defs: if term_tree is None: # Terminal added through %declare continue expansions = list(term_tree.find_data('expansion')) if len(expansions) == 1 and not expansions[0].children: raise GrammarError("Terminals cannot be empty (%s)" % name) transformer = PrepareLiterals() * TerminalTreeToPattern() terminals = [TerminalDef(name, transformer.transform(term_tree), priority) for name, (term_tree, priority) in term_defs if term_tree] # ================= # Compile Rules # ================= # 1. Pre-process terminals anon_tokens_transf = PrepareAnonTerminals(terminals) transformer = PrepareLiterals() * ValidateSymbols() * anon_tokens_transf # Adds to terminals # 2. Inline Templates transformer *= ApplyTemplates(rule_defs) # 3. Convert EBNF to BNF (and apply step 1 & 2) ebnf_to_bnf = EBNF_to_BNF() rules = [] i = 0 while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates name, params, rule_tree, options = rule_defs[i] i += 1 if len(params) != 0: # Dont transform templates continue rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None ebnf_to_bnf.rule_options = rule_options ebnf_to_bnf.prefix = name anon_tokens_transf.rule_options = rule_options tree = transformer.transform(rule_tree) res: Tree = ebnf_to_bnf.transform(tree) rules.append((name, res, options)) rules += ebnf_to_bnf.new_rules assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision" # 4. Compile tree to Rule objects rule_tree_to_text = RuleTreeToText() simplify_rule = SimplifyRule_Visitor() compiled_rules: List[Rule] = [] for rule_content in rules: name, tree, options = rule_content simplify_rule.visit(tree) expansions = rule_tree_to_text.transform(tree) for i, (expansion, alias) in enumerate(expansions): if alias and name.startswith('_'): raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)"% (name, alias)) empty_indices = tuple(x==_EMPTY for x in expansion) if any(empty_indices): exp_options = copy(options) or RuleOptions() exp_options.empty_indices = empty_indices expansion = [x for x in expansion if x!=_EMPTY] else: exp_options = options for sym in expansion: assert isinstance(sym, Symbol) if sym.is_term and exp_options and exp_options.keep_all_tokens: assert isinstance(sym, Terminal) sym.filter_out = False rule = Rule(NonTerminal(name), expansion, i, alias, exp_options) compiled_rules.append(rule) # Remove duplicates of empty rules, throw error for non-empty duplicates if len(set(compiled_rules)) != len(compiled_rules): duplicates = classify(compiled_rules, lambda x: x) for dups in duplicates.values(): if len(dups) > 1: if dups[0].expansion: raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)" % ''.join('\n * %s' % i for i in dups)) # Empty rule; assert all other attributes are equal assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups) # Remove duplicates compiled_rules = list(OrderedSet(compiled_rules)) # Filter out unused rules while True: c = len(compiled_rules) used_rules = {s for r in compiled_rules for s in r.expansion if isinstance(s, NonTerminal) and s != r.origin} 
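            # The start symbols always count as used; rules that are not reachable from
            # them are pruned, repeating until a fixed point is reached.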
used_rules |= {NonTerminal(s) for s in start} compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules) for r in unused: logger.debug("Unused rule: %s", r) if len(compiled_rules) == c: break # Filter out unused terminals if terminals_to_keep != '*': used_terms = {t.name for r in compiled_rules for t in r.expansion if isinstance(t, Terminal)} terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep) if unused: logger.debug("Unused terminals: %s", [t.name for t in unused]) return terminals, compiled_rules, self.ignore PackageResource = namedtuple('PackageResource', 'pkg_name path') class FromPackageLoader: """ Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`. This allows them to be compatible even from within zip files. Relative imports are handled, so you can just freely use them. pkg_name: The name of the package. You can probably provide `__name__` most of the time search_paths: All the path that will be search on absolute imports. """ pkg_name: str search_paths: Sequence[str] def __init__(self, pkg_name: str, search_paths: Sequence[str]=("", )) -> None: self.pkg_name = pkg_name self.search_paths = search_paths def __repr__(self): return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths) def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: if base_path is None: to_try = self.search_paths else: # Check whether or not the importing grammar was loaded by this module. if not isinstance(base_path, PackageResource) or base_path.pkg_name != self.pkg_name: # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway raise IOError() to_try = [base_path.path] err = None for path in to_try: full_path = os.path.join(path, grammar_path) try: text: Optional[bytes] = pkgutil.get_data(self.pkg_name, full_path) except IOError as e: err = e continue else: return PackageResource(self.pkg_name, full_path), (text.decode() if text else '') raise IOError('Cannot find grammar in given paths') from err stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS) def resolve_term_references(term_dict): # TODO Solve with transitive closure (maybe) while True: changed = False for name, token_tree in term_dict.items(): if token_tree is None: # Terminal added through %declare continue for exp in token_tree.find_data('value'): item ,= exp.children if isinstance(item, NonTerminal): raise GrammarError("Rules aren't allowed inside terminals (%s in %s)" % (item, name)) elif isinstance(item, Terminal): try: term_value = term_dict[item.name] except KeyError: raise GrammarError("Terminal used but not defined: %s" % item.name) assert term_value is not None exp.children[0] = term_value changed = True else: assert isinstance(item, Tree) if not changed: break for name, term in term_dict.items(): if term: # Not just declared for child in term.children: ids = [id(x) for x in child.iter_subtrees()] if id(term) in ids: raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name) def symbol_from_strcase(s): assert isinstance(s, str) return Terminal(s, filter_out=s.startswith('_')) if s.isupper() else NonTerminal(s) @inline_args class PrepareGrammar(Transformer_InPlace): def terminal(self, name): return Terminal(str(name), filter_out=name.startswith('_')) def 
nonterminal(self, name): return NonTerminal(name.value) def _find_used_symbols(tree): assert tree.data == 'expansions' return {t.name for x in tree.find_data('expansion') for t in x.scan_values(lambda t: isinstance(t, Symbol))} def _get_parser(): try: return _get_parser.cache except AttributeError: terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] rules = [(name.lstrip('?'), x, RuleOptions(expand1=name.startswith('?'))) for name, x in RULES.items()] rules = [Rule(NonTerminal(r), [symbol_from_strcase(s) for s in x.split()], i, None, o) for r, xs, o in rules for i, x in enumerate(xs)] callback = ParseTreeBuilder(rules, ST).create_callback() import re lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT', 'BACKSLASH']) parser_conf = ParserConf(rules, callback, ['start']) lexer_conf.lexer_type = 'basic' parser_conf.parser_type = 'lalr' _get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, None) return _get_parser.cache GRAMMAR_ERRORS = [ ('Incorrect type of value', ['a: 1\n']), ('Unclosed parenthesis', ['a: (\n']), ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']), ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']), ('Illegal name for rules or terminals', ['Aa:\n']), ('Alias expects lowercase name', ['a: -> "a"\n']), ('Unexpected colon', ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n']), ('Misplaced operator', ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n']), ('Expecting option ("|") or a new rule or terminal definition', ['a:a\n()\n']), ('Terminal names cannot contain dots', ['A.B\n']), ('Expecting rule or terminal definition', ['"a"\n']), ('%import expects a name', ['%import "a"\n']), ('%ignore expects a value', ['%ignore %import\n']), ] def _translate_parser_exception(parse, e): error = e.match_examples(parse, GRAMMAR_ERRORS, use_accepts=True) if error: return error elif 'STRING' in e.expected: return "Expecting a value" def _parse_grammar(text, name, start='start'): try: tree = _get_parser().parse(text + '\n', start) except UnexpectedCharacters as e: context = e.get_context(text) raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" % (e.line, e.column, name, context)) except UnexpectedToken as e: context = e.get_context(text) error = _translate_parser_exception(_get_parser().parse, e) if error: raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context)) raise return PrepareGrammar().transform(tree) def _error_repr(error): if isinstance(error, UnexpectedToken): error2 = _translate_parser_exception(_get_parser().parse, error) if error2: return error2 expected = ', '.join(error.accepts or error.expected) return "Unexpected token %r. 
Expected one of: {%s}" % (str(error.token), expected) else: return str(error) def _search_interactive_parser(interactive_parser, predicate): def expand(node): path, p = node for choice in p.choices(): t = Token(choice, '') try: new_p = p.feed_token(t) except ParseError: # Illegal pass else: yield path + (choice,), new_p for path, p in bfs_all_unique([((), interactive_parser)], expand): if predicate(p): return path, p def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]: errors = [] def on_error(e): errors.append((e, _error_repr(e))) # recover to a new line token_path, _ = _search_interactive_parser(e.interactive_parser.as_immutable(), lambda p: '_NL' in p.choices()) for token_type in token_path: e.interactive_parser.feed_token(Token(token_type, '')) e.interactive_parser.feed_token(Token('_NL', '\n')) return True _tree = _get_parser().parse(text + '\n', start, on_error=on_error) errors_by_line = classify(errors, lambda e: e[0].line) errors = [el[0] for el in errors_by_line.values()] # already sorted for e in errors: e[0].interactive_parser = None return errors def _get_mangle(prefix, aliases, base_mangle=None): def mangle(s): if s in aliases: s = aliases[s] else: if s[0] == '_': s = '_%s__%s' % (prefix, s[1:]) else: s = '%s__%s' % (prefix, s) if base_mangle is not None: s = base_mangle(s) return s return mangle def _mangle_definition_tree(exp, mangle): if mangle is None: return exp exp = deepcopy(exp) # TODO: is this needed? for t in exp.iter_subtrees(): for i, c in enumerate(t.children): if isinstance(c, Symbol): t.children[i] = c.renamed(mangle) return exp def _make_rule_tuple(modifiers_tree, name, params, priority_tree, expansions): if modifiers_tree.children: m ,= modifiers_tree.children expand1 = '?' in m if expand1 and name.startswith('_'): raise GrammarError("Inlined rules (_rule) cannot use the ?rule modifier.") keep_all_tokens = '!' in m else: keep_all_tokens = False expand1 = False if priority_tree.children: p ,= priority_tree.children priority = int(p) else: priority = None if params is not None: params = [t.value for t in params.children] # For the grammar parser return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority, template_source=(name if params else None)) class Definition: def __init__(self, is_term, tree, params=(), options=None): self.is_term = is_term self.tree = tree self.params = tuple(params) self.options = options class GrammarBuilder: global_keep_all_tokens: bool import_paths: List[Union[str, Callable]] used_files: Dict[str, str] _definitions: Dict[str, Definition] _ignore_names: List[str] def __init__(self, global_keep_all_tokens: bool=False, import_paths: Optional[List[Union[str, Callable]]]=None, used_files: Optional[Dict[str, str]]=None) -> None: self.global_keep_all_tokens = global_keep_all_tokens self.import_paths = import_paths or [] self.used_files = used_files or {} self._definitions: Dict[str, Definition] = {} self._ignore_names: List[str] = [] def _grammar_error(self, is_term, msg, *names): args = {} for i, name in enumerate(names, start=1): postfix = '' if i == 1 else str(i) args['name' + postfix] = name args['type' + postfix] = lowercase_type = ("rule", "terminal")[is_term] args['Type' + postfix] = lowercase_type.title() raise GrammarError(msg.format(**args)) def _check_options(self, is_term, options): if is_term: if options is None: options = 1 elif not isinstance(options, int): raise GrammarError("Terminal require a single int as 'options' (e.g. 
priority), got %s" % (type(options),)) else: if options is None: options = RuleOptions() elif not isinstance(options, RuleOptions): raise GrammarError("Rules require a RuleOptions instance as 'options'") if self.global_keep_all_tokens: options.keep_all_tokens = True return options def _define(self, name, is_term, exp, params=(), options=None, *, override=False): if name in self._definitions: if not override: self._grammar_error(is_term, "{Type} '{name}' defined more than once", name) elif override: self._grammar_error(is_term, "Cannot override a nonexisting {type} {name}", name) if name.startswith('__'): self._grammar_error(is_term, 'Names starting with double-underscore are reserved (Error at {name})', name) self._definitions[name] = Definition(is_term, exp, params, self._check_options(is_term, options)) def _extend(self, name, is_term, exp, params=(), options=None): if name not in self._definitions: self._grammar_error(is_term, "Can't extend {type} {name} as it wasn't defined before", name) d = self._definitions[name] if is_term != d.is_term: self._grammar_error(is_term, "Cannot extend {type} {name} - one is a terminal, while the other is not.", name) if tuple(params) != d.params: self._grammar_error(is_term, "Cannot extend {type} with different parameters: {name}", name) if d.tree is None: self._grammar_error(is_term, "Can't extend {type} {name} - it is abstract.", name) # TODO: think about what to do with 'options' base = d.tree assert isinstance(base, Tree) and base.data == 'expansions' base.children.insert(0, exp) def _ignore(self, exp_or_name): if isinstance(exp_or_name, str): self._ignore_names.append(exp_or_name) else: assert isinstance(exp_or_name, Tree) t = exp_or_name if t.data == 'expansions' and len(t.children) == 1: t2 ,= t.children if t2.data=='expansion' and len(t2.children) == 1: item ,= t2.children if item.data == 'value': item ,= item.children if isinstance(item, Terminal): # Keep terminal name, no need to create a new definition self._ignore_names.append(item.name) return name = '__IGNORE_%d'% len(self._ignore_names) self._ignore_names.append(name) self._definitions[name] = Definition(True, t, options=TOKEN_DEFAULT_PRIORITY) def _unpack_import(self, stmt, grammar_name): if len(stmt.children) > 1: path_node, arg1 = stmt.children else: path_node, = stmt.children arg1 = None if isinstance(arg1, Tree): # Multi import dotted_path = tuple(path_node.children) names = arg1.children aliases = dict(zip(names, names)) # Can't have aliased multi import, so all aliases will be the same as names else: # Single import dotted_path = tuple(path_node.children[:-1]) if not dotted_path: name ,= path_node.children raise GrammarError("Nothing was imported from grammar `%s`" % name) name = path_node.children[-1] # Get name from dotted path aliases = {name.value: (arg1 or name).value} # Aliases if exist if path_node.data == 'import_lib': # Import from library base_path = None else: # Relative import if grammar_name == '': # Import relative to script file path if grammar is coded in script try: base_file = os.path.abspath(sys.modules['__main__'].__file__) except AttributeError: base_file = None else: base_file = grammar_name # Import relative to grammar file path if external grammar file if base_file: if isinstance(base_file, PackageResource): base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0]) else: base_path = os.path.split(base_file)[0] else: base_path = os.path.abspath(os.path.curdir) return dotted_path, base_path, aliases def _unpack_definition(self, tree, 
mangle): if tree.data == 'rule': name, params, exp, opts = _make_rule_tuple(*tree.children) is_term = False else: name = tree.children[0].value params = () # TODO terminal templates opts = int(tree.children[1]) if len(tree.children) == 3 else TOKEN_DEFAULT_PRIORITY # priority exp = tree.children[-1] is_term = True if mangle is not None: params = tuple(mangle(p) for p in params) name = mangle(name) exp = _mangle_definition_tree(exp, mangle) return name, is_term, exp, params, opts def load_grammar(self, grammar_text: str, grammar_name: str="", mangle: Optional[Callable[[str], str]]=None) -> None: tree = _parse_grammar(grammar_text, grammar_name) imports: Dict[Tuple[str, ...], Tuple[Optional[str], Dict[str, str]]] = {} for stmt in tree.children: if stmt.data == 'import': dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name) try: import_base_path, import_aliases = imports[dotted_path] assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path) import_aliases.update(aliases) except KeyError: imports[dotted_path] = base_path, aliases for dotted_path, (base_path, aliases) in imports.items(): self.do_import(dotted_path, base_path, aliases, mangle) for stmt in tree.children: if stmt.data in ('term', 'rule'): self._define(*self._unpack_definition(stmt, mangle)) elif stmt.data == 'override': r ,= stmt.children self._define(*self._unpack_definition(r, mangle), override=True) elif stmt.data == 'extend': r ,= stmt.children self._extend(*self._unpack_definition(r, mangle)) elif stmt.data == 'ignore': # if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar if mangle is None: self._ignore(*stmt.children) elif stmt.data == 'declare': for symbol in stmt.children: assert isinstance(symbol, Symbol), symbol is_term = isinstance(symbol, Terminal) if mangle is None: name = symbol.name else: name = mangle(symbol.name) self._define(name, is_term, None) elif stmt.data == 'import': pass else: assert False, stmt term_defs = { name: d.tree for name, d in self._definitions.items() if d.is_term } resolve_term_references(term_defs) def _remove_unused(self, used): def rule_dependencies(symbol): try: d = self._definitions[symbol] except KeyError: return [] if d.is_term: return [] return _find_used_symbols(d.tree) - set(d.params) _used = set(bfs(used, rule_dependencies)) self._definitions = {k: v for k, v in self._definitions.items() if k in _used} def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Optional[Callable[[str], str]]=None) -> None: assert dotted_path mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle) grammar_path = os.path.join(*dotted_path) + EXT to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader] for source in to_try: try: if callable(source): joined_path, text = source(base_path, grammar_path) else: joined_path = os.path.join(source, grammar_path) with open(joined_path, encoding='utf8') as f: text = f.read() except IOError: continue else: h = sha256_digest(text) if self.used_files.get(joined_path, h) != h: raise RuntimeError("Grammar file was changed during importing") self.used_files[joined_path] = h gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths, self.used_files) gb.load_grammar(text, joined_path, mangle) gb._remove_unused(map(mangle, aliases)) for name in gb._definitions: if name in self._definitions: raise GrammarError("Cannot import '%s' from '%s': Symbol already defined." 
% (name, grammar_path)) self._definitions.update(**gb._definitions) break else: # Search failed. Make Python throw a nice error. open(grammar_path, encoding='utf8') assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,) def validate(self) -> None: for name, d in self._definitions.items(): params = d.params exp = d.tree for i, p in enumerate(params): if p in self._definitions: raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name)) if p in params[:i]: raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name)) if exp is None: # Remaining checks don't apply to abstract rules/terminals (created with %declare) continue for temp in exp.find_data('template_usage'): sym = temp.children[0].name args = temp.children[1:] if sym not in params: if sym not in self._definitions: self._grammar_error(d.is_term, "Template '%s' used but not defined (in {type} {name})" % sym, name) if len(args) != len(self._definitions[sym].params): expected, actual = len(self._definitions[sym].params), len(args) self._grammar_error(d.is_term, "Wrong number of template arguments used for {name} " "(expected %s, got %s) (in {type2} {name2})" % (expected, actual), sym, name) for sym in _find_used_symbols(exp): if sym not in self._definitions and sym not in params: self._grammar_error(d.is_term, "{Type} '{name}' used but not defined (in {type2} {name2})", sym, name) if not set(self._definitions).issuperset(self._ignore_names): raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions))) def build(self) -> Grammar: self.validate() rule_defs = [] term_defs = [] for name, d in self._definitions.items(): (params, exp, options) = d.params, d.tree, d.options if d.is_term: assert len(params) == 0 term_defs.append((name, (exp, options))) else: rule_defs.append((name, params, exp, options)) # resolve_term_references(term_defs) return Grammar(rule_defs, term_defs, self._ignore_names) def verify_used_files(file_hashes): for path, old in file_hashes.items(): text = None if isinstance(path, str) and os.path.exists(path): with open(path, encoding='utf8') as f: text = f.read() elif isinstance(path, PackageResource): with suppress(IOError): text = pkgutil.get_data(*path).decode('utf-8') if text is None: # We don't know how to load the path. ignore it. continue current = sha256_digest(text) if old != current: logger.info("File %r changed, rebuilding Parser" % path) return False return True def list_grammar_imports(grammar, import_paths=[]): "Returns a list of paths to the lark grammars imported by the given grammar (recursively)" builder = GrammarBuilder(False, import_paths) builder.load_grammar(grammar, '') return list(builder.used_files.keys()) def load_grammar(grammar, source, import_paths, global_keep_all_tokens): builder = GrammarBuilder(global_keep_all_tokens, import_paths) builder.load_grammar(grammar, source) return builder.build(), builder.used_files def sha256_digest(s: str) -> str: """Get the sha256 digest of a string Supports the `usedforsecurity` argument for Python 3.9+ to allow running on a FIPS-enabled system. 
""" if sys.version_info >= (3, 9): return hashlib.sha256(s.encode('utf8'), usedforsecurity=False).hexdigest() else: return hashlib.sha256(s.encode('utf8')).hexdigest() lark-1.2.2/lark/parse_tree_builder.py000066400000000000000000000341141465673407200176300ustar00rootroot00000000000000"""Provides functions for the automatic building and shaping of the parse-tree.""" from typing import List from .exceptions import GrammarError, ConfigurationError from .lexer import Token from .tree import Tree from .visitors import Transformer_InPlace from .visitors import _vargs_meta, _vargs_meta_inline ###{standalone from functools import partial, wraps from itertools import product class ExpandSingleChild: def __init__(self, node_builder): self.node_builder = node_builder def __call__(self, children): if len(children) == 1: return children[0] else: return self.node_builder(children) class PropagatePositions: def __init__(self, node_builder, node_filter=None): self.node_builder = node_builder self.node_filter = node_filter def __call__(self, children): res = self.node_builder(children) if isinstance(res, Tree): # Calculate positions while the tree is streaming, according to the rule: # - nodes start at the start of their first child's container, # and end at the end of their last child's container. # Containers are nodes that take up space in text, but have been inlined in the tree. res_meta = res.meta first_meta = self._pp_get_meta(children) if first_meta is not None: if not hasattr(res_meta, 'line'): # meta was already set, probably because the rule has been inlined (e.g. `?rule`) res_meta.line = getattr(first_meta, 'container_line', first_meta.line) res_meta.column = getattr(first_meta, 'container_column', first_meta.column) res_meta.start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos) res_meta.empty = False res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line) res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column) res_meta.container_start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos) last_meta = self._pp_get_meta(reversed(children)) if last_meta is not None: if not hasattr(res_meta, 'end_line'): res_meta.end_line = getattr(last_meta, 'container_end_line', last_meta.end_line) res_meta.end_column = getattr(last_meta, 'container_end_column', last_meta.end_column) res_meta.end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos) res_meta.empty = False res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line) res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column) res_meta.container_end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos) return res def _pp_get_meta(self, children): for c in children: if self.node_filter is not None and not self.node_filter(c): continue if isinstance(c, Tree): if not c.meta.empty: return c.meta elif isinstance(c, Token): return c elif hasattr(c, '__lark_meta__'): return c.__lark_meta__() def make_propagate_positions(option): if callable(option): return partial(PropagatePositions, node_filter=option) elif option is True: return PropagatePositions elif option is False: return None raise ConfigurationError('Invalid option for propagate_positions: %r' % option) class ChildFilter: def __init__(self, to_include, append_none, node_builder): self.node_builder = node_builder self.to_include = to_include self.append_none = append_none def __call__(self, children): filtered = [] for 
i, to_expand, add_none in self.to_include: if add_none: filtered += [None] * add_none if to_expand: filtered += children[i].children else: filtered.append(children[i]) if self.append_none: filtered += [None] * self.append_none return self.node_builder(filtered) class ChildFilterLALR(ChildFilter): """Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)""" def __call__(self, children): filtered = [] for i, to_expand, add_none in self.to_include: if add_none: filtered += [None] * add_none if to_expand: if filtered: filtered += children[i].children else: # Optimize for left-recursion filtered = children[i].children else: filtered.append(children[i]) if self.append_none: filtered += [None] * self.append_none return self.node_builder(filtered) class ChildFilterLALR_NoPlaceholders(ChildFilter): "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" def __init__(self, to_include, node_builder): self.node_builder = node_builder self.to_include = to_include def __call__(self, children): filtered = [] for i, to_expand in self.to_include: if to_expand: if filtered: filtered += children[i].children else: # Optimize for left-recursion filtered = children[i].children else: filtered.append(children[i]) return self.node_builder(filtered) def _should_expand(sym): return not sym.is_term and sym.name.startswith('_') def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]): # Prepare empty_indices as: How many Nones to insert at each index? if _empty_indices: assert _empty_indices.count(False) == len(expansion) s = ''.join(str(int(b)) for b in _empty_indices) empty_indices = [len(ones) for ones in s.split('0')] assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion)) else: empty_indices = [0] * (len(expansion)+1) to_include = [] nones_to_add = 0 for i, sym in enumerate(expansion): nones_to_add += empty_indices[i] if keep_all_tokens or not (sym.is_term and sym.filter_out): to_include.append((i, _should_expand(sym), nones_to_add)) nones_to_add = 0 nones_to_add += empty_indices[len(expansion)] if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include): if _empty_indices or ambiguous: return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add) else: # LALR without placeholders return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include]) class AmbiguousExpander: """Deal with the case where we're expanding children ('_rule') into a parent but the children are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself ambiguous with as many copies as there are ambiguous children, and then copy the ambiguous children into the right parents in the right places, essentially shifting the ambiguity up the tree.""" def __init__(self, to_expand, tree_class, node_builder): self.node_builder = node_builder self.tree_class = tree_class self.to_expand = to_expand def __call__(self, children): def _is_ambig_tree(t): return hasattr(t, 'data') and t.data == '_ambig' # -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities. # All children of an _ambig node should be a derivation of that ambig node, hence # it is safe to assume that if we see an _ambig node nested within an ambig node # it is safe to simply expand it into the parent _ambig node as an alternative derivation. 
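        # Conceptual example (hypothetical trees; this is the net effect described
        # in the class docstring above). An ambiguous inlined child
        #
        #     parent                              _ambig
        #       _ambig                              parent
        #         _inlined_rule: x y      ==>         x y c
        #         _inlined_rule: z                   parent
        #       c                                      z c
        #
        # i.e. one copy of `parent` per alternative, with the ambiguity lifted up.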
ambiguous = [] for i, child in enumerate(children): if _is_ambig_tree(child): if i in self.to_expand: ambiguous.append(i) child.expand_kids_by_data('_ambig') if not ambiguous: return self.node_builder(children) expand = [child.children if i in ambiguous else (child,) for i, child in enumerate(children)] return self.tree_class('_ambig', [self.node_builder(list(f)) for f in product(*expand)]) def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): to_expand = [i for i, sym in enumerate(expansion) if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))] if to_expand: return partial(AmbiguousExpander, to_expand, tree_class) class AmbiguousIntermediateExpander: """ Propagate ambiguous intermediate nodes and their derivations up to the current rule. In general, converts rule _iambig _inter someChildren1 ... _inter someChildren2 ... someChildren3 ... to _ambig rule someChildren1 ... someChildren3 ... rule someChildren2 ... someChildren3 ... rule childrenFromNestedIambigs ... someChildren3 ... ... propagating up any nested '_iambig' nodes along the way. """ def __init__(self, tree_class, node_builder): self.node_builder = node_builder self.tree_class = tree_class def __call__(self, children): def _is_iambig_tree(child): return hasattr(child, 'data') and child.data == '_iambig' def _collapse_iambig(children): """ Recursively flatten the derivations of the parent of an '_iambig' node. Returns a list of '_inter' nodes guaranteed not to contain any nested '_iambig' nodes, or None if children does not contain an '_iambig' node. """ # Due to the structure of the SPPF, # an '_iambig' node can only appear as the first child if children and _is_iambig_tree(children[0]): iambig_node = children[0] result = [] for grandchild in iambig_node.children: collapsed = _collapse_iambig(grandchild.children) if collapsed: for child in collapsed: child.children += children[1:] result += collapsed else: new_tree = self.tree_class('_inter', grandchild.children + children[1:]) result.append(new_tree) return result collapsed = _collapse_iambig(children) if collapsed: processed_nodes = [self.node_builder(c.children) for c in collapsed] return self.tree_class('_ambig', processed_nodes) return self.node_builder(children) def inplace_transformer(func): @wraps(func) def f(children): # function name in a Transformer is a rule name. 
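        # Sketch of the call shape this wrapper produces (the rule name 'pair' is
        # hypothetical): the parser hands the callback a plain child list, and the
        # wrapper re-wraps it so the in-place transformer method sees a tree node:
        #     f(['a', 'b'])  is equivalent to  func(Tree('pair', ['a', 'b']))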
tree = Tree(func.__name__, children) return func(tree) return f def apply_visit_wrapper(func, name, wrapper): if wrapper is _vargs_meta or wrapper is _vargs_meta_inline: raise NotImplementedError("Meta args not supported for internal transformer") @wraps(func) def f(children): return wrapper(func, name, children, None) return f class ParseTreeBuilder: def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False): self.tree_class = tree_class self.propagate_positions = propagate_positions self.ambiguous = ambiguous self.maybe_placeholders = maybe_placeholders self.rule_builders = list(self._init_builders(rules)) def _init_builders(self, rules): propagate_positions = make_propagate_positions(self.propagate_positions) for rule in rules: options = rule.options keep_all_tokens = options.keep_all_tokens expand_single_child = options.expand1 wrapper_chain = list(filter(None, [ (expand_single_child and not rule.alias) and ExpandSingleChild, maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), propagate_positions, self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class) ])) yield rule, wrapper_chain def create_callback(self, transformer=None): callbacks = {} default_handler = getattr(transformer, '__default__', None) if default_handler: def default_callback(data, children): return default_handler(data, children, None) else: default_callback = self.tree_class for rule, wrapper_chain in self.rule_builders: user_callback_name = rule.alias or rule.options.template_source or rule.origin.name try: f = getattr(transformer, user_callback_name) wrapper = getattr(f, 'visit_wrapper', None) if wrapper is not None: f = apply_visit_wrapper(f, user_callback_name, wrapper) elif isinstance(transformer, Transformer_InPlace): f = inplace_transformer(f) except AttributeError: f = partial(default_callback, user_callback_name) for w in wrapper_chain: f = w(f) if rule in callbacks: raise GrammarError("Rule '%s' already exists" % (rule,)) callbacks[rule] = f return callbacks ###} lark-1.2.2/lark/parser_frontends.py000066400000000000000000000236651465673407200173600ustar00rootroot00000000000000from typing import Any, Callable, Dict, Optional, Collection, Union, TYPE_CHECKING from .exceptions import ConfigurationError, GrammarError, assert_config from .utils import get_regexp_width, Serialize from .lexer import LexerThread, BasicLexer, ContextualLexer, Lexer from .parsers import earley, xearley, cyk from .parsers.lalr_parser import LALR_Parser from .tree import Tree from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType if TYPE_CHECKING: from .parsers.lalr_analysis import ParseTableBase ###{standalone def _wrap_lexer(lexer_class): future_interface = getattr(lexer_class, '__future_interface__', False) if future_interface: return lexer_class else: class CustomLexerWrapper(Lexer): def __init__(self, lexer_conf): self.lexer = lexer_class(lexer_conf) def lex(self, lexer_state, parser_state): return self.lexer.lex(lexer_state.text) return CustomLexerWrapper def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options): parser_conf = ParserConf.deserialize(data['parser_conf'], memo) cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser parser = cls.deserialize(data['parser'], memo, callbacks, options.debug) parser_conf.callbacks = 
callbacks return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser) _parser_creators: 'Dict[str, Callable[[LexerConf, Any, Any], Any]]' = {} class ParsingFrontend(Serialize): __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser' lexer_conf: LexerConf parser_conf: ParserConf options: Any def __init__(self, lexer_conf: LexerConf, parser_conf: ParserConf, options, parser=None): self.parser_conf = parser_conf self.lexer_conf = lexer_conf self.options = options # Set-up parser if parser: # From cache self.parser = parser else: create_parser = _parser_creators.get(parser_conf.parser_type) assert create_parser is not None, "{} is not supported in standalone mode".format( parser_conf.parser_type ) self.parser = create_parser(lexer_conf, parser_conf, options) # Set-up lexer lexer_type = lexer_conf.lexer_type self.skip_lexer = False if lexer_type in ('dynamic', 'dynamic_complete'): assert lexer_conf.postlex is None self.skip_lexer = True return if isinstance(lexer_type, type): assert issubclass(lexer_type, Lexer) self.lexer = _wrap_lexer(lexer_type)(lexer_conf) elif isinstance(lexer_type, str): create_lexer = { 'basic': create_basic_lexer, 'contextual': create_contextual_lexer, }[lexer_type] self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options) else: raise TypeError("Bad value for lexer_type: {lexer_type}") if lexer_conf.postlex: self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex) def _verify_start(self, start=None): if start is None: start_decls = self.parser_conf.start if len(start_decls) > 1: raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls) start ,= start_decls elif start not in self.parser_conf.start: raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start)) return start def _make_lexer_thread(self, text: str) -> Union[str, LexerThread]: cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread return text if self.skip_lexer else cls.from_text(self.lexer, text) def parse(self, text: str, start=None, on_error=None): chosen_start = self._verify_start(start) kw = {} if on_error is None else {'on_error': on_error} stream = self._make_lexer_thread(text) return self.parser.parse(stream, chosen_start, **kw) def parse_interactive(self, text: Optional[str]=None, start=None): # TODO BREAK - Change text from Optional[str] to text: str = ''. # Would break behavior of exhaust_lexer(), which currently raises TypeError, and after the change would just return [] chosen_start = self._verify_start(start) if self.parser_conf.parser_type != 'lalr': raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ") stream = self._make_lexer_thread(text) # type: ignore[arg-type] return self.parser.parse_interactive(stream, chosen_start) def _validate_frontend_args(parser, lexer) -> None: assert_config(parser, ('lalr', 'earley', 'cyk')) if not isinstance(lexer, type): # not custom lexer? 
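        # The table below mirrors what users can request through the public API,
        # e.g. (hedged examples, grammar omitted):
        #     Lark(grammar, parser='lalr',   lexer='contextual')   # usual LALR setup
        #     Lark(grammar, parser='earley', lexer='dynamic')
        #     Lark(grammar, parser='cyk',    lexer='basic')
        # Anything outside these pairings is rejected by assert_config below.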
expected = { 'lalr': ('basic', 'contextual'), 'earley': ('basic', 'dynamic', 'dynamic_complete'), 'cyk': ('basic', ), }[parser] assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser) def _get_lexer_callbacks(transformer, terminals): result = {} for terminal in terminals: callback = getattr(transformer, terminal.name, None) if callback is not None: result[terminal.name] = callback return result class PostLexConnector: def __init__(self, lexer, postlexer): self.lexer = lexer self.postlexer = postlexer def lex(self, lexer_state, parser_state): i = self.lexer.lex(lexer_state, parser_state) return self.postlexer.process(i) def create_basic_lexer(lexer_conf, parser, postlex, options) -> BasicLexer: cls = (options and options._plugins.get('BasicLexer')) or BasicLexer return cls(lexer_conf) def create_contextual_lexer(lexer_conf: LexerConf, parser, postlex, options) -> ContextualLexer: cls = (options and options._plugins.get('ContextualLexer')) or ContextualLexer parse_table: ParseTableBase[int] = parser._parse_table states: Dict[int, Collection[str]] = {idx:list(t.keys()) for idx, t in parse_table.states.items()} always_accept: Collection[str] = postlex.always_accept if postlex else () return cls(lexer_conf, states, always_accept=always_accept) def create_lalr_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options=None) -> LALR_Parser: debug = options.debug if options else False strict = options.strict if options else False cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser return cls(parser_conf, debug=debug, strict=strict) _parser_creators['lalr'] = create_lalr_parser ###} class EarleyRegexpMatcher: def __init__(self, lexer_conf): self.regexps = {} for t in lexer_conf.terminals: regexp = t.pattern.to_regexp() try: width = get_regexp_width(regexp)[0] except ValueError: raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp)) else: if width == 0: raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t) if lexer_conf.use_bytes: regexp = regexp.encode('utf-8') self.regexps[t.name] = lexer_conf.re_module.compile(regexp, lexer_conf.g_regex_flags) def match(self, term, text, index=0): return self.regexps[term.name].match(text, index) def create_earley_parser__dynamic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw): if lexer_conf.callbacks: raise GrammarError("Earley's dynamic lexer doesn't support lexer_callbacks.") earley_matcher = EarleyRegexpMatcher(lexer_conf) return xearley.Parser(lexer_conf, parser_conf, earley_matcher.match, **kw) def _match_earley_basic(term, token): return term.name == token.type def create_earley_parser__basic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw): return earley.Parser(lexer_conf, parser_conf, _match_earley_basic, **kw) def create_earley_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options) -> earley.Parser: resolve_ambiguity = options.ambiguity == 'resolve' debug = options.debug if options else False tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None extra = {} if lexer_conf.lexer_type == 'dynamic': f = create_earley_parser__dynamic elif lexer_conf.lexer_type == 'dynamic_complete': extra['complete_lex'] = True f = create_earley_parser__dynamic else: f = create_earley_parser__basic return f(lexer_conf, parser_conf, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, ordered_sets=options.ordered_sets, **extra) class CYK_FrontEnd: def __init__(self, lexer_conf, parser_conf, options=None): 
self.parser = cyk.Parser(parser_conf.rules) self.callbacks = parser_conf.callbacks def parse(self, lexer_thread, start): tokens = list(lexer_thread.lex(None)) tree = self.parser.parse(tokens, start) return self._transform(tree) def _transform(self, tree): subtrees = list(tree.iter_subtrees()) for subtree in subtrees: subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children] return self._apply_callback(tree) def _apply_callback(self, tree): return self.callbacks[tree.rule](tree.children) _parser_creators['earley'] = create_earley_parser _parser_creators['cyk'] = CYK_FrontEnd def _construct_parsing_frontend( parser_type: _ParserArgType, lexer_type: _LexerArgType, lexer_conf, parser_conf, options ): assert isinstance(lexer_conf, LexerConf) assert isinstance(parser_conf, ParserConf) parser_conf.parser_type = parser_type lexer_conf.lexer_type = lexer_type return ParsingFrontend(lexer_conf, parser_conf, options) lark-1.2.2/lark/parsers/000077500000000000000000000000001465673407200150735ustar00rootroot00000000000000lark-1.2.2/lark/parsers/__init__.py000066400000000000000000000000001465673407200171720ustar00rootroot00000000000000lark-1.2.2/lark/parsers/cyk.py000066400000000000000000000276401465673407200162440ustar00rootroot00000000000000"""This module implements a CYK parser.""" # Author: https://github.com/ehudt (2018) # # Adapted by Erez from collections import defaultdict import itertools from ..exceptions import ParseError from ..lexer import Token from ..tree import Tree from ..grammar import Terminal as T, NonTerminal as NT, Symbol def match(t, s): assert isinstance(t, T) return t.name == s.type class Rule: """Context-free grammar rule.""" def __init__(self, lhs, rhs, weight, alias): super(Rule, self).__init__() assert isinstance(lhs, NT), lhs assert all(isinstance(x, NT) or isinstance(x, T) for x in rhs), rhs self.lhs = lhs self.rhs = rhs self.weight = weight self.alias = alias def __str__(self): return '%s -> %s' % (str(self.lhs), ' '.join(str(x) for x in self.rhs)) def __repr__(self): return str(self) def __hash__(self): return hash((self.lhs, tuple(self.rhs))) def __eq__(self, other): return self.lhs == other.lhs and self.rhs == other.rhs def __ne__(self, other): return not (self == other) class Grammar: """Context-free grammar.""" def __init__(self, rules): self.rules = frozenset(rules) def __eq__(self, other): return self.rules == other.rules def __str__(self): return '\n' + '\n'.join(sorted(repr(x) for x in self.rules)) + '\n' def __repr__(self): return str(self) # Parse tree data structures class RuleNode: """A node in the parse tree, which also contains the full rhs rule.""" def __init__(self, rule, children, weight=0): self.rule = rule self.children = children self.weight = weight def __repr__(self): return 'RuleNode(%s, [%s])' % (repr(self.rule.lhs), ', '.join(str(x) for x in self.children)) class Parser: """Parser wrapper.""" def __init__(self, rules): super(Parser, self).__init__() self.orig_rules = {rule: rule for rule in rules} rules = [self._to_rule(rule) for rule in rules] self.grammar = to_cnf(Grammar(rules)) def _to_rule(self, lark_rule): """Converts a lark rule, (lhs, rhs, callback, options), to a Rule.""" assert isinstance(lark_rule.origin, NT) assert all(isinstance(x, Symbol) for x in lark_rule.expansion) return Rule( lark_rule.origin, lark_rule.expansion, weight=lark_rule.options.priority if lark_rule.options.priority else 0, alias=lark_rule) def parse(self, tokenized, start): # pylint: disable=invalid-name """Parses input, 
which is a list of tokens.""" assert start start = NT(start) table, trees = _parse(tokenized, self.grammar) # Check if the parse succeeded. if all(r.lhs != start for r in table[(0, len(tokenized) - 1)]): raise ParseError('Parsing failed.') parse = trees[(0, len(tokenized) - 1)][start] return self._to_tree(revert_cnf(parse)) def _to_tree(self, rule_node): """Converts a RuleNode parse tree to a lark Tree.""" orig_rule = self.orig_rules[rule_node.rule.alias] children = [] for child in rule_node.children: if isinstance(child, RuleNode): children.append(self._to_tree(child)) else: assert isinstance(child.name, Token) children.append(child.name) t = Tree(orig_rule.origin, children) t.rule=orig_rule return t def print_parse(node, indent=0): if isinstance(node, RuleNode): print(' ' * (indent * 2) + str(node.rule.lhs)) for child in node.children: print_parse(child, indent + 1) else: print(' ' * (indent * 2) + str(node.s)) def _parse(s, g): """Parses sentence 's' using CNF grammar 'g'.""" # The CYK table. Indexed with a 2-tuple: (start pos, end pos) table = defaultdict(set) # Top-level structure is similar to the CYK table. Each cell is a dict from # rule name to the best (lightest) tree for that rule. trees = defaultdict(dict) # Populate base case with existing terminal production rules for i, w in enumerate(s): for terminal, rules in g.terminal_rules.items(): if match(terminal, w): for rule in rules: table[(i, i)].add(rule) if (rule.lhs not in trees[(i, i)] or rule.weight < trees[(i, i)][rule.lhs].weight): trees[(i, i)][rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight) # Iterate over lengths of sub-sentences for l in range(2, len(s) + 1): # Iterate over sub-sentences with the given length for i in range(len(s) - l + 1): # Choose partition of the sub-sentence in [1, l) for p in range(i + 1, i + l): span1 = (i, p - 1) span2 = (p, i + l - 1) for r1, r2 in itertools.product(table[span1], table[span2]): for rule in g.nonterminal_rules.get((r1.lhs, r2.lhs), []): table[(i, i + l - 1)].add(rule) r1_tree = trees[span1][r1.lhs] r2_tree = trees[span2][r2.lhs] rule_total_weight = rule.weight + r1_tree.weight + r2_tree.weight if (rule.lhs not in trees[(i, i + l - 1)] or rule_total_weight < trees[(i, i + l - 1)][rule.lhs].weight): trees[(i, i + l - 1)][rule.lhs] = RuleNode(rule, [r1_tree, r2_tree], weight=rule_total_weight) return table, trees # This section implements context-free grammar converter to Chomsky normal form. # It also implements a conversion of parse trees from its CNF to the original # grammar. # Overview: # Applies the following operations in this order: # * TERM: Eliminates non-solitary terminals from all rules # * BIN: Eliminates rules with more than 2 symbols on their right-hand-side. # * UNIT: Eliminates non-terminal unit rules # # The following grammar characteristics aren't featured: # * Start symbol appears on RHS # * Empty rules (epsilon rules) class CnfWrapper: """CNF wrapper for grammar. Validates that the input grammar is CNF and provides helper data structures. """ def __init__(self, grammar): super(CnfWrapper, self).__init__() self.grammar = grammar self.rules = grammar.rules self.terminal_rules = defaultdict(list) self.nonterminal_rules = defaultdict(list) for r in self.rules: # Validate that the grammar is CNF and populate auxiliary data structures. 
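            # For reference, Chomsky Normal Form allows exactly two rule shapes
            # (notation is illustrative):
            #     A -> B C     two non-terminals    -> collected in nonterminal_rules
            #     A -> "a"     a single terminal    -> collected in terminal_rules
            # Empty rules and longer right-hand sides are rejected by the checks below.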
assert isinstance(r.lhs, NT), r if len(r.rhs) not in [1, 2]: raise ParseError("CYK doesn't support empty rules") if len(r.rhs) == 1 and isinstance(r.rhs[0], T): self.terminal_rules[r.rhs[0]].append(r) elif len(r.rhs) == 2 and all(isinstance(x, NT) for x in r.rhs): self.nonterminal_rules[tuple(r.rhs)].append(r) else: assert False, r def __eq__(self, other): return self.grammar == other.grammar def __repr__(self): return repr(self.grammar) class UnitSkipRule(Rule): """A rule that records NTs that were skipped during transformation.""" def __init__(self, lhs, rhs, skipped_rules, weight, alias): super(UnitSkipRule, self).__init__(lhs, rhs, weight, alias) self.skipped_rules = skipped_rules def __eq__(self, other): return isinstance(other, type(self)) and self.skipped_rules == other.skipped_rules __hash__ = Rule.__hash__ def build_unit_skiprule(unit_rule, target_rule): skipped_rules = [] if isinstance(unit_rule, UnitSkipRule): skipped_rules += unit_rule.skipped_rules skipped_rules.append(target_rule) if isinstance(target_rule, UnitSkipRule): skipped_rules += target_rule.skipped_rules return UnitSkipRule(unit_rule.lhs, target_rule.rhs, skipped_rules, weight=unit_rule.weight + target_rule.weight, alias=unit_rule.alias) def get_any_nt_unit_rule(g): """Returns a non-terminal unit rule from 'g', or None if there is none.""" for rule in g.rules: if len(rule.rhs) == 1 and isinstance(rule.rhs[0], NT): return rule return None def _remove_unit_rule(g, rule): """Removes 'rule' from 'g' without changing the language produced by 'g'.""" new_rules = [x for x in g.rules if x != rule] refs = [x for x in g.rules if x.lhs == rule.rhs[0]] new_rules += [build_unit_skiprule(rule, ref) for ref in refs] return Grammar(new_rules) def _split(rule): """Splits a rule whose len(rhs) > 2 into shorter rules.""" rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs) rule_name = '__SP_%s' % (rule_str) + '_%d' yield Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias) for i in range(1, len(rule.rhs) - 2): yield Rule(NT(rule_name % i), [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split') yield Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split') def _term(g): """Applies the TERM rule on 'g' (see top comment).""" all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)} t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t} new_rules = [] for rule in g.rules: if len(rule.rhs) > 1 and any(isinstance(x, T) for x in rule.rhs): new_rhs = [t_rules[x].lhs if isinstance(x, T) else x for x in rule.rhs] new_rules.append(Rule(rule.lhs, new_rhs, weight=rule.weight, alias=rule.alias)) new_rules.extend(v for k, v in t_rules.items() if k in rule.rhs) else: new_rules.append(rule) return Grammar(new_rules) def _bin(g): """Applies the BIN rule to 'g' (see top comment).""" new_rules = [] for rule in g.rules: if len(rule.rhs) > 2: new_rules += _split(rule) else: new_rules.append(rule) return Grammar(new_rules) def _unit(g): """Applies the UNIT rule to 'g' (see top comment).""" nt_unit_rule = get_any_nt_unit_rule(g) while nt_unit_rule: g = _remove_unit_rule(g, nt_unit_rule) nt_unit_rule = get_any_nt_unit_rule(g) return g def to_cnf(g): """Creates a CNF grammar from a general context-free grammar 'g'.""" g = _unit(_bin(_term(g))) return CnfWrapper(g) def unroll_unit_skiprule(lhs, orig_rhs, skipped_rules, children, weight, alias): if not skipped_rules: return RuleNode(Rule(lhs, orig_rhs, weight=weight, alias=alias), 
children, weight=weight) else: weight = weight - skipped_rules[0].weight return RuleNode( Rule(lhs, [skipped_rules[0].lhs], weight=weight, alias=alias), [ unroll_unit_skiprule(skipped_rules[0].lhs, orig_rhs, skipped_rules[1:], children, skipped_rules[0].weight, skipped_rules[0].alias) ], weight=weight) def revert_cnf(node): """Reverts a parse tree (RuleNode) to its original non-CNF form (Node).""" if isinstance(node, T): return node # Reverts TERM rule. if node.rule.lhs.name.startswith('__T_'): return node.children[0] else: children = [] for child in map(revert_cnf, node.children): # Reverts BIN rule. if isinstance(child, RuleNode) and child.rule.lhs.name.startswith('__SP_'): children += child.children else: children.append(child) # Reverts UNIT rule. if isinstance(node.rule, UnitSkipRule): return unroll_unit_skiprule(node.rule.lhs, node.rule.rhs, node.rule.skipped_rules, children, node.rule.weight, node.rule.alias) else: return RuleNode(node.rule, children) lark-1.2.2/lark/parsers/earley.py000066400000000000000000000354151465673407200167360ustar00rootroot00000000000000"""This module implements an Earley parser. The core Earley algorithm used here is based on Elizabeth Scott's implementation, here: https://www.sciencedirect.com/science/article/pii/S1571066108001497 That is probably the best reference for understanding the algorithm here. The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format is explained here: https://lark-parser.readthedocs.io/en/latest/_static/sppf/sppf.html """ from typing import TYPE_CHECKING, Callable, Optional, List, Any from collections import deque from ..lexer import Token from ..tree import Tree from ..exceptions import UnexpectedEOF, UnexpectedToken from ..utils import logger, OrderedSet, dedup_list from .grammar_analysis import GrammarAnalyzer from ..grammar import NonTerminal from .earley_common import Item from .earley_forest import ForestSumVisitor, SymbolNode, StableSymbolNode, TokenNode, ForestToParseTree if TYPE_CHECKING: from ..common import LexerConf, ParserConf class Parser: lexer_conf: 'LexerConf' parser_conf: 'ParserConf' debug: bool def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matcher: Callable, resolve_ambiguity: bool=True, debug: bool=False, tree_class: Optional[Callable[[str, List], Any]]=Tree, ordered_sets: bool=True): analysis = GrammarAnalyzer(parser_conf) self.lexer_conf = lexer_conf self.parser_conf = parser_conf self.resolve_ambiguity = resolve_ambiguity self.debug = debug self.Tree = tree_class self.Set = OrderedSet if ordered_sets else set self.SymbolNode = StableSymbolNode if ordered_sets else SymbolNode self.FIRST = analysis.FIRST self.NULLABLE = analysis.NULLABLE self.callbacks = parser_conf.callbacks # TODO add typing info self.predictions = {} # type: ignore[var-annotated] ## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than # the slow 'isupper' in is_terminal. self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term } self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term } self.forest_sum_visitor = None for rule in parser_conf.rules: if rule.origin not in self.predictions: self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)] ## Detect if any rules/terminals have priorities set. 
If the user specified priority = None, then # the priorities will be stripped from all rules/terminals before they reach us, allowing us to # skip the extra tree walk. We'll also skip this if the user just didn't specify priorities # on any rules/terminals. if self.forest_sum_visitor is None and rule.options.priority is not None: self.forest_sum_visitor = ForestSumVisitor # Check terminals for priorities # Ignore terminal priorities if the basic lexer is used if self.lexer_conf.lexer_type != 'basic' and self.forest_sum_visitor is None: for term in self.lexer_conf.terminals: if term.priority: self.forest_sum_visitor = ForestSumVisitor break self.term_matcher = term_matcher def predict_and_complete(self, i, to_scan, columns, transitives): """The core Earley Predictor and Completer. At each stage of the input, we handling any completed items (things that matched on the last cycle) and use those to predict what should come next in the input stream. The completions and any predicted non-terminals are recursively processed until we reach a set of, which can be added to the scan list for the next scanner cycle.""" # Held Completions (H in E.Scotts paper). node_cache = {} held_completions = {} column = columns[i] # R (items) = Ei (column.items) items = deque(column) while items: item = items.pop() # remove an element, A say, from R ### The Earley completer if item.is_complete: ### (item.s == string) if item.node is None: label = (item.s, item.start, i) item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label)) item.node.add_family(item.s, item.rule, item.start, None, None) # create_leo_transitives(item.rule.origin, item.start) ###R Joop Leo right recursion Completer if item.rule.origin in transitives[item.start]: transitive = transitives[item.start][item.s] if transitive.previous in transitives[transitive.column]: root_transitive = transitives[transitive.column][transitive.previous] else: root_transitive = transitive new_item = Item(transitive.rule, transitive.ptr, transitive.start) label = (root_transitive.s, root_transitive.start, i) new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label)) new_item.node.add_path(root_transitive, item.node) if new_item.expect in self.TERMINALS: # Add (B :: aC.B, h, y) to Q to_scan.add(new_item) elif new_item not in column: # Add (B :: aC.B, h, y) to Ei and R column.add(new_item) items.append(new_item) ###R Regular Earley completer else: # Empty has 0 length. If we complete an empty symbol in a particular # parse step, we need to be able to use that same empty symbol to complete # any predictions that result, that themselves require empty. Avoids # infinite recursion on empty symbols. # held_completions is 'H' in E.Scott's paper. 
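                    # Worked micro-example (hypothetical grammar): with  a : b "x"  and
                    # b :  (an empty rule), an item for `b` can start and complete in the
                    # same column, so item.start == i. Its result node is "held" here so
                    # that predictions made later in this same column can complete over
                    # the empty `b` immediately, instead of recursing on the empty symbol.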
is_empty_item = item.start == i if is_empty_item: held_completions[item.rule.origin] = item.node originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s] for originator in originators: new_item = originator.advance() label = (new_item.s, originator.start, i) new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label)) new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node) if new_item.expect in self.TERMINALS: # Add (B :: aC.B, h, y) to Q to_scan.add(new_item) elif new_item not in column: # Add (B :: aC.B, h, y) to Ei and R column.add(new_item) items.append(new_item) ### The Earley predictor elif item.expect in self.NON_TERMINALS: ### (item.s == lr0) new_items = [] for rule in self.predictions[item.expect]: new_item = Item(rule, 0, i) new_items.append(new_item) # Process any held completions (H). if item.expect in held_completions: new_item = item.advance() label = (new_item.s, item.start, i) new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label)) new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect]) new_items.append(new_item) for new_item in new_items: if new_item.expect in self.TERMINALS: to_scan.add(new_item) elif new_item not in column: column.add(new_item) items.append(new_item) def _parse(self, lexer, columns, to_scan, start_symbol=None): def is_quasi_complete(item): if item.is_complete: return True quasi = item.advance() while not quasi.is_complete: if quasi.expect not in self.NULLABLE: return False if quasi.rule.origin == start_symbol and quasi.expect == start_symbol: return False quasi = quasi.advance() return True # def create_leo_transitives(origin, start): # ... # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420 def scan(i, token, to_scan): """The core Earley Scanner. This is a custom implementation of the scanner that uses the Lark lexer to match tokens. The scan list is built by the Earley predictor, based on the previously completed tokens. 
This ensures that at each phase of the parse we have a custom lexer context, allowing for more complex ambiguities.""" next_to_scan = self.Set() next_set = self.Set() columns.append(next_set) transitives.append({}) node_cache = {} for item in self.Set(to_scan): if match(item.expect, token): new_item = item.advance() label = (new_item.s, new_item.start, i) # 'terminals' may not contain token.type when using %declare # Additionally, token is not always a Token # For example, it can be a Tree when using TreeMatcher term = terminals.get(token.type) if isinstance(token, Token) else None # Set the priority of the token node to 0 so that the # terminal priorities do not affect the Tree chosen by # ForestSumVisitor after the basic lexer has already # "used up" the terminal priorities token_node = TokenNode(token, term, priority=0) new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label)) new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token_node) if new_item.expect in self.TERMINALS: # add (B ::= Aai+1.B, h, y) to Q' next_to_scan.add(new_item) else: # add (B ::= Aa+1.B, h, y) to Ei+1 next_set.add(new_item) if not next_set and not next_to_scan: expect = {i.expect.name for i in to_scan} raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.s for i in to_scan)) return next_to_scan # Define parser functions match = self.term_matcher terminals = self.lexer_conf.terminals_by_name # Cache for nodes & tokens created in a particular parse step. transitives = [{}] ## The main Earley loop. # Run the Prediction/Completion cycle for any Items in the current Earley set. # Completions will be added to the SPPF tree, and predictions will be recursively # processed down to terminals/empty nodes to be added to the scanner for the next # step. expects = {i.expect for i in to_scan} i = 0 for token in lexer.lex(expects): self.predict_and_complete(i, to_scan, columns, transitives) to_scan = scan(i, token, to_scan) i += 1 expects.clear() expects |= {i.expect for i in to_scan} self.predict_and_complete(i, to_scan, columns, transitives) ## Column is now the final column in the parse. assert i == len(columns)-1 return to_scan def parse(self, lexer, start): assert start, start start_symbol = NonTerminal(start) columns = [self.Set()] to_scan = self.Set() # The scan buffer. 'Q' in E.Scott's paper. ## Predict for the start_symbol. # Add predicted items to the first Earley set (for the predictor) if they # result in a non-terminal, or the scanner if they result in a terminal. for rule in self.predictions[start_symbol]: item = Item(rule, 0, 0) if item.expect in self.TERMINALS: to_scan.add(item) else: columns[0].add(item) to_scan = self._parse(lexer, columns, to_scan, start_symbol) # If the parse was successful, the start # symbol should have been completed in the last step of the Earley cycle, and will be in # this column. Find the item for the start_symbol, which is the root of the SPPF tree. 
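        # Typical way this code path is reached through the public API (hedged
        # example; the toy grammar is deliberately ambiguous):
        #
        #     from lark import Lark
        #     parser = Lark('''
        #         start: e
        #         e: e e | "a"
        #     ''', parser='earley', ambiguity='explicit')
        #     print(parser.parse("aaa").pretty())   # contains an '_ambig' node
        #
        # With ambiguity='explicit' the alternatives survive into the tree below
        # instead of being resolved to a single derivation.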
solutions = dedup_list(n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0) if not solutions: expected_terminals = [t.expect.name for t in to_scan] raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan)) if self.debug: from .earley_forest import ForestToPyDotVisitor try: debug_walker = ForestToPyDotVisitor() except ImportError: logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image") else: for i, s in enumerate(solutions): debug_walker.visit(s, f"sppf{i}.png") if self.Tree is not None: # Perform our SPPF -> AST conversion # Disable the ForestToParseTree cache when ambiguity='resolve' # to prevent a tree construction bug. See issue #1283 use_cache = not self.resolve_ambiguity transformer = ForestToParseTree(self.Tree, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity, use_cache) solutions = [transformer.transform(s) for s in solutions] if len(solutions) > 1 and not self.resolve_ambiguity: t: Tree = self.Tree('_ambig', solutions) t.expand_kids_by_data('_ambig') # solutions may themselves be _ambig nodes return t return solutions[0] # return the root of the SPPF # TODO return a list of solutions, or join them together somehow return solutions[0] lark-1.2.2/lark/parsers/earley_common.py000066400000000000000000000031241465673407200202760ustar00rootroot00000000000000"""This module implements useful building blocks for the Earley parser """ class Item: "An Earley Item, the atom of the algorithm." __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash') def __init__(self, rule, ptr, start): self.is_complete = len(rule.expansion) == ptr self.rule = rule # rule self.ptr = ptr # ptr self.start = start # j self.node = None # w if self.is_complete: self.s = rule.origin self.expect = None self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None else: self.s = (rule, ptr) self.expect = rule.expansion[ptr] self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None self._hash = hash((self.s, self.start, self.rule)) def advance(self): return Item(self.rule, self.ptr + 1, self.start) def __eq__(self, other): return self is other or (self.s == other.s and self.start == other.start and self.rule == other.rule) def __hash__(self): return self._hash def __repr__(self): before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] ) after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] ) symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after)) return '%s (%d)' % (symbol, self.start) # class TransitiveItem(Item): # ... # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420 lark-1.2.2/lark/parsers/earley_forest.py000066400000000000000000000751441465673407200203230ustar00rootroot00000000000000""""This module implements an SPPF implementation This is used as the primary output mechanism for the Earley parser in order to store complex ambiguities. 
Full reference and more details is here: https://web.archive.org/web/20190616123959/http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/ """ from typing import Type, AbstractSet from random import randint from collections import deque from operator import attrgetter from importlib import import_module from functools import partial from ..parse_tree_builder import AmbiguousIntermediateExpander from ..visitors import Discard from ..utils import logger, OrderedSet from ..tree import Tree class ForestNode: pass class SymbolNode(ForestNode): """ A Symbol Node represents a symbol (or Intermediate LR0). Symbol nodes are keyed by the symbol (s). For intermediate nodes s will be an LR0, stored as a tuple of (rule, ptr). For completed symbol nodes, s will be a string representing the non-terminal origin (i.e. the left hand side of the rule). The children of a Symbol or Intermediate Node will always be Packed Nodes; with each Packed Node child representing a single derivation of a production. Hence a Symbol Node with a single child is unambiguous. Parameters: s: A Symbol, or a tuple of (rule, ptr) for an intermediate node. start: For dynamic lexers, the index of the start of the substring matched by this symbol (inclusive). end: For dynamic lexers, the index of the end of the substring matched by this symbol (exclusive). Properties: is_intermediate: True if this node is an intermediate node. priority: The priority of the node's symbol. """ Set: Type[AbstractSet] = set # Overridden by StableSymbolNode __slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate') def __init__(self, s, start, end): self.s = s self.start = start self.end = end self._children = self.Set() self.paths = self.Set() self.paths_loaded = False ### We use inf here as it can be safely negated without resorting to conditionals, # unlike None or float('NaN'), and sorts appropriately. 
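        # Note: the real priority is only filled in later (e.g. by ForestSumVisitor) when rule
        # priorities are in use; until then every node keeps this sentinel value.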
self.priority = float('-inf') self.is_intermediate = isinstance(s, tuple) def add_family(self, lr0, rule, start, left, right): self._children.add(PackedNode(self, lr0, rule, start, left, right)) def add_path(self, transitive, node): self.paths.add((transitive, node)) def load_paths(self): for transitive, node in self.paths: if transitive.next_titem is not None: vn = type(self)(transitive.next_titem.s, transitive.next_titem.start, self.end) vn.add_path(transitive.next_titem, node) self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, vn) else: self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, node) self.paths_loaded = True @property def is_ambiguous(self): """Returns True if this node is ambiguous.""" return len(self.children) > 1 @property def children(self): """Returns a list of this node's children sorted from greatest to least priority.""" if not self.paths_loaded: self.load_paths() return sorted(self._children, key=attrgetter('sort_key')) def __iter__(self): return iter(self._children) def __repr__(self): if self.is_intermediate: rule = self.s[0] ptr = self.s[1] before = ( expansion.name for expansion in rule.expansion[:ptr] ) after = ( expansion.name for expansion in rule.expansion[ptr:] ) symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after)) else: symbol = self.s.name return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority) class StableSymbolNode(SymbolNode): "A version of SymbolNode that uses OrderedSet for output stability" Set = OrderedSet class PackedNode(ForestNode): """ A Packed Node represents a single derivation in a symbol node. Parameters: rule: The rule associated with this node. parent: The parent of this node. left: The left child of this node. ``None`` if one does not exist. right: The right child of this node. ``None`` if one does not exist. priority: The priority of this node. """ __slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash') def __init__(self, parent, s, rule, start, left, right): self.parent = parent self.s = s self.start = start self.rule = rule self.left = left self.right = right self.priority = float('-inf') self._hash = hash((self.left, self.right)) @property def is_empty(self): return self.left is None and self.right is None @property def sort_key(self): """ Used to sort PackedNode children of SymbolNodes. A SymbolNode has multiple PackedNodes if it matched ambiguously. Hence, we use the sort order to identify the order in which ambiguous children should be considered. 
""" return self.is_empty, -self.priority, self.rule.order @property def children(self): """Returns a list of this node's children.""" return [x for x in [self.left, self.right] if x is not None] def __iter__(self): yield self.left yield self.right def __eq__(self, other): if not isinstance(other, PackedNode): return False return self is other or (self.left == other.left and self.right == other.right) def __hash__(self): return self._hash def __repr__(self): if isinstance(self.s, tuple): rule = self.s[0] ptr = self.s[1] before = ( expansion.name for expansion in rule.expansion[:ptr] ) after = ( expansion.name for expansion in rule.expansion[ptr:] ) symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after)) else: symbol = self.s.name return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order) class TokenNode(ForestNode): """ A Token Node represents a matched terminal and is always a leaf node. Parameters: token: The Token associated with this node. term: The TerminalDef matched by the token. priority: The priority of this node. """ __slots__ = ('token', 'term', 'priority', '_hash') def __init__(self, token, term, priority=None): self.token = token self.term = term if priority is not None: self.priority = priority else: self.priority = term.priority if term is not None else 0 self._hash = hash(token) def __eq__(self, other): if not isinstance(other, TokenNode): return False return self is other or (self.token == other.token) def __hash__(self): return self._hash def __repr__(self): return repr(self.token) class ForestVisitor: """ An abstract base class for building forest visitors. This class performs a controllable depth-first walk of an SPPF. The visitor will not enter cycles and will backtrack if one is encountered. Subclasses are notified of cycles through the ``on_cycle`` method. Behavior for visit events is defined by overriding the ``visit*node*`` functions. The walk is controlled by the return values of the ``visit*node_in`` methods. Returning a node(s) will schedule them to be visited. The visitor will begin to backtrack if no nodes are returned. Parameters: single_visit: If ``True``, non-Token nodes will only be visited once. """ def __init__(self, single_visit=False): self.single_visit = single_visit def visit_token_node(self, node): """Called when a ``Token`` is visited. ``Token`` nodes are always leaves.""" pass def visit_symbol_node_in(self, node): """Called when a symbol node is visited. Nodes that are returned will be scheduled to be visited. If ``visit_intermediate_node_in`` is not implemented, this function will be called for intermediate nodes as well.""" pass def visit_symbol_node_out(self, node): """Called after all nodes returned from a corresponding ``visit_symbol_node_in`` call have been visited. If ``visit_intermediate_node_out`` is not implemented, this function will be called for intermediate nodes as well.""" pass def visit_packed_node_in(self, node): """Called when a packed node is visited. Nodes that are returned will be scheduled to be visited. """ pass def visit_packed_node_out(self, node): """Called after all nodes returned from a corresponding ``visit_packed_node_in`` call have been visited.""" pass def on_cycle(self, node, path): """Called when a cycle is encountered. Parameters: node: The node that causes a cycle. path: The list of nodes being visited: nodes that have been entered but not exited. The first element is the root in a forest visit, and the last element is the node visited most recently. 
``path`` should be treated as read-only. """ pass def get_cycle_in_path(self, node, path): """A utility function for use in ``on_cycle`` to obtain a slice of ``path`` that only contains the nodes that make up the cycle.""" index = len(path) - 1 while id(path[index]) != id(node): index -= 1 return path[index:] def visit(self, root): # Visiting is a list of IDs of all symbol/intermediate nodes currently in # the stack. It serves two purposes: to detect when we 'recurse' in and out # of a symbol/intermediate so that we can process both up and down. Also, # since the SPPF can have cycles it allows us to detect if we're trying # to recurse into a node that's already on the stack (infinite recursion). visiting = set() # set of all nodes that have been visited visited = set() # a list of nodes that are currently being visited # used for the `on_cycle` callback path = [] # We do not use recursion here to walk the Forest due to the limited # stack size in python. Therefore input_stack is essentially our stack. input_stack = deque([root]) # It is much faster to cache these as locals since they are called # many times in large parses. vpno = getattr(self, 'visit_packed_node_out') vpni = getattr(self, 'visit_packed_node_in') vsno = getattr(self, 'visit_symbol_node_out') vsni = getattr(self, 'visit_symbol_node_in') vino = getattr(self, 'visit_intermediate_node_out', vsno) vini = getattr(self, 'visit_intermediate_node_in', vsni) vtn = getattr(self, 'visit_token_node') oc = getattr(self, 'on_cycle') while input_stack: current = next(reversed(input_stack)) try: next_node = next(current) except StopIteration: input_stack.pop() continue except TypeError: ### If the current object is not an iterator, pass through to Token/SymbolNode pass else: if next_node is None: continue if id(next_node) in visiting: oc(next_node, path) continue input_stack.append(next_node) continue if isinstance(current, TokenNode): vtn(current.token) input_stack.pop() continue current_id = id(current) if current_id in visiting: if isinstance(current, PackedNode): vpno(current) elif current.is_intermediate: vino(current) else: vsno(current) input_stack.pop() path.pop() visiting.remove(current_id) visited.add(current_id) elif self.single_visit and current_id in visited: input_stack.pop() else: visiting.add(current_id) path.append(current) if isinstance(current, PackedNode): next_node = vpni(current) elif current.is_intermediate: next_node = vini(current) else: next_node = vsni(current) if next_node is None: continue if not isinstance(next_node, ForestNode): next_node = iter(next_node) elif id(next_node) in visiting: oc(next_node, path) continue input_stack.append(next_node) class ForestTransformer(ForestVisitor): """The base class for a bottom-up forest transformation. Most users will want to use ``TreeForestTransformer`` instead as it has a friendlier interface and covers most use cases. Transformations are applied via inheritance and overriding of the ``transform*node`` methods. ``transform_token_node`` receives a ``Token`` as an argument. All other methods receive the node that is being transformed and a list of the results of the transformations of that node's children. The return value of these methods are the resulting transformations. If ``Discard`` is raised in a node's transformation, no data from that node will be passed to its parent's transformation. 
""" def __init__(self): super(ForestTransformer, self).__init__() # results of transformations self.data = dict() # used to track parent nodes self.node_stack = deque() def transform(self, root): """Perform a transformation on an SPPF.""" self.node_stack.append('result') self.data['result'] = [] self.visit(root) assert len(self.data['result']) <= 1 if self.data['result']: return self.data['result'][0] def transform_symbol_node(self, node, data): """Transform a symbol node.""" return node def transform_intermediate_node(self, node, data): """Transform an intermediate node.""" return node def transform_packed_node(self, node, data): """Transform a packed node.""" return node def transform_token_node(self, node): """Transform a ``Token``.""" return node def visit_symbol_node_in(self, node): self.node_stack.append(id(node)) self.data[id(node)] = [] return node.children def visit_packed_node_in(self, node): self.node_stack.append(id(node)) self.data[id(node)] = [] return node.children def visit_token_node(self, node): transformed = self.transform_token_node(node) if transformed is not Discard: self.data[self.node_stack[-1]].append(transformed) def _visit_node_out_helper(self, node, method): self.node_stack.pop() transformed = method(node, self.data[id(node)]) if transformed is not Discard: self.data[self.node_stack[-1]].append(transformed) del self.data[id(node)] def visit_symbol_node_out(self, node): self._visit_node_out_helper(node, self.transform_symbol_node) def visit_intermediate_node_out(self, node): self._visit_node_out_helper(node, self.transform_intermediate_node) def visit_packed_node_out(self, node): self._visit_node_out_helper(node, self.transform_packed_node) class ForestSumVisitor(ForestVisitor): """ A visitor for prioritizing ambiguous parts of the Forest. This visitor is used when support for explicit priorities on rules is requested (whether normal, or invert). It walks the forest (or subsets thereof) and cascades properties upwards from the leaves. It would be ideal to do this during parsing, however this would require processing each Earley item multiple times. That's a big performance drawback; so running a forest walk is the lesser of two evils: there can be significantly more Earley items created during parsing than there are SPPF nodes in the final tree. """ def __init__(self): super(ForestSumVisitor, self).__init__(single_visit=True) def visit_packed_node_in(self, node): yield node.left yield node.right def visit_symbol_node_in(self, node): return iter(node.children) def visit_packed_node_out(self, node): priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options.priority else 0 priority += getattr(node.right, 'priority', 0) priority += getattr(node.left, 'priority', 0) node.priority = priority def visit_symbol_node_out(self, node): node.priority = max(child.priority for child in node.children) class PackedData(): """Used in transformationss of packed nodes to distinguish the data that comes from the left child and the right child. """ class _NoData(): pass NO_DATA = _NoData() def __init__(self, node, data): self.left = self.NO_DATA self.right = self.NO_DATA if data: if node.left is not None: self.left = data[0] if len(data) > 1: self.right = data[1] else: self.right = data[0] class ForestToParseTree(ForestTransformer): """Used by the earley parser when ambiguity equals 'resolve' or 'explicit'. Transforms an SPPF into an (ambiguous) parse tree. 
Parameters: tree_class: The tree class to use for construction callbacks: A dictionary of rules to functions that output a tree prioritizer: A ``ForestVisitor`` that manipulates the priorities of ForestNodes resolve_ambiguity: If True, ambiguities will be resolved based on priorities. Otherwise, `_ambig` nodes will be in the resulting tree. use_cache: If True, the results of packed node transformations will be cached. """ def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=True): super(ForestToParseTree, self).__init__() self.tree_class = tree_class self.callbacks = callbacks self.prioritizer = prioritizer self.resolve_ambiguity = resolve_ambiguity self._use_cache = use_cache self._cache = {} self._on_cycle_retreat = False self._cycle_node = None self._successful_visits = set() def visit(self, root): if self.prioritizer: self.prioritizer.visit(root) super(ForestToParseTree, self).visit(root) self._cache = {} def on_cycle(self, node, path): logger.debug("Cycle encountered in the SPPF at node: %s. " "As infinite ambiguities cannot be represented in a tree, " "this family of derivations will be discarded.", node) self._cycle_node = node self._on_cycle_retreat = True def _check_cycle(self, node): if self._on_cycle_retreat: if id(node) == id(self._cycle_node) or id(node) in self._successful_visits: self._cycle_node = None self._on_cycle_retreat = False else: return Discard def _collapse_ambig(self, children): new_children = [] for child in children: if hasattr(child, 'data') and child.data == '_ambig': new_children += child.children else: new_children.append(child) return new_children def _call_rule_func(self, node, data): # called when transforming children of symbol nodes # data is a list of trees or tokens that correspond to the # symbol's rule expansion return self.callbacks[node.rule](data) def _call_ambig_func(self, node, data): # called when transforming a symbol node # data is a list of trees where each tree's data is # equal to the name of the symbol or one of its aliases. 
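        # More than one surviving derivation means the symbol is ambiguous: wrap the
        # alternatives in an '_ambig' node. A single derivation is passed through unchanged,
        # and an empty list (every derivation was discarded) is dropped via Discard.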
if len(data) > 1: return self.tree_class('_ambig', data) elif data: return data[0] return Discard def transform_symbol_node(self, node, data): if id(node) not in self._successful_visits: return Discard r = self._check_cycle(node) if r is Discard: return r self._successful_visits.remove(id(node)) data = self._collapse_ambig(data) return self._call_ambig_func(node, data) def transform_intermediate_node(self, node, data): if id(node) not in self._successful_visits: return Discard r = self._check_cycle(node) if r is Discard: return r self._successful_visits.remove(id(node)) if len(data) > 1: children = [self.tree_class('_inter', c) for c in data] return self.tree_class('_iambig', children) return data[0] def transform_packed_node(self, node, data): r = self._check_cycle(node) if r is Discard: return r if self.resolve_ambiguity and id(node.parent) in self._successful_visits: return Discard if self._use_cache and id(node) in self._cache: return self._cache[id(node)] children = [] assert len(data) <= 2 data = PackedData(node, data) if data.left is not PackedData.NO_DATA: if node.left.is_intermediate and isinstance(data.left, list): children += data.left else: children.append(data.left) if data.right is not PackedData.NO_DATA: children.append(data.right) transformed = children if node.parent.is_intermediate else self._call_rule_func(node, children) if self._use_cache: self._cache[id(node)] = transformed return transformed def visit_symbol_node_in(self, node): super(ForestToParseTree, self).visit_symbol_node_in(node) if self._on_cycle_retreat: return return node.children def visit_packed_node_in(self, node): self._on_cycle_retreat = False to_visit = super(ForestToParseTree, self).visit_packed_node_in(node) if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits: if not self._use_cache or id(node) not in self._cache: return to_visit def visit_packed_node_out(self, node): super(ForestToParseTree, self).visit_packed_node_out(node) if not self._on_cycle_retreat: self._successful_visits.add(id(node.parent)) def handles_ambiguity(func): """Decorator for methods of subclasses of ``TreeForestTransformer``. Denotes that the method should receive a list of transformed derivations.""" func.handles_ambiguity = True return func class TreeForestTransformer(ForestToParseTree): """A ``ForestTransformer`` with a tree ``Transformer``-like interface. By default, it will construct a tree. Methods provided via inheritance are called based on the rule/symbol names of nodes in the forest. Methods that act on rules will receive a list of the results of the transformations of the rule's children. By default, trees and tokens. Methods that act on tokens will receive a token. Alternatively, methods that act on rules may be annotated with ``handles_ambiguity``. In this case, the function will receive a list of all the transformations of all the derivations of the rule. By default, a list of trees where each tree.data is equal to the rule name or one of its aliases. Non-tree transformations are made possible by override of ``__default__``, ``__default_token__``, and ``__default_ambig__``. Note: Tree shaping features such as inlined rules and token filtering are not built into the transformation. Positions are also not propagated. Parameters: tree_class: The tree class to use for construction prioritizer: A ``ForestVisitor`` that manipulates the priorities of nodes in the SPPF. resolve_ambiguity: If True, ambiguities will be resolved based on priorities. 
use_cache (bool): If True, caches the results of some transformations, potentially improving performance when ``resolve_ambiguity==False``. Only use if you know what you are doing: i.e. All transformation functions are pure and referentially transparent. """ def __init__(self, tree_class=Tree, prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=False): super(TreeForestTransformer, self).__init__(tree_class, dict(), prioritizer, resolve_ambiguity, use_cache) def __default__(self, name, data): """Default operation on tree (for override). Returns a tree with name with data as children. """ return self.tree_class(name, data) def __default_ambig__(self, name, data): """Default operation on ambiguous rule (for override). Wraps data in an '_ambig_' node if it contains more than one element. """ if len(data) > 1: return self.tree_class('_ambig', data) elif data: return data[0] return Discard def __default_token__(self, node): """Default operation on ``Token`` (for override). Returns ``node``. """ return node def transform_token_node(self, node): return getattr(self, node.type, self.__default_token__)(node) def _call_rule_func(self, node, data): name = node.rule.alias or node.rule.options.template_source or node.rule.origin.name user_func = getattr(self, name, self.__default__) if user_func == self.__default__ or hasattr(user_func, 'handles_ambiguity'): user_func = partial(self.__default__, name) if not self.resolve_ambiguity: wrapper = partial(AmbiguousIntermediateExpander, self.tree_class) user_func = wrapper(user_func) return user_func(data) def _call_ambig_func(self, node, data): name = node.s.name user_func = getattr(self, name, self.__default_ambig__) if user_func == self.__default_ambig__ or not hasattr(user_func, 'handles_ambiguity'): user_func = partial(self.__default_ambig__, name) return user_func(data) class ForestToPyDotVisitor(ForestVisitor): """ A Forest visitor which writes the SPPF to a PNG. The SPPF can get really large, really quickly because of the amount of meta-data it stores, so this is probably only useful for trivial trees and learning how the SPPF is structured. 
""" def __init__(self, rankdir="TB"): super(ForestToPyDotVisitor, self).__init__(single_visit=True) self.pydot = import_module('pydot') self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir) def visit(self, root, filename): super(ForestToPyDotVisitor, self).visit(root) try: self.graph.write_png(filename) except FileNotFoundError as e: logger.error("Could not write png: ", e) def visit_token_node(self, node): graph_node_id = str(id(node)) graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"')) graph_node_color = 0x808080 graph_node_style = "\"filled,rounded\"" graph_node_shape = "diamond" graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label) self.graph.add_node(graph_node) def visit_packed_node_in(self, node): graph_node_id = str(id(node)) graph_node_label = repr(node) graph_node_color = 0x808080 graph_node_style = "filled" graph_node_shape = "diamond" graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label) self.graph.add_node(graph_node) yield node.left yield node.right def visit_packed_node_out(self, node): graph_node_id = str(id(node)) graph_node = self.graph.get_node(graph_node_id)[0] for child in [node.left, node.right]: if child is not None: child_graph_node_id = str(id(child.token if isinstance(child, TokenNode) else child)) child_graph_node = self.graph.get_node(child_graph_node_id)[0] self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node)) else: #### Try and be above the Python object ID range; probably impl. specific, but maybe this is okay. child_graph_node_id = str(randint(100000000000000000000000000000,123456789012345678901234567890)) child_graph_node_style = "invis" child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None") child_edge_style = "invis" self.graph.add_node(child_graph_node) self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style)) def visit_symbol_node_in(self, node): graph_node_id = str(id(node)) graph_node_label = repr(node) graph_node_color = 0x808080 graph_node_style = "\"filled\"" if node.is_intermediate: graph_node_shape = "ellipse" else: graph_node_shape = "rectangle" graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label) self.graph.add_node(graph_node) return iter(node.children) def visit_symbol_node_out(self, node): graph_node_id = str(id(node)) graph_node = self.graph.get_node(graph_node_id)[0] for child in node.children: child_graph_node_id = str(id(child)) child_graph_node = self.graph.get_node(child_graph_node_id)[0] self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node)) lark-1.2.2/lark/parsers/grammar_analysis.py000066400000000000000000000157451465673407200210120ustar00rootroot00000000000000"Provides for superficial grammar analysis." 
from collections import Counter, defaultdict from typing import List, Dict, Iterator, FrozenSet, Set from ..utils import bfs, fzset, classify, OrderedSet from ..exceptions import GrammarError from ..grammar import Rule, Terminal, NonTerminal, Symbol from ..common import ParserConf class RulePtr: __slots__ = ('rule', 'index') rule: Rule index: int def __init__(self, rule: Rule, index: int): assert isinstance(rule, Rule) assert index <= len(rule.expansion) self.rule = rule self.index = index def __repr__(self): before = [x.name for x in self.rule.expansion[:self.index]] after = [x.name for x in self.rule.expansion[self.index:]] return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after)) @property def next(self) -> Symbol: return self.rule.expansion[self.index] def advance(self, sym: Symbol) -> 'RulePtr': assert self.next == sym return RulePtr(self.rule, self.index+1) @property def is_satisfied(self) -> bool: return self.index == len(self.rule.expansion) def __eq__(self, other) -> bool: if not isinstance(other, RulePtr): return NotImplemented return self.rule == other.rule and self.index == other.index def __hash__(self) -> int: return hash((self.rule, self.index)) State = FrozenSet[RulePtr] # state generation ensures no duplicate LR0ItemSets class LR0ItemSet: __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads') kernel: State closure: State transitions: Dict[Symbol, 'LR0ItemSet'] lookaheads: Dict[Symbol, Set[Rule]] def __init__(self, kernel, closure): self.kernel = fzset(kernel) self.closure = fzset(closure) self.transitions = {} self.lookaheads = defaultdict(set) def __repr__(self): return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure])) def update_set(set1, set2): if not set2 or set1 > set2: return False copy = set(set1) set1 |= set2 return set1 != copy def calculate_sets(rules): """Calculate FOLLOW sets. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets""" symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules} # foreach grammar rule X ::= Y(1) ... 
Y(k) # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then # NULLABLE = NULLABLE union {X} # for i = 1 to k # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then # FIRST(X) = FIRST(X) union FIRST(Y(i)) # for j = i+1 to k # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X) # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j)) # until none of NULLABLE,FIRST,FOLLOW changed in last iteration NULLABLE = set() FIRST = {} FOLLOW = {} for sym in symbols: FIRST[sym]={sym} if sym.is_term else set() FOLLOW[sym]=set() # Calculate NULLABLE and FIRST changed = True while changed: changed = False for rule in rules: if set(rule.expansion) <= NULLABLE: if update_set(NULLABLE, {rule.origin}): changed = True for i, sym in enumerate(rule.expansion): if set(rule.expansion[:i]) <= NULLABLE: if update_set(FIRST[rule.origin], FIRST[sym]): changed = True else: break # Calculate FOLLOW changed = True while changed: changed = False for rule in rules: for i, sym in enumerate(rule.expansion): if i==len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE: if update_set(FOLLOW[sym], FOLLOW[rule.origin]): changed = True for j in range(i+1, len(rule.expansion)): if set(rule.expansion[i+1:j]) <= NULLABLE: if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]): changed = True return FIRST, FOLLOW, NULLABLE class GrammarAnalyzer: def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False): self.debug = debug self.strict = strict root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')]) for start in parser_conf.start} rules = parser_conf.rules + list(root_rules.values()) self.rules_by_origin: Dict[NonTerminal, List[Rule]] = classify(rules, lambda r: r.origin) if len(rules) != len(set(rules)): duplicates = [item for item, count in Counter(rules).items() if count > 1] raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates)) for r in rules: for sym in r.expansion: if not (sym.is_term or sym in self.rules_by_origin): raise GrammarError("Using an undefined rule: %s" % sym) self.start_states = {start: self.expand_rule(root_rule.origin) for start, root_rule in root_rules.items()} self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))}) for start, root_rule in root_rules.items()} lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)]) for start in parser_conf.start} lr0_rules = parser_conf.rules + list(lr0_root_rules.values()) assert(len(lr0_rules) == len(set(lr0_rules))) self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin) # cache RulePtr(r, 0) in r (no duplicate RulePtr objects) self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin)) for start, root_rule in lr0_root_rules.items()} self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) def expand_rule(self, source_rule: NonTerminal, rules_by_origin=None) -> OrderedSet[RulePtr]: "Returns all init_ptrs accessible by rule (recursive)" if rules_by_origin is None: rules_by_origin = self.rules_by_origin init_ptrs = OrderedSet[RulePtr]() def _expand_rule(rule: NonTerminal) -> Iterator[NonTerminal]: assert not rule.is_term, rule for r in rules_by_origin[rule]: init_ptr = RulePtr(r, 0) init_ptrs.add(init_ptr) if r.expansion: # if not empty rule new_r = init_ptr.next if not new_r.is_term: assert isinstance(new_r, NonTerminal) yield new_r for _ in 
bfs([source_rule], _expand_rule): pass return init_ptrs lark-1.2.2/lark/parsers/lalr_analysis.py000066400000000000000000000276571465673407200203230ustar00rootroot00000000000000"""This module builds a LALR(1) transition-table for lalr_parser.py For now, shift/reduce conflicts are automatically resolved as shifts. """ # Author: Erez Shinan (2017) # Email : erezshin@gmail.com from typing import Dict, Set, Iterator, Tuple, List, TypeVar, Generic from collections import defaultdict from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger from ..exceptions import GrammarError from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet, RulePtr, State from ..grammar import Rule, Symbol from ..common import ParserConf ###{standalone class Action: def __init__(self, name): self.name = name def __str__(self): return self.name def __repr__(self): return str(self) Shift = Action('Shift') Reduce = Action('Reduce') StateT = TypeVar("StateT") class ParseTableBase(Generic[StateT]): states: Dict[StateT, Dict[str, Tuple]] start_states: Dict[str, StateT] end_states: Dict[str, StateT] def __init__(self, states, start_states, end_states): self.states = states self.start_states = start_states self.end_states = end_states def serialize(self, memo): tokens = Enumerator() states = { state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg)) for token, (action, arg) in actions.items()} for state, actions in self.states.items() } return { 'tokens': tokens.reversed(), 'states': states, 'start_states': self.start_states, 'end_states': self.end_states, } @classmethod def deserialize(cls, data, memo): tokens = data['tokens'] states = { state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg)) for token, (action, arg) in actions.items()} for state, actions in data['states'].items() } return cls(states, data['start_states'], data['end_states']) class ParseTable(ParseTableBase['State']): """Parse-table whose key is State, i.e. set[RulePtr] Slower than IntParseTable, but useful for debugging """ pass class IntParseTable(ParseTableBase[int]): """Parse-table whose key is int. 
Best for performance.""" @classmethod def from_ParseTable(cls, parse_table: ParseTable): enum = list(parse_table.states) state_to_idx: Dict['State', int] = {s:i for i,s in enumerate(enum)} int_states = {} for s, la in parse_table.states.items(): la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v for k,v in la.items()} int_states[ state_to_idx[s] ] = la start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()} end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()} return cls(int_states, start_states, end_states) ###} # digraph and traverse, see The Theory and Practice of Compiler Writing # computes F(x) = G(x) union (union { G(y) | x R y }) # X: nodes # R: relation (function mapping node -> list of nodes that satisfy the relation) # G: set valued function def digraph(X, R, G): F = {} S = [] N = dict.fromkeys(X, 0) for x in X: # this is always true for the first iteration, but N[x] may be updated in traverse below if N[x] == 0: traverse(x, S, N, X, R, G, F) return F # x: single node # S: stack # N: weights # X: nodes # R: relation (see above) # G: set valued function # F: set valued function we are computing (map of input -> output) def traverse(x, S, N, X, R, G, F): S.append(x) d = len(S) N[x] = d F[x] = G[x] for y in R[x]: if N[y] == 0: traverse(y, S, N, X, R, G, F) n_x = N[x] assert(n_x > 0) n_y = N[y] assert(n_y != 0) if (n_y > 0) and (n_y < n_x): N[x] = n_y F[x].update(F[y]) if N[x] == d: f_x = F[x] while True: z = S.pop() N[z] = -1 F[z] = f_x if z == x: break class LALR_Analyzer(GrammarAnalyzer): lr0_itemsets: Set[LR0ItemSet] nonterminal_transitions: List[Tuple[LR0ItemSet, Symbol]] lookback: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Rule]]] includes: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Symbol]]] reads: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Symbol]]] directly_reads: Dict[Tuple[LR0ItemSet, Symbol], Set[Symbol]] def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False): GrammarAnalyzer.__init__(self, parser_conf, debug, strict) self.nonterminal_transitions = [] self.directly_reads = defaultdict(set) self.reads = defaultdict(set) self.includes = defaultdict(set) self.lookback = defaultdict(set) def compute_lr0_states(self) -> None: self.lr0_itemsets = set() # map of kernels to LR0ItemSets cache: Dict['State', LR0ItemSet] = {} def step(state: LR0ItemSet) -> Iterator[LR0ItemSet]: _, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied) d = classify(unsat, lambda rp: rp.next) for sym, rps in d.items(): kernel = fzset({rp.advance(sym) for rp in rps}) new_state = cache.get(kernel, None) if new_state is None: closure = set(kernel) for rp in kernel: if not rp.is_satisfied and not rp.next.is_term: closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin) new_state = LR0ItemSet(kernel, closure) cache[kernel] = new_state state.transitions[sym] = new_state yield new_state self.lr0_itemsets.add(state) for _ in bfs(self.lr0_start_states.values(), step): pass def compute_reads_relations(self): # handle start state for root in self.lr0_start_states.values(): assert(len(root.kernel) == 1) for rp in root.kernel: assert(rp.index == 0) self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ]) for state in self.lr0_itemsets: seen = set() for rp in state.closure: if rp.is_satisfied: continue s = rp.next # if s is a not a nonterminal if s not in self.lr0_rules_by_origin: continue if s in seen: continue seen.add(s) nt = (state, s) 
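                # `nt` is a nonterminal transition: the pair (LR0 item set, nonterminal symbol)
                # used as the key for the directly_reads/reads/includes/lookback relations below.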
self.nonterminal_transitions.append(nt) dr = self.directly_reads[nt] r = self.reads[nt] next_state = state.transitions[s] for rp2 in next_state.closure: if rp2.is_satisfied: continue s2 = rp2.next # if s2 is a terminal if s2 not in self.lr0_rules_by_origin: dr.add(s2) if s2 in self.NULLABLE: r.add((next_state, s2)) def compute_includes_lookback(self): for nt in self.nonterminal_transitions: state, nonterminal = nt includes = [] lookback = self.lookback[nt] for rp in state.closure: if rp.rule.origin != nonterminal: continue # traverse the states for rp(.rule) state2 = state for i in range(rp.index, len(rp.rule.expansion)): s = rp.rule.expansion[i] nt2 = (state2, s) state2 = state2.transitions[s] if nt2 not in self.reads: continue for j in range(i + 1, len(rp.rule.expansion)): if rp.rule.expansion[j] not in self.NULLABLE: break else: includes.append(nt2) # state2 is at the final state for rp.rule if rp.index == 0: for rp2 in state2.closure: if (rp2.rule == rp.rule) and rp2.is_satisfied: lookback.add((state2, rp2.rule)) for nt2 in includes: self.includes[nt2].add(nt) def compute_lookaheads(self): read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads) follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets) for nt, lookbacks in self.lookback.items(): for state, rule in lookbacks: for s in follow_sets[nt]: state.lookaheads[s].add(rule) def compute_lalr1_states(self) -> None: m: Dict[LR0ItemSet, Dict[str, Tuple]] = {} reduce_reduce = [] for itemset in self.lr0_itemsets: actions: Dict[Symbol, Tuple] = {la: (Shift, next_state.closure) for la, next_state in itemset.transitions.items()} for la, rules in itemset.lookaheads.items(): if len(rules) > 1: # Try to resolve conflict based on priority p = [(r.options.priority or 0, r) for r in rules] p.sort(key=lambda r: r[0], reverse=True) best, second_best = p[:2] if best[0] > second_best[0]: rules = {best[1]} else: reduce_reduce.append((itemset, la, rules)) continue rule ,= rules if la in actions: if self.strict: raise GrammarError(f"Shift/Reduce conflict for terminal {la.name}. 
[strict-mode]\n ") elif self.debug: logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name) logger.warning(' * %s', rule) else: logger.debug('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name) logger.debug(' * %s', rule) else: actions[la] = (Reduce, rule) m[itemset] = { k.name: v for k, v in actions.items() } if reduce_reduce: msgs = [] for itemset, la, rules in reduce_reduce: msg = 'Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t- ' + str(r) for r in rules ])) if self.debug: msg += '\n collision occurred in state: {%s\n }' % ''.join(['\n\t' + str(x) for x in itemset.closure]) msgs.append(msg) raise GrammarError('\n\n'.join(msgs)) states = { k.closure: v for k, v in m.items() } # compute end states end_states: Dict[str, 'State'] = {} for state in states: for rp in state: for start in self.lr0_start_states: if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied: assert start not in end_states end_states[start] = state start_states = { start: state.closure for start, state in self.lr0_start_states.items() } _parse_table = ParseTable(states, start_states, end_states) if self.debug: self.parse_table = _parse_table else: self.parse_table = IntParseTable.from_ParseTable(_parse_table) def compute_lalr(self): self.compute_lr0_states() self.compute_reads_relations() self.compute_includes_lookback() self.compute_lookaheads() self.compute_lalr1_states() lark-1.2.2/lark/parsers/lalr_interactive_parser.py000066400000000000000000000131751465673407200223570ustar00rootroot00000000000000# This module provides a LALR interactive parser, which is used for debugging and error handling from typing import Iterator, List from copy import copy import warnings from lark.exceptions import UnexpectedToken from lark.lexer import Token, LexerThread from .lalr_parser_state import ParserState ###{standalone class InteractiveParser: """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR. For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``. """ def __init__(self, parser, parser_state: ParserState, lexer_thread: LexerThread): self.parser = parser self.parser_state = parser_state self.lexer_thread = lexer_thread self.result = None @property def lexer_state(self) -> LexerThread: warnings.warn("lexer_state will be removed in subsequent releases. Use lexer_thread instead.", DeprecationWarning) return self.lexer_thread def feed_token(self, token: Token): """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer. Note that ``token`` has to be an instance of ``Token``. """ return self.parser_state.feed_token(token, token.type == '$END') def iter_parse(self) -> Iterator[Token]: """Step through the different stages of the parse, by reading tokens from the lexer and feeding them to the parser, one per iteration. Returns an iterator of the tokens it encounters. When the parse is over, the resulting tree can be found in ``InteractiveParser.result``. """ for token in self.lexer_thread.lex(self.parser_state): yield token self.result = self.feed_token(token) def exhaust_lexer(self) -> List[Token]: """Try to feed the rest of the lexer state into the interactive parser. Note that this modifies the instance in place and does not feed an '$END' Token """ return list(self.iter_parse()) def feed_eof(self, last_token=None): """Feed a '$END' Token. 
Borrows from 'last_token' if given.""" eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else self.lexer_thread._Token('$END', '', 0, 1, 1) return self.feed_token(eof) def __copy__(self): """Create a new interactive parser with a separate state. Calls to feed_token() won't affect the old instance, and vice-versa. """ return self.copy() def copy(self, deepcopy_values=True): return type(self)( self.parser, self.parser_state.copy(deepcopy_values=deepcopy_values), copy(self.lexer_thread), ) def __eq__(self, other): if not isinstance(other, InteractiveParser): return False return self.parser_state == other.parser_state and self.lexer_thread == other.lexer_thread def as_immutable(self): """Convert to an ``ImmutableInteractiveParser``.""" p = copy(self) return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_thread) def pretty(self): """Print the output of ``choices()`` in a way that's easier to read.""" out = ["Parser choices:"] for k, v in self.choices().items(): out.append('\t- %s -> %r' % (k, v)) out.append('stack size: %s' % len(self.parser_state.state_stack)) return '\n'.join(out) def choices(self): """Returns a dictionary of token types, matched to their action in the parser. Only returns token types that are accepted by the current state. Updated by ``feed_token()``. """ return self.parser_state.parse_conf.parse_table.states[self.parser_state.position] def accepts(self): """Returns the set of possible tokens that will advance the parser into a new valid state.""" accepts = set() conf_no_callbacks = copy(self.parser_state.parse_conf) # We don't want to call callbacks here since those might have arbitrary side effects # and are unnecessarily slow. conf_no_callbacks.callbacks = {} for t in self.choices(): if t.isupper(): # is terminal? new_cursor = self.copy(deepcopy_values=False) new_cursor.parser_state.parse_conf = conf_no_callbacks try: new_cursor.feed_token(self.lexer_thread._Token(t, '')) except UnexpectedToken: pass else: accepts.add(t) return accepts def resume_parse(self): """Resume automated parsing from the current state. """ return self.parser.parse_from_state(self.parser_state, last_token=self.lexer_thread.state.last_token) class ImmutableInteractiveParser(InteractiveParser): """Same as ``InteractiveParser``, but operations create a new instance instead of changing it in-place. """ result = None def __hash__(self): return hash((self.parser_state, self.lexer_thread)) def feed_token(self, token): c = copy(self) c.result = InteractiveParser.feed_token(c, token) return c def exhaust_lexer(self): """Try to feed the rest of the lexer state into the parser. 
Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token""" cursor = self.as_mutable() cursor.exhaust_lexer() return cursor.as_immutable() def as_mutable(self): """Convert to an ``InteractiveParser``.""" p = copy(self) return InteractiveParser(p.parser, p.parser_state, p.lexer_thread) ###} lark-1.2.2/lark/parsers/lalr_parser.py000066400000000000000000000107521465673407200177600ustar00rootroot00000000000000"""This module implements a LALR(1) Parser """ # Author: Erez Shinan (2017) # Email : erezshin@gmail.com from typing import Dict, Any, Optional from ..lexer import Token, LexerThread from ..utils import Serialize from ..common import ParserConf, ParserCallbacks from .lalr_analysis import LALR_Analyzer, IntParseTable, ParseTableBase from .lalr_interactive_parser import InteractiveParser from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken from .lalr_parser_state import ParserState, ParseConf ###{standalone class LALR_Parser(Serialize): def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False): analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict) analysis.compute_lalr() callbacks = parser_conf.callbacks self._parse_table = analysis.parse_table self.parser_conf = parser_conf self.parser = _Parser(analysis.parse_table, callbacks, debug) @classmethod def deserialize(cls, data, memo, callbacks, debug=False): inst = cls.__new__(cls) inst._parse_table = IntParseTable.deserialize(data, memo) inst.parser = _Parser(inst._parse_table, callbacks, debug) return inst def serialize(self, memo: Any = None) -> Dict[str, Any]: return self._parse_table.serialize(memo) def parse_interactive(self, lexer: LexerThread, start: str): return self.parser.parse(lexer, start, start_interactive=True) def parse(self, lexer, start, on_error=None): try: return self.parser.parse(lexer, start) except UnexpectedInput as e: if on_error is None: raise while True: if isinstance(e, UnexpectedCharacters): s = e.interactive_parser.lexer_thread.state p = s.line_ctr.char_pos if not on_error(e): raise e if isinstance(e, UnexpectedCharacters): # If user didn't change the character position, then we should if p == s.line_ctr.char_pos: s.line_ctr.feed(s.text[p:p+1]) try: return e.interactive_parser.resume_parse() except UnexpectedToken as e2: if (isinstance(e, UnexpectedToken) and e.token.type == e2.token.type == '$END' and e.interactive_parser == e2.interactive_parser): # Prevent infinite loop raise e2 e = e2 except UnexpectedCharacters as e2: e = e2 class _Parser: parse_table: ParseTableBase callbacks: ParserCallbacks debug: bool def __init__(self, parse_table: ParseTableBase, callbacks: ParserCallbacks, debug: bool=False): self.parse_table = parse_table self.callbacks = callbacks self.debug = debug def parse(self, lexer: LexerThread, start: str, value_stack=None, state_stack=None, start_interactive=False): parse_conf = ParseConf(self.parse_table, self.callbacks, start) parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) if start_interactive: return InteractiveParser(self, parser_state, parser_state.lexer) return self.parse_from_state(parser_state) def parse_from_state(self, state: ParserState, last_token: Optional[Token]=None): """Run the main LALR parser loop Parameters: state - the initial state. Changed in-place. last_token - Used only for line information in case of an empty lexer. 
""" try: token = last_token for token in state.lexer.lex(state): assert token is not None state.feed_token(token) end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) return state.feed_token(end_token, True) except UnexpectedInput as e: try: e.interactive_parser = InteractiveParser(self, state, state.lexer) except NameError: pass raise e except Exception as e: if self.debug: print("") print("STATE STACK DUMP") print("----------------") for i, s in enumerate(state.state_stack): print('%d)' % i , s) print("") raise ###} lark-1.2.2/lark/parsers/lalr_parser_state.py000066400000000000000000000073211465673407200211560ustar00rootroot00000000000000from copy import deepcopy, copy from typing import Dict, Any, Generic, List from ..lexer import Token, LexerThread from ..common import ParserCallbacks from .lalr_analysis import Shift, ParseTableBase, StateT from lark.exceptions import UnexpectedToken ###{standalone class ParseConf(Generic[StateT]): __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states' parse_table: ParseTableBase[StateT] callbacks: ParserCallbacks start: str start_state: StateT end_state: StateT states: Dict[StateT, Dict[str, tuple]] def __init__(self, parse_table: ParseTableBase[StateT], callbacks: ParserCallbacks, start: str): self.parse_table = parse_table self.start_state = self.parse_table.start_states[start] self.end_state = self.parse_table.end_states[start] self.states = self.parse_table.states self.callbacks = callbacks self.start = start class ParserState(Generic[StateT]): __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack' parse_conf: ParseConf[StateT] lexer: LexerThread state_stack: List[StateT] value_stack: list def __init__(self, parse_conf: ParseConf[StateT], lexer: LexerThread, state_stack=None, value_stack=None): self.parse_conf = parse_conf self.lexer = lexer self.state_stack = state_stack or [self.parse_conf.start_state] self.value_stack = value_stack or [] @property def position(self) -> StateT: return self.state_stack[-1] # Necessary for match_examples() to work def __eq__(self, other) -> bool: if not isinstance(other, ParserState): return NotImplemented return len(self.state_stack) == len(other.state_stack) and self.position == other.position def __copy__(self): return self.copy() def copy(self, deepcopy_values=True) -> 'ParserState[StateT]': return type(self)( self.parse_conf, self.lexer, # XXX copy copy(self.state_stack), deepcopy(self.value_stack) if deepcopy_values else copy(self.value_stack), ) def feed_token(self, token: Token, is_end=False) -> Any: state_stack = self.state_stack value_stack = self.value_stack states = self.parse_conf.states end_state = self.parse_conf.end_state callbacks = self.parse_conf.callbacks while True: state = state_stack[-1] try: action, arg = states[state][token.type] except KeyError: expected = {s for s in states[state].keys() if s.isupper()} raise UnexpectedToken(token, expected, state=self, interactive_parser=None) assert arg != end_state if action is Shift: # shift once and return assert not is_end state_stack.append(arg) value_stack.append(token if token.type not in callbacks else callbacks[token.type](token)) return else: # reduce+shift as many times as necessary rule = arg size = len(rule.expansion) if size: s = value_stack[-size:] del state_stack[-size:] del value_stack[-size:] else: s = [] value = callbacks[rule](s) if callbacks else s _action, new_state = states[state_stack[-1]][rule.origin.name] assert _action is Shift 
state_stack.append(new_state) value_stack.append(value) if is_end and state_stack[-1] == end_state: return value_stack[-1] ###} lark-1.2.2/lark/parsers/xearley.py000066400000000000000000000172211465673407200171210ustar00rootroot00000000000000"""This module implements an Earley parser with a dynamic lexer The core Earley algorithm used here is based on Elizabeth Scott's implementation, here: https://www.sciencedirect.com/science/article/pii/S1571066108001497 That is probably the best reference for understanding the algorithm here. The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format is better documented here: http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/ Instead of running a lexer beforehand, or using a costy char-by-char method, this parser uses regular expressions by necessity, achieving high-performance while maintaining all of Earley's power in parsing any CFG. """ from typing import TYPE_CHECKING, Callable, Optional, List, Any from collections import defaultdict from ..tree import Tree from ..exceptions import UnexpectedCharacters from ..lexer import Token from ..grammar import Terminal from .earley import Parser as BaseParser from .earley_forest import TokenNode if TYPE_CHECKING: from ..common import LexerConf, ParserConf class Parser(BaseParser): def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matcher: Callable, resolve_ambiguity: bool=True, complete_lex: bool=False, debug: bool=False, tree_class: Optional[Callable[[str, List], Any]]=Tree, ordered_sets: bool=True): BaseParser.__init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity, debug, tree_class, ordered_sets) self.ignore = [Terminal(t) for t in lexer_conf.ignore] self.complete_lex = complete_lex def _parse(self, stream, columns, to_scan, start_symbol=None): def scan(i, to_scan): """The core Earley Scanner. This is a custom implementation of the scanner that uses the Lark lexer to match tokens. The scan list is built by the Earley predictor, based on the previously completed tokens. This ensures that at each phase of the parse we have a custom lexer context, allowing for more complex ambiguities.""" node_cache = {} # 1) Loop the expectations and ask the lexer to match. # Since regexp is forward looking on the input stream, and we only # want to process tokens when we hit the point in the stream at which # they complete, we push all tokens into a buffer (delayed_matches), to # be held possibly for a later parse step when we reach the point in the # input stream at which they complete. for item in self.Set(to_scan): m = match(item.expect, stream, i) if m: t = Token(item.expect.name, m.group(0), i, text_line, text_column) delayed_matches[m.end()].append( (item, i, t) ) if self.complete_lex: s = m.group(0) for j in range(1, len(s)): m = match(item.expect, s[:-j]) if m: t = Token(item.expect.name, m.group(0), i, text_line, text_column) delayed_matches[i+m.end()].append( (item, i, t) ) # XXX The following 3 lines were commented out for causing a bug. See issue #768 # # Remove any items that successfully matched in this pass from the to_scan buffer. # # This ensures we don't carry over tokens that already matched, if we're ignoring below. # to_scan.remove(item) # 3) Process any ignores. This is typically used for e.g. whitespace. # We carry over any unmatched items from the to_scan buffer to be matched again after # the ignore. This should allow us to use ignored symbols in non-terminals to implement # e.g. mandatory spacing. 
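            # Each ignore terminal is tried at the current position; on a match, the still-pending
            # items in `to_scan` (and any already-completed start symbol) are re-queued at the
            # position just past the ignored text.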
for x in self.ignore: m = match(x, stream, i) if m: # Carry over any items still in the scan buffer, to past the end of the ignored items. delayed_matches[m.end()].extend([(item, i, None) for item in to_scan ]) # If we're ignoring up to the end of the file, # carry over the start symbol if it already completed. delayed_matches[m.end()].extend([(item, i, None) for item in columns[i] if item.is_complete and item.s == start_symbol]) next_to_scan = self.Set() next_set = self.Set() columns.append(next_set) transitives.append({}) ## 4) Process Tokens from delayed_matches. # This is the core of the Earley scanner. Create an SPPF node for each Token, # and create the symbol node in the SPPF tree. Advance the item that completed, # and add the resulting new item to either the Earley set (for processing by the # completer/predictor) or the to_scan buffer for the next parse step. for item, start, token in delayed_matches[i+1]: if token is not None: token.end_line = text_line token.end_column = text_column + 1 token.end_pos = i + 1 new_item = item.advance() label = (new_item.s, new_item.start, i + 1) token_node = TokenNode(token, terminals[token.type]) new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label)) new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token_node) else: new_item = item if new_item.expect in self.TERMINALS: # add (B ::= Aai+1.B, h, y) to Q' next_to_scan.add(new_item) else: # add (B ::= Aa+1.B, h, y) to Ei+1 next_set.add(new_item) del delayed_matches[i+1] # No longer needed, so unburden memory if not next_set and not delayed_matches and not next_to_scan: considered_rules = list(sorted(to_scan, key=lambda key: key.rule.origin.name)) raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan), state=frozenset(i.s for i in to_scan), considered_rules=considered_rules ) return next_to_scan delayed_matches = defaultdict(list) match = self.term_matcher terminals = self.lexer_conf.terminals_by_name # Cache for nodes & tokens created in a particular parse step. transitives = [{}] text_line = 1 text_column = 1 ## The main Earley loop. # Run the Prediction/Completion cycle for any Items in the current Earley set. # Completions will be added to the SPPF tree, and predictions will be recursively # processed down to terminals/empty nodes to be added to the scanner for the next # step. i = 0 for token in stream: self.predict_and_complete(i, to_scan, columns, transitives) to_scan = scan(i, to_scan) if token == '\n': text_line += 1 text_column = 1 else: text_column += 1 i += 1 self.predict_and_complete(i, to_scan, columns, transitives) ## Column is now the final column in the parse. assert i == len(columns)-1 return to_scan lark-1.2.2/lark/py.typed000066400000000000000000000000001465673407200151010ustar00rootroot00000000000000lark-1.2.2/lark/reconstruct.py000066400000000000000000000072631465673407200163510ustar00rootroot00000000000000"""This is an experimental tool for reconstructing text from a shaped tree, based on a Lark grammar. 
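Example of intended use (a sketch only; ``grammar_text`` and ``input_text`` are placeholders, and the parser must be built with ``maybe_placeholders=False``, which the tree matcher requires)::

    parser = Lark(grammar_text, maybe_placeholders=False)
    tree = parser.parse(input_text)
    output_text = Reconstructor(parser).reconstruct(tree)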
""" from typing import Dict, Callable, Iterable, Optional from .lark import Lark from .tree import Tree, ParseTree from .visitors import Transformer_InPlace from .lexer import Token, PatternStr, TerminalDef from .grammar import Terminal, NonTerminal, Symbol from .tree_matcher import TreeMatcher, is_discarded_terminal from .utils import is_id_continue def is_iter_empty(i): try: _ = next(i) return False except StopIteration: return True class WriteTokensTransformer(Transformer_InPlace): "Inserts discarded tokens into their correct place, according to the rules of grammar" tokens: Dict[str, TerminalDef] term_subs: Dict[str, Callable[[Symbol], str]] def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None: self.tokens = tokens self.term_subs = term_subs def __default__(self, data, children, meta): if not getattr(meta, 'match_tree', False): return Tree(data, children) iter_args = iter(children) to_write = [] for sym in meta.orig_expansion: if is_discarded_terminal(sym): try: v = self.term_subs[sym.name](sym) except KeyError: t = self.tokens[sym.name] if not isinstance(t.pattern, PatternStr): raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t) v = t.pattern.value to_write.append(v) else: x = next(iter_args) if isinstance(x, list): to_write += x else: if isinstance(x, Token): assert Terminal(x.type) == sym, x else: assert NonTerminal(x.data) == sym, (sym, x) to_write.append(x) assert is_iter_empty(iter_args) return to_write class Reconstructor(TreeMatcher): """ A Reconstructor that will, given a full parse Tree, generate source code. Note: The reconstructor cannot generate values from regexps. If you need to produce discarded regexes, such as newlines, use `term_subs` and provide default values for them. 
Parameters: parser: a Lark instance term_subs: a dictionary of [Terminal name as str] to [output text as str] """ write_tokens: WriteTokensTransformer def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None: TreeMatcher.__init__(self, parser) self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {}) def _reconstruct(self, tree): unreduced_tree = self.match_tree(tree, tree.data) res = self.write_tokens.transform(unreduced_tree) for item in res: if isinstance(item, Tree): # TODO use orig_expansion.rulename to support templates yield from self._reconstruct(item) else: yield item def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str: x = self._reconstruct(tree) if postproc: x = postproc(x) y = [] prev_item = '' for item in x: if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]): y.append(' ') y.append(item) prev_item = item return ''.join(y) lark-1.2.2/lark/tools/000077500000000000000000000000001465673407200145545ustar00rootroot00000000000000lark-1.2.2/lark/tools/__init__.py000066400000000000000000000046451465673407200166760ustar00rootroot00000000000000import sys from argparse import ArgumentParser, FileType from textwrap import indent from logging import DEBUG, INFO, WARN, ERROR from typing import Optional import warnings from lark import Lark, logger try: from interegular import logger as interegular_logger has_interegular = True except ImportError: has_interegular = False lalr_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options') flags = [ ('d', 'debug'), 'keep_all_tokens', 'regex', 'propagate_positions', 'maybe_placeholders', 'use_bytes' ] options = ['start', 'lexer'] lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times") lalr_argparser.add_argument('-s', '--start', action='append', default=[]) lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('basic', 'contextual')) lalr_argparser.add_argument('-o', '--out', type=FileType('w', encoding='utf-8'), default=sys.stdout, help='the output file (default=stdout)') lalr_argparser.add_argument('grammar_file', type=FileType('r', encoding='utf-8'), help='A valid .lark file') for flag in flags: if isinstance(flag, tuple): options.append(flag[1]) lalr_argparser.add_argument('-' + flag[0], '--' + flag[1], action='store_true') elif isinstance(flag, str): options.append(flag) lalr_argparser.add_argument('--' + flag, action='store_true') else: raise NotImplementedError("flags must only contain strings or tuples of strings") def build_lalr(namespace): logger.setLevel((ERROR, WARN, INFO, DEBUG)[min(namespace.verbose, 3)]) if has_interegular: interegular_logger.setLevel(logger.getEffectiveLevel()) if len(namespace.start) == 0: namespace.start.append('start') kwargs = {n: getattr(namespace, n) for n in options} return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out def showwarning_as_comment(message, category, filename, lineno, file=None, line=None): # Based on warnings._showwarnmsg_impl text = warnings.formatwarning(message, category, filename, lineno, line) text = indent(text, '# ') if file is None: file = sys.stderr if file is None: return try: file.write(text) except OSError: pass def make_warnings_comments(): warnings.showwarning = showwarning_as_comment 
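# Rough usage sketch (not a public API guarantee; the grammar filename is hypothetical):
#
#     parser = ArgumentParser(parents=[lalr_argparser])
#     ns = parser.parse_args(['my_grammar.lark', '-s', 'start'])
#     lark_instance, out_file = build_lalr(ns)
#
# This mirrors how lark.tools.serialize and lark.tools.standalone drive these helpers.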
lark-1.2.2/lark/tools/nearley.py000066400000000000000000000141711465673407200165710ustar00rootroot00000000000000"Converts Nearley grammars to Lark" import os.path import sys import codecs import argparse from lark import Lark, Transformer, v_args nearley_grammar = r""" start: (ruledef|directive)+ directive: "@" NAME (STRING|NAME) | "@" JS -> js_code ruledef: NAME "->" expansions | NAME REGEXP "->" expansions -> macro expansions: expansion ("|" expansion)* expansion: expr+ js ?expr: item (":" /[+*?]/)? ?item: rule|string|regexp|null | "(" expansions ")" rule: NAME string: STRING regexp: REGEXP null: "null" JS: /{%.*?%}/s js: JS? NAME: /[a-zA-Z_$]\w*/ COMMENT: /#[^\n]*/ REGEXP: /\[.*?\]/ STRING: _STRING "i"? %import common.ESCAPED_STRING -> _STRING %import common.WS %ignore WS %ignore COMMENT """ nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='basic') def _get_rulename(name): name = {'_': '_ws_maybe', '__': '_ws'}.get(name, name) return 'n_' + name.replace('$', '__DOLLAR__').lower() @v_args(inline=True) class NearleyToLark(Transformer): def __init__(self): self._count = 0 self.extra_rules = {} self.extra_rules_rev = {} self.alias_js_code = {} def _new_function(self, code): name = 'alias_%d' % self._count self._count += 1 self.alias_js_code[name] = code return name def _extra_rule(self, rule): if rule in self.extra_rules_rev: return self.extra_rules_rev[rule] name = 'xrule_%d' % len(self.extra_rules) assert name not in self.extra_rules self.extra_rules[name] = rule self.extra_rules_rev[rule] = name return name def rule(self, name): return _get_rulename(name) def ruledef(self, name, exps): return '!%s: %s' % (_get_rulename(name), exps) def expr(self, item, op): rule = '(%s)%s' % (item, op) return self._extra_rule(rule) def regexp(self, r): return '/%s/' % r def null(self): return '' def string(self, s): return self._extra_rule(s) def expansion(self, *x): x, js = x[:-1], x[-1] if js.children: js_code ,= js.children js_code = js_code[2:-2] alias = '-> ' + self._new_function(js_code) else: alias = '' return ' '.join(x) + alias def expansions(self, *x): return '%s' % ('\n |'.join(x)) def start(self, *rules): return '\n'.join(filter(None, rules)) def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes): rule_defs = [] tree = nearley_grammar_parser.parse(g) for statement in tree.children: if statement.data == 'directive': directive, arg = statement.children if directive in ('builtin', 'include'): folder = builtin_path if directive == 'builtin' else folder_path path = os.path.join(folder, arg[1:-1]) if path not in includes: includes.add(path) with codecs.open(path, encoding='utf8') as f: text = f.read() rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes) else: assert False, directive elif statement.data == 'js_code': code ,= statement.children code = code[2:-2] js_code.append(code) elif statement.data == 'macro': pass # TODO Add support for macros! 
elif statement.data == 'ruledef': rule_defs.append(n2l.transform(statement)) else: raise Exception("Unknown statement: %s" % statement) return rule_defs def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False): import js2py emit_code = [] def emit(x=None): if x: emit_code.append(x) emit_code.append('\n') js_code = ['function id(x) {return x[0];}'] n2l = NearleyToLark() rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set()) lark_g = '\n'.join(rule_defs) lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items()) emit('from lark import Lark, Transformer') emit() emit('grammar = ' + repr(lark_g)) emit() for alias, code in n2l.alias_js_code.items(): js_code.append('%s = (%s);' % (alias, code)) if es6: emit(js2py.translate_js6('\n'.join(js_code))) else: emit(js2py.translate_js('\n'.join(js_code))) emit('class TransformNearley(Transformer):') for alias in n2l.alias_js_code: emit(" %s = var.get('%s').to_python()" % (alias, alias)) emit(" __default__ = lambda self, n, c, m: c if c else None") emit() emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start) emit('def parse(text):') emit(' return TransformNearley().transform(parser.parse(text))') return ''.join(emit_code) def main(fn, start, nearley_lib, es6=False): with codecs.open(fn, encoding='utf8') as f: grammar = f.read() return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)), es6=es6) def get_arg_parser(): parser = argparse.ArgumentParser(description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.') parser.add_argument('nearley_grammar', help='Path to the file containing the nearley grammar') parser.add_argument('start_rule', help='Rule within the nearley grammar to make the base rule') parser.add_argument('nearley_lib', help='Path to root directory of nearley codebase (used for including builtins)') parser.add_argument('--es6', help='Enable experimental ES6 support', action='store_true') return parser if __name__ == '__main__': parser = get_arg_parser() if len(sys.argv) == 1: parser.print_help(sys.stderr) sys.exit(1) args = parser.parse_args() print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6)) lark-1.2.2/lark/tools/serialize.py000066400000000000000000000017051465673407200171200ustar00rootroot00000000000000import sys import json from lark.grammar import Rule from lark.lexer import TerminalDef from lark.tools import lalr_argparser, build_lalr import argparse argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[lalr_argparser], description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file", epilog='Look at the Lark documentation for more info on the options') def serialize(lark_inst, outfile): data, memo = lark_inst.memo_serialize([TerminalDef, Rule]) outfile.write('{\n') outfile.write(' "data": %s,\n' % json.dumps(data)) outfile.write(' "memo": %s\n' % json.dumps(memo)) outfile.write('}\n') def main(): if len(sys.argv)==1: argparser.print_help(sys.stderr) sys.exit(1) ns = argparser.parse_args() serialize(*build_lalr(ns)) if __name__ == '__main__': main() lark-1.2.2/lark/tools/standalone.py000066400000000000000000000127731465673407200172700ustar00rootroot00000000000000###{standalone # # # Lark Stand-alone Generator Tool # ---------------------------------- # Generates a stand-alone LALR(1) parser # # Git: 
https://github.com/erezsh/lark # Author: Erez Shinan (erezshin@gmail.com) # # # >>> LICENSE # # This tool and its generated code use a separate license from Lark, # and are subject to the terms of the Mozilla Public License, v. 2.0. # If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. # # If you wish to purchase a commercial license for this tool and its # generated code, you may contact me via email or otherwise. # # If MPL2 is incompatible with your free or open-source project, # contact me and we'll work it out. # # from copy import deepcopy from abc import ABC, abstractmethod from types import ModuleType from typing import ( TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, Union, Iterable, IO, TYPE_CHECKING, overload, Sequence, Pattern as REPattern, ClassVar, Set, Mapping ) ###} import sys import token, tokenize import os from os import path from collections import defaultdict from functools import partial from argparse import ArgumentParser import lark from lark.tools import lalr_argparser, build_lalr, make_warnings_comments from lark.grammar import Rule from lark.lexer import TerminalDef _dir = path.dirname(__file__) _larkdir = path.join(_dir, path.pardir) EXTRACT_STANDALONE_FILES = [ 'tools/standalone.py', 'exceptions.py', 'utils.py', 'tree.py', 'visitors.py', 'grammar.py', 'lexer.py', 'common.py', 'parse_tree_builder.py', 'parsers/lalr_analysis.py', 'parsers/lalr_parser_state.py', 'parsers/lalr_parser.py', 'parsers/lalr_interactive_parser.py', 'parser_frontends.py', 'lark.py', 'indenter.py', ] def extract_sections(lines): section = None text = [] sections = defaultdict(list) for line in lines: if line.startswith('###'): if line[3] == '{': section = line[4:].strip() elif line[3] == '}': sections[section] += text section = None text = [] else: raise ValueError(line) elif section: text.append(line) return {name: ''.join(text) for name, text in sections.items()} def strip_docstrings(line_gen): """ Strip comments and docstrings from a file. 
Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings """ res = [] prev_toktype = token.INDENT last_lineno = -1 last_col = 0 tokgen = tokenize.generate_tokens(line_gen) for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen: if slineno > last_lineno: last_col = 0 if scol > last_col: res.append(" " * (scol - last_col)) if toktype == token.STRING and prev_toktype == token.INDENT: # Docstring res.append("#--") elif toktype == tokenize.COMMENT: # Comment res.append("##\n") else: res.append(ttext) prev_toktype = toktype last_col = ecol last_lineno = elineno return ''.join(res) def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False): if output is None: output = partial(print, file=out) import pickle, zlib, base64 def compressed_output(obj): s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL) c = zlib.compress(s) output(repr(base64.b64encode(c))) def output_decompress(name): output('%(name)s = pickle.loads(zlib.decompress(base64.b64decode(%(name)s)))' % locals()) output('# The file was automatically generated by Lark v%s' % lark.__version__) output('__version__ = "%s"' % lark.__version__) output() for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES): with open(os.path.join(_larkdir, pyfile)) as f: code = extract_sections(f)['standalone'] if i: # if not this file code = strip_docstrings(partial(next, iter(code.splitlines(True)))) output(code) data, m = lark_inst.memo_serialize([TerminalDef, Rule]) output('import pickle, zlib, base64') if compress: output('DATA = (') compressed_output(data) output(')') output_decompress('DATA') output('MEMO = (') compressed_output(m) output(')') output_decompress('MEMO') else: output('DATA = (') output(data) output(')') output('MEMO = (') output(m) output(')') output('Shift = 0') output('Reduce = 1') output("def Lark_StandAlone(**kwargs):") output(" return Lark._load_from_dict(DATA, MEMO, **kwargs)") def main(): make_warnings_comments() parser = ArgumentParser(prog="prog='python -m lark.tools.standalone'", description="Lark Stand-alone Generator Tool", parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options') parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression") if len(sys.argv) == 1: parser.print_help(sys.stderr) sys.exit(1) ns = parser.parse_args() lark_inst, out = build_lalr(ns) gen_standalone(lark_inst, out=out, compress=ns.compress) ns.out.close() ns.grammar_file.close() if __name__ == '__main__': main() lark-1.2.2/lark/tree.py000066400000000000000000000205121465673407200147250ustar00rootroot00000000000000import sys from copy import deepcopy from typing import List, Callable, Iterator, Union, Optional, Generic, TypeVar, TYPE_CHECKING if TYPE_CHECKING: from .lexer import TerminalDef, Token try: import rich except ImportError: pass from typing import Literal ###{standalone class Meta: empty: bool line: int column: int start_pos: int end_line: int end_column: int end_pos: int orig_expansion: 'List[TerminalDef]' match_tree: bool def __init__(self): self.empty = True _Leaf_T = TypeVar("_Leaf_T") Branch = Union[_Leaf_T, 'Tree[_Leaf_T]'] class Tree(Generic[_Leaf_T]): """The main tree class. Creates a new tree, and stores "data" and "children" in attributes of the same name. Trees can be hashed and compared. Parameters: data: The name of the rule or alias children: List of matched sub-rules and terminals meta: Line & Column numbers (if ``propagate_positions`` is enabled). 
meta attributes: (line, column, end_line, end_column, start_pos, end_pos, container_line, container_column, container_end_line, container_end_column) container_* attributes consider all symbols, including those that have been inlined in the tree. For example, in the rule 'a: _A B _C', the regular attributes will mark the start and end of B, but the container_* attributes will also include _A and _C in the range. However, rules that contain 'a' will consider it in full, including _A and _C for all attributes. """ data: str children: 'List[Branch[_Leaf_T]]' def __init__(self, data: str, children: 'List[Branch[_Leaf_T]]', meta: Optional[Meta]=None) -> None: self.data = data self.children = children self._meta = meta @property def meta(self) -> Meta: if self._meta is None: self._meta = Meta() return self._meta def __repr__(self): return 'Tree(%r, %r)' % (self.data, self.children) def _pretty_label(self): return self.data def _pretty(self, level, indent_str): yield f'{indent_str*level}{self._pretty_label()}' if len(self.children) == 1 and not isinstance(self.children[0], Tree): yield f'\t{self.children[0]}\n' else: yield '\n' for n in self.children: if isinstance(n, Tree): yield from n._pretty(level+1, indent_str) else: yield f'{indent_str*(level+1)}{n}\n' def pretty(self, indent_str: str=' ') -> str: """Returns an indented string representation of the tree. Great for debugging. """ return ''.join(self._pretty(0, indent_str)) def __rich__(self, parent:Optional['rich.tree.Tree']=None) -> 'rich.tree.Tree': """Returns a tree widget for the 'rich' library. Example: :: from rich import print from lark import Tree tree = Tree('root', ['node1', 'node2']) print(tree) """ return self._rich(parent) def _rich(self, parent): if parent: tree = parent.add(f'[bold]{self.data}[/bold]') else: import rich.tree tree = rich.tree.Tree(self.data) for c in self.children: if isinstance(c, Tree): c._rich(tree) else: tree.add(f'[green]{c}[/green]') return tree def __eq__(self, other): try: return self.data == other.data and self.children == other.children except AttributeError: return False def __ne__(self, other): return not (self == other) def __hash__(self) -> int: return hash((self.data, tuple(self.children))) def iter_subtrees(self) -> 'Iterator[Tree[_Leaf_T]]': """Depth-first iteration. Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG). """ queue = [self] subtrees = dict() for subtree in queue: subtrees[id(subtree)] = subtree queue += [c for c in reversed(subtree.children) if isinstance(c, Tree) and id(c) not in subtrees] del queue return reversed(list(subtrees.values())) def iter_subtrees_topdown(self): """Breadth-first iteration. Iterates over all the subtrees, return nodes in order like pretty() does. """ stack = [self] stack_append = stack.append stack_pop = stack.pop while stack: node = stack_pop() if not isinstance(node, Tree): continue yield node for child in reversed(node.children): stack_append(child) def find_pred(self, pred: 'Callable[[Tree[_Leaf_T]], bool]') -> 'Iterator[Tree[_Leaf_T]]': """Returns all nodes of the tree that evaluate pred(node) as true.""" return filter(pred, self.iter_subtrees()) def find_data(self, data: str) -> 'Iterator[Tree[_Leaf_T]]': """Returns all nodes of the tree whose data equals the given data.""" return self.find_pred(lambda t: t.data == data) ###} def expand_kids_by_data(self, *data_values): """Expand (inline) children with any of the given data values. 
Returns True if anything changed""" changed = False for i in range(len(self.children)-1, -1, -1): child = self.children[i] if isinstance(child, Tree) and child.data in data_values: self.children[i:i+1] = child.children changed = True return changed def scan_values(self, pred: 'Callable[[Branch[_Leaf_T]], bool]') -> Iterator[_Leaf_T]: """Return all values in the tree that evaluate pred(value) as true. This can be used to find all the tokens in the tree. Example: >>> all_tokens = tree.scan_values(lambda v: isinstance(v, Token)) """ for c in self.children: if isinstance(c, Tree): for t in c.scan_values(pred): yield t else: if pred(c): yield c def __deepcopy__(self, memo): return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta) def copy(self) -> 'Tree[_Leaf_T]': return type(self)(self.data, self.children) def set(self, data: str, children: 'List[Branch[_Leaf_T]]') -> None: self.data = data self.children = children ParseTree = Tree['Token'] class SlottedTree(Tree): __slots__ = 'data', 'children', 'rule', '_meta' def pydot__tree_to_png(tree: Tree, filename: str, rankdir: 'Literal["TB", "LR", "BT", "RL"]'="LR", **kwargs) -> None: graph = pydot__tree_to_graph(tree, rankdir, **kwargs) graph.write_png(filename) def pydot__tree_to_dot(tree: Tree, filename, rankdir="LR", **kwargs): graph = pydot__tree_to_graph(tree, rankdir, **kwargs) graph.write(filename) def pydot__tree_to_graph(tree: Tree, rankdir="LR", **kwargs): """Creates a colorful image that represents the tree (data+children, without meta) Possible values for `rankdir` are "TB", "LR", "BT", "RL", corresponding to directed graphs drawn from top to bottom, from left to right, from bottom to top, and from right to left, respectively. `kwargs` can be any graph attribute (e. g. `dpi=200`). For a list of possible attributes, see https://www.graphviz.org/doc/info/attrs.html. """ import pydot # type: ignore[import-not-found] graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs) i = [0] def new_leaf(leaf): node = pydot.Node(i[0], label=repr(leaf)) i[0] += 1 graph.add_node(node) return node def _to_pydot(subtree): color = hash(subtree.data) & 0xffffff color |= 0x808080 subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child) for child in subtree.children] node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data) i[0] += 1 graph.add_node(node) for subnode in subnodes: graph.add_edge(pydot.Edge(node, subnode)) return node _to_pydot(tree) return graph lark-1.2.2/lark/tree_matcher.py000066400000000000000000000135631465673407200164400ustar00rootroot00000000000000"""Tree matcher based on Lark grammar""" import re from collections import defaultdict from . 
import Tree, Token from .common import ParserConf from .parsers import earley from .grammar import Rule, Terminal, NonTerminal def is_discarded_terminal(t): return t.is_term and t.filter_out class _MakeTreeMatch: def __init__(self, name, expansion): self.name = name self.expansion = expansion def __call__(self, args): t = Tree(self.name, args) t.meta.match_tree = True t.meta.orig_expansion = self.expansion return t def _best_from_group(seq, group_key, cmp_key): d = {} for item in seq: key = group_key(item) if key in d: v1 = cmp_key(item) v2 = cmp_key(d[key]) if v2 > v1: d[key] = item else: d[key] = item return list(d.values()) def _best_rules_from_group(rules): rules = _best_from_group(rules, lambda r: r, lambda r: -len(r.expansion)) rules.sort(key=lambda r: len(r.expansion)) return rules def _match(term, token): if isinstance(token, Tree): name, _args = parse_rulename(term.name) return token.data == name elif isinstance(token, Token): return term == Terminal(token.type) assert False, (term, token) def make_recons_rule(origin, expansion, old_expansion): return Rule(origin, expansion, alias=_MakeTreeMatch(origin.name, old_expansion)) def make_recons_rule_to_term(origin, term): return make_recons_rule(origin, [Terminal(term.name)], [term]) def parse_rulename(s): "Parse rule names that may contain a template syntax (like rule{a, b, ...})" name, args_str = re.match(r'(\w+)(?:{(.+)})?', s).groups() args = args_str and [a.strip() for a in args_str.split(',')] return name, args class ChildrenLexer: def __init__(self, children): self.children = children def lex(self, parser_state): return self.children class TreeMatcher: """Match the elements of a tree node, based on an ontology provided by a Lark grammar. Supports templates and inlined rules (`rule{a, b,..}` and `_rule`) Initialize with an instance of Lark. """ def __init__(self, parser): # XXX TODO calling compile twice returns different results! assert not parser.options.maybe_placeholders # XXX TODO: we just ignore the potential existence of a postlexer self.tokens, rules, _extra = parser.grammar.compile(parser.options.start, set()) self.rules_for_root = defaultdict(list) self.rules = list(self._build_recons_rules(rules)) self.rules.reverse() # Choose the best rule from each group of {rule => [rule.alias]}, since we only really need one derivation. 
self.rules = _best_rules_from_group(self.rules) self.parser = parser self._parser_cache = {} def _build_recons_rules(self, rules): "Convert tree-parsing/construction rules to tree-matching rules" expand1s = {r.origin for r in rules if r.options.expand1} aliases = defaultdict(list) for r in rules: if r.alias: aliases[r.origin].append(r.alias) rule_names = {r.origin for r in rules} nonterminals = {sym for sym in rule_names if sym.name.startswith('_') or sym in expand1s or sym in aliases} seen = set() for r in rules: recons_exp = [sym if sym in nonterminals else Terminal(sym.name) for sym in r.expansion if not is_discarded_terminal(sym)] # Skip self-recursive constructs if recons_exp == [r.origin] and r.alias is None: continue sym = NonTerminal(r.alias) if r.alias else r.origin rule = make_recons_rule(sym, recons_exp, r.expansion) if sym in expand1s and len(recons_exp) != 1: self.rules_for_root[sym.name].append(rule) if sym.name not in seen: yield make_recons_rule_to_term(sym, sym) seen.add(sym.name) else: if sym.name.startswith('_') or sym in expand1s: yield rule else: self.rules_for_root[sym.name].append(rule) for origin, rule_aliases in aliases.items(): for alias in rule_aliases: yield make_recons_rule_to_term(origin, NonTerminal(alias)) yield make_recons_rule_to_term(origin, origin) def match_tree(self, tree, rulename): """Match the elements of `tree` to the symbols of rule `rulename`. Parameters: tree (Tree): the tree node to match rulename (str): The expected full rule name (including template args) Returns: Tree: an unreduced tree that matches `rulename` Raises: UnexpectedToken: If no match was found. Note: It's the callers' responsibility match the tree recursively. """ if rulename: # validate name, _args = parse_rulename(rulename) assert tree.data == name else: rulename = tree.data # TODO: ambiguity? try: parser = self._parser_cache[rulename] except KeyError: rules = self.rules + _best_rules_from_group(self.rules_for_root[rulename]) # TODO pass callbacks through dict, instead of alias? callbacks = {rule: rule.alias for rule in rules} conf = ParserConf(rules, callbacks, [rulename]) parser = earley.Parser(self.parser.lexer_conf, conf, _match, resolve_ambiguity=True) self._parser_cache[rulename] = parser # find a full derivation unreduced_tree = parser.parse(ChildrenLexer(tree.children), rulename) assert unreduced_tree.data == rulename return unreduced_tree lark-1.2.2/lark/tree_templates.py000066400000000000000000000137731465673407200170160ustar00rootroot00000000000000"""This module defines utilities for matching and translation tree templates. A tree templates is a tree that contains nodes that are template variables. """ from typing import Union, Optional, Mapping, Dict, Tuple, Iterator from lark import Tree, Transformer from lark.exceptions import MissingVariableError Branch = Union[Tree[str], str] TreeOrCode = Union[Tree[str], str] MatchResult = Dict[str, Tree] _TEMPLATE_MARKER = '$' class TemplateConf: """Template Configuration Allows customization for different uses of Template parse() must return a Tree instance. """ def __init__(self, parse=None): self._parse = parse def test_var(self, var: Union[Tree[str], str]) -> Optional[str]: """Given a tree node, if it is a template variable return its name. Otherwise, return None. 
This method may be overridden for customization Parameters: var: Tree | str - The tree node to test """ if isinstance(var, str): return _get_template_name(var) if ( isinstance(var, Tree) and var.data == "var" and len(var.children) > 0 and isinstance(var.children[0], str) ): return _get_template_name(var.children[0]) return None def _get_tree(self, template: TreeOrCode) -> Tree[str]: if isinstance(template, str): assert self._parse template = self._parse(template) if not isinstance(template, Tree): raise TypeError("template parser must return a Tree instance") return template def __call__(self, template: Tree[str]) -> 'Template': return Template(template, conf=self) def _match_tree_template(self, template: TreeOrCode, tree: Branch) -> Optional[MatchResult]: """Returns dict of {var: match} if found a match, else None """ template_var = self.test_var(template) if template_var: if not isinstance(tree, Tree): raise TypeError(f"Template variables can only match Tree instances. Not {tree!r}") return {template_var: tree} if isinstance(template, str): if template == tree: return {} return None assert isinstance(template, Tree) and isinstance(tree, Tree), f"template={template} tree={tree}" if template.data == tree.data and len(template.children) == len(tree.children): res = {} for t1, t2 in zip(template.children, tree.children): matches = self._match_tree_template(t1, t2) if matches is None: return None res.update(matches) return res return None class _ReplaceVars(Transformer[str, Tree[str]]): def __init__(self, conf: TemplateConf, vars: Mapping[str, Tree[str]]) -> None: super().__init__() self._conf = conf self._vars = vars def __default__(self, data, children, meta) -> Tree[str]: tree = super().__default__(data, children, meta) var = self._conf.test_var(tree) if var: try: return self._vars[var] except KeyError: raise MissingVariableError(f"No mapping for template variable ({var})") return tree class Template: """Represents a tree template, tied to a specific configuration A tree template is a tree that contains nodes that are template variables. Those variables will match any tree. (future versions may support annotations on the variables, to allow more complex templates) """ def __init__(self, tree: Tree[str], conf: TemplateConf = TemplateConf()): self.conf = conf self.tree = conf._get_tree(tree) def match(self, tree: TreeOrCode) -> Optional[MatchResult]: """Match a tree template to a tree. A tree template without variables will only match ``tree`` if it is equal to the template. Parameters: tree (Tree): The tree to match to the template Returns: Optional[Dict[str, Tree]]: If match is found, returns a dictionary mapping template variable names to their matching tree nodes. If no match was found, returns None. """ tree = self.conf._get_tree(tree) return self.conf._match_tree_template(self.tree, tree) def search(self, tree: TreeOrCode) -> Iterator[Tuple[Tree[str], MatchResult]]: """Search for all occurrences of the tree template inside ``tree``. """ tree = self.conf._get_tree(tree) for subtree in tree.iter_subtrees(): res = self.match(subtree) if res: yield subtree, res def apply_vars(self, vars: Mapping[str, Tree[str]]) -> Tree[str]: """Apply vars to the template tree """ return _ReplaceVars(self.conf, vars).transform(self.tree) def translate(t1: Template, t2: Template, tree: TreeOrCode): """Search tree and translate each occurrence of t1 into t2. 
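Each occurrence found by ``t1.search`` is rewritten in place (via ``Tree.set``) with the result of ``t2.apply_vars``, and the (possibly modified) tree is returned.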
""" tree = t1.conf._get_tree(tree) # ensure it's a tree, parse if necessary and possible for subtree, vars in t1.search(tree): res = t2.apply_vars(vars) subtree.set(res.data, res.children) return tree class TemplateTranslator: """Utility class for translating a collection of patterns """ def __init__(self, translations: Mapping[Template, Template]): assert all(isinstance(k, Template) and isinstance(v, Template) for k, v in translations.items()) self.translations = translations def translate(self, tree: Tree[str]): for k, v in self.translations.items(): tree = translate(k, v, tree) return tree def _get_template_name(value: str) -> Optional[str]: return value.lstrip(_TEMPLATE_MARKER) if value.startswith(_TEMPLATE_MARKER) else None lark-1.2.2/lark/utils.py000066400000000000000000000260061465673407200151320ustar00rootroot00000000000000import unicodedata import os from itertools import product from collections import deque from typing import Callable, Iterator, List, Optional, Tuple, Type, TypeVar, Union, Dict, Any, Sequence, Iterable, AbstractSet ###{standalone import sys, re import logging logger: logging.Logger = logging.getLogger("lark") logger.addHandler(logging.StreamHandler()) # Set to highest level, since we have some warnings amongst the code # By default, we should not output any log messages logger.setLevel(logging.CRITICAL) NO_VALUE = object() T = TypeVar("T") def classify(seq: Iterable, key: Optional[Callable] = None, value: Optional[Callable] = None) -> Dict: d: Dict[Any, Any] = {} for item in seq: k = key(item) if (key is not None) else item v = value(item) if (value is not None) else item try: d[k].append(v) except KeyError: d[k] = [v] return d def _deserialize(data: Any, namespace: Dict[str, Any], memo: Dict) -> Any: if isinstance(data, dict): if '__type__' in data: # Object class_ = namespace[data['__type__']] return class_.deserialize(data, memo) elif '@' in data: return memo[data['@']] return {key:_deserialize(value, namespace, memo) for key, value in data.items()} elif isinstance(data, list): return [_deserialize(value, namespace, memo) for value in data] return data _T = TypeVar("_T", bound="Serialize") class Serialize: """Safe-ish serialization interface that doesn't rely on Pickle Attributes: __serialize_fields__ (List[str]): Fields (aka attributes) to serialize. __serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate. Should include all field types that aren't builtin types. 
""" def memo_serialize(self, types_to_memoize: List) -> Any: memo = SerializeMemoizer(types_to_memoize) return self.serialize(memo), memo.serialize() def serialize(self, memo = None) -> Dict[str, Any]: if memo and memo.in_types(self): return {'@': memo.memoized.get(self)} fields = getattr(self, '__serialize_fields__') res = {f: _serialize(getattr(self, f), memo) for f in fields} res['__type__'] = type(self).__name__ if hasattr(self, '_serialize'): self._serialize(res, memo) return res @classmethod def deserialize(cls: Type[_T], data: Dict[str, Any], memo: Dict[int, Any]) -> _T: namespace = getattr(cls, '__serialize_namespace__', []) namespace = {c.__name__:c for c in namespace} fields = getattr(cls, '__serialize_fields__') if '@' in data: return memo[data['@']] inst = cls.__new__(cls) for f in fields: try: setattr(inst, f, _deserialize(data[f], namespace, memo)) except KeyError as e: raise KeyError("Cannot find key for class", cls, e) if hasattr(inst, '_deserialize'): inst._deserialize() return inst class SerializeMemoizer(Serialize): "A version of serialize that memoizes objects to reduce space" __serialize_fields__ = 'memoized', def __init__(self, types_to_memoize: List) -> None: self.types_to_memoize = tuple(types_to_memoize) self.memoized = Enumerator() def in_types(self, value: Serialize) -> bool: return isinstance(value, self.types_to_memoize) def serialize(self) -> Dict[int, Any]: # type: ignore[override] return _serialize(self.memoized.reversed(), None) @classmethod def deserialize(cls, data: Dict[int, Any], namespace: Dict[str, Any], memo: Dict[Any, Any]) -> Dict[int, Any]: # type: ignore[override] return _deserialize(data, namespace, memo) try: import regex _has_regex = True except ImportError: _has_regex = False if sys.version_info >= (3, 11): import re._parser as sre_parse import re._constants as sre_constants else: import sre_parse import sre_constants categ_pattern = re.compile(r'\\p{[A-Za-z_]+}') def get_regexp_width(expr: str) -> Union[Tuple[int, int], List[int]]: if _has_regex: # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex # match here below. regexp_final = re.sub(categ_pattern, 'A', expr) else: if re.search(categ_pattern, expr): raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr) regexp_final = expr try: # Fixed in next version (past 0.960) of typeshed return [int(x) for x in sre_parse.parse(regexp_final).getwidth()] except sre_constants.error: if not _has_regex: raise ValueError(expr) else: # sre_parse does not support the new features in regex. 
To not completely fail in that case, # we manually test for the most important info (whether the empty string is matched) c = regex.compile(regexp_final) # Python 3.11.7 introduced sre_parse.MAXWIDTH that is used instead of MAXREPEAT # See lark-parser/lark#1376 and python/cpython#109859 MAXWIDTH = getattr(sre_parse, "MAXWIDTH", sre_constants.MAXREPEAT) if c.match('') is None: # MAXREPEAT is a non-picklable subclass of int, therefore needs to be converted to enable caching return 1, int(MAXWIDTH) else: return 0, int(MAXWIDTH) ###} _ID_START = 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc' _ID_CONTINUE = _ID_START + ('Nd', 'Nl',) def _test_unicode_category(s: str, categories: Sequence[str]) -> bool: if len(s) != 1: return all(_test_unicode_category(char, categories) for char in s) return s == '_' or unicodedata.category(s) in categories def is_id_continue(s: str) -> bool: """ Checks if all characters in `s` are alphanumeric characters (Unicode standard, so diacritics, Indian vowels, non-Latin numbers, etc. all pass). Synonymous with a Python `ID_CONTINUE` identifier. See PEP 3131 for details. """ return _test_unicode_category(s, _ID_CONTINUE) def is_id_start(s: str) -> bool: """ Checks if all characters in `s` are alphabetic characters (Unicode standard, so diacritics, Indian vowels, non-Latin numbers, etc. all pass). Synonymous with a Python `ID_START` identifier. See PEP 3131 for details. """ return _test_unicode_category(s, _ID_START) def dedup_list(l: Sequence[T]) -> List[T]: """Given a list (l), removes duplicates from the list, preserving the original order of the list. Assumes that the list entries are hashable.""" return list(dict.fromkeys(l)) class Enumerator(Serialize): def __init__(self) -> None: self.enums: Dict[Any, int] = {} def get(self, item) -> int: if item not in self.enums: self.enums[item] = len(self.enums) return self.enums[item] def __len__(self): return len(self.enums) def reversed(self) -> Dict[int, Any]: r = {v: k for k, v in self.enums.items()} assert len(r) == len(self.enums) return r def combine_alternatives(lists): """ Accepts a list of alternatives, and enumerates all their possible concatenations.
Examples: >>> combine_alternatives([range(2), [4,5]]) [[0, 4], [0, 5], [1, 4], [1, 5]] >>> combine_alternatives(["abc", "xy", '$']) [['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']] >>> combine_alternatives([]) [[]] """ if not lists: return [[]] assert all(l for l in lists), lists return list(product(*lists)) try: import atomicwrites _has_atomicwrites = True except ImportError: _has_atomicwrites = False class FS: exists = staticmethod(os.path.exists) @staticmethod def open(name, mode="r", **kwargs): if _has_atomicwrites and "w" in mode: return atomicwrites.atomic_write(name, mode=mode, overwrite=True, **kwargs) else: return open(name, mode, **kwargs) class fzset(frozenset): def __repr__(self): return '{%s}' % ', '.join(map(repr, self)) def classify_bool(seq: Iterable, pred: Callable) -> Any: false_elems = [] true_elems = [elem for elem in seq if pred(elem) or false_elems.append(elem)] # type: ignore[func-returns-value] return true_elems, false_elems def bfs(initial: Iterable, expand: Callable) -> Iterator: open_q = deque(list(initial)) visited = set(open_q) while open_q: node = open_q.popleft() yield node for next_node in expand(node): if next_node not in visited: visited.add(next_node) open_q.append(next_node) def bfs_all_unique(initial, expand): "bfs, but doesn't keep track of visited (aka seen), because there can be no repetitions" open_q = deque(list(initial)) while open_q: node = open_q.popleft() yield node open_q += expand(node) def _serialize(value: Any, memo: Optional[SerializeMemoizer]) -> Any: if isinstance(value, Serialize): return value.serialize(memo) elif isinstance(value, list): return [_serialize(elem, memo) for elem in value] elif isinstance(value, frozenset): return list(value) # TODO reversible? elif isinstance(value, dict): return {key:_serialize(elem, memo) for key, elem in value.items()} # assert value is None or isinstance(value, (int, float, str, tuple)), value return value def small_factors(n: int, max_factor: int) -> List[Tuple[int, int]]: """ Splits n up into smaller factors and summands <= max_factor. Returns a list of [(a, b), ...] so that the following code returns n: n = 1 for a, b in values: n = n * a + b Currently, we also keep a + b <= max_factor, but that might change """ assert n >= 0 assert max_factor > 2 if n <= max_factor: return [(n, 0)] for a in range(max_factor, 1, -1): r, b = divmod(n, a) if a + b <= max_factor: return small_factors(r, max_factor) + [(a, b)] assert False, "Failed to factorize %s" % n class OrderedSet(AbstractSet[T]): """A minimal OrderedSet implementation, using a dictionary. 
(relies on the dictionary being ordered) """ def __init__(self, items: Iterable[T] =()): self.d = dict.fromkeys(items) def __contains__(self, item: Any) -> bool: return item in self.d def add(self, item: T): self.d[item] = None def __iter__(self) -> Iterator[T]: return iter(self.d) def remove(self, item: T): del self.d[item] def __bool__(self): return bool(self.d) def __len__(self) -> int: return len(self.d) def __repr__(self): return f"{type(self).__name__}({', '.join(map(repr,self))})" lark-1.2.2/lark/visitors.py000066400000000000000000000516561465673407200156650ustar00rootroot00000000000000from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union, Optional, Any, cast from abc import ABC from .utils import combine_alternatives from .tree import Tree, Branch from .exceptions import VisitError, GrammarError from .lexer import Token ###{standalone from functools import wraps, update_wrapper from inspect import getmembers, getmro _Return_T = TypeVar('_Return_T') _Return_V = TypeVar('_Return_V') _Leaf_T = TypeVar('_Leaf_T') _Leaf_U = TypeVar('_Leaf_U') _R = TypeVar('_R') _FUNC = Callable[..., _Return_T] _DECORATED = Union[_FUNC, type] class _DiscardType: """When the Discard value is returned from a transformer callback, that node is discarded and won't appear in the parent. Note: This feature is disabled when the transformer is provided to Lark using the ``transformer`` keyword (aka Tree-less LALR mode). Example: :: class T(Transformer): def ignore_tree(self, children): return Discard def IGNORE_TOKEN(self, token): return Discard """ def __repr__(self): return "lark.visitors.Discard" Discard = _DiscardType() # Transformers class _Decoratable: "Provides support for decorating methods with @v_args" @classmethod def _apply_v_args(cls, visit_wrapper): mro = getmro(cls) assert mro[0] is cls libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)} for name, value in getmembers(cls): # Make sure the function isn't inherited (unless it's overwritten) if name.startswith('_') or (name in libmembers and name not in cls.__dict__): continue if not callable(value): continue # Skip if v_args already applied (at the function level) if isinstance(cls.__dict__[name], _VArgsWrapper): continue setattr(cls, name, _VArgsWrapper(cls.__dict__[name], visit_wrapper)) return cls def __class_getitem__(cls, _): return cls class Transformer(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]): """Transformers work bottom-up (or depth-first), starting with visiting the leaves and working their way up until ending at the root of the tree. For each node visited, the transformer will call the appropriate method (callbacks), according to the node's ``data``, and use the returned value to replace the node, thereby creating a new tree structure. Transformers can be used to implement map & reduce patterns. Because nodes are reduced from leaf to root, at any point the callbacks may assume the children have already been transformed (if applicable). If the transformer cannot find a method with the right name, it will instead call ``__default__``, which by default creates a copy of the node. To discard a node, return Discard (``lark.visitors.Discard``). ``Transformer`` can do anything ``Visitor`` can do, but because it reconstructs the tree, it is slightly less efficient. A transformer without methods essentially performs a non-memoized partial deepcopy. All these classes implement the transformer interface: - ``Transformer`` - Recursively transforms the tree. This is the one you probably want. 
- ``Transformer_InPlace`` - Non-recursive. Changes the tree in-place instead of returning new instances - ``Transformer_InPlaceRecursive`` - Recursive. Changes the tree in-place instead of returning new instances Parameters: visit_tokens (bool, optional): Should the transformer visit tokens in addition to rules. Setting this to ``False`` is slightly faster. Defaults to ``True``. (For processing ignored tokens, use the ``lexer_callbacks`` options) """ __visit_tokens__ = True # For backwards compatibility def __init__(self, visit_tokens: bool=True) -> None: self.__visit_tokens__ = visit_tokens def _call_userfunc(self, tree, new_children=None): # Assumes tree is already transformed children = new_children if new_children is not None else tree.children try: f = getattr(self, tree.data) except AttributeError: return self.__default__(tree.data, children, tree.meta) else: try: wrapper = getattr(f, 'visit_wrapper', None) if wrapper is not None: return f.visit_wrapper(f, tree.data, children, tree.meta) else: return f(children) except GrammarError: raise except Exception as e: raise VisitError(tree.data, tree, e) def _call_userfunc_token(self, token): try: f = getattr(self, token.type) except AttributeError: return self.__default_token__(token) else: try: return f(token) except GrammarError: raise except Exception as e: raise VisitError(token.type, token, e) def _transform_children(self, children): for c in children: if isinstance(c, Tree): res = self._transform_tree(c) elif self.__visit_tokens__ and isinstance(c, Token): res = self._call_userfunc_token(c) else: res = c if res is not Discard: yield res def _transform_tree(self, tree): children = list(self._transform_children(tree.children)) return self._call_userfunc(tree, children) def transform(self, tree: Tree[_Leaf_T]) -> _Return_T: "Transform the given tree, and return the final result" res = list(self._transform_children([tree])) if not res: return None # type: ignore[return-value] assert len(res) == 1 return res[0] def __mul__( self: 'Transformer[_Leaf_T, Tree[_Leaf_U]]', other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V,]]' ) -> 'TransformerChain[_Leaf_T, _Return_V]': """Chain two transformers together, returning a new transformer. """ return TransformerChain(self, other) def __default__(self, data, children, meta): """Default function that is called if there is no attribute matching ``data`` Can be overridden. Defaults to creating a new copy of the tree node (i.e. ``return Tree(data, children, meta)``) """ return Tree(data, children, meta) def __default_token__(self, token): """Default function that is called if there is no attribute matching ``token.type`` Can be overridden. Defaults to returning the token as-is. """ return token def merge_transformers(base_transformer=None, **transformers_to_merge): """Merge a collection of transformers into the base_transformer, each into its own 'namespace'. When called, it will collect the methods from each transformer, and assign them to base_transformer, with their name prefixed with the given keyword, as ``prefix__methodname``. This function is especially useful for processing grammars that import other grammars, thereby creating some of their rules in a 'namespace'. (i.e with a consistent name prefix). In this case, the key for the transformer should match the name of the imported grammar. Parameters: base_transformer (Transformer, optional): The transformer that all other transformers will be added to. 
**transformers_to_merge: Keyword arguments, in the form of ``name_prefix = transformer``. Raises: AttributeError: In case of a name collision in the merged methods Example: :: class TBase(Transformer): def start(self, children): return children[0] + 'bar' class TImportedGrammar(Transformer): def foo(self, children): return "foo" composed_transformer = merge_transformers(TBase(), imported=TImportedGrammar()) t = Tree('start', [ Tree('imported__foo', []) ]) assert composed_transformer.transform(t) == 'foobar' """ if base_transformer is None: base_transformer = Transformer() for prefix, transformer in transformers_to_merge.items(): for method_name in dir(transformer): method = getattr(transformer, method_name) if not callable(method): continue if method_name.startswith("_") or method_name == "transform": continue prefixed_method = prefix + "__" + method_name if hasattr(base_transformer, prefixed_method): raise AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method) setattr(base_transformer, prefixed_method, method) return base_transformer class InlineTransformer(Transformer): # XXX Deprecated def _call_userfunc(self, tree, new_children=None): # Assumes tree is already transformed children = new_children if new_children is not None else tree.children try: f = getattr(self, tree.data) except AttributeError: return self.__default__(tree.data, children, tree.meta) else: return f(*children) class TransformerChain(Generic[_Leaf_T, _Return_T]): transformers: 'Tuple[Union[Transformer, TransformerChain], ...]' def __init__(self, *transformers: 'Union[Transformer, TransformerChain]') -> None: self.transformers = transformers def transform(self, tree: Tree[_Leaf_T]) -> _Return_T: for t in self.transformers: tree = t.transform(tree) return cast(_Return_T, tree) def __mul__( self: 'TransformerChain[_Leaf_T, Tree[_Leaf_U]]', other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V]]' ) -> 'TransformerChain[_Leaf_T, _Return_V]': return TransformerChain(*self.transformers + (other,)) class Transformer_InPlace(Transformer[_Leaf_T, _Return_T]): """Same as Transformer, but non-recursive, and changes the tree in-place instead of returning new instances Useful for huge trees. Conservative in memory. """ def _transform_tree(self, tree): # Cancel recursion return self._call_userfunc(tree) def transform(self, tree: Tree[_Leaf_T]) -> _Return_T: for subtree in tree.iter_subtrees(): subtree.children = list(self._transform_children(subtree.children)) return self._transform_tree(tree) class Transformer_NonRecursive(Transformer[_Leaf_T, _Return_T]): """Same as Transformer but non-recursive. Like Transformer, it doesn't change the original tree. Useful for huge trees. """ def transform(self, tree: Tree[_Leaf_T]) -> _Return_T: # Tree to postfix rev_postfix = [] q: List[Branch[_Leaf_T]] = [tree] while q: t = q.pop() rev_postfix.append(t) if isinstance(t, Tree): q += t.children # Postfix to tree stack: List = [] for x in reversed(rev_postfix): if isinstance(x, Tree): size = len(x.children) if size: args = stack[-size:] del stack[-size:] else: args = [] res = self._call_userfunc(x, args) if res is not Discard: stack.append(res) elif self.__visit_tokens__ and isinstance(x, Token): res = self._call_userfunc_token(x) if res is not Discard: stack.append(res) else: stack.append(x) result, = stack # We should have only one tree remaining # There are no guarantees on the type of the value produced by calling a user func for a # child will produce. 
This means type system can't statically know that the final result is # _Return_T. As a result a cast is required. return cast(_Return_T, result) class Transformer_InPlaceRecursive(Transformer): "Same as Transformer, recursive, but changes the tree in-place instead of returning new instances" def _transform_tree(self, tree): tree.children = list(self._transform_children(tree.children)) return self._call_userfunc(tree) # Visitors class VisitorBase: def _call_userfunc(self, tree): return getattr(self, tree.data, self.__default__)(tree) def __default__(self, tree): """Default function that is called if there is no attribute matching ``tree.data`` Can be overridden. Defaults to doing nothing. """ return tree def __class_getitem__(cls, _): return cls class Visitor(VisitorBase, ABC, Generic[_Leaf_T]): """Tree visitor, non-recursive (can handle huge trees). Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data`` """ def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]: "Visits the tree, starting with the leaves and finally the root (bottom-up)" for subtree in tree.iter_subtrees(): self._call_userfunc(subtree) return tree def visit_topdown(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]: "Visit the tree, starting at the root, and ending at the leaves (top-down)" for subtree in tree.iter_subtrees_topdown(): self._call_userfunc(subtree) return tree class Visitor_Recursive(VisitorBase, Generic[_Leaf_T]): """Bottom-up visitor, recursive. Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data`` Slightly faster than the non-recursive version. """ def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]: "Visits the tree, starting with the leaves and finally the root (bottom-up)" for child in tree.children: if isinstance(child, Tree): self.visit(child) self._call_userfunc(tree) return tree def visit_topdown(self,tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]: "Visit the tree, starting at the root, and ending at the leaves (top-down)" self._call_userfunc(tree) for child in tree.children: if isinstance(child, Tree): self.visit_topdown(child) return tree class Interpreter(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]): """Interpreter walks the tree starting at the root. Visits the tree, starting with the root and finally the leaves (top-down) For each tree node, it calls its methods (provided by user via inheritance) according to ``tree.data``. Unlike ``Transformer`` and ``Visitor``, the Interpreter doesn't automatically visit its sub-branches. The user has to explicitly call ``visit``, ``visit_children``, or use the ``@visit_children_decor``. This allows the user to implement branching and loops. """ def visit(self, tree: Tree[_Leaf_T]) -> _Return_T: # There are no guarantees on the type of the value produced by calling a user func for a # child will produce. So only annotate the public method and use an internal method when # visiting child trees. 
return self._visit_tree(tree) def _visit_tree(self, tree: Tree[_Leaf_T]): f = getattr(self, tree.data) wrapper = getattr(f, 'visit_wrapper', None) if wrapper is not None: return f.visit_wrapper(f, tree.data, tree.children, tree.meta) else: return f(tree) def visit_children(self, tree: Tree[_Leaf_T]) -> List: return [self._visit_tree(child) if isinstance(child, Tree) else child for child in tree.children] def __getattr__(self, name): return self.__default__ def __default__(self, tree): return self.visit_children(tree) _InterMethod = Callable[[Type[Interpreter], _Return_T], _R] def visit_children_decor(func: _InterMethod) -> _InterMethod: "See Interpreter" @wraps(func) def inner(cls, tree): values = cls.visit_children(tree) return func(cls, values) return inner # Decorators def _apply_v_args(obj, visit_wrapper): try: _apply = obj._apply_v_args except AttributeError: return _VArgsWrapper(obj, visit_wrapper) else: return _apply(visit_wrapper) class _VArgsWrapper: """ A wrapper around a Callable. It delegates `__call__` to the Callable. If the Callable has a `__get__`, that is also delegated and the resulting function is wrapped. Otherwise, we use the original function mirroring the behaviour without a __get__. We also have the visit_wrapper attribute to be used by Transformers. """ base_func: Callable def __init__(self, func: Callable, visit_wrapper: Callable[[Callable, str, list, Any], Any]): if isinstance(func, _VArgsWrapper): func = func.base_func self.base_func = func self.visit_wrapper = visit_wrapper update_wrapper(self, func) def __call__(self, *args, **kwargs): return self.base_func(*args, **kwargs) def __get__(self, instance, owner=None): try: # Use the __get__ attribute of the type instead of the instance # to fully mirror the behavior of getattr g = type(self.base_func).__get__ except AttributeError: return self else: return _VArgsWrapper(g(self.base_func, instance, owner), self.visit_wrapper) def __set_name__(self, owner, name): try: f = type(self.base_func).__set_name__ except AttributeError: return else: f(self.base_func, owner, name) def _vargs_inline(f, _data, children, _meta): return f(*children) def _vargs_meta_inline(f, _data, children, meta): return f(meta, *children) def _vargs_meta(f, _data, children, meta): return f(meta, children) def _vargs_tree(f, data, children, meta): return f(Tree(data, children, meta)) def v_args(inline: bool = False, meta: bool = False, tree: bool = False, wrapper: Optional[Callable] = None) -> Callable[[_DECORATED], _DECORATED]: """A convenience decorator factory for modifying the behavior of user-supplied visitor methods. By default, callback methods of transformers/visitors accept one argument - a list of the node's children. ``v_args`` can modify this behavior. When used on a transformer/visitor class definition, it applies to all the callback methods inside it. ``v_args`` can be applied to a single method, or to an entire class. When applied to both, the options given to the method take precedence. Parameters: inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists). meta (bool, optional): Provides two arguments: ``meta`` and ``children`` (instead of just the latter) tree (bool, optional): Provides the entire tree as the argument, instead of the children. wrapper (function, optional): Provide a function to decorate all methods.
Example: :: @v_args(inline=True) class SolveArith(Transformer): def add(self, left, right): return left + right @v_args(meta=True) def mul(self, meta, children): logger.info(f'mul at line {meta.line}') left, right = children return left * right class ReverseNotation(Transformer_InPlace): @v_args(tree=True) def tree_node(self, tree): tree.children = tree.children[::-1] """ if tree and (meta or inline): raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.") func = None if meta: if inline: func = _vargs_meta_inline else: func = _vargs_meta elif inline: func = _vargs_inline elif tree: func = _vargs_tree if wrapper is not None: if func is not None: raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.") func = wrapper def _visitor_args_dec(obj): return _apply_v_args(obj, func) return _visitor_args_dec ###} # --- Visitor Utilities --- class CollapseAmbiguities(Transformer): """ Transforms a tree that contains any number of _ambig nodes into a list of trees, each one containing an unambiguous tree. The length of the resulting list is the product of the length of all _ambig nodes. Warning: This may quickly explode for highly ambiguous trees. """ def _ambig(self, options): return sum(options, []) def __default__(self, data, children_lists, meta): return [Tree(data, children, meta) for children in combine_alternatives(children_lists)] def __default_token__(self, t): return [t] lark-1.2.2/pyproject.toml000066400000000000000000000053031465673407200154000ustar00rootroot00000000000000[build-system] requires = ["setuptools>=61.2.0"] build-backend = "setuptools.build_meta" [project] name = "lark" authors = [{name = "Erez Shinan", email = "erezshin@gmail.com"}] license = {text = "MIT"} description = "a modern parsing library" keywords = ["Earley", "LALR", "parser", "parsing", "ast"] classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Programming Language :: Python :: 3", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: General", "Topic :: Text Processing :: Linguistic", "License :: OSI Approved :: MIT License", ] requires-python = ">=3.8" dependencies = [] dynamic = ["version"] [project.readme] text = """ Lark is a modern general-purpose parsing library for Python. With Lark, you can parse any context-free grammar, efficiently, with very little code. Main Features: - Builds a parse-tree (AST) automagically, based on the structure of the grammar - Earley parser - Can parse all context-free grammars - Full support for ambiguous grammars - LALR(1) parser - Fast and light, competitive with PLY - Can generate a stand-alone parser - CYK parser, for highly ambiguous grammars - EBNF grammar - Unicode fully supported - Automatic line & column tracking - Standard library of terminals (strings, numbers, names, etc.) - Import grammars from Nearley.js - Extensive test suite - And much more! 
Since version 1.2, only Python versions 3.8 and up are supported.""" content-type = "text/markdown" [project.urls] Homepage = "https://github.com/lark-parser/lark" Download = "https://github.com/lark-parser/lark/tarball/master" [project.entry-points.pyinstaller40] hook-dirs = "lark.__pyinstaller:get_hook_dirs" [project.optional-dependencies] regex = ["regex"] nearley = ["js2py"] atomic_cache = ["atomicwrites"] interegular = ["interegular>=0.3.1,<0.4.0"] [tool.setuptools] packages = [ "lark", "lark.parsers", "lark.tools", "lark.grammars", "lark.__pyinstaller", ] include-package-data = true [tool.setuptools.package-data] "*" = ["*.lark"] lark = ["py.typed"] [tool.setuptools.dynamic] version = {attr = "lark.__version__"} [tool.mypy] files = "lark" python_version = "3.8" show_error_codes = true enable_error_code = ["ignore-without-code", "unused-ignore"] exclude = [ "^lark/__pyinstaller", ] # You can disable imports or control per-module/file settings here [[tool.mypy.overrides]] module = [ "js2py" ] ignore_missing_imports = true [tool.coverage.report] exclude_lines = [ "pragma: no cover", "if TYPE_CHECKING:" ] [tool.pyright] include = ["lark"] [tool.pytest.ini_options] minversion = 6.0 addopts = "-ra -q" testpaths =[ "tests" ] python_files = "__main__.py" lark-1.2.2/readthedocs.yml000066400000000000000000000004021465673407200154670ustar00rootroot00000000000000version: 2 formats: all build: os: ubuntu-22.04 tools: python: "3.7" python: # version: 3.7 install: - requirements: docs/requirements.txt # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py lark-1.2.2/test-requirements.txt000066400000000000000000000000541465673407200167230ustar00rootroot00000000000000interegular>=0.3.1,<0.4.0 Js2Py==0.68 regex lark-1.2.2/tests/000077500000000000000000000000001465673407200136255ustar00rootroot00000000000000lark-1.2.2/tests/__init__.py000066400000000000000000000000001465673407200157240ustar00rootroot00000000000000lark-1.2.2/tests/__main__.py000066400000000000000000000021361465673407200157210ustar00rootroot00000000000000from __future__ import absolute_import, print_function import unittest import logging import sys from lark import logger from .test_trees import TestTrees from .test_tools import TestStandalone from .test_cache import TestCache from .test_grammar import TestGrammar from .test_reconstructor import TestReconstructor from .test_tree_forest_transformer import TestTreeForestTransformer from .test_lexer import TestLexer from .test_python_grammar import TestPythonParser from .test_tree_templates import * # We define __all__ to list which TestSuites to run try: from .test_nearley.test_nearley import TestNearley except ImportError: logger.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)") # from .test_selectors import TestSelectors # from .test_grammars import TestPythonG, TestConfigG from .test_logger import Testlogger from .test_parser import * # We define __all__ to list which TestSuites to run if sys.version_info >= (3, 10): from .test_pattern_matching import TestPatternMatching logger.setLevel(logging.INFO) if __name__ == '__main__': unittest.main() lark-1.2.2/tests/grammars/000077500000000000000000000000001465673407200154365ustar00rootroot00000000000000lark-1.2.2/tests/grammars/ab.lark000066400000000000000000000001251465673407200166710ustar00rootroot00000000000000startab: expr expr: A B | A expr B A: "a" B: "b" %import common.WS %ignore WS 
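A minimal sketch of how the ab.lark test grammar above is consumed, mirroring the `%import .grammars.ab (...)` usage that appears in the test modules further down; the script and the name `ab_parser` are illustrative only, and it assumes the file sits next to the `grammars/` directory so the relative import resolves (the tests achieve this with `source_path=__file__`):

from lark import Lark

# Importing the rules by name keeps their original names (no "ab__" prefix),
# so parsing "ab" yields Tree('startab', [Tree('expr', ['a', 'b'])]),
# which is exactly what tests/test_cache.py::test_imports asserts.
ab_parser = Lark("""
%import .grammars.ab (startab, expr)
""", parser='lalr', start='startab', source_path=__file__)

print(ab_parser.parse("ab").pretty())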
lark-1.2.2/tests/grammars/leading_underscore_grammar.lark000066400000000000000000000000431465673407200236500ustar00rootroot00000000000000A: "A" _SEP: "x" _a: A c: _a _SEPlark-1.2.2/tests/grammars/templates.lark000066400000000000000000000000441465673407200203050ustar00rootroot00000000000000sep{item, delim}: item (delim item)*lark-1.2.2/tests/grammars/test.lark000066400000000000000000000000741465673407200172710ustar00rootroot00000000000000%import common.NUMBER %import common.WORD %import common.WS lark-1.2.2/tests/grammars/test_relative_import_of_nested_grammar.lark000066400000000000000000000001511465673407200263060ustar00rootroot00000000000000 start: rule_to_import %import .test_relative_import_of_nested_grammar__grammar_to_import.rule_to_importlark-1.2.2/tests/grammars/test_relative_import_of_nested_grammar__grammar_to_import.lark000066400000000000000000000001621465673407200322510ustar00rootroot00000000000000 rule_to_import: NESTED_TERMINAL %import .test_relative_import_of_nested_grammar__nested_grammar.NESTED_TERMINAL lark-1.2.2/tests/grammars/test_relative_import_of_nested_grammar__nested_grammar.lark000066400000000000000000000000251465673407200315150ustar00rootroot00000000000000NESTED_TERMINAL: "N" lark-1.2.2/tests/grammars/test_unicode.lark000066400000000000000000000000331465673407200207720ustar00rootroot00000000000000UNICODE : /[a-zØ-öø-ÿ]/lark-1.2.2/tests/grammars/three_rules_using_same_token.lark000066400000000000000000000000521465673407200242410ustar00rootroot00000000000000%import common.INT a: A b: A c: A A: "A"lark-1.2.2/tests/test_cache.py000066400000000000000000000137201465673407200163040ustar00rootroot00000000000000from __future__ import absolute_import import logging from unittest import TestCase, main, skipIf from lark import Lark, Tree, Transformer, UnexpectedInput from lark.lexer import Lexer, Token import lark.lark as lark_module from io import BytesIO try: import regex except ImportError: regex = None class MockFile(BytesIO): def close(self): pass def __enter__(self): return self def __exit__(self, *args): pass class MockFS: def __init__(self): self.files = {} def open(self, name, mode="r", **kwargs): if name not in self.files: if "r" in mode: # If we are reading from a file, it should already exist raise FileNotFoundError(name) f = self.files[name] = MockFile() else: f = self.files[name] f.seek(0) return f def exists(self, name): return name in self.files class CustomLexer(Lexer): def __init__(self, lexer_conf): pass def lex(self, data): for obj in data: yield Token('A', obj) class InlineTestT(Transformer): def add(self, children): return sum(children if isinstance(children, list) else children.children) def NUM(self, token): return int(token) def __reduce__(self): raise TypeError("This Transformer should not be pickled.") def append_zero(t): return t.update(value=t.value + '0') class TestCache(TestCase): g = '''start: "a"''' def setUp(self): self.fs = lark_module.FS self.mock_fs = MockFS() lark_module.FS = self.mock_fs def tearDown(self): self.mock_fs.files = {} lark_module.FS = self.fs def test_simple(self): fn = "bla" Lark(self.g, parser='lalr', cache=fn) assert fn in self.mock_fs.files parser = Lark(self.g, parser='lalr', cache=fn) assert parser.parse('a') == Tree('start', []) def test_automatic_naming(self): assert len(self.mock_fs.files) == 0 Lark(self.g, parser='lalr', cache=True) assert len(self.mock_fs.files) == 1 parser = Lark(self.g, parser='lalr', cache=True) assert parser.parse('a') == Tree('start', []) parser = Lark(self.g + ' "b"', parser='lalr', 
cache=True) assert len(self.mock_fs.files) == 2 assert parser.parse('ab') == Tree('start', []) parser = Lark(self.g, parser='lalr', cache=True) assert parser.parse('a') == Tree('start', []) def test_custom_lexer(self): parser = Lark(self.g, parser='lalr', lexer=CustomLexer, cache=True) parser = Lark(self.g, parser='lalr', lexer=CustomLexer, cache=True) assert len(self.mock_fs.files) == 1 assert parser.parse('a') == Tree('start', []) def test_options(self): # Test options persistence Lark(self.g, parser="lalr", debug=True, cache=True) parser = Lark(self.g, parser="lalr", debug=True, cache=True) assert parser.options.options['debug'] def test_inline(self): # Test inline transformer (tree-less) & lexer_callbacks # Note: the Transformer should not be saved to the file, # and is made unpickable to check for that g = r""" start: add+ add: NUM "+" NUM NUM: /\d+/ %ignore " " """ text = "1+2 3+4" expected = Tree('start', [30, 70]) parser = Lark(g, parser='lalr', transformer=InlineTestT(), cache=True, lexer_callbacks={'NUM': append_zero}) res0 = parser.parse(text) parser = Lark(g, parser='lalr', transformer=InlineTestT(), cache=True, lexer_callbacks={'NUM': append_zero}) assert len(self.mock_fs.files) == 1 res1 = parser.parse(text) res2 = InlineTestT().transform(Lark(g, parser="lalr", cache=True, lexer_callbacks={'NUM': append_zero}).parse(text)) assert res0 == res1 == res2 == expected def test_imports(self): g = """ %import .grammars.ab (startab, expr) """ parser = Lark(g, parser='lalr', start='startab', cache=True, source_path=__file__) assert len(self.mock_fs.files) == 1 parser = Lark(g, parser='lalr', start='startab', cache=True, source_path=__file__) assert len(self.mock_fs.files) == 1 res = parser.parse("ab") self.assertEqual(res, Tree('startab', [Tree('expr', ['a', 'b'])])) @skipIf(regex is None, "'regex' lib not installed") def test_recursive_pattern(self): g = """ start: recursive+ recursive: /\w{3}\d{3}(?R)?/ """ assert len(self.mock_fs.files) == 0 Lark(g, parser="lalr", regex=True, cache=True) assert len(self.mock_fs.files) == 1 with self.assertLogs("lark", level="ERROR") as cm: Lark(g, parser='lalr', regex=True, cache=True) assert len(self.mock_fs.files) == 1 # need to add an error log, because 'self.assertNoLogs' was added in Python 3.10 logging.getLogger('lark').error("dummy message") # should only have the dummy log self.assertCountEqual(cm.output, ["ERROR:lark:dummy message"]) def test_error_message(self): # Checks that error message generation works # This is especially important since sometimes the `str` method fails with # the mysterious "" or similar g = r""" start: add+ add: /\d+/ "+" /\d+/ %ignore " " """ texts = ("1+", "+1", "", "1 1+1") parser1 = Lark(g, parser='lalr', cache=True) parser2 = Lark(g, parser='lalr', cache=True) assert len(self.mock_fs.files) == 1 for text in texts: with self.assertRaises((UnexpectedInput)) as cm1: parser1.parse(text) with self.assertRaises((UnexpectedInput)) as cm2: parser2.parse(text) self.assertEqual(str(cm1.exception), str(cm2.exception)) if __name__ == '__main__': main() lark-1.2.2/tests/test_grammar.py000066400000000000000000000224031465673407200166650ustar00rootroot00000000000000from __future__ import absolute_import import os from unittest import TestCase, main from lark import Lark, Token, Tree, ParseError, UnexpectedInput from lark.load_grammar import GrammarError, GRAMMAR_ERRORS, find_grammar_errors, list_grammar_imports from lark.load_grammar import FromPackageLoader class TestGrammar(TestCase): def setUp(self): pass def 
test_errors(self): for msg, examples in GRAMMAR_ERRORS: for example in examples: try: p = Lark(example) except GrammarError as e: assert msg in str(e) else: assert False, "example did not raise an error" def test_empty_literal(self): # Issues #888 self.assertRaises(GrammarError, Lark, "start: \"\"") def test_ignore_name(self): spaces = [] p = Lark(""" start: "a" "b" WS: " " %ignore WS """, parser='lalr', lexer_callbacks={'WS': spaces.append}) assert p.parse("a b") == p.parse("a b") assert len(spaces) == 5 def test_override_rule(self): # Overrides the 'sep' template in existing grammar to add an optional terminating delimiter # Thus extending it beyond its original capacity p = Lark(""" %import .test_templates_import (start, sep) %override sep{item, delim}: item (delim item)* delim? %ignore " " """, source_path=__file__) a = p.parse('[1, 2, 3]') b = p.parse('[1, 2, 3, ]') assert a == b self.assertRaises(GrammarError, Lark, """ %import .test_templates_import (start, sep) %override sep{item}: item (delim item)* delim? """, source_path=__file__) self.assertRaises(GrammarError, Lark, """ %override sep{item}: item (delim item)* delim? """, source_path=__file__) def test_override_terminal(self): p = Lark(""" %import .grammars.ab (startab, A, B) %override A: "c" %override B: "d" """, start='startab', source_path=__file__) a = p.parse('cd') self.assertEqual(a.children[0].children, [Token('A', 'c'), Token('B', 'd')]) def test_extend_rule(self): p = Lark(""" %import .grammars.ab (startab, A, B, expr) %extend expr: B A """, start='startab', source_path=__file__) a = p.parse('abab') self.assertEqual(a.children[0].children, ['a', Tree('expr', ['b', 'a']), 'b']) self.assertRaises(GrammarError, Lark, """ %extend expr: B A """) def test_extend_term(self): p = Lark(""" %import .grammars.ab (startab, A, B, expr) %extend A: "c" """, start='startab', source_path=__file__) a = p.parse('acbb') self.assertEqual(a.children[0].children, ['a', Tree('expr', ['c', 'b']), 'b']) def test_extend_twice(self): p = Lark(""" start: x+ x: "a" %extend x: "b" %extend x: "c" """) assert p.parse("abccbba") == p.parse("cbabbbb") def test_undefined_ignore(self): g = """!start: "A" %ignore B """ self.assertRaises( GrammarError, Lark, g) g = """!start: "A" %ignore start """ self.assertRaises( GrammarError, Lark, g) def test_alias_in_terminal(self): g = """start: TERM TERM: "a" -> alias """ self.assertRaises( GrammarError, Lark, g) def test_undefined_rule(self): self.assertRaises(GrammarError, Lark, """start: a""") def test_undefined_term(self): self.assertRaises(GrammarError, Lark, """start: A""") def test_token_multiline_only_works_with_x_flag(self): g = r"""start: ABC ABC: / a b c d e f /i """ self.assertRaises( GrammarError, Lark, g) def test_import_custom_sources(self): custom_loader = FromPackageLoader(__name__, ('grammars', )) grammar = """ start: startab %import ab.startab """ p = Lark(grammar, import_paths=[custom_loader]) self.assertEqual(p.parse('ab'), Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])])) def test_import_custom_sources2(self): custom_loader = FromPackageLoader(__name__, ('grammars', )) grammar = """ start: rule_to_import %import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import """ p = Lark(grammar, import_paths=[custom_loader]) x = p.parse('N') self.assertEqual(next(x.find_data('rule_to_import')).children, ['N']) def test_import_custom_sources3(self): custom_loader2 = FromPackageLoader(__name__) grammar = """ %import .test_relative_import 
(start, WS) %ignore WS """ p = Lark(grammar, import_paths=[custom_loader2], source_path=__file__) # import relative to current file x = p.parse('12 capybaras') self.assertEqual(x.children, ['12', 'capybaras']) def test_find_grammar_errors(self): text = """ a: rule b rule c: rule B.: "hello" f D: "okay" """ assert [e.line for e, _s in find_grammar_errors(text)] == [3, 5] text = """ a: rule b rule | ok c: rule B.: "hello" f D: "okay" """ assert [e.line for e, _s in find_grammar_errors(text)] == [3, 4, 6] text = """ a: rule @#$#@$@&& b: rule | ok c: rule B: "hello" f @ D: "okay" """ x = find_grammar_errors(text) assert [e.line for e, _s in find_grammar_errors(text)] == [2, 6] def test_ranged_repeat_terms(self): g = u"""!start: AAA AAA: "A"~3 """ l = Lark(g, parser='lalr') self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"])) self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA') self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA') g = u"""!start: AABB CC AABB: "A"~0..2 "B"~2 CC: "C"~1..2 """ l = Lark(g, parser='lalr') self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC'])) self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C'])) self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC'])) self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB') self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB') self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB') self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB') def test_ranged_repeat_large(self): g = u"""!start: "A"~60 """ l = Lark(g, parser='lalr') self.assertGreater(len(l.rules), 1, "Expected that more than one rule will be generated") self.assertEqual(l.parse(u'A' * 60), Tree('start', ["A"] * 60)) self.assertRaises(ParseError, l.parse, u'A' * 59) self.assertRaises((ParseError, UnexpectedInput), l.parse, u'A' * 61) g = u"""!start: "A"~15..100 """ l = Lark(g, parser='lalr') for i in range(0, 110): if 15 <= i <= 100: self.assertEqual(l.parse(u'A' * i), Tree('start', ['A']*i)) else: self.assertRaises(UnexpectedInput, l.parse, u'A' * i) # 8191 is a Mersenne prime g = u"""start: "A"~8191 """ l = Lark(g, parser='lalr') self.assertEqual(l.parse(u'A' * 8191), Tree('start', [])) self.assertRaises(UnexpectedInput, l.parse, u'A' * 8190) self.assertRaises(UnexpectedInput, l.parse, u'A' * 8192) def test_large_terminal(self): g = "start: NUMBERS\n" g += "NUMBERS: " + '|'.join('"%s"' % i for i in range(0, 1000)) l = Lark(g, parser='lalr') for i in (0, 9, 99, 999): self.assertEqual(l.parse(str(i)), Tree('start', [str(i)])) for i in (-1, 1000): self.assertRaises(UnexpectedInput, l.parse, str(i)) def test_list_grammar_imports(self): grammar = """ %import .test_templates_import (start, sep) %override sep{item, delim}: item (delim item)* delim? 
%ignore " " """ imports = list_grammar_imports(grammar, [os.path.dirname(__file__)]) self.assertEqual({os.path.split(i)[-1] for i in imports}, {'test_templates_import.lark', 'templates.lark'}) imports = list_grammar_imports('%import common.WS', []) assert len(imports) == 1 and imports[0].pkg_name == 'lark' def test_inline_with_expand_single(self): grammar = r""" start: _a !?_a: "A" """ self.assertRaises(GrammarError, Lark, grammar) def test_line_breaks(self): p = Lark(r"""start: "a" \ "b" """) p.parse('ab') if __name__ == '__main__': main() lark-1.2.2/tests/test_lexer.py000066400000000000000000000007231465673407200163570ustar00rootroot00000000000000from unittest import TestCase, main from lark import Lark, Tree class TestLexer(TestCase): def setUp(self): pass def test_basic(self): p = Lark(""" start: "a" "b" "c" "d" %ignore " " """) res = list(p.lex("abc cba dd")) assert res == list('abccbadd') res = list(p.lex("abc cba dd", dont_ignore=True)) assert res == list('abc cba dd') if __name__ == '__main__': main() lark-1.2.2/tests/test_logger.py000066400000000000000000000051741465673407200165240ustar00rootroot00000000000000import logging from contextlib import contextmanager from lark import Lark, logger from unittest import TestCase, main, skipIf from io import StringIO try: import interegular except ImportError: interegular = None @contextmanager def capture_log(): stream = StringIO() orig_handler = logger.handlers[0] del logger.handlers[:] logger.addHandler(logging.StreamHandler(stream)) yield stream del logger.handlers[:] logger.addHandler(orig_handler) class Testlogger(TestCase): def test_debug(self): logger.setLevel(logging.DEBUG) collision_grammar = ''' start: as as as: a* a: "a" ''' with capture_log() as log: Lark(collision_grammar, parser='lalr', debug=True) log = log.getvalue() # since there are conflicts about A # symbol A should appear in the log message for hint self.assertIn("A", log) def test_non_debug(self): logger.setLevel(logging.WARNING) collision_grammar = ''' start: as as as: a* a: "a" ''' with capture_log() as log: Lark(collision_grammar, parser='lalr', debug=False) log = log.getvalue() # no log message self.assertEqual(log, "") def test_loglevel_higher(self): logger.setLevel(logging.ERROR) collision_grammar = ''' start: as as as: a* a: "a" ''' with capture_log() as log: Lark(collision_grammar, parser='lalr', debug=True) log = log.getvalue() # no log message self.assertEqual(len(log), 0) @skipIf(interegular is None, "interegular is not installed, can't test regex collisions") def test_regex_collision(self): logger.setLevel(logging.WARNING) collision_grammar = ''' start: A | B A: /a+/ B: /(a|b)+/ ''' with capture_log() as log: Lark(collision_grammar, parser='lalr') log = log.getvalue() # since there are conflicts between A and B # symbols A and B should appear in the log message self.assertIn("A", log) self.assertIn("B", log) @skipIf(interegular is None, "interegular is not installed, can't test regex collisions") def test_no_regex_collision(self): logger.setLevel(logging.WARNING) collision_grammar = ''' start: A " " B A: /a+/ B: /(a|b)+/ ''' with capture_log() as log: Lark(collision_grammar, parser='lalr') log = log.getvalue() self.assertEqual(log, "") if __name__ == '__main__': main() 
lark-1.2.2/tests/test_nearley/000077500000000000000000000000001465673407200163235ustar00rootroot00000000000000lark-1.2.2/tests/test_nearley/__init__.py000066400000000000000000000000001465673407200204220ustar00rootroot00000000000000lark-1.2.2/tests/test_nearley/grammars/000077500000000000000000000000001465673407200201345ustar00rootroot00000000000000lark-1.2.2/tests/test_nearley/grammars/include_unicode.ne000066400000000000000000000000411465673407200236040ustar00rootroot00000000000000@include "unicode.ne" main -> x lark-1.2.2/tests/test_nearley/grammars/unicode.ne000066400000000000000000000000131465673407200221000ustar00rootroot00000000000000x -> "±a" lark-1.2.2/tests/test_nearley/nearley/000077500000000000000000000000001465673407200177625ustar00rootroot00000000000000lark-1.2.2/tests/test_nearley/test_nearley.py000066400000000000000000000062561465673407200214040ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import absolute_import import unittest import logging import os import codecs from lark import logger from lark.tools.nearley import create_code_for_nearley_grammar, main as nearley_tool_main logger.setLevel(logging.INFO) TEST_PATH = os.path.abspath(os.path.dirname(__file__)) NEARLEY_PATH = os.path.join(TEST_PATH, 'nearley') BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin') if not os.path.exists(BUILTIN_PATH): logger.warning("Nearley not included. Skipping Nearley tests! (use git submodule to add)") raise ImportError("Skipping Nearley tests!") try: import js2py # Ensures that js2py exists, to avoid failing tests except RuntimeError as e: if "python version" in str(e): raise ImportError("js2py does not support this python version") raise class TestNearley(unittest.TestCase): def test_css(self): fn = os.path.join(NEARLEY_PATH, 'examples/csscolor.ne') with open(fn) as f: grammar = f.read() code = create_code_for_nearley_grammar(grammar, 'csscolor', BUILTIN_PATH, os.path.dirname(fn)) d = {} exec (code, d) parse = d['parse'] c = parse('#a199ff') assert c['r'] == 161 assert c['g'] == 153 assert c['b'] == 255 c = parse('rgb(255, 70%, 3)') assert c['r'] == 255 assert c['g'] == 178 assert c['b'] == 3 def test_include(self): fn = os.path.join(NEARLEY_PATH, 'test/grammars/folder-test.ne') with open(fn) as f: grammar = f.read() code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, os.path.dirname(fn)) d = {} exec (code, d) parse = d['parse'] parse('a') parse('b') def test_multi_include(self): fn = os.path.join(NEARLEY_PATH, 'test/grammars/multi-include-test.ne') with open(fn) as f: grammar = f.read() code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, os.path.dirname(fn)) d = {} exec (code, d) parse = d['parse'] parse('a') parse('b') parse('c') def test_utf8(self): grammar = u'main -> "±a"' code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, './') d = {} exec (code, d) parse = d['parse'] parse(u'±a') def test_backslash(self): grammar = r'main -> "\""' code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, './') d = {} exec (code, d) parse = d['parse'] parse(u'"') def test_null(self): grammar = r'main -> "a" | null' code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, './') d = {} exec (code, d) parse = d['parse'] parse('a') parse('') def test_utf8_2(self): fn = os.path.join(TEST_PATH, 'grammars/unicode.ne') nearley_tool_main(fn, 'x', NEARLEY_PATH) def test_include_utf8(self): fn = os.path.join(TEST_PATH, 'grammars/include_unicode.ne') nearley_tool_main(fn, 'main', NEARLEY_PATH) if __name__ == 
'__main__': unittest.main() lark-1.2.2/tests/test_parser.py000066400000000000000000002650431465673407200165440ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import absolute_import import re import unittest import os import sys from copy import copy, deepcopy from lark import Token, Transformer_NonRecursive, LexError from io import ( StringIO as uStringIO, BytesIO, open, ) try: import regex except ImportError: regex = None import lark from lark import logger from lark.lark import Lark from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters from lark.tree import Tree from lark.visitors import Transformer, Transformer_InPlace, v_args, Transformer_InPlaceRecursive from lark.lexer import Lexer, BasicLexer from lark.indenter import Indenter __all__ = ['TestParsers'] class TestParsers(unittest.TestCase): def test_big_list(self): Lark(r""" start: {} """.format( "|".join(['"%s"'%i for i in range(250)]) )) def test_same_ast(self): "Tests that Earley and LALR parsers produce equal trees" g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")" name_list: NAME | name_list "," NAME NAME: /\w+/ """, parser='lalr') l = g.parse('(a,b,c,*x)') g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")" name_list: NAME | name_list "," NAME NAME: /\w/+ """) l2 = g.parse('(a,b,c,*x)') assert l == l2, '%s != %s' % (l.pretty(), l2.pretty()) def test_infinite_recurse(self): g = """start: a a: a | "a" """ self.assertRaises(GrammarError, Lark, g, parser='lalr') # TODO: should it? shouldn't it? # l = Lark(g, parser='earley', lexer='dynamic') # self.assertRaises(ParseError, l.parse, 'a') def test_propagate_positions(self): g = Lark("""start: a a: "a" """, propagate_positions=True) r = g.parse('a') self.assertEqual( r.children[0].meta.line, 1 ) g = Lark("""start: x x: a a: "a" """, propagate_positions=True) r = g.parse('a') self.assertEqual( r.children[0].meta.line, 1 ) def test_propagate_positions2(self): g = Lark("""start: a a: b ?b: "(" t ")" !t: "t" """, propagate_positions=True) start = g.parse("(t)") a ,= start.children t ,= a.children assert t.children[0] == "t" assert t.meta.column == 2 assert t.meta.end_column == 3 assert start.meta.column == a.meta.column == 1 assert start.meta.end_column == a.meta.end_column == 4 def test_expand1(self): g = Lark("""start: a ?a: b b: "x" """) r = g.parse('x') self.assertEqual( r.children[0].data, "b" ) g = Lark("""start: a ?a: b -> c b: "x" """) r = g.parse('x') self.assertEqual( r.children[0].data, "c" ) g = Lark("""start: a ?a: B -> c B: "x" """) self.assertEqual( r.children[0].data, "c" ) g = Lark("""start: a ?a: b b -> c b: "x" """) r = g.parse('xx') self.assertEqual( r.children[0].data, "c" ) def test_comment_in_rule_definition(self): g = Lark("""start: a a: "a" // A comment // Another comment | "b" // Still more c: "unrelated" """) r = g.parse('b') self.assertEqual( r.children[0].data, "a" ) def test_visit_tokens(self): class T(Transformer): def a(self, children): return children[0] + "!" 
def A(self, tok): return tok.update(value=tok.upper()) # Test regular g = """start: a a : A A: "x" """ p = Lark(g, parser='lalr') r = T(False).transform(p.parse("x")) self.assertEqual( r.children, ["x!"] ) r = T().transform(p.parse("x")) self.assertEqual( r.children, ["X!"] ) # Test internal transformer p = Lark(g, parser='lalr', transformer=T()) r = p.parse("x") self.assertEqual( r.children, ["X!"] ) def test_visit_tokens2(self): g = """ start: add+ add: NUM "+" NUM NUM: /\\d+/ %ignore " " """ text = "1+2 3+4" expected = Tree('start', [3, 7]) for base in (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive): class T(base): def add(self, children): return sum(children if isinstance(children, list) else children.children) def NUM(self, token): return int(token) parser = Lark(g, parser='lalr', transformer=T()) result = parser.parse(text) self.assertEqual(result, expected) def test_vargs_meta(self): @v_args(meta=True) class T1(Transformer): def a(self, meta, children): assert not children return meta.line def start(self, meta, children): return children @v_args(meta=True, inline=True) class T2(Transformer): def a(self, meta): return meta.line def start(self, meta, *res): return list(res) for T in (T1, T2): for internal in [False, True]: try: g = Lark(r"""start: a+ a : "x" _NL? _NL: /\n/+ """, parser='lalr', transformer=T() if internal else None, propagate_positions=True) except NotImplementedError: assert internal continue res = g.parse("xx\nx\nxxx\n\n\nxx") assert not internal res = T().transform(res) self.assertEqual(res, [1, 1, 2, 3, 3, 3, 6, 6]) def test_vargs_tree(self): tree = Lark(''' start: a a a !a: "A" ''').parse('AAA') tree_copy = deepcopy(tree) @v_args(tree=True) class T(Transformer): def a(self, tree): return 1 def start(self, tree): return tree.children res = T().transform(tree) self.assertEqual(res, [1, 1, 1]) self.assertEqual(tree, tree_copy) def test_embedded_transformer(self): class T(Transformer): def a(self, children): return "" def b(self, children): return "" def c(self, children): return "" # Test regular g = Lark("""start: a a : "x" """, parser='lalr') r = T().transform(g.parse("x")) self.assertEqual( r.children, [""] ) g = Lark("""start: a a : "x" """, parser='lalr', transformer=T()) r = g.parse("x") self.assertEqual( r.children, [""] ) # Test Expand1 g = Lark("""start: a ?a : b b : "x" """, parser='lalr') r = T().transform(g.parse("x")) self.assertEqual( r.children, [""] ) g = Lark("""start: a ?a : b b : "x" """, parser='lalr', transformer=T()) r = g.parse("x") self.assertEqual( r.children, [""] ) # Test Expand1 -> Alias g = Lark("""start: a ?a : b b -> c b : "x" """, parser='lalr') r = T().transform(g.parse("xx")) self.assertEqual( r.children, [""] ) g = Lark("""start: a ?a : b b -> c b : "x" """, parser='lalr', transformer=T()) r = g.parse("xx") self.assertEqual( r.children, [""] ) def test_embedded_transformer_inplace(self): @v_args(tree=True) class T1(Transformer_InPlace): def a(self, tree): assert isinstance(tree, Tree), tree tree.children.append("tested") return tree def b(self, tree): return Tree(tree.data, tree.children + ['tested2']) @v_args(tree=True) class T2(Transformer): def a(self, tree): assert isinstance(tree, Tree), tree tree.children.append("tested") return tree def b(self, tree): return Tree(tree.data, tree.children + ['tested2']) class T3(Transformer): @v_args(tree=True) def a(self, tree): assert isinstance(tree, Tree) tree.children.append("tested") return tree @v_args(tree=True) def b(self, tree): return 
Tree(tree.data, tree.children + ['tested2']) for t in [T1(), T2(), T3()]: for internal in [False, True]: g = Lark("""start: a b a : "x" b : "y" """, parser='lalr', transformer=t if internal else None) r = g.parse("xy") if not internal: r = t.transform(r) a, b = r.children self.assertEqual(a.children, ["tested"]) self.assertEqual(b.children, ["tested2"]) def test_alias(self): Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """) def test_backwards_custom_lexer(self): class OldCustomLexer(Lexer): def __init__(self, lexer_conf): pass def lex(self, text): yield Token('A', 'A') p = Lark(""" start: A %declare A """, parser='lalr', lexer=OldCustomLexer) r = p.parse('') self.assertEqual(r, Tree('start', [Token('A', 'A')])) def test_lexer_token_limit(self): "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation" tokens = {'A%d'%i:'"%d"'%i for i in range(300)} g = """start: %s %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())) p = Lark(g, parser='lalr') def _make_full_earley_test(LEXER): def _Lark(grammar, **kwargs): return Lark(grammar, lexer=LEXER, parser='earley', propagate_positions=True, **kwargs) class _TestFullEarley(unittest.TestCase): def test_anon(self): # Fails an Earley implementation without special handling for empty rules, # or re-processing of already completed rules. g = Lark(r"""start: B B: ("ab"|/[^b]/)+ """, lexer=LEXER) self.assertEqual( g.parse('abc').children[0], 'abc') @unittest.skipIf(LEXER=='basic', "Requires dynamic lexer") def test_earley(self): g = Lark("""start: A "b" c A: "a"+ c: "abc" """, parser="earley", lexer=LEXER) x = g.parse('aaaababc') def test_earley2(self): grammar = """ start: statement+ statement: "r" | "c" /[a-z]/+ %ignore " " """ program = """c b r""" l = Lark(grammar, parser='earley', lexer=LEXER) l.parse(program) @unittest.skipIf(LEXER != 'dynamic_complete', "Only relevant for the dynamic_complete parser") def test_earley3(self): """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result) By default, `+` should imitate regexp greedy-matching """ grammar = """ start: A A A: "a"+ """ l = Lark(grammar, parser='earley', lexer=LEXER) res = l.parse("aaa") self.assertEqual(set(res.children), {'aa', 'a'}) # XXX TODO fix Earley to maintain correct order # i.e. terminals it imitate greedy search for terminals, but lazy search for rules # self.assertEqual(res.children, ['aa', 'a']) def test_earley4(self): grammar = """ start: A A? A: "a"+ """ l = Lark(grammar, parser='earley', lexer=LEXER) res = l.parse("aaa") assert set(res.children) == {'aa', 'a'} or res.children == ['aaa'] # XXX TODO fix Earley to maintain correct order # i.e. terminals it imitate greedy search for terminals, but lazy search for rules # self.assertEqual(res.children, ['aaa']) def test_earley_repeating_empty(self): # This was a sneaky bug! grammar = """ !start: "a" empty empty "b" empty: empty2 empty2: """ parser = Lark(grammar, parser='earley', lexer=LEXER) res = parser.parse('ab') empty_tree = Tree('empty', [Tree('empty2', [])]) self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b']) @unittest.skipIf(LEXER=='basic', "Requires dynamic lexer") def test_earley_explicit_ambiguity(self): # This was a sneaky bug! 
grammar = """ start: a b | ab a: "a" b: "b" ab: "ab" """ parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit') ambig_tree = parser.parse('ab') self.assertEqual( ambig_tree.data, '_ambig') self.assertEqual( len(ambig_tree.children), 2) @unittest.skipIf(LEXER=='basic', "Requires dynamic lexer") def test_ambiguity1(self): grammar = """ start: cd+ "e" !cd: "c" | "d" | "cd" """ l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) ambig_tree = l.parse('cde') assert ambig_tree.data == '_ambig', ambig_tree assert len(ambig_tree.children) == 2 @unittest.skipIf(LEXER=='basic', "Requires dynamic lexer") def test_ambiguity2(self): grammar = """ ANY: /[a-zA-Z0-9 ]+/ a.2: "A" b+ b.2: "B" c: ANY start: (a|c)* """ l = Lark(grammar, parser='earley', lexer=LEXER) res = l.parse('ABX') expected = Tree('start', [ Tree('a', [ Tree('b', []) ]), Tree('c', [ 'X' ]) ]) self.assertEqual(res, expected) def test_ambiguous_inlined_rule(self): grammar = """ start: _field+ _field: f1 | f2 | f3 f1: INT f2: INT "M"? f3: INT "M" %import common.INT """ l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) ambig_tree = l.parse("1M2") expected = { Tree('start', [Tree('f2', ['1']), Tree('f1', ['2'])]), Tree('start', [Tree('f2', ['1']), Tree('f2', ['2'])]), Tree('start', [Tree('f3', ['1']), Tree('f1', ['2'])]), Tree('start', [Tree('f3', ['1']), Tree('f2', ['2'])]), } self.assertEqual(ambig_tree.data, '_ambig') self.assertEqual(set(ambig_tree.children), expected) def test_ambiguous_intermediate_node(self): grammar = """ start: ab bc d? !ab: "A" "B"? !bc: "B"? "C" !d: "D" """ l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) ambig_tree = l.parse("ABCD") expected = { Tree('start', [Tree('ab', ['A']), Tree('bc', ['B', 'C']), Tree('d', ['D'])]), Tree('start', [Tree('ab', ['A', 'B']), Tree('bc', ['C']), Tree('d', ['D'])]) } self.assertEqual(ambig_tree.data, '_ambig') self.assertEqual(set(ambig_tree.children), expected) def test_ambiguous_symbol_and_intermediate_nodes(self): grammar = """ start: ab bc cd !ab: "A" "B"? !bc: "B"? "C"? !cd: "C"? "D" """ l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) ambig_tree = l.parse("ABCD") expected = { Tree('start', [ Tree('ab', ['A', 'B']), Tree('bc', ['C']), Tree('cd', ['D']) ]), Tree('start', [ Tree('ab', ['A', 'B']), Tree('bc', []), Tree('cd', ['C', 'D']) ]), Tree('start', [ Tree('ab', ['A']), Tree('bc', ['B', 'C']), Tree('cd', ['D']) ]), Tree('start', [ Tree('ab', ['A']), Tree('bc', ['B']), Tree('cd', ['C', 'D']) ]), } self.assertEqual(ambig_tree.data, '_ambig') self.assertEqual(set(ambig_tree.children), expected) def test_nested_ambiguous_intermediate_nodes(self): grammar = """ start: ab bc cd e? !ab: "A" "B"? !bc: "B"? "C"? !cd: "C"? "D" !e: "E" """ l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) ambig_tree = l.parse("ABCDE") expected = { Tree('start', [ Tree('ab', ['A', 'B']), Tree('bc', ['C']), Tree('cd', ['D']), Tree('e', ['E']) ]), Tree('start', [ Tree('ab', ['A']), Tree('bc', ['B', 'C']), Tree('cd', ['D']), Tree('e', ['E']) ]), Tree('start', [ Tree('ab', ['A']), Tree('bc', ['B']), Tree('cd', ['C', 'D']), Tree('e', ['E']) ]), Tree('start', [ Tree('ab', ['A', 'B']), Tree('bc', []), Tree('cd', ['C', 'D']), Tree('e', ['E']) ]), } self.assertEqual(ambig_tree.data, '_ambig') self.assertEqual(set(ambig_tree.children), expected) def test_nested_ambiguous_intermediate_nodes2(self): grammar = """ start: ab bc cd de f !ab: "A" "B"? !bc: "B"? "C"? !cd: "C"? "D"? !de: "D"? 
"E" !f: "F" """ l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) ambig_tree = l.parse("ABCDEF") expected = { Tree('start', [ Tree('ab', ['A', 'B']), Tree('bc', ['C']), Tree('cd', ['D']), Tree('de', ['E']), Tree('f', ['F']), ]), Tree('start', [ Tree('ab', ['A']), Tree('bc', ['B', 'C']), Tree('cd', ['D']), Tree('de', ['E']), Tree('f', ['F']), ]), Tree('start', [ Tree('ab', ['A']), Tree('bc', ['B']), Tree('cd', ['C', 'D']), Tree('de', ['E']), Tree('f', ['F']), ]), Tree('start', [ Tree('ab', ['A']), Tree('bc', ['B']), Tree('cd', ['C']), Tree('de', ['D', 'E']), Tree('f', ['F']), ]), Tree('start', [ Tree('ab', ['A', "B"]), Tree('bc', []), Tree('cd', ['C']), Tree('de', ['D', 'E']), Tree('f', ['F']), ]), Tree('start', [ Tree('ab', ['A']), Tree('bc', ['B', 'C']), Tree('cd', []), Tree('de', ['D', 'E']), Tree('f', ['F']), ]), Tree('start', [ Tree('ab', ['A', 'B']), Tree('bc', []), Tree('cd', ['C', 'D']), Tree('de', ['E']), Tree('f', ['F']), ]), Tree('start', [ Tree('ab', ['A', 'B']), Tree('bc', ['C']), Tree('cd', []), Tree('de', ['D', 'E']), Tree('f', ['F']), ]), } self.assertEqual(ambig_tree.data, '_ambig') self.assertEqual(set(ambig_tree.children), expected) def test_ambiguous_intermediate_node_unnamed_token(self): grammar = """ start: ab bc "D" !ab: "A" "B"? !bc: "B"? "C" """ l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) ambig_tree = l.parse("ABCD") expected = { Tree('start', [Tree('ab', ['A']), Tree('bc', ['B', 'C'])]), Tree('start', [Tree('ab', ['A', 'B']), Tree('bc', ['C'])]) } self.assertEqual(ambig_tree.data, '_ambig') self.assertEqual(set(ambig_tree.children), expected) def test_ambiguous_intermediate_node_inlined_rule(self): grammar = """ start: ab _bc d? !ab: "A" "B"? _bc: "B"? "C" !d: "D" """ l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) ambig_tree = l.parse("ABCD") expected = { Tree('start', [Tree('ab', ['A']), Tree('d', ['D'])]), Tree('start', [Tree('ab', ['A', 'B']), Tree('d', ['D'])]) } self.assertEqual(ambig_tree.data, '_ambig') self.assertEqual(set(ambig_tree.children), expected) def test_ambiguous_intermediate_node_conditionally_inlined_rule(self): grammar = """ start: ab bc d? !ab: "A" "B"? !?bc: "B"? "C" !d: "D" """ l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) ambig_tree = l.parse("ABCD") expected = { Tree('start', [Tree('ab', ['A']), Tree('bc', ['B', 'C']), Tree('d', ['D'])]), Tree('start', [Tree('ab', ['A', 'B']), 'C', Tree('d', ['D'])]) } self.assertEqual(ambig_tree.data, '_ambig') self.assertEqual(set(ambig_tree.children), expected) @unittest.skipIf(LEXER=='basic', "Requires dynamic lexer") def test_fruitflies_ambig(self): grammar = """ start: noun verb noun -> simple | noun verb "like" noun -> comparative noun: adj? 
NOUN verb: VERB adj: ADJ NOUN: "flies" | "bananas" | "fruit" VERB: "like" | "flies" ADJ: "fruit" %import common.WS %ignore WS """ parser = Lark(grammar, ambiguity='explicit', lexer=LEXER) tree = parser.parse('fruit flies like bananas') expected = Tree('_ambig', [ Tree('comparative', [ Tree('noun', ['fruit']), Tree('verb', ['flies']), Tree('noun', ['bananas']) ]), Tree('simple', [ Tree('noun', [Tree('adj', ['fruit']), 'flies']), Tree('verb', ['like']), Tree('noun', ['bananas']) ]) ]) # self.assertEqual(tree, expected) self.assertEqual(tree.data, expected.data) self.assertEqual(set(tree.children), set(expected.children)) @unittest.skipIf(LEXER!='dynamic_complete', "Only relevant for the dynamic_complete parser") def test_explicit_ambiguity2(self): grammar = r""" start: NAME+ NAME: /\w+/ %ignore " " """ text = """cat""" parser = _Lark(grammar, start='start', ambiguity='explicit') tree = parser.parse(text) self.assertEqual(tree.data, '_ambig') combinations = {tuple(str(s) for s in t.children) for t in tree.children} self.assertEqual(combinations, { ('cat',), ('ca', 't'), ('c', 'at'), ('c', 'a' ,'t') }) def test_term_ambig_resolve(self): grammar = r""" !start: NAME+ NAME: /\w+/ %ignore " " """ text = """foo bar""" parser = Lark(grammar) tree = parser.parse(text) self.assertEqual(tree.children, ['foo', 'bar']) def test_multiple_start_solutions(self): grammar = r""" !start: a | A !a: A A: "x" """ l = Lark(grammar, ambiguity='explicit', lexer=LEXER) tree = l.parse('x') expected = Tree('_ambig', [ Tree('start', ['x']), Tree('start', [Tree('a', ['x'])])] ) self.assertEqual(tree, expected) l = Lark(grammar, ambiguity='resolve', lexer=LEXER) tree = l.parse('x') assert tree == Tree('start', ['x']) def test_cycle(self): grammar = """ start: start? """ l = Lark(grammar, ambiguity='resolve', lexer=LEXER) tree = l.parse('') self.assertEqual(tree, Tree('start', [])) l = Lark(grammar, ambiguity='explicit', lexer=LEXER) tree = l.parse('') self.assertEqual(tree, Tree('start', [])) def test_cycle2(self): grammar = """ start: _recurse _recurse: v v: "b" | "a" v | _recurse """ l = Lark(grammar, ambiguity="explicit", lexer=LEXER) tree = l.parse("ab") expected = ( Tree('start', [ Tree('_ambig', [ Tree('v', [Tree('v', [])]), Tree('v', [Tree('v', [Tree('v', [])])]) ]) ]) ) self.assertEqual(tree, expected) def test_cycles(self): grammar = """ a: b b: c* c: a """ l = Lark(grammar, start='a', ambiguity='resolve', lexer=LEXER) tree = l.parse('') self.assertEqual(tree, Tree('a', [Tree('b', [])])) l = Lark(grammar, start='a', ambiguity='explicit', lexer=LEXER) tree = l.parse('') self.assertEqual(tree, Tree('a', [Tree('b', [])])) def test_many_cycles(self): grammar = """ start: a? | start start !a: "a" """ l = Lark(grammar, ambiguity='resolve', lexer=LEXER) tree = l.parse('a') self.assertEqual(tree, Tree('start', [Tree('a', ['a'])])) l = Lark(grammar, ambiguity='explicit', lexer=LEXER) tree = l.parse('a') self.assertEqual(tree, Tree('start', [Tree('a', ['a'])])) def test_cycles_with_child_filter(self): grammar = """ a: _x _x: _x? b b: """ grammar2 = """ a: x x: x? 
b b: """ l = Lark(grammar, start='a', ambiguity='resolve', lexer=LEXER) tree = l.parse('') self.assertEqual(tree, Tree('a', [Tree('b', [])])) l = Lark(grammar, start='a', ambiguity='explicit', lexer=LEXER) tree = l.parse(''); self.assertEqual(tree, Tree('a', [Tree('b', [])])) l = Lark(grammar2, start='a', ambiguity='resolve', lexer=LEXER) tree = l.parse(''); self.assertEqual(tree, Tree('a', [Tree('x', [Tree('b', [])])])) l = Lark(grammar2, start='a', ambiguity='explicit', lexer=LEXER) tree = l.parse(''); self.assertEqual(tree, Tree('a', [Tree('x', [Tree('b', [])])])) @unittest.skipIf(LEXER=='basic', "start/end values work differently for the basic lexer") def test_symbol_node_start_end_dynamic_lexer(self): grammar = """ start: "ABC" """ l = Lark(grammar, ambiguity='forest', lexer=LEXER) node = l.parse('ABC') self.assertEqual(node.start, 0) self.assertEqual(node.end, 3) grammar2 = """ start: abc abc: "ABC" """ l = Lark(grammar2, ambiguity='forest', lexer=LEXER) node = l.parse('ABC') self.assertEqual(node.start, 0) self.assertEqual(node.end, 3) def test_resolve_ambiguity_with_shared_node(self): grammar = """ start: (a+)* !a.1: "A" | """ l = Lark(grammar, ambiguity='resolve', lexer=LEXER) tree = l.parse("A") self.assertEqual(tree, Tree('start', [Tree('a', []), Tree('a', []), Tree('a', ['A'])])) def test_resolve_ambiguity_with_shared_node2(self): grammar = """ start: _s x _s x: "X"? _s: " "? """ l = Lark(grammar, ambiguity='resolve', lexer=LEXER) tree = l.parse("") self.assertEqual(tree, Tree('start', [Tree('x', [])])) def test_consistent_derivation_order1(self): # Should return the same result for any hash-seed parser = Lark(''' start: a a a: "." | b b: "." ''', lexer=LEXER) tree = parser.parse('..') n = Tree('a', [Tree('b', [])]) assert tree == Tree('start', [n, n]) _NAME = "TestFullEarley" + LEXER.capitalize() _TestFullEarley.__name__ = _NAME globals()[_NAME] = _TestFullEarley __all__.append(_NAME) class CustomLexerNew(Lexer): """ Purpose of this custom lexer is to test the integration, so it uses the traditionalparser as implementation without custom lexing behaviour. """ def __init__(self, lexer_conf): self.lexer = BasicLexer(copy(lexer_conf)) def lex(self, lexer_state, parser_state): return self.lexer.lex(lexer_state, parser_state) __future_interface__ = True class CustomLexerOld(Lexer): """ Purpose of this custom lexer is to test the integration, so it uses the traditionalparser as implementation without custom lexing behaviour. """ def __init__(self, lexer_conf): self.lexer = BasicLexer(copy(lexer_conf)) def lex(self, text): ls = self.lexer.make_lexer_state(text) return self.lexer.lex(ls, None) __future_interface__ = False def _tree_structure_check(a, b): """ Checks that both Tree objects have the same structure, without checking their values. """ assert a.data == b.data and len(a.children) == len(b.children) for ca,cb in zip(a.children, b.children): assert type(ca) == type(cb) if isinstance(ca, Tree): _tree_structure_check(ca, cb) elif isinstance(ca, Token): assert ca.type == cb.type else: assert ca == cb class DualBytesLark: """ A helper class that wraps both a normal parser, and a parser for bytes. It automatically transforms `.parse` calls for both lexer, returning the value from the text lexer It always checks that both produce the same output/error NOTE: Not currently used, but left here for future debugging. 
""" def __init__(self, g, *args, **kwargs): self.text_lexer = Lark(g, *args, use_bytes=False, **kwargs) g = self.text_lexer.grammar_source.lower() if '\\u' in g or not g.isascii(): # Bytes re can't deal with uniode escapes self.bytes_lark = None else: # Everything here should work, so use `use_bytes='force'` self.bytes_lark = Lark(self.text_lexer.grammar_source, *args, use_bytes='force', **kwargs) def parse(self, text, start=None): # TODO: Easy workaround, more complex checks would be beneficial if not text.isascii() or self.bytes_lark is None: return self.text_lexer.parse(text, start) try: rv = self.text_lexer.parse(text, start) except Exception as e: try: self.bytes_lark.parse(text.encode(), start) except Exception as be: assert type(e) == type(be), "Parser with and without `use_bytes` raise different exceptions" raise e assert False, "Parser without `use_bytes` raises exception, with doesn't" try: bv = self.bytes_lark.parse(text.encode(), start) except Exception as be: assert False, "Parser without `use_bytes` doesn't raise an exception, with does" _tree_structure_check(rv, bv) return rv @classmethod def open(cls, grammar_filename, rel_to=None, **options): if rel_to: basepath = os.path.dirname(rel_to) grammar_filename = os.path.join(basepath, grammar_filename) with open(grammar_filename, encoding='utf8') as f: return cls(f, **options) def save(self,f): self.text_lexer.save(f) if self.bytes_lark is not None: self.bytes_lark.save(f) def load(self,f): self.text_lexer = self.text_lexer.load(f) if self.bytes_lark is not None: self.bytes_lark.load(f) def _make_parser_test(LEXER, PARSER): lexer_class_or_name = { 'custom_new': CustomLexerNew, 'custom_old': CustomLexerOld, }.get(LEXER, LEXER) def _Lark(grammar, **kwargs): return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs) def _Lark_open(gfilename, **kwargs): return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs) if (LEXER, PARSER) == ('basic', 'earley'): # Check that the `lark.lark` grammar represents can parse every example used in these tests. # basic-Earley was an arbitrary choice, to make sure it only ran once. lalr_parser = Lark.open(os.path.join(os.path.dirname(lark.__file__), 'grammars/lark.lark'), parser='lalr') def wrap_with_test_grammar(f): def _f(x, **kwargs): inst = f(x, **kwargs) lalr_parser.parse(inst.source_grammar) # Test after instance creation. When the grammar should fail, don't test it. 
return inst return _f _Lark = wrap_with_test_grammar(_Lark) _Lark_open = wrap_with_test_grammar(_Lark_open) class _TestParser(unittest.TestCase): def test_basic1(self): g = _Lark("""start: a+ b a* "b" a* b: "b" a: "a" """) r = g.parse('aaabaab') self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' ) r = g.parse('aaabaaba') self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' ) self.assertRaises(ParseError, g.parse, 'aaabaa') def test_basic2(self): # Multiple parsers and colliding tokens g = _Lark("""start: B A B: "12" A: "1" """) g2 = _Lark("""start: B A B: "12" A: "2" """) x = g.parse('121') assert x.data == 'start' and x.children == ['12', '1'], x x = g2.parse('122') assert x.data == 'start' and x.children == ['12', '2'], x def test_stringio_unicode(self): """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object""" _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" ')) def test_unicode(self): g = _Lark(u"""start: UNIA UNIB UNIA UNIA: /\xa3/ UNIB: /\u0101/ """) g.parse(u'\xa3\u0101\u00a3') def test_unicode2(self): g = _Lark(r"""start: UNIA UNIB UNIA UNIC UNIA: /\xa3/ UNIB: "a\u0101b\ " UNIC: /a?\u0101c\n/ """) g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n') def test_unicode3(self): g = _Lark(r"""start: UNIA UNIB UNIA UNIC UNIA: /\xa3/ UNIB: "\u0101" UNIC: /\u0203/ /\n/ """) g.parse(u'\xa3\u0101\u00a3\u0203\n') def test_unicode4(self): g = _Lark(r"""start: UNIA UNIB UNIA UNIC UNIA: /\xa3/ UNIB: "\U0010FFFF" UNIC: /\U00100000/ /\n/ """) g.parse(u'\xa3\U0010FFFF\u00a3\U00100000\n') def test_hex_escape(self): g = _Lark(r"""start: A B C A: "\x01" B: /\x02/ C: "\xABCD" """) g.parse('\x01\x02\xABCD') def test_unicode_literal_range_escape(self): g = _Lark(r"""start: A+ A: "\u0061".."\u0063" """) g.parse('abc') def test_unicode_literal_range_escape2(self): g = _Lark(r"""start: A+ A: "\U0000FFFF".."\U00010002" """) g.parse('\U0000FFFF\U00010000\U00010001\U00010002') def test_hex_literal_range_escape(self): g = _Lark(r"""start: A+ A: "\x01".."\x03" """) g.parse('\x01\x02\x03') def test_bytes_utf8(self): g = r""" start: BOM? char+ BOM: "\xef\xbb\xbf" char: CHAR1 | CHAR2 | CHAR3 | CHAR4 CONTINUATION_BYTE: "\x80" .. "\xbf" CHAR1: "\x00" .. "\x7f" CHAR2: "\xc0" .. "\xdf" CONTINUATION_BYTE CHAR3: "\xe0" .. "\xef" CONTINUATION_BYTE CONTINUATION_BYTE CHAR4: "\xf0" .. "\xf7" CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE """ g = _Lark(g, use_bytes=True) s = u"🔣 地? gurīn".encode('utf-8') self.assertEqual(len(g.parse(s).children), 10) for enc, j in [("sjis", u"地球の絵はグリーンでグッド? Chikyuu no e wa guriin de guddo"), ("sjis", u"売春婦"), ("euc-jp", u"乂鵬鵠")]: s = j.encode(enc) self.assertRaises(UnexpectedCharacters, g.parse, s) @unittest.skipIf(PARSER == 'cyk', "Takes forever") def test_stack_for_ebnf(self): """Verify that stack depth isn't an issue for EBNF grammars""" g = _Lark(r"""start: a+ a : "a" """) g.parse("a" * (sys.getrecursionlimit()*2 )) def test_expand1_lists_with_one_item(self): g = _Lark(r"""start: list ?list: item+ item : A A: "a" """) r = g.parse("a") # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item' self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',)) # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule self.assertEqual(len(r.children), 1) def test_expand1_lists_with_one_item_2(self): g = _Lark(r"""start: list ?list: item+ "!" 
item : A A: "a" """) r = g.parse("a!") # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item' self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',)) # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule self.assertEqual(len(r.children), 1) def test_dont_expand1_lists_with_multiple_items(self): g = _Lark(r"""start: list ?list: item+ item : A A: "a" """) r = g.parse("aa") # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',)) # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule self.assertEqual(len(r.children), 1) # Sanity check: verify that 'list' contains the two 'item's we've given it [list] = r.children self.assertSequenceEqual([item.data for item in list.children], ('item', 'item')) def test_dont_expand1_lists_with_multiple_items_2(self): g = _Lark(r"""start: list ?list: item+ "!" item : A A: "a" """) r = g.parse("aa!") # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',)) # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule self.assertEqual(len(r.children), 1) # Sanity check: verify that 'list' contains the two 'item's we've given it [list] = r.children self.assertSequenceEqual([item.data for item in list.children], ('item', 'item')) @unittest.skipIf(PARSER == 'cyk', "No empty rules") def test_empty_expand1_list(self): g = _Lark(r"""start: list ?list: item* item : A A: "a" """) r = g.parse("") # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',)) # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule self.assertEqual(len(r.children), 1) # Sanity check: verify that 'list' contains no 'item's as we've given it none [list] = r.children self.assertSequenceEqual([item.data for item in list.children], ()) @unittest.skipIf(PARSER == 'cyk', "No empty rules") def test_empty_expand1_list_2(self): g = _Lark(r"""start: list ?list: item* "!"? item : A A: "a" """) r = g.parse("") # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. 
none) it should *not* have expanded self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',)) # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule self.assertEqual(len(r.children), 1) # Sanity check: verify that 'list' contains no 'item's as we've given it none [list] = r.children self.assertSequenceEqual([item.data for item in list.children], ()) @unittest.skipIf(PARSER == 'cyk', "No empty rules") def test_empty_flatten_list(self): g = _Lark(r"""start: list list: | item "," list item : A A: "a" """) r = g.parse("") # Because 'list' is a flatten rule it's top-level element should *never* be expanded self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',)) # Sanity check: verify that 'list' contains no 'item's as we've given it none [list] = r.children self.assertSequenceEqual([item.data for item in list.children], ()) def test_token_collision(self): g = _Lark(r"""start: "Hello" NAME NAME: /\w/+ %ignore " " """) x = g.parse('Hello World') self.assertSequenceEqual(x.children, ['World']) x = g.parse('Hello HelloWorld') self.assertSequenceEqual(x.children, ['HelloWorld']) def test_token_collision_WS(self): g = _Lark(r"""start: "Hello" NAME NAME: /\w/+ %import common.WS %ignore WS """) x = g.parse('Hello World') self.assertSequenceEqual(x.children, ['World']) x = g.parse('Hello HelloWorld') self.assertSequenceEqual(x.children, ['HelloWorld']) def test_token_collision2(self): g = _Lark(""" !start: "starts" %import common.LCASE_LETTER """) x = g.parse("starts") self.assertSequenceEqual(x.children, ['starts']) def test_templates(self): g = _Lark(r""" start: "[" sep{NUMBER, ","} "]" sep{item, delim}: item (delim item)* NUMBER: /\d+/ %ignore " " """) x = g.parse("[1, 2, 3, 4]") self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])]) x = g.parse("[1]") self.assertSequenceEqual(x.children, [Tree('sep', ['1'])]) def test_templates_recursion(self): g = _Lark(r""" start: "[" _sep{NUMBER, ","} "]" _sep{item, delim}: item | _sep{item, delim} delim item NUMBER: /\d+/ %ignore " " """) x = g.parse("[1, 2, 3, 4]") self.assertSequenceEqual(x.children, ['1', '2', '3', '4']) x = g.parse("[1]") self.assertSequenceEqual(x.children, ['1']) def test_templates_import(self): g = _Lark_open("test_templates_import.lark", rel_to=__file__) x = g.parse("[1, 2, 3, 4]") self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])]) x = g.parse("[1]") self.assertSequenceEqual(x.children, [Tree('sep', ['1'])]) def test_templates_alias(self): g = _Lark(r""" start: expr{"C"} expr{t}: "A" t | "B" t -> b """) x = g.parse("AC") self.assertSequenceEqual(x.children, [Tree('expr', [])]) x = g.parse("BC") self.assertSequenceEqual(x.children, [Tree('b', [])]) def test_templates_modifiers(self): g = _Lark(r""" start: expr{"B"} !expr{t}: "A" t """) x = g.parse("AB") self.assertSequenceEqual(x.children, [Tree('expr', ["A", "B"])]) g = _Lark(r""" start: _expr{"B"} !_expr{t}: "A" t """) x = g.parse("AB") self.assertSequenceEqual(x.children, ["A", "B"]) g = _Lark(r""" start: expr{b} b: "B" ?expr{t}: "A" t """) x = g.parse("AB") self.assertSequenceEqual(x.children, [Tree('b',[])]) def test_templates_templates(self): g = _Lark('''start: a{b} a{t}: t{"a"} b{x}: x''') x = g.parse('a') self.assertSequenceEqual(x.children, [Tree('a', [Tree('b',[])])]) def test_g_regex_flags(self): g = _Lark(""" start: "a" /b+/ C C: "C" | D D: "D" E E: "e" """, g_regex_flags=re.I) x1 = g.parse("ABBc") x2 = g.parse("abdE") def 
test_rule_collision(self): g = _Lark("""start: "a"+ "b" | "a"+ """) x = g.parse('aaaa') x = g.parse('aaaab') def test_rule_collision2(self): g = _Lark("""start: "a"* "b" | "a"+ """) x = g.parse('aaaa') x = g.parse('aaaab') x = g.parse('b') def test_token_not_anon(self): """Tests that "a" is matched as an anonymous token, and not A. """ g = _Lark("""start: "a" A: "a" """) x = g.parse('a') self.assertEqual(len(x.children), 0, '"a" should be considered anonymous') g = _Lark("""start: "a" A A: "a" """) x = g.parse('aa') self.assertEqual(len(x.children), 1, 'only "a" should be considered anonymous') self.assertEqual(x.children[0].type, "A") g = _Lark("""start: /a/ A: /a/ """) x = g.parse('a') self.assertEqual(len(x.children), 1) self.assertEqual(x.children[0].type, "A", "A isn't associated with /a/") @unittest.skipIf(PARSER == 'cyk', "No empty rules") def test_maybe(self): g = _Lark("""start: ["a"] """) x = g.parse('a') x = g.parse('') def test_start(self): g = _Lark("""a: "a" a? """, start='a') x = g.parse('a') x = g.parse('aa') x = g.parse('aaa') def test_alias(self): g = _Lark("""start: "a" -> b """) x = g.parse('a') self.assertEqual(x.data, "b") def test_token_ebnf(self): g = _Lark("""start: A A: "a"* ("b"? "c".."e")+ """) x = g.parse('abcde') x = g.parse('dd') def test_backslash(self): g = _Lark(r"""start: "\\" "a" """) x = g.parse(r'\a') g = _Lark(r"""start: /\\/ /a/ """) x = g.parse(r'\a') def test_backslash2(self): g = _Lark(r"""start: "\"" "-" """) x = g.parse('"-') g = _Lark(r"""start: /\// /-/ """) x = g.parse('/-') def test_special_chars(self): g = _Lark(r"""start: "\n" """) x = g.parse('\n') g = _Lark(r"""start: /\n/ """) x = g.parse('\n') @unittest.skipIf(PARSER == 'cyk', "No empty rules") def test_empty(self): # Fails an Earley implementation without special handling for empty rules, # or re-processing of already completed rules. g = _Lark(r"""start: _empty a "B" a: _empty "A" _empty: """) x = g.parse('AB') def test_regex_quote(self): g = r""" start: SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING SINGLE_QUOTED_STRING : /'[^']*'/ DOUBLE_QUOTED_STRING : /"[^"]*"/ """ g = _Lark(g) self.assertEqual( g.parse('"hello"').children, ['"hello"']) self.assertEqual( g.parse("'hello'").children, ["'hello'"]) def test_join_regex_flags(self): g = r""" start: A A: B C B: /./s C: /./ """ g = _Lark(g) self.assertEqual(g.parse(" ").children,[" "]) self.assertEqual(g.parse("\n ").children,["\n "]) self.assertRaises(UnexpectedCharacters, g.parse, "\n\n") g = r""" start: A A: B | C B: "b"i C: "c" """ g = _Lark(g) self.assertEqual(g.parse("b").children,["b"]) self.assertEqual(g.parse("B").children,["B"]) self.assertEqual(g.parse("c").children,["c"]) self.assertRaises(UnexpectedCharacters, g.parse, "C") def test_float_without_lexer(self): expected_error = UnexpectedCharacters if 'dynamic' in LEXER else UnexpectedToken if PARSER == 'cyk': expected_error = ParseError g = _Lark("""start: ["+"|"-"] float float: digit* "." digit+ exp? 
| digit+ exp exp: ("e"|"E") ["+"|"-"] digit+ digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9" """) g.parse("1.2") g.parse("-.2e9") g.parse("+2e-9") self.assertRaises( expected_error, g.parse, "+2e-9e") def test_keep_all_tokens(self): l = _Lark("""start: "a"+ """, keep_all_tokens=True) tree = l.parse('aaa') self.assertEqual(tree.children, ['a', 'a', 'a']) def test_token_flags(self): l = _Lark("""!start: "a"i+ """ ) tree = l.parse('aA') self.assertEqual(tree.children, ['a', 'A']) l = _Lark("""!start: /a/i+ """ ) tree = l.parse('aA') self.assertEqual(tree.children, ['a', 'A']) g = """start: NAME "," "a" NAME: /[a-z_]/i /[a-z0-9_]/i* """ l = _Lark(g) tree = l.parse('ab,a') self.assertEqual(tree.children, ['ab']) tree = l.parse('AB,a') self.assertEqual(tree.children, ['AB']) @unittest.skipIf(LEXER in ('basic', 'custom_old', 'custom_new'), "Requires context sensitive terminal selection") def test_token_flags_collision(self): g = """!start: "a"i "a" """ l = _Lark(g) self.assertEqual(l.parse('aa').children, ['a', 'a']) self.assertEqual(l.parse('Aa').children, ['A', 'a']) self.assertRaises(UnexpectedInput, l.parse, 'aA') self.assertRaises(UnexpectedInput, l.parse, 'AA') g = """!start: /a/i /a/ """ l = _Lark(g) self.assertEqual(l.parse('aa').children, ['a', 'a']) self.assertEqual(l.parse('Aa').children, ['A', 'a']) self.assertRaises(UnexpectedInput, l.parse, 'aA') self.assertRaises(UnexpectedInput, l.parse, 'AA') def test_token_flags3(self): l = _Lark("""!start: ABC+ ABC: "abc"i """ ) tree = l.parse('aBcAbC') self.assertEqual(tree.children, ['aBc', 'AbC']) def test_token_flags2(self): g = """!start: ("a"i | /a/ /b/?)+ """ l = _Lark(g) tree = l.parse('aA') self.assertEqual(tree.children, ['a', 'A']) def test_token_flags_verbose(self): g = _Lark(r"""start: NL | ABC ABC: / [a-z] /x NL: /\n/ """) x = g.parse('a') self.assertEqual(x.children, ['a']) def test_token_flags_verbose_multiline(self): g = _Lark(r"""start: ABC ABC: / a b c d e f /x """) x = g.parse('abcdef') self.assertEqual(x.children, ['abcdef']) @unittest.skipIf(PARSER == 'cyk', "No empty rules") def test_twice_empty(self): g = """!start: ("A"?)? """ l = _Lark(g) tree = l.parse('A') self.assertEqual(tree.children, ['A']) tree = l.parse('') self.assertEqual(tree.children, []) def test_line_and_column(self): g = r"""!start: "A" bc "D" !bc: "B\nC" """ l = _Lark(g) a, bc, d = l.parse("AB\nCD").children self.assertEqual(a.line, 1) self.assertEqual(a.column, 1) bc ,= bc.children self.assertEqual(bc.line, 1) self.assertEqual(bc.column, 2) self.assertEqual(d.line, 2) self.assertEqual(d.column, 2) # if LEXER != 'dynamic': self.assertEqual(a.end_line, 1) self.assertEqual(a.end_column, 2) self.assertEqual(bc.end_line, 2) self.assertEqual(bc.end_column, 2) self.assertEqual(d.end_line, 2) self.assertEqual(d.end_column, 3) def test_reduce_cycle(self): """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state. It seems that the correct solution is to explicitly distinguish finalization in the reduce() function. 
""" l = _Lark(""" term: A | term term A: "a" """, start='term') tree = l.parse("aa") self.assertEqual(len(tree.children), 2) @unittest.skipIf('dynamic' in LEXER, "basic lexer prioritization differs from dynamic lexer prioritization") def test_lexer_prioritization(self): "Tests effect of priority on result" grammar = """ start: A B | AB A.2: "a" B: "b" AB: "ab" """ l = _Lark(grammar) res = l.parse("ab") self.assertEqual(res.children, ['a', 'b']) self.assertNotEqual(res.children, ['ab']) grammar = """ start: A B | AB A: "a" B: "b" AB.3: "ab" """ l = _Lark(grammar) res = l.parse("ab") self.assertNotEqual(res.children, ['a', 'b']) self.assertEqual(res.children, ['ab']) grammar = """ start: A B | AB A: "a" B.-20: "b" AB.-10: "ab" """ l = _Lark(grammar) res = l.parse("ab") self.assertEqual(res.children, ['a', 'b']) grammar = """ start: A B | AB A.-99999999999999999999999: "a" B: "b" AB: "ab" """ l = _Lark(grammar) res = l.parse("ab") self.assertEqual(res.children, ['ab']) @unittest.skipIf('dynamic' not in LEXER, "dynamic lexer prioritization differs from basic lexer prioritization") def test_dynamic_lexer_prioritization(self): "Tests effect of priority on result" grammar = """ start: A B | AB A.2: "a" B: "b" AB: "ab" """ l = _Lark(grammar) res = l.parse("ab") self.assertEqual(res.children, ['a', 'b']) self.assertNotEqual(res.children, ['ab']) grammar = """ start: A B | AB A: "a" B: "b" AB.3: "ab" """ l = _Lark(grammar) res = l.parse("ab") self.assertNotEqual(res.children, ['a', 'b']) self.assertEqual(res.children, ['ab']) # this case differs from prioritization with a basic lexer grammar = """ start: A B | AB A: "a" B.-20: "b" AB.-10: "ab" """ l = _Lark(grammar) res = l.parse("ab") self.assertEqual(res.children, ['ab']) grammar = """ start: A B | AB A.-99999999999999999999999: "a" B: "b" AB: "ab" """ l = _Lark(grammar) res = l.parse("ab") self.assertEqual(res.children, ['ab']) def test_import(self): grammar = """ start: NUMBER WORD %import common.NUMBER %import common.WORD %import common.WS %ignore WS """ l = _Lark(grammar) x = l.parse('12 elephants') self.assertEqual(x.children, ['12', 'elephants']) def test_import_rename(self): grammar = """ start: N W %import common.NUMBER -> N %import common.WORD -> W %import common.WS %ignore WS """ l = _Lark(grammar) x = l.parse('12 elephants') self.assertEqual(x.children, ['12', 'elephants']) def test_relative_import(self): l = _Lark_open('test_relative_import.lark', rel_to=__file__) x = l.parse('12 lions') self.assertEqual(x.children, ['12', 'lions']) def test_relative_import_unicode(self): l = _Lark_open('test_relative_import_unicode.lark', rel_to=__file__) x = l.parse(u'Ø') self.assertEqual(x.children, [u'Ø']) def test_relative_import_rename(self): l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__) x = l.parse('12 lions') self.assertEqual(x.children, ['12', 'lions']) def test_relative_rule_import(self): l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__) x = l.parse('xaabby') self.assertEqual(x.children, [ 'x', Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']), 'y']) def test_relative_rule_import_drop_ignore(self): # %ignore rules are dropped on import l = _Lark_open('test_relative_rule_import_drop_ignore.lark', rel_to=__file__) self.assertRaises((ParseError, UnexpectedInput), l.parse, 'xa abby') def test_relative_rule_import_subrule(self): l = _Lark_open('test_relative_rule_import_subrule.lark', rel_to=__file__) x = l.parse('xaabby') self.assertEqual(x.children, [ 'x', Tree('startab', [ Tree('grammars__ab__expr', [ 'a', 
Tree('grammars__ab__expr', ['a', 'b']), 'b', ]), ]), 'y']) def test_relative_rule_import_subrule_no_conflict(self): l = _Lark_open( 'test_relative_rule_import_subrule_no_conflict.lark', rel_to=__file__) x = l.parse('xaby') self.assertEqual(x.children, [Tree('expr', [ 'x', Tree('startab', [ Tree('grammars__ab__expr', ['a', 'b']), ]), 'y'])]) self.assertRaises((ParseError, UnexpectedInput), l.parse, 'xaxabyby') def test_relative_rule_import_rename(self): l = _Lark_open('test_relative_rule_import_rename.lark', rel_to=__file__) x = l.parse('xaabby') self.assertEqual(x.children, [ 'x', Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']), 'y']) def test_multi_import(self): grammar = """ start: NUMBER WORD %import common (NUMBER, WORD, WS) %ignore WS """ l = _Lark(grammar) x = l.parse('12 toucans') self.assertEqual(x.children, ['12', 'toucans']) def test_relative_multi_import(self): l = _Lark_open("test_relative_multi_import.lark", rel_to=__file__) x = l.parse('12 capybaras') self.assertEqual(x.children, ['12', 'capybaras']) def test_relative_import_preserves_leading_underscore(self): l = _Lark_open("test_relative_import_preserves_leading_underscore.lark", rel_to=__file__) x = l.parse('Ax') self.assertEqual(next(x.find_data('c')).children, ['A']) def test_relative_import_of_nested_grammar(self): l = _Lark_open("grammars/test_relative_import_of_nested_grammar.lark", rel_to=__file__) x = l.parse('N') self.assertEqual(next(x.find_data('rule_to_import')).children, ['N']) def test_relative_import_rules_dependencies_imported_only_once(self): l = _Lark_open("test_relative_import_rules_dependencies_imported_only_once.lark", rel_to=__file__) x = l.parse('AAA') self.assertEqual(next(x.find_data('a')).children, ['A']) self.assertEqual(next(x.find_data('b')).children, ['A']) self.assertEqual(next(x.find_data('d')).children, ['A']) def test_import_errors(self): grammar = """ start: NUMBER WORD %import .grammars.bad_test.NUMBER """ self.assertRaises(IOError, _Lark, grammar) grammar = """ start: NUMBER WORD %import bad_test.NUMBER """ self.assertRaises(IOError, _Lark, grammar) @unittest.skipIf('dynamic' in LEXER, "%declare/postlex doesn't work with dynamic") def test_postlex_declare(self): # Note: this test does a lot. maybe split it up? class TestPostLexer: def process(self, stream): for t in stream: if t.type == 'A': t.type = 'B' yield t else: yield t always_accept = ('A',) parser = _Lark(""" start: B A: "A" %declare B """, postlex=TestPostLexer()) test_file = "A" tree = parser.parse(test_file) self.assertEqual(tree.children, [Token('B', 'A')]) @unittest.skipIf('dynamic' in LEXER, "%declare/postlex doesn't work with dynamic") def test_postlex_indenter(self): class CustomIndenter(Indenter): NL_type = 'NEWLINE' OPEN_PAREN_types = [] CLOSE_PAREN_types = [] INDENT_type = 'INDENT' DEDENT_type = 'DEDENT' tab_len = 8 grammar = r""" start: "a" NEWLINE INDENT "b" NEWLINE DEDENT NEWLINE: ( /\r?\n */ )+ %ignore " "+ %declare INDENT DEDENT """ parser = _Lark(grammar, postlex=CustomIndenter()) parser.parse("a\n b\n") @unittest.skipIf(PARSER == 'cyk', "Doesn't work for CYK") def test_prioritization(self): "Tests effect of priority on result" grammar = """ start: a | b a.1: "a" b.2: "a" """ l = _Lark(grammar) res = l.parse("a") self.assertEqual(res.children[0].data, 'b') grammar = """ start: a | b a.2: "a" b.1: "a" """ l = _Lark(grammar) res = l.parse("a") self.assertEqual(res.children[0].data, 'a') grammar = """ start: a | b a.2: "A"+ b.1: "A"+ "B"? 
""" l = _Lark(grammar) res = l.parse("AAAA") self.assertEqual(res.children[0].data, 'a') l = _Lark(grammar) res = l.parse("AAAB") self.assertEqual(res.children[0].data, 'b') l = _Lark(grammar, priority="invert") res = l.parse("AAAA") self.assertEqual(res.children[0].data, 'b') @unittest.skipIf(PARSER != 'earley' or 'dynamic' not in LEXER, "Currently only Earley supports priority sum in rules") def test_prioritization_sum(self): "Tests effect of priority on result" grammar = """ start: ab_ b_ a_ | indirection indirection: a_ bb_ a_ a_: "a" b_: "b" ab_: "ab" bb_.1: "bb" """ l = _Lark(grammar, priority="invert") res = l.parse('abba') self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') grammar = """ start: ab_ b_ a_ | indirection indirection: a_ bb_ a_ a_: "a" b_: "b" ab_.1: "ab" bb_: "bb" """ l = _Lark(grammar, priority="invert") res = l.parse('abba') self.assertEqual(''.join(child.data for child in res.children), 'indirection') grammar = """ start: ab_ b_ a_ | indirection indirection: a_ bb_ a_ a_.2: "a" b_.1: "b" ab_.3: "ab" bb_.3: "bb" """ l = _Lark(grammar, priority="invert") res = l.parse('abba') self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') grammar = """ start: ab_ b_ a_ | indirection indirection: a_ bb_ a_ a_.1: "a" b_.1: "b" ab_.4: "ab" bb_.3: "bb" """ l = _Lark(grammar, priority="invert") res = l.parse('abba') self.assertEqual(''.join(child.data for child in res.children), 'indirection') def test_utf8(self): g = u"""start: a a: "±a" """ l = _Lark(g) self.assertEqual(l.parse(u'±a'), Tree('start', [Tree('a', [])])) g = u"""start: A A: "±a" """ l = _Lark(g) self.assertEqual(l.parse(u'±a'), Tree('start', [u'\xb1a'])) @unittest.skipIf(PARSER == 'cyk', "No empty rules") def test_ignore(self): grammar = r""" COMMENT: /(!|(\/\/))[^\n]*/ %ignore COMMENT %import common.WS -> _WS %import common.INT start: "INT"i _WS+ INT _WS* """ parser = _Lark(grammar) tree = parser.parse("int 1 ! This is a comment\n") self.assertEqual(tree.children, ['1']) tree = parser.parse("int 1 ! This is a comment") # A trailing ignore token can be tricky! 
self.assertEqual(tree.children, ['1']) parser = _Lark(r""" start : "a"* %ignore "b" """) tree = parser.parse("bb") self.assertEqual(tree.children, []) def test_regex_escaping(self): g = _Lark("start: /[ab]/") g.parse('a') g.parse('b') self.assertRaises( UnexpectedInput, g.parse, 'c') _Lark(r'start: /\w/').parse('a') g = _Lark(r'start: /\\w/') self.assertRaises( UnexpectedInput, g.parse, 'a') g.parse(r'\w') _Lark(r'start: /\[/').parse('[') _Lark(r'start: /\//').parse('/') _Lark(r'start: /\\/').parse('\\') _Lark(r'start: /\[ab]/').parse('[ab]') _Lark(r'start: /\\[ab]/').parse('\\a') _Lark(r'start: /\t/').parse('\t') _Lark(r'start: /\\t/').parse('\\t') _Lark(r'start: /\\\t/').parse('\\\t') _Lark(r'start: "\t"').parse('\t') _Lark(r'start: "\\t"').parse('\\t') _Lark(r'start: "\\\t"').parse('\\\t') def test_ranged_repeat_rules(self): g = u"""!start: "A"~3 """ l = _Lark(g) self.assertEqual(l.parse(u'AAA'), Tree('start', ["A", "A", "A"])) self.assertRaises(ParseError, l.parse, u'AA') self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA') g = u"""!start: "A"~0..2 """ if PARSER != 'cyk': # XXX CYK currently doesn't support empty grammars l = _Lark(g) self.assertEqual(l.parse(u''), Tree('start', [])) self.assertEqual(l.parse(u'A'), Tree('start', ['A'])) self.assertEqual(l.parse(u'AA'), Tree('start', ['A', 'A'])) self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'AAA') g = u"""!start: "A"~3..2 """ self.assertRaises(GrammarError, _Lark, g) g = u"""!start: "A"~2..3 "B"~2 """ l = _Lark(g) self.assertEqual(l.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B'])) self.assertEqual(l.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B'])) self.assertRaises(ParseError, l.parse, u'AAAB') self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB') self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB') self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB') def test_priority_vs_embedded(self): g = """ A.2: "a" WORD: ("a".."z")+ start: (A | WORD)+ """ l = _Lark(g) t = l.parse('abc') self.assertEqual(t.children, ['a', 'bc']) self.assertEqual(t.children[0].type, 'A') def test_line_counting(self): p = _Lark("start: /[^x]+/") text = 'hello\nworld' t = p.parse(text) tok = t.children[0] self.assertEqual(tok, text) self.assertEqual(tok.line, 1) self.assertEqual(tok.column, 1) # if _LEXER != 'dynamic': self.assertEqual(tok.end_line, 2) self.assertEqual(tok.end_column, 6) @unittest.skipIf(PARSER=='cyk', "Empty rules") def test_empty_end(self): p = _Lark(""" start: b c d b: "B" c: | "C" d: | "D" """) res = p.parse('B') self.assertEqual(len(res.children), 3) @unittest.skipIf(PARSER=='cyk', "Empty rules") def test_maybe_placeholders(self): # Anonymous tokens shouldn't count p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True) self.assertEqual(p.parse("").children, []) # Unless keep_all_tokens=True p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True, keep_all_tokens=True) self.assertEqual(p.parse("").children, [None, None, None]) # All invisible constructs shouldn't count p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c] A: "a" _c: "c" """, maybe_placeholders=True) self.assertEqual(p.parse("").children, [None]) self.assertEqual(p.parse("c").children, [None]) self.assertEqual(p.parse("aefc").children, ['a']) # ? shouldn't apply p = _Lark("""!start: ["a"] "b"? 
["c"] """, maybe_placeholders=True) self.assertEqual(p.parse("").children, [None, None]) self.assertEqual(p.parse("b").children, [None, 'b', None]) p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True) self.assertEqual(p.parse("").children, [None, None, None]) self.assertEqual(p.parse("a").children, ['a', None, None]) self.assertEqual(p.parse("b").children, [None, 'b', None]) self.assertEqual(p.parse("c").children, [None, None, 'c']) self.assertEqual(p.parse("ab").children, ['a', 'b', None]) self.assertEqual(p.parse("ac").children, ['a', None, 'c']) self.assertEqual(p.parse("bc").children, [None, 'b', 'c']) self.assertEqual(p.parse("abc").children, ['a', 'b', 'c']) p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True) self.assertEqual(p.parse("b").children, [None, 'b', None]) self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None]) self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c']) self.assertEqual(p.parse("babbcabcb").children, [None, 'b', None, 'a', 'b', None, None, 'b', 'c', 'a', 'b', 'c', None, 'b', None]) p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True) self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None]) self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd']) self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None]) self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None]) p = _Lark("""!start: ["a" "b" "c"] """, maybe_placeholders=True) self.assertEqual(p.parse("").children, [None, None, None]) self.assertEqual(p.parse("abc").children, ['a', 'b', 'c']) p = _Lark("""!start: ["a" ["b" "c"]] """, maybe_placeholders=True) self.assertEqual(p.parse("").children, [None, None, None]) self.assertEqual(p.parse("a").children, ['a', None, None]) self.assertEqual(p.parse("abc").children, ['a', 'b', 'c']) p = _Lark(r"""!start: "a" ["b" | "c"] """, maybe_placeholders=True) self.assertEqual(p.parse("a").children, ['a', None]) self.assertEqual(p.parse("ab").children, ['a', 'b']) p = _Lark(r"""!start: "a" ["b" | "c" "d"] """, maybe_placeholders=True) self.assertEqual(p.parse("a").children, ['a', None, None]) # self.assertEqual(p.parse("ab").children, ['a', 'b', None]) # Not implemented; current behavior is incorrect self.assertEqual(p.parse("acd").children, ['a', 'c', 'd']) def test_escaped_string(self): "Tests common.ESCAPED_STRING" grammar = r""" start: ESCAPED_STRING+ %import common (WS_INLINE, ESCAPED_STRING) %ignore WS_INLINE """ parser = _Lark(grammar) parser.parse(r'"\\" "b" "c"') parser.parse(r'"That" "And a \"b"') def test_meddling_unused(self): "Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision" grammar = """ start: EKS* x x: EKS unused: x* EKS: "x" """ parser = _Lark(grammar) @unittest.skipIf(PARSER!='lalr' or LEXER == 'custom_old', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)") def test_serialize(self): grammar = """ start: _ANY b "C" _ANY: /./ b: "B" """ parser = _Lark(grammar) s = BytesIO() parser.save(s) s.seek(0) parser2 = Lark.load(s) self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) ) def test_multi_start(self): parser = _Lark(''' a: "x" "a"? b: "x" "b"? 
''', start=['a', 'b']) self.assertEqual(parser.parse('xa', 'a'), Tree('a', [])) self.assertEqual(parser.parse('xb', 'b'), Tree('b', [])) def test_lexer_detect_newline_tokens(self): # Detect newlines in regular tokens g = _Lark(r"""start: "go" tail* !tail : SA "@" | SB "@" | SC "@" | SD "@" SA : "a" /\n/ SB : /b./s SC : "c" /[^a-z]/ SD : "d" /\s/ """) a,b,c,d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children] self.assertEqual(a.line, 2) self.assertEqual(b.line, 3) self.assertEqual(c.line, 4) self.assertEqual(d.line, 5) # Detect newlines in ignored tokens for re in ['/\\n/', '/[^a-z]/', '/\\s/']: g = _Lark('''!start: "a" "a" %ignore {}'''.format(re)) a, b = g.parse('a\na').children self.assertEqual(a.line, 1) self.assertEqual(b.line, 2) @unittest.skipIf(PARSER=='cyk' or LEXER=='custom_old', "match_examples() not supported for CYK/old custom lexer") def test_match_examples(self): p = _Lark(r""" start: "a" "b" "c" """) def match_error(s): try: _ = p.parse(s) except UnexpectedInput as u: return u.match_examples(p.parse, { 0: ['abe'], 1: ['ab'], 2: ['cbc', 'dbc'], }) assert False assert match_error("abe") == 0 assert match_error("ab") == 1 assert match_error("bbc") == 2 assert match_error("cbc") == 2 self.assertEqual( match_error("dbc"), 2 ) self.assertEqual( match_error("ebc"), 2 ) @unittest.skipIf(not regex, "regex not installed") def test_unicode_class(self): "Tests that character classes from the `regex` module work correctly." g = _Lark(r"""?start: NAME NAME: ID_START ID_CONTINUE* ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/ ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}]+/""", regex=True) self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') @unittest.skipIf(not regex, "regex not installed") def test_unicode_word(self): "Tests that a persistent bug in the `re` module works when `regex` is enabled." g = _Lark(r"""?start: NAME NAME: /[\w]+/ """, regex=True) self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') @unittest.skipIf(not regex, "regex not installed") def test_regex_width_fallback(self): g = r""" start: NAME NAME? NAME: /(?(?=\d)\d+|\w+)/ """ self.assertRaises((GrammarError, LexError, re.error), _Lark, g) p = _Lark(g, regex=True) self.assertEqual(p.parse("123abc"), Tree('start', ['123', 'abc'])) g = r""" start: NAME NAME? 
NAME: /(?(?=\d)\d+|\w*)/ """ self.assertRaises((GrammarError, LexError, re.error), _Lark, g, regex=True) @unittest.skipIf(PARSER != 'lalr', "interactive_parser is only implemented for LALR at the moment") def test_parser_interactive_parser(self): g = _Lark(r''' start: A+ B* A: "a" B: "b" ''') ip = g.parse_interactive() self.assertRaises(UnexpectedToken, ip.feed_eof) self.assertRaises(TypeError, ip.exhaust_lexer) ip.feed_token(Token('A', 'a')) res = ip.feed_eof() self.assertEqual(res, Tree('start', ['a'])) ip = g.parse_interactive("ab") ip.exhaust_lexer() ip_copy = ip.copy() self.assertEqual(ip_copy.parser_state, ip.parser_state) self.assertEqual(ip_copy.lexer_thread.state, ip.lexer_thread.state) self.assertIsNot(ip_copy.parser_state, ip.parser_state) self.assertIsNot(ip_copy.lexer_thread.state, ip.lexer_thread.state) self.assertIsNot(ip_copy.lexer_thread.state.line_ctr, ip.lexer_thread.state.line_ctr) res = ip.feed_eof(ip.lexer_thread.state.last_token) self.assertEqual(res, Tree('start', ['a', 'b'])) self.assertRaises(UnexpectedToken, ip.feed_eof) self.assertRaises(UnexpectedToken, ip_copy.feed_token, Token('A', 'a')) ip_copy.feed_token(Token('B', 'b')) res = ip_copy.feed_eof() self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) @unittest.skipIf(PARSER != 'lalr', "interactive_parser error handling only works with LALR for now") def test_error_with_interactive_parser(self): def ignore_errors(e): if isinstance(e, UnexpectedCharacters): # Skip bad character return True # Must be UnexpectedToken if e.token.type == 'COMMA': # Skip comma return True elif e.token.type == 'SIGNED_NUMBER': # Try to feed a comma and retry the number e.interactive_parser.feed_token(Token('COMMA', ',')) e.interactive_parser.feed_token(e.token) return True # Unhandled error. Will stop parse and raise exception return False g = _Lark(r''' start: "[" num ("," num)* "]" ?num: SIGNED_NUMBER %import common.SIGNED_NUMBER %ignore " " ''') s = "[0 1, 2,, 3,,, 4, 5 6 ]" tree = g.parse(s, on_error=ignore_errors) res = [int(x) for x in tree.children] assert res == list(range(7)) s = "[0 1, 2,@, 3,,, 4, 5 6 ]$" tree = g.parse(s, on_error=ignore_errors) @unittest.skipIf(PARSER != 'lalr', "interactive_parser error handling only works with LALR for now") def test_iter_parse(self): ab_grammar = '!start: "a"* "b"*' parser = _Lark(ab_grammar) ip = parser.parse_interactive("aaabb") i = ip.iter_parse() assert next(i) == 'a' assert next(i) == 'a' assert next(i) == 'a' assert next(i) == 'b' @unittest.skipIf(PARSER != 'lalr', "interactive_parser is only implemented for LALR at the moment") def test_interactive_treeless_transformer(self): grammar = r""" start: SYM+ SYM: "a" | "b" """ class SYMTransformer(lark.visitors.Transformer): def SYM(self, token): return {"a": 1, "b": 2}[str(token)] parser = _Lark(grammar, transformer=SYMTransformer()) res = parser.parse("aba") self.assertEqual(res.children, [1, 2, 1]) ip = parser.parse_interactive("aba") ip.exhaust_lexer() # Previously `accepts` would call `SYMTransformer.SYM` with `Token('SYM', '')`, which would cause an error. 
self.assertEqual(ip.accepts(), {"$END", "SYM"}) res = ip.feed_eof() self.assertEqual(res.children, [1, 2, 1]) @unittest.skipIf(PARSER == 'earley', "Tree-less mode is not supported in earley") def test_default_in_treeless_mode(self): grammar = r""" start: expr expr: A B | A expr B A: "a" B: "b" %import common.WS %ignore WS """ s = 'a a a b b b' class AbTransformer(Transformer): def __default__(self, data, children, meta): return '@', data, children parser = _Lark(grammar) a = AbTransformer().transform(parser.parse(s)) parser = _Lark(grammar, transformer=AbTransformer()) b = parser.parse(s) assert a == b @unittest.skipIf(PARSER != 'lalr', "strict mode is only supported in lalr for now") def test_strict(self): # Test regex collision grammar = r""" start: A | B A: /e?rez/ B: /erez?/ """ self.assertRaises(LexError, _Lark, grammar, strict=True) # Test shift-reduce collision grammar = r""" start: a "." a: "."+ """ self.assertRaises(GrammarError, _Lark, grammar, strict=True) _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() _TestParser.__name__ = _NAME _TestParser.__qualname__ = "tests.test_parser." + _NAME globals()[_NAME] = _TestParser __all__.append(_NAME) _TO_TEST = [ ('basic', 'earley'), ('basic', 'cyk'), ('basic', 'lalr'), ('dynamic', 'earley'), ('dynamic_complete', 'earley'), ('contextual', 'lalr'), ('custom_new', 'lalr'), ('custom_new', 'cyk'), ('custom_old', 'earley'), ] for _LEXER, _PARSER in _TO_TEST: _make_parser_test(_LEXER, _PARSER) for _LEXER in ('basic', 'dynamic', 'dynamic_complete'): _make_full_earley_test(_LEXER) if __name__ == '__main__': unittest.main() lark-1.2.2/tests/test_pattern_matching.py000066400000000000000000000021601465673407200205640ustar00rootroot00000000000000from unittest import TestCase, main from lark import Token class TestPatternMatching(TestCase): token = Token('A', 'a') def setUp(self): pass def test_matches_with_string(self): match self.token: case 'a': pass case _: assert False def test_matches_with_str_positional_arg(self): match self.token: case str('a'): pass case _: assert False def test_matches_with_token_positional_arg(self): match self.token: case Token('a'): assert False case Token('A'): pass case _: assert False def test_matches_with_token_kwarg_type(self): match self.token: case Token(type='A'): pass case _: assert False def test_matches_with_bad_token_type(self): match self.token: case Token(type='B'): assert False case _: pass if __name__ == '__main__': main() lark-1.2.2/tests/test_python_grammar.py000066400000000000000000000156611465673407200202760ustar00rootroot00000000000000import inspect import textwrap from unittest import TestCase, main from lark import Lark from lark.indenter import PythonIndenter from lark.exceptions import UnexpectedCharacters, UnexpectedToken, ParseError valid_DEC_NUMBER = [ "0", "000", "0_0_0", "4_2", "1_0000_0000", "123456789012345678901234567890", ] valid_HEX_NUMBER = [ "0x_f", "0xffff_ffff", "0xffffffffffffffff", "0Xffffffffffffffff", ] valid_OCT_NUMBER = [ "0o5_7_7", "0o_5", "0o77777777777777777", "0O77777777777777777", ] valid_BIN_NUMBER = [ "0b1001_0100", "0b_0", "0b100000000000000000000000000000000000000000000000000000000000000000000", "0B111111111111111111111111111111111111111111111111111111111111111111111", ] valid_FLOAT_NUMBER = [ "1_00_00.5", "1_00_00.5e5", "1_00_00e5_1", "1e1_0", ".1_4", ".1_4e1", "1_2.5", "3.14", "314.", "0.314", "000.314", ".314", "3e14", "3E14", "3e-14", "3e+14", "3.e14", ".3e14", "3.1e4", ] valid_IMAG_NUMBER = [ "0j", "123456789012345678901234567890j", "1_00_00j", 
"1_00_00.5j", "1_00_00e5_1j", ".1_4j", "3_3j", ".5_6j", "3.14j", "314.j", "0.314j", "000.314j", ".314j", "3e14j", "3E14j", "3e-14j", "3e+14j", "3.e14j", ".3e14j", "3.1e4j", ] valid_number = (valid_DEC_NUMBER + valid_HEX_NUMBER + valid_OCT_NUMBER + valid_BIN_NUMBER + valid_FLOAT_NUMBER + valid_IMAG_NUMBER) invalid_number = [ "0_", "42_", "1.4j_", "0x_", "0b1_", "0xf_", "0o5_", "1_Else", "0_b0", "0_xf", "0_o5", "0_7", "09_99", "4_______2", "0.1__4", "0.1__4j", "0b1001__0100", "0xffff__ffff", "0x___", "0o5__77", "1e1__0", "1e1__0j", "1_.4", "1_.4j", "1._4", "1._4j", "._5", "._5j", "1.0e+_1", "1.0e+_1j", "1.4_j", "1.4e5_j", "1_e1", "1.4_e1", "1.4_e1j", "1e_1", "1.4e_1", "1.4e_1j", "1+1.5_j_", "1+1.5_j", "_0", "_42", "_1.4j", "_0x", "_0b1", "_0xf", "_0o5", "_1_Else", "_0_b0", "_0_xf", "_0_o5", "_0_7", "_09_99", "_4_______2", "_0.1__4", "_0.1__4j", "_0b1001__0100", "_0xffff__ffff", "_0x__", "_0o5__77", "_1e1__0", "_1e1__0j", "_1_.4", "_1_.4j", "_1._4", "_1._4j", "_._5", "_._5j", "_1.0e+_1", "_1.0e+_1j", "_1.4_j", "_1.4e5_j", "_1_e1", "_1.4_e1", "_1.4_e1j", "_1e_1", "_1.4e_1", "_1.4e_1j", "_1+1.5_j", "_1+1.5_j", ] valid_match_statements = [ # constant and capture patterns textwrap.dedent(""" match greeting: case "": print("Hello!") case name: print(f"Hi {name}!") """), # pattern unions textwrap.dedent(""" match something: case 0 | 1 | 2: print("Small number") case [] | [_]: print("A short sequence") case str() | bytes(): print("Something string-like") case _: print("Something else") """), # guards textwrap.dedent(""" match val: case [x, y] if x > 0 and y > 0: return f"A pair of {x} and {y}" case [x, *other]: return f"A sequence starting with {x}" case int(): return f"Some integer" """), # "as" patterns textwrap.dedent(""" match command.split(): case ["go", ("north" | "south" | "east" | "west") as direction]: current_room = current_room.neighbor(direction) """) ] invalid_match_statements = [ # no cases textwrap.dedent(""" match val: pass """), # cases not indented relative to match textwrap.dedent(""" match val: case x: pass """) ] class TestPythonParser(TestCase): @classmethod def setUpClass(cls): cls.python_parser = Lark.open_from_package( "lark", "python.lark", ("grammars",), parser='lalr', postlex=PythonIndenter(), start=["number", "file_input"]) def _test_parsed_is_this_terminal(self, text, terminal, start): tree = self.python_parser.parse(text, start=start) self.assertEqual(len(tree.children), 1) token = tree.children[0] self.assertEqual(token.type, terminal) self.assertEqual(token.value, text) def _test_parsed_is_file_containing_only_this_statement(self, text, statement): tree = self.python_parser.parse(text, start="file_input") self.assertEqual(len(tree.children), 1) statement_token = tree.children[0].data self.assertEqual(statement_token.type, "RULE") self.assertEqual(statement_token.value, statement) def test_DEC_NUMBER(self): for case in valid_DEC_NUMBER: self._test_parsed_is_this_terminal(case, "DEC_NUMBER", "number") def test_HEX_NUMBER(self): for case in valid_HEX_NUMBER: self._test_parsed_is_this_terminal(case, "HEX_NUMBER", "number") def test_OCT_NUMBER(self): for case in valid_OCT_NUMBER: self._test_parsed_is_this_terminal(case, "OCT_NUMBER", "number") def test_BIN_NUMBER(self): for case in valid_BIN_NUMBER: self._test_parsed_is_this_terminal(case, "BIN_NUMBER", "number") def test_FLOAT_NUMBER(self): for case in valid_FLOAT_NUMBER: self._test_parsed_is_this_terminal(case, "FLOAT_NUMBER", "number") def test_IMAG_NUMBER(self): for case in valid_IMAG_NUMBER: 
self._test_parsed_is_this_terminal(case, "IMAG_NUMBER", "number") def test_valid_number(self): # XXX: all valid test cases should run with the above tests for numbers for case in valid_number: self.python_parser.parse(case, start="number") # no error def test_invalid_number(self): for case in invalid_number: with self.assertRaises((UnexpectedCharacters, UnexpectedToken)): self.python_parser.parse(case, start="number") def test_valid_match_statement(self): for case in valid_match_statements: self._test_parsed_is_file_containing_only_this_statement(case, "match_stmt") def test_invalid_match_statement(self): for case in invalid_match_statements: with self.assertRaises(ParseError): self.python_parser.parse(case, start="file_input") def test_assign_to_variable_named_match(self): text = textwrap.dedent(""" match = re.match(pattern, string) """) self._test_parsed_is_file_containing_only_this_statement(text, "assign_stmt") def test_assign_expr_with_variable_named_match(self): text = textwrap.dedent(""" if match := re.match(pattern, string): do_thing(match) """) self._test_parsed_is_file_containing_only_this_statement(text, "if_stmt") if __name__ == '__main__': main() lark-1.2.2/tests/test_reconstructor.py000066400000000000000000000117371465673407200201630ustar00rootroot00000000000000# coding=utf-8 import json import sys import unittest from itertools import product from unittest import TestCase from lark import Lark from lark.reconstruct import Reconstructor common = """ %import common (WS_INLINE, NUMBER, WORD) %ignore WS_INLINE """ def _remove_ws(s): return s.replace(' ', '').replace('\n', '') class TestReconstructor(TestCase): def assert_reconstruct(self, grammar, code, **options): parser = Lark(grammar, parser='lalr', maybe_placeholders=False, **options) tree = parser.parse(code) new = Reconstructor(parser).reconstruct(tree) self.assertEqual(_remove_ws(code), _remove_ws(new)) def test_starred_rule(self): g = """ start: item* item: NL | rule rule: WORD ":" NUMBER NL: /(\\r?\\n)+\\s*/ """ + common code = """ Elephants: 12 """ self.assert_reconstruct(g, code) def test_starred_group(self): g = """ start: (rule | NL)* rule: WORD ":" NUMBER NL: /(\\r?\\n)+\\s*/ """ + common code = """ Elephants: 12 """ self.assert_reconstruct(g, code) def test_alias(self): g = """ start: line* line: NL | rule | "hello" -> hi rule: WORD ":" NUMBER NL: /(\\r?\\n)+\\s*/ """ + common code = """ Elephants: 12 hello """ self.assert_reconstruct(g, code) def test_keep_tokens(self): g = """ start: (NL | stmt)* stmt: var op var !op: ("+" | "-" | "*" | "/") var: WORD NL: /(\\r?\\n)+\\s*/ """ + common code = """ a+b """ self.assert_reconstruct(g, code) def test_expand_rule(self): g = """ ?start: (NL | mult_stmt)* ?mult_stmt: sum_stmt ["*" sum_stmt] ?sum_stmt: var ["+" var] var: WORD NL: /(\\r?\\n)+\\s*/ """ + common code = ['a', 'a*b', 'a+b', 'a*b+c', 'a+b*c', 'a+b*c+d'] for c in code: self.assert_reconstruct(g, c) def test_json_example(self): test_json = ''' { "empty_object" : {}, "empty_array" : [], "booleans" : { "YES" : true, "NO" : false }, "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ], "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ], "nothing" : null } ''' json_grammar = r""" ?start: value ?value: object | array | string | SIGNED_NUMBER -> number | "true" -> true | "false" -> false | "null" -> null array : "[" [value ("," value)*] "]" object : "{" [pair ("," pair)*] "}" pair : string ":" value string : ESCAPED_STRING %import common.ESCAPED_STRING %import common.SIGNED_NUMBER %import common.WS %ignore WS """ 
json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False) tree = json_parser.parse(test_json) new_json = Reconstructor(json_parser).reconstruct(tree) self.assertEqual(json.loads(new_json), json.loads(test_json)) def test_keep_all_tokens(self): g = """ start: "a"? _B? c? _d? _B: "b" c: "c" _d: "d" """ examples = list(map(''.join, product(('', 'a'), ('', 'b'), ('', 'c'), ('', 'd'), ))) for code in examples: self.assert_reconstruct(g, code, keep_all_tokens=True) def test_switch_grammar_unicode_terminal(self): """ This test checks that a parse tree built with a grammar containing only ascii characters can be reconstructed with a grammar that has unicode rules (or vice versa). The original bug assigned ANON terminals to unicode keywords, which offsets the ANON terminal count in the unicode grammar and causes subsequent identical ANON tokens (e.g., `+=`) to mismatch between the two grammars. """ g1 = """ start: (NL | stmt)* stmt: "keyword" var op var !op: ("+=" | "-=" | "*=" | "/=") var: WORD NL: /(\\r?\\n)+\\s*/ """ + common g2 = """ start: (NL | stmt)* stmt: "குறிப்பு" var op var !op: ("+=" | "-=" | "*=" | "/=") var: WORD NL: /(\\r?\\n)+\\s*/ """ + common code = """ keyword x += y """ l1 = Lark(g1, parser='lalr', maybe_placeholders=False) l2 = Lark(g2, parser='lalr', maybe_placeholders=False) r = Reconstructor(l2) tree = l1.parse(code) code2 = r.reconstruct(tree) assert l2.parse(code2) == tree if __name__ == '__main__': unittest.main() lark-1.2.2/tests/test_relative_import.lark000066400000000000000000000001441465673407200207430ustar00rootroot00000000000000start: NUMBER WORD %import .grammars.test.NUMBER %import common.WORD %import common.WS %ignore WS lark-1.2.2/tests/test_relative_import_preserves_leading_underscore.lark000066400000000000000000000000701465673407200267530ustar00rootroot00000000000000start: c %import .grammars.leading_underscore_grammar.clark-1.2.2/tests/test_relative_import_rename.lark000066400000000000000000000001441465673407200222720ustar00rootroot00000000000000start: N WORD %import .grammars.test.NUMBER -> N %import common.WORD %import common.WS %ignore WS lark-1.2.2/tests/test_relative_import_rules_dependencies_imported_only_once.lark000066400000000000000000000002461465673407200306360ustar00rootroot00000000000000%import .grammars.three_rules_using_same_token.a %import .grammars.three_rules_using_same_token.b %import .grammars.three_rules_using_same_token.c -> d start: a b d lark-1.2.2/tests/test_relative_import_unicode.lark000066400000000000000000000000661465673407200224540ustar00rootroot00000000000000start: UNICODE %import .grammars.test_unicode.UNICODElark-1.2.2/tests/test_relative_multi_import.lark000066400000000000000000000001111465673407200221470ustar00rootroot00000000000000start: NUMBER WORD %import .grammars.test (NUMBER, WORD, WS) %ignore WS lark-1.2.2/tests/test_relative_rule_import.lark000066400000000000000000000000731465673407200217730ustar00rootroot00000000000000start: X expr Y X: "x" Y: "y" %import .grammars.ab.expr lark-1.2.2/tests/test_relative_rule_import_drop_ignore.lark000066400000000000000000000000731465673407200243620ustar00rootroot00000000000000start: X expr Y X: "x" Y: "y" %import .grammars.ab.expr lark-1.2.2/tests/test_relative_rule_import_rename.lark000066400000000000000000000000771465673407200233260ustar00rootroot00000000000000start: X ab Y X: "x" Y: "y" %import .grammars.ab.expr -> ab lark-1.2.2/tests/test_relative_rule_import_subrule.lark000066400000000000000000000001011465673407200235240ustar00rootroot00000000000000start: X 
startab Y X: "x" Y: "y" %import .grammars.ab.startab lark-1.2.2/tests/test_relative_rule_import_subrule_no_conflict.lark000066400000000000000000000001151465673407200261060ustar00rootroot00000000000000start: expr expr: X startab Y X: "x" Y: "y" %import .grammars.ab.startab lark-1.2.2/tests/test_templates_import.lark000066400000000000000000000001311465673407200211220ustar00rootroot00000000000000start: "[" sep{NUMBER, ","} "]" NUMBER: /\d+/ %ignore " " %import .grammars.templates.seplark-1.2.2/tests/test_tools.py000066400000000000000000000125041465673407200164000ustar00rootroot00000000000000from __future__ import absolute_import, print_function from unittest import TestCase, main from lark import Lark from lark.tree import Tree from lark.tools import standalone from io import StringIO class TestStandalone(TestCase): def setUp(self): pass def _create_standalone(self, grammar, compress=False): code_buf = StringIO() standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf, compress=compress) code = code_buf.getvalue() context = {'__doc__': None, '__name__': 'test_standalone'} exec(code, context) return context def test_simple(self): grammar = """ start: NUMBER WORD %import common.NUMBER %import common.WORD %import common.WS %ignore WS """ context = self._create_standalone(grammar) _Lark = context['Lark_StandAlone'] l = _Lark() x = l.parse('12 elephants') self.assertEqual(x.children, ['12', 'elephants']) x = l.parse('16 candles') self.assertEqual(x.children, ['16', 'candles']) self.assertRaises(context['UnexpectedToken'], l.parse, 'twelve monkeys') self.assertRaises(context['UnexpectedToken'], l.parse, 'twelve') self.assertRaises(context['UnexpectedCharacters'], l.parse, '$ talks') context = self._create_standalone(grammar, compress=True) _Lark = context['Lark_StandAlone'] l = _Lark() x = l.parse('12 elephants') def test_interactive(self): grammar = """ start: A+ B* A: "a" B: "b" """ context = self._create_standalone(grammar) parser: Lark = context['Lark_StandAlone']() ip = parser.parse_interactive() UnexpectedToken = context['UnexpectedToken'] Token = context['Token'] self.assertRaises(UnexpectedToken, ip.feed_eof) self.assertRaises(TypeError, ip.exhaust_lexer) ip.feed_token(Token('A', 'a')) res = ip.feed_eof() self.assertEqual(res, Tree('start', ['a'])) ip = parser.parse_interactive("ab") ip.exhaust_lexer() ip_copy = ip.copy() self.assertEqual(ip_copy.parser_state, ip.parser_state) self.assertEqual(ip_copy.lexer_thread.state, ip.lexer_thread.state) self.assertIsNot(ip_copy.parser_state, ip.parser_state) self.assertIsNot(ip_copy.lexer_thread.state, ip.lexer_thread.state) self.assertIsNot(ip_copy.lexer_thread.state.line_ctr, ip.lexer_thread.state.line_ctr) res = ip.feed_eof(ip.lexer_thread.state.last_token) self.assertEqual(res, Tree('start', ['a', 'b'])) self.assertRaises(UnexpectedToken, ip.feed_eof) self.assertRaises(UnexpectedToken, ip_copy.feed_token, Token('A', 'a')) ip_copy.feed_token(Token('B', 'b')) res = ip_copy.feed_eof() self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) def test_contextual(self): grammar = """ start: a b a: "A" "B" b: "AB" """ context = self._create_standalone(grammar) _Lark = context['Lark_StandAlone'] l = _Lark() x = l.parse('ABAB') _v_args = context['v_args'] @_v_args(inline=True) class T(context['Transformer']): def a(self): return 'a' def b(self): return 'b' start = _v_args(inline=False)(list) x = T().transform(x) self.assertEqual(x, ['a', 'b']) l2 = _Lark(transformer=T()) x = l2.parse('ABAB') self.assertEqual(x, ['a', 'b']) def 
test_postlex(self): from lark.indenter import Indenter class MyIndenter(Indenter): NL_type = '_NEWLINE' OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE'] CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE'] INDENT_type = '_INDENT' DEDENT_type = '_DEDENT' tab_len = 8 grammar = r""" start: "(" ")" _NEWLINE _NEWLINE: /\n/ """ context = self._create_standalone(grammar) _Lark = context['Lark_StandAlone'] l = _Lark(postlex=MyIndenter()) x = l.parse('()\n') self.assertEqual(x, Tree('start', [])) l = _Lark(postlex=MyIndenter()) x = l.parse('(\n)\n') self.assertEqual(x, Tree('start', [])) def test_transformer(self): grammar = r""" start: some_rule "(" SOME_TERMINAL ")" some_rule: SOME_TERMINAL SOME_TERMINAL: /[A-Za-z_][A-Za-z0-9_]*/ """ context = self._create_standalone(grammar) _Lark = context["Lark_StandAlone"] _Token = context["Token"] _Tree = context["Tree"] class MyTransformer(context["Transformer"]): def SOME_TERMINAL(self, token): return _Token("SOME_TERMINAL", "token is transformed") def some_rule(self, children): return _Tree("rule_is_transformed", []) parser = _Lark(transformer=MyTransformer()) self.assertEqual( parser.parse("FOO(BAR)"), _Tree("start", [ _Tree("rule_is_transformed", []), _Token("SOME_TERMINAL", "token is transformed") ]) ) if __name__ == '__main__': main() lark-1.2.2/tests/test_tree_forest_transformer.py000066400000000000000000000157641465673407200222160ustar00rootroot00000000000000from __future__ import absolute_import import unittest from lark import Lark from lark.lexer import Token from lark.tree import Tree from lark.visitors import Visitor, Transformer, Discard from lark.parsers.earley_forest import TreeForestTransformer, handles_ambiguity class TestTreeForestTransformer(unittest.TestCase): grammar = """ start: ab bc cd !ab: "A" "B"? !bc: "B"? "C"? !cd: "C"? 
"D" """ parser = Lark(grammar, parser='earley', ambiguity='forest') forest = parser.parse("ABCD") def test_identity_resolve_ambiguity(self): l = Lark(self.grammar, parser='earley', ambiguity='resolve') tree1 = l.parse("ABCD") tree2 = TreeForestTransformer(resolve_ambiguity=True).transform(self.forest) self.assertEqual(tree1, tree2) def test_identity_explicit_ambiguity(self): l = Lark(self.grammar, parser='earley', ambiguity='explicit') tree1 = l.parse("ABCD") tree2 = TreeForestTransformer(resolve_ambiguity=False).transform(self.forest) self.assertEqual(tree1, tree2) def test_tree_class(self): class CustomTree(Tree): pass class TreeChecker(Visitor): def __default__(self, tree): assert isinstance(tree, CustomTree) tree = TreeForestTransformer(resolve_ambiguity=False, tree_class=CustomTree).transform(self.forest) TreeChecker().visit(tree) def test_token_calls(self): visited = [False] * 4 class CustomTransformer(TreeForestTransformer): def A(self, node): assert node.type == 'A' visited[0] = True def B(self, node): assert node.type == 'B' visited[1] = True def C(self, node): assert node.type == 'C' visited[2] = True def D(self, node): assert node.type == 'D' visited[3] = True tree = CustomTransformer(resolve_ambiguity=False).transform(self.forest) assert visited == [True] * 4 def test_default_token(self): token_count = [0] class CustomTransformer(TreeForestTransformer): def __default_token__(self, node): token_count[0] += 1 assert isinstance(node, Token) tree = CustomTransformer(resolve_ambiguity=True).transform(self.forest) self.assertEqual(token_count[0], 4) def test_rule_calls(self): visited_start = [False] visited_ab = [False] visited_bc = [False] visited_cd = [False] class CustomTransformer(TreeForestTransformer): def start(self, data): visited_start[0] = True def ab(self, data): visited_ab[0] = True def bc(self, data): visited_bc[0] = True def cd(self, data): visited_cd[0] = True tree = CustomTransformer(resolve_ambiguity=False).transform(self.forest) self.assertTrue(visited_start[0]) self.assertTrue(visited_ab[0]) self.assertTrue(visited_bc[0]) self.assertTrue(visited_cd[0]) def test_default_rule(self): rule_count = [0] class CustomTransformer(TreeForestTransformer): def __default__(self, name, data): rule_count[0] += 1 tree = CustomTransformer(resolve_ambiguity=True).transform(self.forest) self.assertEqual(rule_count[0], 4) def test_default_ambig(self): ambig_count = [0] class CustomTransformer(TreeForestTransformer): def __default_ambig__(self, name, data): if len(data) > 1: ambig_count[0] += 1 tree = CustomTransformer(resolve_ambiguity=False).transform(self.forest) self.assertEqual(ambig_count[0], 1) def test_handles_ambiguity(self): class CustomTransformer(TreeForestTransformer): @handles_ambiguity def start(self, data): assert isinstance(data, list) assert len(data) == 4 for tree in data: assert tree.data == 'start' return 'handled' @handles_ambiguity def ab(self, data): assert isinstance(data, list) assert len(data) == 1 assert data[0].data == 'ab' tree = CustomTransformer(resolve_ambiguity=False).transform(self.forest) self.assertEqual(tree, 'handled') def test_discard(self): class CustomTransformer(TreeForestTransformer): def bc(self, data): return Discard def D(self, node): return Discard class TreeChecker(Transformer): def bc(self, children): assert False def D(self, token): assert False tree = CustomTransformer(resolve_ambiguity=False).transform(self.forest) TreeChecker(visit_tokens=True).transform(tree) def test_aliases(self): visited_ambiguous = [False] visited_full = 
[False] class CustomTransformer(TreeForestTransformer): @handles_ambiguity def start(self, data): for tree in data: assert tree.data == 'ambiguous' or tree.data == 'full' def ambiguous(self, data): visited_ambiguous[0] = True assert len(data) == 3 assert data[0].data == 'ab' assert data[1].data == 'bc' assert data[2].data == 'cd' return self.tree_class('ambiguous', data) def full(self, data): visited_full[0] = True assert len(data) == 1 assert data[0].data == 'abcd' return self.tree_class('full', data) grammar = """ start: ab bc cd -> ambiguous | abcd -> full !ab: "A" "B"? !bc: "B"? "C"? !cd: "C"? "D" !abcd: "ABCD" """ l = Lark(grammar, parser='earley', ambiguity='forest') forest = l.parse('ABCD') tree = CustomTransformer(resolve_ambiguity=False).transform(forest) self.assertTrue(visited_ambiguous[0]) self.assertTrue(visited_full[0]) def test_transformation(self): class CustomTransformer(TreeForestTransformer): def __default__(self, name, data): result = [] for item in data: if isinstance(item, list): result += item else: result.append(item) return result def __default_token__(self, node): return node.lower() def __default_ambig__(self, name, data): return data[0] result = CustomTransformer(resolve_ambiguity=False).transform(self.forest) expected = ['a', 'b', 'c', 'd'] self.assertEqual(result, expected) if __name__ == '__main__': unittest.main() lark-1.2.2/tests/test_tree_templates.py000066400000000000000000000211271465673407200202560ustar00rootroot00000000000000from __future__ import absolute_import import unittest from copy import deepcopy from lark import Lark, Tree, Token from lark.exceptions import MissingVariableError from lark.tree_templates import TemplateConf, Template, TemplateTranslator SOME_NON_TEMPLATED_STRING = "foo bar" SOME_TEMPLATE_NAME = "thing" SOME_TEMPLATE_STRING = f"${SOME_TEMPLATE_NAME}" SOME_NON_STRING = 12345 SOME_TEMPLATING_GRAMMAR = r""" start: DASHES? foo DASHES? 
bar DASHES: "--" foo: "foo" | TEMPLATE_NAME -> var bar: "bar" | TEMPLATE_NAME -> var TEMPLATE_NAME: "$" NAME NAME: /[^\W\d]\w*/ %ignore /[\t \f]+/ // WS """ SOME_FOO_TEMPLATE = f"{SOME_TEMPLATE_STRING} bar" SOME_BAR_TEMPLATE = f"foo {SOME_TEMPLATE_STRING}" SOME_NON_TEMPLATE_TREE = Tree("foo", children=["hi"]) __all__ = [ "TestTreeTemplatesConf", "TestTreeTemplatesTemplateTranslator", "TestTreeTemplatesTemplate", "TestTreeTemplatesTemplateDefaultConf", ] class TestTreeTemplatesConf(unittest.TestCase): parser = Lark(SOME_TEMPLATING_GRAMMAR) def test_conf_test_var__not_var(self): conf = TemplateConf(self.parser.parse) non_templates = { "non-templated string": SOME_NON_TEMPLATED_STRING, "non-var tree": Tree("stmt", children=[]), "var tree, non-templated string": Tree( "var", children=[SOME_NON_TEMPLATED_STRING] ), "var tree, templated string not first child": Tree( "var", children=[SOME_NON_TEMPLATED_STRING, SOME_TEMPLATE_STRING] ), "var tree, first child not string": Tree("var", children=[SOME_NON_STRING]), "var tree, no children": Tree("var", children=[]), } for description, test_case in non_templates.items(): with self.subTest(msg=description): self.assertIsNone(conf.test_var(test_case)) def test_conf_test_var__is_var(self): conf = TemplateConf(self.parser.parse) non_templates = { "templated string": SOME_TEMPLATE_STRING, "var tree, non-templated string": Tree( "var", children=[SOME_TEMPLATE_STRING] ), } for description, test_case in non_templates.items(): with self.subTest(msg=description): self.assertEqual(SOME_TEMPLATE_NAME, conf.test_var(test_case)) def test_conf_call__same_tree(self): conf = TemplateConf(self.parser.parse) explicitly_parsed = self.parser.parse(SOME_FOO_TEMPLATE) non_templates = { "to be parsed": SOME_FOO_TEMPLATE, "already parsed": explicitly_parsed, } for description, test_case in non_templates.items(): with self.subTest(msg=description): template = conf(test_case) self.assertEqual(explicitly_parsed, template.tree) def test_template_match__default_conf_match_same_tree__empty_dictionary(self): template = Template(SOME_NON_TEMPLATE_TREE) self.assertEqual({}, template.match(SOME_NON_TEMPLATE_TREE)) def test_template_match__only_tree(self): "This test might become irrelevant in the future" template_tree = Tree('bar', [Tree("var", children=["$foo"])]) template = Template(template_tree) self.assertRaises(TypeError, template.match, Tree('bar', ['BAD'])) class TestTreeTemplatesTemplate(unittest.TestCase): parser = Lark(SOME_TEMPLATING_GRAMMAR) conf = TemplateConf(parser.parse) def test_template_match__same_tree_no_template__empty_dictionary(self): template = Template(SOME_NON_TEMPLATE_TREE, conf=self.conf) self.assertEqual({}, template.match(SOME_NON_TEMPLATE_TREE)) def test_template_match__different_tree_no_template__none(self): template = Template(SOME_NON_TEMPLATE_TREE, conf=self.conf) self.assertIsNone(template.match(Tree("foo", children=["bye"]))) def test_template_match__no_template__empty_dictionary(self): tree = self.parser.parse(SOME_NON_TEMPLATED_STRING) template = Template(tree, conf=self.conf) non_templates = { "un-parsed string": SOME_NON_TEMPLATED_STRING, "parsed tree": tree, } for description, test_case in non_templates.items(): with self.subTest(msg=description): self.assertEqual({}, template.match(test_case)) def test_template_match__with_template__empty_dictionary(self): tree = self.parser.parse(SOME_FOO_TEMPLATE) template = Template(tree, conf=self.conf) non_templates = {"un-parsed string": SOME_FOO_TEMPLATE, "parsed tree": tree} expected_result = 
{SOME_TEMPLATE_NAME: tree.children[0]} for description, test_case in non_templates.items(): with self.subTest(msg=description): self.assertEqual(expected_result, template.match(test_case)) def test_template_match__different_tree__none(self): tree = self.parser.parse(SOME_FOO_TEMPLATE) template = Template(tree, conf=self.conf) non_templates = { "un-parsed string": SOME_BAR_TEMPLATE, "parsed tree": self.parser.parse(SOME_BAR_TEMPLATE), } for description, test_case in non_templates.items(): with self.subTest(msg=description): self.assertIsNone(template.match(test_case)) def test_template_search__same_tree_no_template__empty_generator(self): template = Template(SOME_NON_TEMPLATE_TREE, conf=self.conf) self.assertEqual([], list(template.search(SOME_NON_TEMPLATE_TREE))) def test_template_search__same_tree_as_child__empty_generator(self): template = Template(SOME_NON_TEMPLATE_TREE, conf=self.conf) self.assertEqual( [], list(template.search(Tree("root", children=[SOME_NON_TEMPLATE_TREE]))) ) def test_template_search__with_template__matched_result_with_parent_tree(self): tree = self.parser.parse(SOME_FOO_TEMPLATE) template = Template(tree, conf=self.conf) non_templates = {"un-parsed string": SOME_FOO_TEMPLATE, "parsed tree": tree} expected_result = [(tree, {SOME_TEMPLATE_NAME: tree.children[0]})] for description, test_case in non_templates.items(): with self.subTest(msg=description): self.assertEqual(expected_result, list(template.search(test_case))) def test_template_apply_vars__empty__exception(self): tree = self.parser.parse(SOME_FOO_TEMPLATE) template = Template(tree, conf=self.conf) with self.assertRaises(MissingVariableError): template.apply_vars({}) def test_template_apply_vars__no_matching_vars__exception(self): tree = self.parser.parse(SOME_FOO_TEMPLATE) template = Template(tree, conf=self.conf) with self.assertRaises(MissingVariableError): template.apply_vars({"not used": SOME_NON_TEMPLATE_TREE}) def test_template_apply_vars__matching_vars__template_replaced(self): tree = self.parser.parse(SOME_FOO_TEMPLATE) template = Template(tree, conf=self.conf) expected_result = deepcopy(tree) expected_result.children[0] = SOME_NON_TEMPLATE_TREE self.assertEqual( expected_result, template.apply_vars({SOME_TEMPLATE_NAME: SOME_NON_TEMPLATE_TREE}), ) class TestTreeTemplatesTemplateTranslator(unittest.TestCase): parser = Lark(SOME_TEMPLATING_GRAMMAR) conf = TemplateConf(parser.parse) def test_translate__empty_translations__same_tree(self): # no translations to match, so doesn't replace anything & can't error translator = TemplateTranslator({}) tree = self.parser.parse(SOME_FOO_TEMPLATE) expected_result = deepcopy(tree) self.assertEqual(expected_result, translator.translate(tree)) def test_translate__one_translations__same_tree(self): translations = { self.conf(f"${SOME_TEMPLATE_NAME} bar"): self.conf( f"--${SOME_TEMPLATE_NAME}-- bar" ) } translator = TemplateTranslator(translations) tree = self.parser.parse(SOME_NON_TEMPLATED_STRING) expected_result = deepcopy(tree) expected_result.children.insert(0, Token("DASHES", "--")) expected_result.children.insert(2, Token("DASHES", "--")) self.assertEqual(expected_result, translator.translate(tree)) class TestTreeTemplatesTemplateDefaultConf(unittest.TestCase): def test_template_match__match_same_tree__empty_dictionary(self): tree = Tree("foo", children=["hi"]) template = Template(tree) self.assertEqual({}, template.match(tree)) if __name__ == "__main__": unittest.main() 
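# ---------------------------------------------------------------------------
# Editor's note: the block below is an illustrative sketch, not part of the
# lark-1.2.2 sources. It condenses the tree-template API exercised by the
# tests above into a standalone example. The grammar is a simplified variant
# of SOME_TEMPLATING_GRAMMAR (no DASHES), the identifiers are invented for the
# example, and the binding described in the comments is an assumption based on
# the match() behaviour shown in TestTreeTemplatesTemplate, not a verified
# output.

from lark import Lark, Tree
from lark.tree_templates import TemplateConf

sketch_parser = Lark(r"""
    start: foo bar
    foo: "foo" | TEMPLATE_NAME -> var
    bar: "bar" | TEMPLATE_NAME -> var
    TEMPLATE_NAME: "$" NAME
    NAME: /[^\W\d]\w*/
    %ignore /[\t \f]+/
""")

conf = TemplateConf(sketch_parser.parse)   # TemplateConf wraps a parse callback
template = conf("$thing bar")              # a template string is parsed into a Template

# match() returns a dict of template-variable bindings, or None on mismatch.
bindings = template.match(sketch_parser.parse("foo bar"))
# Presumably {"thing": <the subtree sitting in the foo slot>}.

# apply_vars() substitutes trees for the template variables and raises
# MissingVariableError when a variable is left unbound (see the tests above).
filled = template.apply_vars({"thing": Tree("foo", children=[])})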
lark-1.2.2/tests/test_trees.py000066400000000000000000000351021465673407200163610ustar00rootroot00000000000000from __future__ import absolute_import import unittest from functools import partial, reduce, partialmethod from operator import add, mul from unittest import TestCase import copy import pickle import functools from lark.tree import Tree from lark.lexer import Token from lark.visitors import Visitor, Visitor_Recursive, Transformer, Interpreter, visit_children_decor, v_args, Discard, Transformer_InPlace, \ Transformer_InPlaceRecursive, Transformer_NonRecursive, merge_transformers class TestTrees(TestCase): def setUp(self): self.tree1 = Tree('a', [Tree(x, y) for x, y in zip('bcd', 'xyz')]) def test_eq(self): assert self.tree1 == self.tree1 assert self.tree1 != 0 def test_copy(self): assert self.tree1 == copy.copy(self.tree1) def test_deepcopy(self): assert self.tree1 == copy.deepcopy(self.tree1) def test_pickle(self): s = copy.deepcopy(self.tree1) data = pickle.dumps(s, protocol=pickle.HIGHEST_PROTOCOL) assert pickle.loads(data) == s def test_repr_runnable(self): assert self.tree1 == eval(repr(self.tree1)) def test_iter_subtrees(self): expected = [Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z'), Tree('a', [Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z')])] nodes = list(self.tree1.iter_subtrees()) self.assertEqual(nodes, expected) def test_iter_subtrees_topdown(self): expected = [Tree('a', [Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z')]), Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z')] nodes = list(self.tree1.iter_subtrees_topdown()) self.assertEqual(nodes, expected) def test_visitor(self): class Visitor1(Visitor): def __init__(self): self.nodes=[] def __default__(self,tree): self.nodes.append(tree) class Visitor1_Recursive(Visitor_Recursive): def __init__(self): self.nodes=[] def __default__(self,tree): self.nodes.append(tree) visitor1=Visitor1() visitor1_recursive=Visitor1_Recursive() expected_top_down = [Tree('a', [Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z')]), Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z')] expected_botton_up= [Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z'), Tree('a', [Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z')])] visitor1.visit(self.tree1) self.assertEqual(visitor1.nodes,expected_botton_up) visitor1_recursive.visit(self.tree1) self.assertEqual(visitor1_recursive.nodes,expected_botton_up) visitor1.nodes=[] visitor1_recursive.nodes=[] visitor1.visit_topdown(self.tree1) self.assertEqual(visitor1.nodes,expected_top_down) visitor1_recursive.visit_topdown(self.tree1) self.assertEqual(visitor1_recursive.nodes,expected_top_down) def test_interp(self): t = Tree('a', [Tree('b', []), Tree('c', []), 'd']) class Interp1(Interpreter): def a(self, tree): return self.visit_children(tree) + ['e'] def b(self, tree): return 'B' def c(self, tree): return 'C' self.assertEqual(Interp1().visit(t), list('BCde')) class Interp2(Interpreter): @visit_children_decor def a(self, values): return values + ['e'] def b(self, tree): return 'B' def c(self, tree): return 'C' self.assertEqual(Interp2().visit(t), list('BCde')) class Interp3(Interpreter): def b(self, tree): return 'B' def c(self, tree): return 'C' self.assertEqual(Interp3().visit(t), list('BCd')) def test_transformer(self): t = Tree('add', [Tree('sub', [Tree('i', ['3']), Tree('f', ['1.1'])]), Tree('i', ['1'])]) class T(Transformer): i = v_args(inline=True)(int) f = v_args(inline=True)(float) sub = lambda self, values: values[0] - values[1] def add(self, values): return sum(values) res = T().transform(t) self.assertEqual(res, 2.9) 
@v_args(inline=True) class T(Transformer): i = int f = float sub = lambda self, a, b: a-b def add(self, a, b): return a + b res = T().transform(t) self.assertEqual(res, 2.9) @v_args(inline=True) class T(Transformer): i = int f = float from operator import sub, add res = T().transform(t) self.assertEqual(res, 2.9) def test_vargs(self): @v_args() class MyTransformer(Transformer): @staticmethod def integer(args): return 1 # some code here @classmethod def integer2(cls, args): return 2 # some code here hello = staticmethod(lambda args: 'hello') x = MyTransformer().transform( Tree('integer', [2])) self.assertEqual(x, 1) x = MyTransformer().transform( Tree('integer2', [2])) self.assertEqual(x, 2) x = MyTransformer().transform( Tree('hello', [2])) self.assertEqual(x, 'hello') def test_smart_decorator(self): class OtherClass: @staticmethod def ab_staticmethod(a, b): return (a, b) @classmethod def ab_classmethod(cls, a, b): assert cls is OtherClass, cls return (a, b) def ab_method(self, a, b): assert isinstance(self, OtherClass), self return (a, b) @v_args(meta=True) class OtherTransformer(Transformer): @staticmethod def ab_staticmethod(meta, children): return tuple(children) @classmethod def ab_classmethod(cls, meta, children): assert cls is OtherTransformer, cls return tuple(children) def ab_method(self, meta, children): assert isinstance(self, OtherTransformer), self return tuple(children) class CustomCallable: def __call__(self, *args, **kwargs): assert isinstance(self, CustomCallable) return args oc_instance = OtherClass() ot_instance = OtherTransformer() def ab_for_partialmethod(self, a, b): assert isinstance(self, TestCls) return a, b @v_args(inline=True) class TestCls(Transformer): @staticmethod def ab_staticmethod(a, b): return (a, b) @classmethod def ab_classmethod(cls, a, b): assert cls is TestCls return (a, b) def ab_method(self, a, b): assert isinstance(self, TestCls) return (a, b) oc_class_ab_staticmethod = oc_instance.ab_staticmethod oc_class_ab_classmethod = oc_instance.ab_classmethod oc_ab_staticmethod = oc_instance.ab_staticmethod oc_ab_classmethod = oc_instance.ab_classmethod oc_ab_method = oc_instance.ab_method ot_class_ab_staticmethod = ot_instance.ab_staticmethod ot_class_ab_classmethod = ot_instance.ab_classmethod ot_ab_staticmethod = ot_instance.ab_staticmethod ot_ab_classmethod = ot_instance.ab_classmethod ot_ab_method = ot_instance.ab_method ab_partialmethod = partialmethod(ab_for_partialmethod, 1) set_union = set(["a"]).union static_add = staticmethod(add) partial_reduce_mul = partial(reduce, mul) custom_callable = CustomCallable() test_instance = TestCls() expected = { "ab_classmethod": ([1, 2], (1, 2)), "ab_staticmethod": ([1, 2], (1, 2)), "ab_method": ([1, 2], (1, 2)), "oc_ab_classmethod": ([1, 2], (1, 2)), "oc_class_ab_classmethod": ([1, 2], (1, 2)), # AFAIK, these two cases are impossible to deal with. `oc_instance.ab_staticmethod` returns an actual # function object that is impossible to distinguish from a normally defined method. # (i.e. `staticmethod(f).__get__(?, ?) 
is f` is True) # "oc_ab_staticmethod": ([1, 2], (1, 2)), # "oc_class_ab_staticmethod": ([1, 2], (1, 2)), "oc_ab_method": ([1, 2], (1, 2)), "ot_ab_classmethod": ([1, 2], (1, 2)), "ot_class_ab_classmethod": ([1, 2], (1, 2)), # Same as above # "ot_ab_staticmethod": ([1, 2], (1, 2)), # "ot_class_ab_staticmethod": ([1, 2], (1, 2)), "ot_ab_method": ([1, 2], (1, 2)), "ab_partialmethod": ([2], (1, 2)), "custom_callable": ([1, 2], (1, 2)), "set_union": ([["b"], ["c"]], {"a", "b", "c"}), "static_add": ([1, 2], 3), "partial_reduce_mul": ([[1, 2]], 2), } non_static = {"ab_method", "ab_partialmethod"} for method_name, (children, expected_result) in expected.items(): not_inline = "ot" in method_name result = test_instance.transform(Tree(method_name, children)) self.assertEqual(result, expected_result) if not_inline: result = getattr(test_instance, method_name)(None, children) else: result = getattr(test_instance, method_name)(*children) self.assertEqual(result, expected_result) if method_name not in non_static: if not_inline: result = getattr(TestCls, method_name)(None, children) else: result = getattr(TestCls, method_name)(*children) self.assertEqual(result, expected_result) def test_vargs_set_name(self): # Test with cached_property if available. That actually uses __set_name__ prop = getattr(functools, "cached_property", property) class T(Transformer): @v_args(inline=True) @prop # Not sure why you would ever want to use a property here, but we support it def test(self): return lambda a, b: (self, a, b) t = T() self.assertEqual(t.transform(Tree("test", [1, 2])), (t, 1, 2)) def test_inline_static(self): @v_args(inline=True) class T(Transformer): @staticmethod def test(a, b): return a + b x = T().transform(Tree('test', ['a', 'b'])) self.assertEqual(x, 'ab') def test_vargs_override(self): t = Tree('add', [Tree('sub', [Tree('i', ['3']), Tree('f', ['1.1'])]), Tree('i', ['1'])]) @v_args(inline=True) class T(Transformer): i = int f = float sub = lambda self, a, b: a-b not_a_method = {'other': 'stuff'} @v_args(inline=False) def add(self, values): return sum(values) res = T().transform(t) self.assertEqual(res, 2.9) def test_partial(self): tree = Tree("start", [Tree("a", ["test1"]), Tree("b", ["test2"])]) def test(prefix, s, postfix): return prefix + s.upper() + postfix @v_args(inline=True) class T(Transformer): a = functools.partial(test, "@", postfix="!") b = functools.partial(lambda s: s + "!") res = T().transform(tree) assert res.children == ["@TEST1!", "test2!"] def test_discard(self): class MyTransformer(Transformer): def a(self, args): return 1 # some code here def b(cls, args): return Discard t = Tree('root', [ Tree('b', []), Tree('a', []), Tree('b', []), Tree('c', []), Tree('b', []), ]) t2 = Tree('root', [1, Tree('c', [])]) x = MyTransformer().transform( t ) self.assertEqual(x, t2) def test_transformer_variants(self): tree = Tree('start', [ Tree('add', [Token('N', '1'), Token('N', '2'), Token('IGNORE_TOKEN', '4')]), Tree('add', [Token('N', '3'), Token('N', '4')]), Tree('ignore_tree', [Token('DO', 'NOT PANIC')]), ]) for base in (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive): class T(base): def add(self, children): return sum(children) def N(self, token): return int(token) def ignore_tree(self, children): return Discard def IGNORE_TOKEN(self, token): return Discard copied = copy.deepcopy(tree) result = T().transform(copied) self.assertEqual(result, Tree('start', [3, 7])) def test_merge_transformers(self): tree = Tree('start', [ Tree('main', [ Token("A", '1'), 
Token("B", '2') ]), Tree("module__main", [ Token("A", "2"), Token("B", "3") ]) ]) class T1(Transformer): A = int B = int main = sum start = list def module__main(self, children): return sum(children) class T2(Transformer): A = int B = int main = sum start = list class T3(Transformer): def main(self, children): return sum(children) class T4(Transformer): main = sum t1_res = T1().transform(tree) composed_res = merge_transformers(T2(), module=T3()).transform(tree) self.assertEqual(t1_res, composed_res) composed_res2 = merge_transformers(T2(), module=T4()).transform(tree) self.assertEqual(t1_res, composed_res2) with self.assertRaises(AttributeError): merge_transformers(T1(), module=T3()) def test_transform_token(self): class MyTransformer(Transformer): def INT(self, value): return int(value) t = Token('INT', '123') assert MyTransformer().transform(t) == 123 class MyTransformer(Transformer): def INT(self, value): return Discard assert MyTransformer().transform(t) is None if __name__ == '__main__': unittest.main() lark-1.2.2/tox.ini000066400000000000000000000015171465673407200140020ustar00rootroot00000000000000[tox] envlist = lint, type, py38, py39, py310, py311, py312, py313, pypy3 skip_missing_interpreters = true [testenv] whitelist_externals = git deps = -rtest-requirements.txt passenv = TERM # to always force recreation and avoid unexpected side effects recreate = True # Require since the commands use `git` allowlist_externals = git commands = git submodule sync -q git submodule update --init python -m tests {posargs} [testenv:type] description = run type check on code base skip_install = true recreate = false deps = mypy==1.10 interegular>=0.3.1,<0.4.0 types-atomicwrites types-regex rich<=13.4.1 commands = mypy [testenv:lint] description = run linters on code base skip_install = true recreate = false deps = pre-commit commands = pre-commit run --all-files --show-diff-on-failure