pax_global_header00006660000000000000000000000064141140653160014513gustar00rootroot0000000000000052 comment=be4746f73dfa9514f06dd7f93bf4be9a47d5d490 mwparserfromhell-0.6.3/000077500000000000000000000000001411406531600151125ustar00rootroot00000000000000mwparserfromhell-0.6.3/.clang-format000066400000000000000000000005321411406531600174650ustar00rootroot00000000000000BasedOnStyle: LLVM AlignConsecutiveMacros: AcrossEmptyLines AllowShortFunctionsOnASingleLine: Inline AlwaysBreakAfterReturnType: TopLevelDefinitions BinPackArguments: false BinPackParameters: false BreakBeforeBraces: Linux ColumnLimit: 88 IndentPPDirectives: AfterHash IndentWidth: 4 SpaceAfterCStyleCast: true StatementMacros: - PyObject_HEAD mwparserfromhell-0.6.3/.coveragerc000066400000000000000000000002171411406531600172330ustar00rootroot00000000000000[report] exclude_lines = pragma: no cover raise NotImplementedError() partial_branches = pragma: no branch if py3k: if not py3k: mwparserfromhell-0.6.3/.github/000077500000000000000000000000001411406531600164525ustar00rootroot00000000000000mwparserfromhell-0.6.3/.github/workflows/000077500000000000000000000000001411406531600205075ustar00rootroot00000000000000mwparserfromhell-0.6.3/.github/workflows/build-linux-wheels.yml000066400000000000000000000032511411406531600247540ustar00rootroot00000000000000name: Build manylinux1 wheels on: push jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Build manylinux1 Python wheels uses: RalfG/python-wheels-manylinux-build@e645ea95dae94f606ab25f95f44d3a2caf55764c with: python-versions: 'cp35-cp35m cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39' pip-wheel-args: '-w ./wheelhouse --no-deps' - name: Move to dist/ run: | mkdir -p dist cp -v wheelhouse/*-manylinux*.whl dist/ - name: Publish package to PyPI # Only actually publish if a new tag was pushed if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@37e305e7413032d8422456179fee28fac7d25187 
with: user: __token__ password: ${{ secrets.pypi_password }} build_aarch64: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: docker/setup-qemu-action@v1 name: Set up QEMU - name: Build manylinux aarch64 Python wheels uses: RalfG/python-wheels-manylinux-build@v0.3.4-manylinux2014_aarch64 with: python-versions: 'cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39' pip-wheel-args: '-w ./wheelhouse --no-deps' - name: Move to dist/ run: | mkdir -p dist cp -v wheelhouse/*-manylinux*.whl dist/ - name: Publish package to PyPI # Only actually publish if a new tag was pushed if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@37e305e7413032d8422456179fee28fac7d25187 with: user: __token__ password: ${{ secrets.pypi_password }} mwparserfromhell-0.6.3/.github/workflows/build-macos-wheels.yml000066400000000000000000000016511411406531600247210ustar00rootroot00000000000000name: Build macOS wheels on: push jobs: build: runs-on: macos-latest strategy: matrix: python-version: [3.5, 3.6, 3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v1 with: python-version: ${{ matrix.python-version }} - name: Build wheels run: | python -m pip install --upgrade pip wheel setuptools pip wheel . 
-w dist/ ls dist/ - name: Publish package to PyPI # Only actually publish if a new tag was pushed if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') # We can't use the pypa action because of https://github.com/pypa/gh-action-pypi-publish/issues/15 run: | pip install twine TWINE_USERNAME="__token__" \ TWINE_PASSWORD="${{ secrets.pypi_password }}" \ twine upload dist/* mwparserfromhell-0.6.3/.gitignore000066400000000000000000000002501411406531600170770ustar00rootroot00000000000000*.pyc *.pyd *.so *.dll *.egg *.egg-info .coverage .eggs .DS_Store __pycache__ build dist docs/_build scripts/*.log htmlcov/ compile_commands.json .idea/ .pytest_cache/ mwparserfromhell-0.6.3/.pre-commit-config.yaml000066400000000000000000000003571411406531600214000ustar00rootroot00000000000000repos: - repo: https://github.com/psf/black rev: 21.8b0 hooks: - id: black - repo: https://github.com/doublify/pre-commit-clang-format rev: 62302476d0da01515660132d76902359bed0f782 hooks: - id: clang-format mwparserfromhell-0.6.3/.travis.yml000066400000000000000000000005371411406531600172300ustar00rootroot00000000000000dist: xenial language: python python: - 3.5 - 3.6 - 3.7 - 3.8 - 3.9 arch: - amd64 - ppc64le install: - pip install coveralls pytest - python setup.py develop script: - coverage run --source=mwparserfromhell -m pytest after_success: - coveralls env: matrix: - WITHOUT_EXTENSION=0 - WITHOUT_EXTENSION=1 mwparserfromhell-0.6.3/CHANGELOG000066400000000000000000000264541411406531600163370ustar00rootroot00000000000000v0.6.3 (released September 2, 2021): - Added Linux AArch64 wheels. (#276) - Fixed C integer conversion, manifesting as parsing errors on big-endian platforms. (#277) v0.6.2 (released May 16, 2021): - Improved parsing of external links. (#232) - Fixed parsing of nested wikilinks. - Ported tests to pytest. (#237) - Moved mwparserfromhell package to src/ dir. - There was no 0.6.1 release due to a packaging error. 
v0.6 (released December 21, 2020): Thanks to everyone for their patience with this release! - Breaking change: dropped support for end-of-life Python 2.7 and 3.4. - Added support for Python 3.8 and 3.9. - Added binary wheels for Linux and macOS. - Updated Wikicode.matches() to recognize underscores as being equivalent to spaces. (#216) - Added a 'default' parameter to Template.get(), and implement dict-style item access for template parameters. (#252) - Fixed a rare parsing bug involving deeply nested style tags. (#224) - Fixed parsing of section headings inside templates. (#233) - Updated HTML tag definitions. - Internal refactoring and cleanup. v0.5.4 (released May 15, 2019): - Fixed an unlikely crash in the C tokenizer when interrupted while parsing a heading. v0.5.3 (released March 30, 2019): - Fixed manual construction of Node objects, previously unsupported. (#214) - Fixed Wikicode transformation methods (replace(), remove(), etc.) when passed an empty section as an argument. (#212) - Fixed the parser getting stuck inside malformed tables. (#206) v0.5.2 (released November 1, 2018): - Dropped support for end-of-life Python versions 2.6, 3.2, 3.3. (#199, #204) - Fixed signals getting stuck inside the C tokenizer until parsing finishes, in pathological cases. (#206) - Fixed not being considered a single-only tag. (#200) - Fixed a C tokenizer crash on Python 3.7 when compiled with assertions. (#208) - Cleaned up some minor documentation issues. (#207) v0.5.1 (released March 3, 2018): - Improved behavior when adding parameters to templates (via Template.add()) with poorly formatted whitespace conventions. (#185) - Fixed the parser getting stuck in deeply nested HTML tags with unclosed, quoted attributes. (#190) v0.5 (released June 23, 2017): - Added Wikicode.contains() to determine whether a Node or Wikicode object is contained within another Wikicode object. 
- Added Wikicode.get_ancestors() and Wikicode.get_parent() to find all ancestors and the direct parent of a Node, respectively. - Fixed a long-standing performance issue with deeply nested, invalid syntax (issue #42). The parser should be much faster on certain complex pages. The "max cycle" restriction has also been removed, so some situations where templates at the end of a page were being skipped are now resolved. - Made Template.remove(keep_field=True) behave more reasonably when the parameter is already empty. - Added the keep_template_params argument to Wikicode.strip_code(). If True, then template parameters will be preserved in the output. - Wikicode objects can now be pickled properly (fixed infinite recursion error on incompletely-constructed StringMixIn subclasses). - Fixed Wikicode.matches()'s behavior on iterables besides lists and tuples. - Fixed len() sometimes raising ValueError on empty node lists. - Fixed a rare parsing bug involving self-closing tags inside the attributes of unpaired tags. - Fixed release script after changes to PyPI. v0.4.4 (released December 30, 2016): - Added support for Python 3.6. - Fixed parsing bugs involving: - wikitables nested in templates; - wikitable error recovery when unable to recurse; - templates nested in template parameters before other parameters. - Fixed parsing file-like objects. - Made builds deterministic. - Documented caveats. v0.4.3 (released October 29, 2015): - Added Windows binaries for Python 3.5. - Fixed edge cases involving wikilinks inside of external links and vice versa. - Fixed a C tokenizer crash when a keyboard interrupt happens while parsing. v0.4.2 (released July 30, 2015): - Fixed setup script not including header files in releases. - Fixed Windows binary uploads. v0.4.1 (released July 30, 2015): - The process for building Windows binaries has been fixed, and these should be distributed along with new releases. 
Windows users can now take advantage of C speedups without having a compiler of their own. - Added support for Python 3.5. - '<' and '>' are now disallowed in wikilink titles and template names. This includes when denoting tags, but not comments. - Fixed the behavior of preserve_spacing in Template.add() and keep_field in Template.remove() on parameters with hidden keys. - Removed _ListProxy.detach(). SmartLists now use weak references and their children are garbage-collected properly. - Fixed parser bugs involving: - templates with completely blank names; - templates with newlines and comments. - Heavy refactoring and fixes to the C tokenizer, including: - corrected a design flaw in text handling, allowing for substantial speed improvements when parsing long strings of plain text; - implemented new Python 3.3 PEP 393 Unicode APIs. - Fixed various bugs in SmartList, including one that was causing memory issues on 64-bit builds of Python 2 on Windows. - Fixed some bugs in the release scripts. v0.4 (released May 23, 2015): - The parser now falls back on pure Python mode if C extensions cannot be built. This fixes an issue that prevented some Windows users from installing the parser. - Added support for parsing wikicode tables (patches by David Winegar). - Added a script to test for memory leaks in scripts/memtest.py. - Added a script to do releases in scripts/release.sh. - skip_style_tags can now be passed to mwparserfromhell.parse() (previously, only Parser().parse() allowed it). - The 'recursive' argument to Wikicode's filter methods now accepts a third option, RECURSE_OTHERS, which recurses over all children except instances of 'forcetype' (for example, `code.filter_templates(code.RECURSE_OTHERS)` returns all un-nested templates). - The parser now understands HTML tag attributes quoted with single quotes. When setting a tag attribute's value, quotes will be added if necessary. 
As part of this, Attribute's 'quoted' attribute has been changed to 'quotes', and is now either a string or None. - Calling Template.remove() with a Parameter object that is not part of the template now raises ValueError instead of doing nothing. - Parameters with non-integer keys can no longer be created with 'showkey=False', nor have the value of this attribute be set to False later. - _ListProxy.destroy() has been changed to _ListProxy.detach(), and now works in a more useful way. - If something goes wrong while parsing, ParserError will now be raised. Previously, the parser would produce an unclear BadRoute exception or allow an incorrect node tree to be build. - Fixed parser bugs involving: - nested tags; - comments in template names; - tags inside of tags. - Added tests to ensure that parsed trees convert back to wikicode without unintentional modifications. - Added support for a NOWEB environment variable, which disables a unit test that makes a web call. - Test coverage has been improved, and some minor related bugs have been fixed. - Updated and fixed some documentation. v0.3.3 (released April 22, 2014): - Added support for Python 2.6 and 3.4. - Template.has() is now passed 'ignore_empty=False' by default instead of True. This fixes a bug when adding parameters to templates with empty fields, and is a breaking change if you rely on the default behavior. - The 'matches' argument of Wikicode's filter methods now accepts a function (taking one argument, a Node, and returning a bool) in addition to a regex. - Re-added 'flat' argument to Wikicode.get_sections(), fixed the order in which it returns sections, and made it faster. - Wikicode.matches() now accepts a tuple or list of strings/Wikicode objects instead of just a single string or Wikicode. - Given the frequency of issues with the (admittedly insufficient) tag parser, there's a temporary skip_style_tags argument to parse() that ignores '' and ''' until these issues are corrected. 
- Fixed a parser bug involving nested wikilinks and external links. - C code cleanup and speed improvements. v0.3.2 (released September 1, 2013): - Added support for Python 3.2 (along with current support for 3.3 and 2.7). - Renamed Template.remove()'s first argument from 'name' to 'param', which now accepts Parameter objects in addition to parameter name strings. v0.3.1 (released August 29, 2013): - Fixed a parser bug involving URLs nested inside other markup. - Fixed some typos. v0.3 (released August 24, 2013): - Added complete support for HTML Tags, including forms like foo, , and wiki-markup tags like bold ('''), italics (''), and lists (*, #, ; and :). - Added support for ExternalLinks (http://example.com/ and [http://example.com/ Example]). - Wikicode's filter methods are now passed 'recursive=True' by default instead of False. This is a breaking change if you rely on any filter() methods being non-recursive by default. - Added a matches() method to Wikicode for page/template name comparisons. - The 'obj' param of Wikicode.insert_before(), insert_after(), replace(), and remove() now accepts other Wikicode objects and strings representing parts of wikitext, instead of just nodes. These methods also make all possible substitutions instead of just one. - Renamed Template.has_param() to has() for consistency with Template's other methods; has_param() is now an alias. - The C tokenizer extension now works on Python 3 in addition to Python 2.7. - Various bugfixes, internal changes, and cleanup. v0.2 (released June 20, 2013): - The parser now fully supports Python 3 in addition to Python 2.7. - Added a C tokenizer extension that is significantly faster than its Python equivalent. It is enabled by default (if available) and can be toggled by setting `mwparserfromhell.parser.use_c` to a boolean value. - Added a complete set of unit tests covering parsing and wikicode manipulation. - Renamed Wikicode.filter_links() to filter_wikilinks() (applies to ifilter as well). 
- Added filter methods for Arguments, Comments, Headings, and HTMLEntities. - Added 'before' param to Template.add(); renamed 'force_nonconformity' to 'preserve_spacing'. - Added 'include_lead' param to Wikicode.get_sections(). - Removed 'flat' param from Wikicode.get_sections(). - Removed 'force_no_field' param from Template.remove(). - Added support for Travis CI. - Added note about Windows build issue in the README. - The tokenizer will limit itself to a realistic recursion depth to prevent errors and unreasonably long parse times. - Fixed how some nodes' attribute setters handle input. - Fixed multiple bugs in the tokenizer's handling of invalid markup. - Fixed bugs in the implementation of SmartList and StringMixIn. - Fixed some broken example code in the README; other copyedits. - Other bugfixes and code cleanup. v0.1.1 (released September 21, 2012): - Added support for Comments () and Wikilinks ([[foo]]). - Added corresponding ifilter_links() and filter_links() methods to Wikicode. - Fixed a bug when parsing incomplete templates. - Fixed strip_code() to affect the contents of headings. - Various copyedits in documentation and comments. v0.1 (released August 23, 2012): - Initial release. mwparserfromhell-0.6.3/LICENSE000066400000000000000000000020761411406531600161240ustar00rootroot00000000000000Copyright (C) 2012-2019 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. mwparserfromhell-0.6.3/MANIFEST.in000066400000000000000000000001321411406531600166440ustar00rootroot00000000000000include LICENSE CHANGELOG recursive-include src *.h recursive-include tests *.py *.mwtest mwparserfromhell-0.6.3/README.rst000066400000000000000000000201701411406531600166010ustar00rootroot00000000000000mwparserfromhell ================ .. image:: https://api.travis-ci.com/earwig/mwparserfromhell.svg?branch=develop :alt: Build Status :target: https://travis-ci.org/earwig/mwparserfromhell .. image:: https://img.shields.io/coveralls/earwig/mwparserfromhell/develop.svg :alt: Coverage Status :target: https://coveralls.io/r/earwig/mwparserfromhell **mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for MediaWiki_ wikicode. It supports Python 3.5+. Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. Full documentation is available on ReadTheDocs_. Development occurs on GitHub_. Installation ------------ The easiest way to install the parser is through the `Python Package Index`_; you can install the latest release with ``pip install mwparserfromhell`` (`get pip`_). Make sure your pip is up-to-date first, especially on Windows. Alternatively, get the latest development version:: git clone https://github.com/earwig/mwparserfromhell.git cd mwparserfromhell python setup.py install The comprehensive unit testing suite requires `pytest`_ (``pip install pytest``) and can be run with ``python -m pytest``. 
Usage ----- Normal usage is rather straightforward (where ``text`` is page text): >>> import mwparserfromhell >>> wikicode = mwparserfromhell.parse(text) ``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an ordinary ``str`` object with some extra methods. For example: >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" >>> wikicode = mwparserfromhell.parse(text) >>> print(wikicode) I has a template! {{foo|bar|baz|eggs=spam}} See it? >>> templates = wikicode.filter_templates() >>> print(templates) ['{{foo|bar|baz|eggs=spam}}'] >>> template = templates[0] >>> print(template.name) foo >>> print(template.params) ['bar', 'baz', 'eggs=spam'] >>> print(template.get(1).value) bar >>> print(template.get("eggs").value) spam Since nodes can contain other nodes, getting nested templates is trivial: >>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" >>> mwparserfromhell.parse(text).filter_templates() ['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] You can also pass ``recursive=False`` to ``filter_templates()`` and explore templates manually. This is possible because nodes can contain additional ``Wikicode`` objects: >>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") >>> print(code.filter_templates(recursive=False)) ['{{foo|this {{includes a|template}}}}'] >>> foo = code.filter_templates(recursive=False)[0] >>> print(foo.get(1).value) this {{includes a|template}} >>> print(foo.get(1).value.filter_templates()[0]) {{includes a|template}} >>> print(foo.get(1).value.filter_templates()[0].get(1).value) template Templates can be easily modified to add, remove, or alter params. ``Wikicode`` objects can be treated like lists, with ``append()``, ``insert()``, ``remove()``, ``replace()``, and more. They also have a ``matches()`` method for comparing page or template names, which takes care of capitalization and whitespace: >>> text = "{{cleanup}} '''Foo''' is a [[bar]]. 
{{uncategorized}}" >>> code = mwparserfromhell.parse(text) >>> for template in code.filter_templates(): ... if template.name.matches("Cleanup") and not template.has("date"): ... template.add("date", "July 2012") ... >>> print(code) {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} >>> code.replace("{{uncategorized}}", "{{bar-stub}}") >>> print(code) {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} >>> print(code.filter_templates()) ['{{cleanup|date=July 2012}}', '{{bar-stub}}'] You can then convert ``code`` back into a regular ``str`` object (for saving the page!) by calling ``str()`` on it: >>> text = str(code) >>> print(text) {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} >>> text == code True Limitations ----------- While the MediaWiki parser generates HTML and has access to the contents of templates, among other things, mwparserfromhell acts as a direct interface to the source code only. This has several implications: * Syntax elements produced by a template transclusion cannot be detected. For example, imagine a hypothetical page ``"Template:End-bold"`` that contained the text ````. While MediaWiki would correctly understand that ``foobar{{end-bold}}`` translates to ``foobar``, mwparserfromhell has no way of examining the contents of ``{{end-bold}}``. Instead, it would treat the bold tag as unfinished, possibly extending further down the page. * Templates adjacent to external links, as in ``http://example.com{{foo}}``, are considered part of the link. In reality, this would depend on the contents of the template. * When different syntax elements cross over each other, as in ``{{echo|''Hello}}, world!''``, the parser gets confused because this cannot be represented by an ordinary syntax tree. Instead, the parser will treat the first syntax construct as plain text. In this case, only the italic tag would be properly parsed. 
**Workaround:** Since this commonly occurs with text formatting and text formatting is often not of interest to users, you may pass *skip_style_tags=True* to ``mwparserfromhell.parse()``. This treats ``''`` and ``'''`` as plain text. A future version of mwparserfromhell may include multiple parsing modes to get around this restriction more sensibly. Additionally, the parser lacks awareness of certain wiki-specific settings: * `Word-ending links`_ are not supported, since the linktrail rules are language-specific. * Localized namespace names aren't recognized, so file links (such as ``[[File:...]]``) are treated as regular wikilinks. * Anything that looks like an XML tag is treated as a tag, even if it is not a recognized tag name, since the list of valid tags depends on loaded MediaWiki extensions. Integration ----------- ``mwparserfromhell`` is used by and originally developed for EarwigBot_; ``Page`` objects have a ``parse`` method that essentially calls ``mwparserfromhell.parse()`` on ``page.get()``. If you're using Pywikibot_, your code might look like this: .. code-block:: python import mwparserfromhell import pywikibot def parse(title): site = pywikibot.Site() page = pywikibot.Page(site, title) text = page.get() return mwparserfromhell.parse(text) If you're not using a library, you can parse any page with the following Python 3 code (using the API_ and the requests_ library): .. code-block:: python import mwparserfromhell import requests API_URL = "https://en.wikipedia.org/w/api.php" def parse(title): params = { "action": "query", "prop": "revisions", "rvprop": "content", "rvslots": "main", "rvlimit": 1, "titles": title, "format": "json", "formatversion": "2", } headers = {"User-Agent": "My-Bot-Name/1.0"} req = requests.get(API_URL, headers=headers, params=params) res = req.json() revision = res["query"]["pages"][0]["revisions"][0] text = revision["slots"]["main"]["content"] return mwparserfromhell.parse(text) .. _MediaWiki: https://www.mediawiki.org .. 
_ReadTheDocs: https://mwparserfromhell.readthedocs.io .. _Earwig: https://en.wikipedia.org/wiki/User:The_Earwig .. _Σ: https://en.wikipedia.org/wiki/User:%CE%A3 .. _Legoktm: https://en.wikipedia.org/wiki/User:Legoktm .. _GitHub: https://github.com/earwig/mwparserfromhell .. _Python Package Index: https://pypi.org/ .. _get pip: https://pypi.org/project/pip/ .. _pytest: https://docs.pytest.org/ .. _Word-ending links: https://www.mediawiki.org/wiki/Help:Links#linktrail .. _EarwigBot: https://github.com/earwig/earwigbot .. _Pywikibot: https://www.mediawiki.org/wiki/Manual:Pywikibot .. _API: https://www.mediawiki.org/wiki/API:Main_page .. _requests: https://2.python-requests.org mwparserfromhell-0.6.3/appveyor.yml000066400000000000000000000034211411406531600175020ustar00rootroot00000000000000# This config file is used by appveyor.com to build Windows release binaries version: 0.6.3-b{build} branches: only: - master - develop skip_tags: true image: Visual Studio 2019 environment: global: # See: http://stackoverflow.com/a/13751649/163740 WRAPPER: "cmd /E:ON /V:ON /C .\\scripts\\win_wrapper.cmd" PYEXE: "%WRAPPER% %PYTHON%\\python.exe" SETUPPY: "%PYEXE% setup.py --with-extension" PIP: "%PYEXE% -m pip" TWINE: "%PYEXE% -m twine" PYPI_USERNAME: "earwigbot" PYPI_PASSWORD: secure: gOIcvPxSC2ujuhwOzwj3v8xjq3CCYd8keFWVnguLM+gcL0e02qshDHy7gwZZwj0+ matrix: - PYTHON: "C:\\Python36" PYTHON_VERSION: "3.6" PYTHON_ARCH: "32" - PYTHON: "C:\\Python36-x64" PYTHON_VERSION: "3.6" PYTHON_ARCH: "64" - PYTHON: "C:\\Python37" PYTHON_VERSION: "3.7" PYTHON_ARCH: "32" - PYTHON: "C:\\Python37-x64" PYTHON_VERSION: "3.7" PYTHON_ARCH: "64" - PYTHON: "C:\\Python38" PYTHON_VERSION: "3.8" PYTHON_ARCH: "32" - PYTHON: "C:\\Python38-x64" PYTHON_VERSION: "3.8" PYTHON_ARCH: "64" - PYTHON: "C:\\Python39" PYTHON_VERSION: "3.9" PYTHON_ARCH: "32" - PYTHON: "C:\\Python39-x64" PYTHON_VERSION: "3.9" PYTHON_ARCH: "64" install: - "%PIP% install --disable-pip-version-check --user --upgrade pip" - "%PIP% install wheel 
twine pytest" build_script: - "%SETUPPY% build" - "%SETUPPY% develop --user" test_script: - "%PYEXE% -m pytest" after_test: - "%SETUPPY% bdist_wheel" on_success: - "IF %APPVEYOR_REPO_BRANCH%==master %TWINE% upload dist\\* -u %PYPI_USERNAME% -p %PYPI_PASSWORD%" artifacts: - path: dist\* deploy: off mwparserfromhell-0.6.3/docs/000077500000000000000000000000001411406531600160425ustar00rootroot00000000000000mwparserfromhell-0.6.3/docs/Makefile000066400000000000000000000127441411406531600175120ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes 
to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/mwparserfromhell.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/mwparserfromhell.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/mwparserfromhell" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/mwparserfromhell" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." 
mwparserfromhell-0.6.3/docs/api/000077500000000000000000000000001411406531600166135ustar00rootroot00000000000000mwparserfromhell-0.6.3/docs/api/modules.rst000066400000000000000000000001251411406531600210130ustar00rootroot00000000000000mwparserfromhell ================ .. toctree:: :maxdepth: 6 mwparserfromhell mwparserfromhell-0.6.3/docs/api/mwparserfromhell.nodes.extras.rst000066400000000000000000000007421411406531600253550ustar00rootroot00000000000000extras Package ============== :mod:`extras` Package --------------------- .. automodule:: mwparserfromhell.nodes.extras :members: :undoc-members: :mod:`attribute` Module ----------------------- .. automodule:: mwparserfromhell.nodes.extras.attribute :members: :undoc-members: :show-inheritance: :mod:`parameter` Module ----------------------- .. automodule:: mwparserfromhell.nodes.extras.parameter :members: :undoc-members: :show-inheritance: mwparserfromhell-0.6.3/docs/api/mwparserfromhell.nodes.rst000066400000000000000000000033461411406531600240530ustar00rootroot00000000000000nodes Package ============= :mod:`nodes` Package -------------------- .. automodule:: mwparserfromhell.nodes .. autoclass:: mwparserfromhell.nodes.Node :special-members: :mod:`_base` Module ---------------------- .. automodule:: mwparserfromhell.nodes._base :members: :undoc-members: :show-inheritance: :mod:`argument` Module ---------------------- .. automodule:: mwparserfromhell.nodes.argument :members: :undoc-members: :show-inheritance: :mod:`comment` Module --------------------- .. automodule:: mwparserfromhell.nodes.comment :members: :undoc-members: :show-inheritance: :mod:`external_link` Module --------------------------- .. automodule:: mwparserfromhell.nodes.external_link :members: :undoc-members: :show-inheritance: :mod:`heading` Module --------------------- .. automodule:: mwparserfromhell.nodes.heading :members: :undoc-members: :show-inheritance: :mod:`html_entity` Module ------------------------- .. 
automodule:: mwparserfromhell.nodes.html_entity :members: :undoc-members: :show-inheritance: :mod:`tag` Module ----------------- .. automodule:: mwparserfromhell.nodes.tag :members: :undoc-members: :show-inheritance: :mod:`template` Module ---------------------- .. automodule:: mwparserfromhell.nodes.template :members: :undoc-members: :show-inheritance: :mod:`text` Module ------------------ .. automodule:: mwparserfromhell.nodes.text :members: :undoc-members: :show-inheritance: :mod:`wikilink` Module ---------------------- .. automodule:: mwparserfromhell.nodes.wikilink :members: :undoc-members: :show-inheritance: Subpackages ----------- .. toctree:: mwparserfromhell.nodes.extras mwparserfromhell-0.6.3/docs/api/mwparserfromhell.parser.rst000066400000000000000000000016041411406531600242320ustar00rootroot00000000000000parser Package ============== :mod:`parser` Package --------------------- .. automodule:: mwparserfromhell.parser :members: :undoc-members: :mod:`builder` Module --------------------- .. automodule:: mwparserfromhell.parser.builder :members: :undoc-members: :private-members: :mod:`contexts` Module ---------------------- .. automodule:: mwparserfromhell.parser.contexts :members: :undoc-members: :mod:`errors` Module -------------------- .. automodule:: mwparserfromhell.parser.errors :members: :undoc-members: :mod:`tokenizer` Module ----------------------- .. automodule:: mwparserfromhell.parser.tokenizer :members: :undoc-members: :private-members: .. autoexception:: mwparserfromhell.parser.tokenizer.BadRoute :mod:`tokens` Module -------------------- .. automodule:: mwparserfromhell.parser.tokens :members: :undoc-members: mwparserfromhell-0.6.3/docs/api/mwparserfromhell.rst000066400000000000000000000014761411406531600227460ustar00rootroot00000000000000mwparserfromhell Package ======================== :mod:`mwparserfromhell` Package ------------------------------- .. 
automodule:: mwparserfromhell.__init__ :members: :undoc-members: :mod:`definitions` Module ------------------------- .. automodule:: mwparserfromhell.definitions :members: :mod:`string_mixin` Module -------------------------- .. automodule:: mwparserfromhell.string_mixin :members: :undoc-members: :mod:`utils` Module ------------------- .. automodule:: mwparserfromhell.utils :members: :undoc-members: :mod:`wikicode` Module ---------------------- .. automodule:: mwparserfromhell.wikicode :members: :undoc-members: :show-inheritance: Subpackages ----------- .. toctree:: mwparserfromhell.nodes mwparserfromhell.parser mwparserfromhell.smart_list mwparserfromhell-0.6.3/docs/api/mwparserfromhell.smart_list.rst000066400000000000000000000010761411406531600251220ustar00rootroot00000000000000smart_list Package ================== :mod:`smart_list` Package ------------------------- .. automodule:: mwparserfromhell.smart_list :members: :undoc-members: :mod:`list_proxy` Module --------------------- .. automodule:: mwparserfromhell.smart_list.list_proxy :members: :undoc-members: :mod:`smart_list` Module --------------------- .. automodule:: mwparserfromhell.smart_list.smart_list :members: :undoc-members: :mod:`utils` Module --------------------- .. automodule:: mwparserfromhell.smart_list.utils :members: :undoc-members: mwparserfromhell-0.6.3/docs/changelog.rst000066400000000000000000000406251411406531600205320ustar00rootroot00000000000000Changelog ========= v0.6.3 ------ `Released September 2, 2021 `_ (`changes `__): - Added Linux AArch64 wheels. (`#276 `_) - Fixed C integer conversion, manifesting as parsing errors on big-endian platforms. (`#277 `_) v0.6.2 ------ `Released May 16, 2021 `_ (`changes `__): - Improved parsing of external links. (`#232 `_) - Fixed parsing of nested wikilinks. - Ported tests to pytest. (`#237 `_) - Moved mwparserfromhell package to src/ dir. - There was no 0.6.1 release due to a packaging error. 
v0.6 ---- `Released December 21, 2020 `_ (`changes `__): Thanks to everyone for their patience with this release! - Breaking change: dropped support for end-of-life Python 2.7 and 3.4. - Added support for Python 3.8 and 3.9. - Added binary wheels for Linux and macOS. - Updated :meth:`.Wikicode.matches` to recognize underscores as being equivalent to spaces. (`#216 `_) - Added a `default` parameter to :meth:`.Template.get`, and implement dict-style item access for template parameters. (`#252 `_) - Fixed a rare parsing bug involving deeply nested style tags. (`#224 `_) - Fixed parsing of section headings inside templates. (`#233 `_) - Updated HTML tag definitions. - Internal refactoring and cleanup. v0.5.4 ------ `Released May 15, 2019 `_ (`changes `__): - Fixed an unlikely crash in the C tokenizer when interrupted while parsing a heading. v0.5.3 ------ `Released March 30, 2019 `_ (`changes `__): - Fixed manual construction of Node objects, previously unsupported. (`#214 `_) - Fixed :class:`.Wikicode` transformation methods (:meth:`.Wikicode.replace`, :meth:`.Wikicode.remove`, etc.) when passed an empty section as an argument. (`#212 `_) - Fixed the parser getting stuck inside malformed tables. (`#206 `_) v0.5.2 ------ `Released November 1, 2018 `_ (`changes `__): - Dropped support for end-of-life Python versions 2.6, 3.2, 3.3. (`#199 `_, `#204 `_) - Fixed signals getting stuck inside the C tokenizer until parsing finishes, in pathological cases. (`#206 `_) - Fixed `` not being considered a single-only tag. (`#200 `_) - Fixed a C tokenizer crash on Python 3.7 when compiled with assertions. (`#208 `_) - Cleaned up some minor documentation issues. (`#207 `_) v0.5.1 ------ `Released March 3, 2018 `_ (`changes `__): - Improved behavior when adding parameters to templates (via :meth:`.Template.add`) with poorly formatted whitespace conventions. (`#185 `_) - Fixed the parser getting stuck in deeply nested HTML tags with unclosed, quoted attributes. 
(`#190 `_) v0.5 ---- `Released June 23, 2017 `_ (`changes `__): - Added :meth:`.Wikicode.contains` to determine whether a :class:`.Node` or :class:`.Wikicode` object is contained within another :class:`.Wikicode` object. - Added :meth:`.Wikicode.get_ancestors` and :meth:`.Wikicode.get_parent` to find all ancestors and the direct parent of a :class:`.Node`, respectively. - Fixed a long-standing performance issue with deeply nested, invalid syntax (`issue #42 `_). The parser should be much faster on certain complex pages. The "max cycle" restriction has also been removed, so some situations where templates at the end of a page were being skipped are now resolved. - Made :meth:`Template.remove(keep_field=True) <.Template.remove>` behave more reasonably when the parameter is already empty. - Added the *keep_template_params* argument to :meth:`.Wikicode.strip_code`. If *True*, then template parameters will be preserved in the output. - :class:`.Wikicode` objects can now be pickled properly (fixed infinite recursion error on incompletely-constructed :class:`.StringMixIn` subclasses). - Fixed :meth:`.Wikicode.matches`\ 's behavior on iterables besides lists and tuples. - Fixed ``len()`` sometimes raising ``ValueError`` on empty node lists. - Fixed a rare parsing bug involving self-closing tags inside the attributes of unpaired tags. - Fixed release script after changes to PyPI. v0.4.4 ------ `Released December 30, 2016 `_ (`changes `__): - Added support for Python 3.6. - Fixed parsing bugs involving: - wikitables nested in templates; - wikitable error recovery when unable to recurse; - templates nested in template parameters before other parameters. - Fixed parsing file-like objects. - Made builds deterministic. - Documented caveats. v0.4.3 ------ `Released October 29, 2015 `_ (`changes `__): - Added Windows binaries for Python 3.5. - Fixed edge cases involving wikilinks inside of external links and vice versa. 
- Fixed a C tokenizer crash when a keyboard interrupt happens while parsing. v0.4.2 ------ `Released July 30, 2015 `__ (`changes `__): - Fixed setup script not including header files in releases. - Fixed Windows binary uploads. v0.4.1 ------ `Released July 30, 2015 `__ (`changes `__): - The process for building Windows binaries has been fixed, and these should be distributed along with new releases. Windows users can now take advantage of C speedups without having a compiler of their own. - Added support for Python 3.5. - ``<`` and ``>`` are now disallowed in wikilink titles and template names. This includes when denoting tags, but not comments. - Fixed the behavior of *preserve_spacing* in :meth:`.Template.add` and *keep_field* in :meth:`.Template.remove` on parameters with hidden keys. - Removed :meth:`._ListProxy.detach`. :class:`.SmartList`\ s now use weak references and their children are garbage-collected properly. - Fixed parser bugs involving: - templates with completely blank names; - templates with newlines and comments. - Heavy refactoring and fixes to the C tokenizer, including: - corrected a design flaw in text handling, allowing for substantial speed improvements when parsing long strings of plain text; - implemented new Python 3.3 `PEP 393 `_ Unicode APIs. - Fixed various bugs in :class:`.SmartList`, including one that was causing memory issues on 64-bit builds of Python 2 on Windows. - Fixed some bugs in the release scripts. v0.4 ---- `Released May 23, 2015 `_ (`changes `__): - The parser now falls back on pure Python mode if C extensions cannot be built. This fixes an issue that prevented some Windows users from installing the parser. - Added support for parsing wikicode tables (patches by David Winegar). - Added a script to test for memory leaks in :file:`scripts/memtest.py`. - Added a script to do releases in :file:`scripts/release.sh`. 
- *skip_style_tags* can now be passed to :func:`mwparserfromhell.parse() <.parse_anything>` (previously, only :meth:`.Parser.parse` allowed it). - The *recursive* argument to :class:`Wikicode's <.Wikicode>` :meth:`.filter` methods now accepts a third option, ``RECURSE_OTHERS``, which recurses over all children except instances of *forcetype* (for example, ``code.filter_templates(code.RECURSE_OTHERS)`` returns all un-nested templates). - The parser now understands HTML tag attributes quoted with single quotes. When setting a tag attribute's value, quotes will be added if necessary. As part of this, :class:`.Attribute`\ 's :attr:`~.Attribute.quoted` attribute has been changed to :attr:`~.Attribute.quotes`, and is now either a string or ``None``. - Calling :meth:`.Template.remove` with a :class:`.Parameter` object that is not part of the template now raises :exc:`ValueError` instead of doing nothing. - :class:`.Parameter`\ s with non-integer keys can no longer be created with *showkey=False*, nor have the value of this attribute be set to *False* later. - :meth:`._ListProxy.destroy` has been changed to :meth:`._ListProxy.detach`, and now works in a more useful way. - If something goes wrong while parsing, :exc:`.ParserError` will now be raised. Previously, the parser would produce an unclear :exc:`.BadRoute` exception or allow an incorrect node tree to be build. - Fixed parser bugs involving: - nested tags; - comments in template names; - tags inside of ```` tags. - Added tests to ensure that parsed trees convert back to wikicode without unintentional modifications. - Added support for a :envvar:`NOWEB` environment variable, which disables a unit test that makes a web call. - Test coverage has been improved, and some minor related bugs have been fixed. - Updated and fixed some documentation. v0.3.3 ------ `Released April 22, 2014 `_ (`changes `__): - Added support for Python 2.6 and 3.4. 
- :meth:`.Template.has` is now passed *ignore_empty=False* by default instead of *True*. This fixes a bug when adding parameters to templates with empty fields, **and is a breaking change if you rely on the default behavior.** - The *matches* argument of :class:`Wikicode's <.Wikicode>` :meth:`.filter` methods now accepts a function (taking one argument, a :class:`.Node`, and returning a bool) in addition to a regex. - Re-added *flat* argument to :meth:`.Wikicode.get_sections`, fixed the order in which it returns sections, and made it faster. - :meth:`.Wikicode.matches` now accepts a tuple or list of strings/:class:`.Wikicode` objects instead of just a single string or :class:`.Wikicode`. - Given the frequency of issues with the (admittedly insufficient) tag parser, there's a temporary *skip_style_tags* argument to :meth:`~.Parser.parse` that ignores ``''`` and ``'''`` until these issues are corrected. - Fixed a parser bug involving nested wikilinks and external links. - C code cleanup and speed improvements. v0.3.2 ------ `Released September 1, 2013 `_ (`changes `__): - Added support for Python 3.2 (along with current support for 3.3 and 2.7). - Renamed :meth:`.Template.remove`\ 's first argument from *name* to *param*, which now accepts :class:`.Parameter` objects in addition to parameter name strings. v0.3.1 ------ `Released August 29, 2013 `_ (`changes `__): - Fixed a parser bug involving URLs nested inside other markup. - Fixed some typos. v0.3 ---- `Released August 24, 2013 `_ (`changes `__): - Added complete support for HTML :class:`Tags <.Tag>`, including forms like ``foo``, ````, and wiki-markup tags like bold (``'''``), italics (``''``), and lists (``*``, ``#``, ``;`` and ``:``). - Added support for :class:`.ExternalLink`\ s (``http://example.com/`` and ``[http://example.com/ Example]``). - :class:`Wikicode's <.Wikicode>` :meth:`.filter` methods are now passed *recursive=True* by default instead of *False*. 
**This is a breaking change if you rely on any filter() methods being non-recursive by default.** - Added a :meth:`.matches` method to :class:`.Wikicode` for page/template name comparisons. - The *obj* param of :meth:`.Wikicode.insert_before`, :meth:`.insert_after`, :meth:`~.Wikicode.replace`, and :meth:`~.Wikicode.remove` now accepts :class:`.Wikicode` objects and strings representing parts of wikitext, instead of just nodes. These methods also make all possible substitutions instead of just one. - Renamed :meth:`.Template.has_param` to :meth:`~.Template.has` for consistency with :class:`.Template`\ 's other methods; :meth:`.has_param` is now an alias. - The C tokenizer extension now works on Python 3 in addition to Python 2.7. - Various bugfixes, internal changes, and cleanup. v0.2 ---- `Released June 20, 2013 `_ (`changes `__): - The parser now fully supports Python 3 in addition to Python 2.7. - Added a C tokenizer extension that is significantly faster than its Python equivalent. It is enabled by default (if available) and can be toggled by setting :attr:`mwparserfromhell.parser.use_c` to a boolean value. - Added a complete set of unit tests covering parsing and wikicode manipulation. - Renamed :meth:`.filter_links` to :meth:`.filter_wikilinks` (applies to :meth:`.ifilter` as well). - Added filter methods for :class:`Arguments <.Argument>`, :class:`Comments <.Comment>`, :class:`Headings <.Heading>`, and :class:`HTMLEntities <.HTMLEntity>`. - Added *before* param to :meth:`.Template.add`; renamed *force_nonconformity* to *preserve_spacing*. - Added *include_lead* param to :meth:`.Wikicode.get_sections`. - Removed *flat* param from :meth:`.get_sections`. - Removed *force_no_field* param from :meth:`.Template.remove`. - Added support for Travis CI. - Added note about Windows build issue in the README. - The tokenizer will limit itself to a realistic recursion depth to prevent errors and unreasonably long parse times. 
- Fixed how some nodes' attribute setters handle input. - Fixed multiple bugs in the tokenizer's handling of invalid markup. - Fixed bugs in the implementation of :class:`.SmartList` and :class:`.StringMixIn`. - Fixed some broken example code in the README; other copyedits. - Other bugfixes and code cleanup. v0.1.1 ------ `Released September 21, 2012 `_ (`changes `__): - Added support for :class:`Comments <.Comment>` (````) and :class:`Wikilinks <.Wikilink>` (``[[foo]]``). - Added corresponding :meth:`.ifilter_links` and :meth:`.filter_links` methods to :class:`.Wikicode`. - Fixed a bug when parsing incomplete templates. - Fixed :meth:`.strip_code` to affect the contents of headings. - Various copyedits in documentation and comments. v0.1 ---- `Released August 23, 2012 `_: - Initial release. mwparserfromhell-0.6.3/docs/conf.py000066400000000000000000000201761411406531600173470ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # mwparserfromhell documentation build configuration file, created by # sphinx-quickstart on Tue Aug 21 20:47:26 2012. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath("..")) import mwparserfromhell # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
extensions = ["sphinx.ext.autodoc", "sphinx.ext.intersphinx", "sphinx.ext.viewcode"] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix of source filenames. source_suffix = ".rst" # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. master_doc = "index" # General information about the project. project = "mwparserfromhell" copyright = "2012–2021 Ben Kurtovic" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = ".".join(mwparserfromhell.__version__.split(".", 2)[:2]) # The full version, including alpha/beta/rc tags. release = mwparserfromhell.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all documents. # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. 
# modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = "nature" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". # html_title = None # A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. # html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. # html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. # html_additional_pages = {} # If false, no module index is generated. # html_domain_indices = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. 
# html_split_index = False # If true, links to the reST sources are added to the pages. # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = "mwparserfromhelldoc" # -- Options for LaTeX output -------------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ( "index", "mwparserfromhell.tex", "mwparserfromhell Documentation", "Ben Kurtovic", "manual", ) ] # The name of an image file (relative to this directory) to place at the top of # the title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # latex_use_parts = False # If true, show page references after internal links. # latex_show_pagerefs = False # If true, show URL addresses after external links. # latex_show_urls = False # Documents to append as an appendix to all manuals. # latex_appendices = [] # If false, no module index is generated. # latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. 
List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ( "index", "mwparserfromhell", "mwparserfromhell Documentation", ["Ben Kurtovic"], 1, ) ] # If true, show URL addresses after external links. # man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( "index", "mwparserfromhell", "mwparserfromhell Documentation", "Ben Kurtovic", "mwparserfromhell", "One line description of project.", "Miscellaneous", ) ] # Documents to append as an appendix to all manuals. # texinfo_appendices = [] # If false, no module index is generated. # texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. # texinfo_show_urls = 'footnote' # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {"http://docs.python.org/": None} mwparserfromhell-0.6.3/docs/index.rst000066400000000000000000000031611411406531600177040ustar00rootroot00000000000000MWParserFromHell v\ |version| Documentation =========================================== :mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for MediaWiki_ wikicode. It supports Python 3.5+. Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. Development occurs on GitHub_. .. _MediaWiki: https://www.mediawiki.org .. _Earwig: https://en.wikipedia.org/wiki/User:The_Earwig .. _Σ: https://en.wikipedia.org/wiki/User:%CE%A3 .. _Legoktm: https://en.wikipedia.org/wiki/User:Legoktm .. 
_GitHub: https://github.com/earwig/mwparserfromhell Installation ------------ The easiest way to install the parser is through the `Python Package Index`_; you can install the latest release with ``pip install mwparserfromhell`` (`get pip`_). Make sure your pip is up-to-date first, especially on Windows. Alternatively, get the latest development version:: git clone https://github.com/earwig/mwparserfromhell.git cd mwparserfromhell python setup.py install The comprehensive unit testing suite requires `pytest`_ (``pip install pytest``) and can be run with ``python -m pytest``. .. _Python Package Index: https://pypi.org/ .. _get pip: https://pypi.org/project/pip/ .. _pytest: https://docs.pytest.org/ Contents -------- .. toctree:: :maxdepth: 2 usage limitations integration changelog API Reference Indices and tables ------------------ * :ref:`genindex` * :ref:`modindex` * :ref:`search` mwparserfromhell-0.6.3/docs/integration.rst000066400000000000000000000032521411406531600211210ustar00rootroot00000000000000Integration =========== :mod:`mwparserfromhell` is used by and originally developed for EarwigBot_; :class:`~earwigbot.wiki.page.Page` objects have a :meth:`~earwigbot.wiki.page.Page.parse` method that essentially calls :func:`mwparserfromhell.parse() ` on :meth:`~earwigbot.wiki.page.Page.get`. 
If you're using Pywikibot_, your code might look like this: import mwparserfromhell import pywikibot def parse(title): site = pywikibot.Site() page = pywikibot.Page(site, title) text = page.get() return mwparserfromhell.parse(text) If you're not using a library, you can parse any page with the following Python 3 code (using the API_ and the requests_ library): import mwparserfromhell import requests API_URL = "https://en.wikipedia.org/w/api.php" def parse(title): params = { "action": "query", "prop": "revisions", "rvprop": "content", "rvslots": "main", "rvlimit": 1, "titles": title, "format": "json", "formatversion": "2", } headers = {"User-Agent": "My-Bot-Name/1.0"} req = requests.get(API_URL, headers=headers, params=params) res = req.json() revision = res["query"]["pages"][0]["revisions"][0] text = revision["slots"]["main"]["content"] return mwparserfromhell.parse(text) .. _EarwigBot: https://github.com/earwig/earwigbot .. _Pywikibot: https://www.mediawiki.org/wiki/Manual:Pywikibot .. _API: https://www.mediawiki.org/wiki/API:Main_page .. _requests: https://2.python-requests.org mwparserfromhell-0.6.3/docs/limitations.rst000066400000000000000000000040771411406531600211400ustar00rootroot00000000000000Limitations =========== While the MediaWiki parser generates HTML and has access to the contents of templates, among other things, mwparserfromhell acts as a direct interface to the source code only. This has several implications: * Syntax elements produced by a template transclusion cannot be detected. For example, imagine a hypothetical page ``"Template:End-bold"`` that contained the text ````. While MediaWiki would correctly understand that ``foobar{{end-bold}}`` translates to ``foobar``, mwparserfromhell has no way of examining the contents of ``{{end-bold}}``. Instead, it would treat the bold tag as unfinished, possibly extending further down the page. * Templates adjacent to external links, as in ``http://example.com{{foo}}``, are considered part of the link. 
In reality, this would depend on the contents of the template. * When different syntax elements cross over each other, as in ``{{echo|''Hello}}, world!''``, the parser gets confused because this cannot be represented by an ordinary syntax tree. Instead, the parser will treat the first syntax construct as plain text. In this case, only the italic tag would be properly parsed. **Workaround:** Since this commonly occurs with text formatting and text formatting is often not of interest to users, you may pass *skip_style_tags=True* to ``mwparserfromhell.parse()``. This treats ``''`` and ``'''`` as plain text. A future version of mwparserfromhell may include multiple parsing modes to get around this restriction more sensibly. Additionally, the parser lacks awareness of certain wiki-specific settings: * `Word-ending links`_ are not supported, since the linktrail rules are language-specific. * Localized namespace names aren't recognized, so file links (such as ``[[File:...]]``) are treated as regular wikilinks. * Anything that looks like an XML tag is treated as a tag, even if it is not a recognized tag name, since the list of valid tags depends on loaded MediaWiki extensions. .. _Word-ending links: https://www.mediawiki.org/wiki/Help:Links#linktrail mwparserfromhell-0.6.3/docs/usage.rst000066400000000000000000000061331411406531600177030ustar00rootroot00000000000000Usage ===== Normal usage is rather straightforward (where ``text`` is page text):: >>> import mwparserfromhell >>> wikicode = mwparserfromhell.parse(text) ``wikicode`` is a :class:`mwparserfromhell.Wikicode <.Wikicode>` object, which acts like an ordinary ``str`` object with some extra methods. For example:: >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" >>> wikicode = mwparserfromhell.parse(text) >>> print(wikicode) I has a template! {{foo|bar|baz|eggs=spam}} See it? 
>>> templates = wikicode.filter_templates() >>> print(templates) ['{{foo|bar|baz|eggs=spam}}'] >>> template = templates[0] >>> print(template.name) foo >>> print(template.params) ['bar', 'baz', 'eggs=spam'] >>> print(template.get(1).value) bar >>> print(template.get("eggs").value) spam Since nodes can contain other nodes, getting nested templates is trivial:: >>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" >>> mwparserfromhell.parse(text).filter_templates() ['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] You can also pass *recursive=False* to :meth:`.filter_templates` and explore templates manually. This is possible because nodes can contain additional :class:`.Wikicode` objects:: >>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") >>> print(code.filter_templates(recursive=False)) ['{{foo|this {{includes a|template}}}}'] >>> foo = code.filter_templates(recursive=False)[0] >>> print(foo.get(1).value) this {{includes a|template}} >>> print(foo.get(1).value.filter_templates()[0]) {{includes a|template}} >>> print(foo.get(1).value.filter_templates()[0].get(1).value) template Templates can be easily modified to add, remove, or alter params. :class:`.Wikicode` objects can be treated like lists, with :meth:`~.Wikicode.append`, :meth:`~.Wikicode.insert`, :meth:`~.Wikicode.remove`, :meth:`~.Wikicode.replace`, and more. They also have a :meth:`~.Wikicode.matches` method for comparing page or template names, which takes care of capitalization and whitespace:: >>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" >>> code = mwparserfromhell.parse(text) >>> for template in code.filter_templates(): ... if template.name.matches("Cleanup") and not template.has("date"): ... template.add("date", "July 2012") ... >>> print(code) {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} >>> code.replace("{{uncategorized}}", "{{bar-stub}}") >>> print(code) {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. 
{{bar-stub}} >>> print(code.filter_templates()) ['{{cleanup|date=July 2012}}', '{{bar-stub}}'] You can then convert ``code`` back into a regular :class:`str` object (for saving the page!) by calling :func:`str` on it:: >>> text = str(code) >>> print(text) {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} >>> text == code True For more tips, check out :class:`Wikicode's full method list <.Wikicode>` and the :mod:`list of Nodes <.nodes>`. mwparserfromhell-0.6.3/scripts/000077500000000000000000000000001411406531600166015ustar00rootroot00000000000000mwparserfromhell-0.6.3/scripts/README000066400000000000000000000002671411406531600174660ustar00rootroot00000000000000This directory contains support files used for *developing* mwparserfromhell, not running it. If you are looking for code examples, read the documentation or explore the source code. mwparserfromhell-0.6.3/scripts/memtest.py000066400000000000000000000135371411406531600206420ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Tests for memory leaks in the CTokenizer. This appears to work mostly fine under Linux, but gives an absurd number of false positives on macOS. I'm not sure why. Running the tests multiple times yields different results (tests don't always leak, and the amount they leak by varies). Increasing the number of loops results in a smaller bytes/loop value, too, indicating the increase in memory usage might be due to something else. Actual memory leaks typically leak very large amounts of memory (megabytes) and scale with the number of loops. """ from locale import LC_ALL, setlocale from multiprocessing import Process, Pipe from os import listdir, path import sys import psutil from mwparserfromhell.parser._tokenizer import CTokenizer LOOPS = 10000 class Color: GRAY = "\x1b[30;1m" GREEN = "\x1b[92m" YELLOW = "\x1b[93m" RESET = "\x1b[0m" class MemoryTest: """Manages a memory test.""" def __init__(self): self._tests = [] self._load() def _parse_file(self, name, text): tests = text.split("\n---\n") counter = 1 digits = len(str(len(tests))) for test in tests: data = {"name": None, "label": None, "input": None, "output": None} for line in test.strip().splitlines(): if line.startswith("name:"): data["name"] = line[len("name:") :].strip() elif line.startswith("label:"): data["label"] = line[len("label:") :].strip() elif line.startswith("input:"): raw = line[len("input:") :].strip() if raw[0] == '"' and raw[-1] == '"': raw = raw[1:-1] raw = raw.encode("raw_unicode_escape") data["input"] = raw.decode("unicode_escape") number = str(counter).zfill(digits) fname = "test_{}{}_{}".format(name, number, data["name"]) self._tests.append((fname, data["input"])) counter += 1 def _load(self): def load_file(filename): with 
open(filename, "rU") as fp: text = fp.read() name = path.split(filename)[1][: 0 - len(extension)] self._parse_file(name, text) root = path.split(path.dirname(path.abspath(__file__)))[0] directory = path.join(root, "tests", "tokenizer") extension = ".mwtest" if len(sys.argv) > 2 and sys.argv[1] == "--use": for name in sys.argv[2:]: load_file(path.join(directory, name + extension)) sys.argv = [sys.argv[0]] # So unittest doesn't try to load these else: for filename in listdir(directory): if not filename.endswith(extension): continue load_file(path.join(directory, filename)) @staticmethod def _print_results(info1, info2): r1, r2 = info1.rss, info2.rss buff = 8192 if r2 - buff > r1: d = r2 - r1 p = float(d) / r1 bpt = d // LOOPS tmpl = "{0}LEAKING{1}: {2:n} bytes, {3:.2%} inc ({4:n} bytes/loop)" sys.stdout.write(tmpl.format(Color.YELLOW, Color.RESET, d, p, bpt)) else: sys.stdout.write("{}OK{}".format(Color.GREEN, Color.RESET)) def run(self): """Run the memory test suite.""" width = 1 for (name, _) in self._tests: if len(name) > width: width = len(name) tmpl = "{0}[{1:03}/{2}]{3} {4}: " for i, (name, text) in enumerate(self._tests, 1): sys.stdout.write( tmpl.format( Color.GRAY, i, len(self._tests), Color.RESET, name.ljust(width) ) ) sys.stdout.flush() parent, child = Pipe() p = Process(target=_runner, args=(text, child)) p.start() try: proc = psutil.Process(p.pid) parent.recv() parent.send("OK") parent.recv() info1 = proc.get_memory_info() sys.stdout.flush() parent.send("OK") parent.recv() info2 = proc.get_memory_info() self._print_results(info1, info2) sys.stdout.flush() parent.send("OK") finally: proc.kill() print() def _runner(text, child): r1, r2 = range(250), range(LOOPS) for _ in r1: CTokenizer().tokenize(text) child.send("OK") child.recv() child.send("OK") child.recv() for _ in r2: CTokenizer().tokenize(text) child.send("OK") child.recv() if __name__ == "__main__": setlocale(LC_ALL, "") MemoryTest().run() 
mwparserfromhell-0.6.3/scripts/release.sh000077500000000000000000000116501411406531600205630ustar00rootroot00000000000000#! /usr/bin/env bash if [[ -z "$1" ]]; then echo "usage: $0 1.2.3" exit 1 fi set -euo pipefail VERSION=$1 SCRIPT_DIR=$(dirname "$0") RELEASE_DATE=$(date +"%B %-d, %Y") check_git() { if [[ -n "$(git status --porcelain --untracked-files=no)" ]]; then echo "Aborting: dirty working directory." exit 1 fi if [[ "$(git rev-parse --abbrev-ref HEAD)" != "develop" ]]; then echo "Aborting: not on develop." exit 1 fi echo -n "Are you absolutely ready to release? [yN] " read confirm if [[ ${confirm,,} != "y" ]]; then exit 1 fi } update_version() { echo -n "Updating mwparserfromhell.__version__..." sed -e 's/__version__ = .*/__version__ = "'$VERSION'"/' -i "" src/mwparserfromhell/__init__.py echo " done." } update_appveyor() { filename="appveyor.yml" echo -n "Updating $filename..." sed -e "s/version: .*/version: $VERSION-b{build}/" -i "" $filename echo " done." } update_changelog() { filename="CHANGELOG" echo -n "Updating $filename..." sed -e "1s/.*/v$VERSION (released $RELEASE_DATE):/" -i "" $filename echo " done." } update_docs_changelog() { filename="docs/changelog.rst" echo -n "Updating $filename..." dashes=$(seq 1 $(expr ${#VERSION} + 1) | sed 's/.*/-/' | tr -d '\n') previous_lineno=$(expr $(grep -n -e "^---" $filename | sed '2q;d' | cut -d ':' -f 1) - 1) previous_version=$(sed $previous_lineno'q;d' $filename) sed \ -e "4s/.*/v$VERSION/" \ -e "5s/.*/$dashes/" \ -e "7s/.*/\`Released $RELEASE_DATE \`_/" \ -e "8s/.*/(\`changes \`__):/" \ -i "" $filename echo " done." } do_git_stuff() { echo -n "Git: committing, tagging, and merging release..." git commit -qam "release/$VERSION" git tag v$VERSION -s -m "version $VERSION" git checkout -q master git merge -q --no-ff develop -m "Merge develop into master (release/$VERSION)" echo -n " pushing..." git push -q --tags origin master git checkout -q develop git push -q origin develop echo " done." 
} upload_to_pypi() { echo -n "PyPI: uploading source tarball..." python setup.py -q sdist twine upload -s dist/mwparserfromhell-$VERSION* echo " done." } post_release() { echo echo "*** Release completed." echo "*** Update: https://github.com/earwig/mwparserfromhell/releases/tag/v$VERSION" echo "*** Verify: https://pypi.org/project/mwparserfromhell" echo "*** Verify: https://ci.appveyor.com/project/earwig/mwparserfromhell" echo "*** Verify: https://mwparserfromhell.readthedocs.io" echo "*** Press enter to sanity-check the release." read } test_release() { echo echo "Checking mwparserfromhell v$VERSION..." echo -n "Creating a virtualenv..." virtdir="mwparser-test-env" python -m venv $virtdir cd $virtdir source bin/activate echo " done." echo -n "Installing mwparserfromhell with pip..." pip -q install --upgrade pip pip -q install mwparserfromhell pytest echo " done." echo -n "Checking version..." reported_version=$(python -c 'print(__import__("mwparserfromhell").__version__)') if [[ "$reported_version" != "$VERSION" ]]; then echo " error." echo "*** ERROR: mwparserfromhell is reporting its version as $reported_version, not $VERSION!" deactivate cd .. rm -rf $virtdir exit 1 else echo " done." fi pip -q uninstall -y mwparserfromhell echo -n "Downloading mwparserfromhell source tarball and GPG signature..." curl -sL "https://pypi.io/packages/source/m/mwparserfromhell/mwparserfromhell-$VERSION.tar.gz" -o "mwparserfromhell.tar.gz" curl -sL "https://pypi.io/packages/source/m/mwparserfromhell/mwparserfromhell-$VERSION.tar.gz.asc" -o "mwparserfromhell.tar.gz.asc" echo " done." echo "Verifying tarball..." gpg --verify mwparserfromhell.tar.gz.asc mwparserfromhell.tar.gz if [[ "$?" != "0" ]]; then echo "*** ERROR: GPG signature verification failed!" deactivate cd .. rm -rf $virtdir exit 1 fi tar -xf mwparserfromhell.tar.gz rm mwparserfromhell.tar.gz mwparserfromhell.tar.gz.asc cd mwparserfromhell-$VERSION echo "Running unit tests..." 
python setup.py -q install python -m pytest if [[ "$?" != "0" ]]; then echo "*** ERROR: Unit tests failed!" deactivate cd ../.. rm -rf $virtdir exit 1 fi echo -n "Everything looks good. Cleaning up..." deactivate cd ../.. rm -rf $virtdir echo " done." } echo "Preparing mwparserfromhell v$VERSION..." cd "$SCRIPT_DIR/.." check_git update_version update_appveyor update_changelog update_docs_changelog do_git_stuff upload_to_pypi post_release test_release echo "All done." exit 0 mwparserfromhell-0.6.3/scripts/win_wrapper.cmd000066400000000000000000000036701411406531600216310ustar00rootroot00000000000000:: To build extensions for 64 bit Python 3, we need to configure environment :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) :: :: To build extensions for 64 bit Python 2, we need to configure environment :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) :: :: 32 bit builds do not require specific environment configurations. 
:: :: Note: this script needs to be run with the /E:ON and /V:ON flags for the :: cmd interpreter, at least for (SDK v7.0) :: :: More details at: :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows :: http://stackoverflow.com/a/13751649/163740 :: :: Author: Olivier Grisel :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ @ECHO OFF SET COMMAND_TO_RUN=%* SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows SET WIN_WDK=c:\Program Files (x86)\Windows Kits\10\Include\wdf SET MAJOR_PYTHON_VERSION=%PYTHON_VERSION:~0,1% SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2% IF %MAJOR_PYTHON_VERSION% == 2 ( SET WINDOWS_SDK_VERSION="v7.0" ) ELSE IF %MAJOR_PYTHON_VERSION% == 3 ( SET WINDOWS_SDK_VERSION="v7.1" IF %MINOR_PYTHON_VERSION% GEQ 5 ( SET NO_SET_SDK_64=Y ) ) ELSE ( ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" EXIT 1 ) IF "%PYTHON_ARCH%"=="32" ( call %COMMAND_TO_RUN% || EXIT 1 ) ELSE IF "%NO_SET_SDK_64%"=="Y" ( IF EXIST "%WIN_WDK%" ( :: See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/ REN "%WIN_WDK%" 0wdf ) call %COMMAND_TO_RUN% || EXIT 1 ) ELSE ( SET DISTUTILS_USE_SDK=1 SET MSSdk=1 "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release call %COMMAND_TO_RUN% || EXIT 1 ) mwparserfromhell-0.6.3/setup.cfg000066400000000000000000000000261411406531600167310ustar00rootroot00000000000000[aliases] test=pytest mwparserfromhell-0.6.3/setup.py000066400000000000000000000076111411406531600166310ustar00rootroot00000000000000#! 
/usr/bin/env python # # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
from glob import glob import os import sys from setuptools import find_packages, setup, Extension from setuptools.command.build_ext import build_ext sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src")) from mwparserfromhell import __version__ with open("README.rst") as fp: long_docs = fp.read() use_extension = True fallback = True # Allow env var WITHOUT_EXTENSION and args --with[out]-extension: env_var = os.environ.get("WITHOUT_EXTENSION") if "--without-extension" in sys.argv: use_extension = False elif "--with-extension" in sys.argv: fallback = False elif env_var is not None: if env_var == "1": use_extension = False elif env_var == "0": fallback = False # Remove the command line argument as it isn't understood by setuptools: sys.argv = [ arg for arg in sys.argv if arg not in ("--without-extension", "--with-extension") ] def build_ext_patched(self): try: build_ext_original(self) except Exception as exc: print("error: " + str(exc)) print("Falling back to pure Python mode.") del self.extensions[:] if fallback: build_ext.run, build_ext_original = build_ext_patched, build_ext.run # Project-specific part begins here: tokenizer = Extension( "mwparserfromhell.parser._tokenizer", sources=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.c")), depends=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.h")), ) setup( name="mwparserfromhell", packages=find_packages("src"), package_dir={"": "src"}, ext_modules=[tokenizer] if use_extension else [], setup_requires=["pytest-runner"] if "test" in sys.argv or "pytest" in sys.argv else [], tests_require=["pytest"], version=__version__, python_requires=">= 3.5", author="Ben Kurtovic", author_email="ben.kurtovic@gmail.com", url="https://github.com/earwig/mwparserfromhell", description="MWParserFromHell is a parser for MediaWiki wikicode.", long_description=long_docs, download_url="https://github.com/earwig/mwparserfromhell/tarball/v{}".format( __version__ ), keywords="earwig mwparserfromhell wikipedia wiki 
mediawiki wikicode template parsing", license="MIT License", classifiers=[ "Development Status :: 4 - Beta", "Environment :: Console", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Topic :: Text Processing :: Markup", ], ) mwparserfromhell-0.6.3/src/000077500000000000000000000000001411406531600157015ustar00rootroot00000000000000mwparserfromhell-0.6.3/src/mwparserfromhell/000077500000000000000000000000001411406531600212725ustar00rootroot00000000000000mwparserfromhell-0.6.3/src/mwparserfromhell/__init__.py000066400000000000000000000031631411406531600234060ustar00rootroot00000000000000# Copyright (C) 2012-2021 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
""" `mwparserfromhell `_ (the MediaWiki Parser from Hell) is a Python package that provides an easy-to-use and outrageously powerful parser for `MediaWiki `_ wikicode. """ __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012-2021 Ben Kurtovic" __license__ = "MIT License" __version__ = "0.6.3" __email__ = "ben.kurtovic@gmail.com" from . import definitions, nodes, parser, smart_list, string_mixin, utils, wikicode parse = utils.parse_anything mwparserfromhell-0.6.3/src/mwparserfromhell/definitions.py000066400000000000000000000075131411406531600241650ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Contains data about certain markup, like HTML tags and external links. 
When updating this file, please also update the the C tokenizer version: - mwparserfromhell/parser/ctokenizer/definitions.c - mwparserfromhell/parser/ctokenizer/definitions.h """ __all__ = [ "get_html_tag", "is_parsable", "is_visible", "is_single", "is_single_only", "is_scheme", ] URI_SCHEMES = { # [wikimedia/mediawiki.git]/includes/DefaultSettings.php @ 5c660de5d0 "bitcoin": False, "ftp": True, "ftps": True, "geo": False, "git": True, "gopher": True, "http": True, "https": True, "irc": True, "ircs": True, "magnet": False, "mailto": False, "mms": True, "news": False, "nntp": True, "redis": True, "sftp": True, "sip": False, "sips": False, "sms": False, "ssh": True, "svn": True, "tel": False, "telnet": True, "urn": False, "worldwind": True, "xmpp": False, } PARSER_BLACKLIST = [ # https://www.mediawiki.org/wiki/Parser_extension_tags @ 2020-12-21 "categorytree", "ce", "chem", "gallery", "graph", "hiero", "imagemap", "inputbox", "math", "nowiki", "pre", "score", "section", "source", "syntaxhighlight", "templatedata", "timeline", ] INVISIBLE_TAGS = [ # https://www.mediawiki.org/wiki/Parser_extension_tags @ 2020-12-21 "categorytree", "gallery", "graph", "imagemap", "inputbox", "math", "score", "section", "templatedata", "timeline", ] # [wikimedia/mediawiki.git]/includes/parser/Sanitizer.php @ 95e17ee645 SINGLE_ONLY = ["br", "wbr", "hr", "meta", "link", "img"] SINGLE = SINGLE_ONLY + ["li", "dt", "dd", "th", "td", "tr"] MARKUP_TO_HTML = { "#": "li", "*": "li", ";": "dt", ":": "dd", } def get_html_tag(markup): """Return the HTML tag associated with the given wiki-markup.""" return MARKUP_TO_HTML[markup] def is_parsable(tag): """Return if the given *tag*'s contents should be passed to the parser.""" return tag.lower() not in PARSER_BLACKLIST def is_visible(tag): """Return whether or not the given *tag* contains visible text.""" return tag.lower() not in INVISIBLE_TAGS def is_single(tag): """Return whether or not the given *tag* can exist without a close tag.""" return 
tag.lower() in SINGLE def is_single_only(tag): """Return whether or not the given *tag* must exist without a close tag.""" return tag.lower() in SINGLE_ONLY def is_scheme(scheme, slashes=True): """Return whether *scheme* is valid for external links.""" scheme = scheme.lower() if slashes: return scheme in URI_SCHEMES return scheme in URI_SCHEMES and not URI_SCHEMES[scheme] mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/000077500000000000000000000000001411406531600224025ustar00rootroot00000000000000mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/__init__.py000066400000000000000000000037441411406531600245230ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ This package contains :class:`.Wikicode` "nodes", which represent a single unit of wikitext, such as a Template, an HTML tag, a Heading, or plain text. 
The node "tree" is far from flat, as most types can contain additional :class:`.Wikicode` types within them - and with that, more nodes. For example, the name of a :class:`.Template` is a :class:`.Wikicode` object that can contain text or more templates. """ from . import extras from ._base import Node from .text import Text from .argument import Argument from .comment import Comment from .external_link import ExternalLink from .heading import Heading from .html_entity import HTMLEntity from .tag import Tag from .template import Template from .wikilink import Wikilink __all__ = [ "Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading", "Node", "Tag", "Template", "Text", "Wikilink", ] mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/_base.py000066400000000000000000000043631411406531600240330ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
from ..string_mixin import StringMixIn __all__ = ["Node"] class Node(StringMixIn): """Represents the base Node type, demonstrating the methods to override. :meth:`__str__` must be overridden. It should return a ``str`` representation of the node. If the node contains :class:`.Wikicode` objects inside of it, :meth:`__children__` should be a generator that iterates over them. If the node is printable (shown when the page is rendered), :meth:`__strip__` should return its printable version, stripping out any formatting marks. It does not have to return a string, but something that can be converted to a string with ``str()``. Finally, :meth:`__showtree__` can be overridden to build a nice tree representation of the node, if desired, for :meth:`~.Wikicode.get_tree`. """ def __str__(self): raise NotImplementedError() def __children__(self): return # pylint: disable=unreachable yield # pragma: no cover (this is a generator that yields nothing) def __strip__(self, **kwargs): return None def __showtree__(self, write, get, mark): write(str(self)) mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/argument.py000066400000000000000000000055631411406531600246070ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from ._base import Node from ..utils import parse_anything __all__ = ["Argument"] class Argument(Node): """Represents a template argument substitution, like ``{{{foo}}}``.""" def __init__(self, name, default=None): super().__init__() self.name = name self.default = default def __str__(self): start = "{{{" + str(self.name) if self.default is not None: return start + "|" + str(self.default) + "}}}" return start + "}}}" def __children__(self): yield self.name if self.default is not None: yield self.default def __strip__(self, **kwargs): if self.default is not None: return self.default.strip_code(**kwargs) return None def __showtree__(self, write, get, mark): write("{{{") get(self.name) if self.default is not None: write(" | ") mark() get(self.default) write("}}}") @property def name(self): """The name of the argument to substitute.""" return self._name @property def default(self): """The default value to substitute if none is passed. This will be ``None`` if the argument wasn't defined with one. The MediaWiki parser handles this by rendering the argument itself in the result, complete braces. To have the argument render as nothing, set default to ``""`` (``{{{arg}}}`` vs. ``{{{arg|}}}``). 
""" return self._default @name.setter def name(self, value): self._name = parse_anything(value) @default.setter def default(self, default): if default is None: self._default = None else: self._default = parse_anything(default) mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/comment.py000066400000000000000000000031701411406531600244170ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
from ._base import Node __all__ = ["Comment"] class Comment(Node): """Represents a hidden HTML comment, like ````.""" def __init__(self, contents): super().__init__() self.contents = contents def __str__(self): return "" @property def contents(self): """The hidden text contained between ````.""" return self._contents @contents.setter def contents(self, value): self._contents = str(value) mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/external_link.py000066400000000000000000000063251411406531600256210ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
from ._base import Node from ..utils import parse_anything __all__ = ["ExternalLink"] class ExternalLink(Node): """Represents an external link, like ``[http://example.com/ Example]``.""" def __init__(self, url, title=None, brackets=True, suppress_space=False): super().__init__() self.url = url self.title = title self.brackets = brackets self.suppress_space = suppress_space def __str__(self): if self.brackets: if self.title is not None: if self.suppress_space is True: return "[" + str(self.url) + str(self.title) + "]" return "[" + str(self.url) + " " + str(self.title) + "]" return "[" + str(self.url) + "]" return str(self.url) def __children__(self): yield self.url if self.title is not None: yield self.title def __strip__(self, **kwargs): if self.brackets: if self.title: return self.title.strip_code(**kwargs) return None return self.url.strip_code(**kwargs) def __showtree__(self, write, get, mark): if self.brackets: write("[") get(self.url) if self.title is not None: get(self.title) if self.brackets: write("]") @property def url(self): """The URL of the link target, as a :class:`.Wikicode` object.""" return self._url @property def title(self): """The link title (if given), as a :class:`.Wikicode` object.""" return self._title @property def brackets(self): """Whether to enclose the URL in brackets or display it straight.""" return self._brackets @url.setter def url(self, value): # pylint: disable=import-outside-toplevel from ..parser import contexts self._url = parse_anything(value, contexts.EXT_LINK_URI) @title.setter def title(self, value): self._title = None if value is None else parse_anything(value) @brackets.setter def brackets(self, value): self._brackets = bool(value) mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/extras/000077500000000000000000000000001411406531600237105ustar00rootroot00000000000000mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/extras/__init__.py000066400000000000000000000025051411406531600260230ustar00rootroot00000000000000# Copyright 
(C) 2012-2016 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ This package contains objects used by :class:`.Node`\\ s, but that are not nodes themselves. This includes template parameters and HTML tag attributes. 
""" from .attribute import Attribute from .parameter import Parameter mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/extras/attribute.py000066400000000000000000000122561411406531600262730ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from ...string_mixin import StringMixIn from ...utils import parse_anything __all__ = ["Attribute"] class Attribute(StringMixIn): """Represents an attribute of an HTML tag. This is used by :class:`.Tag` objects. For example, the tag ```` contains an Attribute whose name is ``"name"`` and whose value is ``"foo"``. 
""" def __init__( self, name, value=None, quotes='"', pad_first=" ", pad_before_eq="", pad_after_eq="", ): super().__init__() self.name = name self._quotes = None self.value = value self.quotes = quotes self.pad_first = pad_first self.pad_before_eq = pad_before_eq self.pad_after_eq = pad_after_eq def __str__(self): result = self.pad_first + str(self.name) + self.pad_before_eq if self.value is not None: result += "=" + self.pad_after_eq if self.quotes: return result + self.quotes + str(self.value) + self.quotes return result + str(self.value) return result @staticmethod def _value_needs_quotes(val): """Return valid quotes for the given value, or None if unneeded.""" if not val: return None val = "".join(str(node) for node in val.filter_text(recursive=False)) if not any(char.isspace() for char in val): return None if "'" in val and '"' not in val: return '"' if '"' in val and "'" not in val: return "'" return "\"'" # Either acceptable, " preferred over ' def _set_padding(self, attr, value): """Setter for the value of a padding attribute.""" if not value: setattr(self, attr, "") else: value = str(value) if not value.isspace(): raise ValueError("padding must be entirely whitespace") setattr(self, attr, value) @staticmethod def coerce_quotes(quotes): """Coerce a quote type into an acceptable value, or raise an error.""" orig, quotes = quotes, str(quotes) if quotes else None if quotes not in [None, '"', "'"]: raise ValueError("{!r} is not a valid quote type".format(orig)) return quotes @property def name(self): """The name of the attribute as a :class:`.Wikicode` object.""" return self._name @property def value(self): """The value of the attribute as a :class:`.Wikicode` object.""" return self._value @property def quotes(self): """How to enclose the attribute value. 
``"``, ``'``, or ``None``.""" return self._quotes @property def pad_first(self): """Spacing to insert right before the attribute.""" return self._pad_first @property def pad_before_eq(self): """Spacing to insert right before the equal sign.""" return self._pad_before_eq @property def pad_after_eq(self): """Spacing to insert right after the equal sign.""" return self._pad_after_eq @name.setter def name(self, value): self._name = parse_anything(value) @value.setter def value(self, newval): if newval is None: self._value = None else: code = parse_anything(newval) quotes = self._value_needs_quotes(code) if quotes and (not self.quotes or self.quotes not in quotes): self._quotes = quotes[0] self._value = code @quotes.setter def quotes(self, value): value = self.coerce_quotes(value) if not value and self._value_needs_quotes(self.value): raise ValueError("attribute value requires quotes") self._quotes = value @pad_first.setter def pad_first(self, value): self._set_padding("_pad_first", value) @pad_before_eq.setter def pad_before_eq(self, value): self._set_padding("_pad_before_eq", value) @pad_after_eq.setter def pad_after_eq(self, value): self._set_padding("_pad_after_eq", value) mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/extras/parameter.py000066400000000000000000000055141411406531600262470ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import re from ...string_mixin import StringMixIn from ...utils import parse_anything __all__ = ["Parameter"] class Parameter(StringMixIn): """Represents a paramater of a template. For example, the template ``{{foo|bar|spam=eggs}}`` contains two Parameters: one whose name is ``"1"``, value is ``"bar"``, and ``showkey`` is ``False``, and one whose name is ``"spam"``, value is ``"eggs"``, and ``showkey`` is ``True``. """ def __init__(self, name, value, showkey=True): super().__init__() self.name = name self.value = value self.showkey = showkey def __str__(self): if self.showkey: return str(self.name) + "=" + str(self.value) return str(self.value) @staticmethod def can_hide_key(key): """Return whether or not the given key can be hidden.""" return re.match(r"[1-9][0-9]*$", str(key).strip()) @property def name(self): """The name of the parameter as a :class:`.Wikicode` object.""" return self._name @property def value(self): """The value of the parameter as a :class:`.Wikicode` object.""" return self._value @property def showkey(self): """Whether to show the parameter's key (i.e., its "name").""" return self._showkey @name.setter def name(self, newval): self._name = parse_anything(newval) @value.setter def value(self, newval): self._value = parse_anything(newval) @showkey.setter def showkey(self, newval): newval = bool(newval) if not newval and not self.can_hide_key(self.name): raise ValueError("parameter key {!r} cannot be hidden".format(self.name)) self._showkey = newval 
mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/heading.py000066400000000000000000000044261411406531600243610ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
from ._base import Node from ..utils import parse_anything __all__ = ["Heading"] class Heading(Node): """Represents a section heading in wikicode, like ``== Foo ==``.""" def __init__(self, title, level): super().__init__() self.title = title self.level = level def __str__(self): return ("=" * self.level) + str(self.title) + ("=" * self.level) def __children__(self): yield self.title def __strip__(self, **kwargs): return self.title.strip_code(**kwargs) def __showtree__(self, write, get, mark): write("=" * self.level) get(self.title) write("=" * self.level) @property def title(self): """The title of the heading, as a :class:`.Wikicode` object.""" return self._title @property def level(self): """The heading level, as an integer between 1 and 6, inclusive.""" return self._level @title.setter def title(self, value): self._title = parse_anything(value) @level.setter def level(self, value): value = int(value) if value < 1 or value > 6: raise ValueError(value) self._level = value mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/html_entity.py000066400000000000000000000132351411406531600253200ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import html.entities as htmlentities from ._base import Node __all__ = ["HTMLEntity"] class HTMLEntity(Node): """Represents an HTML entity, like `` ``, either named or unnamed.""" def __init__(self, value, named=None, hexadecimal=False, hex_char="x"): super().__init__() self._value = value if named is None: # Try to guess whether or not the entity is named try: int(value) self._named = False self._hexadecimal = False except ValueError: try: int(value, 16) self._named = False self._hexadecimal = True except ValueError: self._named = True self._hexadecimal = False else: self._named = named self._hexadecimal = hexadecimal self._hex_char = hex_char def __str__(self): if self.named: return "&{};".format(self.value) if self.hexadecimal: return "&#{}{};".format(self.hex_char, self.value) return "&#{};".format(self.value) def __strip__(self, **kwargs): if kwargs.get("normalize"): return self.normalize() return self @property def value(self): """The string value of the HTML entity.""" return self._value @property def named(self): """Whether the entity is a string name for a codepoint or an integer. For example, ``Σ``, ``Σ``, and ``Σ`` refer to the same character, but only the first is "named", while the others are integer representations of the codepoint. """ return self._named @property def hexadecimal(self): """If unnamed, this is whether the value is hexadecimal or decimal.""" return self._hexadecimal @property def hex_char(self): """If the value is hexadecimal, this is the letter denoting that. For example, the hex_char of ``"ሴ"`` is ``"x"``, whereas the hex_char of ``"ሴ"`` is ``"X"``. Lowercase and uppercase ``x`` are the only values supported. 
""" return self._hex_char @value.setter def value(self, newval): newval = str(newval) try: int(newval) except ValueError: try: intval = int(newval, 16) except ValueError: if newval not in htmlentities.entitydefs: raise ValueError( "entity value {!r} is not a valid name".format(newval) ) from None self._named = True self._hexadecimal = False else: if intval < 0 or intval > 0x10FFFF: raise ValueError( "entity value 0x{:x} is not in range(0x110000)".format(intval) ) from None self._named = False self._hexadecimal = True else: test = int(newval, 16 if self.hexadecimal else 10) if test < 0 or test > 0x10FFFF: raise ValueError( "entity value {} is not in range(0x110000)".format(test) ) self._named = False self._value = newval @named.setter def named(self, newval): newval = bool(newval) if newval and self.value not in htmlentities.entitydefs: raise ValueError("entity value {!r} is not a valid name".format(self.value)) if not newval: try: int(self.value, 16) except ValueError as exc: raise ValueError( "current entity value {!r} is not a valid " "Unicode codepoint".format(self.value) ) from exc self._named = newval @hexadecimal.setter def hexadecimal(self, newval): newval = bool(newval) if newval and self.named: raise ValueError("a named entity cannot be hexadecimal") self._hexadecimal = newval @hex_char.setter def hex_char(self, newval): newval = str(newval) if newval not in ("x", "X"): raise ValueError(newval) self._hex_char = newval def normalize(self): """Return the unicode character represented by the HTML entity.""" if self.named: return chr(htmlentities.name2codepoint[self.value]) if self.hexadecimal: return chr(int(self.value, 16)) return chr(int(self.value)) mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/tag.py000066400000000000000000000256641411406531600235440ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files 
(the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from ._base import Node from .extras import Attribute from ..definitions import is_visible from ..utils import parse_anything __all__ = ["Tag"] class Tag(Node): """Represents an HTML-style tag in wikicode, like ````.""" def __init__( self, tag, contents=None, attrs=None, wiki_markup=None, self_closing=False, invalid=False, implicit=False, padding="", closing_tag=None, wiki_style_separator=None, closing_wiki_markup=None, ): super().__init__() self.tag = tag self.contents = contents self._attrs = attrs if attrs else [] self._closing_wiki_markup = None self.wiki_markup = wiki_markup self.self_closing = self_closing self.invalid = invalid self.implicit = implicit self.padding = padding if closing_tag is not None: self.closing_tag = closing_tag self.wiki_style_separator = wiki_style_separator if closing_wiki_markup is not None: self.closing_wiki_markup = closing_wiki_markup def __str__(self): if self.wiki_markup: if self.attributes: attrs = "".join([str(attr) for attr in self.attributes]) else: attrs = "" padding = self.padding or "" separator = 
self.wiki_style_separator or "" if self.self_closing: return self.wiki_markup + attrs + padding + separator close = self.closing_wiki_markup or "" return ( self.wiki_markup + attrs + padding + separator + str(self.contents) + close ) result = ("" if self.implicit else "/>") else: result += self.padding + ">" + str(self.contents) result += "" return result def __children__(self): if not self.wiki_markup: yield self.tag for attr in self.attributes: yield attr.name if attr.value is not None: yield attr.value if not self.self_closing: yield self.contents if not self.wiki_markup and self.closing_tag: yield self.closing_tag def __strip__(self, **kwargs): if self.contents and is_visible(self.tag): return self.contents.strip_code(**kwargs) return None def __showtree__(self, write, get, mark): write("" if self.implicit else "/>") else: write(">") get(self.contents) write("") @property def tag(self): """The tag itself, as a :class:`.Wikicode` object.""" return self._tag @property def contents(self): """The contents of the tag, as a :class:`.Wikicode` object.""" return self._contents @property def attributes(self): """The list of attributes affecting the tag. Each attribute is an instance of :class:`.Attribute`. """ return self._attrs @property def wiki_markup(self): """The wikified version of a tag to show instead of HTML. If set to a value, this will be displayed instead of the brackets. For example, set to ``''`` to replace ```` or ``----`` to replace ``
``. """ return self._wiki_markup @property def self_closing(self): """Whether the tag is self-closing with no content (like ``
``).""" return self._self_closing @property def invalid(self): """Whether the tag starts with a backslash after the opening bracket. This makes the tag look like a lone close tag. It is technically invalid and is only parsable Wikicode when the tag itself is single-only, like ``
`` and ````. See :func:`.definitions.is_single_only`. """ return self._invalid @property def implicit(self): """Whether the tag is implicitly self-closing, with no ending slash. This is only possible for specific "single" tags like ``
`` and ``
  • ``. See :func:`.definitions.is_single`. This field only has an effect if :attr:`self_closing` is also ``True``. """ return self._implicit @property def padding(self): """Spacing to insert before the first closing ``>``.""" return self._padding @property def closing_tag(self): """The closing tag, as a :class:`.Wikicode` object. This will usually equal :attr:`tag`, unless there is additional spacing, comments, or the like. """ return self._closing_tag @property def wiki_style_separator(self): """The separator between the padding and content in a wiki markup tag. Essentially the wiki equivalent of the TagCloseOpen. """ return self._wiki_style_separator @property def closing_wiki_markup(self): """The wikified version of the closing tag to show instead of HTML. If set to a value, this will be displayed instead of the close tag brackets. If tag is :attr:`self_closing` is ``True`` then this is not displayed. If :attr:`wiki_markup` is set and this has not been set, this is set to the value of :attr:`wiki_markup`. If this has been set and :attr:`wiki_markup` is set to a ``False`` value, this is set to ``None``. 
""" return self._closing_wiki_markup @tag.setter def tag(self, value): self._tag = self._closing_tag = parse_anything(value) @contents.setter def contents(self, value): self._contents = parse_anything(value) @wiki_markup.setter def wiki_markup(self, value): self._wiki_markup = str(value) if value else None if not value or not self.closing_wiki_markup: self._closing_wiki_markup = self._wiki_markup @self_closing.setter def self_closing(self, value): self._self_closing = bool(value) @invalid.setter def invalid(self, value): self._invalid = bool(value) @implicit.setter def implicit(self, value): self._implicit = bool(value) @padding.setter def padding(self, value): if not value: self._padding = "" else: value = str(value) if not value.isspace(): raise ValueError("padding must be entirely whitespace") self._padding = value @closing_tag.setter def closing_tag(self, value): self._closing_tag = parse_anything(value) @wiki_style_separator.setter def wiki_style_separator(self, value): self._wiki_style_separator = str(value) if value else None @closing_wiki_markup.setter def closing_wiki_markup(self, value): self._closing_wiki_markup = str(value) if value else None def has(self, name): """Return whether any attribute in the tag has the given *name*. Note that a tag may have multiple attributes with the same name, but only the last one is read by the MediaWiki parser. """ for attr in self.attributes: if attr.name == name.strip(): return True return False def get(self, name): """Get the attribute with the given *name*. The returned object is a :class:`.Attribute` instance. Raises :exc:`ValueError` if no attribute has this name. Since multiple attributes can have the same name, we'll return the last match, since all but the last are ignored by the MediaWiki parser. 
""" for attr in reversed(self.attributes): if attr.name == name.strip(): return attr raise ValueError(name) def add( self, name, value=None, quotes='"', pad_first=" ", pad_before_eq="", pad_after_eq="", ): """Add an attribute with the given *name* and *value*. *name* and *value* can be anything parsable by :func:`.utils.parse_anything`; *value* can be omitted if the attribute is valueless. If *quotes* is not ``None``, it should be a string (either ``"`` or ``'``) that *value* will be wrapped in (this is recommended). ``None`` is only legal if *value* contains no spacing. *pad_first*, *pad_before_eq*, and *pad_after_eq* are whitespace used as padding before the name, before the equal sign (or after the name if no value), and after the equal sign (ignored if no value), respectively. """ if value is not None: value = parse_anything(value) quotes = Attribute.coerce_quotes(quotes) attr = Attribute(parse_anything(name), value, quotes) attr.pad_first = pad_first attr.pad_before_eq = pad_before_eq attr.pad_after_eq = pad_after_eq self.attributes.append(attr) return attr def remove(self, name): """Remove all attributes with the given *name*. Raises :exc:`ValueError` if none were found. 
""" attrs = [attr for attr in self.attributes if attr.name == name.strip()] if not attrs: raise ValueError(name) for attr in attrs: self.attributes.remove(attr) mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/template.py000066400000000000000000000337741411406531600246050ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
from collections import defaultdict import re from ._base import Node from .html_entity import HTMLEntity from .text import Text from .extras import Parameter from ..utils import parse_anything __all__ = ["Template"] FLAGS = re.DOTALL | re.UNICODE # Used to allow None as a valid fallback value _UNSET = object() class Template(Node): """Represents a template in wikicode, like ``{{foo}}``.""" def __init__(self, name, params=None): super().__init__() self.name = name if params: self._params = params else: self._params = [] def __str__(self): if self.params: params = "|".join([str(param) for param in self.params]) return "{{" + str(self.name) + "|" + params + "}}" return "{{" + str(self.name) + "}}" def __children__(self): yield self.name for param in self.params: if param.showkey: yield param.name yield param.value def __strip__(self, **kwargs): if kwargs.get("keep_template_params"): parts = [param.value.strip_code(**kwargs) for param in self.params] return " ".join(part for part in parts if part) return None def __showtree__(self, write, get, mark): write("{{") get(self.name) for param in self.params: write(" | ") mark() get(param.name) write(" = ") mark() get(param.value) write("}}") @staticmethod def _surface_escape(code, char): """Return *code* with *char* escaped as an HTML entity. The main use of this is to escape pipes (``|``) or equal signs (``=``) in parameter names or values so they are not mistaken for new parameters. """ replacement = str(HTMLEntity(value=ord(char))) for node in code.filter_text(recursive=False): if char in node: code.replace(node, node.replace(char, replacement), False) @staticmethod def _select_theory(theories): """Return the most likely spacing convention given different options. Given a dictionary of convention options as keys and their occurrence as values, return the convention that occurs the most, or ``None`` if there is no clear preferred style. 
""" if theories: values = tuple(theories.values()) best = max(values) confidence = float(best) / sum(values) if confidence > 0.5: return tuple(theories.keys())[values.index(best)] return None @staticmethod def _blank_param_value(value): """Remove the content from *value* while keeping its whitespace. Replace *value*\\ 's nodes with two text nodes, the first containing whitespace from before its content and the second containing whitespace from after its content. """ sval = str(value) if sval.isspace(): before, after = "", sval else: match = re.search(r"^(\s*).*?(\s*)$", sval, FLAGS) before, after = match.group(1), match.group(2) value.nodes = [Text(before), Text(after)] def _get_spacing_conventions(self, use_names): """Try to determine the whitespace conventions for parameters. This will examine the existing parameters and use :meth:`_select_theory` to determine if there are any preferred styles for how much whitespace to put before or after the value. """ before_theories = defaultdict(lambda: 0) after_theories = defaultdict(lambda: 0) for param in self.params: if not param.showkey: continue if use_names: component = str(param.name) else: component = str(param.value) match = re.search(r"^(\s*).*?(\s*)$", component, FLAGS) before, after = match.group(1), match.group(2) if not use_names and component.isspace() and "\n" in before: # If the value is empty, we expect newlines in the whitespace # to be after the content, not before it: before, after = before.split("\n", 1) after = "\n" + after before_theories[before] += 1 after_theories[after] += 1 before = self._select_theory(before_theories) after = self._select_theory(after_theories) return before, after def _fix_dependendent_params(self, i): """Unhide keys if necessary after removing the param at index *i*.""" if not self.params[i].showkey: for param in self.params[i + 1 :]: if not param.showkey: param.showkey = True def _remove_exact(self, needle, keep_field): """Remove a specific parameter, *needle*, from the 
template.""" for i, param in enumerate(self.params): if param is needle: if keep_field: self._blank_param_value(param.value) else: self._fix_dependendent_params(i) self.params.pop(i) return raise ValueError(needle) def _should_remove(self, i, name): """Look ahead for a parameter with the same name, but hidden. If one exists, we should remove the given one rather than blanking it. """ if self.params[i].showkey: following = self.params[i + 1 :] better_matches = [ after.name.strip() == name and not after.showkey for after in following ] return any(better_matches) return False @property def name(self): """The name of the template, as a :class:`.Wikicode` object.""" return self._name @property def params(self): """The list of parameters contained within the template.""" return self._params @name.setter def name(self, value): self._name = parse_anything(value) def has(self, name, ignore_empty=False): """Return ``True`` if any parameter in the template is named *name*. With *ignore_empty*, ``False`` will be returned even if the template contains a parameter with the name *name*, if the parameter's value is empty. Note that a template may have multiple parameters with the same name, but only the last one is read by the MediaWiki parser. """ name = str(name).strip() for param in self.params: if param.name.strip() == name: if ignore_empty and not param.value.strip(): continue return True return False def has_param(self, name, ignore_empty=False): """Alias for :meth:`has`.""" return self.has(name, ignore_empty) def get(self, name, default=_UNSET): """Get the parameter whose name is *name*. The returned object is a :class:`.Parameter` instance. Raises :exc:`ValueError` if no parameter has this name. If *default* is set, returns that instead. Since multiple parameters can have the same name, we'll return the last match, since the last parameter is the only one read by the MediaWiki parser. 
""" name = str(name).strip() for param in reversed(self.params): if param.name.strip() == name: return param if default is _UNSET: raise ValueError(name) return default def __getitem__(self, name): return self.get(name) def add(self, name, value, showkey=None, before=None, preserve_spacing=True): """Add a parameter to the template with a given *name* and *value*. *name* and *value* can be anything parsable by :func:`.utils.parse_anything`; pipes and equal signs are automatically escaped from *value* when appropriate. If *name* is already a parameter in the template, we'll replace its value. If *showkey* is given, this will determine whether or not to show the parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of ``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent guess. If *before* is given (either a :class:`.Parameter` object or a name), then we will place the parameter immediately before this one. Otherwise, it will be added at the end. If *before* is a name and exists multiple times in the template, we will place it before the last occurrence. If *before* is not in the template, :exc:`ValueError` is raised. The argument is ignored if *name* is an existing parameter. If *preserve_spacing* is ``True``, we will try to preserve whitespace conventions around the parameter, whether it is new or we are updating an existing value. It is disabled for parameters with hidden keys, since MediaWiki doesn't strip whitespace in this case. 
""" name, value = parse_anything(name), parse_anything(value) self._surface_escape(value, "|") if self.has(name): self.remove(name, keep_field=True) existing = self.get(name) if showkey is not None: existing.showkey = showkey if not existing.showkey: self._surface_escape(value, "=") nodes = existing.value.nodes if preserve_spacing and existing.showkey: for i in range(2): # Ignore empty text nodes if not nodes[i]: nodes[i] = None existing.value = parse_anything([nodes[0], value, nodes[1]]) else: existing.value = value return existing if showkey is None: if Parameter.can_hide_key(name): int_name = int(str(name)) int_keys = set() for param in self.params: if not param.showkey: int_keys.add(int(str(param.name))) expected = min(set(range(1, len(int_keys) + 2)) - int_keys) if expected == int_name: showkey = False else: showkey = True else: showkey = True if not showkey: self._surface_escape(value, "=") if preserve_spacing and showkey: before_n, after_n = self._get_spacing_conventions(use_names=True) before_v, after_v = self._get_spacing_conventions(use_names=False) name = parse_anything([before_n, name, after_n]) value = parse_anything([before_v, value, after_v]) param = Parameter(name, value, showkey) if before: if not isinstance(before, Parameter): before = self.get(before) self.params.insert(self.params.index(before), param) else: self.params.append(param) return param def __setitem__(self, name, value): return self.add(name, value) def remove(self, param, keep_field=False): """Remove a parameter from the template, identified by *param*. If *param* is a :class:`.Parameter` object, it will be matched exactly, otherwise it will be treated like the *name* argument to :meth:`has` and :meth:`get`. If *keep_field* is ``True``, we will keep the parameter's name, but blank its value. Otherwise, we will remove the parameter completely. When removing a parameter with a hidden name, subsequent parameters with hidden names will be made visible. 
For example, removing ``bar`` from ``{{foo|bar|baz}}`` produces ``{{foo|2=baz}}`` because ``{{foo|baz}}`` is incorrect. If the parameter shows up multiple times in the template and *param* is not a :class:`.Parameter` object, we will remove all instances of it (and keep only one if *keep_field* is ``True`` - either the one with a hidden name, if it exists, or the first instance). """ if isinstance(param, Parameter): self._remove_exact(param, keep_field) return name = str(param).strip() removed = False to_remove = [] for i, par in enumerate(self.params): if par.name.strip() == name: if keep_field: if self._should_remove(i, name): to_remove.append(i) else: self._blank_param_value(par.value) keep_field = False else: self._fix_dependendent_params(i) to_remove.append(i) if not removed: removed = True if not removed: raise ValueError(name) for i in reversed(to_remove): self.params.pop(i) def __delitem__(self, param): return self.remove(param) mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/text.py000066400000000000000000000033261411406531600237440ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from ._base import Node __all__ = ["Text"] class Text(Node): """Represents ordinary, unformatted text with no special properties.""" def __init__(self, value): super().__init__() self.value = value def __str__(self): return self.value def __strip__(self, **kwargs): return self def __showtree__(self, write, get, mark): write(str(self).encode("unicode_escape").decode("utf8")) @property def value(self): """The actual text itself.""" return self._value @value.setter def value(self, newval): self._value = str(newval) mwparserfromhell-0.6.3/src/mwparserfromhell/nodes/wikilink.py000066400000000000000000000050771411406531600246060ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from ._base import Node from ..utils import parse_anything __all__ = ["Wikilink"] class Wikilink(Node): """Represents an internal wikilink, like ``[[Foo|Bar]]``.""" def __init__(self, title, text=None): super().__init__() self.title = title self.text = text def __str__(self): if self.text is not None: return "[[" + str(self.title) + "|" + str(self.text) + "]]" return "[[" + str(self.title) + "]]" def __children__(self): yield self.title if self.text is not None: yield self.text def __strip__(self, **kwargs): if self.text is not None: return self.text.strip_code(**kwargs) return self.title.strip_code(**kwargs) def __showtree__(self, write, get, mark): write("[[") get(self.title) if self.text is not None: write(" | ") mark() get(self.text) write("]]") @property def title(self): """The title of the linked page, as a :class:`.Wikicode` object.""" return self._title @property def text(self): """The text to display (if any), as a :class:`.Wikicode` object.""" return self._text @title.setter def title(self, value): self._title = parse_anything(value) @text.setter def text(self, value): if value is None: self._text = None else: self._text = parse_anything(value) mwparserfromhell-0.6.3/src/mwparserfromhell/parser/000077500000000000000000000000001411406531600225665ustar00rootroot00000000000000mwparserfromhell-0.6.3/src/mwparserfromhell/parser/__init__.py000066400000000000000000000065761411406531600247150ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to 
use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ This package contains the actual wikicode parser, split up into two main modules: the :mod:`.tokenizer` and the :mod:`.builder`. This module joins them together into one interface. """ from .builder import Builder from .errors import ParserError try: from ._tokenizer import CTokenizer use_c = True except ImportError: from .tokenizer import Tokenizer CTokenizer = None use_c = False __all__ = ["use_c", "Parser", "ParserError"] class Parser: """Represents a parser for wikicode. Actual parsing is a two-step process: first, the text is split up into a series of tokens by the :class:`.Tokenizer`, and then the tokens are converted into trees of :class:`.Wikicode` objects and :class:`.Node`\\ s by the :class:`.Builder`. Instances of this class or its dependents (:class:`.Tokenizer` and :class:`.Builder`) should not be shared between threads. :meth:`parse` can be called multiple times as long as it is not done concurrently. In general, there is no need to do this because parsing should be done through :func:`mwparserfromhell.parse`, which creates a new :class:`.Parser` object as necessary. 
""" def __init__(self): if use_c and CTokenizer: self._tokenizer = CTokenizer() else: from .tokenizer import Tokenizer self._tokenizer = Tokenizer() self._builder = Builder() def parse(self, text, context=0, skip_style_tags=False): """Parse *text*, returning a :class:`.Wikicode` object tree. If given, *context* will be passed as a starting context to the parser. This is helpful when this function is used inside node attribute setters. For example, :class:`.ExternalLink`\\ 's :attr:`~.ExternalLink.url` setter sets *context* to :mod:`contexts.EXT_LINK_URI <.contexts>` to prevent the URL itself from becoming an :class:`.ExternalLink`. If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be parsed, but instead will be treated as plain text. If there is an internal error while parsing, :exc:`.ParserError` will be raised. """ tokens = self._tokenizer.tokenize(text, context, skip_style_tags) code = self._builder.build(tokens) return code mwparserfromhell-0.6.3/src/mwparserfromhell/parser/builder.py000066400000000000000000000310651411406531600245730ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from . import tokens from .errors import ParserError from ..nodes import ( Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink, ) from ..nodes.extras import Attribute, Parameter from ..smart_list import SmartList from ..wikicode import Wikicode __all__ = ["Builder"] _HANDLERS = {tokens.Text: lambda self, token: Text(token.text)} def _add_handler(token_type): """Create a decorator that adds a handler function to the lookup table.""" def decorator(func): """Add a handler function to the lookup table.""" _HANDLERS[token_type] = func return func return decorator class Builder: """Builds a tree of nodes out of a sequence of tokens. To use, pass a list of :class:`.Token`\\ s to the :meth:`build` method. The list will be exhausted as it is parsed and a :class:`.Wikicode` object containing the node tree will be returned. """ def __init__(self): self._tokens = [] self._stacks = [] def _push(self): """Push a new node list onto the stack.""" self._stacks.append([]) def _pop(self): """Pop the current node list off of the stack. The raw node list is wrapped in a :class:`.SmartList` and then in a :class:`.Wikicode` object. """ return Wikicode(SmartList(self._stacks.pop())) def _write(self, item): """Append a node to the current node list.""" self._stacks[-1].append(item) def _handle_parameter(self, default): """Handle a case where a parameter is at the head of the tokens. *default* is the value to use if no parameter name is defined. 
""" key = None showkey = False self._push() while self._tokens: token = self._tokens.pop() if isinstance(token, tokens.TemplateParamEquals): key = self._pop() showkey = True self._push() elif isinstance( token, (tokens.TemplateParamSeparator, tokens.TemplateClose) ): self._tokens.append(token) value = self._pop() if key is None: key = Wikicode(SmartList([Text(str(default))])) return Parameter(key, value, showkey) else: self._write(self._handle_token(token)) raise ParserError("_handle_parameter() missed a close token") @_add_handler(tokens.TemplateOpen) def _handle_template(self, token): """Handle a case where a template is at the head of the tokens.""" params = [] default = 1 self._push() while self._tokens: token = self._tokens.pop() if isinstance(token, tokens.TemplateParamSeparator): if not params: name = self._pop() param = self._handle_parameter(default) params.append(param) if not param.showkey: default += 1 elif isinstance(token, tokens.TemplateClose): if not params: name = self._pop() return Template(name, params) else: self._write(self._handle_token(token)) raise ParserError("_handle_template() missed a close token") @_add_handler(tokens.ArgumentOpen) def _handle_argument(self, token): """Handle a case where an argument is at the head of the tokens.""" name = None self._push() while self._tokens: token = self._tokens.pop() if isinstance(token, tokens.ArgumentSeparator): name = self._pop() self._push() elif isinstance(token, tokens.ArgumentClose): if name is not None: return Argument(name, self._pop()) return Argument(self._pop()) else: self._write(self._handle_token(token)) raise ParserError("_handle_argument() missed a close token") @_add_handler(tokens.WikilinkOpen) def _handle_wikilink(self, token): """Handle a case where a wikilink is at the head of the tokens.""" title = None self._push() while self._tokens: token = self._tokens.pop() if isinstance(token, tokens.WikilinkSeparator): title = self._pop() self._push() elif isinstance(token, 
tokens.WikilinkClose): if title is not None: return Wikilink(title, self._pop()) return Wikilink(self._pop()) else: self._write(self._handle_token(token)) raise ParserError("_handle_wikilink() missed a close token") @_add_handler(tokens.ExternalLinkOpen) def _handle_external_link(self, token): """Handle when an external link is at the head of the tokens.""" brackets, url, suppress_space = token.brackets, None, None self._push() while self._tokens: token = self._tokens.pop() if isinstance(token, tokens.ExternalLinkSeparator): url = self._pop() suppress_space = token.suppress_space self._push() elif isinstance(token, tokens.ExternalLinkClose): if url is not None: return ExternalLink( url, self._pop(), brackets=brackets, suppress_space=suppress_space is True, ) return ExternalLink( self._pop(), brackets=brackets, suppress_space=suppress_space is True, ) else: self._write(self._handle_token(token)) raise ParserError("_handle_external_link() missed a close token") @_add_handler(tokens.HTMLEntityStart) def _handle_entity(self, token): """Handle a case where an HTML entity is at the head of the tokens.""" token = self._tokens.pop() if isinstance(token, tokens.HTMLEntityNumeric): token = self._tokens.pop() if isinstance(token, tokens.HTMLEntityHex): text = self._tokens.pop() self._tokens.pop() # Remove HTMLEntityEnd return HTMLEntity( text.text, named=False, hexadecimal=True, hex_char=token.char ) self._tokens.pop() # Remove HTMLEntityEnd return HTMLEntity(token.text, named=False, hexadecimal=False) self._tokens.pop() # Remove HTMLEntityEnd return HTMLEntity(token.text, named=True, hexadecimal=False) @_add_handler(tokens.HeadingStart) def _handle_heading(self, token): """Handle a case where a heading is at the head of the tokens.""" level = token.level self._push() while self._tokens: token = self._tokens.pop() if isinstance(token, tokens.HeadingEnd): title = self._pop() return Heading(title, level) self._write(self._handle_token(token)) raise 
ParserError("_handle_heading() missed a close token") @_add_handler(tokens.CommentStart) def _handle_comment(self, token): """Handle a case where an HTML comment is at the head of the tokens.""" self._push() while self._tokens: token = self._tokens.pop() if isinstance(token, tokens.CommentEnd): contents = self._pop() return Comment(contents) self._write(self._handle_token(token)) raise ParserError("_handle_comment() missed a close token") def _handle_attribute(self, start): """Handle a case where a tag attribute is at the head of the tokens.""" name = quotes = None self._push() while self._tokens: token = self._tokens.pop() if isinstance(token, tokens.TagAttrEquals): name = self._pop() self._push() elif isinstance(token, tokens.TagAttrQuote): quotes = token.char elif isinstance( token, (tokens.TagAttrStart, tokens.TagCloseOpen, tokens.TagCloseSelfclose), ): self._tokens.append(token) if name: value = self._pop() else: name, value = self._pop(), None return Attribute( name, value, quotes, start.pad_first, start.pad_before_eq, start.pad_after_eq, ) else: self._write(self._handle_token(token)) raise ParserError("_handle_attribute() missed a close token") @_add_handler(tokens.TagOpenOpen) def _handle_tag(self, token): """Handle a case where a tag is at the head of the tokens.""" close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose) implicit, attrs, contents, closing_tag = False, [], None, None wiki_markup, invalid = token.wiki_markup, token.invalid or False wiki_style_separator, closing_wiki_markup = None, wiki_markup self._push() while self._tokens: token = self._tokens.pop() if isinstance(token, tokens.TagAttrStart): attrs.append(self._handle_attribute(token)) elif isinstance(token, tokens.TagCloseOpen): wiki_style_separator = token.wiki_markup padding = token.padding or "" tag = self._pop() self._push() elif isinstance(token, tokens.TagOpenClose): closing_wiki_markup = token.wiki_markup contents = self._pop() self._push() elif isinstance(token, 
close_tokens): if isinstance(token, tokens.TagCloseSelfclose): closing_wiki_markup = token.wiki_markup tag = self._pop() self_closing = True padding = token.padding or "" implicit = token.implicit or False else: self_closing = False closing_tag = self._pop() return Tag( tag, contents, attrs, wiki_markup, self_closing, invalid, implicit, padding, closing_tag, wiki_style_separator, closing_wiki_markup, ) else: self._write(self._handle_token(token)) raise ParserError("_handle_tag() missed a close token") def _handle_token(self, token): """Handle a single token.""" try: return _HANDLERS[type(token)](self, token) except KeyError: err = "_handle_token() got unexpected {0}" raise ParserError(err.format(type(token).__name__)) from None def build(self, tokenlist): """Build a Wikicode object from a list tokens and return it.""" self._tokens = tokenlist self._tokens.reverse() self._push() while self._tokens: node = self._handle_token(self._tokens.pop()) self._write(node) return self._pop() del _add_handler mwparserfromhell-0.6.3/src/mwparserfromhell/parser/contexts.py000066400000000000000000000137211411406531600250130ustar00rootroot00000000000000# Copyright (C) 2012-2019 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ This module contains various "context" definitions, which are essentially flags set during the tokenization process, either on the current parse stack (local contexts) or affecting all stacks (global contexts). They represent the context the tokenizer is in, such as inside a template's name definition, or inside a level-two heading. This is used to determine what tokens are valid at the current point and also if the current parsing route is invalid. The tokenizer stores context as an integer, with these definitions bitwise OR'd to set them, AND'd to check if they're set, and XOR'd to unset them. The advantage of this is that contexts can have sub-contexts (as ``FOO == 0b11`` will cover ``BAR == 0b10`` and ``BAZ == 0b01``). 
Local (stack-specific) contexts: * :const:`TEMPLATE` * :const:`TEMPLATE_NAME` * :const:`TEMPLATE_PARAM_KEY` * :const:`TEMPLATE_PARAM_VALUE` * :const:`ARGUMENT` * :const:`ARGUMENT_NAME` * :const:`ARGUMENT_DEFAULT` * :const:`WIKILINK` * :const:`WIKILINK_TITLE` * :const:`WIKILINK_TEXT` * :const:`EXT_LINK` * :const:`EXT_LINK_URI` * :const:`EXT_LINK_TITLE` * :const:`HEADING` * :const:`HEADING_LEVEL_1` * :const:`HEADING_LEVEL_2` * :const:`HEADING_LEVEL_3` * :const:`HEADING_LEVEL_4` * :const:`HEADING_LEVEL_5` * :const:`HEADING_LEVEL_6` * :const:`TAG` * :const:`TAG_OPEN` * :const:`TAG_ATTR` * :const:`TAG_BODY` * :const:`TAG_CLOSE` * :const:`STYLE` * :const:`STYLE_ITALICS` * :const:`STYLE_BOLD` * :const:`STYLE_PASS_AGAIN` * :const:`STYLE_SECOND_PASS` * :const:`DL_TERM` * :const:`SAFETY_CHECK` * :const:`HAS_TEXT` * :const:`FAIL_ON_TEXT` * :const:`FAIL_NEXT` * :const:`FAIL_ON_LBRACE` * :const:`FAIL_ON_RBRACE` * :const:`FAIL_ON_EQUALS` * :const:`HAS_TEMPLATE` * :const:`TABLE` * :const:`TABLE_OPEN` * :const:`TABLE_CELL_OPEN` * :const:`TABLE_CELL_STYLE` * :const:`TABLE_TD_LINE` * :const:`TABLE_TH_LINE` * :const:`TABLE_CELL_LINE_CONTEXTS` * :const:`HTML_ENTITY` Global contexts: * :const:`GL_HEADING` Aggregate contexts: * :const:`FAIL` * :const:`UNSAFE` * :const:`DOUBLE` * :const:`NO_WIKILINKS` * :const:`NO_EXT_LINKS` """ # Local contexts: TEMPLATE_NAME = 1 << 0 TEMPLATE_PARAM_KEY = 1 << 1 TEMPLATE_PARAM_VALUE = 1 << 2 TEMPLATE = TEMPLATE_NAME + TEMPLATE_PARAM_KEY + TEMPLATE_PARAM_VALUE ARGUMENT_NAME = 1 << 3 ARGUMENT_DEFAULT = 1 << 4 ARGUMENT = ARGUMENT_NAME + ARGUMENT_DEFAULT WIKILINK_TITLE = 1 << 5 WIKILINK_TEXT = 1 << 6 WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT EXT_LINK_URI = 1 << 7 EXT_LINK_TITLE = 1 << 8 EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE HEADING_LEVEL_1 = 1 << 9 HEADING_LEVEL_2 = 1 << 10 HEADING_LEVEL_3 = 1 << 11 HEADING_LEVEL_4 = 1 << 12 HEADING_LEVEL_5 = 1 << 13 HEADING_LEVEL_6 = 1 << 14 HEADING = ( HEADING_LEVEL_1 + HEADING_LEVEL_2 + HEADING_LEVEL_3 + 
HEADING_LEVEL_4 + HEADING_LEVEL_5 + HEADING_LEVEL_6 ) TAG_OPEN = 1 << 15 TAG_ATTR = 1 << 16 TAG_BODY = 1 << 17 TAG_CLOSE = 1 << 18 TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE STYLE_ITALICS = 1 << 19 STYLE_BOLD = 1 << 20 STYLE_PASS_AGAIN = 1 << 21 STYLE_SECOND_PASS = 1 << 22 STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS DL_TERM = 1 << 23 HAS_TEXT = 1 << 24 FAIL_ON_TEXT = 1 << 25 FAIL_NEXT = 1 << 26 FAIL_ON_LBRACE = 1 << 27 FAIL_ON_RBRACE = 1 << 28 FAIL_ON_EQUALS = 1 << 29 HAS_TEMPLATE = 1 << 30 SAFETY_CHECK = ( HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + FAIL_ON_RBRACE + FAIL_ON_EQUALS + HAS_TEMPLATE ) TABLE_OPEN = 1 << 31 TABLE_CELL_OPEN = 1 << 32 TABLE_CELL_STYLE = 1 << 33 TABLE_ROW_OPEN = 1 << 34 TABLE_TD_LINE = 1 << 35 TABLE_TH_LINE = 1 << 36 TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE TABLE = ( TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN + TABLE_TD_LINE + TABLE_TH_LINE ) HTML_ENTITY = 1 << 37 # Global contexts: GL_HEADING = 1 << 0 # Aggregate contexts: FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE + TABLE UNSAFE = ( TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE + TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE ) DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE + TABLE_ROW_OPEN NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK def describe(context): """Return a string describing the given context value, for debugging.""" flags = [] for name, value in globals().items(): if not isinstance(value, int) or name.startswith("GL_"): continue if bin(value).count("1") != 1: continue # Hacky way to skip aggregate contexts if context & value: flags.append((name, value)) flags.sort(key=lambda it: it[1]) return "|".join(it[0] for it in flags) 
mwparserfromhell-0.6.3/src/mwparserfromhell/parser/ctokenizer/000077500000000000000000000000001411406531600247435ustar00rootroot00000000000000mwparserfromhell-0.6.3/src/mwparserfromhell/parser/ctokenizer/avl_tree.c000066400000000000000000000646701411406531600267250ustar00rootroot00000000000000/* * avl_tree.c - intrusive, nonrecursive AVL tree data structure (self-balancing * binary search tree), implementation file * * Written in 2014-2016 by Eric Biggers * Slight changes for compatibility by Ben Kurtovic * * To the extent possible under law, the author(s) have dedicated all copyright * and related and neighboring rights to this software to the public domain * worldwide via the Creative Commons Zero 1.0 Universal Public Domain * Dedication (the "CC0"). * * This software is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the CC0 for more details. * * You should have received a copy of the CC0 along with this software; if not * see . */ #define false 0 #define true 1 typedef int bool; #include "avl_tree.h" /* Returns the left child (sign < 0) or the right child (sign > 0) of the * specified AVL tree node. * Note: for all calls of this, 'sign' is constant at compilation time, * so the compiler can remove the conditional. */ static AVL_INLINE struct avl_tree_node * avl_get_child(const struct avl_tree_node *parent, int sign) { if (sign < 0) { return parent->left; } else { return parent->right; } } static AVL_INLINE struct avl_tree_node * avl_tree_first_or_last_in_order(const struct avl_tree_node *root, int sign) { const struct avl_tree_node *first = root; if (first) { while (avl_get_child(first, +sign)) { first = avl_get_child(first, +sign); } } return (struct avl_tree_node *) first; } /* Starts an in-order traversal of the tree: returns the least-valued node, or * NULL if the tree is empty. 
*/ struct avl_tree_node * avl_tree_first_in_order(const struct avl_tree_node *root) { return avl_tree_first_or_last_in_order(root, -1); } /* Starts a *reverse* in-order traversal of the tree: returns the * greatest-valued node, or NULL if the tree is empty. */ struct avl_tree_node * avl_tree_last_in_order(const struct avl_tree_node *root) { return avl_tree_first_or_last_in_order(root, 1); } static AVL_INLINE struct avl_tree_node * avl_tree_next_or_prev_in_order(const struct avl_tree_node *node, int sign) { const struct avl_tree_node *next; if (avl_get_child(node, +sign)) { for (next = avl_get_child(node, +sign); avl_get_child(next, -sign); next = avl_get_child(next, -sign)) { } } else { for (next = avl_get_parent(node); next && node == avl_get_child(next, +sign); node = next, next = avl_get_parent(next)) { } } return (struct avl_tree_node *) next; } /* Continues an in-order traversal of the tree: returns the next-greatest-valued * node, or NULL if there is none. */ struct avl_tree_node * avl_tree_next_in_order(const struct avl_tree_node *node) { return avl_tree_next_or_prev_in_order(node, 1); } /* Continues a *reverse* in-order traversal of the tree: returns the * previous-greatest-valued node, or NULL if there is none. */ struct avl_tree_node * avl_tree_prev_in_order(const struct avl_tree_node *node) { return avl_tree_next_or_prev_in_order(node, -1); } /* Starts a postorder traversal of the tree. */ struct avl_tree_node * avl_tree_first_in_postorder(const struct avl_tree_node *root) { const struct avl_tree_node *first = root; if (first) { while (first->left || first->right) { first = first->left ? first->left : first->right; } } return (struct avl_tree_node *) first; } /* Continues a postorder traversal of the tree. @prev will not be deferenced as * it's allowed that its memory has been freed; @prev_parent must be its saved * parent node. Returns NULL if there are no more nodes (i.e. @prev was the * root of the tree). 
*/ struct avl_tree_node * avl_tree_next_in_postorder(const struct avl_tree_node *prev, const struct avl_tree_node *prev_parent) { const struct avl_tree_node *next = prev_parent; if (next && prev == next->left && next->right) { for (next = next->right; next->left || next->right; next = next->left ? next->left : next->right) { } } return (struct avl_tree_node *) next; } /* Sets the left child (sign < 0) or the right child (sign > 0) of the * specified AVL tree node. * Note: for all calls of this, 'sign' is constant at compilation time, * so the compiler can remove the conditional. */ static AVL_INLINE void avl_set_child(struct avl_tree_node *parent, int sign, struct avl_tree_node *child) { if (sign < 0) { parent->left = child; } else { parent->right = child; } } /* Sets the parent and balance factor of the specified AVL tree node. */ static AVL_INLINE void avl_set_parent_balance(struct avl_tree_node *node, struct avl_tree_node *parent, int balance_factor) { node->parent_balance = (uintptr_t) parent | (balance_factor + 1); } /* Sets the parent of the specified AVL tree node. */ static AVL_INLINE void avl_set_parent(struct avl_tree_node *node, struct avl_tree_node *parent) { node->parent_balance = (uintptr_t) parent | (node->parent_balance & 3); } /* Returns the balance factor of the specified AVL tree node --- that is, the * height of its right subtree minus the height of its left subtree. */ static AVL_INLINE int avl_get_balance_factor(const struct avl_tree_node *node) { return (int) (node->parent_balance & 3) - 1; } /* Adds @amount to the balance factor of the specified AVL tree node. * The caller must ensure this still results in a valid balance factor * (-1, 0, or 1). 
*/ static AVL_INLINE void avl_adjust_balance_factor(struct avl_tree_node *node, int amount) { node->parent_balance += amount; } static AVL_INLINE void avl_replace_child(struct avl_tree_node **root_ptr, struct avl_tree_node *parent, struct avl_tree_node *old_child, struct avl_tree_node *new_child) { if (parent) { if (old_child == parent->left) { parent->left = new_child; } else { parent->right = new_child; } } else { *root_ptr = new_child; } } /* * Template for performing a single rotation --- * * sign > 0: Rotate clockwise (right) rooted at A: * * P? P? * | | * A B * / \ / \ * B C? => D? A * / \ / \ * D? E? E? C? * * (nodes marked with ? may not exist) * * sign < 0: Rotate counterclockwise (left) rooted at A: * * P? P? * | | * A B * / \ / \ * C? B => A D? * / \ / \ * E? D? C? E? * * This updates pointers but not balance factors! */ static AVL_INLINE void avl_rotate(struct avl_tree_node **const root_ptr, struct avl_tree_node *const A, const int sign) { struct avl_tree_node *const B = avl_get_child(A, -sign); struct avl_tree_node *const E = avl_get_child(B, +sign); struct avl_tree_node *const P = avl_get_parent(A); avl_set_child(A, -sign, E); avl_set_parent(A, B); avl_set_child(B, +sign, A); avl_set_parent(B, P); if (E) { avl_set_parent(E, A); } avl_replace_child(root_ptr, P, A, B); } /* * Template for performing a double rotation --- * * sign > 0: Rotate counterclockwise (left) rooted at B, then * clockwise (right) rooted at A: * * P? P? P? * | | | * A A E * / \ / \ / \ * B C? => E C? => B A * / \ / \ / \ / \ * D? E B G? D? F?G? C? * / \ / \ * F? G? D? F? * * (nodes marked with ? may not exist) * * sign < 0: Rotate clockwise (right) rooted at B, then * counterclockwise (left) rooted at A: * * P? P? P? * | | | * A A E * / \ / \ / \ * C? B => C? E => A B * / \ / \ / \ / \ * E D? G? B C? G?F? D? * / \ / \ * G? F? F? D? * * Returns a pointer to E and updates balance factors. 
Except for those * two things, this function is equivalent to: * avl_rotate(root_ptr, B, -sign); * avl_rotate(root_ptr, A, +sign); * * See comment in avl_handle_subtree_growth() for explanation of balance * factor updates. */ static AVL_INLINE struct avl_tree_node * avl_do_double_rotate(struct avl_tree_node **const root_ptr, struct avl_tree_node *const B, struct avl_tree_node *const A, const int sign) { struct avl_tree_node *const E = avl_get_child(B, +sign); struct avl_tree_node *const F = avl_get_child(E, -sign); struct avl_tree_node *const G = avl_get_child(E, +sign); struct avl_tree_node *const P = avl_get_parent(A); const int e = avl_get_balance_factor(E); avl_set_child(A, -sign, G); avl_set_parent_balance(A, E, ((sign * e >= 0) ? 0 : -e)); avl_set_child(B, +sign, F); avl_set_parent_balance(B, E, ((sign * e <= 0) ? 0 : -e)); avl_set_child(E, +sign, A); avl_set_child(E, -sign, B); avl_set_parent_balance(E, P, 0); if (G) { avl_set_parent(G, A); } if (F) { avl_set_parent(F, B); } avl_replace_child(root_ptr, P, A, E); return E; } /* * This function handles the growth of a subtree due to an insertion. * * @root_ptr * Location of the tree's root pointer. * * @node * A subtree that has increased in height by 1 due to an insertion. * * @parent * Parent of @node; must not be NULL. * * @sign * -1 if @node is the left child of @parent; * +1 if @node is the right child of @parent. * * This function will adjust @parent's balance factor, then do a (single * or double) rotation if necessary. The return value will be %true if * the full AVL tree is now adequately balanced, or %false if the subtree * rooted at @parent is now adequately balanced but has increased in * height by 1, so the caller should continue up the tree. * * Note that if %false is returned, no rotation will have been done. * Indeed, a single node insertion cannot require that more than one * (single or double) rotation be done. 
*/ static AVL_INLINE bool avl_handle_subtree_growth(struct avl_tree_node **const root_ptr, struct avl_tree_node *const node, struct avl_tree_node *const parent, const int sign) { int old_balance_factor, new_balance_factor; old_balance_factor = avl_get_balance_factor(parent); if (old_balance_factor == 0) { avl_adjust_balance_factor(parent, sign); /* @parent is still sufficiently balanced (-1 or +1 * balance factor), but must have increased in height. * Continue up the tree. */ return false; } new_balance_factor = old_balance_factor + sign; if (new_balance_factor == 0) { avl_adjust_balance_factor(parent, sign); /* @parent is now perfectly balanced (0 balance factor). * It cannot have increased in height, so there is * nothing more to do. */ return true; } /* @parent is too left-heavy (new_balance_factor == -2) or * too right-heavy (new_balance_factor == +2). */ /* Test whether @node is left-heavy (-1 balance factor) or * right-heavy (+1 balance factor). * Note that it cannot be perfectly balanced (0 balance factor) * because here we are under the invariant that @node has * increased in height due to the insertion. */ if (sign * avl_get_balance_factor(node) > 0) { /* @node (B below) is heavy in the same direction @parent * (A below) is heavy. * * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ * The comment, diagram, and equations below assume sign < 0. * The other case is symmetric! * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ * * Do a clockwise rotation rooted at @parent (A below): * * A B * / \ / \ * B C? => D A * / \ / \ / \ * D E? F? G?E? C? * / \ * F? G? * * Before the rotation: * balance(A) = -2 * balance(B) = -1 * Let x = height(C). Then: * height(B) = x + 2 * height(D) = x + 1 * height(E) = x * max(height(F), height(G)) = x. 
* * After the rotation: * height(D) = max(height(F), height(G)) + 1 * = x + 1 * height(A) = max(height(E), height(C)) + 1 * = max(x, x) + 1 = x + 1 * balance(B) = 0 * balance(A) = 0 */ avl_rotate(root_ptr, parent, -sign); /* Equivalent to setting @parent's balance factor to 0. */ avl_adjust_balance_factor(parent, -sign); /* A */ /* Equivalent to setting @node's balance factor to 0. */ avl_adjust_balance_factor(node, -sign); /* B */ } else { /* @node (B below) is heavy in the direction opposite * from the direction @parent (A below) is heavy. * * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ * The comment, diagram, and equations below assume sign < 0. * The other case is symmetric! * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ * * Do a counterblockwise rotation rooted at @node (B below), * then a clockwise rotation rooted at @parent (A below): * * A A E * / \ / \ / \ * B C? => E C? => B A * / \ / \ / \ / \ * D? E B G? D? F?G? C? * / \ / \ * F? G? D? F? * * Before the rotation: * balance(A) = -2 * balance(B) = +1 * Let x = height(C). Then: * height(B) = x + 2 * height(E) = x + 1 * height(D) = x * max(height(F), height(G)) = x * * After both rotations: * height(A) = max(height(G), height(C)) + 1 * = x + 1 * balance(A) = balance(E{orig}) >= 0 ? 0 : -balance(E{orig}) * height(B) = max(height(D), height(F)) + 1 * = x + 1 * balance(B) = balance(E{orig} <= 0) ? 0 : -balance(E{orig}) * * height(E) = x + 2 * balance(E) = 0 */ avl_do_double_rotate(root_ptr, node, parent, -sign); } /* Height after rotation is unchanged; nothing more to do. */ return true; } /* Rebalance the tree after insertion of the specified node. */ void avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr, struct avl_tree_node *inserted) { struct avl_tree_node *node, *parent; bool done; inserted->left = NULL; inserted->right = NULL; node = inserted; /* Adjust balance factor of new node's parent. * No rotation will need to be done at this level. 
*/ parent = avl_get_parent(node); if (!parent) { return; } if (node == parent->left) { avl_adjust_balance_factor(parent, -1); } else { avl_adjust_balance_factor(parent, +1); } if (avl_get_balance_factor(parent) == 0) { /* @parent did not change in height. Nothing more to do. */ return; } /* The subtree rooted at @parent increased in height by 1. */ do { /* Adjust balance factor of next ancestor. */ node = parent; parent = avl_get_parent(node); if (!parent) { return; } /* The subtree rooted at @node has increased in height by 1. */ if (node == parent->left) { done = avl_handle_subtree_growth(root_ptr, node, parent, -1); } else { done = avl_handle_subtree_growth(root_ptr, node, parent, +1); } } while (!done); } /* * This function handles the shrinkage of a subtree due to a deletion. * * @root_ptr * Location of the tree's root pointer. * * @parent * A node in the tree, exactly one of whose subtrees has decreased * in height by 1 due to a deletion. (This includes the case where * one of the child pointers has become NULL, since we can consider * the "NULL" subtree to have a height of 0.) * * @sign * +1 if the left subtree of @parent has decreased in height by 1; * -1 if the right subtree of @parent has decreased in height by 1. * * @left_deleted_ret * If the return value is not NULL, this will be set to %true if the * left subtree of the returned node has decreased in height by 1, * or %false if the right subtree of the returned node has decreased * in height by 1. * * This function will adjust @parent's balance factor, then do a (single * or double) rotation if necessary. The return value will be NULL if * the full AVL tree is now adequately balanced, or a pointer to the * parent of @parent if @parent is now adequately balanced but has * decreased in height by 1. Also in the latter case, *left_deleted_ret * will be set. 
*/ static AVL_INLINE struct avl_tree_node * avl_handle_subtree_shrink(struct avl_tree_node **const root_ptr, struct avl_tree_node *parent, const int sign, bool *const left_deleted_ret) { struct avl_tree_node *node; int old_balance_factor, new_balance_factor; old_balance_factor = avl_get_balance_factor(parent); if (old_balance_factor == 0) { /* Prior to the deletion, the subtree rooted at * @parent was perfectly balanced. It's now * unbalanced by 1, but that's okay and its height * hasn't changed. Nothing more to do. */ avl_adjust_balance_factor(parent, sign); return NULL; } new_balance_factor = old_balance_factor + sign; if (new_balance_factor == 0) { /* The subtree rooted at @parent is now perfectly * balanced, whereas before the deletion it was * unbalanced by 1. Its height must have decreased * by 1. No rotation is needed at this location, * but continue up the tree. */ avl_adjust_balance_factor(parent, sign); node = parent; } else { /* @parent is too left-heavy (new_balance_factor == -2) or * too right-heavy (new_balance_factor == +2). */ node = avl_get_child(parent, sign); /* The rotations below are similar to those done during * insertion (see avl_handle_subtree_growth()), so full * comments are not provided. The only new case is the * one where @node has a balance factor of 0, and that is * commented. */ if (sign * avl_get_balance_factor(node) >= 0) { avl_rotate(root_ptr, parent, -sign); if (avl_get_balance_factor(node) == 0) { /* * @node (B below) is perfectly balanced. * * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ * The comment, diagram, and equations * below assume sign < 0. The other case * is symmetric! * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ * * Do a clockwise rotation rooted at * @parent (A below): * * A B * / \ / \ * B C? => D A * / \ / \ / \ * D E F? G?E C? * / \ * F? G? * * Before the rotation: * balance(A) = -2 * balance(B) = 0 * Let x = height(C). Then: * height(B) = x + 2 * height(D) = x + 1 * height(E) = x + 1 * max(height(F), height(G)) = x. 
* * After the rotation: * height(D) = max(height(F), height(G)) + 1 * = x + 1 * height(A) = max(height(E), height(C)) + 1 * = max(x + 1, x) + 1 = x + 2 * balance(A) = -1 * balance(B) = +1 */ /* A: -2 => -1 (sign < 0) * or +2 => +1 (sign > 0) * No change needed --- that's the same as * old_balance_factor. */ /* B: 0 => +1 (sign < 0) * or 0 => -1 (sign > 0) */ avl_adjust_balance_factor(node, -sign); /* Height is unchanged; nothing more to do. */ return NULL; } else { avl_adjust_balance_factor(parent, -sign); avl_adjust_balance_factor(node, -sign); } } else { node = avl_do_double_rotate(root_ptr, node, parent, -sign); } } parent = avl_get_parent(node); if (parent) { *left_deleted_ret = (node == parent->left); } return parent; } /* Swaps node X, which must have 2 children, with its in-order successor, then * unlinks node X. Returns the parent of X just before unlinking, without its * balance factor having been updated to account for the unlink. */ static AVL_INLINE struct avl_tree_node * avl_tree_swap_with_successor(struct avl_tree_node **root_ptr, struct avl_tree_node *X, bool *left_deleted_ret) { struct avl_tree_node *Y, *ret; Y = X->right; if (!Y->left) { /* * P? P? P? * | | | * X Y Y * / \ / \ / \ * A Y => A X => A B? * / \ / \ * (0) B? (0) B? * * [ X unlinked, Y returned ] */ ret = Y; *left_deleted_ret = false; } else { struct avl_tree_node *Q; do { Q = Y; Y = Y->left; } while (Y->left); /* * P? P? P? * | | | * X Y Y * / \ / \ / \ * A ... => A ... => A ... * | | | * Q Q Q * / / / * Y X B? * / \ / \ * (0) B? (0) B? * * * [ X unlinked, Q returned ] */ Q->left = Y->right; if (Q->left) { avl_set_parent(Q->left, Q); } Y->right = X->right; avl_set_parent(X->right, Y); ret = Q; *left_deleted_ret = true; } Y->left = X->left; avl_set_parent(X->left, Y); Y->parent_balance = X->parent_balance; avl_replace_child(root_ptr, avl_get_parent(X), X, Y); return ret; } /* * Removes an item from the specified AVL tree. * * @root_ptr * Location of the AVL tree's root pointer. 
Indirection is needed * because the root node may change if the tree needed to be rebalanced * because of the deletion or if @node was the root node. * * @node * Pointer to the `struct avl_tree_node' embedded in the item to * remove from the tree. * * Note: This function *only* removes the node and rebalances the tree. * It does not free any memory, nor does it do the equivalent of * avl_tree_node_set_unlinked(). */ void avl_tree_remove(struct avl_tree_node **root_ptr, struct avl_tree_node *node) { struct avl_tree_node *parent; bool left_deleted = false; if (node->left && node->right) { /* @node is fully internal, with two children. Swap it * with its in-order successor (which must exist in the * right subtree of @node and can have, at most, a right * child), then unlink @node. */ parent = avl_tree_swap_with_successor(root_ptr, node, &left_deleted); /* @parent is now the parent of what was @node's in-order * successor. It cannot be NULL, since @node itself was * an ancestor of its in-order successor. * @left_deleted has been set to %true if @node's * in-order successor was the left child of @parent, * otherwise %false. */ } else { struct avl_tree_node *child; /* @node is missing at least one child. Unlink it. Set * @parent to @node's parent, and set @left_deleted to * reflect which child of @parent @node was. Or, if * @node was the root node, simply update the root node * and return. */ child = node->left ? node->left : node->right; parent = avl_get_parent(node); if (parent) { if (node == parent->left) { parent->left = child; left_deleted = true; } else { parent->right = child; left_deleted = false; } if (child) { avl_set_parent(child, parent); } } else { if (child) { avl_set_parent(child, parent); } *root_ptr = child; return; } } /* Rebalance the tree. 
*/ do { if (left_deleted) { parent = avl_handle_subtree_shrink(root_ptr, parent, +1, &left_deleted); } else { parent = avl_handle_subtree_shrink(root_ptr, parent, -1, &left_deleted); } } while (parent); } mwparserfromhell-0.6.3/src/mwparserfromhell/parser/ctokenizer/avl_tree.h000066400000000000000000000274741411406531600267330ustar00rootroot00000000000000/* * avl_tree.h - intrusive, nonrecursive AVL tree data structure (self-balancing * binary search tree), header file * * Written in 2014-2016 by Eric Biggers * Slight changes for compatibility by Ben Kurtovic * * To the extent possible under law, the author(s) have dedicated all copyright * and related and neighboring rights to this software to the public domain * worldwide via the Creative Commons Zero 1.0 Universal Public Domain * Dedication (the "CC0"). * * This software is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the CC0 for more details. * * You should have received a copy of the CC0 along with this software; if not * see . */ #ifndef _AVL_TREE_H_ #define _AVL_TREE_H_ #include #if !defined(_MSC_VER) || (_MSC_VER >= 1600) # include #endif #ifdef __GNUC__ # define AVL_INLINE inline __attribute__((always_inline)) #elif defined(_MSC_VER) && (_MSC_VER < 1900) # define AVL_INLINE __inline #else # define AVL_INLINE inline #endif /* Node in an AVL tree. Embed this in some other data structure. */ struct avl_tree_node { /* Pointer to left child or NULL */ struct avl_tree_node *left; /* Pointer to right child or NULL */ struct avl_tree_node *right; /* Pointer to parent combined with the balance factor. This saves 4 or * 8 bytes of memory depending on the CPU architecture. * * Low 2 bits: One greater than the balance factor of this subtree, * which is equal to height(right) - height(left). 
The mapping is: * * 00 => -1 * 01 => 0 * 10 => +1 * 11 => undefined * * The rest of the bits are the pointer to the parent node. It must be * 4-byte aligned, and it will be NULL if this is the root node and * therefore has no parent. */ uintptr_t parent_balance; }; /* Cast an AVL tree node to the containing data structure. */ #define avl_tree_entry(entry, type, member) \ ((type *) ((char *) (entry) -offsetof(type, member))) /* Returns a pointer to the parent of the specified AVL tree node, or NULL if it * is already the root of the tree. */ static AVL_INLINE struct avl_tree_node * avl_get_parent(const struct avl_tree_node *node) { return (struct avl_tree_node *) (node->parent_balance & ~3); } /* Marks the specified AVL tree node as unlinked from any tree. */ static AVL_INLINE void avl_tree_node_set_unlinked(struct avl_tree_node *node) { node->parent_balance = (uintptr_t) node; } /* Returns true iff the specified AVL tree node has been marked with * avl_tree_node_set_unlinked() and has not subsequently been inserted into a * tree. */ static AVL_INLINE int avl_tree_node_is_unlinked(const struct avl_tree_node *node) { return node->parent_balance == (uintptr_t) node; } /* (Internal use only) */ extern void avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr, struct avl_tree_node *inserted); /* * Looks up an item in the specified AVL tree. * * @root * Pointer to the root of the AVL tree. (This can be NULL --- that just * means the tree is empty.) * * @cmp_ctx * First argument to pass to the comparison callback. This generally * should be a pointer to an object equal to the one being searched for. * * @cmp * Comparison callback. Must return < 0, 0, or > 0 if the first argument * is less than, equal to, or greater than the second argument, * respectively. The first argument will be @cmp_ctx and the second * argument will be a pointer to the AVL tree node of an item in the tree. 
* * Returns a pointer to the AVL tree node of the resulting item, or NULL if the * item was not found. * * Example: * * struct int_wrapper { * int data; * struct avl_tree_node index_node; * }; * * static int _avl_cmp_int_to_node(const void *intptr, * const struct avl_tree_node *nodeptr) * { * int n1 = *(const int *)intptr; * int n2 = avl_tree_entry(nodeptr, struct int_wrapper, index_node)->data; * if (n1 < n2) * return -1; * else if (n1 > n2) * return 1; * else * return 0; * } * * bool contains_int(struct avl_tree_node *root, int n) * { * struct avl_tree_node *result; * * result = avl_tree_lookup(root, &n, _avl_cmp_int_to_node); * return result ? true : false; * } */ static AVL_INLINE struct avl_tree_node * avl_tree_lookup(const struct avl_tree_node *root, const void *cmp_ctx, int (*cmp)(const void *, const struct avl_tree_node *)) { const struct avl_tree_node *cur = root; while (cur) { int res = (*cmp)(cmp_ctx, cur); if (res < 0) { cur = cur->left; } else if (res > 0) { cur = cur->right; } else { break; } } return (struct avl_tree_node *) cur; } /* Same as avl_tree_lookup(), but uses a more specific type for the comparison * function. Specifically, with this function the item being searched for is * expected to be in the same format as those already in the tree, with an * embedded 'struct avl_tree_node'. */ static AVL_INLINE struct avl_tree_node * avl_tree_lookup_node(const struct avl_tree_node *root, const struct avl_tree_node *node, int (*cmp)(const struct avl_tree_node *, const struct avl_tree_node *)) { const struct avl_tree_node *cur = root; while (cur) { int res = (*cmp)(node, cur); if (res < 0) { cur = cur->left; } else if (res > 0) { cur = cur->right; } else { break; } } return (struct avl_tree_node *) cur; } /* * Inserts an item into the specified AVL tree. * * @root_ptr * Location of the AVL tree's root pointer. Indirection is needed because * the root node may change as a result of rotations caused by the * insertion. 
Initialize *root_ptr to NULL for an empty tree. * * @item * Pointer to the `struct avl_tree_node' embedded in the item to insert. * No members in it need be pre-initialized, although members in the * containing structure should be pre-initialized so that @cmp can use them * in comparisons. * * @cmp * Comparison callback. Must return < 0, 0, or > 0 if the first argument * is less than, equal to, or greater than the second argument, * respectively. The first argument will be @item and the second * argument will be a pointer to an AVL tree node embedded in some * previously-inserted item to which @item is being compared. * * If no item in the tree is comparatively equal (via @cmp) to @item, inserts * @item and returns NULL. Otherwise does nothing and returns a pointer to the * AVL tree node embedded in the previously-inserted item which compared equal * to @item. * * Example: * * struct int_wrapper { * int data; * struct avl_tree_node index_node; * }; * * #define GET_DATA(i) avl_tree_entry((i), struct int_wrapper, index_node)->data * * static int _avl_cmp_ints(const struct avl_tree_node *node1, * const struct avl_tree_node *node2) * { * int n1 = GET_DATA(node1); * int n2 = GET_DATA(node2); * if (n1 < n2) * return -1; * else if (n1 > n2) * return 1; * else * return 0; * } * * bool insert_int(struct avl_tree_node **root_ptr, int data) * { * struct int_wrapper *i = malloc(sizeof(struct int_wrapper)); * i->data = data; * if (avl_tree_insert(root_ptr, &i->index_node, _avl_cmp_ints)) { * // Duplicate. 
* free(i); * return false; * } * return true; * } */ static AVL_INLINE struct avl_tree_node * avl_tree_insert(struct avl_tree_node **root_ptr, struct avl_tree_node *item, int (*cmp)(const struct avl_tree_node *, const struct avl_tree_node *)) { struct avl_tree_node **cur_ptr = root_ptr, *cur = NULL; int res; while (*cur_ptr) { cur = *cur_ptr; res = (*cmp)(item, cur); if (res < 0) { cur_ptr = &cur->left; } else if (res > 0) { cur_ptr = &cur->right; } else { return cur; } } *cur_ptr = item; item->parent_balance = (uintptr_t) cur | 1; avl_tree_rebalance_after_insert(root_ptr, item); return NULL; } /* Removes an item from the specified AVL tree. * See implementation for details. */ extern void avl_tree_remove(struct avl_tree_node **root_ptr, struct avl_tree_node *node); /* Nonrecursive AVL tree traversal functions */ extern struct avl_tree_node *avl_tree_first_in_order(const struct avl_tree_node *root); extern struct avl_tree_node *avl_tree_last_in_order(const struct avl_tree_node *root); extern struct avl_tree_node *avl_tree_next_in_order(const struct avl_tree_node *node); extern struct avl_tree_node *avl_tree_prev_in_order(const struct avl_tree_node *node); extern struct avl_tree_node * avl_tree_first_in_postorder(const struct avl_tree_node *root); extern struct avl_tree_node * avl_tree_next_in_postorder(const struct avl_tree_node *prev, const struct avl_tree_node *prev_parent); /* * Iterate through the nodes in an AVL tree in sorted order. * You may not modify the tree during the iteration. * * @child_struct * Variable that will receive a pointer to each struct inserted into the * tree. * @root * Root of the AVL tree. * @struct_name * Type of *child_struct. * @struct_member * Member of @struct_name type that is the AVL tree node. 
* * Example: * * struct int_wrapper { * int data; * struct avl_tree_node index_node; * }; * * void print_ints(struct avl_tree_node *root) * { * struct int_wrapper *i; * * avl_tree_for_each_in_order(i, root, struct int_wrapper, index_node) * printf("%d\n", i->data); * } */ #define avl_tree_for_each_in_order(child_struct, root, struct_name, struct_member) \ for (struct avl_tree_node *_cur = avl_tree_first_in_order(root); \ _cur && \ ((child_struct) = avl_tree_entry(_cur, struct_name, struct_member), 1); \ _cur = avl_tree_next_in_order(_cur)) /* * Like avl_tree_for_each_in_order(), but uses the reverse order. */ #define avl_tree_for_each_in_reverse_order( \ child_struct, root, struct_name, struct_member) \ for (struct avl_tree_node *_cur = avl_tree_last_in_order(root); \ _cur && \ ((child_struct) = avl_tree_entry(_cur, struct_name, struct_member), 1); \ _cur = avl_tree_prev_in_order(_cur)) /* * Like avl_tree_for_each_in_order(), but iterates through the nodes in * postorder, so the current node may be deleted or freed. 
*/ #define avl_tree_for_each_in_postorder(child_struct, root, struct_name, struct_member) \ for (struct avl_tree_node *_cur = avl_tree_first_in_postorder(root), *_parent; \ _cur && \ ((child_struct) = avl_tree_entry(_cur, struct_name, struct_member), 1) && \ (_parent = avl_get_parent(_cur), 1); \ _cur = avl_tree_next_in_postorder(_cur, _parent)) #endif /* _AVL_TREE_H_ */ mwparserfromhell-0.6.3/src/mwparserfromhell/parser/ctokenizer/common.h000066400000000000000000000072211411406531600264060ustar00rootroot00000000000000/* Copyright (C) 2012-2017 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #pragma once #ifndef PY_SSIZE_T_CLEAN # define PY_SSIZE_T_CLEAN // See: https://docs.python.org/3/c-api/arg.html #endif #include #include #include #include "avl_tree.h" /* Compatibility macros */ #ifndef uint64_t # define uint64_t unsigned PY_LONG_LONG #endif #define malloc PyObject_Malloc // XXX: yuck #define realloc PyObject_Realloc #define free PyObject_Free /* Unicode support macros */ #define PyUnicode_FROM_SINGLE(chr) \ PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &(chr), 1) /* Error handling macros */ #define BAD_ROUTE self->route_state #define BAD_ROUTE_CONTEXT self->route_context #define FAIL_ROUTE(context) \ do { \ self->route_state = 1; \ self->route_context = context; \ } while (0) #define RESET_ROUTE() self->route_state = 0 /* Shared globals */ extern char **entitydefs; extern PyObject *NOARGS; extern PyObject *definitions; /* Structs */ typedef struct { Py_ssize_t capacity; Py_ssize_t length; PyObject *object; int kind; void *data; } Textbuffer; typedef struct { Py_ssize_t head; uint64_t context; } StackIdent; struct Stack { PyObject *stack; uint64_t context; Textbuffer *textbuffer; StackIdent ident; struct Stack *next; }; typedef struct Stack Stack; typedef struct { PyObject *object; /* base PyUnicodeObject object */ Py_ssize_t length; /* length of object, in code points */ int kind; /* object's kind value */ void *data; /* object's raw unicode buffer */ } TokenizerInput; typedef struct avl_tree_node avl_tree; typedef struct { StackIdent id; struct avl_tree_node node; } route_tree_node; typedef struct { PyObject_HEAD TokenizerInput text; /* text to tokenize */ Stack *topstack; /* topmost stack */ Py_ssize_t head; /* current position in text */ int global; /* global context */ int depth; /* stack recursion depth */ int route_state; /* whether a BadRoute has been triggered */ uint64_t route_context; /* context when the last BadRoute was triggered */ avl_tree *bad_routes; /* stack idents for routes known to fail */ int skip_style_tags; /* temp fix 
for the sometimes broken tag parser */ } Tokenizer; mwparserfromhell-0.6.3/src/mwparserfromhell/parser/ctokenizer/contexts.h000066400000000000000000000115411411406531600267650ustar00rootroot00000000000000/* Copyright (C) 2012-2017 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #pragma once /* Local contexts */ #define LC_TEMPLATE 0x0000000000000007 #define LC_TEMPLATE_NAME 0x0000000000000001 #define LC_TEMPLATE_PARAM_KEY 0x0000000000000002 #define LC_TEMPLATE_PARAM_VALUE 0x0000000000000004 #define LC_ARGUMENT 0x0000000000000018 #define LC_ARGUMENT_NAME 0x0000000000000008 #define LC_ARGUMENT_DEFAULT 0x0000000000000010 #define LC_WIKILINK 0x0000000000000060 #define LC_WIKILINK_TITLE 0x0000000000000020 #define LC_WIKILINK_TEXT 0x0000000000000040 #define LC_EXT_LINK 0x0000000000000180 #define LC_EXT_LINK_URI 0x0000000000000080 #define LC_EXT_LINK_TITLE 0x0000000000000100 #define LC_HEADING 0x0000000000007E00 #define LC_HEADING_LEVEL_1 0x0000000000000200 #define LC_HEADING_LEVEL_2 0x0000000000000400 #define LC_HEADING_LEVEL_3 0x0000000000000800 #define LC_HEADING_LEVEL_4 0x0000000000001000 #define LC_HEADING_LEVEL_5 0x0000000000002000 #define LC_HEADING_LEVEL_6 0x0000000000004000 #define LC_TAG 0x0000000000078000 #define LC_TAG_OPEN 0x0000000000008000 #define LC_TAG_ATTR 0x0000000000010000 #define LC_TAG_BODY 0x0000000000020000 #define LC_TAG_CLOSE 0x0000000000040000 #define LC_STYLE 0x0000000000780000 #define LC_STYLE_ITALICS 0x0000000000080000 #define LC_STYLE_BOLD 0x0000000000100000 #define LC_STYLE_PASS_AGAIN 0x0000000000200000 #define LC_STYLE_SECOND_PASS 0x0000000000400000 #define LC_DLTERM 0x0000000000800000 #define LC_SAFETY_CHECK 0x000000007F000000 #define LC_HAS_TEXT 0x0000000001000000 #define LC_FAIL_ON_TEXT 0x0000000002000000 #define LC_FAIL_NEXT 0x0000000004000000 #define LC_FAIL_ON_LBRACE 0x0000000008000000 #define LC_FAIL_ON_RBRACE 0x0000000010000000 #define LC_FAIL_ON_EQUALS 0x0000000020000000 #define LC_HAS_TEMPLATE 0x0000000040000000 #define LC_TABLE 0x0000001F80000000 #define LC_TABLE_CELL_LINE_CONTEXTS 0x0000001A00000000 #define LC_TABLE_OPEN 0x0000000080000000 #define LC_TABLE_CELL_OPEN 0x0000000100000000 #define LC_TABLE_CELL_STYLE 0x0000000200000000 #define LC_TABLE_ROW_OPEN 0x0000000400000000 #define 
LC_TABLE_TD_LINE 0x0000000800000000 #define LC_TABLE_TH_LINE 0x0000001000000000 #define LC_HTML_ENTITY 0x0000002000000000 /* Global contexts */ #define GL_HEADING 0x1 /* Aggregate contexts */ #define AGG_FAIL \ (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | \ LC_TAG | LC_STYLE | LC_TABLE_OPEN) #define AGG_UNSAFE \ (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | \ LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) #define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN) #define AGG_NO_WIKILINKS \ (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) #define AGG_NO_EXT_LINKS \ (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) /* Tag contexts */ #define TAG_NAME 0x01 #define TAG_ATTR_READY 0x02 #define TAG_ATTR_NAME 0x04 #define TAG_ATTR_VALUE 0x08 #define TAG_QUOTED 0x10 #define TAG_NOTE_SPACE 0x20 #define TAG_NOTE_EQUALS 0x40 #define TAG_NOTE_QUOTE 0x80 mwparserfromhell-0.6.3/src/mwparserfromhell/parser/ctokenizer/definitions.c000066400000000000000000000104611411406531600274240ustar00rootroot00000000000000/* Copyright (C) 2012-2020 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "definitions.h" /* This file should be kept up to date with mwparserfromhell/definitions.py. See the Python version for data sources. */ // clang-format off static const char *URI_SCHEMES[] = { "bitcoin", "ftp", "ftps", "geo", "git", "gopher", "http", "https", "irc", "ircs", "magnet", "mailto", "mms", "news", "nntp", "redis", "sftp", "sip", "sips", "sms", "ssh", "svn", "tel", "telnet", "urn", "worldwind", "xmpp", NULL, }; static const char *URI_SCHEMES_AUTHORITY_OPTIONAL[] = { "bitcoin", "geo", "magnet", "mailto", "news", "sip", "sips", "sms", "tel", "urn", "xmpp", NULL, }; static const char *PARSER_BLACKLIST[] = { "categorytree", "ce", "chem", "gallery", "graph", "hiero", "imagemap", "inputbox", "math", "nowiki", "pre", "score", "section", "source", "syntaxhighlight", "templatedata", "timeline", NULL, }; // clang-format on static const char *SINGLE[] = { "br", "wbr", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr", NULL}; static const char *SINGLE_ONLY[] = {"br", "wbr", "hr", "meta", "link", "img", NULL}; /* Convert a PyUnicodeObject to a lowercase ASCII char* array and store it in the second argument. The caller must free the return value when finished. If the return value is NULL, the conversion failed and *string is not set. 
*/ static PyObject * unicode_to_lcase_ascii(PyObject *input, const char **string) { PyObject *lower = PyObject_CallMethod(input, "lower", NULL), *bytes; if (!lower) { return NULL; } bytes = PyUnicode_AsASCIIString(lower); Py_DECREF(lower); if (!bytes) { if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) { PyErr_Clear(); } return NULL; } *string = PyBytes_AS_STRING(bytes); return bytes; } /* Return whether a PyUnicodeObject is in a list of lowercase ASCII strings. */ static int unicode_in_string_list(PyObject *input, const char **list) { const char *string; PyObject *temp = unicode_to_lcase_ascii(input, &string); int retval = 0; if (!temp) { return 0; } while (*list) { if (!strcmp(*(list++), string)) { retval = 1; goto end; } } end: Py_DECREF(temp); return retval; } /* Return if the given tag's contents should be passed to the parser. */ int is_parsable(PyObject *tag) { return !unicode_in_string_list(tag, PARSER_BLACKLIST); } /* Return whether or not the given tag can exist without a close tag. */ int is_single(PyObject *tag) { return unicode_in_string_list(tag, SINGLE); } /* Return whether or not the given tag must exist without a close tag. */ int is_single_only(PyObject *tag) { return unicode_in_string_list(tag, SINGLE_ONLY); } /* Return whether the given scheme is valid for external links. 
*/ int is_scheme(PyObject *scheme, int slashes) { if (slashes) { return unicode_in_string_list(scheme, URI_SCHEMES); } else { return unicode_in_string_list(scheme, URI_SCHEMES_AUTHORITY_OPTIONAL); } } mwparserfromhell-0.6.3/src/mwparserfromhell/parser/ctokenizer/definitions.h000066400000000000000000000026431411406531600274340ustar00rootroot00000000000000/* Copyright (C) 2012-2016 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once #include "common.h" /* This file should be kept up to date with mwparserfromhell/definitions.py. */ /* Functions */ int is_parsable(PyObject *); int is_single(PyObject *); int is_single_only(PyObject *); int is_scheme(PyObject *, int); /* Macros */ #define GET_HTML_TAG(markup) (markup == ':' ? "dd" : markup == ';' ? 
"dt" : "li") mwparserfromhell-0.6.3/src/mwparserfromhell/parser/ctokenizer/tag_data.c000066400000000000000000000051351411406531600266570ustar00rootroot00000000000000/* Copyright (C) 2012-2016 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "tag_data.h" #include "contexts.h" /* Initialize a new TagData object. */ TagData * TagData_new(TokenizerInput *text) { #define ALLOC_BUFFER(name) \ name = Textbuffer_new(text); \ if (!name) { \ TagData_dealloc(self); \ return NULL; \ } TagData *self = malloc(sizeof(TagData)); if (!self) { PyErr_NoMemory(); return NULL; } self->context = TAG_NAME; ALLOC_BUFFER(self->pad_first) ALLOC_BUFFER(self->pad_before_eq) ALLOC_BUFFER(self->pad_after_eq) self->quoter = 0; self->reset = 0; return self; #undef ALLOC_BUFFER } /* Deallocate the given TagData object. 
*/ void TagData_dealloc(TagData *self) { if (self->pad_first) { Textbuffer_dealloc(self->pad_first); } if (self->pad_before_eq) { Textbuffer_dealloc(self->pad_before_eq); } if (self->pad_after_eq) { Textbuffer_dealloc(self->pad_after_eq); } free(self); } /* Clear the internal buffers of the given TagData object. */ int TagData_reset_buffers(TagData *self) { if (Textbuffer_reset(self->pad_first) || Textbuffer_reset(self->pad_before_eq) || Textbuffer_reset(self->pad_after_eq)) { return -1; } return 0; } mwparserfromhell-0.6.3/src/mwparserfromhell/parser/ctokenizer/tag_data.h000066400000000000000000000027041411406531600266630ustar00rootroot00000000000000/* Copyright (C) 2012-2016 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #pragma once #include "common.h" #include "textbuffer.h" /* Structs */ typedef struct { uint64_t context; Textbuffer *pad_first; Textbuffer *pad_before_eq; Textbuffer *pad_after_eq; Py_UCS4 quoter; Py_ssize_t reset; } TagData; /* Functions */ TagData *TagData_new(TokenizerInput *); void TagData_dealloc(TagData *); int TagData_reset_buffers(TagData *); mwparserfromhell-0.6.3/src/mwparserfromhell/parser/ctokenizer/textbuffer.c000066400000000000000000000114751411406531600272750ustar00rootroot00000000000000/* Copyright (C) 2012-2016 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "textbuffer.h" #define INITIAL_CAPACITY 32 #define RESIZE_FACTOR 2 #define CONCAT_EXTRA 32 /* Internal allocation function for textbuffers. 
*/ static int internal_alloc(Textbuffer *self, Py_UCS4 maxchar) { self->capacity = INITIAL_CAPACITY; self->length = 0; self->object = PyUnicode_New(self->capacity, maxchar); if (!self->object) { return -1; } self->kind = PyUnicode_KIND(self->object); self->data = PyUnicode_DATA(self->object); return 0; } /* Internal deallocation function for textbuffers. */ static void internal_dealloc(Textbuffer *self) { Py_DECREF(self->object); } /* Internal resize function. */ static int internal_resize(Textbuffer *self, Py_ssize_t new_cap) { PyObject *newobj; void *newdata; newobj = PyUnicode_New(new_cap, PyUnicode_MAX_CHAR_VALUE(self->object)); if (!newobj) { return -1; } newdata = PyUnicode_DATA(newobj); memcpy(newdata, self->data, self->length * self->kind); Py_DECREF(self->object); self->object = newobj; self->data = newdata; self->capacity = new_cap; return 0; } /* Create a new textbuffer object. */ Textbuffer * Textbuffer_new(TokenizerInput *text) { Textbuffer *self = malloc(sizeof(Textbuffer)); Py_UCS4 maxchar = 0; maxchar = PyUnicode_MAX_CHAR_VALUE(text->object); if (!self) { goto fail_nomem; } if (internal_alloc(self, maxchar) < 0) { goto fail_dealloc; } return self; fail_dealloc: free(self); fail_nomem: PyErr_NoMemory(); return NULL; } /* Deallocate the given textbuffer. */ void Textbuffer_dealloc(Textbuffer *self) { internal_dealloc(self); free(self); } /* Reset a textbuffer to its initial, empty state. */ int Textbuffer_reset(Textbuffer *self) { Py_UCS4 maxchar = 0; maxchar = PyUnicode_MAX_CHAR_VALUE(self->object); internal_dealloc(self); if (internal_alloc(self, maxchar)) { return -1; } return 0; } /* Write a Unicode codepoint to the given textbuffer. 
*/ int Textbuffer_write(Textbuffer *self, Py_UCS4 code) { if (self->length >= self->capacity) { if (internal_resize(self, self->capacity * RESIZE_FACTOR) < 0) { return -1; } } PyUnicode_WRITE(self->kind, self->data, self->length++, code); return 0; } /* Read a Unicode codepoint from the given index of the given textbuffer. This function does not check for bounds. */ Py_UCS4 Textbuffer_read(Textbuffer *self, Py_ssize_t index) { return PyUnicode_READ(self->kind, self->data, index); } /* Return the contents of the textbuffer as a Python Unicode object. */ PyObject * Textbuffer_render(Textbuffer *self) { return PyUnicode_FromKindAndData(self->kind, self->data, self->length); } /* Concatenate the 'other' textbuffer onto the end of the given textbuffer. */ int Textbuffer_concat(Textbuffer *self, Textbuffer *other) { Py_ssize_t newlen = self->length + other->length; if (newlen > self->capacity) { if (internal_resize(self, newlen + CONCAT_EXTRA) < 0) { return -1; } } assert(self->kind == other->kind); memcpy(((Py_UCS1 *) self->data) + self->kind * self->length, other->data, other->length * other->kind); self->length = newlen; return 0; } /* Reverse the contents of the given textbuffer. 
*/ void Textbuffer_reverse(Textbuffer *self) { Py_ssize_t i, end = self->length - 1; Py_UCS4 tmp; for (i = 0; i < self->length / 2; i++) { tmp = PyUnicode_READ(self->kind, self->data, i); PyUnicode_WRITE( self->kind, self->data, i, PyUnicode_READ(self->kind, self->data, end - i)); PyUnicode_WRITE(self->kind, self->data, end - i, tmp); } } mwparserfromhell-0.6.3/src/mwparserfromhell/parser/ctokenizer/textbuffer.h000066400000000000000000000027271411406531600273020ustar00rootroot00000000000000/* Copyright (C) 2012-2016 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #pragma once #include "common.h" /* Functions */ Textbuffer *Textbuffer_new(TokenizerInput *); void Textbuffer_dealloc(Textbuffer *); int Textbuffer_reset(Textbuffer *); int Textbuffer_write(Textbuffer *, Py_UCS4); Py_UCS4 Textbuffer_read(Textbuffer *, Py_ssize_t); PyObject *Textbuffer_render(Textbuffer *); int Textbuffer_concat(Textbuffer *, Textbuffer *); void Textbuffer_reverse(Textbuffer *); mwparserfromhell-0.6.3/src/mwparserfromhell/parser/ctokenizer/tok_parse.c000066400000000000000000002606171411406531600271120ustar00rootroot00000000000000/* Copyright (C) 2012-2021 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "tok_parse.h" #include "contexts.h" #include "definitions.h" #include "tag_data.h" #include "tok_support.h" #include "tokens.h" #define DIGITS "0123456789" #define HEXDIGITS "0123456789abcdefABCDEF" #define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" #define URISCHEME "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-" #define MAX_BRACES 255 #define MAX_ENTITY_SIZE 8 typedef struct { PyObject *title; int level; } HeadingData; /* Forward declarations */ static PyObject *Tokenizer_really_parse_external_link(Tokenizer *, int, Textbuffer *); static int Tokenizer_parse_entity(Tokenizer *); static int Tokenizer_parse_comment(Tokenizer *); static int Tokenizer_handle_dl_term(Tokenizer *); static int Tokenizer_parse_tag(Tokenizer *); /* Determine whether the given code point is a marker. */ static int is_marker(Py_UCS4 this) { int i; for (i = 0; i < NUM_MARKERS; i++) { if (MARKERS[i] == this) { return 1; } } return 0; } /* Given a context, return the heading level encoded within it. */ static int heading_level_from_context(uint64_t n) { int level; n /= LC_HEADING_LEVEL_1; for (level = 1; n > 1; n >>= 1) { level++; } return level; } /* Sanitize the name of a tag so it can be compared with others for equality. */ static PyObject * strip_tag_name(PyObject *token, int take_attr) { PyObject *text, *rstripped, *lowered; if (take_attr) { text = PyObject_GetAttrString(token, "text"); if (!text) { return NULL; } rstripped = PyObject_CallMethod(text, "rstrip", NULL); Py_DECREF(text); } else { rstripped = PyObject_CallMethod(token, "rstrip", NULL); } if (!rstripped) { return NULL; } lowered = PyObject_CallMethod(rstripped, "lower", NULL); Py_DECREF(rstripped); return lowered; } /* Parse a template at the head of the wikicode string. 
*/ static int Tokenizer_parse_template(Tokenizer *self, int has_content) { PyObject *template; Py_ssize_t reset = self->head; uint64_t context = LC_TEMPLATE_NAME; if (has_content) { context |= LC_HAS_TEMPLATE; } template = Tokenizer_parse(self, context, 1); if (BAD_ROUTE) { self->head = reset; return 0; } if (!template) { return -1; } if (Tokenizer_emit_first(self, TemplateOpen)) { Py_DECREF(template); return -1; } if (Tokenizer_emit_all(self, template)) { Py_DECREF(template); return -1; } Py_DECREF(template); if (Tokenizer_emit(self, TemplateClose)) { return -1; } return 0; } /* Parse an argument at the head of the wikicode string. */ static int Tokenizer_parse_argument(Tokenizer *self) { PyObject *argument; Py_ssize_t reset = self->head; argument = Tokenizer_parse(self, LC_ARGUMENT_NAME, 1); if (BAD_ROUTE) { self->head = reset; return 0; } if (!argument) { return -1; } if (Tokenizer_emit_first(self, ArgumentOpen)) { Py_DECREF(argument); return -1; } if (Tokenizer_emit_all(self, argument)) { Py_DECREF(argument); return -1; } Py_DECREF(argument); if (Tokenizer_emit(self, ArgumentClose)) { return -1; } return 0; } /* Parse a template or argument at the head of the wikicode string. 
*/ static int Tokenizer_parse_template_or_argument(Tokenizer *self) { unsigned int braces = 2, i; int has_content = 0; PyObject *tokenlist; self->head += 2; while (Tokenizer_read(self, 0) == '{' && braces < MAX_BRACES) { self->head++; braces++; } if (Tokenizer_push(self, 0)) { return -1; } while (braces) { if (braces == 1) { if (Tokenizer_emit_text_then_stack(self, "{")) { return -1; } return 0; } if (braces == 2) { if (Tokenizer_parse_template(self, has_content)) { return -1; } if (BAD_ROUTE) { RESET_ROUTE(); if (Tokenizer_emit_text_then_stack(self, "{{")) { return -1; } return 0; } break; } if (Tokenizer_parse_argument(self)) { return -1; } if (BAD_ROUTE) { RESET_ROUTE(); if (Tokenizer_parse_template(self, has_content)) { return -1; } if (BAD_ROUTE) { char text[MAX_BRACES + 1]; RESET_ROUTE(); for (i = 0; i < braces; i++) { text[i] = '{'; } text[braces] = '\0'; if (Tokenizer_emit_text_then_stack(self, text)) { return -1; } return 0; } else { braces -= 2; } } else { braces -= 3; } if (braces) { has_content = 1; self->head++; } } tokenlist = Tokenizer_pop(self); if (!tokenlist) { return -1; } if (Tokenizer_emit_all(self, tokenlist)) { Py_DECREF(tokenlist); return -1; } Py_DECREF(tokenlist); if (self->topstack->context & LC_FAIL_NEXT) { self->topstack->context ^= LC_FAIL_NEXT; } return 0; } /* Handle a template parameter at the head of the string. 
*/ static int Tokenizer_handle_template_param(Tokenizer *self) { PyObject *stack; if (self->topstack->context & LC_TEMPLATE_NAME) { if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) { Tokenizer_fail_route(self); return -1; } self->topstack->context ^= LC_TEMPLATE_NAME; } else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE) { self->topstack->context ^= LC_TEMPLATE_PARAM_VALUE; } if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { stack = Tokenizer_pop(self); if (!stack) { return -1; } if (Tokenizer_emit_all(self, stack)) { Py_DECREF(stack); return -1; } Py_DECREF(stack); } else { self->topstack->context |= LC_TEMPLATE_PARAM_KEY; } if (Tokenizer_emit(self, TemplateParamSeparator)) { return -1; } if (Tokenizer_push(self, self->topstack->context)) { return -1; } return 0; } /* Handle a template parameter's value at the head of the string. */ static int Tokenizer_handle_template_param_value(Tokenizer *self) { PyObject *stack; stack = Tokenizer_pop(self); if (!stack) { return -1; } if (Tokenizer_emit_all(self, stack)) { Py_DECREF(stack); return -1; } Py_DECREF(stack); self->topstack->context ^= LC_TEMPLATE_PARAM_KEY; self->topstack->context |= LC_TEMPLATE_PARAM_VALUE; if (Tokenizer_emit(self, TemplateParamEquals)) { return -1; } return 0; } /* Handle the end of a template at the head of the string. */ static PyObject * Tokenizer_handle_template_end(Tokenizer *self) { PyObject *stack; if (self->topstack->context & LC_TEMPLATE_NAME) { if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) { return Tokenizer_fail_route(self); } } else if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { stack = Tokenizer_pop(self); if (!stack) { return NULL; } if (Tokenizer_emit_all(self, stack)) { Py_DECREF(stack); return NULL; } Py_DECREF(stack); } self->head++; stack = Tokenizer_pop(self); return stack; } /* Handle the separator between an argument's name and default. 
*/ static int Tokenizer_handle_argument_separator(Tokenizer *self) { self->topstack->context ^= LC_ARGUMENT_NAME; self->topstack->context |= LC_ARGUMENT_DEFAULT; if (Tokenizer_emit(self, ArgumentSeparator)) { return -1; } return 0; } /* Handle the end of an argument at the head of the string. */ static PyObject * Tokenizer_handle_argument_end(Tokenizer *self) { PyObject *stack = Tokenizer_pop(self); self->head += 2; return stack; } /* Parse an internal wikilink at the head of the wikicode string. */ static int Tokenizer_parse_wikilink(Tokenizer *self) { Py_ssize_t reset; PyObject *extlink, *wikilink, *kwargs; reset = self->head + 1; self->head += 2; // If the wikilink looks like an external link, parse it as such: extlink = Tokenizer_really_parse_external_link(self, 1, NULL); if (BAD_ROUTE) { RESET_ROUTE(); self->head = reset + 1; // Otherwise, actually parse it as a wikilink: wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE, 1); if (BAD_ROUTE) { RESET_ROUTE(); self->head = reset; if (Tokenizer_emit_text(self, "[[")) { return -1; } return 0; } if (!wikilink) { return -1; } if (Tokenizer_emit(self, WikilinkOpen)) { Py_DECREF(wikilink); return -1; } if (Tokenizer_emit_all(self, wikilink)) { Py_DECREF(wikilink); return -1; } Py_DECREF(wikilink); if (Tokenizer_emit(self, WikilinkClose)) { return -1; } return 0; } if (!extlink) { return -1; } if (self->topstack->context & LC_EXT_LINK_TITLE) { // In this exceptional case, an external link that looks like a // wikilink inside of an external link is parsed as text: Py_DECREF(extlink); self->head = reset; if (Tokenizer_emit_text(self, "[[")) { return -1; } return 0; } if (Tokenizer_emit_text(self, "[")) { Py_DECREF(extlink); return -1; } kwargs = PyDict_New(); if (!kwargs) { Py_DECREF(extlink); return -1; } PyDict_SetItemString(kwargs, "brackets", Py_True); if (Tokenizer_emit_kwargs(self, ExternalLinkOpen, kwargs)) { Py_DECREF(extlink); return -1; } if (Tokenizer_emit_all(self, extlink)) { Py_DECREF(extlink); return -1; } 
Py_DECREF(extlink); if (Tokenizer_emit(self, ExternalLinkClose)) { return -1; } return 0; } /* Handle the separator between a wikilink's title and its text. */ static int Tokenizer_handle_wikilink_separator(Tokenizer *self) { self->topstack->context ^= LC_WIKILINK_TITLE; self->topstack->context |= LC_WIKILINK_TEXT; if (Tokenizer_emit(self, WikilinkSeparator)) { return -1; } return 0; } /* Handle the end of a wikilink at the head of the string. */ static PyObject * Tokenizer_handle_wikilink_end(Tokenizer *self) { PyObject *stack = Tokenizer_pop(self); self->head += 1; return stack; } /* Parse the URI scheme of a bracket-enclosed external link. */ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer *self) { static const char *valid = URISCHEME; Textbuffer *buffer; PyObject *scheme; Py_UCS4 this; int slashes, i; if (Tokenizer_check_route(self, LC_EXT_LINK_URI) < 0) { return 0; } if (Tokenizer_push(self, LC_EXT_LINK_URI)) { return -1; } if (Tokenizer_read(self, 0) == '/' && Tokenizer_read(self, 1) == '/') { if (Tokenizer_emit_text(self, "//")) { return -1; } self->head += 2; } else { buffer = Textbuffer_new(&self->text); if (!buffer) { return -1; } while ((this = Tokenizer_read(self, 0))) { i = 0; while (1) { if (!valid[i]) { goto end_of_loop; } if (this == (Py_UCS4) valid[i]) { break; } i++; } Textbuffer_write(buffer, this); if (Tokenizer_emit_char(self, this)) { Textbuffer_dealloc(buffer); return -1; } self->head++; } end_of_loop: if (this != ':') { Textbuffer_dealloc(buffer); Tokenizer_fail_route(self); return 0; } if (Tokenizer_emit_char(self, ':')) { Textbuffer_dealloc(buffer); return -1; } self->head++; slashes = (Tokenizer_read(self, 0) == '/' && Tokenizer_read(self, 1) == '/'); if (slashes) { if (Tokenizer_emit_text(self, "//")) { Textbuffer_dealloc(buffer); return -1; } self->head += 2; } scheme = Textbuffer_render(buffer); Textbuffer_dealloc(buffer); if (!scheme) { return -1; } if (!is_scheme(scheme, slashes)) { Py_DECREF(scheme); 
Tokenizer_fail_route(self); return 0; } Py_DECREF(scheme); } return 0; } /* Parse the URI scheme of a free (no brackets) external link. */ static int Tokenizer_parse_free_uri_scheme(Tokenizer *self) { static const char *valid = URISCHEME; Textbuffer *scheme_buffer = Textbuffer_new(&self->text); PyObject *scheme; Py_UCS4 ch; Py_ssize_t i; int slashes, j; uint64_t new_context; if (!scheme_buffer) { return -1; } // We have to backtrack through the textbuffer looking for our scheme since // it was just parsed as text: for (i = self->topstack->textbuffer->length - 1; i >= 0; i--) { ch = Textbuffer_read(self->topstack->textbuffer, i); // Stop at the first non-word character (equivalent to \W in regex) if (!Py_UNICODE_ISALNUM(ch) && ch != '_') { break; } j = 0; do { if (!valid[j]) { Textbuffer_dealloc(scheme_buffer); FAIL_ROUTE(0); return 0; } } while (ch != (Py_UCS4) valid[j++]); Textbuffer_write(scheme_buffer, ch); } Textbuffer_reverse(scheme_buffer); scheme = Textbuffer_render(scheme_buffer); if (!scheme) { Textbuffer_dealloc(scheme_buffer); return -1; } slashes = (Tokenizer_read(self, 0) == '/' && Tokenizer_read(self, 1) == '/'); if (!is_scheme(scheme, slashes)) { Py_DECREF(scheme); Textbuffer_dealloc(scheme_buffer); FAIL_ROUTE(0); return 0; } Py_DECREF(scheme); new_context = self->topstack->context | LC_EXT_LINK_URI; if (Tokenizer_check_route(self, new_context) < 0) { Textbuffer_dealloc(scheme_buffer); return 0; } if (Tokenizer_push(self, new_context)) { Textbuffer_dealloc(scheme_buffer); return -1; } if (Tokenizer_emit_textbuffer(self, scheme_buffer)) { return -1; } if (Tokenizer_emit_char(self, ':')) { return -1; } if (slashes) { if (Tokenizer_emit_text(self, "//")) { return -1; } self->head += 2; } return 0; } /* Handle text in a free external link, including trailing punctuation. 
*/ static int Tokenizer_handle_free_link_text(Tokenizer *self, int *parens, Textbuffer *tail, Py_UCS4 this) { #define PUSH_TAIL_BUFFER(tail, error) \ do { \ if (tail && tail->length > 0) { \ if (Textbuffer_concat(self->topstack->textbuffer, tail)) { \ return error; \ } \ if (Textbuffer_reset(tail)) { \ return error; \ } \ } \ } while (0) if (this == '(' && !(*parens)) { *parens = 1; PUSH_TAIL_BUFFER(tail, -1); } else if (this == ',' || this == ';' || this == '\\' || this == '.' || this == ':' || this == '!' || this == '?' || (!(*parens) && this == ')')) { return Textbuffer_write(tail, this); } else { PUSH_TAIL_BUFFER(tail, -1); } return Tokenizer_emit_char(self, this); } /* Return whether the current head is the end of a URI. */ static int Tokenizer_is_uri_end(Tokenizer *self, Py_UCS4 this, Py_UCS4 next) { // Built from Tokenizer_parse()'s end sentinels: Py_UCS4 after = Tokenizer_read(self, 2); uint64_t ctx = self->topstack->context; return (!this || this == '\n' || this == '[' || this == ']' || this == '<' || this == '>' || this == '"' || this == ' ' || (this == '\'' && next == '\'') || (this == '|' && ctx & LC_TEMPLATE) || (this == '=' && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) || (this == '}' && next == '}' && (ctx & LC_TEMPLATE || (after == '}' && ctx & LC_ARGUMENT)))); } /* Really parse an external link. */ static PyObject * Tokenizer_really_parse_external_link(Tokenizer *self, int brackets, Textbuffer *extra) { Py_UCS4 this, next; int parens = 0; if (brackets ? 
Tokenizer_parse_bracketed_uri_scheme(self) : Tokenizer_parse_free_uri_scheme(self)) { return NULL; } if (BAD_ROUTE) { return NULL; } this = Tokenizer_read(self, 0); if (!this || this == '\n' || this == ' ' || this == ']') { return Tokenizer_fail_route(self); } if (!brackets && this == '[') { return Tokenizer_fail_route(self); } while (1) { this = Tokenizer_read(self, 0); next = Tokenizer_read(self, 1); if (this == '&') { PUSH_TAIL_BUFFER(extra, NULL); if (Tokenizer_parse_entity(self)) { return NULL; } } else if (this == '<' && next == '!' && Tokenizer_read(self, 2) == '-' && Tokenizer_read(self, 3) == '-') { PUSH_TAIL_BUFFER(extra, NULL); if (Tokenizer_parse_comment(self)) { return NULL; } } else if (this == '{' && next == '{' && Tokenizer_CAN_RECURSE(self)) { PUSH_TAIL_BUFFER(extra, NULL); if (Tokenizer_parse_template_or_argument(self)) { return NULL; } } else if (brackets) { if (!this || this == '\n') { return Tokenizer_fail_route(self); } if (this == ']') { return Tokenizer_pop(self); } if (Tokenizer_is_uri_end(self, this, next)) { if (this == ' ') { if (Tokenizer_emit(self, ExternalLinkSeparator)) { return NULL; } self->head++; } else { PyObject *kwargs = PyDict_New(); if (!kwargs) { return NULL; } PyDict_SetItemString(kwargs, "suppress_space", Py_True); if (Tokenizer_emit_kwargs(self, ExternalLinkSeparator, kwargs)) { return NULL; } } self->topstack->context ^= LC_EXT_LINK_URI; self->topstack->context |= LC_EXT_LINK_TITLE; return Tokenizer_parse(self, 0, 0); } if (Tokenizer_emit_char(self, this)) { return NULL; } } else { if (Tokenizer_is_uri_end(self, this, next)) { if (this == ' ') { if (Textbuffer_write(extra, this)) { return NULL; } } else { self->head--; } return Tokenizer_pop(self); } if (Tokenizer_handle_free_link_text(self, &parens, extra, this)) { return NULL; } } self->head++; } } /* Remove the URI scheme of a new external link from the textbuffer. 
*/ static int Tokenizer_remove_uri_scheme_from_textbuffer(Tokenizer *self, PyObject *link) { PyObject *text = PyObject_GetAttrString(PyList_GET_ITEM(link, 0), "text"), *split, *scheme; Py_ssize_t length; if (!text) { return -1; } split = PyObject_CallMethod(text, "split", "si", ":", 1); Py_DECREF(text); if (!split) { return -1; } scheme = PyList_GET_ITEM(split, 0); length = PyUnicode_GET_LENGTH(scheme); Py_DECREF(split); self->topstack->textbuffer->length -= length; return 0; } /* Parse an external link at the head of the wikicode string. */ static int Tokenizer_parse_external_link(Tokenizer *self, int brackets) { #define NOT_A_LINK \ do { \ if (!brackets && self->topstack->context & LC_DLTERM) { \ return Tokenizer_handle_dl_term(self); \ } \ return Tokenizer_emit_char(self, Tokenizer_read(self, 0)); \ } while (0) Py_ssize_t reset = self->head; PyObject *link, *kwargs; Textbuffer *extra; if (self->topstack->context & AGG_NO_EXT_LINKS || !(Tokenizer_CAN_RECURSE(self))) { NOT_A_LINK; } extra = Textbuffer_new(&self->text); if (!extra) { return -1; } self->head++; link = Tokenizer_really_parse_external_link(self, brackets, extra); if (BAD_ROUTE) { RESET_ROUTE(); self->head = reset; Textbuffer_dealloc(extra); NOT_A_LINK; } if (!link) { Textbuffer_dealloc(extra); return -1; } if (!brackets) { if (Tokenizer_remove_uri_scheme_from_textbuffer(self, link)) { Textbuffer_dealloc(extra); Py_DECREF(link); return -1; } } kwargs = PyDict_New(); if (!kwargs) { Textbuffer_dealloc(extra); Py_DECREF(link); return -1; } PyDict_SetItemString(kwargs, "brackets", brackets ? 
Py_True : Py_False); if (Tokenizer_emit_kwargs(self, ExternalLinkOpen, kwargs)) { Textbuffer_dealloc(extra); Py_DECREF(link); return -1; } if (Tokenizer_emit_all(self, link)) { Textbuffer_dealloc(extra); Py_DECREF(link); return -1; } Py_DECREF(link); if (Tokenizer_emit(self, ExternalLinkClose)) { Textbuffer_dealloc(extra); return -1; } if (extra->length > 0) { return Tokenizer_emit_textbuffer(self, extra); } Textbuffer_dealloc(extra); return 0; } /* Parse a section heading at the head of the wikicode string. */ static int Tokenizer_parse_heading(Tokenizer *self) { Py_ssize_t reset = self->head; int best = 1, i, context, diff; HeadingData *heading; PyObject *level, *kwargs; self->global |= GL_HEADING; self->head += 1; while (Tokenizer_read(self, 0) == '=') { best++; self->head++; } context = LC_HEADING_LEVEL_1 << (best > 5 ? 5 : best - 1); heading = (HeadingData *) Tokenizer_parse(self, context, 1); if (BAD_ROUTE) { RESET_ROUTE(); self->head = reset + best - 1; for (i = 0; i < best; i++) { if (Tokenizer_emit_char(self, '=')) { return -1; } } self->global ^= GL_HEADING; return 0; } if (!heading) { return -1; } level = PyLong_FromSsize_t(heading->level); if (!level) { Py_DECREF(heading->title); free(heading); return -1; } kwargs = PyDict_New(); if (!kwargs) { Py_DECREF(level); Py_DECREF(heading->title); free(heading); return -1; } PyDict_SetItemString(kwargs, "level", level); Py_DECREF(level); if (Tokenizer_emit_kwargs(self, HeadingStart, kwargs)) { Py_DECREF(heading->title); free(heading); return -1; } if (heading->level < best) { diff = best - heading->level; for (i = 0; i < diff; i++) { if (Tokenizer_emit_char(self, '=')) { Py_DECREF(heading->title); free(heading); return -1; } } } if (Tokenizer_emit_all(self, heading->title)) { Py_DECREF(heading->title); free(heading); return -1; } Py_DECREF(heading->title); free(heading); if (Tokenizer_emit(self, HeadingEnd)) { return -1; } self->global ^= GL_HEADING; return 0; } /* Handle the end of a section heading at the head 
of the string. */ static HeadingData * Tokenizer_handle_heading_end(Tokenizer *self) { Py_ssize_t reset = self->head; int best, i, current, level, diff; HeadingData *after, *heading; PyObject *stack; self->head += 1; best = 1; while (Tokenizer_read(self, 0) == '=') { best++; self->head++; } current = heading_level_from_context(self->topstack->context); level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 6 : current); after = (HeadingData *) Tokenizer_parse(self, self->topstack->context, 1); if (BAD_ROUTE) { RESET_ROUTE(); if (level < best) { diff = best - level; for (i = 0; i < diff; i++) { if (Tokenizer_emit_char(self, '=')) { return NULL; } } } self->head = reset + best - 1; } else { if (!after) { return NULL; } for (i = 0; i < best; i++) { if (Tokenizer_emit_char(self, '=')) { Py_DECREF(after->title); free(after); return NULL; } } if (Tokenizer_emit_all(self, after->title)) { Py_DECREF(after->title); free(after); return NULL; } Py_DECREF(after->title); level = after->level; free(after); } stack = Tokenizer_pop(self); if (!stack) { return NULL; } heading = malloc(sizeof(HeadingData)); if (!heading) { PyErr_NoMemory(); return NULL; } heading->title = stack; heading->level = level; return heading; } /* Actually parse an HTML entity and ensure that it is valid. 
*/ static int Tokenizer_really_parse_entity(Tokenizer *self) { PyObject *kwargs, *charobj, *textobj; Py_UCS4 this; int numeric, hexadecimal, i, j, zeroes, test; char *valid, *text, *buffer, *def; #define FAIL_ROUTE_AND_EXIT() \ do { \ Tokenizer_fail_route(self); \ free(text); \ return 0; \ } while (0) if (Tokenizer_emit(self, HTMLEntityStart)) { return -1; } self->head++; this = Tokenizer_read(self, 0); if (!this) { Tokenizer_fail_route(self); return 0; } if (this == '#') { numeric = 1; if (Tokenizer_emit(self, HTMLEntityNumeric)) { return -1; } self->head++; this = Tokenizer_read(self, 0); if (!this) { Tokenizer_fail_route(self); return 0; } if (this == 'x' || this == 'X') { hexadecimal = 1; kwargs = PyDict_New(); if (!kwargs) { return -1; } if (!(charobj = PyUnicode_FROM_SINGLE(this))) { Py_DECREF(kwargs); return -1; } PyDict_SetItemString(kwargs, "char", charobj); Py_DECREF(charobj); if (Tokenizer_emit_kwargs(self, HTMLEntityHex, kwargs)) { return -1; } self->head++; } else { hexadecimal = 0; } } else { numeric = hexadecimal = 0; } if (hexadecimal) { valid = HEXDIGITS; } else if (numeric) { valid = DIGITS; } else { valid = ALPHANUM; } text = calloc(MAX_ENTITY_SIZE, sizeof(char)); if (!text) { PyErr_NoMemory(); return -1; } i = 0; zeroes = 0; while (1) { this = Tokenizer_read(self, 0); if (this == ';') { if (i == 0) { FAIL_ROUTE_AND_EXIT(); } break; } if (i == 0 && this == '0') { zeroes++; self->head++; continue; } if (i >= MAX_ENTITY_SIZE) { FAIL_ROUTE_AND_EXIT(); } if (is_marker(this)) { FAIL_ROUTE_AND_EXIT(); } j = 0; while (1) { if (!valid[j]) { FAIL_ROUTE_AND_EXIT(); } if (this == (Py_UCS4) valid[j]) { break; } j++; } text[i] = (char) this; self->head++; i++; } if (numeric) { sscanf(text, (hexadecimal ? 
"%x" : "%d"), &test); if (test < 1 || test > 0x10FFFF) { FAIL_ROUTE_AND_EXIT(); } } else { i = 0; while (1) { def = entitydefs[i]; if (!def) { // We've reached the end of the defs without finding it FAIL_ROUTE_AND_EXIT(); } if (strcmp(text, def) == 0) { break; } i++; } } if (zeroes) { buffer = calloc(strlen(text) + zeroes + 1, sizeof(char)); if (!buffer) { free(text); PyErr_NoMemory(); return -1; } for (i = 0; i < zeroes; i++) { strcat(buffer, "0"); } strcat(buffer, text); free(text); text = buffer; } textobj = PyUnicode_FromString(text); if (!textobj) { free(text); return -1; } free(text); kwargs = PyDict_New(); if (!kwargs) { Py_DECREF(textobj); return -1; } PyDict_SetItemString(kwargs, "text", textobj); Py_DECREF(textobj); if (Tokenizer_emit_kwargs(self, Text, kwargs)) { return -1; } if (Tokenizer_emit(self, HTMLEntityEnd)) { return -1; } return 0; } /* Parse an HTML entity at the head of the wikicode string. */ static int Tokenizer_parse_entity(Tokenizer *self) { Py_ssize_t reset = self->head; PyObject *tokenlist; if (Tokenizer_check_route(self, LC_HTML_ENTITY) < 0) { goto on_bad_route; } if (Tokenizer_push(self, LC_HTML_ENTITY)) { return -1; } if (Tokenizer_really_parse_entity(self)) { return -1; } if (BAD_ROUTE) { on_bad_route: RESET_ROUTE(); self->head = reset; if (Tokenizer_emit_char(self, '&')) { return -1; } return 0; } tokenlist = Tokenizer_pop(self); if (!tokenlist) { return -1; } if (Tokenizer_emit_all(self, tokenlist)) { Py_DECREF(tokenlist); return -1; } Py_DECREF(tokenlist); return 0; } /* Parse an HTML comment at the head of the wikicode string. 
*/ static int Tokenizer_parse_comment(Tokenizer *self) { Py_ssize_t reset = self->head + 3; PyObject *comment; Py_UCS4 this; self->head += 4; if (Tokenizer_push(self, 0)) { return -1; } while (1) { this = Tokenizer_read(self, 0); if (!this) { comment = Tokenizer_pop(self); Py_XDECREF(comment); self->head = reset; return Tokenizer_emit_text(self, " TagOpenOpen = make("TagOpenOpen") # < TagAttrStart = make("TagAttrStart") TagAttrEquals = make("TagAttrEquals") # = TagAttrQuote = make("TagAttrQuote") # ", ' TagCloseOpen = make("TagCloseOpen") # > TagCloseSelfclose = make("TagCloseSelfclose") # /> TagOpenClose = make("TagOpenClose") # del make mwparserfromhell-0.6.3/src/mwparserfromhell/smart_list/000077500000000000000000000000001411406531600234535ustar00rootroot00000000000000mwparserfromhell-0.6.3/src/mwparserfromhell/smart_list/__init__.py000066400000000000000000000027031411406531600255660ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ This module contains the :class:`.SmartList` type, as well as its :class:`.ListProxy` child, which together implement a list whose sublists reflect changes made to the main list, and vice-versa. """ from .list_proxy import ListProxy as _ListProxy from .smart_list import SmartList mwparserfromhell-0.6.3/src/mwparserfromhell/smart_list/list_proxy.py000066400000000000000000000170511411406531600262450ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
from .utils import _SliceNormalizerMixIn, inheritdoc class ListProxy(_SliceNormalizerMixIn, list): """Implement the ``list`` interface by getting elements from a parent. This is created by a :class:`.SmartList` object when slicing. It does not actually store the list at any time; instead, whenever the list is needed, it builds it dynamically using the :meth:`_render` method. """ def __init__(self, parent, sliceinfo): super().__init__() self._parent = parent self._sliceinfo = sliceinfo def __repr__(self): return repr(self._render()) def __lt__(self, other): if isinstance(other, ListProxy): return self._render() < list(other) return self._render() < other def __le__(self, other): if isinstance(other, ListProxy): return self._render() <= list(other) return self._render() <= other def __eq__(self, other): if isinstance(other, ListProxy): return self._render() == list(other) return self._render() == other def __ne__(self, other): if isinstance(other, ListProxy): return self._render() != list(other) return self._render() != other def __gt__(self, other): if isinstance(other, ListProxy): return self._render() > list(other) return self._render() > other def __ge__(self, other): if isinstance(other, ListProxy): return self._render() >= list(other) return self._render() >= other def __bool__(self): return bool(self._render()) def __len__(self): return max((self._stop - self._start) // self._step, 0) def __getitem__(self, key): if isinstance(key, slice): key = self._normalize_slice(key, clamp=True) keystart = min(self._start + key.start, self._stop) keystop = min(self._start + key.stop, self._stop) adjusted = slice(keystart, keystop, key.step) return self._parent[adjusted] return self._render()[key] def __setitem__(self, key, item): if isinstance(key, slice): key = self._normalize_slice(key, clamp=True) keystart = min(self._start + key.start, self._stop) keystop = min(self._start + key.stop, self._stop) adjusted = slice(keystart, keystop, key.step) self._parent[adjusted] = 
item else: length = len(self) if key < 0: key = length + key if key < 0 or key >= length: raise IndexError("list assignment index out of range") self._parent[self._start + key] = item def __delitem__(self, key): if isinstance(key, slice): key = self._normalize_slice(key, clamp=True) keystart = min(self._start + key.start, self._stop) keystop = min(self._start + key.stop, self._stop) adjusted = slice(keystart, keystop, key.step) del self._parent[adjusted] else: length = len(self) if key < 0: key = length + key if key < 0 or key >= length: raise IndexError("list assignment index out of range") del self._parent[self._start + key] def __iter__(self): i = self._start while i < self._stop: yield self._parent[i] i += self._step def __reversed__(self): i = self._stop - 1 while i >= self._start: yield self._parent[i] i -= self._step def __contains__(self, item): return item in self._render() def __add__(self, other): return type(self._parent)(list(self) + other) def __radd__(self, other): return type(self._parent)(other + list(self)) def __iadd__(self, other): self.extend(other) return self def __mul__(self, other): return type(self._parent)(list(self) * other) def __rmul__(self, other): return type(self._parent)(other * list(self)) def __imul__(self, other): self.extend(list(self) * (other - 1)) return self @property def _start(self): """The starting index of this list, inclusive.""" return self._sliceinfo[0] @property def _stop(self): """The ending index of this list, exclusive.""" if self._sliceinfo[1] is None: return len(self._parent) return self._sliceinfo[1] @property def _step(self): """The number to increase the index by between items.""" return self._sliceinfo[2] def _render(self): """Return the actual list from the stored start/stop/step.""" return list(self._parent)[self._start : self._stop : self._step] @inheritdoc def append(self, item): self._parent.insert(self._stop, item) @inheritdoc def count(self, item): return self._render().count(item) @inheritdoc def 
index(self, item, start=None, stop=None): if start is not None: if stop is not None: return self._render().index(item, start, stop) return self._render().index(item, start) return self._render().index(item) @inheritdoc def extend(self, item): self._parent[self._stop : self._stop] = item @inheritdoc def insert(self, index, item): if index < 0: index = len(self) + index self._parent.insert(self._start + index, item) @inheritdoc def pop(self, index=None): length = len(self) if index is None: index = length - 1 elif index < 0: index = length + index if index < 0 or index >= length: raise IndexError("pop index out of range") return self._parent.pop(self._start + index) @inheritdoc def remove(self, item): index = self.index(item) del self._parent[self._start + index] @inheritdoc def reverse(self): item = self._render() item.reverse() self._parent[self._start : self._stop : self._step] = item @inheritdoc def sort(self, key=None, reverse=None): item = self._render() kwargs = {} if key is not None: kwargs["key"] = key if reverse is not None: kwargs["reverse"] = reverse item.sort(**kwargs) self._parent[self._start : self._stop : self._step] = item mwparserfromhell-0.6.3/src/mwparserfromhell/smart_list/smart_list.py000066400000000000000000000126371411406531600262170ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from weakref import ref from .list_proxy import ListProxy from .utils import _SliceNormalizerMixIn, inheritdoc class SmartList(_SliceNormalizerMixIn, list): """Implements the ``list`` interface with special handling of sublists. When a sublist is created (by ``list[i:j]``), any changes made to this list (such as the addition, removal, or replacement of elements) will be reflected in the sublist, or vice-versa, to the greatest degree possible. This is implemented by having sublists - instances of the :class:`.ListProxy` type - dynamically determine their elements by storing their slice info and retrieving that slice from the parent. Methods that change the size of the list also change the slice info. 
For example:: >>> parent = SmartList([0, 1, 2, 3]) >>> parent [0, 1, 2, 3] >>> child = parent[2:] >>> child [2, 3] >>> child.append(4) >>> child [2, 3, 4] >>> parent [0, 1, 2, 3, 4] """ def __init__(self, iterable=None): if iterable: super().__init__(iterable) else: super().__init__() self._children = {} def __getitem__(self, key): if not isinstance(key, slice): return super().__getitem__(key) key = self._normalize_slice(key, clamp=False) sliceinfo = [key.start, key.stop, key.step] child = ListProxy(self, sliceinfo) child_ref = ref(child, self._delete_child) self._children[id(child_ref)] = (child_ref, sliceinfo) return child def __setitem__(self, key, item): if not isinstance(key, slice): super().__setitem__(key, item) return item = list(item) super().__setitem__(key, item) key = self._normalize_slice(key, clamp=True) diff = len(item) + (key.start - key.stop) // key.step if not diff: return for child, (start, stop, _step) in self._children.values(): if start > key.stop: self._children[id(child)][1][0] += diff if stop is not None and stop >= key.stop: self._children[id(child)][1][1] += diff def __delitem__(self, key): super().__delitem__(key) if isinstance(key, slice): key = self._normalize_slice(key, clamp=True) else: key = slice(key, key + 1, 1) diff = (key.stop - key.start) // key.step for child, (start, stop, _step) in self._children.values(): if start > key.start: self._children[id(child)][1][0] -= diff if stop is not None and stop >= key.stop: self._children[id(child)][1][1] -= diff def __add__(self, other): return SmartList(list(self) + other) def __radd__(self, other): return SmartList(other + list(self)) def __iadd__(self, other): self.extend(other) return self def _delete_child(self, child_ref): """Remove a child reference that is about to be garbage-collected.""" del self._children[id(child_ref)] def _detach_children(self): """Remove all children and give them independent parent copies.""" children = [val[0] for val in self._children.values()] for child 
in children: child()._parent = list(self) self._children.clear() @inheritdoc def append(self, item): head = len(self) self[head:head] = [item] @inheritdoc def extend(self, item): head = len(self) self[head:head] = item @inheritdoc def insert(self, index, item): self[index:index] = [item] @inheritdoc def pop(self, index=None): if index is None: index = len(self) - 1 item = self[index] del self[index] return item @inheritdoc def remove(self, item): del self[self.index(item)] @inheritdoc def reverse(self): self._detach_children() super().reverse() @inheritdoc def sort(self, key=None, reverse=None): self._detach_children() kwargs = {} if key is not None: kwargs["key"] = key if reverse is not None: kwargs["reverse"] = reverse super().sort(**kwargs) mwparserfromhell-0.6.3/src/mwparserfromhell/smart_list/utils.py000066400000000000000000000041421411406531600251660ustar00rootroot00000000000000# Copyright (C) 2012-2016 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from sys import maxsize __all__ = [] def inheritdoc(method): """Set __doc__ of *method* to __doc__ of *method* in its parent class. Since this is used on :class:`.SmartList`, the "parent class" used is ``list``. This function can be used as a decorator. """ method.__doc__ = getattr(list, method.__name__).__doc__ return method class _SliceNormalizerMixIn: """MixIn that provides a private method to normalize slices.""" def _normalize_slice(self, key, clamp=False): """Return a slice equivalent to the input *key*, standardized.""" if key.start is None: start = 0 else: start = (len(self) + key.start) if key.start < 0 else key.start if key.stop is None or key.stop == maxsize: stop = len(self) if clamp else None else: stop = (len(self) + key.stop) if key.stop < 0 else key.stop return slice(start, stop, key.step or 1) mwparserfromhell-0.6.3/src/mwparserfromhell/string_mixin.py000066400000000000000000000064411411406531600243630ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ This module contains the :class:`.StringMixIn` type, which implements the interface for the ``str`` type in a dynamic manner. """ from sys import getdefaultencoding __all__ = ["StringMixIn"] def inheritdoc(method): """Set __doc__ of *method* to __doc__ of *method* in its parent class. Since this is used on :class:`.StringMixIn`, the "parent class" used is ``str``. This function can be used as a decorator. """ method.__doc__ = getattr(str, method.__name__).__doc__ return method class StringMixIn: """Implement the interface for ``str`` in a dynamic manner. To use this class, inherit from it and override the :meth:`__str__` method to return the string representation of the object. The various string methods will operate on the value of :meth:`__str__` instead of the immutable ``self`` like the regular ``str`` type. 
""" def __str__(self): raise NotImplementedError() def __bytes__(self): return bytes(self.__str__(), getdefaultencoding()) def __repr__(self): return repr(self.__str__()) def __lt__(self, other): return self.__str__() < other def __le__(self, other): return self.__str__() <= other def __eq__(self, other): return self.__str__() == other def __ne__(self, other): return self.__str__() != other def __gt__(self, other): return self.__str__() > other def __ge__(self, other): return self.__str__() >= other def __bool__(self): return bool(self.__str__()) def __len__(self): return len(self.__str__()) def __iter__(self): yield from self.__str__() def __getitem__(self, key): return self.__str__()[key] def __reversed__(self): return reversed(self.__str__()) def __contains__(self, item): return str(item) in self.__str__() def __getattr__(self, attr): if not hasattr(str, attr): raise AttributeError( "{!r} object has no attribute {!r}".format(type(self).__name__, attr) ) return getattr(self.__str__(), attr) maketrans = str.maketrans # Static method can't rely on __getattr__ del inheritdoc mwparserfromhell-0.6.3/src/mwparserfromhell/utils.py000066400000000000000000000062361411406531600230130ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ This module contains accessory functions for other parts of the library. Parser users generally won't need stuff from here. """ __all__ = ["parse_anything"] def parse_anything(value, context=0, skip_style_tags=False): """Return a :class:`.Wikicode` for *value*, allowing multiple types. This differs from :meth:`.Parser.parse` in that we accept more than just a string to be parsed. Strings, bytes, integers (converted to strings), ``None``, existing :class:`.Node` or :class:`.Wikicode` objects, as well as an iterable of these types, are supported. This is used to parse input on-the-fly by various methods of :class:`.Wikicode` and others like :class:`.Template`, such as :meth:`wikicode.insert() <.Wikicode.insert>` or setting :meth:`template.name <.Template.name>`. Additional arguments are passed directly to :meth:`.Parser.parse`. 
""" # pylint: disable=cyclic-import,import-outside-toplevel from .nodes import Node from .parser import Parser from .smart_list import SmartList from .wikicode import Wikicode if isinstance(value, Wikicode): return value if isinstance(value, Node): return Wikicode(SmartList([value])) if isinstance(value, str): return Parser().parse(value, context, skip_style_tags) if isinstance(value, bytes): return Parser().parse(value.decode("utf8"), context, skip_style_tags) if isinstance(value, int): return Parser().parse(str(value), context, skip_style_tags) if value is None: return Wikicode(SmartList()) if hasattr(value, "read"): return parse_anything(value.read(), context, skip_style_tags) try: nodelist = SmartList() for item in value: nodelist += parse_anything(item, context, skip_style_tags).nodes return Wikicode(nodelist) except TypeError as exc: error = ( "Needs string, Node, Wikicode, file, int, None, or " "iterable of these, but got {0}: {1}" ) raise ValueError(error.format(type(value).__name__, value)) from exc mwparserfromhell-0.6.3/src/mwparserfromhell/wikicode.py000066400000000000000000000736031411406531600234530ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import re from itertools import chain from .nodes import ( Argument, Comment, ExternalLink, Heading, HTMLEntity, Node, Tag, Template, Text, Wikilink, ) from .smart_list.list_proxy import ListProxy from .string_mixin import StringMixIn from .utils import parse_anything __all__ = ["Wikicode"] FLAGS = re.IGNORECASE | re.DOTALL | re.UNICODE class Wikicode(StringMixIn): """A ``Wikicode`` is a container for nodes that operates like a string. Additionally, it contains methods that can be used to extract data from or modify the nodes, implemented in an interface similar to a list. For example, :meth:`index` can get the index of a node in the list, and :meth:`insert` can add a new node at that index. The :meth:`filter() ` series of functions is very useful for extracting and iterating over, for example, all of the templates in the object. 
""" RECURSE_OTHERS = 2 def __init__(self, nodes): super().__init__() self._nodes = nodes def __str__(self): return "".join([str(node) for node in self.nodes]) @staticmethod def _get_children(node, contexts=False, restrict=None, parent=None): """Iterate over all child :class:`.Node`\\ s of a given *node*.""" yield (parent, node) if contexts else node if restrict and isinstance(node, restrict): return for code in node.__children__(): for child in code.nodes: sub = Wikicode._get_children(child, contexts, restrict, code) yield from sub @staticmethod def _slice_replace(code, index, old, new): """Replace the string *old* with *new* across *index* in *code*.""" nodes = [str(node) for node in code.get(index)] substring = "".join(nodes).replace(old, new) code.nodes[index] = parse_anything(substring).nodes @staticmethod def _build_matcher(matches, flags): """Helper for :meth:`_indexed_ifilter` and others. If *matches* is a function, return it. If it's a regex, return a wrapper around it that can be called with a node to do a search. If it's ``None``, return a function that always returns ``True``. """ if matches: if callable(matches): return matches return lambda obj: re.search(matches, str(obj), flags) return lambda obj: True def _indexed_ifilter( self, recursive=True, matches=None, flags=FLAGS, forcetype=None ): """Iterate over nodes and their corresponding indices in the node list. The arguments are interpreted as for :meth:`ifilter`. For each tuple ``(i, node)`` yielded by this method, ``self.index(node) == i``. Note that if *recursive* is ``True``, ``self.nodes[i]`` might not be the node itself, but will still contain it. 
""" match = self._build_matcher(matches, flags) if recursive: restrict = forcetype if recursive == self.RECURSE_OTHERS else None def getter(i, node): for ch in self._get_children(node, restrict=restrict): yield (i, ch) inodes = chain(*(getter(i, n) for i, n in enumerate(self.nodes))) else: inodes = enumerate(self.nodes) for i, node in inodes: if (not forcetype or isinstance(node, forcetype)) and match(node): yield (i, node) def _is_child_wikicode(self, obj, recursive=True): """Return whether the given :class:`.Wikicode` is a descendant.""" def deref(nodes): if isinstance(nodes, ListProxy): return nodes._parent # pylint: disable=protected-access return nodes target = deref(obj.nodes) if target is deref(self.nodes): return True if recursive: todo = [self] while todo: code = todo.pop() if target is deref(code.nodes): return True for node in code.nodes: todo += list(node.__children__()) return False def _do_strong_search(self, obj, recursive=True): """Search for the specific element *obj* within the node list. *obj* can be either a :class:`.Node` or a :class:`.Wikicode` object. If found, we return a tuple (*context*, *index*) where *context* is the :class:`.Wikicode` that contains *obj* and *index* is its index there, as a :class:`slice`. Note that if *recursive* is ``False``, *context* will always be ``self`` (since we only look for *obj* among immediate descendants), but if *recursive* is ``True``, then it could be any :class:`.Wikicode` contained by a node within ``self``. If *obj* is not found, :exc:`ValueError` is raised. 
""" if isinstance(obj, Wikicode): if not self._is_child_wikicode(obj, recursive): raise ValueError(obj) return obj, slice(0, len(obj.nodes)) if isinstance(obj, Node): mkslice = lambda i: slice(i, i + 1) if not recursive: return self, mkslice(self.index(obj)) for node in self.nodes: for context, child in self._get_children(node, contexts=True): if obj is child: if not context: context = self return context, mkslice(context.index(child)) raise ValueError(obj) raise TypeError(obj) def _do_weak_search(self, obj, recursive): """Search for an element that looks like *obj* within the node list. This follows the same rules as :meth:`_do_strong_search` with some differences. *obj* is treated as a string that might represent any :class:`.Node`, :class:`.Wikicode`, or combination of the two present in the node list. Thus, matching is weak (using string comparisons) rather than strong (using ``is``). Because multiple nodes can match *obj*, the result is a list of tuples instead of just one (however, :exc:`ValueError` is still raised if nothing is found). Individual matches will never overlap. The tuples contain a new first element, *exact*, which is ``True`` if we were able to match *obj* exactly to one or more adjacent nodes, or ``False`` if we found *obj* inside a node or incompletely spanning multiple nodes. 
""" obj = parse_anything(obj) if not obj or obj not in self: raise ValueError(obj) results = [] contexts = [self] while contexts: context = contexts.pop() i = len(context.nodes) - 1 while i >= 0: node = context.get(i) if obj.get(-1) == node: for j in range(-len(obj.nodes), -1): if obj.get(j) != context.get(i + j + 1): break else: i -= len(obj.nodes) - 1 index = slice(i, i + len(obj.nodes)) results.append((True, context, index)) elif recursive and obj in node: contexts.extend(node.__children__()) i -= 1 if not results: if not recursive: raise ValueError(obj) results.append((False, self, slice(0, len(self.nodes)))) return results def _get_tree(self, code, lines, marker, indent): """Build a tree to illustrate the way the Wikicode object was parsed. The method that builds the actual tree is ``__showtree__`` of ``Node`` objects. *code* is the ``Wikicode`` object to build a tree for. *lines* is the list to append the tree to, which is returned at the end of the method. *marker* is some object to be used to indicate that the builder should continue on from the last line instead of starting a new one; it should be any object that can be tested for with ``is``. *indent* is the starting indentation. """ def write(*args): """Write a new line following the proper indentation rules.""" if lines and lines[-1] is marker: # Continue from the last line lines.pop() # Remove the marker last = lines.pop() lines.append(last + " ".join(args)) else: lines.append(" " * 6 * indent + " ".join(args)) get = lambda code: self._get_tree(code, lines, marker, indent + 1) mark = lambda: lines.append(marker) for node in code.nodes: node.__showtree__(write, get, mark) return lines @classmethod def _build_filter_methods(cls, **meths): """Given Node types, build the corresponding i?filter shortcuts. The should be given as keys storing the method's base name paired with values storing the corresponding :class:`.Node` type. 
For example, the dict may contain the pair ``("templates", Template)``, which will produce the methods :meth:`ifilter_templates` and :meth:`filter_templates`, which are shortcuts for :meth:`ifilter(forcetype=Template) ` and :meth:`filter(forcetype=Template) `, respectively. These shortcuts are added to the class itself, with an appropriate docstring. """ doc = """Iterate over {0}. This is equivalent to :meth:`{1}` with *forcetype* set to :class:`~{2.__module__}.{2.__name__}`. """ make_ifilter = lambda ftype: ( lambda self, *a, **kw: self.ifilter(forcetype=ftype, *a, **kw) ) make_filter = lambda ftype: ( lambda self, *a, **kw: self.filter(forcetype=ftype, *a, **kw) ) for name, ftype in meths.items(): ifilt = make_ifilter(ftype) filt = make_filter(ftype) ifilt.__doc__ = doc.format(name, "ifilter", ftype) filt.__doc__ = doc.format(name, "filter", ftype) setattr(cls, "ifilter_" + name, ifilt) setattr(cls, "filter_" + name, filt) @property def nodes(self): """A list of :class:`.Node` objects. This is the internal data actually stored within a :class:`.Wikicode` object. """ return self._nodes @nodes.setter def nodes(self, value): if not isinstance(value, list): value = parse_anything(value).nodes self._nodes = value def get(self, index): """Return the *index*\\ th node within the list of nodes.""" return self.nodes[index] def set(self, index, value): """Set the ``Node`` at *index* to *value*. Raises :exc:`IndexError` if *index* is out of range, or :exc:`ValueError` if *value* cannot be coerced into one :class:`.Node`. To insert multiple nodes at an index, use :meth:`get` with either :meth:`remove` and :meth:`insert` or :meth:`replace`. 
""" nodes = parse_anything(value).nodes if len(nodes) > 1: raise ValueError("Cannot coerce multiple nodes into one index") if index >= len(self.nodes) or -1 * index > len(self.nodes): raise IndexError("List assignment index out of range") if nodes: self.nodes[index] = nodes[0] else: self.nodes.pop(index) def contains(self, obj): """Return whether this Wikicode object contains *obj*. If *obj* is a :class:`.Node` or :class:`.Wikicode` object, then we search for it exactly among all of our children, recursively. Otherwise, this method just uses :meth:`.__contains__` on the string. """ if not isinstance(obj, (Node, Wikicode)): return obj in self try: self._do_strong_search(obj, recursive=True) except ValueError: return False return True def index(self, obj, recursive=False): """Return the index of *obj* in the list of nodes. Raises :exc:`ValueError` if *obj* is not found. If *recursive* is ``True``, we will look in all nodes of ours and their descendants, and return the index of our direct descendant node within *our* list of nodes. Otherwise, the lookup is done only on direct descendants. """ strict = isinstance(obj, Node) equivalent = (lambda o, n: o is n) if strict else (lambda o, n: o == n) for i, node in enumerate(self.nodes): if recursive: for child in self._get_children(node): if equivalent(obj, child): return i elif equivalent(obj, node): return i raise ValueError(obj) def get_ancestors(self, obj): """Return a list of all ancestor nodes of the :class:`.Node` *obj*. The list is ordered from the most shallow ancestor (greatest great- grandparent) to the direct parent. The node itself is not included in the list. For example:: >>> text = "{{a|{{b|{{c|{{d}}}}}}}}" >>> code = mwparserfromhell.parse(text) >>> node = code.filter_templates(matches=lambda n: n == "{{d}}")[0] >>> code.get_ancestors(node) ['{{a|{{b|{{c|{{d}}}}}}}}', '{{b|{{c|{{d}}}}}}', '{{c|{{d}}}}'] Will return an empty list if *obj* is at the top level of this Wikicode object. 
Will raise :exc:`ValueError` if it wasn't found. """ def _get_ancestors(code, needle): for node in code.nodes: if node is needle: return [] for code in node.__children__(): ancestors = _get_ancestors(code, needle) if ancestors is not None: return [node] + ancestors return None if isinstance(obj, Wikicode): obj = obj.get(0) elif not isinstance(obj, Node): raise ValueError(obj) ancestors = _get_ancestors(self, obj) if ancestors is None: raise ValueError(obj) return ancestors def get_parent(self, obj): """Return the direct parent node of the :class:`.Node` *obj*. This function is equivalent to calling :meth:`.get_ancestors` and taking the last element of the resulting list. Will return None if the node exists but does not have a parent; i.e., it is at the top level of the Wikicode object. """ ancestors = self.get_ancestors(obj) return ancestors[-1] if ancestors else None def insert(self, index, value): """Insert *value* at *index* in the list of nodes. *value* can be anything parsable by :func:`.parse_anything`, which includes strings or other :class:`.Wikicode` or :class:`.Node` objects. """ nodes = parse_anything(value).nodes for node in reversed(nodes): self.nodes.insert(index, node) def insert_before(self, obj, value, recursive=True): """Insert *value* immediately before *obj*. *obj* can be either a string, a :class:`.Node`, or another :class:`.Wikicode` object (as created by :meth:`get_sections`, for example). If *obj* is a string, we will operate on all instances of that string within the code, otherwise only on the specific instance given. *value* can be anything parsable by :func:`.parse_anything`. If *recursive* is ``True``, we will try to find *obj* within our child nodes even if it is not a direct descendant of this :class:`.Wikicode` object. If *obj* is not found, :exc:`ValueError` is raised. 
""" if isinstance(obj, (Node, Wikicode)): context, index = self._do_strong_search(obj, recursive) context.insert(index.start, value) else: for exact, context, index in self._do_weak_search(obj, recursive): if exact: context.insert(index.start, value) else: obj = str(obj) self._slice_replace(context, index, obj, str(value) + obj) def insert_after(self, obj, value, recursive=True): """Insert *value* immediately after *obj*. *obj* can be either a string, a :class:`.Node`, or another :class:`.Wikicode` object (as created by :meth:`get_sections`, for example). If *obj* is a string, we will operate on all instances of that string within the code, otherwise only on the specific instance given. *value* can be anything parsable by :func:`.parse_anything`. If *recursive* is ``True``, we will try to find *obj* within our child nodes even if it is not a direct descendant of this :class:`.Wikicode` object. If *obj* is not found, :exc:`ValueError` is raised. """ if isinstance(obj, (Node, Wikicode)): context, index = self._do_strong_search(obj, recursive) context.insert(index.stop, value) else: for exact, context, index in self._do_weak_search(obj, recursive): if exact: context.insert(index.stop, value) else: obj = str(obj) self._slice_replace(context, index, obj, obj + str(value)) def replace(self, obj, value, recursive=True): """Replace *obj* with *value*. *obj* can be either a string, a :class:`.Node`, or another :class:`.Wikicode` object (as created by :meth:`get_sections`, for example). If *obj* is a string, we will operate on all instances of that string within the code, otherwise only on the specific instance given. *value* can be anything parsable by :func:`.parse_anything`. If *recursive* is ``True``, we will try to find *obj* within our child nodes even if it is not a direct descendant of this :class:`.Wikicode` object. If *obj* is not found, :exc:`ValueError` is raised. 
""" if isinstance(obj, (Node, Wikicode)): context, index = self._do_strong_search(obj, recursive) for _ in range(index.start, index.stop): context.nodes.pop(index.start) context.insert(index.start, value) else: for exact, context, index in self._do_weak_search(obj, recursive): if exact: for _ in range(index.start, index.stop): context.nodes.pop(index.start) context.insert(index.start, value) else: self._slice_replace(context, index, str(obj), str(value)) def append(self, value): """Insert *value* at the end of the list of nodes. *value* can be anything parsable by :func:`.parse_anything`. """ nodes = parse_anything(value).nodes for node in nodes: self.nodes.append(node) def remove(self, obj, recursive=True): """Remove *obj* from the list of nodes. *obj* can be either a string, a :class:`.Node`, or another :class:`.Wikicode` object (as created by :meth:`get_sections`, for example). If *obj* is a string, we will operate on all instances of that string within the code, otherwise only on the specific instance given. If *recursive* is ``True``, we will try to find *obj* within our child nodes even if it is not a direct descendant of this :class:`.Wikicode` object. If *obj* is not found, :exc:`ValueError` is raised. """ if isinstance(obj, (Node, Wikicode)): context, index = self._do_strong_search(obj, recursive) for _ in range(index.start, index.stop): context.nodes.pop(index.start) else: for exact, context, index in self._do_weak_search(obj, recursive): if exact: for _ in range(index.start, index.stop): context.nodes.pop(index.start) else: self._slice_replace(context, index, str(obj), "") def matches(self, other): """Do a loose equivalency test suitable for comparing page names. *other* can be any string-like object, including :class:`.Wikicode`, or an iterable of these. This operation is symmetric; both sides are adjusted. Specifically, whitespace and markup is stripped and the first letter's case is normalized. 
        Typical usage is ``if template.name.matches("stub"): ...``.
        """
        # Normalize by uppercasing the first letter and treating underscores
        # as spaces, mirroring MediaWiki page-name rules.
        normalize = lambda s: (s[0].upper() + s[1:]).replace("_", " ") if s else s
        this = normalize(self.strip_code().strip())
        if isinstance(other, (str, bytes, Wikicode, Node)):
            that = parse_anything(other).strip_code().strip()
            return this == normalize(that)
        # *other* is an iterable of candidates: match if any one matches.
        for obj in other:
            that = parse_anything(obj).strip_code().strip()
            if this == normalize(that):
                return True
        return False

    def ifilter(self, recursive=True, matches=None, flags=FLAGS, forcetype=None):
        """Iterate over nodes in our list matching certain conditions.

        If *forcetype* is given, only nodes that are instances of this type (or
        tuple of types) are yielded. Setting *recursive* to ``True`` will
        iterate over all children and their descendants. ``RECURSE_OTHERS``
        will only iterate over children that are not the instances of
        *forcetype*. ``False`` will only iterate over immediate children.

        ``RECURSE_OTHERS`` can be used to iterate over all un-nested templates,
        even if they are inside of HTML tags, like so:

            >>> code = mwparserfromhell.parse("{{foo}}{{foo|{{bar}}}}")
            >>> code.filter_templates(code.RECURSE_OTHERS)
            ["{{foo}}", "{{foo|{{bar}}}}"]

        *matches* can be used to further restrict the nodes, either as a
        function (taking a single :class:`.Node` and returning a boolean) or a
        regular expression (matched against the node's string representation
        with :func:`re.search`). If *matches* is a regex, the flags passed to
        :func:`re.search` are :const:`re.IGNORECASE`, :const:`re.DOTALL`, and
        :const:`re.UNICODE`, but custom flags can be specified by passing
        *flags*.
        """
        # Delegate to the indexed variant and drop the index component.
        gen = self._indexed_ifilter(recursive, matches, flags, forcetype)
        return (node for i, node in gen)

    def filter(self, *args, **kwargs):
        """Return a list of nodes within our list matching certain conditions.

        This is equivalent to calling :func:`list` on :meth:`ifilter`.
""" return list(self.ifilter(*args, **kwargs)) def get_sections( self, levels=None, matches=None, flags=FLAGS, flat=False, include_lead=None, include_headings=True, ): """Return a list of sections within the page. Sections are returned as :class:`.Wikicode` objects with a shared node list (implemented using :class:`.SmartList`) so that changes to sections are reflected in the parent Wikicode object. Each section contains all of its subsections, unless *flat* is ``True``. If *levels* is given, it should be a iterable of integers; only sections whose heading levels are within it will be returned. If *matches* is given, it should be either a function or a regex; only sections whose headings match it (without the surrounding equal signs) will be included. *flags* can be used to override the default regex flags (see :meth:`ifilter`) if a regex *matches* is used. If *include_lead* is ``True``, the first, lead section (without a heading) will be included in the list; ``False`` will not include it; the default will include it only if no specific *levels* were given. If *include_headings* is ``True``, the section's beginning :class:`.Heading` object will be included; otherwise, this is skipped. 
""" title_matcher = self._build_matcher(matches, flags) matcher = lambda heading: ( title_matcher(heading.title) and (not levels or heading.level in levels) ) iheadings = self._indexed_ifilter(recursive=False, forcetype=Heading) sections = [] # Tuples of (index_of_first_node, section) # Tuples of (index, heading), where index and heading.level are both # monotonically increasing open_headings = [] # Add the lead section if appropriate: if include_lead or not (include_lead is not None or matches or levels): itr = self._indexed_ifilter(recursive=False, forcetype=Heading) try: first = next(itr)[0] sections.append((0, Wikicode(self.nodes[:first]))) except StopIteration: # No headings in page sections.append((0, Wikicode(self.nodes[:]))) # Iterate over headings, adding sections to the list as they end: for i, heading in iheadings: if flat: # With flat, all sections close at the next heading newly_closed, open_headings = open_headings, [] else: # Otherwise, figure out which sections have closed, if any closed_start_index = len(open_headings) for j, (start, last_heading) in enumerate(open_headings): if heading.level <= last_heading.level: closed_start_index = j break newly_closed = open_headings[closed_start_index:] del open_headings[closed_start_index:] for start, closed_heading in newly_closed: if matcher(closed_heading): sections.append((start, Wikicode(self.nodes[start:i]))) start = i if include_headings else (i + 1) open_headings.append((start, heading)) # Add any remaining open headings to the list of sections: for start, heading in open_headings: if matcher(heading): sections.append((start, Wikicode(self.nodes[start:]))) # Ensure that earlier sections are earlier in the returned list: return [section for i, section in sorted(sections)] def strip_code(self, normalize=True, collapse=True, keep_template_params=False): """Return a rendered string without unprintable code such as templates. 
The way a node is stripped is handled by the :meth:`~.Node.__strip__` method of :class:`.Node` objects, which generally return a subset of their nodes or ``None``. For example, templates and tags are removed completely, links are stripped to just their display part, headings are stripped to just their title. If *normalize* is ``True``, various things may be done to strip code further, such as converting HTML entities like ``Σ``, ``Σ``, and ``Σ`` to ``Σ``. If *collapse* is ``True``, we will try to remove excess whitespace as well (three or more newlines are converted to two, for example). If *keep_template_params* is ``True``, then template parameters will be preserved in the output (normally, they are removed completely). """ kwargs = { "normalize": normalize, "collapse": collapse, "keep_template_params": keep_template_params, } nodes = [] for node in self.nodes: stripped = node.__strip__(**kwargs) if stripped: nodes.append(str(stripped)) if collapse: stripped = "".join(nodes).strip("\n") while "\n\n\n" in stripped: stripped = stripped.replace("\n\n\n", "\n\n") return stripped return "".join(nodes) def get_tree(self): """Return a hierarchical tree representation of the object. The representation is a string makes the most sense printed. It is built by calling :meth:`_get_tree` on the :class:`.Wikicode` object and its children recursively. 
The end result may look something like the following:: >>> text = "Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}" >>> print(mwparserfromhell.parse(text).get_tree()) Lorem ipsum {{ foo | 1 = bar | 2 = {{ baz }} | spam = eggs }} """ marker = object() # Random object we can find with certainty in a list return "\n".join(self._get_tree(self, [], marker, 0)) Wikicode._build_filter_methods( arguments=Argument, comments=Comment, external_links=ExternalLink, headings=Heading, html_entities=HTMLEntity, tags=Tag, templates=Template, text=Text, wikilinks=Wikilink, ) mwparserfromhell-0.6.3/tests/000077500000000000000000000000001411406531600162545ustar00rootroot00000000000000mwparserfromhell-0.6.3/tests/MWPFHTestCase.tmlanguage000066400000000000000000000053671411406531600226520ustar00rootroot00000000000000 fileTypes mwtest name MWParserFromHell Test Case patterns match --- name markup.heading.divider.mwpfh captures 1 name keyword.other.name.mwpfh 2 name variable.other.name.mwpfh match (name:)\s*(\w*) name meta.name.mwpfh captures 1 name keyword.other.label.mwpfh 2 name comment.line.other.label.mwpfh match (label:)\s*(.*) name meta.label.mwpfh captures 1 name keyword.other.input.mwpfh 2 name string.quoted.double.input.mwpfh match (input:)\s*(.*) name meta.input.mwpfh captures 1 name keyword.other.output.mwpfh match (output:) name meta.output.mwpfh captures 1 name support.language.token.mwpfh match (\w+)\s*\( name meta.name.token.mwpfh captures 1 name variable.parameter.token.mwpfh match (\w+)\s*(=) name meta.name.parameter.token.mwpfh match ".*?" 
name string.quoted.double.mwpfh scopeName text.mwpfh uuid cd3e2ffa-a57d-4c40-954f-1a2e87ffd638 mwparserfromhell-0.6.3/tests/__init__.py000066400000000000000000000000001411406531600203530ustar00rootroot00000000000000mwparserfromhell-0.6.3/tests/conftest.py000066400000000000000000000140031411406531600204510ustar00rootroot00000000000000# Copyright (C) 2012-2021 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
from mwparserfromhell.nodes import ( Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink, ) from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode wrap = lambda L: Wikicode(SmartList(L)) wraptext = lambda *args: wrap([Text(t) for t in args]) def _assert_node_equal(expected, actual): """Assert that two Nodes have the same type and have the same data.""" registry = { Argument: _assert_argument_node_equal, Comment: _assert_comment_node_equal, ExternalLink: _assert_external_link_node_equal, Heading: _assert_heading_node_equal, HTMLEntity: _assert_html_entity_node_equal, Tag: _assert_tag_node_equal, Template: _assert_template_node_equal, Text: _assert_text_node_equal, Wikilink: _assert_wikilink_node_equal, } # pylint: disable=unidiomatic-typecheck assert type(expected) == type(actual) registry[type(expected)](expected, actual) def _assert_argument_node_equal(expected, actual): """Assert that two Argument nodes have the same data.""" assert_wikicode_equal(expected.name, actual.name) if expected.default is not None: assert_wikicode_equal(expected.default, actual.default) else: assert actual.default is None def _assert_comment_node_equal(expected, actual): """Assert that two Comment nodes have the same data.""" assert expected.contents == actual.contents def _assert_external_link_node_equal(expected, actual): """Assert that two ExternalLink nodes have the same data.""" assert_wikicode_equal(expected.url, actual.url) if expected.title is not None: assert_wikicode_equal(expected.title, actual.title) else: assert actual.title is None assert expected.brackets is actual.brackets assert expected.suppress_space is actual.suppress_space def _assert_heading_node_equal(expected, actual): """Assert that two Heading nodes have the same data.""" assert_wikicode_equal(expected.title, actual.title) assert expected.level == actual.level def _assert_html_entity_node_equal(expected, actual): """Assert that two 
    HTMLEntity nodes have the same data."""
    assert expected.value == actual.value
    assert expected.named is actual.named
    assert expected.hexadecimal is actual.hexadecimal
    assert expected.hex_char == actual.hex_char


def _assert_tag_node_equal(expected, actual):
    """Assert that two Tag nodes have the same data."""
    assert_wikicode_equal(expected.tag, actual.tag)
    if expected.contents is not None:
        assert_wikicode_equal(expected.contents, actual.contents)
    else:
        assert actual.contents is None
    # Attributes must match pairwise, in order.
    length = len(expected.attributes)
    assert length == len(actual.attributes)
    for i in range(length):
        exp_attr = expected.attributes[i]
        act_attr = actual.attributes[i]
        assert_wikicode_equal(exp_attr.name, act_attr.name)
        if exp_attr.value is not None:
            assert_wikicode_equal(exp_attr.value, act_attr.value)
            assert exp_attr.quotes == act_attr.quotes
        else:
            assert act_attr.value is None
        # Whitespace padding around the attribute must match exactly.
        assert exp_attr.pad_first == act_attr.pad_first
        assert exp_attr.pad_before_eq == act_attr.pad_before_eq
        assert exp_attr.pad_after_eq == act_attr.pad_after_eq
    assert expected.wiki_markup == actual.wiki_markup
    assert expected.self_closing is actual.self_closing
    assert expected.invalid is actual.invalid
    assert expected.implicit is actual.implicit
    assert expected.padding == actual.padding
    assert_wikicode_equal(expected.closing_tag, actual.closing_tag)


def _assert_template_node_equal(expected, actual):
    """Assert that two Template nodes have the same data."""
    assert_wikicode_equal(expected.name, actual.name)
    # Parameters must match pairwise, in order.
    length = len(expected.params)
    assert length == len(actual.params)
    for i in range(length):
        exp_param = expected.params[i]
        act_param = actual.params[i]
        assert_wikicode_equal(exp_param.name, act_param.name)
        assert_wikicode_equal(exp_param.value, act_param.value)
        assert exp_param.showkey is act_param.showkey


def _assert_text_node_equal(expected, actual):
    """Assert that two Text nodes have the same data."""
    assert expected.value == actual.value


def _assert_wikilink_node_equal(expected, actual):
    """Assert that two Wikilink
nodes have the same data.""" assert_wikicode_equal(expected.title, actual.title) if expected.text is not None: assert_wikicode_equal(expected.text, actual.text) else: assert actual.text is None def assert_wikicode_equal(expected, actual): """Assert that two Wikicode objects have the same data.""" assert isinstance(actual, Wikicode) length = len(expected.nodes) assert length == len(actual.nodes) for i in range(length): _assert_node_equal(expected.get(i), actual.get(i)) mwparserfromhell-0.6.3/tests/test_argument.py000066400000000000000000000071251411406531600215140ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Test cases for the Argument node. 
""" import pytest from mwparserfromhell.nodes import Argument, Text from .conftest import assert_wikicode_equal, wrap, wraptext def test_str(): """test Argument.__str__()""" node = Argument(wraptext("foobar")) assert "{{{foobar}}}" == str(node) node2 = Argument(wraptext("foo"), wraptext("bar")) assert "{{{foo|bar}}}" == str(node2) def test_children(): """test Argument.__children__()""" node1 = Argument(wraptext("foobar")) node2 = Argument(wraptext("foo"), wrap([Text("bar"), Text("baz")])) gen1 = node1.__children__() gen2 = node2.__children__() assert node1.name is next(gen1) assert node2.name is next(gen2) assert node2.default is next(gen2) with pytest.raises(StopIteration): next(gen1) with pytest.raises(StopIteration): next(gen2) def test_strip(): """test Argument.__strip__()""" node1 = Argument(wraptext("foobar")) node2 = Argument(wraptext("foo"), wraptext("bar")) assert node1.__strip__() is None assert "bar" == node2.__strip__() def test_showtree(): """test Argument.__showtree__()""" output = [] getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) node1 = Argument(wraptext("foobar")) node2 = Argument(wraptext("foo"), wraptext("bar")) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) valid = [ "{{{", (getter, node1.name), "}}}", "{{{", (getter, node2.name), " | ", marker, (getter, node2.default), "}}}", ] assert valid == output def test_name(): """test getter/setter for the name attribute""" name = wraptext("foobar") node1 = Argument(name) node2 = Argument(name, wraptext("baz")) assert name is node1.name assert name is node2.name node1.name = "héhehé" node2.name = "héhehé" assert_wikicode_equal(wraptext("héhehé"), node1.name) assert_wikicode_equal(wraptext("héhehé"), node2.name) def test_default(): """test getter/setter for the default attribute""" default = wraptext("baz") node1 = Argument(wraptext("foobar")) node2 = Argument(wraptext("foobar"), default) 
assert None is node1.default assert default is node2.default node1.default = "buzz" node2.default = None assert_wikicode_equal(wraptext("buzz"), node1.default) assert None is node2.default mwparserfromhell-0.6.3/tests/test_attribute.py000066400000000000000000000103641411406531600216740ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Test cases for the Attribute node extra. 
""" import pytest from mwparserfromhell.nodes import Template from mwparserfromhell.nodes.extras import Attribute from .conftest import assert_wikicode_equal, wrap, wraptext def test_str(): """test Attribute.__str__()""" node = Attribute(wraptext("foo")) assert " foo" == str(node) node2 = Attribute(wraptext("foo"), wraptext("bar")) assert ' foo="bar"' == str(node2) node3 = Attribute(wraptext("a"), wraptext("b"), '"', "", " ", " ") assert 'a = "b"' == str(node3) node4 = Attribute(wraptext("a"), wraptext("b"), "'", "", " ", " ") assert "a = 'b'" == str(node4) node5 = Attribute(wraptext("a"), wraptext("b"), None, "", " ", " ") assert "a = b" == str(node5) node6 = Attribute(wraptext("a"), wrap([]), None, " ", "", " ") assert " a= " == str(node6) def test_name(): """test getter/setter for the name attribute""" name = wraptext("id") node = Attribute(name, wraptext("bar")) assert name is node.name node.name = "{{id}}" assert_wikicode_equal(wrap([Template(wraptext("id"))]), node.name) def test_value(): """test getter/setter for the value attribute""" value = wraptext("foo") node = Attribute(wraptext("id"), value) assert value is node.value node.value = "{{bar}}" assert_wikicode_equal(wrap([Template(wraptext("bar"))]), node.value) node.value = None assert None is node.value node2 = Attribute(wraptext("id"), wraptext("foo"), None) node2.value = "foo bar baz" assert_wikicode_equal(wraptext("foo bar baz"), node2.value) assert '"' == node2.quotes node2.value = 'foo "bar" baz' assert_wikicode_equal(wraptext('foo "bar" baz'), node2.value) assert "'" == node2.quotes node2.value = "foo 'bar' baz" assert_wikicode_equal(wraptext("foo 'bar' baz"), node2.value) assert '"' == node2.quotes node2.value = "fo\"o 'bar' b\"az" assert_wikicode_equal(wraptext("fo\"o 'bar' b\"az"), node2.value) assert '"' == node2.quotes def test_quotes(): """test getter/setter for the quotes attribute""" node1 = Attribute(wraptext("id"), wraptext("foo"), None) node2 = Attribute(wraptext("id"), wraptext("bar")) 
node3 = Attribute(wraptext("id"), wraptext("foo bar baz")) assert None is node1.quotes assert '"' == node2.quotes node1.quotes = "'" node2.quotes = None assert "'" == node1.quotes assert None is node2.quotes with pytest.raises(ValueError): node1.__setattr__("quotes", "foobar") with pytest.raises(ValueError): node3.__setattr__("quotes", None) with pytest.raises(ValueError): Attribute(wraptext("id"), wraptext("foo bar baz"), None) def test_padding(): """test getter/setter for the padding attributes""" for pad in ["pad_first", "pad_before_eq", "pad_after_eq"]: node = Attribute(wraptext("id"), wraptext("foo"), **{pad: "\n"}) assert "\n" == getattr(node, pad) setattr(node, pad, " ") assert " " == getattr(node, pad) setattr(node, pad, None) assert "" == getattr(node, pad) with pytest.raises(ValueError): node.__setattr__(pad, True) mwparserfromhell-0.6.3/tests/test_builder.py000066400000000000000000000667021411406531600213260ustar00rootroot00000000000000# Copyright (C) 2012-2019 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Tests for the builder, which turns tokens into Wikicode objects. """ import pytest from mwparserfromhell.nodes import ( Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink, ) from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.parser import tokens, ParserError from mwparserfromhell.parser.builder import Builder from .conftest import assert_wikicode_equal, wrap, wraptext @pytest.fixture() def builder(): return Builder() @pytest.mark.parametrize( "test,valid", [ ([tokens.Text(text="foobar")], wraptext("foobar")), ([tokens.Text(text="fóóbar")], wraptext("fóóbar")), ( [tokens.Text(text="spam"), tokens.Text(text="eggs")], wraptext("spam", "eggs"), ), ], ) def test_text(builder, test, valid): """tests for building Text nodes""" assert_wikicode_equal(valid, builder.build(test)) @pytest.mark.parametrize( "test,valid", [ ( [tokens.TemplateOpen(), tokens.Text(text="foobar"), tokens.TemplateClose()], wrap([Template(wraptext("foobar"))]), ), ( [ tokens.TemplateOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.TemplateClose(), ], wrap([Template(wraptext("spam", "eggs"))]), ), ( [ tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateClose(), ], wrap( [ Template( wraptext("foo"), params=[ Parameter(wraptext("1"), wraptext("bar"), showkey=False) ], ) ] ), ), ( [ tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateParamEquals(), tokens.Text(text="baz"), tokens.TemplateClose(), ], wrap( [ Template( wraptext("foo"), params=[Parameter(wraptext("bar"), wraptext("baz"))], ) ] ), ), ( [ 
tokens.TemplateOpen(), tokens.TemplateParamSeparator(), tokens.TemplateParamSeparator(), tokens.TemplateParamEquals(), tokens.TemplateParamSeparator(), tokens.TemplateClose(), ], wrap( [ Template( wrap([]), params=[ Parameter(wraptext("1"), wrap([]), showkey=False), Parameter(wrap([]), wrap([]), showkey=True), Parameter(wraptext("2"), wrap([]), showkey=False), ], ) ] ), ), ( [ tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateParamEquals(), tokens.Text(text="baz"), tokens.TemplateParamSeparator(), tokens.Text(text="biz"), tokens.TemplateParamSeparator(), tokens.Text(text="buzz"), tokens.TemplateParamSeparator(), tokens.Text(text="3"), tokens.TemplateParamEquals(), tokens.Text(text="buff"), tokens.TemplateParamSeparator(), tokens.Text(text="baff"), tokens.TemplateClose(), ], wrap( [ Template( wraptext("foo"), params=[ Parameter(wraptext("bar"), wraptext("baz")), Parameter(wraptext("1"), wraptext("biz"), showkey=False), Parameter(wraptext("2"), wraptext("buzz"), showkey=False), Parameter(wraptext("3"), wraptext("buff")), Parameter(wraptext("3"), wraptext("baff"), showkey=False), ], ) ] ), ), ], ) def test_template(builder, test, valid): """tests for building Template nodes""" assert_wikicode_equal(valid, builder.build(test)) @pytest.mark.parametrize( "test,valid", [ ( [tokens.ArgumentOpen(), tokens.Text(text="foobar"), tokens.ArgumentClose()], wrap([Argument(wraptext("foobar"))]), ), ( [ tokens.ArgumentOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.ArgumentClose(), ], wrap([Argument(wraptext("spam", "eggs"))]), ), ( [ tokens.ArgumentOpen(), tokens.Text(text="foo"), tokens.ArgumentSeparator(), tokens.Text(text="bar"), tokens.ArgumentClose(), ], wrap([Argument(wraptext("foo"), wraptext("bar"))]), ), ( [ tokens.ArgumentOpen(), tokens.Text(text="foo"), tokens.Text(text="bar"), tokens.ArgumentSeparator(), tokens.Text(text="baz"), tokens.Text(text="biz"), tokens.ArgumentClose(), ], 
wrap([Argument(wraptext("foo", "bar"), wraptext("baz", "biz"))]), ), ], ) def test_argument(builder, test, valid): """tests for building Argument nodes""" assert_wikicode_equal(valid, builder.build(test)) @pytest.mark.parametrize( "test,valid", [ ( [tokens.WikilinkOpen(), tokens.Text(text="foobar"), tokens.WikilinkClose()], wrap([Wikilink(wraptext("foobar"))]), ), ( [ tokens.WikilinkOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.WikilinkClose(), ], wrap([Wikilink(wraptext("spam", "eggs"))]), ), ( [ tokens.WikilinkOpen(), tokens.Text(text="foo"), tokens.WikilinkSeparator(), tokens.Text(text="bar"), tokens.WikilinkClose(), ], wrap([Wikilink(wraptext("foo"), wraptext("bar"))]), ), ( [ tokens.WikilinkOpen(), tokens.Text(text="foo"), tokens.Text(text="bar"), tokens.WikilinkSeparator(), tokens.Text(text="baz"), tokens.Text(text="biz"), tokens.WikilinkClose(), ], wrap([Wikilink(wraptext("foo", "bar"), wraptext("baz", "biz"))]), ), ], ) def test_wikilink(builder, test, valid): """tests for building Wikilink nodes""" assert_wikicode_equal(valid, builder.build(test)) @pytest.mark.parametrize( "test,valid", [ ( [ tokens.ExternalLinkOpen(brackets=False), tokens.Text(text="http://example.com/"), tokens.ExternalLinkClose(), ], wrap([ExternalLink(wraptext("http://example.com/"), brackets=False)]), ), ( [ tokens.ExternalLinkOpen(brackets=True), tokens.Text(text="http://example.com/"), tokens.ExternalLinkClose(), ], wrap([ExternalLink(wraptext("http://example.com/"))]), ), ( [ tokens.ExternalLinkOpen(brackets=True), tokens.Text(text="http://example.com/"), tokens.ExternalLinkSeparator(), tokens.ExternalLinkClose(), ], wrap([ExternalLink(wraptext("http://example.com/"), wrap([]))]), ), ( [ tokens.ExternalLinkOpen(brackets=True), tokens.Text(text="http://example.com/"), tokens.ExternalLinkSeparator(), tokens.Text(text="Example"), tokens.ExternalLinkClose(), ], wrap([ExternalLink(wraptext("http://example.com/"), wraptext("Example"))]), ), ( [ 
tokens.ExternalLinkOpen(brackets=False), tokens.Text(text="http://example"), tokens.Text(text=".com/foo"), tokens.ExternalLinkClose(), ], wrap( [ExternalLink(wraptext("http://example", ".com/foo"), brackets=False)] ), ), ( [ tokens.ExternalLinkOpen(brackets=True), tokens.Text(text="http://example"), tokens.Text(text=".com/foo"), tokens.ExternalLinkSeparator(), tokens.Text(text="Example"), tokens.Text(text=" Web Page"), tokens.ExternalLinkClose(), ], wrap( [ ExternalLink( wraptext("http://example", ".com/foo"), wraptext("Example", " Web Page"), ) ] ), ), ], ) def test_external_link(builder, test, valid): """tests for building ExternalLink nodes""" assert_wikicode_equal(valid, builder.build(test)) @pytest.mark.parametrize( "test,valid", [ ( [ tokens.HTMLEntityStart(), tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(), ], wrap([HTMLEntity("nbsp", named=True, hexadecimal=False)]), ), ( [ tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), tokens.Text(text="107"), tokens.HTMLEntityEnd(), ], wrap([HTMLEntity("107", named=False, hexadecimal=False)]), ), ( [ tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), tokens.HTMLEntityHex(char="X"), tokens.Text(text="6B"), tokens.HTMLEntityEnd(), ], wrap([HTMLEntity("6B", named=False, hexadecimal=True, hex_char="X")]), ), ], ) def test_html_entity(builder, test, valid): """tests for building HTMLEntity nodes""" assert_wikicode_equal(valid, builder.build(test)) @pytest.mark.parametrize( "test,valid", [ ( [ tokens.HeadingStart(level=2), tokens.Text(text="foobar"), tokens.HeadingEnd(), ], wrap([Heading(wraptext("foobar"), 2)]), ), ( [ tokens.HeadingStart(level=4), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.HeadingEnd(), ], wrap([Heading(wraptext("spam", "eggs"), 4)]), ), ], ) def test_heading(builder, test, valid): """tests for building Heading nodes""" assert_wikicode_equal(valid, builder.build(test)) @pytest.mark.parametrize( "test,valid", [ ( [tokens.CommentStart(), tokens.Text(text="foobar"), 
tokens.CommentEnd()], wrap([Comment("foobar")]), ), ( [ tokens.CommentStart(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.CommentEnd(), ], wrap([Comment("spameggs")]), ), ], ) def test_comment(builder, test, valid): """tests for building Comment nodes""" assert_wikicode_equal(valid, builder.build(test)) @pytest.mark.parametrize( "test,valid", [ # ( [ tokens.TagOpenOpen(), tokens.Text(text="ref"), tokens.TagCloseOpen(padding=""), tokens.TagOpenClose(), tokens.Text(text="ref"), tokens.TagCloseClose(), ], wrap([Tag(wraptext("ref"), wrap([]), closing_tag=wraptext("ref"))]), ), # ( [ tokens.TagOpenOpen(), tokens.Text(text="ref"), tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), tokens.Text(text="name"), tokens.TagCloseOpen(padding=""), tokens.TagOpenClose(), tokens.Text(text="ref"), tokens.TagCloseClose(), ], wrap([Tag(wraptext("ref"), wrap([]), attrs=[Attribute(wraptext("name"))])]), ), # ( [ tokens.TagOpenOpen(), tokens.Text(text="ref"), tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), tokens.Text(text="name"), tokens.TagAttrEquals(), tokens.TagAttrQuote(char='"'), tokens.Text(text="abc"), tokens.TagCloseSelfclose(padding=" "), ], wrap( [ Tag( wraptext("ref"), attrs=[Attribute(wraptext("name"), wraptext("abc"))], self_closing=True, padding=" ", ) ] ), ), #
    ( [ tokens.TagOpenOpen(), tokens.Text(text="br"), tokens.TagCloseSelfclose(padding=""), ], wrap([Tag(wraptext("br"), self_closing=True)]), ), #
  • ( [ tokens.TagOpenOpen(), tokens.Text(text="li"), tokens.TagCloseSelfclose(padding="", implicit=True), ], wrap([Tag(wraptext("li"), self_closing=True, implicit=True)]), ), #
    ( [ tokens.TagOpenOpen(invalid=True), tokens.Text(text="br"), tokens.TagCloseSelfclose(padding="", implicit=True), ], wrap([Tag(wraptext("br"), self_closing=True, invalid=True, implicit=True)]), ), #
    ( [ tokens.TagOpenOpen(invalid=True), tokens.Text(text="br"), tokens.TagCloseSelfclose(padding=""), ], wrap([Tag(wraptext("br"), self_closing=True, invalid=True)]), ), # [[Source]] ( [ tokens.TagOpenOpen(), tokens.Text(text="ref"), tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), tokens.Text(text="name"), tokens.TagAttrEquals(), tokens.TemplateOpen(), tokens.Text(text="abc"), tokens.TemplateClose(), tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), tokens.Text(text="foo"), tokens.TagAttrEquals(), tokens.TagAttrQuote(char='"'), tokens.Text(text="bar "), tokens.TemplateOpen(), tokens.Text(text="baz"), tokens.TemplateClose(), tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), tokens.Text(text="abc"), tokens.TagAttrEquals(), tokens.TemplateOpen(), tokens.Text(text="de"), tokens.TemplateClose(), tokens.Text(text="f"), tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), tokens.Text(text="ghi"), tokens.TagAttrEquals(), tokens.Text(text="j"), tokens.TemplateOpen(), tokens.Text(text="k"), tokens.TemplateClose(), tokens.TemplateOpen(), tokens.Text(text="l"), tokens.TemplateClose(), tokens.TagAttrStart( pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" " ), tokens.Text(text="mno"), tokens.TagAttrEquals(), tokens.TagAttrQuote(char="'"), tokens.TemplateOpen(), tokens.Text(text="p"), tokens.TemplateClose(), tokens.Text(text=" "), tokens.WikilinkOpen(), tokens.Text(text="q"), tokens.WikilinkClose(), tokens.Text(text=" "), tokens.TemplateOpen(), tokens.Text(text="r"), tokens.TemplateClose(), tokens.TagCloseOpen(padding=""), tokens.WikilinkOpen(), tokens.Text(text="Source"), tokens.WikilinkClose(), tokens.TagOpenClose(), tokens.Text(text="ref"), tokens.TagCloseClose(), ], wrap( [ Tag( wraptext("ref"), wrap([Wikilink(wraptext("Source"))]), [ Attribute( wraptext("name"), wrap([Template(wraptext("abc"))]), None, ), Attribute( wraptext("foo"), wrap([Text("bar "), Template(wraptext("baz"))]), 
pad_first=" ", ), Attribute( wraptext("abc"), wrap([Template(wraptext("de")), Text("f")]), None, ), Attribute( wraptext("ghi"), wrap( [ Text("j"), Template(wraptext("k")), Template(wraptext("l")), ] ), None, ), Attribute( wraptext("mno"), wrap( [ Template(wraptext("p")), Text(" "), Wikilink(wraptext("q")), Text(" "), Template(wraptext("r")), ] ), "'", " \n ", " ", " ", ), ], ) ] ), ), # "''italic text''" ( [ tokens.TagOpenOpen(wiki_markup="''"), tokens.Text(text="i"), tokens.TagCloseOpen(), tokens.Text(text="italic text"), tokens.TagOpenClose(), tokens.Text(text="i"), tokens.TagCloseClose(), ], wrap([Tag(wraptext("i"), wraptext("italic text"), wiki_markup="''")]), ), # * bullet ( [ tokens.TagOpenOpen(wiki_markup="*"), tokens.Text(text="li"), tokens.TagCloseSelfclose(), tokens.Text(text=" bullet"), ], wrap( [ Tag(wraptext("li"), wiki_markup="*", self_closing=True), Text(" bullet"), ] ), ), ], ) def test_tag(builder, test, valid): """tests for building Tag nodes""" assert_wikicode_equal(valid, builder.build(test)) def test_integration(builder): """a test for building a combination of templates together""" # {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}} test = [ tokens.TemplateOpen(), tokens.TemplateOpen(), tokens.TemplateOpen(), tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateClose(), tokens.Text(text="bar"), tokens.TemplateParamSeparator(), tokens.Text(text="baz"), tokens.TemplateParamEquals(), tokens.Text(text="biz"), tokens.TemplateClose(), tokens.Text(text="buzz"), tokens.TemplateClose(), tokens.Text(text="usr"), tokens.TemplateParamSeparator(), tokens.TemplateOpen(), tokens.Text(text="bin"), tokens.TemplateClose(), tokens.TemplateClose(), ] valid = wrap( [ Template( wrap( [ Template( wrap( [ Template( wrap([Template(wraptext("foo")), Text("bar")]), params=[ Parameter(wraptext("baz"), wraptext("biz")) ], ), Text("buzz"), ] ) ), Text("usr"), ] ), params=[ Parameter( wraptext("1"), wrap([Template(wraptext("bin"))]), showkey=False ) ], ) ] ) 
assert_wikicode_equal(valid, builder.build(test)) def test_integration2(builder): """an even more audacious test for building a horrible wikicode mess""" # {{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}]]{{i|j= }} test = [ tokens.TemplateOpen(), tokens.Text(text="a"), tokens.TemplateParamSeparator(), tokens.Text(text="b"), tokens.TemplateParamSeparator(), tokens.TemplateOpen(), tokens.Text(text="c"), tokens.TemplateParamSeparator(), tokens.WikilinkOpen(), tokens.Text(text="d"), tokens.WikilinkClose(), tokens.ArgumentOpen(), tokens.Text(text="e"), tokens.ArgumentClose(), tokens.TemplateClose(), tokens.TemplateClose(), tokens.WikilinkOpen(), tokens.Text(text="f"), tokens.WikilinkSeparator(), tokens.ArgumentOpen(), tokens.Text(text="g"), tokens.ArgumentClose(), tokens.CommentStart(), tokens.Text(text="h"), tokens.CommentEnd(), tokens.WikilinkClose(), tokens.TemplateOpen(), tokens.Text(text="i"), tokens.TemplateParamSeparator(), tokens.Text(text="j"), tokens.TemplateParamEquals(), tokens.HTMLEntityStart(), tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(), tokens.TemplateClose(), ] valid = wrap( [ Template( wraptext("a"), params=[ Parameter(wraptext("1"), wraptext("b"), showkey=False), Parameter( wraptext("2"), wrap( [ Template( wraptext("c"), params=[ Parameter( wraptext("1"), wrap( [ Wikilink(wraptext("d")), Argument(wraptext("e")), ] ), showkey=False, ) ], ) ] ), showkey=False, ), ], ), Wikilink(wraptext("f"), wrap([Argument(wraptext("g")), Comment("h")])), Template( wraptext("i"), params=[ Parameter(wraptext("j"), wrap([HTMLEntity("nbsp", named=True)])) ], ), ] ) assert_wikicode_equal(valid, builder.build(test)) @pytest.mark.parametrize( "tokens", [ [tokens.TemplateOpen(), tokens.TemplateParamSeparator()], [tokens.TemplateOpen()], [tokens.ArgumentOpen()], [tokens.WikilinkOpen()], [tokens.ExternalLinkOpen()], [tokens.HeadingStart()], [tokens.CommentStart()], [tokens.TagOpenOpen(), tokens.TagAttrStart()], [tokens.TagOpenOpen()], ], ) def test_parser_errors(builder, tokens): 
"""test whether ParserError gets thrown for bad input""" with pytest.raises(ParserError): builder.build(tokens) def test_parser_errors_templateclose(builder): with pytest.raises( ParserError, match=r"_handle_token\(\) got unexpected TemplateClose" ): builder.build([tokens.TemplateClose()]) mwparserfromhell-0.6.3/tests/test_comment.py000066400000000000000000000037741411406531600213420ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Test cases for the Comment node. 
""" import pytest from mwparserfromhell.nodes import Comment def test_str(): """test Comment.__str__()""" node = Comment("foobar") assert "" == str(node) def test_children(): """test Comment.__children__()""" node = Comment("foobar") gen = node.__children__() with pytest.raises(StopIteration): next(gen) def test_strip(): """test Comment.__strip__()""" node = Comment("foobar") assert node.__strip__() is None def test_showtree(): """test Comment.__showtree__()""" output = [] node = Comment("foobar") node.__showtree__(output.append, None, None) assert [""] == output def test_contents(): """test getter/setter for the contents attribute""" node = Comment("foobar") assert "foobar" == node.contents node.contents = "barfoo" assert "barfoo" == node.contents mwparserfromhell-0.6.3/tests/test_docs.py000066400000000000000000000116221411406531600206170ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
""" Integration test cases for mwparserfromhell's documentation. """ import json from io import StringIO import os from urllib.parse import urlencode from urllib.request import urlopen import pytest import mwparserfromhell def assert_print(value, output): """Assertion check that *value*, when printed, produces *output*.""" buff = StringIO() print(value, end="", file=buff) buff.seek(0) assert output == buff.read() def test_readme_1(): """test a block of example code in the README""" text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" wikicode = mwparserfromhell.parse(text) assert_print(wikicode, "I has a template! {{foo|bar|baz|eggs=spam}} See it?") templates = wikicode.filter_templates() assert_print(templates, "['{{foo|bar|baz|eggs=spam}}']") template = templates[0] assert_print(template.name, "foo") assert_print(template.params, "['bar', 'baz', 'eggs=spam']") assert_print(template.get(1).value, "bar") assert_print(template.get("eggs").value, "spam") def test_readme_2(): """test a block of example code in the README""" text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" temps = mwparserfromhell.parse(text).filter_templates() res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" assert_print(temps, res) def test_readme_3(): """test a block of example code in the README""" code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") assert_print( code.filter_templates(recursive=False), "['{{foo|this {{includes a|template}}}}']", ) foo = code.filter_templates(recursive=False)[0] assert_print(foo.get(1).value, "this {{includes a|template}}") assert_print(foo.get(1).value.filter_templates()[0], "{{includes a|template}}") assert_print(foo.get(1).value.filter_templates()[0].get(1).value, "template") def test_readme_4(): """test a block of example code in the README""" text = "{{cleanup}} '''Foo''' is a [[bar]]. 
{{uncategorized}}" code = mwparserfromhell.parse(text) for template in code.filter_templates(): if template.name.matches("Cleanup") and not template.has("date"): template.add("date", "July 2012") res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}" assert_print(code, res) code.replace("{{uncategorized}}", "{{bar-stub}}") res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" assert_print(code, res) res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" assert_print(code.filter_templates(), res) text = str(code) res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" assert_print(text, res) assert text == code @pytest.mark.skipif("NOWEB" in os.environ, reason="web test disabled by environ var") def test_readme_5(): """test a block of example code in the README; includes a web call""" url1 = "https://en.wikipedia.org/w/api.php" url2 = "https://en.wikipedia.org/w/index.php?title={0}&action=raw" title = "Test" data = { "action": "query", "prop": "revisions", "rvprop": "content", "rvslots": "main", "rvlimit": 1, "titles": title, "format": "json", "formatversion": "2", } try: raw = urlopen(url1, urlencode(data).encode("utf8")).read() except OSError: pytest.skip("cannot continue because of unsuccessful web call") res = json.loads(raw.decode("utf8")) revision = res["query"]["pages"][0]["revisions"][0] text = revision["slots"]["main"]["content"] try: expected = urlopen(url2.format(title)).read().decode("utf8") except OSError: pytest.skip("cannot continue because of unsuccessful web call") actual = mwparserfromhell.parse(text) assert expected == actual mwparserfromhell-0.6.3/tests/test_external_link.py000066400000000000000000000115121411406531600225240ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, 
including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Test cases for the ExternalLink node. """ import pytest from mwparserfromhell.nodes import ExternalLink, Text from .conftest import assert_wikicode_equal, wrap, wraptext def test_str(): """test ExternalLink.__str__()""" node = ExternalLink(wraptext("http://example.com/"), brackets=False) assert "http://example.com/" == str(node) node2 = ExternalLink(wraptext("http://example.com/")) assert "[http://example.com/]" == str(node2) node3 = ExternalLink(wraptext("http://example.com/"), wrap([])) assert "[http://example.com/ ]" == str(node3) node4 = ExternalLink(wraptext("http://example.com/"), wraptext("Example Web Page")) assert "[http://example.com/ Example Web Page]" == str(node4) def test_children(): """test ExternalLink.__children__()""" node1 = ExternalLink(wraptext("http://example.com/"), brackets=False) node2 = ExternalLink( wraptext("http://example.com/"), wrap([Text("Example"), Text("Page")]) ) gen1 = node1.__children__() gen2 = node2.__children__() assert node1.url == next(gen1) assert node2.url == next(gen2) assert node2.title == next(gen2) with pytest.raises(StopIteration): next(gen1) with 
pytest.raises(StopIteration): next(gen2) def test_strip(): """test ExternalLink.__strip__()""" node1 = ExternalLink(wraptext("http://example.com"), brackets=False) node2 = ExternalLink(wraptext("http://example.com")) node3 = ExternalLink(wraptext("http://example.com"), wrap([])) node4 = ExternalLink(wraptext("http://example.com"), wraptext("Link")) assert "http://example.com" == node1.__strip__() assert node2.__strip__() is None assert node3.__strip__() is None assert "Link" == node4.__strip__() def test_showtree(): """test ExternalLink.__showtree__()""" output = [] getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) node1 = ExternalLink(wraptext("http://example.com"), brackets=False) node2 = ExternalLink(wraptext("http://example.com"), wraptext("Link")) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) valid = [(getter, node1.url), "[", (getter, node2.url), (getter, node2.title), "]"] assert valid == output def test_url(): """test getter/setter for the url attribute""" url = wraptext("http://example.com/") node1 = ExternalLink(url, brackets=False) node2 = ExternalLink(url, wraptext("Example")) assert url is node1.url assert url is node2.url node1.url = "mailto:héhehé@spam.com" node2.url = "mailto:héhehé@spam.com" assert_wikicode_equal(wraptext("mailto:héhehé@spam.com"), node1.url) assert_wikicode_equal(wraptext("mailto:héhehé@spam.com"), node2.url) def test_title(): """test getter/setter for the title attribute""" title = wraptext("Example!") node1 = ExternalLink(wraptext("http://example.com/"), brackets=False) node2 = ExternalLink(wraptext("http://example.com/"), title) assert None is node1.title assert title is node2.title node2.title = None assert None is node2.title node2.title = "My Website" assert_wikicode_equal(wraptext("My Website"), node2.title) def test_brackets(): """test getter/setter for the brackets attribute""" node1 = 
ExternalLink(wraptext("http://example.com/"), brackets=False) node2 = ExternalLink(wraptext("http://example.com/"), wraptext("Link")) assert node1.brackets is False assert node2.brackets is True node1.brackets = True node2.brackets = False assert node1.brackets is True assert node2.brackets is False assert "[http://example.com/]" == str(node1) assert "http://example.com/" == str(node2) mwparserfromhell-0.6.3/tests/test_heading.py000066400000000000000000000057761411406531600213030ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Test cases for the Heading node. 
""" import pytest from mwparserfromhell.nodes import Heading, Text from .conftest import assert_wikicode_equal, wrap, wraptext def test_str(): """test Heading.__str__()""" node = Heading(wraptext("foobar"), 2) assert "==foobar==" == str(node) node2 = Heading(wraptext(" zzz "), 5) assert "===== zzz =====" == str(node2) def test_children(): """test Heading.__children__()""" node = Heading(wrap([Text("foo"), Text("bar")]), 3) gen = node.__children__() assert node.title == next(gen) with pytest.raises(StopIteration): next(gen) def test_strip(): """test Heading.__strip__()""" node = Heading(wraptext("foobar"), 3) assert "foobar" == node.__strip__() def test_showtree(): """test Heading.__showtree__()""" output = [] getter = object() get = lambda code: output.append((getter, code)) node1 = Heading(wraptext("foobar"), 3) node2 = Heading(wraptext(" baz "), 4) node1.__showtree__(output.append, get, None) node2.__showtree__(output.append, get, None) valid = ["===", (getter, node1.title), "===", "====", (getter, node2.title), "===="] assert valid == output def test_title(): """test getter/setter for the title attribute""" title = wraptext("foobar") node = Heading(title, 3) assert title is node.title node.title = "héhehé" assert_wikicode_equal(wraptext("héhehé"), node.title) def test_level(): """test getter/setter for the level attribute""" node = Heading(wraptext("foobar"), 3) assert 3 == node.level node.level = 5 assert 5 == node.level with pytest.raises(ValueError): node.__setattr__("level", 0) with pytest.raises(ValueError): node.__setattr__("level", 7) with pytest.raises(ValueError): node.__setattr__("level", "abc") with pytest.raises(ValueError): node.__setattr__("level", False) mwparserfromhell-0.6.3/tests/test_html_entity.py000066400000000000000000000146531411406531600222360ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation 
files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Test cases for the HTMLEntity node. """ import pytest from mwparserfromhell.nodes import HTMLEntity def test_str(): """test HTMLEntity.__str__()""" node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) node2 = HTMLEntity("107", named=False, hexadecimal=False) node3 = HTMLEntity("6b", named=False, hexadecimal=True) node4 = HTMLEntity("6C", named=False, hexadecimal=True, hex_char="X") assert " " == str(node1) assert "k" == str(node2) assert "k" == str(node3) assert "l" == str(node4) def test_children(): """test HTMLEntity.__children__()""" node = HTMLEntity("nbsp", named=True, hexadecimal=False) gen = node.__children__() with pytest.raises(StopIteration): next(gen) def test_strip(): """test HTMLEntity.__strip__()""" node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) node2 = HTMLEntity("107", named=False, hexadecimal=False) node3 = HTMLEntity("e9", named=False, hexadecimal=True) assert "\xa0" == node1.__strip__(normalize=True) assert " " == node1.__strip__(normalize=False) assert "k" == node2.__strip__(normalize=True) assert "k" == 
node2.__strip__(normalize=False) assert "é" == node3.__strip__(normalize=True) assert "é" == node3.__strip__(normalize=False) def test_showtree(): """test HTMLEntity.__showtree__()""" output = [] node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) node2 = HTMLEntity("107", named=False, hexadecimal=False) node3 = HTMLEntity("e9", named=False, hexadecimal=True) node1.__showtree__(output.append, None, None) node2.__showtree__(output.append, None, None) node3.__showtree__(output.append, None, None) res = [" ", "k", "é"] assert res == output def test_value(): """test getter/setter for the value attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") assert "nbsp" == node1.value assert "107" == node2.value assert "e9" == node3.value node1.value = "ffa4" node2.value = 72 node3.value = "Sigma" assert "ffa4" == node1.value assert node1.named is False assert node1.hexadecimal is True assert "72" == node2.value assert node2.named is False assert node2.hexadecimal is False assert "Sigma" == node3.value assert node3.named is True assert node3.hexadecimal is False node1.value = "10FFFF" node2.value = 110000 node2.value = 1114111 with pytest.raises(ValueError): node3.__setattr__("value", "") with pytest.raises(ValueError): node3.__setattr__("value", "foobar") with pytest.raises(ValueError): node3.__setattr__("value", True) with pytest.raises(ValueError): node3.__setattr__("value", -1) with pytest.raises(ValueError): node1.__setattr__("value", 110000) with pytest.raises(ValueError): node1.__setattr__("value", "1114112") with pytest.raises(ValueError): node1.__setattr__("value", "12FFFF") def test_named(): """test getter/setter for the named attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") assert node1.named is True assert node2.named is False assert node3.named is False node1.named = 1 node2.named = 0 node3.named = 0 assert node1.named is True assert node2.named is False assert node3.named is False 
with pytest.raises(ValueError): node1.__setattr__("named", False) with pytest.raises(ValueError): node2.__setattr__("named", True) with pytest.raises(ValueError): node3.__setattr__("named", True) def test_hexadecimal(): """test getter/setter for the hexadecimal attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") assert node1.hexadecimal is False assert node2.hexadecimal is False assert node3.hexadecimal is True node1.hexadecimal = False node2.hexadecimal = True node3.hexadecimal = False assert node1.hexadecimal is False assert node2.hexadecimal is True assert node3.hexadecimal is False with pytest.raises(ValueError): node1.__setattr__("hexadecimal", True) def test_hex_char(): """test getter/setter for the hex_char attribute""" node1 = HTMLEntity("e9") node2 = HTMLEntity("e9", hex_char="X") assert "x" == node1.hex_char assert "X" == node2.hex_char node1.hex_char = "X" node2.hex_char = "x" assert "X" == node1.hex_char assert "x" == node2.hex_char with pytest.raises(ValueError): node1.__setattr__("hex_char", 123) with pytest.raises(ValueError): node1.__setattr__("hex_char", "foobar") with pytest.raises(ValueError): node1.__setattr__("hex_char", True) def test_normalize(): """test getter/setter for the normalize attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") node4 = HTMLEntity("1f648") node5 = HTMLEntity("-2") node6 = HTMLEntity("110000", named=False, hexadecimal=True) assert "\xa0" == node1.normalize() assert "k" == node2.normalize() assert "é" == node3.normalize() assert "\U0001F648" == node4.normalize() with pytest.raises(ValueError): node5.normalize() with pytest.raises(ValueError): node6.normalize() mwparserfromhell-0.6.3/tests/test_parameter.py000066400000000000000000000052641411406531600216540ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated 
documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Test cases for the Parameter node extra. """ import pytest from mwparserfromhell.nodes.extras import Parameter from .conftest import assert_wikicode_equal, wraptext def test_str(): """test Parameter.__str__()""" node = Parameter(wraptext("1"), wraptext("foo"), showkey=False) assert "foo" == str(node) node2 = Parameter(wraptext("foo"), wraptext("bar")) assert "foo=bar" == str(node2) def test_name(): """test getter/setter for the name attribute""" name1 = wraptext("1") name2 = wraptext("foobar") node1 = Parameter(name1, wraptext("foobar"), showkey=False) node2 = Parameter(name2, wraptext("baz")) assert name1 is node1.name assert name2 is node2.name node1.name = "héhehé" node2.name = "héhehé" assert_wikicode_equal(wraptext("héhehé"), node1.name) assert_wikicode_equal(wraptext("héhehé"), node2.name) def test_value(): """test getter/setter for the value attribute""" value = wraptext("bar") node = Parameter(wraptext("foo"), value) assert value is node.value node.value = "héhehé" assert_wikicode_equal(wraptext("héhehé"), node.value) def 
test_showkey(): """test getter/setter for the showkey attribute""" node1 = Parameter(wraptext("1"), wraptext("foo"), showkey=False) node2 = Parameter(wraptext("foo"), wraptext("bar")) assert node1.showkey is False assert node2.showkey is True node1.showkey = True assert node1.showkey is True node1.showkey = "" assert node1.showkey is False with pytest.raises(ValueError): node2.__setattr__("showkey", False) mwparserfromhell-0.6.3/tests/test_parser.py000066400000000000000000000072371411406531600211720ustar00rootroot00000000000000# Copyright (C) 2012-2016 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Tests for the Parser class itself, which tokenizes and builds nodes. 
""" import pytest from mwparserfromhell import parser from mwparserfromhell.nodes import Tag, Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter from .conftest import assert_wikicode_equal, wrap, wraptext @pytest.fixture() def pyparser(): """make sure the correct tokenizer is used""" restore = parser.use_c if parser.use_c: parser.use_c = False yield parser.use_c = restore def test_use_c(pyparser): assert parser.Parser()._tokenizer.USES_C is False def test_parsing(pyparser): """integration test for parsing overall""" text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" expected = wrap( [ Text("this is text; "), Template( wraptext("this"), [ Parameter(wraptext("is"), wraptext("a")), Parameter( wraptext("template"), wrap( [ Template( wraptext("with"), [ Parameter( wraptext("1"), wrap([Wikilink(wraptext("links"))]), showkey=False, ), Parameter( wraptext("2"), wraptext("in"), showkey=False ), ], ), Text("it"), ] ), ), ], ), ] ) actual = parser.Parser().parse(text) assert_wikicode_equal(expected, actual) def test_skip_style_tags(pyparser): """test Parser.parse(skip_style_tags=True)""" text = "This is an example with ''italics''!" 
a = wrap( [ Text("This is an example with "), Tag(wraptext("i"), wraptext("italics"), wiki_markup="''"), Text("!"), ] ) b = wraptext("This is an example with ''italics''!") with_style = parser.Parser().parse(text, skip_style_tags=False) without_style = parser.Parser().parse(text, skip_style_tags=True) assert_wikicode_equal(a, with_style) assert_wikicode_equal(b, without_style) mwparserfromhell-0.6.3/tests/test_smart_list.py000066400000000000000000000315021411406531600220470ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Test cases for the SmartList class and its child, ListProxy. 
""" import pytest from mwparserfromhell.smart_list import SmartList from mwparserfromhell.smart_list.list_proxy import ListProxy def _test_get_set_del_item(builder): """Run tests on __get/set/delitem__ of a list built with *builder*.""" list1 = builder([0, 1, 2, 3, "one", "two"]) list2 = builder(list(range(10))) assert 1 == list1[1] assert "one" == list1[-2] assert [2, 3] == list1[2:4] with pytest.raises(IndexError): list1[6] with pytest.raises(IndexError): list1[-7] assert [0, 1, 2] == list1[:3] assert [0, 1, 2, 3, "one", "two"] == list1[:] assert [3, "one", "two"] == list1[3:] assert [3, "one", "two"] == list1[3:100] assert ["one", "two"] == list1[-2:] assert [0, 1] == list1[:-4] assert [] == list1[6:] assert [] == list1[4:2] assert [0, 2, "one"] == list1[0:5:2] assert [0, 2] == list1[0:-3:2] assert [0, 1, 2, 3, "one", "two"] == list1[::] assert [2, 3, "one", "two"] == list1[2::] assert [0, 1, 2, 3] == list1[:4:] assert [2, 3] == list1[2:4:] assert [0, 2, 4, 6, 8] == list2[::2] assert [2, 5, 8] == list2[2::3] assert [0, 3] == list2[:6:3] assert [2, 5, 8] == list2[-8:9:3] assert [] == list2[100000:1000:-100] list1[3] = 100 assert 100 == list1[3] list1[-3] = 101 assert [0, 1, 2, 101, "one", "two"] == list1 list1[5:] = [6, 7, 8] assert [6, 7, 8] == list1[5:] assert [0, 1, 2, 101, "one", 6, 7, 8] == list1 list1[2:4] = [-1, -2, -3, -4, -5] assert [0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8] == list1 list1[0:-3] = [99] assert [99, 6, 7, 8] == list1 list2[0:6:2] = [100, 102, 104] assert [100, 1, 102, 3, 104, 5, 6, 7, 8, 9] == list2 list2[::3] = [200, 203, 206, 209] assert [200, 1, 102, 203, 104, 5, 206, 7, 8, 209] == list2 list2[::] = range(7) assert [0, 1, 2, 3, 4, 5, 6] == list2 with pytest.raises(ValueError): list2[0:5:2] = [100, 102, 104, 106] with pytest.raises(IndexError): list2[7] = "foo" with pytest.raises(IndexError): list2[-8] = "foo" del list2[2] assert [0, 1, 3, 4, 5, 6] == list2 del list2[-3] assert [0, 1, 3, 5, 6] == list2 with pytest.raises(IndexError): del 
list2[100] with pytest.raises(IndexError): del list2[-6] list2[:] = range(10) del list2[3:6] assert [0, 1, 2, 6, 7, 8, 9] == list2 del list2[-2:] assert [0, 1, 2, 6, 7] == list2 del list2[:2] assert [2, 6, 7] == list2 list2[:] = range(10) del list2[2:8:2] assert [0, 1, 3, 5, 7, 8, 9] == list2 def _test_add_radd_iadd(builder): """Run tests on __r/i/add__ of a list built with *builder*.""" list1 = builder(range(5)) list2 = builder(range(5, 10)) assert [0, 1, 2, 3, 4, 5, 6] == list1 + [5, 6] assert [0, 1, 2, 3, 4] == list1 assert list(range(10)) == list1 + list2 assert [-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1 assert [0, 1, 2, 3, 4] == list1 list1 += ["foo", "bar", "baz"] assert [0, 1, 2, 3, 4, "foo", "bar", "baz"] == list1 def _test_other_magic_methods(builder): """Run tests on other magic methods of a list built with *builder*.""" list1 = builder([0, 1, 2, 3, "one", "two"]) list2 = builder([]) list3 = builder([0, 2, 3, 4]) list4 = builder([0, 1, 2]) assert "[0, 1, 2, 3, 'one', 'two']" == str(list1) assert b"\x00\x01\x02" == bytes(list4) assert "[0, 1, 2, 3, 'one', 'two']" == repr(list1) assert list1 < list3 assert list1 <= list3 assert list1 != list3 assert list1 != list3 assert list1 <= list3 assert list1 < list3 other1 = [0, 2, 3, 4] assert list1 < other1 assert list1 <= other1 assert list1 != other1 assert list1 != other1 assert list1 <= other1 assert list1 < other1 other2 = [0, 0, 1, 2] assert list1 >= other2 assert list1 > other2 assert list1 != other2 assert list1 != other2 assert list1 > other2 assert list1 >= other2 other3 = [0, 1, 2, 3, "one", "two"] assert list1 >= other3 assert list1 <= other3 assert list1 == other3 assert list1 == other3 assert list1 <= other3 assert list1 >= other3 assert bool(list1) is True assert bool(list2) is False assert 6 == len(list1) assert 0 == len(list2) out = [] for obj in list1: out.append(obj) assert [0, 1, 2, 3, "one", "two"] == out out = [] for ch in list2: out.append(ch) assert [] == out gen1 = iter(list1) out = [] for _ 
in range(len(list1)): out.append(next(gen1)) with pytest.raises(StopIteration): next(gen1) assert [0, 1, 2, 3, "one", "two"] == out gen2 = iter(list2) with pytest.raises(StopIteration): next(gen2) assert ["two", "one", 3, 2, 1, 0] == list(reversed(list1)) assert [] == list(reversed(list2)) assert "one" in list1 assert 3 in list1 assert 10 not in list1 assert 0 not in list2 assert [] == list2 * 5 assert [] == 5 * list2 assert [0, 1, 2, 0, 1, 2, 0, 1, 2] == list4 * 3 assert [0, 1, 2, 0, 1, 2, 0, 1, 2] == 3 * list4 list4 *= 2 assert [0, 1, 2, 0, 1, 2] == list4 def _test_list_methods(builder): """Run tests on the public methods of a list built with *builder*.""" list1 = builder(range(5)) list2 = builder(["foo"]) list3 = builder([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) list1.append(5) list1.append(1) list1.append(2) assert [0, 1, 2, 3, 4, 5, 1, 2] == list1 assert 0 == list1.count(6) assert 2 == list1.count(1) list1.extend(range(5, 8)) assert [0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7] == list1 assert 1 == list1.index(1) assert 6 == list1.index(1, 3) assert 6 == list1.index(1, 3, 7) with pytest.raises(ValueError): list1.index(1, 3, 5) list1.insert(0, -1) assert [-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7] == list1 list1.insert(-1, 6.5) assert [-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7] == list1 list1.insert(13, 8) assert [-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8] == list1 assert 8 == list1.pop() assert 7 == list1.pop() assert [-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5] == list1 assert -1 == list1.pop(0) assert 5 == list1.pop(5) assert 6.5 == list1.pop(-1) assert [0, 1, 2, 3, 4, 1, 2, 5, 6] == list1 assert "foo" == list2.pop() with pytest.raises(IndexError): list2.pop() assert [] == list2 list1.remove(6) assert [0, 1, 2, 3, 4, 1, 2, 5] == list1 list1.remove(1) assert [0, 2, 3, 4, 1, 2, 5] == list1 list1.remove(1) assert [0, 2, 3, 4, 2, 5] == list1 with pytest.raises(ValueError): list1.remove(1) list1.reverse() assert [5, 2, 4, 3, 2, 0] == list1 list1.sort() assert [0, 2, 2, 3, 4, 5] == 
list1 list1.sort(reverse=True) assert [5, 4, 3, 2, 2, 0] == list1 list3.sort(key=lambda i: i[1]) assert [("d", 2), ("c", 3), ("a", 5), ("b", 8)] == list3 list3.sort(key=lambda i: i[1], reverse=True) assert [("b", 8), ("a", 5), ("c", 3), ("d", 2)] == list3 def _dispatch_test_for_children(meth): """Run a test method on various different types of children.""" meth(lambda L: SmartList(list(L))[:]) meth(lambda L: SmartList([999] + list(L))[1:]) meth(lambda L: SmartList(list(L) + [999])[:-1]) meth(lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2]) def test_docs(): """make sure the methods of SmartList/ListProxy have docstrings""" methods = [ "append", "count", "extend", "index", "insert", "pop", "remove", "reverse", "sort", ] for meth in methods: expected = getattr(list, meth).__doc__ smartlist_doc = getattr(SmartList, meth).__doc__ listproxy_doc = getattr(ListProxy, meth).__doc__ assert expected == smartlist_doc assert expected == listproxy_doc def test_doctest(): """make sure the test embedded in SmartList's docstring passes""" parent = SmartList([0, 1, 2, 3]) assert [0, 1, 2, 3] == parent child = parent[2:] assert [2, 3] == child child.append(4) assert [2, 3, 4] == child assert [0, 1, 2, 3, 4] == parent def test_parent_get_set_del(): """make sure SmartList's getitem/setitem/delitem work""" _test_get_set_del_item(SmartList) def test_parent_add(): """make sure SmartList's add/radd/iadd work""" _test_add_radd_iadd(SmartList) def test_parent_other_magics(): """make sure SmartList's other magically implemented features work""" _test_other_magic_methods(SmartList) def test_parent_methods(): """make sure SmartList's non-magic methods work, like append()""" _test_list_methods(SmartList) def test_child_get_set_del(): """make sure ListProxy's getitem/setitem/delitem work""" _dispatch_test_for_children(_test_get_set_del_item) def test_child_add(): """make sure ListProxy's add/radd/iadd work""" _dispatch_test_for_children(_test_add_radd_iadd) def 
test_child_other_magics(): """make sure ListProxy's other magically implemented features work""" _dispatch_test_for_children(_test_other_magic_methods) def test_child_methods(): """make sure ListProxy's non-magic methods work, like append()""" _dispatch_test_for_children(_test_list_methods) def test_influence(): """make sure changes are propagated from parents to children""" parent = SmartList([0, 1, 2, 3, 4, 5]) child1 = parent[2:] child2 = parent[2:5] assert [0, 1, 2, 3, 4, 5] == parent assert [2, 3, 4, 5] == child1 assert [2, 3, 4] == child2 assert 2 == len(parent._children) parent.append(6) child1.append(7) child2.append(4.5) assert [0, 1, 2, 3, 4, 4.5, 5, 6, 7] == parent assert [2, 3, 4, 4.5, 5, 6, 7] == child1 assert [2, 3, 4, 4.5] == child2 parent.insert(0, -1) parent.insert(4, 2.5) parent.insert(10, 6.5) assert [-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7] == parent assert [2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7] == child1 assert [2, 2.5, 3, 4, 4.5] == child2 assert 7 == parent.pop() assert 6.5 == child1.pop() assert 4.5 == child2.pop() assert [-1, 0, 1, 2, 2.5, 3, 4, 5, 6] == parent assert [2, 2.5, 3, 4, 5, 6] == child1 assert [2, 2.5, 3, 4] == child2 parent.remove(-1) child1.remove(2.5) assert [0, 1, 2, 3, 4, 5, 6] == parent assert [2, 3, 4, 5, 6] == child1 assert [2, 3, 4] == child2 assert 0 == parent.pop(0) assert [1, 2, 3, 4, 5, 6] == parent assert [2, 3, 4, 5, 6] == child1 assert [2, 3, 4] == child2 child2.reverse() assert [1, 4, 3, 2, 5, 6] == parent assert [4, 3, 2, 5, 6] == child1 assert [4, 3, 2] == child2 parent.extend([7, 8]) child1.extend([8.1, 8.2]) child2.extend([1.9, 1.8]) assert [1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2] == parent assert [4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2] == child1 assert [4, 3, 2, 1.9, 1.8] == child2 child3 = parent[9:] assert [8, 8.1, 8.2] == child3 del parent[8:] assert [1, 4, 3, 2, 1.9, 1.8, 5, 6] == parent assert [4, 3, 2, 1.9, 1.8, 5, 6] == child1 assert [4, 3, 2, 1.9, 1.8] == child2 assert [] == child3 assert 0 == 
len(child3) del child1 assert [1, 4, 3, 2, 1.9, 1.8, 5, 6] == parent assert [4, 3, 2, 1.9, 1.8] == child2 assert [] == child3 assert 2 == len(parent._children) del child3 assert [1, 4, 3, 2, 1.9, 1.8, 5, 6] == parent assert [4, 3, 2, 1.9, 1.8] == child2 assert 1 == len(parent._children) parent.remove(1.9) parent.remove(1.8) assert [1, 4, 3, 2, 5, 6] == parent assert [4, 3, 2] == child2 parent.reverse() assert [6, 5, 2, 3, 4, 1] == parent assert [4, 3, 2] == child2 assert 0 == len(parent._children) mwparserfromhell-0.6.3/tests/test_string_mixin.py000066400000000000000000000343541411406531600224100ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Test cases for the StringMixIn class. 
""" import sys from types import GeneratorType import pytest from mwparserfromhell.string_mixin import StringMixIn class _FakeString(StringMixIn): def __init__(self, data): self._data = data def __str__(self): return self._data @pytest.mark.parametrize( "method", [ "capitalize", "casefold", "center", "count", "encode", "endswith", "expandtabs", "find", "format", "format_map", "index", "isalnum", "isalpha", "isdecimal", "isdigit", "isidentifier", "islower", "isnumeric", "isprintable", "isspace", "istitle", "isupper", "join", "ljust", "lower", "lstrip", "maketrans", "partition", "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", "title", "translate", "upper", "zfill", ], ) def test_docs(method): """make sure the various methods of StringMixIn have docstrings""" expected = getattr("foo", method).__doc__ actual = getattr(_FakeString("foo"), method).__doc__ assert expected == actual def test_types(): """make sure StringMixIns convert to different types correctly""" fstr = _FakeString("fake string") assert str(fstr) == "fake string" assert bytes(fstr) == b"fake string" assert repr(fstr) == "'fake string'" assert isinstance(str(fstr), str) assert isinstance(bytes(fstr), bytes) assert isinstance(repr(fstr), str) def test_comparisons(): """make sure comparison operators work""" str1 = _FakeString("this is a fake string") str2 = _FakeString("this is a fake string") str3 = _FakeString("fake string, this is") str4 = "this is a fake string" str5 = "fake string, this is" assert str1 <= str2 assert str1 >= str2 assert str1 == str2 assert str1 == str2 assert str1 >= str2 assert str1 <= str2 assert str1 > str3 assert str1 >= str3 assert str1 != str3 assert str1 != str3 assert str1 >= str3 assert str1 > str3 assert str1 <= str4 assert str1 >= str4 assert str1 == str4 assert str1 == str4 assert str1 >= str4 assert str1 <= str4 assert str5 <= str1 assert str5 < str1 assert str5 != str1 assert str5 != str1 
assert str5 < str1 assert str5 <= str1 def test_other_magics(): """test other magically implemented features, like len() and iter()""" str1 = _FakeString("fake string") str2 = _FakeString("") expected = ["f", "a", "k", "e", " ", "s", "t", "r", "i", "n", "g"] assert bool(str1) is True assert bool(str2) is False assert 11 == len(str1) assert 0 == len(str2) out = [] for ch in str1: out.append(ch) assert expected == out out = [] for ch in str2: out.append(ch) assert [] == out gen1 = iter(str1) gen2 = iter(str2) assert isinstance(gen1, GeneratorType) assert isinstance(gen2, GeneratorType) out = [] for _ in range(len(str1)): out.append(next(gen1)) with pytest.raises(StopIteration): next(gen1) assert expected == out with pytest.raises(StopIteration): next(gen2) assert "gnirts ekaf" == "".join(list(reversed(str1))) assert [] == list(reversed(str2)) assert "f" == str1[0] assert " " == str1[4] assert "g" == str1[10] assert "n" == str1[-2] with pytest.raises(IndexError): str1[11] with pytest.raises(IndexError): str2[0] assert "k" in str1 assert "fake" in str1 assert "str" in str1 assert "" in str1 assert "" in str2 assert "real" not in str1 assert "s" not in str2 def test_other_methods(): """test the remaining non-magic methods of StringMixIn""" str1 = _FakeString("fake string") assert "Fake string" == str1.capitalize() assert " fake string " == str1.center(15) assert " fake string " == str1.center(16) assert "qqfake stringqq" == str1.center(15, "q") assert 1 == str1.count("e") assert 0 == str1.count("z") assert 1 == str1.count("r", 7) assert 0 == str1.count("r", 8) assert 1 == str1.count("r", 5, 9) assert 0 == str1.count("r", 5, 7) str3 = _FakeString("𐌲𐌿𐍄") actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" assert b"fake string" == str1.encode() assert actual == str3.encode("utf-8") assert actual == str3.encode(encoding="utf-8") if sys.getdefaultencoding() == "ascii": with pytest.raises(UnicodeEncodeError): str3.encode() elif sys.getdefaultencoding() == "utf-8": 
assert actual == str3.encode() with pytest.raises(UnicodeEncodeError): str3.encode("ascii") with pytest.raises(UnicodeEncodeError): str3.encode("ascii", "strict") if sys.getdefaultencoding() == "ascii": with pytest.raises(UnicodeEncodeError): str3.encode("ascii", errors="strict") elif sys.getdefaultencoding() == "utf-8": assert actual == str3.encode(errors="strict") assert b"" == str3.encode("ascii", "ignore") if sys.getdefaultencoding() == "ascii": assert b"" == str3.encode(errors="ignore") elif sys.getdefaultencoding() == "utf-8": assert actual == str3.encode(errors="ignore") assert str1.endswith("ing") is True assert str1.endswith("ingh") is False str4 = _FakeString("\tfoobar") assert "fake string" == str1 assert " foobar" == str4.expandtabs() assert " foobar" == str4.expandtabs(4) assert 3 == str1.find("e") assert -1 == str1.find("z") assert 7 == str1.find("r", 7) assert -1 == str1.find("r", 8) assert 7 == str1.find("r", 5, 9) assert -1 == str1.find("r", 5, 7) str5 = _FakeString("foo{0}baz") str6 = _FakeString("foo{abc}baz") str7 = _FakeString("foo{0}{abc}buzz") str8 = _FakeString("{0}{1}") assert "fake string" == str1.format() assert "foobarbaz" == str5.format("bar") assert "foobarbaz" == str6.format(abc="bar") assert "foobarbazbuzz" == str7.format("bar", abc="baz") with pytest.raises(IndexError): str8.format("abc") assert "fake string" == str1.format_map({}) assert "foobarbaz" == str6.format_map({"abc": "bar"}) with pytest.raises(ValueError): str5.format_map({0: "abc"}) assert 3 == str1.index("e") with pytest.raises(ValueError): str1.index("z") assert 7 == str1.index("r", 7) with pytest.raises(ValueError): str1.index("r", 8) assert 7 == str1.index("r", 5, 9) with pytest.raises(ValueError): str1.index("r", 5, 7) str9 = _FakeString("foobar") str10 = _FakeString("foobar123") str11 = _FakeString("foo bar") assert str9.isalnum() is True assert str10.isalnum() is True assert str11.isalnum() is False assert str9.isalpha() is True assert str10.isalpha() is False 
assert str11.isalpha() is False str12 = _FakeString("123") str13 = _FakeString("\u2155") str14 = _FakeString("\u00B2") assert str9.isdecimal() is False assert str12.isdecimal() is True assert str13.isdecimal() is False assert str14.isdecimal() is False assert str9.isdigit() is False assert str12.isdigit() is True assert str13.isdigit() is False assert str14.isdigit() is True assert str9.isidentifier() is True assert str10.isidentifier() is True assert str11.isidentifier() is False assert str12.isidentifier() is False str15 = _FakeString("") str16 = _FakeString("FooBar") assert str9.islower() is True assert str15.islower() is False assert str16.islower() is False assert str9.isnumeric() is False assert str12.isnumeric() is True assert str13.isnumeric() is True assert str14.isnumeric() is True str16B = _FakeString("\x01\x02") assert str9.isprintable() is True assert str13.isprintable() is True assert str14.isprintable() is True assert str15.isprintable() is True assert str16B.isprintable() is False str17 = _FakeString(" ") str18 = _FakeString("\t \t \r\n") assert str1.isspace() is False assert str9.isspace() is False assert str17.isspace() is True assert str18.isspace() is True str19 = _FakeString("This Sentence Looks Like A Title") str20 = _FakeString("This sentence doesn't LookLikeATitle") assert str15.istitle() is False assert str19.istitle() is True assert str20.istitle() is False str21 = _FakeString("FOOBAR") assert str9.isupper() is False assert str15.isupper() is False assert str21.isupper() is True assert "foobar" == str15.join(["foo", "bar"]) assert "foo123bar123baz" == str12.join(("foo", "bar", "baz")) assert "fake string " == str1.ljust(15) assert "fake string " == str1.ljust(16) assert "fake stringqqqq" == str1.ljust(15, "q") str22 = _FakeString("ß") assert "" == str15.lower() assert "foobar" == str16.lower() assert "ß" == str22.lower() assert "" == str15.casefold() assert "foobar" == str16.casefold() assert "ss" == str22.casefold() str23 = _FakeString(" 
fake string ") assert "fake string" == str1.lstrip() assert "fake string " == str23.lstrip() assert "ke string" == str1.lstrip("abcdef") assert ("fa", "ke", " string") == str1.partition("ke") assert ("fake string", "", "") == str1.partition("asdf") str24 = _FakeString("boo foo moo") assert "real string" == str1.replace("fake", "real") assert "bu fu moo" == str24.replace("oo", "u", 2) assert 3 == str1.rfind("e") assert -1 == str1.rfind("z") assert 7 == str1.rfind("r", 7) assert -1 == str1.rfind("r", 8) assert 7 == str1.rfind("r", 5, 9) assert -1 == str1.rfind("r", 5, 7) assert 3 == str1.rindex("e") with pytest.raises(ValueError): str1.rindex("z") assert 7 == str1.rindex("r", 7) with pytest.raises(ValueError): str1.rindex("r", 8) assert 7 == str1.rindex("r", 5, 9) with pytest.raises(ValueError): str1.rindex("r", 5, 7) assert " fake string" == str1.rjust(15) assert " fake string" == str1.rjust(16) assert "qqqqfake string" == str1.rjust(15, "q") assert ("fa", "ke", " string") == str1.rpartition("ke") assert ("", "", "fake string") == str1.rpartition("asdf") str25 = _FakeString(" this is a sentence with whitespace ") actual = ["this", "is", "a", "sentence", "with", "whitespace"] assert actual == str25.rsplit() assert actual == str25.rsplit(None) actual = [ "", "", "", "this", "is", "a", "", "", "sentence", "with", "", "whitespace", "", ] assert actual == str25.rsplit(" ") actual = [" this is a", "sentence", "with", "whitespace"] assert actual == str25.rsplit(None, 3) actual = [" this is a sentence with", "", "whitespace", ""] assert actual == str25.rsplit(" ", 3) actual = [" this is a", "sentence", "with", "whitespace"] assert actual == str25.rsplit(maxsplit=3) assert "fake string" == str1.rstrip() assert " fake string" == str23.rstrip() assert "fake stri" == str1.rstrip("ngr") actual = ["this", "is", "a", "sentence", "with", "whitespace"] assert actual == str25.split() assert actual == str25.split(None) actual = [ "", "", "", "this", "is", "a", "", "", "sentence", 
"with", "", "whitespace", "", ] assert actual == str25.split(" ") actual = ["this", "is", "a", "sentence with whitespace "] assert actual == str25.split(None, 3) actual = ["", "", "", "this is a sentence with whitespace "] assert actual == str25.split(" ", 3) actual = ["this", "is", "a", "sentence with whitespace "] assert actual == str25.split(maxsplit=3) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") assert ["lines", "of", "text", "are", "presented", "here"] == str26.splitlines() assert [ "lines\n", "of\n", "text\r\n", "are\r\n", "presented\n", "here", ] == str26.splitlines(True) assert str1.startswith("fake") is True assert str1.startswith("faker") is False assert "fake string" == str1.strip() assert "fake string" == str23.strip() assert "ke stri" == str1.strip("abcdefngr") assert "fOObAR" == str16.swapcase() assert "Fake String" == str1.title() table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", 111: "4", 117: "5"}) table2 = StringMixIn.maketrans("aeiou", "12345") table3 = StringMixIn.maketrans("aeiou", "12345", "rts") assert "f1k2 str3ng" == str1.translate(table1) assert "f1k2 str3ng" == str1.translate(table2) assert "f1k2 3ng" == str1.translate(table3) assert "" == str15.upper() assert "FOOBAR" == str16.upper() assert "123" == str12.zfill(3) assert "000123" == str12.zfill(6) mwparserfromhell-0.6.3/tests/test_tag.py000066400000000000000000000331111411406531600204370ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall 
be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Test cases for the Tag node. """ import pytest from mwparserfromhell.nodes import Tag, Template, Text from mwparserfromhell.nodes.extras import Attribute from .conftest import assert_wikicode_equal, wrap, wraptext agen = lambda name, value: Attribute(wraptext(name), wraptext(value)) agennv = lambda name: Attribute(wraptext(name)) agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), None) agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, '"', a, b, c) agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c) def test_str(): """test Tag.__str__()""" node1 = Tag(wraptext("ref")) node2 = Tag(wraptext("span"), wraptext("foo"), [agen("style", "color: red;")]) node3 = Tag( wraptext("ref"), attrs=[agennq("name", "foo"), agenpnv("some_attr", " ", "", "")], self_closing=True, ) node4 = Tag(wraptext("br"), self_closing=True, padding=" ") node5 = Tag(wraptext("br"), self_closing=True, implicit=True) node6 = Tag(wraptext("br"), self_closing=True, invalid=True, implicit=True) node7 = Tag(wraptext("br"), self_closing=True, invalid=True, padding=" ") node8 = Tag(wraptext("hr"), wiki_markup="----", self_closing=True) node9 = Tag(wraptext("i"), wraptext("italics!"), wiki_markup="''") assert "" == str(node1) assert 'foo' == str(node2) assert "" == str(node3) assert "
    " == str(node4) assert "
    " == str(node5) assert "
    " == str(node6) assert "
    " == str(node7) assert "----" == str(node8) assert "''italics!''" == str(node9) def test_children(): """test Tag.__children__()""" # foobar node1 = Tag(wraptext("ref"), wraptext("foobar")) # '''bold text''' node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''") # node3 = Tag( wraptext("img"), attrs=[agen("id", "foo"), agen("class", "bar"), agennv("selected")], self_closing=True, padding=" ", ) gen1 = node1.__children__() gen2 = node2.__children__() gen3 = node3.__children__() assert node1.tag == next(gen1) assert node3.tag == next(gen3) assert node3.attributes[0].name == next(gen3) assert node3.attributes[0].value == next(gen3) assert node3.attributes[1].name == next(gen3) assert node3.attributes[1].value == next(gen3) assert node3.attributes[2].name == next(gen3) assert node1.contents == next(gen1) assert node2.contents == next(gen2) assert node1.closing_tag == next(gen1) with pytest.raises(StopIteration): next(gen1) with pytest.raises(StopIteration): next(gen2) with pytest.raises(StopIteration): next(gen3) def test_strip(): """test Tag.__strip__()""" node1 = Tag(wraptext("i"), wraptext("foobar")) node2 = Tag(wraptext("math"), wraptext("foobar")) node3 = Tag(wraptext("br"), self_closing=True) assert "foobar" == node1.__strip__() assert node2.__strip__() is None assert node3.__strip__() is None def test_showtree(): """test Tag.__showtree__()""" output = [] getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) node1 = Tag( wraptext("ref"), wraptext("text"), [agen("name", "foo"), agennv("selected")] ) node2 = Tag(wraptext("br"), self_closing=True, padding=" ") node3 = Tag( wraptext("br"), self_closing=True, invalid=True, implicit=True, padding=" " ) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) node3.__showtree__(output.append, get, mark) valid = [ "<", (getter, node1.tag), (getter, node1.attributes[0].name), " = ", marker, (getter, 
node1.attributes[0].value), (getter, node1.attributes[1].name), ">", (getter, node1.contents), "", "<", (getter, node2.tag), "/>", "", ] assert valid == output def test_tag(): """test getter/setter for the tag attribute""" tag = wraptext("ref") node = Tag(tag, wraptext("text")) assert tag is node.tag assert tag is node.closing_tag node.tag = "span" assert_wikicode_equal(wraptext("span"), node.tag) assert_wikicode_equal(wraptext("span"), node.closing_tag) assert "text" == node def test_contents(): """test getter/setter for the contents attribute""" contents = wraptext("text") node = Tag(wraptext("ref"), contents) assert contents is node.contents node.contents = "text and a {{template}}" parsed = wrap([Text("text and a "), Template(wraptext("template"))]) assert_wikicode_equal(parsed, node.contents) assert "text and a {{template}}" == node def test_attributes(): """test getter for the attributes attribute""" attrs = [agen("name", "bar")] node1 = Tag(wraptext("ref"), wraptext("foo")) node2 = Tag(wraptext("ref"), wraptext("foo"), attrs) assert [] == node1.attributes assert attrs is node2.attributes def test_wiki_markup(): """test getter/setter for the wiki_markup attribute""" node = Tag(wraptext("i"), wraptext("italic text")) assert None is node.wiki_markup node.wiki_markup = "''" assert "''" == node.wiki_markup assert "''italic text''" == node node.wiki_markup = False assert node.wiki_markup is None assert "italic text" == node def test_self_closing(): """test getter/setter for the self_closing attribute""" node = Tag(wraptext("ref"), wraptext("foobar")) assert node.self_closing is False node.self_closing = True assert node.self_closing is True assert "" == node node.self_closing = 0 assert node.self_closing is False assert "foobar" == node def test_invalid(): """test getter/setter for the invalid attribute""" node = Tag(wraptext("br"), self_closing=True, implicit=True) assert node.invalid is False node.invalid = True assert node.invalid is True assert "
    " == node node.invalid = 0 assert node.invalid is False assert "
    " == node def test_implicit(): """test getter/setter for the implicit attribute""" node = Tag(wraptext("br"), self_closing=True) assert node.implicit is False node.implicit = True assert node.implicit is True assert "
    " == node node.implicit = 0 assert node.implicit is False assert "
    " == node def test_padding(): """test getter/setter for the padding attribute""" node = Tag(wraptext("ref"), wraptext("foobar")) assert "" == node.padding node.padding = " " assert " " == node.padding assert "foobar" == node node.padding = None assert "" == node.padding assert "foobar" == node with pytest.raises(ValueError): node.__setattr__("padding", True) def test_closing_tag(): """test getter/setter for the closing_tag attribute""" tag = wraptext("ref") node = Tag(tag, wraptext("foobar")) assert tag is node.closing_tag node.closing_tag = "ref {{ignore me}}" parsed = wrap([Text("ref "), Template(wraptext("ignore me"))]) assert_wikicode_equal(parsed, node.closing_tag) assert "foobar" == node def test_wiki_style_separator(): """test getter/setter for wiki_style_separator attribute""" node = Tag(wraptext("table"), wraptext("\n")) assert None is node.wiki_style_separator node.wiki_style_separator = "|" assert "|" == node.wiki_style_separator node.wiki_markup = "{" assert "{|\n{" == node node2 = Tag(wraptext("table"), wraptext("\n"), wiki_style_separator="|") assert "|" == node2.wiki_style_separator def test_closing_wiki_markup(): """test getter/setter for closing_wiki_markup attribute""" node = Tag(wraptext("table"), wraptext("\n")) assert None is node.closing_wiki_markup node.wiki_markup = "{|" assert "{|" == node.closing_wiki_markup node.closing_wiki_markup = "|}" assert "|}" == node.closing_wiki_markup assert "{|\n|}" == node node.wiki_markup = "!!" assert "|}" == node.closing_wiki_markup assert "!!\n|}" == node node.wiki_markup = False assert node.closing_wiki_markup is None assert "\n
    " == node node2 = Tag( wraptext("table"), wraptext("\n"), attrs=[agen("id", "foo")], wiki_markup="{|", closing_wiki_markup="|}", ) assert "|}" == node2.closing_wiki_markup assert '{| id="foo"\n|}' == node2 def test_has(): """test Tag.has()""" node = Tag(wraptext("ref"), wraptext("cite"), [agen("name", "foo")]) assert node.has("name") is True assert node.has(" name ") is True assert node.has(wraptext("name")) is True assert node.has("Name") is False assert node.has("foo") is False attrs = [ agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"), agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t"), ] node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True) assert node2.has("id") is True assert node2.has("class") is True assert ( node2.has(attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq) is True ) assert node2.has(attrs[3]) is True assert node2.has(str(attrs[3])) is True assert node2.has("idclass") is False assert node2.has("id class") is False assert node2.has("id=foo") is False def test_get(): """test Tag.get()""" attrs = [agen("name", "foo")] node = Tag(wraptext("ref"), wraptext("cite"), attrs) assert attrs[0] is node.get("name") assert attrs[0] is node.get(" name ") assert attrs[0] is node.get(wraptext("name")) with pytest.raises(ValueError): node.get("Name") with pytest.raises(ValueError): node.get("foo") attrs = [ agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"), agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t"), ] node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True) assert attrs[0] is node2.get("id") assert attrs[1] is node2.get("class") assert attrs[1] is node2.get( attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq ) assert attrs[3] is node2.get(attrs[3]) assert attrs[3] is node2.get(str(attrs[3])) assert attrs[3] is node2.get(" foo") with pytest.raises(ValueError): node2.get("idclass") with pytest.raises(ValueError): node2.get("id class") with pytest.raises(ValueError): node2.get("id=foo") 
def test_add(): """test Tag.add()""" node = Tag(wraptext("ref"), wraptext("cite")) node.add("name", "value") node.add("name", "value", quotes=None) node.add("name", "value", quotes="'") node.add("name") node.add(1, False) node.add("style", "{{foobar}}") node.add("name", "value", '"', "\n", " ", " ") attr1 = ' name="value"' attr2 = " name=value" attr3 = " name='value'" attr4 = " name" attr5 = ' 1="False"' attr6 = ' style="{{foobar}}"' attr7 = '\nname = "value"' assert attr1 == node.attributes[0] assert attr2 == node.attributes[1] assert attr3 == node.attributes[2] assert attr4 == node.attributes[3] assert attr5 == node.attributes[4] assert attr6 == node.attributes[5] assert attr7 == node.attributes[6] assert attr7 == node.get("name") assert_wikicode_equal( wrap([Template(wraptext("foobar"))]), node.attributes[5].value ) assert ( "".join( ("cite
    ") ) == node ) with pytest.raises(ValueError): node.add("name", "foo", quotes="bar") with pytest.raises(ValueError): node.add("name", "a bc d", quotes=None) def test_remove(): """test Tag.remove()""" attrs = [ agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"), agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t"), ] node = Tag(wraptext("div"), attrs=attrs, self_closing=True) node.remove("class") assert '
    ' == node node.remove("foo") assert '
    ' == node with pytest.raises(ValueError): node.remove("foo") node.remove("id") assert "
    " == node mwparserfromhell-0.6.3/tests/test_template.py000066400000000000000000000610511411406531600215030ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Test cases for the Template node. 
""" from difflib import unified_diff import pytest from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell import parse from .conftest import assert_wikicode_equal, wrap, wraptext pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) def test_str(): """test Template.__str__()""" node = Template(wraptext("foobar")) assert "{{foobar}}" == str(node) node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) assert "{{foo|bar|abc=def}}" == str(node2) def test_children(): """test Template.__children__()""" node2p1 = Parameter(wraptext("1"), wraptext("bar"), showkey=False) node2p2 = Parameter(wraptext("abc"), wrap([Text("def"), Text("ghi")]), showkey=True) node1 = Template(wraptext("foobar")) node2 = Template(wraptext("foo"), [node2p1, node2p2]) gen1 = node1.__children__() gen2 = node2.__children__() assert node1.name == next(gen1) assert node2.name == next(gen2) assert node2.params[0].value == next(gen2) assert node2.params[1].name == next(gen2) assert node2.params[1].value == next(gen2) with pytest.raises(StopIteration): next(gen1) with pytest.raises(StopIteration): next(gen2) def test_strip(): """test Template.__strip__()""" node1 = Template(wraptext("foobar")) node2 = Template( wraptext("foo"), [pgenh("1", "bar"), pgens("foo", ""), pgens("abc", "def")] ) node3 = Template( wraptext("foo"), [ pgenh("1", "foo"), Parameter( wraptext("2"), wrap([Template(wraptext("hello"))]), showkey=False ), pgenh("3", "bar"), ], ) assert node1.__strip__(keep_template_params=False) is None assert node2.__strip__(keep_template_params=False) is None assert "" == node1.__strip__(keep_template_params=True) assert "bar def" == node2.__strip__(keep_template_params=True) assert "foo bar" == node3.__strip__(keep_template_params=True) def test_showtree(): """test Template.__showtree__()""" output = [] getter, 
marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) node1 = Template(wraptext("foobar")) node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) valid = [ "{{", (getter, node1.name), "}}", "{{", (getter, node2.name), " | ", marker, (getter, node2.params[0].name), " = ", marker, (getter, node2.params[0].value), " | ", marker, (getter, node2.params[1].name), " = ", marker, (getter, node2.params[1].value), "}}", ] assert valid == output def test_name(): """test getter/setter for the name attribute""" name = wraptext("foobar") node1 = Template(name) node2 = Template(name, [pgenh("1", "bar")]) assert name is node1.name assert name is node2.name node1.name = "asdf" node2.name = "téstïng" assert_wikicode_equal(wraptext("asdf"), node1.name) assert_wikicode_equal(wraptext("téstïng"), node2.name) def test_params(): """test getter for the params attribute""" node1 = Template(wraptext("foobar")) plist = [pgenh("1", "bar"), pgens("abc", "def")] node2 = Template(wraptext("foo"), plist) assert [] == node1.params assert plist is node2.params def test_has(): """test Template.has()""" node1 = Template(wraptext("foobar")) node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("\nabc ", "def")]) node3 = Template( wraptext("foo"), [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")] ) node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")]) assert node1.has("foobar", False) is False assert node2.has(1, False) is True assert node2.has("abc", False) is True assert node2.has("def", False) is False assert node3.has("1", False) is True assert node3.has(" b ", False) is True assert node4.has("b", False) is True assert node3.has("b", True) is True assert node4.has("b", True) is False assert node1.has_param("foobar", False) is False assert node2.has_param(1, False) is True def test_get(): """test 
Template.get()""" node1 = Template(wraptext("foobar")) node2p1 = pgenh("1", "bar") node2p2 = pgens("abc", "def") node2 = Template(wraptext("foo"), [node2p1, node2p2]) node3p1 = pgens("b", "c") node3p2 = pgens("1", "d") node3 = Template(wraptext("foo"), [pgenh("1", "a"), node3p1, node3p2]) node4p1 = pgens(" b", " ") node4 = Template(wraptext("foo"), [pgenh("1", "a"), node4p1]) with pytest.raises(ValueError): node1.get("foobar") assert node2p1 is node2.get(1) assert node2p2 is node2.get("abc") with pytest.raises(ValueError): node2.get("def") assert node3p1 is node3.get("b") assert node3p2 is node3.get("1") assert node4p1 is node4.get("b ") def test_add(): """test Template.add()""" node1 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) node2 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) node3 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) node4 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) node5 = Template(wraptext("a"), [pgens("b", "c"), pgens(" d ", "e")]) node6 = Template(wraptext("a"), [pgens("b", "c"), pgens("b", "d"), pgens("b", "e")]) node7 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) node8p = pgenh("1", "d") node8 = Template(wraptext("a"), [pgens("b", "c"), node8p]) node9 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) node10 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "e")]) node11 = Template(wraptext("a"), [pgens("b", "c")]) node12 = Template(wraptext("a"), [pgens("b", "c")]) node13 = Template( wraptext("a"), [pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")] ) node14 = Template( wraptext("a\n"), [ pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), pgens("h ", " i\n"), ], ) node15 = Template( wraptext("a"), [pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")], ) node16 = Template( wraptext("a"), [pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")] ) node17 = Template(wraptext("a"), [pgenh("1", "b")]) node18 = 
Template(wraptext("a"), [pgenh("1", "b")]) node19 = Template(wraptext("a"), [pgenh("1", "b")]) node20 = Template( wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), pgenh("3", "d"), pgenh("4", "e")], ) node21 = Template( wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), pgens("4", "d"), pgens("5", "e")], ) node22 = Template( wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), pgens("4", "d"), pgens("5", "e")], ) node23 = Template(wraptext("a"), [pgenh("1", "b")]) node24 = Template(wraptext("a"), [pgenh("1", "b")]) node25 = Template(wraptext("a"), [pgens("b", "c")]) node26 = Template(wraptext("a"), [pgenh("1", "b")]) node27 = Template(wraptext("a"), [pgenh("1", "b")]) node28 = Template(wraptext("a"), [pgens("1", "b")]) node29 = Template( wraptext("a"), [pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")] ) node30 = Template( wraptext("a\n"), [ pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), pgens("h ", " i\n"), ], ) node31 = Template( wraptext("a"), [pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")], ) node32 = Template( wraptext("a"), [pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")], ) node33 = Template( wraptext("a"), [ pgens("b", "c"), pgens("d", "e"), pgens("b", "f"), pgens("b", "h"), pgens("i", "j"), ], ) node34 = Template( wraptext("a"), [pgens("1", "b"), pgens("x", "y"), pgens("1", "c"), pgens("2", "d")], ) node35 = Template( wraptext("a"), [pgens("1", "b"), pgens("x", "y"), pgenh("1", "c"), pgenh("2", "d")], ) node36 = Template( wraptext("a"), [pgens("b", "c"), pgens("d", "e"), pgens("f", "g")] ) node37 = Template(wraptext("a"), [pgenh("1", "")]) node38 = Template(wraptext("abc")) node39 = Template(wraptext("a"), [pgenh("1", " b ")]) node40 = Template(wraptext("a"), [pgenh("1", " b"), pgenh("2", " c")]) node41 = Template(wraptext("a"), [pgens("1", " b"), pgens("2", " c")]) node42 = Template(wraptext("a"), [pgens("b", " \n")]) node1.add("e", "f", showkey=True) node2.add(2, "g", showkey=False) node3.add("e", 
"foo|bar", showkey=True) node4.add("e", "f", showkey=True, before="b") node5.add("f", "g", showkey=True, before=" d ") node6.add("f", "g", showkey=True, before="b") with pytest.raises(ValueError): node7.add("e", "f", showkey=True, before="q") node8.add("e", "f", showkey=True, before=node8p) node9.add("e", "f", showkey=True, before=pgenh("1", "d")) with pytest.raises(ValueError): node10.add("e", "f", showkey=True, before=pgenh("1", "d")) node11.add("d", "foo=bar", showkey=True) node12.add("1", "foo=bar", showkey=False) node13.add("h", "i", showkey=True) node14.add("j", "k", showkey=True) node15.add("h", "i", showkey=True) node16.add("h", "i", showkey=True, preserve_spacing=False) node17.add("2", "c") node18.add("3", "c") node19.add("c", "d") node20.add("5", "f") node21.add("3", "f") node22.add("6", "f") node23.add("c", "foo=bar") node24.add("2", "foo=bar") node25.add("b", "d") node26.add("1", "foo=bar") node27.add("1", "foo=bar", showkey=True) node28.add("1", "foo=bar", showkey=False) node29.add("d", "foo") node30.add("f", "foo") node31.add("f", "foo") node32.add("d", "foo", preserve_spacing=False) node33.add("b", "k") node34.add("1", "e") node35.add("1", "e") node36.add("d", "h", before="b") node37.add(1, "b") node38.add("1", "foo") with pytest.raises(ValueError): node38.add("z", "bar", showkey=False) node39.add("1", "c") node40.add("3", "d") node41.add("3", "d") node42.add("b", "hello") assert "{{a|b=c|d|e=f}}" == node1 assert "{{a|b=c|d|g}}" == node2 assert "{{a|b=c|d|e=foo|bar}}" == node3 assert isinstance(node3.params[2].value.get(1), HTMLEntity) assert "{{a|e=f|b=c|d}}" == node4 assert "{{a|b=c|f=g| d =e}}" == node5 assert "{{a|b=c|b=d|f=g|b=e}}" == node6 assert "{{a|b=c|d}}" == node7 assert "{{a|b=c|e=f|d}}" == node8 assert "{{a|b=c|e=f|d}}" == node9 assert "{{a|b=c|e}}" == node10 assert "{{a|b=c|d=foo=bar}}" == node11 assert "{{a|b=c|foo=bar}}" == node12 assert isinstance(node12.params[1].value.get(1), HTMLEntity) assert "{{a|\nb = c|\nd = e|\nf = g|\nh = 
i}}" == node13 assert "{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}" == node14 assert "{{a|b = c\n|\nd = e|\nf =g |\nh = i}}" == node15 assert "{{a|\nb = c|\nd = e|\nf = g|h=i}}" == node16 assert "{{a|b|c}}" == node17 assert "{{a|b|3=c}}" == node18 assert "{{a|b|c=d}}" == node19 assert "{{a|b|c|d|e|f}}" == node20 assert "{{a|b|c|4=d|5=e|f}}" == node21 assert "{{a|b|c|4=d|5=e|6=f}}" == node22 assert "{{a|b|c=foo=bar}}" == node23 assert "{{a|b|foo=bar}}" == node24 assert isinstance(node24.params[1].value.get(1), HTMLEntity) assert "{{a|b=d}}" == node25 assert "{{a|foo=bar}}" == node26 assert isinstance(node26.params[0].value.get(1), HTMLEntity) assert "{{a|1=foo=bar}}" == node27 assert "{{a|foo=bar}}" == node28 assert isinstance(node28.params[0].value.get(1), HTMLEntity) assert "{{a|\nb = c|\nd = foo|\nf = g}}" == node29 assert "{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}" == node30 assert "{{a|b = c\n|\nd = e|\nf =foo }}" == node31 assert "{{a|\nb = c |\nd =foo|\nf = g }}" == node32 assert "{{a|b=k|d=e|i=j}}" == node33 assert "{{a|1=e|x=y|2=d}}" == node34 assert "{{a|x=y|e|d}}" == node35 assert "{{a|b=c|d=h|f=g}}" == node36 assert "{{a|b}}" == node37 assert "{{abc|foo}}" == node38 assert "{{a|c}}" == node39 assert "{{a| b| c|d}}" == node40 assert "{{a|1= b|2= c|3= d}}" == node41 assert "{{a|b=hello \n}}" == node42 def test_remove(): """test Template.remove()""" node1 = Template(wraptext("foobar")) node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) node3 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) node4 = Template(wraptext("foo"), [pgenh("1", "bar"), pgenh("2", "baz")]) node5 = Template( wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] ) node6 = Template( wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] ) node7 = Template( wraptext("foo"), [pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")] ) node8 = Template( wraptext("foo"), [pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")] 
) node9 = Template( wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")] ) node10 = Template( wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")] ) node11 = Template( wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] ) node12 = Template( wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] ) node13 = Template( wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] ) node14 = Template( wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] ) node15 = Template( wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] ) node16 = Template( wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] ) node17 = Template( wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")] ) node18 = Template( wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")] ) node19 = Template( wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")] ) node20 = Template( wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")] ) node21 = Template( wraptext("foo"), [ pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), pgens("a", "b"), ], ) node22 = Template( wraptext("foo"), [ pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), pgens("a", "b"), ], ) node23 = Template( wraptext("foo"), [ pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), pgens("a", "b"), ], ) node24 = Template( wraptext("foo"), [ pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), pgens("a", "b"), ], ) node25 = Template( wraptext("foo"), [ pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), pgens("a", "b"), ], ) node26 = Template( wraptext("foo"), [ pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), pgens("a", "b"), ], ) node27 = Template(wraptext("foo"), [pgenh("1", "bar")]) node28 = Template(wraptext("foo"), [pgenh("1", "bar")]) node2.remove("1") 
node2.remove("abc") node3.remove(1, keep_field=True) node3.remove("abc", keep_field=True) node4.remove("1", keep_field=False) node5.remove("a", keep_field=False) node6.remove("a", keep_field=True) node7.remove(1, keep_field=True) node8.remove(1, keep_field=False) node9.remove(1, keep_field=True) node10.remove(1, keep_field=False) node11.remove(node11.params[0], keep_field=False) node12.remove(node12.params[0], keep_field=True) node13.remove(node13.params[1], keep_field=False) node14.remove(node14.params[1], keep_field=True) node15.remove(node15.params[2], keep_field=False) node16.remove(node16.params[2], keep_field=True) node17.remove(node17.params[0], keep_field=False) node18.remove(node18.params[0], keep_field=True) node19.remove(node19.params[1], keep_field=False) node20.remove(node20.params[1], keep_field=True) node21.remove("a", keep_field=False) node22.remove("a", keep_field=True) node23.remove(node23.params[0], keep_field=False) node24.remove(node24.params[0], keep_field=True) node25.remove(node25.params[3], keep_field=False) node26.remove(node26.params[3], keep_field=True) with pytest.raises(ValueError): node1.remove(1) with pytest.raises(ValueError): node1.remove("a") with pytest.raises(ValueError): node2.remove("1") assert "{{foo}}" == node2 assert "{{foo||abc=}}" == node3 assert "{{foo|2=baz}}" == node4 assert "{{foo|b=c}}" == node5 assert "{{foo| a=|b=c}}" == node6 assert "{{foo|1 =|2=c}}" == node7 assert "{{foo|2=c}}" == node8 assert "{{foo||c}}" == node9 assert "{{foo|2=c}}" == node10 assert "{{foo|b=c|a =d}}" == node11 assert "{{foo| a=|b=c|a =d}}" == node12 assert "{{foo| a=b|a =d}}" == node13 assert "{{foo| a=b|b=|a =d}}" == node14 assert "{{foo| a=b|b=c}}" == node15 assert "{{foo| a=b|b=c|a =}}" == node16 assert "{{foo|b|c}}" == node17 assert "{{foo|1 =|b|c}}" == node18 assert "{{foo|1 =a|2=c}}" == node19 assert "{{foo|1 =a||c}}" == node20 assert "{{foo|c=d|e=f}}" == node21 assert "{{foo|a=|c=d|e=f}}" == node22 assert "{{foo|c=d|e=f|a=b|a=b}}" == 
node23 assert "{{foo|a=|c=d|e=f|a=b|a=b}}" == node24 assert "{{foo|a=b|c=d|e=f|a=b}}" == node25 assert "{{foo|a=b|c=d|e=f|a=|a=b}}" == node26 with pytest.raises(ValueError): node27.remove(node28.get(1)) def test_formatting(): """test realistic param manipulation with complex whitespace formatting (assumes that parsing works correctly)""" tests = [ # https://en.wikipedia.org/w/index.php?title=Lamar_County,_Georgia&oldid=792356004 ( """{{Infobox U.S. county | county = Lamar County | state = Georgia | seal = | founded = 1920 | seat wl = Barnesville | largest city wl = Barnesville | area_total_sq_mi = 186 | area_land_sq_mi = 184 | area_water_sq_mi = 2.3 | area percentage = 1.3% | census yr = 2010 | pop = 18317 | density_sq_mi = 100 | time zone = Eastern | footnotes = | web = www.lamarcountyga.com | ex image = Lamar County Georgia Courthouse.jpg | ex image cap = Lamar County courthouse in Barnesville | district = 3rd | named for = [[Lucius Quintus Cincinnatus Lamar II]] }}""", """@@ -11,4 +11,4 @@ | area percentage = 1.3% -| census yr = 2010 -| pop = 18317 +| census estimate yr = 2016 +| pop = 12345example ref | density_sq_mi = 100""", ), # https://en.wikipedia.org/w/index.php?title=Rockdale_County,_Georgia&oldid=792359760 ( """{{Infobox U.S. County| county = Rockdale County | state = Georgia | seal = | founded = October 18, 1870 | seat wl = Conyers | largest city wl = Conyers | area_total_sq_mi = 132 | area_land_sq_mi = 130 | area_water_sq_mi = 2.3 | area percentage = 1.7% | census yr = 2010| pop = 85215 | density_sq_mi = 657 | web = www.rockdalecounty.org | ex image = Rockdale-county-courthouse.jpg | ex image cap = Rockdale County Courthouse in Conyers | district = 4th | time zone= Eastern }}""", """@@ -11,4 +11,4 @@ area percentage = 1.7% | - census yr = 2010| - pop = 85215 | + census estimate yr = 2016 | + pop = 12345example ref | density_sq_mi = 657 |""", ), # https://en.wikipedia.org/w/index.php?title=Spalding_County,_Georgia&oldid=792360413 ( """{{Infobox U.S. 
County| | county = Spalding County | | state = Georgia | | seal = | | founded = 1851 | | seat wl = Griffin | | largest city wl = Griffin | | area_total_sq_mi = 200 | | area_land_sq_mi = 196 | | area_water_sq_mi = 3.1 | | area percentage = 1.6% | | census yr = 2010| | pop = 64073 | | density_sq_mi = 326 | | web = www.spaldingcounty.com | | named for = [[Thomas Spalding]] | ex image = Spalding County Courthouse (NE corner).JPG | ex image cap = Spalding County Courthouse in Griffin | district = 3rd | time zone = Eastern }}""", """@@ -11,4 +11,4 @@ | area percentage = 1.6% | -| census yr = 2010| -| pop = 64073 | +| +| census estimate yr = 2016 | pop = 12345example ref | | density_sq_mi = 326 |""", ), # https://en.wikipedia.org/w/index.php?title=Clinton_County,_Illinois&oldid=794694648 ( """{{Infobox U.S. county |county = Clinton County |state = Illinois | ex image = File:Clinton County Courthouse, Carlyle.jpg | ex image cap = [[Clinton County Courthouse (Illinois)|Clinton County Courthouse]] |seal = |founded = 1824 |named for = [[DeWitt Clinton]] |seat wl= Carlyle | largest city wl = Breese |time zone=Central |area_total_sq_mi = 503 |area_land_sq_mi = 474 |area_water_sq_mi = 29 |area percentage = 5.8% |census yr = 2010 |pop = 37762 |density_sq_mi = 80 |web = www.clintonco.illinois.gov | district = 15th }}""", """@@ -15,4 +15,4 @@ |area percentage = 5.8% - |census yr = 2010 - |pop = 37762 + |census estimate yr = 2016 + |pop = 12345example ref |density_sq_mi = 80""", ), # https://en.wikipedia.org/w/index.php?title=Winnebago_County,_Illinois&oldid=789193800 ( """{{Infobox U.S. 
county | county = Winnebago County | state = Illinois | seal = Winnebago County il seal.png | named for = [[Winnebago (tribe)|Winnebago Tribe]] | seat wl= Rockford | largest city wl = Rockford| area_total_sq_mi = 519 | area_land_sq_mi = 513| area_water_sq_mi = 5.9 | area percentage = 1.1% | census yr = 2010| pop = 295266 | density_sq_mi = 575 | web = www.wincoil.us | founded year = 1836 | founded date = January 16 | time zone = Central | district = 16th | district2 = 17th }}""", """@@ -11,4 +11,4 @@ area percentage = 1.1% | - census yr = 2010| - pop = 295266 | + census estimate yr = 2016| + pop = 12345example ref | density_sq_mi = 575""", ), ] for (original, expected) in tests: code = parse(original) template = code.filter_templates()[0] template.add("pop", "12345example ref") template.add("census estimate yr", "2016", before="pop") template.remove("census yr") oldlines = original.splitlines(True) newlines = str(code).splitlines(True) difflines = unified_diff(oldlines, newlines, n=1) diff = "".join(list(difflines)[2:]).strip() assert expected == diff mwparserfromhell-0.6.3/tests/test_text.py000066400000000000000000000044731411406531600206610ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. 
"""
Test cases for the Text node.
"""

import pytest

from mwparserfromhell.nodes import Text


def test_str():
    """test Text.__str__()"""
    plain = Text("foobar")
    assert "foobar" == str(plain)
    accented = Text("fóóbar")
    assert "fóóbar" == str(accented)


def test_children():
    """test Text.__children__()"""
    gen = Text("foobar").__children__()
    # Text nodes are leaves: the child generator is immediately exhausted.
    with pytest.raises(StopIteration):
        next(gen)


def test_strip():
    """test Text.__strip__()"""
    node = Text("foobar")
    assert node is node.__strip__()


def test_showtree():
    """test Text.__showtree__()"""
    output = []
    # ASCII, Latin-1-range, and astral-plane text all render via the
    # same tree printer; non-ASCII is escaped in the output.
    for node in (Text("foobar"), Text("fóóbar"), Text("𐌲𐌿𐍄")):
        node.__showtree__(output.append, None, None)
    expected = ["foobar", r"f\xf3\xf3bar", "\\U00010332\\U0001033f\\U00010344"]
    assert expected == output


def test_value():
    """test getter/setter for the value attribute"""
    node = Text("foobar")
    assert "foobar" == node.value
    assert isinstance(node.value, str)
    node.value = "héhéhé"
    assert "héhéhé" == node.value
    assert isinstance(node.value, str)
import codecs
from os import listdir, path
import warnings

import pytest

from mwparserfromhell.parser import contexts, tokens
from mwparserfromhell.parser.builder import Builder
from mwparserfromhell.parser.tokenizer import Tokenizer as PyTokenizer

try:
    from mwparserfromhell.parser._tokenizer import CTokenizer
except ImportError:
    CTokenizer = None


class _TestParseError(Exception):
    """Raised internally when a test could not be parsed."""


def _parse_test(test, data):
    """Parse an individual *test*, storing its info in *data*.

    *test* is one newline-separated record of "key: value" lines; the
    recognized keys are "name", "label", "input", and "output".
    Raises _TestParseError if the "output:" expression cannot be evaluated.
    """
    for line in test.strip().splitlines():
        if line.startswith("name:"):
            data["name"] = line[len("name:") :].strip()
        elif line.startswith("label:"):
            data["label"] = line[len("label:") :].strip()
        elif line.startswith("input:"):
            raw = line[len("input:") :].strip()
            # Strip a surrounding pair of double quotes. The length guard
            # fixes a crash in the original code: an empty (or single-char)
            # "input:" value made raw[0]/raw[-1] raise IndexError outside
            # the error handling below, aborting test collection.
            if len(raw) >= 2 and raw[0] == '"' and raw[-1] == '"':
                raw = raw[1:-1]
            # Round-trip through raw_unicode_escape so escape sequences in
            # the fixture file become real characters.
            raw = raw.encode("raw_unicode_escape")
            data["input"] = raw.decode("unicode_escape")
        elif line.startswith("output:"):
            raw = line[len("output:") :].strip()
            try:
                # NOTE: eval() executes arbitrary code; acceptable here only
                # because fixtures are trusted local files in the repo.
                data["output"] = eval(raw, vars(tokens))
            except Exception as err:
                raise _TestParseError(err) from err


def _load_tests(filename, name, text):
    """Load all tests in *text* from the file *filename*.

    Yields one data dict per well-formed test; malformed or incomplete
    tests emit a warning and are skipped rather than failing collection.
    """
    tests = text.split("\n---\n")
    for test in tests:
        data = {"name": None, "label": None, "input": None, "output": None}
        try:
            _parse_test(test, data)
        except _TestParseError as err:
            if data["name"]:
                error = "Could not parse test '{0}' in '{1}':\n\t{2}"
                warnings.warn(error.format(data["name"], filename, err))
            else:
                error = "Could not parse a test in '{0}':\n\t{1}"
                warnings.warn(error.format(filename, err))
            continue
        if not data["name"]:
            error = "A test in '{0}' was ignored because it lacked a name"
            warnings.warn(error.format(filename))
            continue
        if data["input"] is None or data["output"] is None:
            error = (
                "Test '{}' in '{}' was ignored because it lacked an input or an output"
            )
            warnings.warn(error.format(data["name"], filename))
            continue
        # Include test filename in name
        data["name"] = "{}:{}".format(name, data["name"])
        yield data


def build():
    """Load and install all tests from the 'tokenizer' directory."""
    directory = path.join(path.dirname(__file__), "tokenizer")
    extension = ".mwtest"
    for filename in listdir(directory):
        if not filename.endswith(extension):
            continue
        fullname = path.join(directory, filename)
        with codecs.open(fullname, "r", encoding="utf8") as fp:
            text = fp.read()
        name = path.split(fullname)[1][: -len(extension)]
        yield from _load_tests(fullname, name, text)


@pytest.mark.parametrize(
    "tokenizer",
    filter(None, (CTokenizer, PyTokenizer)),
    ids=lambda t: "CTokenizer" if t.USES_C else "PyTokenizer",
)
@pytest.mark.parametrize("data", build(), ids=lambda data: data["name"])
def test_tokenizer(tokenizer, data):
    """Each fixture input must tokenize to the expected token stream."""
    expected = data["output"]
    actual = tokenizer().tokenize(data["input"])
    assert expected == actual


@pytest.mark.parametrize("data", build(), ids=lambda data: data["name"])
def test_roundtrip(data):
    """Building the expected tokens back into wikicode must reproduce the input."""
    expected = data["input"]
    actual = str(Builder().build(data["output"][:]))
    assert expected == actual


@pytest.mark.skipif(CTokenizer is None, reason="CTokenizer not available")
def test_c_tokenizer_uses_c():
    """make sure the C tokenizer identifies as using a C extension"""
    assert CTokenizer.USES_C is True
    assert CTokenizer().USES_C is True


def test_describe_context():
    """context bitfields render as pipe-joined flag names"""
    assert "" == contexts.describe(0)
    ctx = contexts.describe(contexts.TEMPLATE_PARAM_KEY | contexts.HAS_TEXT)
    assert "TEMPLATE_PARAM_KEY|HAS_TEXT" == ctx
""" import pytest from mwparserfromhell.parser import tokens @pytest.mark.parametrize("name", tokens.__all__) def test_issubclass(name): """check that all classes within the tokens module are really Tokens""" klass = getattr(tokens, name) assert issubclass(klass, tokens.Token) is True assert isinstance(klass(), klass) assert isinstance(klass(), tokens.Token) def test_attributes(): """check that Token attributes can be managed properly""" token1 = tokens.Token() token2 = tokens.Token(foo="bar", baz=123) assert "bar" == token2.foo assert 123 == token2.baz assert token1.foo is None assert token2.bar is None token1.spam = "eggs" token2.foo = "ham" del token2.baz assert "eggs" == token1.spam assert "ham" == token2.foo assert token2.baz is None with pytest.raises(KeyError): token2.__delattr__("baz") def test_repr(): """check that repr() on a Token works as expected""" token1 = tokens.Token() token2 = tokens.Token(foo="bar", baz=123) token3 = tokens.Text(text="earwig" * 100) hundredchars = ("earwig" * 100)[:97] + "..." 
assert "Token()" == repr(token1) assert repr(token2) in ("Token(foo='bar', baz=123)", "Token(baz=123, foo='bar')") assert "Text(text='" + hundredchars + "')" == repr(token3) def test_equality(): """check that equivalent tokens are considered equal""" token1 = tokens.Token() token2 = tokens.Token() token3 = tokens.Token(foo="bar", baz=123) token4 = tokens.Text(text="asdf") token5 = tokens.Text(text="asdf") token6 = tokens.TemplateOpen(text="asdf") assert token1 == token2 assert token2 == token1 assert token4 == token5 assert token5 == token4 assert token1 != token3 assert token2 != token3 assert token4 != token6 assert token5 != token6 @pytest.mark.parametrize( "token", [tokens.Token(), tokens.Token(foo="bar", baz=123), tokens.Text(text="earwig")], ) def test_repr_equality(token): """check that eval(repr(token)) == token""" assert token == eval(repr(token), vars(tokens)) mwparserfromhell-0.6.3/tests/test_utils.py000066400000000000000000000046261411406531600210350ustar00rootroot00000000000000# Copyright (C) 2012-2016 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
"""
Tests for the utils module, which provides parse_anything().
"""

import pytest

from mwparserfromhell.nodes import Template, Text
from mwparserfromhell.utils import parse_anything

from .conftest import assert_wikicode_equal, wrap, wraptext


@pytest.mark.parametrize(
    "test,valid",
    [
        # Wikicode and nodes pass through (nodes get wrapped):
        (wraptext("foobar"), wraptext("foobar")),
        (Template(wraptext("spam")), wrap([Template(wraptext("spam"))])),
        # Strings and bytes are parsed:
        ("fóóbar", wraptext("fóóbar")),
        (b"foob\xc3\xa1r", wraptext("foobár")),
        # Non-string scalars are stringified:
        (123, wraptext("123")),
        (True, wraptext("True")),
        (None, wrap([])),
        # Iterables are flattened recursively:
        ([Text("foo"), Text("bar"), Text("baz")], wraptext("foo", "bar", "baz")),
        (
            [wraptext("foo"), Text("bar"), "baz", 123, 456],
            wraptext("foo", "bar", "baz", "123", "456"),
        ),
        ([[[([[((("foo",),),)], "bar"],)]]], wraptext("foo", "bar")),
    ],
)
def test_parse_anything_valid(test, valid):
    """tests for valid input to utils.parse_anything()"""
    assert_wikicode_equal(valid, parse_anything(test))


@pytest.mark.parametrize(
    "invalid", [Ellipsis, object, object(), type, ["foo", [object]]]
)
def test_parse_anything_invalid(invalid):
    """tests for invalid input to utils.parse_anything()"""
    with pytest.raises(ValueError):
        parse_anything(invalid)
"""
Tests for the Wikicode class, which manages a list of nodes.
"""

from functools import partial
import re
from types import GeneratorType

import pytest

from mwparserfromhell.nodes import Argument, Heading, Template, Text
from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.wikicode import Wikicode
from mwparserfromhell import parse

from .conftest import wrap, wraptext


def test_str():
    """test Wikicode.__str__()"""
    code1 = parse("foobar")
    code2 = parse("Have a {{template}} and a [[page|link]]")
    assert "foobar" == str(code1)
    assert "Have a {{template}} and a [[page|link]]" == str(code2)


def test_nodes():
    """test getter/setter for the nodes attribute"""
    code = parse("Have a {{template}}")
    assert ["Have a ", "{{template}}"] == code.nodes
    L1 = SmartList([Text("foobar"), Template(wraptext("abc"))])
    L2 = [Text("barfoo"), Template(wraptext("cba"))]
    L3 = "abc{{def}}"
    # SmartLists and plain lists are stored as-is; strings are parsed.
    code.nodes = L1
    assert L1 is code.nodes
    code.nodes = L2
    assert L2 is code.nodes
    code.nodes = L3
    assert ["abc", "{{def}}"] == code.nodes
    with pytest.raises(ValueError):
        code.__setattr__("nodes", object)


def test_get():
    """test Wikicode.get()"""
    code = parse("Have a {{template}} and a [[page|link]]")
    assert code.nodes[0] is code.get(0)
    assert code.nodes[2] is code.get(2)
    with pytest.raises(IndexError):
        code.get(4)


def test_set():
    """test Wikicode.set()"""
    code = parse("Have a {{template}} and a [[page|link]]")
    code.set(1, "{{{argument}}}")
    assert "Have a {{{argument}}} and a [[page|link]]" == code
    assert isinstance(code.get(1), Argument)
    # Setting a node to None removes it.
    code.set(2, None)
    assert "Have a {{{argument}}}[[page|link]]" == code
    # Negative indices work.
    code.set(-3, "This is an ")
    assert "This is an {{{argument}}}[[page|link]]" == code
    # A replacement must be exactly one node.
    with pytest.raises(ValueError):
        code.set(1, "foo {{bar}}")
    with pytest.raises(IndexError):
        code.set(3, "{{baz}}")
    with pytest.raises(IndexError):
        code.set(-4, "{{baz}}")


def test_contains():
    """test Wikicode.contains()"""
    code = parse("Here is {{aaa|{{bbb|xyz{{ccc}}}}}} and a [[page|link]]")
    tmpl1, tmpl2, tmpl3 = code.filter_templates()
    tmpl4 = parse("{{ccc}}").filter_templates()[0]
    assert code.contains(tmpl1) is True
    assert code.contains(tmpl3) is True
    # Containment is identity-based for nodes, value-based for strings.
    assert code.contains(tmpl4) is False
    assert code.contains(str(tmpl4)) is True
    assert code.contains(tmpl2.params[0].value) is True


def test_index():
    """test Wikicode.index()"""
    code = parse("Have a {{template}} and a [[page|link]]")
    assert 0 == code.index("Have a ")
    assert 3 == code.index("[[page|link]]")
    assert 1 == code.index(code.get(1))
    with pytest.raises(ValueError):
        code.index("foo")

    code = parse("{{foo}}{{bar|{{baz}}}}")
    assert 1 == code.index("{{bar|{{baz}}}}")
    # A nested node's index is that of its top-level ancestor.
    assert 1 == code.index("{{baz}}", recursive=True)
    assert 1 == code.index(code.get(1).get(1).value, recursive=True)
    with pytest.raises(ValueError):
        code.index("{{baz}}", recursive=False)
    with pytest.raises(ValueError):
        code.index(code.get(1).get(1).value, recursive=False)


def test_get_ancestors_parent():
    """test Wikicode.get_ancestors() and Wikicode.get_parent()"""
    code = parse("{{a|{{b|{{d|{{e}}{{f}}}}{{g}}}}}}{{c}}")
    tmpl = code.filter_templates(matches=lambda n: n.name == "f")[0]
    parent1 = code.filter_templates(matches=lambda n: n.name == "d")[0]
    parent2 = code.filter_templates(matches=lambda n: n.name == "b")[0]
    parent3 = code.filter_templates(matches=lambda n: n.name == "a")[0]
    fake = parse("{{f}}").get(0)

    # Ancestors are returned outermost-first.
    assert [parent3, parent2, parent1] == code.get_ancestors(tmpl)
    assert parent1 is code.get_parent(tmpl)
    assert [] == code.get_ancestors(parent3)
    assert None is code.get_parent(parent3)
    with pytest.raises(ValueError):
        code.get_ancestors(fake)
    with pytest.raises(ValueError):
        code.get_parent(fake)


def test_insert():
    """test Wikicode.insert()"""
    code = parse("Have a {{template}} and a [[page|link]]")
    code.insert(1, "{{{argument}}}")
    assert "Have a {{{argument}}}{{template}} and a [[page|link]]" == code
    assert isinstance(code.get(1), Argument)
    # Inserting None is a no-op.
    code.insert(2, None)
    assert "Have a {{{argument}}}{{template}} and a [[page|link]]" == code
    code.insert(-3, Text("foo"))
    assert "Have a {{{argument}}}foo{{template}} and a [[page|link]]" == code

    code2 = parse("{{foo}}{{bar}}{{baz}}")
    # Multi-node strings are parsed and spliced in.
    code2.insert(1, "abc{{def}}ghi[[jk]]")
    assert "{{foo}}abc{{def}}ghi[[jk]]{{bar}}{{baz}}" == code2
    assert [
        "{{foo}}",
        "abc",
        "{{def}}",
        "ghi",
        "[[jk]]",
        "{{bar}}",
        "{{baz}}",
    ] == code2.nodes

    code3 = parse("{{foo}}bar")
    # Out-of-range indices clamp to the ends like list.insert().
    code3.insert(1000, "[[baz]]")
    code3.insert(-1000, "derp")
    assert "derp{{foo}}bar[[baz]]" == code3


def _test_search(meth, expected):
    """Base test for insert_before(), insert_after(), and replace()."""
    code = parse("{{a}}{{b}}{{c}}{{d}}{{e}}")
    func = partial(meth, code)
    func("{{b}}", "x", recursive=True)
    func("{{d}}", "[[y]]", recursive=False)
    func(code.get(2), "z")
    assert expected[0] == code
    with pytest.raises(ValueError):
        func("{{r}}", "n", recursive=True)
    with pytest.raises(ValueError):
        func("{{r}}", "n", recursive=False)
    fake = parse("{{a}}").get(0)
    with pytest.raises(ValueError):
        func(fake, "n", recursive=True)
    with pytest.raises(ValueError):
        func(fake, "n", recursive=False)

    code2 = parse("{{a}}{{a}}{{a}}{{b}}{{b}}{{b}}")
    func = partial(meth, code2)
    func(code2.get(1), "c", recursive=False)
    func("{{a}}", "d", recursive=False)
    func(code2.get(-1), "e", recursive=True)
    func("{{b}}", "f", recursive=True)
    assert expected[1] == code2

    code3 = parse("{{a|{{b}}|{{c|d={{f}}}}}}")
    func = partial(meth, code3)
    obj = code3.get(0).params[0].value.get(0)
    with pytest.raises(ValueError):
        func(obj, "x", recursive=False)
    func(obj, "x", recursive=True)
    with pytest.raises(ValueError):
        func("{{f}}", "y", recursive=False)
    func("{{f}}", "y", recursive=True)
    assert expected[2] == code3

    code4 = parse("{{a}}{{b}}{{c}}{{d}}{{e}}{{f}}{{g}}{{h}}{{i}}{{j}}")
    func = partial(meth, code4)
    fake = parse("{{b}}{{c}}")
    with pytest.raises(ValueError):
        func(fake, "q", recursive=False)
    with pytest.raises(ValueError):
        func(fake, "q", recursive=True)
    func("{{b}}{{c}}", "w", recursive=False)
    func("{{d}}{{e}}", "x", recursive=True)
    func(Wikicode(code4.nodes[-2:]), "y", recursive=False)
    func(Wikicode(code4.nodes[-2:]), "z", recursive=True)
    assert expected[3] == code4
    with pytest.raises(ValueError):
        func("{{c}}{{d}}", "q", recursive=False)
    with pytest.raises(ValueError):
        func("{{c}}{{d}}", "q", recursive=True)

    code5 = parse("{{a|{{b}}{{c}}|{{f|{{g}}={{h}}{{i}}}}}}")
    func = partial(meth, code5)
    with pytest.raises(ValueError):
        func("{{b}}{{c}}", "x", recursive=False)
    func("{{b}}{{c}}", "x", recursive=True)
    obj = code5.get(0).params[1].value.get(0).params[0].value
    with pytest.raises(ValueError):
        func(obj, "y", recursive=False)
    func(obj, "y", recursive=True)
    assert expected[4] == code5

    code6 = parse("here is {{some text and a {{template}}}}")
    func = partial(meth, code6)
    with pytest.raises(ValueError):
        func("text and", "ab", recursive=False)
    func("text and", "ab", recursive=True)
    with pytest.raises(ValueError):
        func("is {{some", "cd", recursive=False)
    func("is {{some", "cd", recursive=True)
    assert expected[5] == code6

    code7 = parse("{{foo}}{{bar}}{{baz}}{{foo}}{{baz}}")
    func = partial(meth, code7)
    obj = wrap([code7.get(0), code7.get(2)])
    with pytest.raises(ValueError):
        func(obj, "{{lol}}")
    func("{{foo}}{{baz}}", "{{lol}}")
    assert expected[6] == code7

    code8 = parse("== header ==")
    func = partial(meth, code8)
    sec1, sec2 = code8.get_sections(include_headings=False)
    func(sec1, "lead\n")
    func(sec2, "\nbody")
    assert expected[7] == code8

    code9 = parse("{{foo}}")
    meth(code9.get_sections()[0], code9.get_sections()[0], "{{bar}}")
    meth(code9.get_sections()[0], code9, "{{baz}}")
    meth(code9, code9, "{{qux}}")
    meth(code9, code9.get_sections()[0], "{{quz}}")
    assert expected[8] == code9


def test_insert_before():
    """test Wikicode.insert_before()"""
    meth = lambda code, *args, **kw: code.insert_before(*args, **kw)
    expected = [
        "{{a}}xz{{b}}{{c}}[[y]]{{d}}{{e}}",
        "d{{a}}cd{{a}}d{{a}}f{{b}}f{{b}}ef{{b}}",
        "{{a|x{{b}}|{{c|d=y{{f}}}}}}",
        "{{a}}w{{b}}{{c}}x{{d}}{{e}}{{f}}{{g}}{{h}}yz{{i}}{{j}}",
        "{{a|x{{b}}{{c}}|{{f|{{g}}=y{{h}}{{i}}}}}}",
        "here cdis {{some abtext and a {{template}}}}",
        "{{foo}}{{bar}}{{baz}}{{lol}}{{foo}}{{baz}}",
        "lead\n== header ==\nbody",
        "{{quz}}{{qux}}{{baz}}{{bar}}{{foo}}",
    ]
    _test_search(meth, expected)


def test_insert_after():
    """test Wikicode.insert_after()"""
    meth = lambda code, *args, **kw: code.insert_after(*args, **kw)
    expected = [
        "{{a}}{{b}}xz{{c}}{{d}}[[y]]{{e}}",
        "{{a}}d{{a}}dc{{a}}d{{b}}f{{b}}f{{b}}fe",
        "{{a|{{b}}x|{{c|d={{f}}y}}}}",
        "{{a}}{{b}}{{c}}w{{d}}{{e}}x{{f}}{{g}}{{h}}{{i}}{{j}}yz",
        "{{a|{{b}}{{c}}x|{{f|{{g}}={{h}}{{i}}y}}}}",
        "here is {{somecd text andab a {{template}}}}",
        "{{foo}}{{bar}}{{baz}}{{foo}}{{baz}}{{lol}}",
        "lead\n== header ==\nbody",
        "{{foo}}{{bar}}{{baz}}{{qux}}{{quz}}",
    ]
    _test_search(meth, expected)


def test_replace():
    """test Wikicode.replace()"""
    meth = lambda code, *args, **kw: code.replace(*args, **kw)
    expected = [
        "{{a}}xz[[y]]{{e}}",
        "dcdffe",
        "{{a|x|{{c|d=y}}}}",
        "{{a}}wx{{f}}{{g}}z",
        "{{a|x|{{f|{{g}}=y}}}}",
        "here cd ab a {{template}}}}",
        "{{foo}}{{bar}}{{baz}}{{lol}}",
        "lead\n== header ==\nbody",
        "{{quz}}",
    ]
    _test_search(meth, expected)


def test_append():
    """test Wikicode.append()"""
    code = parse("Have a {{template}}")
    code.append("{{{argument}}}")
    assert "Have a {{template}}{{{argument}}}" == code
    assert isinstance(code.get(2), Argument)
    # Appending None is a no-op.
    code.append(None)
    assert "Have a {{template}}{{{argument}}}" == code
    code.append(Text(" foo"))
    assert "Have a {{template}}{{{argument}}} foo" == code
    with pytest.raises(ValueError):
        code.append(slice(0, 1))


def test_remove():
    """test Wikicode.remove()"""
    meth = lambda code, obj, value, **kw: code.remove(obj, **kw)
    expected = [
        "{{a}}{{c}}",
        "",
        "{{a||{{c|d=}}}}",
        "{{a}}{{f}}",
        "{{a||{{f|{{g}}=}}}}",
        "here a {{template}}}}",
        "{{foo}}{{bar}}{{baz}}",
        "== header ==",
        "",
    ]
    _test_search(meth, expected)


def test_matches():
    """test Wikicode.matches()"""
    # NOTE(review): several string literals in this function were corrupted
    # in the archived copy (HTML-comment text and repeated spaces were
    # stripped); they are restored here from the upstream test suite —
    # verify against the canonical source.
    code1 = parse("Cleanup")
    code2 = parse("\nstub<!-- TODO: make more specific -->")
    code3 = parse("Hello world!")
    code4 = parse("World,_hello?")
    code5 = parse("")
    assert code1.matches("Cleanup") is True
    assert code1.matches("cleanup") is True
    assert code1.matches(" cleanup\n") is True
    assert code1.matches("CLEANup") is False
    assert code1.matches("Blah") is False
    assert code2.matches("stub") is True
    assert code2.matches("Stub") is True
    assert code2.matches("StuB") is False
    assert code1.matches(("cleanup", "stub")) is True
    assert code2.matches(("cleanup", "stub")) is True
    assert code2.matches(("StuB", "sTUb", "foobar")) is False
    assert code2.matches(["StuB", "sTUb", "foobar"]) is False
    assert code2.matches(("StuB", "sTUb", "foo", "bar", "Stub")) is True
    assert code2.matches(["StuB", "sTUb", "foo", "bar", "Stub"]) is True
    assert code3.matches("hello world!") is True
    assert code3.matches("hello_world!") is True
    assert code3.matches("hello__world!") is False
    assert code4.matches("World,_hello?") is True
    assert code4.matches("World, hello?") is True
    assert code4.matches("World,  hello?") is False
    assert code5.matches("") is True
    assert code5.matches("<!-- nothing -->") is True
    assert code5.matches(("a", "b", "")) is True


def test_filter_family():
    """test the Wikicode.i?filter() family of functions"""

    def genlist(gen):
        assert isinstance(gen, GeneratorType)
        return list(gen)

    ifilter = lambda code: (lambda *a, **k: genlist(code.ifilter(*a, **k)))

    code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]")
    for func in (code.filter, ifilter(code)):
        assert [
            "a",
            "{{b}}",
            "b",
            "c",
            "[[d]]",
            "d",
            "{{{e}}}",
            "e",
            "{{f}}",
            "f",
            "[[g]]",
            "g",
        ] == func()
        assert ["{{{e}}}"] == func(forcetype=Argument)
        assert code.get(4) is func(forcetype=Argument)[0]
        assert list("abcdefg") == func(forcetype=Text)
        assert [] == func(forcetype=Heading)
        with pytest.raises(TypeError):
            func(forcetype=True)

    funcs = [
        lambda name, **kw: getattr(code, "filter_" + name)(**kw),
        lambda name, **kw: genlist(getattr(code, "ifilter_" + name)(**kw)),
    ]
    for get_filter in funcs:
        assert ["{{{e}}}"] == get_filter("arguments")
        assert code.get(4) is get_filter("arguments")[0]
        assert [] == get_filter("comments")
        assert [] == get_filter("external_links")
        assert [] == get_filter("headings")
        assert [] == get_filter("html_entities")
        assert [] == get_filter("tags")
        assert ["{{b}}", "{{f}}"] == get_filter("templates")
        assert list("abcdefg") == get_filter("text")
        assert ["[[d]]", "[[g]]"] == get_filter("wikilinks")

    code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}")
    for func in (code2.filter, ifilter(code2)):
        assert ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"] == func(
            recursive=False, forcetype=Template
        )
        assert [
            "{{a|{{b}}|{{c|d={{f}}{{h}}}}}}",
            "{{b}}",
            "{{c|d={{f}}{{h}}}}",
            "{{f}}",
            "{{h}}",
        ] == func(recursive=True, forcetype=Template)

    code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}{{barfoo}}")
    for func in (code3.filter, ifilter(code3)):
        assert ["{{foobar}}", "{{barfoo}}"] == func(
            False, matches=lambda node: "foo" in node
        )
        assert ["{{foobar}}", "{{FOO}}", "{{barfoo}}"] == func(False, matches=r"foo")
        assert ["{{foobar}}", "{{FOO}}"] == func(matches=r"^{{foo.*?}}")
        # Overriding flags drops the default IGNORECASE, excluding {{FOO}}.
        assert ["{{foobar}}"] == func(matches=r"^{{foo.*?}}", flags=re.UNICODE)
        assert ["{{baz}}", "{{bz}}"] == func(matches=r"^{{b.*?z")
        assert ["{{baz}}"] == func(matches=r"^{{b.+?z}}")

    exp_rec = [
        "{{a|{{b}}|{{c|d={{f}}{{h}}}}}}",
        "{{b}}",
        "{{c|d={{f}}{{h}}}}",
        "{{f}}",
        "{{h}}",
    ]
    exp_unrec = ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"]
    assert exp_rec == code2.filter_templates()
    assert exp_unrec == code2.filter_templates(recursive=False)
    assert exp_rec == code2.filter_templates(recursive=True)
    assert exp_rec == code2.filter_templates(True)
    assert exp_unrec == code2.filter_templates(False)

    assert ["{{foobar}}"] == code3.filter_templates(
        matches=lambda node: node.name.matches("Foobar")
    )
    assert ["{{baz}}", "{{bz}}"] == code3.filter_templates(matches=r"^{{b.*?z")
    assert [] == code3.filter_tags(matches=r"^{{b.*?z")
    assert [] == code3.filter_tags(matches=r"^{{b.*?z", flags=0)
    with pytest.raises(TypeError):
        code.filter_templates(a=42)
    with pytest.raises(TypeError):
        code.filter_templates(forcetype=Template)
    with pytest.raises(TypeError):
        code.filter_templates(1, 0, 0, Template)

    code4 = parse("{{foo}}{{foo|{{bar}}}}")
    actual1 = code4.filter_templates(recursive=code4.RECURSE_OTHERS)
    actual2 = code4.filter_templates(code4.RECURSE_OTHERS)
    assert ["{{foo}}", "{{foo|{{bar}}}}"] == actual1
    assert ["{{foo}}", "{{foo|{{bar}}}}"] == actual2
p4_IIIA2 = "==== Section III.A.2 ====\nEven more text.\n" + p4_IIIA2ai1 p4_IIIA = "=== Section III.A ===\nText.\n" + p4_IIIA1a + p4_IIIA2 p4_III = "== Section III ==\n" + p4_IIIA page4 = parse(p4_lead + p4_I + p4_II + p4_III) assert [""] == page1.get_sections() assert ["", "==Heading=="] == page2.get_sections() assert [ "", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n", ] == page3.get_sections() assert [ p4_lead, p4_I, p4_IA, p4_IB, p4_IB1, p4_II, p4_III, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1, ] == page4.get_sections() assert ["====Gnidaeh====\n"] == page3.get_sections(levels=[4]) assert ["===Heading===\nFoo bar baz\n====Gnidaeh====\n"] == page3.get_sections( levels=(2, 3) ) assert ["===Heading===\nFoo bar baz\n"] == page3.get_sections( levels=(2, 3), flat=True ) assert [] == page3.get_sections(levels=[0]) assert ["", "====Gnidaeh====\n"] == page3.get_sections( levels=[4], include_lead=True ) assert [ "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n", ] == page3.get_sections(include_lead=False) assert ["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"] == page3.get_sections( flat=True, include_lead=False ) assert [p4_IB1, p4_IIIA2] == page4.get_sections(levels=[4]) assert [p4_IA, p4_IB, p4_IIIA] == page4.get_sections(levels=[3]) assert [ p4_IA, "=== Section I.B ===\n", "=== Section III.A ===\nText.\n", ] == page4.get_sections(levels=[3], flat=True) assert ["", ""] == page2.get_sections(include_headings=False) assert [ "\nSection I.B.1 body.\n\n•Some content.\n\n", "\nEven more text.\n" + p4_IIIA2ai1, ] == page4.get_sections(levels=[4], include_headings=False) assert [] == page4.get_sections(matches=r"body") assert [p4_I, p4_IA, p4_IB, p4_IB1] == page4.get_sections( matches=r"Section\sI[.\s].*?" ) assert [p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1] == page4.get_sections( matches=r".*?a.*?" 
) assert [p4_IIIA1a, p4_IIIA2ai1] == page4.get_sections( matches=r".*?a.*?", flags=re.U ) assert ["\nMore text.\n", "\nAn invalid section!"] == page4.get_sections( matches=r".*?a.*?", flags=re.U, include_headings=False ) sections = page2.get_sections(include_headings=False) sections[0].append("Lead!\n") sections[1].append("\nFirst section!") assert "Lead!\n==Heading==\nFirst section!" == page2 page5 = parse("X\n== Foo ==\nBar\n== Baz ==\nBuzz") section = page5.get_sections(matches="Foo")[0] section.replace("\nBar\n", "\nBarf ") section.append("{{Haha}}\n") assert "== Foo ==\nBarf {{Haha}}\n" == section assert "X\n== Foo ==\nBarf {{Haha}}\n== Baz ==\nBuzz" == page5 def test_strip_code(): """test Wikicode.strip_code()""" # Since individual nodes have test cases for their __strip__ methods, # we're only going to do an integration test: code = parse("Foo [[bar]]\n\n{{baz|hello}}\n\n[[a|b]] Σ") assert "Foo bar\n\nb Σ" == code.strip_code(normalize=True, collapse=True) assert "Foo bar\n\n\n\nb Σ" == code.strip_code(normalize=True, collapse=False) assert "Foo bar\n\nb Σ" == code.strip_code(normalize=False, collapse=True) assert "Foo bar\n\n\n\nb Σ" == code.strip_code( normalize=False, collapse=False ) assert "Foo bar\n\nhello\n\nb Σ" == code.strip_code( normalize=True, collapse=True, keep_template_params=True ) def test_get_tree(): """test Wikicode.get_tree()""" # Since individual nodes have test cases for their __showtree___ # methods, and the docstring covers all possibilities for the output of # __showtree__, we'll test it only: code = parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}") expected = ( "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= " + "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}" ) assert expected.expandtabs(4) == code.get_tree() mwparserfromhell-0.6.3/tests/test_wikilink.py000066400000000000000000000070711411406531600215130ustar00rootroot00000000000000# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any 
person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ Test cases for the Wikilink node. 
""" import pytest from mwparserfromhell.nodes import Text, Wikilink from .conftest import assert_wikicode_equal, wrap, wraptext def test_str(): """test Wikilink.__str__()""" node = Wikilink(wraptext("foobar")) assert "[[foobar]]" == str(node) node2 = Wikilink(wraptext("foo"), wraptext("bar")) assert "[[foo|bar]]" == str(node2) def test_children(): """test Wikilink.__children__()""" node1 = Wikilink(wraptext("foobar")) node2 = Wikilink(wraptext("foo"), wrap([Text("bar"), Text("baz")])) gen1 = node1.__children__() gen2 = node2.__children__() assert node1.title == next(gen1) assert node2.title == next(gen2) assert node2.text == next(gen2) with pytest.raises(StopIteration): next(gen1) with pytest.raises(StopIteration): next(gen2) def test_strip(): """test Wikilink.__strip__()""" node = Wikilink(wraptext("foobar")) node2 = Wikilink(wraptext("foo"), wraptext("bar")) assert "foobar" == node.__strip__() assert "bar" == node2.__strip__() def test_showtree(): """test Wikilink.__showtree__()""" output = [] getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) node1 = Wikilink(wraptext("foobar")) node2 = Wikilink(wraptext("foo"), wraptext("bar")) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) valid = [ "[[", (getter, node1.title), "]]", "[[", (getter, node2.title), " | ", marker, (getter, node2.text), "]]", ] assert valid == output def test_title(): """test getter/setter for the title attribute""" title = wraptext("foobar") node1 = Wikilink(title) node2 = Wikilink(title, wraptext("baz")) assert title is node1.title assert title is node2.title node1.title = "héhehé" node2.title = "héhehé" assert_wikicode_equal(wraptext("héhehé"), node1.title) assert_wikicode_equal(wraptext("héhehé"), node2.title) def test_text(): """test getter/setter for the text attribute""" text = wraptext("baz") node1 = Wikilink(wraptext("foobar")) node2 = Wikilink(wraptext("foobar"), text) assert 
None is node1.text assert text is node2.text node1.text = "buzz" node2.text = None assert_wikicode_equal(wraptext("buzz"), node1.text) assert None is node2.text mwparserfromhell-0.6.3/tests/tokenizer/000077500000000000000000000000001411406531600202665ustar00rootroot00000000000000mwparserfromhell-0.6.3/tests/tokenizer/arguments.mwtest000066400000000000000000000061351411406531600235450ustar00rootroot00000000000000name: blank label: argument with no content input: "{{{}}}" output: [ArgumentOpen(), ArgumentClose()] --- name: blank_with_default label: argument with no content but a pipe input: "{{{|}}}" output: [ArgumentOpen(), ArgumentSeparator(), ArgumentClose()] --- name: basic label: simplest type of argument input: "{{{argument}}}" output: [ArgumentOpen(), Text(text="argument"), ArgumentClose()] --- name: default label: argument with a default value input: "{{{foo|bar}}}" output: [ArgumentOpen(), Text(text="foo"), ArgumentSeparator(), Text(text="bar"), ArgumentClose()] --- name: blank_with_multiple_defaults label: no content, multiple pipes input: "{{{|||}}}" output: [ArgumentOpen(), ArgumentSeparator(), Text(text="||"), ArgumentClose()] --- name: multiple_defaults label: multiple values separated by pipes input: "{{{foo|bar|baz}}}" output: [ArgumentOpen(), Text(text="foo"), ArgumentSeparator(), Text(text="bar|baz"), ArgumentClose()] --- name: newline label: newline as only content input: "{{{\n}}}" output: [ArgumentOpen(), Text(text="\n"), ArgumentClose()] --- name: right_braces label: multiple } scattered throughout text input: "{{{foo}b}a}r}}}" output: [ArgumentOpen(), Text(text="foo}b}a}r"), ArgumentClose()] --- name: right_braces_default label: multiple } scattered throughout text, with a default value input: "{{{foo}b}|}a}r}}}" output: [ArgumentOpen(), Text(text="foo}b}"), ArgumentSeparator(), Text(text="}a}r"), ArgumentClose()] --- name: nested label: an argument nested within another argument input: "{{{{{{foo}}}|{{{bar}}}}}}" output: [ArgumentOpen(), 
ArgumentOpen(), Text(text="foo"), ArgumentClose(), ArgumentSeparator(), ArgumentOpen(), Text(text="bar"), ArgumentClose(), ArgumentClose()] --- name: invalid_braces label: invalid argument: multiple braces that are not part of a template or argument input: "{{{foo{{[a}}}}}" output: [Text(text="{{{foo{{[a}}}}}")] --- name: incomplete_open_only label: incomplete arguments: just an open input: "{{{" output: [Text(text="{{{")] --- name: incomplete_open_text label: incomplete arguments: an open with some text input: "{{{foo" output: [Text(text="{{{foo")] --- name: incomplete_open_text_pipe label: incomplete arguments: an open, text, then a pipe input: "{{{foo|" output: [Text(text="{{{foo|")] --- name: incomplete_open_pipe label: incomplete arguments: an open, then a pipe input: "{{{|" output: [Text(text="{{{|")] --- name: incomplete_open_pipe_text label: incomplete arguments: an open, then a pipe, then text input: "{{{|foo" output: [Text(text="{{{|foo")] --- name: incomplete_open_pipes_text label: incomplete arguments: a pipe, then text then two pipes input: "{{{|f||" output: [Text(text="{{{|f||")] --- name: incomplete_open_partial_close label: incomplete arguments: an open, then one right brace input: "{{{{}" output: [Text(text="{{{{}")] --- name: incomplete_preserve_previous label: incomplete arguments: a valid argument followed by an invalid one input: "{{{foo}}} {{{bar" output: [ArgumentOpen(), Text(text="foo"), ArgumentClose(), Text(text=" {{{bar")] mwparserfromhell-0.6.3/tests/tokenizer/comments.mwtest000066400000000000000000000020041411406531600233540ustar00rootroot00000000000000name: blank label: a blank comment input: "" output: [CommentStart(), CommentEnd()] --- name: basic label: a basic comment input: "" output: [CommentStart(), Text(text=" comment "), CommentEnd()] --- name: tons_of_nonsense label: a comment with tons of ignorable garbage in it input: "" output: [CommentStart(), Text(text=" foo{{bar}}[[basé\n\n]{}{}{}{}]{{{{{{haha{{--a>aabsp;" output: 
[Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(text="bsp;")] --- name: rich_tags label: a HTML tag with tons of other things in it input: "{{dubious claim}}[[Source]]" output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()] --- name: wildcard label: a wildcard assortment of various things input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] --- name: wildcard_redux label: an even 
wilder assortment of various things input: "{{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}]]{{i|j= }}" output: [TemplateOpen(), Text(text="a"), TemplateParamSeparator(), Text(text="b"), TemplateParamSeparator(), TemplateOpen(), Text(text="c"), TemplateParamSeparator(), WikilinkOpen(), Text(text="d"), WikilinkClose(), ArgumentOpen(), Text(text="e"), ArgumentClose(), TemplateClose(), TemplateClose(), WikilinkOpen(), Text(text="f"), WikilinkSeparator(), ArgumentOpen(), Text(text="g"), ArgumentClose(), CommentStart(), Text(text="h"), CommentEnd(), WikilinkClose(), TemplateOpen(), Text(text="i"), TemplateParamSeparator(), Text(text="j"), TemplateParamEquals(), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), TemplateClose()] --- name: link_inside_dl label: an external link inside a def list, such that the external link is parsed input: ";;;mailto:example" output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), ExternalLinkOpen(brackets=False), Text(text="mailto:example"), ExternalLinkClose()] --- name: link_inside_dl_2 label: an external link inside a def list, such that the external link is not parsed input: ";;;malito:example" output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="malito"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="example")] --- name: link_inside_template label: an external link nested inside a template, before the end input: "{{URL|http://example.com}}" output: [TemplateOpen(), Text(text="URL"), TemplateParamSeparator(), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), TemplateClose()] --- name: link_inside_template_2 label: an external link nested inside a 
template, before a separator input: "{{URL|http://example.com|foobar}}" output: [TemplateOpen(), Text(text="URL"), TemplateParamSeparator(), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), TemplateParamSeparator(), Text(text="foobar"), TemplateClose()] --- name: link_inside_template_3 label: an external link nested inside a template, before an equal sign input: "{{URL|http://example.com=foobar}}" output: [TemplateOpen(), Text(text="URL"), TemplateParamSeparator(), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), TemplateParamEquals(), Text(text="foobar"), TemplateClose()] --- name: link_inside_argument label: an external link nested inside an argument input: "{{{URL|http://example.com}}}" output: [ArgumentOpen(), Text(text="URL"), ArgumentSeparator(), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), ArgumentClose()] --- name: link_inside_heading label: an external link nested inside a heading input: "==http://example.com==" output: [HeadingStart(level=2), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), HeadingEnd()] --- name: link_inside_tag_body label: an external link nested inside the body of a tag input: "http://example.com" output: [TagOpenOpen(), Text(text="ref"), TagCloseOpen(padding=""), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()] --- name: link_inside_tag_style label: an external link nested inside style tags input: "''http://example.com''" output: [TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), TagOpenClose(), Text(text="i"), TagCloseClose()] --- name: style_tag_inside_link label: style tags disrupting an external link input: "http://example.com/foo''bar''" output: [ExternalLinkOpen(brackets=False), 
Text(text="http://example.com/foo"), ExternalLinkClose(), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="bar"), TagOpenClose(), Text(text="i"), TagCloseClose()] --- name: comment_inside_link label: an HTML comment inside an external link input: "http://example.com/foobar" output: [ExternalLinkOpen(brackets=False), Text(text="http://example.com/foo"), CommentStart(), Text(text="comment"), CommentEnd(), Text(text="bar"), ExternalLinkClose()] --- name: bracketed_link_inside_template label: a bracketed external link nested inside a template, before the end input: "{{URL|[http://example.com}}]" output: [Text(text="{{URL|"), ExternalLinkOpen(brackets=True), Text(text="http://example.com}}"), ExternalLinkClose()] --- name: comment_inside_bracketed_link label: an HTML comment inside a bracketed external link input: "[http://example.com/foobar]" output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo"), CommentStart(), Text(text="comment"), CommentEnd(), Text(text="bar"), ExternalLinkClose()] --- name: wikilink_inside_external_link label: a wikilink inside an external link, which the parser considers valid (see issue #61) input: "[http://example.com/foo Foo [[Bar]]]" output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo"), ExternalLinkSeparator(), Text(text="Foo "), WikilinkOpen(), Text(text="Bar"), WikilinkClose(), ExternalLinkClose()] --- name: external_link_inside_wikilink label: an external link inside a wikilink, valid in the case of images (see issue #62) input: "[[File:Example.png|thumb|http://example.com]]" output: [WikilinkOpen(), Text(text="File:Example.png"), WikilinkSeparator(), Text(text="thumb|"), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), WikilinkClose()] --- name: external_link_inside_wikilink_brackets label: an external link with brackets inside a wikilink input: "[[File:Example.png|thumb|[http://example.com Example]]]" output: [WikilinkOpen(), 
Text(text="File:Example.png"), WikilinkSeparator(), Text(text="thumb|"), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose(), WikilinkClose()] --- name: external_link_inside_wikilink_title label: an external link inside a wikilink title, which is not parsed input: "[[File:Example.png http://example.com]]" output: [WikilinkOpen(), Text(text="File:Example.png http://example.com"), WikilinkClose()] --- name: italics_inside_external_link_inside_incomplete_list label: italic text inside an external link inside an incomplete list input: "
  • [http://www.example.com ''example'']" output: [TagOpenOpen(), Text(text="li"), TagCloseSelfclose(padding="", implicit=True), ExternalLinkOpen(brackets=True), Text(text="http://www.example.com"), ExternalLinkSeparator(), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="example"), TagOpenClose(), Text(text="i"), TagCloseClose(), ExternalLinkClose()] --- name: nodes_inside_external_link_after_punct label: various complex nodes inside an external link following punctuation input: "http://example.com/foo.{{bar}}baz.&biz;bingo" output: [ExternalLinkOpen(brackets=False), Text(text="http://example.com/foo."), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz.&biz;"), CommentStart(), Text(text="hello"), CommentEnd(), Text(text="bingo"), ExternalLinkClose()] --- name: newline_and_comment_in_template_name label: a template name containing a newline followed by a comment input: "{{foobar\n}}" output: [TemplateOpen(), Text(text="foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), TemplateClose()] --- name: newline_and_comment_in_template_name_2 label: a template name containing a newline followed by a comment input: "{{foobar\n|key=value}}" output: [TemplateOpen(), Text(text="foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), TemplateParamSeparator(), Text(text="key"), TemplateParamEquals(), Text(text="value"), TemplateClose()] --- name: newline_and_comment_in_template_name_3 label: a template name containing a newline followed by a comment input: "{{foobar\n\n|key=value}}" output: [TemplateOpen(), Text(text="foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\n"), TemplateParamSeparator(), Text(text="key"), TemplateParamEquals(), Text(text="value"), TemplateClose()] --- name: newline_and_comment_in_template_name_4 label: a template name containing a newline followed by a comment input: "{{foobar\ninvalid|key=value}}" output: [Text(text="{{foobar\n"), CommentStart(), Text(text=" 
comment "), CommentEnd(), Text(text="invalid|key=value}}")] --- name: newline_and_comment_in_template_name_5 label: a template name containing a newline followed by a comment input: "{{foobar\n\ninvalid|key=value}}" output: [Text(text="{{foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\ninvalid|key=value}}")] --- name: newline_and_comment_in_template_name_6 label: a template name containing a newline followed by a comment input: "{{foobar\n\nfoobar\n}}" output: [TemplateOpen(), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\nfoobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), TemplateClose()] --- name: tag_in_link_title label: HTML tags are invalid in link titles, even when complete input: "[[foobarbaz]]" output: [Text(text="[[foo"), TagOpenOpen(), Text(text="i"), TagCloseOpen(padding=""), Text(text="bar"), TagOpenClose(), Text(text="i"), TagCloseClose(), Text(text="baz]]")] --- name: tag_in_template_name label: HTML tags are invalid in template names, even when complete input: "{{foobarbaz}}" output: [Text(text="{{foo"), TagOpenOpen(), Text(text="i"), TagCloseOpen(padding=""), Text(text="bar"), TagOpenClose(), Text(text="i"), TagCloseClose(), Text(text="baz}}")] --- name: tag_in_link_text label: HTML tags are valid in link text input: "[[foo|barbaz]]" output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), TagOpenOpen(), Text(text="i"), TagCloseOpen(padding=""), Text(text="bar"), TagOpenClose(), Text(text="i"), TagCloseClose(), Text(text="baz"), WikilinkClose()] --- name: comment_in_link_title label: comments are valid in link titles input: "[[foobaz]]" output: [WikilinkOpen(), Text(text="foo"), CommentStart(), Text(text="bar"), CommentEnd(), Text(text="baz"), WikilinkClose()] --- name: incomplete_comment_in_link_title label: incomplete comments are invalid in link titles input: "[[foo