pax_global_header00006660000000000000000000000064145716022160014516gustar00rootroot0000000000000052 comment=b24840900c42a5d81259afcacfc2e26189ac1458 utidylib-0.10/000077500000000000000000000000001457160221600132635ustar00rootroot00000000000000utidylib-0.10/.github/000077500000000000000000000000001457160221600146235ustar00rootroot00000000000000utidylib-0.10/.github/FUNDING.yml000066400000000000000000000000561457160221600164410ustar00rootroot00000000000000github: [nijel] liberapay: nijel polar: nijel utidylib-0.10/.github/dependabot.yml000066400000000000000000000005451457160221600174570ustar00rootroot00000000000000# This file is generated in https://github.com/WeblateOrg/meta/ version: 2 updates: - package-ecosystem: github-actions directory: / schedule: interval: daily labels: - dependencies - automerge - package-ecosystem: pip directory: / schedule: interval: daily labels: - dependencies - automerge utidylib-0.10/.github/workflows/000077500000000000000000000000001457160221600166605ustar00rootroot00000000000000utidylib-0.10/.github/workflows/pre-commit.yml000066400000000000000000000014011457160221600214530ustar00rootroot00000000000000name: Pre-commit check on: push: branches-ignore: - dependabot/** - weblate pull_request: jobs: pre-commit: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: | ~/.cache/pip ~/.cache/pre-commit key: ${{ runner.os }}-pre-commit-${{ hashFiles('**/requirements*.txt') }}-${{ hashFiles('.pre-commit-config.yaml') }} - name: Setup Python uses: actions/setup-python@v5 with: python-version: 3.9 - name: Install dependencies run: | python -m pip install --upgrade pip wheel pip install -r requirements-lint.txt - name: pre-commit run: pre-commit run --all utidylib-0.10/.github/workflows/setup.yml000066400000000000000000000016341457160221600205470ustar00rootroot00000000000000name: Distribution on: [push, pull_request] jobs: setup: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-setup-${{ hashFiles('**/requirements*.txt') }} - name: Setup Python uses: actions/setup-python@v5 with: python-version: 3.12 - name: Install dependencies run: | python -m pip install --upgrade pip wheel pip install -r requirements-test.txt - name: build run: python ./setup.py sdist bdist_wheel - name: twine run: twine check dist/* - name: Publish package if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@v1.8.12 with: user: __token__ password: ${{ secrets.pypi_password }} utidylib-0.10/.github/workflows/test.yml000066400000000000000000000036701457160221600203700ustar00rootroot00000000000000name: Test on: [push, pull_request] jobs: test: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: - ubuntu-22.04 - macos-latest python-version: - '3.8' - '3.9' - '3.10' - '3.11' - '3.12' tidy-version: - 5.6.0 include: - os: ubuntu-22.04 python-version: '3.11' tidy-version: 5.8.0 - os: ubuntu-20.04 python-version: '3.11' tidy-version: os name: ${{ matrix.os }}, Python ${{ matrix.python-version }}, tidy ${{ matrix.tidy-version }} steps: - uses: actions/checkout@v4 - name: Cache pip uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/requirements*.txt') }} - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install tidy run: ./scripts/install-tidy.sh ${{ matrix.tidy-version }} - name: Install pip dependencies run: | python -m pip install --upgrade pip wheel pip install -r requirements.txt -r requirements-test.txt - name: Install editable run: pip install -e . - name: Check used library run: python -c 'import tidy.lib; print(tidy.lib._tidy.lib._name)' - name: Check used library version run: python -c 'import tidy.lib; print(tidy.lib.getTidyVersion())' - name: Test run: | py.test --cov=tidy tidy - name: Coverage run: | coverage xml - uses: codecov/codecov-action@v4 with: token: ${{secrets.CODECOV_TOKEN}} flags: unittests name: Python ${{ matrix.python-version }}, ${{ matrix.os }}, tidy ${{ matrix.tidy-version }} utidylib-0.10/.gitignore000066400000000000000000000001421457160221600152500ustar00rootroot00000000000000*.swp *.pyc /build/ /dist/ /.venv* /uTidylib.egg-info/ .coverage /.cache/ /.pytest_cache/ /.idea/ utidylib-0.10/.pre-commit-config.yaml000066400000000000000000000022521457160221600175450ustar00rootroot00000000000000# See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml - id: check-toml - id: requirements-txt-fixer - id: check-merge-conflict - id: debug-statements - id: mixed-line-ending args: [--fix=lf] - repo: https://github.com/adrienverge/yamllint rev: v1.29.0 hooks: - id: yamllint - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.3.0 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - id: ruff-format - repo: https://github.com/asottile/blacken-docs rev: 1.13.0 hooks: - id: blacken-docs - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks rev: v2.12.0 hooks: - id: pretty-format-toml args: [--autofix] - id: pretty-format-yaml args: [--autofix, --indent, '2', --offset, '2'] - repo: meta hooks: - id: check-hooks-apply - id: check-useless-excludes exclude: ^docs/make.bat$ utidylib-0.10/.readthedocs.yml000066400000000000000000000010501457160221600163450ustar00rootroot00000000000000# Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py # Optionally build your docs in additional formats such as PDF and ePub formats: all build: os: ubuntu-22.04 tools: python: '3.11' apt_packages: - libtidy5deb1 # Optionally set the version of Python and requirements required to build your docs python: install: - requirements: docs/requirements.txt utidylib-0.10/.yamllint.yml000066400000000000000000000001401457160221600157100ustar00rootroot00000000000000extends: default rules: line-length: max: 500 level: error document-start: disable utidylib-0.10/CHANGES.rst000066400000000000000000000017561457160221600150760ustar00rootroot00000000000000Changes ======= 0.10 ---- * Dropped support for Python 3.7. * Added support for Python 3.12. * Added type hints. * Improved documentation. * Always call CleanAndRepair after parsing. * Fixed handling char_encoding argument. 0.9 --- * Dropped support for Python 3.6. * Added support for Python 3.10 and 3.11. * Compatibility with html-tidy 5.8.0. * Added support for specifying library full path using TIDY_LIBRARY_FULL_PATH. * Added getTidyVersion to get libtidy version. 0.8 --- * Code cleanups. * Fixed typo in 0.7 release notes. 0.7 --- * Dropped support for Python 2. 0.6 --- * First official release PyPI. 0.5 --- * Fixed compatibility with Debian patched libtidy5deb1. 0.4 --- * Compatibility with html-tidy 5.6.0. * Added support for Python 3. 0.3 --- * Initial release under new maintainer. * Incorporated Debian patches. * Various compatiblity fixes (eg. with 64-bit machines). * Various code cleanups. * New test suite. * New documentation. * Support for new HTML 5 tidy library. utidylib-0.10/LICENSE000066400000000000000000000022061457160221600142700ustar00rootroot00000000000000The MIT License Copyright (c) 2003 Cory Dodt Copyright (c) 2014-2018 Michal Čihař Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. utidylib-0.10/MANIFEST.in000066400000000000000000000004161457160221600150220ustar00rootroot00000000000000include LICENSE include *.py include scripts/* include README.* include CHANGES.* include MANIFEST.in include docs/conf.py include docs/Makefile include docs/make.bat include pytest.ini include requirements.txt include requirements-test.txt recursive-include docs *.rst utidylib-0.10/README.rst000066400000000000000000000034611457160221600147560ustar00rootroot00000000000000uTidylib ======== .. image:: https://github.com/nijel/utidylib/actions/workflows/test.yml/badge.svg :target: https://github.com/nijel/utidylib/actions/workflows/test.yml :alt: Build Status .. image:: https://codecov.io/gh/nijel/utidylib/branch/master/graph/badge.svg :target: https://codecov.io/gh/nijel/utidylib :alt: Coverage Status .. image:: https://readthedocs.org/projects/utidylib/badge/?version=latest :target: http://utidylib.readthedocs.org/en/latest/ :alt: Documentation .. image:: https://img.shields.io/pypi/v/uTidylib :target: https://pypi.org/project/uTidylib/ :alt: PyPI - Version This is uTidylib, the Python wrapper for the HTML cleaning library named TidyLib. It supports both original Tidy and new HTML5 enabled Tidy . The package is available on PyPI . Once installed, there are two ways to get help. The simplest is: .. code-block:: sh $ python >>> import tidy >>> help(tidy) . . . Then, of course, there's the API documentation, which is available at . 10 Second Tutorial ------------------ .. code-block:: pycon >>> import tidy >>> print( ... tidy.parseString( ... "Hello Tidy!", ... output_xhtml=1, ... add_xml_decl=1, ... indent=1, ... tidy_mark=0, ... doctype="transitional", ... ) ... ) Hello Tidy! Good luck! utidylib-0.10/docs/000077500000000000000000000000001457160221600142135ustar00rootroot00000000000000utidylib-0.10/docs/.gitignore000066400000000000000000000000071457160221600162000ustar00rootroot00000000000000_build utidylib-0.10/docs/Makefile000066400000000000000000000151621457160221600156600ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " xml to make Docutils-native XML files" @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/uTidylib.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/uTidylib.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/uTidylib" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/uTidylib" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." latexpdfja: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." utidylib-0.10/docs/conf.py000066400000000000000000000057661457160221600155300ustar00rootroot00000000000000# # uTidylib documentation build configuration file, created by # sphinx-quickstart on Wed Aug 13 11:25:46 2014. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import os import sys # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath("..")) # -- General configuration ------------------------------------------------ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ "sphinx.ext.autodoc", ] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix of source filenames. source_suffix = ".rst" # The master toctree document. master_doc = "index" # General information about the project. project = "uTidylib" copyright = "uTidylib contributors" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = "0.10" # The full version, including alpha/beta/rc tags. release = version # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ["_build"] # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = "furo" # Output file base name for HTML help builder. htmlhelp_basename = "uTidylibdoc" # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ ( "index", "uTidylib.tex", "uTidylib Documentation", "uTidylib contributors", "manual", ), ] # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ("index", "utidylib", "uTidylib Documentation", ["uTidylib contributors"], 1), ] # -- Options for Epub output ---------------------------------------------- # Bibliographic Dublin Core info. epub_title = "uTidylib" epub_author = "uTidylib contributors" epub_publisher = "uTidylib contributors" epub_copyright = copyright # A list of files that should not be packed into the epub file. epub_exclude_files = ["search.html"] utidylib-0.10/docs/index.rst000066400000000000000000000031671457160221600160630ustar00rootroot00000000000000Welcome to uTidylib's documentation! ==================================== .. automodule:: tidy .. autofunction:: parse .. autofunction:: parseString .. autoclass:: Document :members: .. autoclass:: ReportItem :members: .. autoexception:: TidyLibError .. autoexception:: InvalidOptionError .. autoexception:: OptionArgError Installing ========== To use uTidylib, you need to have HTML tidy library installed. Check for instructions how to obtain it. Once you have installed the library, install uTidylib: .. code-block:: sh pip install uTidylib Contributing ============ You are welcome to contribute on GitHub, we use it for source code management, issue tracking and patches submission, see . Running testsuite ================= The testsuite can be exececuted using pytest: .. code-block:: sh pytest tidy Building documentation ====================== To build the doc, just run: .. code-block:: sh make -C docs html This requires that you have Sphinx installed. The API documentation will be built in the :file:`docs/_build/html/` directory. License ======= .. include:: ../LICENSE .. include:: ../CHANGES.rst History ======= This is fork of the original uTidylib with permission with original author. Originally it incorporated patches from Debian and other distributions, now it also brings compatibility with recent html-tidy versions and works with Python 3. The original source code is still available at https://github.com/xdissent/utidylib/. Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` utidylib-0.10/docs/make.bat000066400000000000000000000150611457160221600156230ustar00rootroot00000000000000@ECHO OFF REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set BUILDDIR=_build set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . set I18NSPHINXOPTS=%SPHINXOPTS% . if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% ) if "%1" == "" goto help if "%1" == "help" ( :help echo.Please use `make ^` where ^ is one of echo. html to make standalone HTML files echo. dirhtml to make HTML files named index.html in directories echo. singlehtml to make a single large HTML file echo. pickle to make pickle files echo. json to make JSON files echo. htmlhelp to make HTML files and a HTML help project echo. qthelp to make HTML files and a qthelp project echo. devhelp to make HTML files and a Devhelp project echo. epub to make an epub echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter echo. text to make text files echo. man to make manual pages echo. texinfo to make Texinfo files echo. gettext to make PO message catalogs echo. changes to make an overview over all changed/added/deprecated items echo. xml to make Docutils-native XML files echo. pseudoxml to make pseudoxml-XML files for display purposes echo. linkcheck to check all external links for integrity echo. doctest to run all doctests embedded in the documentation if enabled goto end ) if "%1" == "clean" ( for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i del /q /s %BUILDDIR%\* goto end ) %SPHINXBUILD% 2> nul if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) if "%1" == "html" ( %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html. goto end ) if "%1" == "dirhtml" ( %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. goto end ) if "%1" == "singlehtml" ( %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. goto end ) if "%1" == "pickle" ( %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the pickle files. goto end ) if "%1" == "json" ( %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the JSON files. goto end ) if "%1" == "htmlhelp" ( %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run HTML Help Workshop with the ^ .hhp project file in %BUILDDIR%/htmlhelp. goto end ) if "%1" == "qthelp" ( %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in %BUILDDIR%/qthelp, like this: echo.^> qcollectiongenerator %BUILDDIR%\qthelp\uTidylib.qhcp echo.To view the help file: echo.^> assistant -collectionFile %BUILDDIR%\qthelp\uTidylib.ghc goto end ) if "%1" == "devhelp" ( %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp if errorlevel 1 exit /b 1 echo. echo.Build finished. goto end ) if "%1" == "epub" ( %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub if errorlevel 1 exit /b 1 echo. echo.Build finished. The epub file is in %BUILDDIR%/epub. goto end ) if "%1" == "latex" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex if errorlevel 1 exit /b 1 echo. echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdf" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf cd %BUILDDIR%/.. echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdfja" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf-ja cd %BUILDDIR%/.. echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "text" ( %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text if errorlevel 1 exit /b 1 echo. echo.Build finished. The text files are in %BUILDDIR%/text. goto end ) if "%1" == "man" ( %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man if errorlevel 1 exit /b 1 echo. echo.Build finished. The manual pages are in %BUILDDIR%/man. goto end ) if "%1" == "texinfo" ( %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo if errorlevel 1 exit /b 1 echo. echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. goto end ) if "%1" == "gettext" ( %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale if errorlevel 1 exit /b 1 echo. echo.Build finished. The message catalogs are in %BUILDDIR%/locale. goto end ) if "%1" == "changes" ( %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes if errorlevel 1 exit /b 1 echo. echo.The overview file is in %BUILDDIR%/changes. goto end ) if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ or in %BUILDDIR%/linkcheck/output.txt. goto end ) if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest if errorlevel 1 exit /b 1 echo. echo.Testing of doctests in the sources finished, look at the ^ results in %BUILDDIR%/doctest/output.txt. goto end ) if "%1" == "xml" ( %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml if errorlevel 1 exit /b 1 echo. echo.Build finished. The XML files are in %BUILDDIR%/xml. goto end ) if "%1" == "pseudoxml" ( %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml if errorlevel 1 exit /b 1 echo. echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. goto end ) :end utidylib-0.10/docs/requirements.txt000066400000000000000000000000361457160221600174760ustar00rootroot00000000000000furo==2024.1.29 Sphinx==7.2.6 utidylib-0.10/pyproject.toml000066400000000000000000000014271457160221600162030ustar00rootroot00000000000000[tool.ruff] target-version = "py38" [tool.ruff.lint] extend-safe-fixes = [ "ANN", "D", "TCH", "FLY", "RUF005", "SIM", "UP" ] ignore = [ "PT", # CONFIG: Not using pytest "D203", # CONFIG: incompatible with D211 "D212", # CONFIG: incompatible with D213 "COM812", # CONFIG: incompatible with formatter "ISC001", # CONFIG: incompatible with formatter "ANN101", # TODO: Missing type annotation for `self` in method "S101", # TODO: Use of `assert` detected "D100", "D101", "D102", "D103", "D105", "D107", "PTH100", # TODO: Use pathlib "PTH118", # TODO: Use pathlib "PTH120", # TODO: Use pathlib "EM101", "TRY003" ] select = ["ALL"] [tool.ruff.lint.per-file-ignores] "docs/conf.py" = ["INP001", "A001"] "tidy/lib.py" = ["N802", "N816"] utidylib-0.10/pytest.ini000066400000000000000000000000451457160221600153130ustar00rootroot00000000000000[pytest] addopts = --doctest-modules utidylib-0.10/requirements-lint.txt000066400000000000000000000000301457160221600175040ustar00rootroot00000000000000pre-commit==3.5.0 twine utidylib-0.10/requirements-test.txt000066400000000000000000000000771457160221600175300ustar00rootroot00000000000000-r requirements-lint.txt -r requirements.txt pytest pytest-cov utidylib-0.10/requirements.txt000066400000000000000000000000001457160221600165350ustar00rootroot00000000000000utidylib-0.10/scripts/000077500000000000000000000000001457160221600147525ustar00rootroot00000000000000utidylib-0.10/scripts/create-release000077500000000000000000000013161457160221600175620ustar00rootroot00000000000000#!/bin/sh set -e if [ "x$1" = "x--help" -o "x$1" = "x-h" ] ; then echo "Usage: ./scripts/create-release [--tag]" exit 1 fi if ! git diff --exit-code --quiet ; then echo "There are not committed changes!" exit 1 fi # Grab version version=`python -c 'import tidy; print(tidy.__version__)'` # What are we going to build? if [ "x$1" = "x--tag" ] ; then git tag -s v$version -m "Version $version" fi # Create tarball ./setup.py sdist --formats bztar,gztar,zip # Optionally upload if [ "x$1" = "x--tag" ] ; then twine upload --sign --identity 63CB1DF1EF12CF2AC0EE5A329C27B31342B7511D dist/uTidylib-$version.tar.gz scp dist/uTidylib-$version.* web:/home/nijel/srv/dl.cihar.com/utidylib/ fi utidylib-0.10/scripts/install-tidy.sh000077500000000000000000000011211457160221600177210ustar00rootroot00000000000000#!/bin/sh set -e if [ -z "$1" ] ; then echo "Usage: install-tidy.sh VERSION" exit 1 fi CMAKE_ARGS="" if which apt-get ; then if [ "$1" = "os" ] ; then sudo apt-get install -y libtidy5deb1 exit 0 else sudo apt-get purge libtidy5deb1 tidy libtidy-dev fi CMAKE_ARGS="-DCMAKE_INSTALL_PREFIX=/usr" fi wget -O tidy.tar.gz https://github.com/htacg/tidy-html5/archive/$1.tar.gz mkdir tidy-source tar xvf tidy.tar.gz --strip-components=1 -C tidy-source rm tidy.tar.gz cd tidy-source/build/cmake/ cmake ../.. -DCMAKE_BUILD_TYPE=Release $CMAKE_ARGS make sudo make install utidylib-0.10/setup.cfg000066400000000000000000000034111457160221600151030ustar00rootroot00000000000000[metadata] name = uTidylib version = attr: tidy.__version__ author = Michal Čihař author_email = michal@cihar.com license = MIT description = Wrapper for HTML Tidy url = https://github.com/nijel/utidylib project_urls = Issue Tracker=https://github.com/nijel/utidylib/issues Documentation=https://utidylib.readthedocs.io/ Source Code=https://github.com/nijel/utidylib Funding=https://liberapay.com/nijel download_url = https://github.com/nijel/utidylib long_description = file: README.rst long_description_content_type = text/x-rst classifiers = Development Status :: 5 - Production/Stable Topic :: Internet License :: OSI Approved :: MIT License Intended Audience :: Developers Environment :: Web Environment Programming Language :: Python Programming Language :: Python :: 3 Programming Language :: Python :: 3 :: Only Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 Programming Language :: Python :: 3.12 [options] packages = tidy python_requires = >=3.8 include_package_data = 1 [options.package_data] tidy = test_data/*.html [flake8] max-complexity = 16 select = E,W1,W2,W3,W504,W505,W6 enable-extensions = B,C,D,F,G,I,M,N,R,SF # Should be fixed: # D10 - we are missing many docstrings # D20* - wrong docstring formatting # D40* - many strings need rephrasing ignore = D10,D200,D202,D204,D205,D209,D400,D401,SF01,N816,N802 exclude = .git,.venv*,build max-line-length = 88 [pycodestyle] select = E,W1,W2,W3,W504,W505,W6 exclude = .git,.venv*,build max-line-length = 88 [isort] multi_line_output = 3 include_trailing_comma = True force_grid_wrap = 0 use_parentheses = True line_length = 88 known_first_party = tidy utidylib-0.10/setup.py000077500000000000000000000001461457160221600150010ustar00rootroot00000000000000#! /usr/bin/env python """Setup file for easy installation.""" from setuptools import setup setup() utidylib-0.10/tidy/000077500000000000000000000000001457160221600142345ustar00rootroot00000000000000utidylib-0.10/tidy/__init__.py000066400000000000000000000032511457160221600163460ustar00rootroot00000000000000""" The Tidy wrapper. I am the main interface to TidyLib. This package supports processing HTML with Tidy, with all the options that the tidy command line supports. For more information on the tidy options, see the reference. These options can be given as keyword arguments to parse and parseString, by changing dashes (-) to underscores(_). For example: >>> import tidy >>> from __future__ import print_function >>> print(tidy.parseString( ... 'Hello Tidy!', ... output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0, ... doctype='transitional' ... )) Hello Tidy! For options like newline and output_encoding, which must be set to one of a fixed number of choices, you can provide either the numeric or string version of the choice; so both tidy.parseString('foo', newline=2) and tidy.parseString('foo', newline='CR') do the same thing. There are no plans to support other features of TidyLib, such as document-tree traversal, since Python has several quality DOM implementations. (The author uses Twisted's implementation, twisted.web.microdom). """ from tidy.error import InvalidOptionError, OptionArgError, TidyLibError from tidy.lib import Document, ReportItem, parse, parseString __all__ = [ "error", "lib", "parse", "parseString", "Document", "ReportItem", "TidyLibError", "InvalidOptionError", "OptionArgError", ] __version__ = "0.10" utidylib-0.10/tidy/error.py000066400000000000000000000007161457160221600157430ustar00rootroot00000000000000"""Exceptions for uTidylib.""" from __future__ import annotations __all__ = ("TidyLibError", "InvalidOptionError", "OptionArgError") class TidyLibError(Exception): """Generic Tidy exception.""" class InvalidOptionError(TidyLibError): """Exception for invalid option.""" def __str__(self) -> str: return "%s was not a valid Tidy option." % (self.args[0]) class OptionArgError(TidyLibError): """Exception for invalid parameter.""" utidylib-0.10/tidy/lib.py000066400000000000000000000255201457160221600153600ustar00rootroot00000000000000from __future__ import annotations import ctypes import io import os import os.path import weakref from abc import ABC, abstractmethod from errno import ENOMEM from typing import ( TYPE_CHECKING, Any, BinaryIO, Callable, ClassVar, Mapping, TypeVar, ) from tidy.error import InvalidOptionError, OptionArgError if TYPE_CHECKING: OPTION_TYPE = str | int | bool | None OPTION_DICT_TYPE = dict[str, OPTION_TYPE] LIBNAMES = ( # Linux "libtidy.so", # MacOS "libtidy.dylib", # Windows "tidy", # Cygwin "cygtidy-0-99-0", # Linux, full soname "libtidy-0.99.so.0", # Linux, full soname "libtidy-0.99.so.0.0.0", # HTML tidy "libtidy.so.5", # Linux, HTML tidy v5.8 "libtidy.so.58", # Debian changed soname "libtidy.so.5deb1", # Windows? "libtidy", # Windows? "tidylib", ) class Loader: """ ctypes.CDLL wrapper. I am a trivial wrapper that eliminates the need for tidy.tidyFoo, so you can just access tidy.Foo. """ def __init__(self, libnames: tuple[str, ...] | None = None) -> None: self.lib: ctypes.CDLL self.libnames: tuple[str, ...] = libnames or LIBNAMES # Add package directory to search path os.environ["PATH"] = "".join( (os.path.dirname(__file__), os.pathsep, os.environ["PATH"]), ) # Add full path to a library lib_path = os.environ.get("TIDY_LIBRARY_FULL_PATH") if lib_path: self.libnames = (lib_path, *self.libnames) # Try loading library for libname in self.libnames: try: self.lib = ctypes.CDLL(libname) break except OSError: continue else: # Fail in case we could not load it raise OSError("Couldn't find libtidy, please make sure it is installed.") # Adjust some types self.Create.restype = ctypes.POINTER(ctypes.c_void_p) self.LibraryVersion.restype = ctypes.c_char_p def __getattr__(self, name: str) -> Any: # noqa: ANN401 return getattr(self.lib, "tidy%s" % name) _tidy = Loader() _putByteFunction = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_char) # define a callback to pass to Tidylib @_putByteFunction def putByte(handle: int, char: int) -> int: """Lookup sink by handle and call its putByte method.""" sinkfactory[handle].putByte(char) return 0 class _OutputSink(ctypes.Structure): _fields_ = (("sinkData", ctypes.c_int), ("putByte", _putByteFunction)) class _Sink: def __init__(self, handle: int) -> None: self._data = io.BytesIO() self.struct = _OutputSink() self.struct.putByte = putByte self.handle = handle def putByte(self, byte: bytes) -> None: self._data.write(byte) def getvalue(self) -> bytes: return self._data.getvalue() class ReportItem: """Error report item as returned by tidy.""" severities: ClassVar[dict[str, str]] = { "W": "Warning", "E": "Error", "C": "Config", "D": "Document", } def __init__(self, err: str) -> None: self.err: str = err #: Whole error message as returned by tidy self.full_severity: str #: Full severity string self.severity: str #: D, W, E or C indicating severity self.message: str #: Error message itself self.line: int | None #: Line where error was fired (can be None) self.col: int | None #: Column where error was fired (can be None) # Parses: # line column - (Error|Warning): # It might be also useful to gnu-emacs reporting mode if err.startswith("line"): tokens = err.split(" ", 6) self.full_severity = tokens[5] self.severity = tokens[5][0] # W, E or C self.line = int(tokens[1]) self.col = int(tokens[3]) self.message = tokens[6] else: tokens = err.split(" ", 1) self.full_severity = tokens[0] self.severity = tokens[0][0] self.message = tokens[1] self.line = None self.col = None def get_severity(self) -> str: try: return self.severities[self.severity] except KeyError: return self.full_severity.strip().rstrip(":") def __str__(self) -> str: if self.line: return "line {} col {} - {}: {}".format( self.line, self.col, self.get_severity(), self.message, ) return f"{self.get_severity()}: {self.message}" def __repr__(self) -> str: return "{}('{}')".format(self.__class__.__name__, str(self).replace("'", "\\'")) K = TypeVar("K") V = TypeVar("V") class FactoryDict(ABC, dict, Mapping[K, V]): """ Custom dict wrapper. I am a dict with a create method and no __setitem__. This allows me to control my own keys. """ @abstractmethod def create(self) -> V: """Generate a new item.""" raise NotImplementedError def _setitem(self, name: K, value: V) -> None: dict.__setitem__(self, name, value) def __setitem__(self, _: K, __: V) -> None: raise TypeError("Use create() to get a new object") class SinkFactory(FactoryDict[int, _Sink]): """Mapping for lookup of sinks by handle.""" def __init__(self) -> None: super().__init__() self.lastsink: int = 0 def create(self) -> _Sink: sink = _Sink(self.lastsink) sink.struct.sinkData = self.lastsink FactoryDict._setitem(self, self.lastsink, sink) # noqa: SLF001 self.lastsink = self.lastsink + 1 return sink sinkfactory = SinkFactory() class Document: """Document object as returned by :func:`parseString` or :func:`parse`.""" def __init__(self, options: OPTION_DICT_TYPE) -> None: self.cdoc = _tidy.Create() self.options = options self.errsink = sinkfactory.create() _tidy.SetErrorSink(self.cdoc, ctypes.byref(self.errsink.struct)) self._set_options() def _set_options(self) -> None: for key, value in self.options.items(): # this will flush out most argument type errors... if value is None: value = "" # noqa: PLW2901 if isinstance(value, bool): value = int(value) # noqa: PLW2901 _tidy.OptParseValue( self.cdoc, key.replace("_", "-").encode("utf-8"), str(value).encode("utf-8"), ) if self.errors: for error in ERROR_MAP: if self.errors[-1].message.startswith(error): raise ERROR_MAP[error](self.errors[-1].message) def __del__(self) -> None: del sinkfactory[self.errsink.handle] def write(self, stream: BinaryIO) -> None: """ :param stream: Writable file like object. Writes document to the stream. """ stream.write(self.getvalue()) def get_errors(self) -> list[ReportItem]: """Return list of errors as a list of :class:`ReportItem`.""" ret = [] for line in self.errsink.getvalue().decode("utf-8").splitlines(): line = line.strip() # noqa: PLW2901 if line: ret.append(ReportItem(line)) return ret @property def errors(self) -> list[ReportItem]: return self.get_errors() def getvalue(self) -> bytes: """Raw string as returned by tidy.""" stlen = ctypes.c_int(8192) string_buffer = ctypes.create_string_buffer(stlen.value) result = _tidy.SaveString(self.cdoc, string_buffer, ctypes.byref(stlen)) if result == -ENOMEM: # buffer too small string_buffer = ctypes.create_string_buffer(stlen.value) _tidy.SaveString(self.cdoc, string_buffer, ctypes.byref(stlen)) return string_buffer.value def gettext(self) -> str: """Unicode text for output returned by tidy.""" output_encoding = self.options["output_encoding"] assert isinstance(output_encoding, str) return self.getvalue().decode(output_encoding) def __str__(self) -> str: return self.gettext() ERROR_MAP = { "missing or malformed argument for option: ": OptionArgError, "unknown option: ": InvalidOptionError, } class DocumentFactory(FactoryDict[weakref.ReferenceType, Document]): @staticmethod def load( doc: Document, arg: bytes, loader: Callable[[Document, bytes], int], ) -> None: status = loader(doc.cdoc, arg) if status >= 0: _tidy.CleanAndRepair(doc.cdoc) def loadFile(self, doc: Document, filename: str) -> None: self.load(doc, filename.encode("utf-8"), _tidy.ParseFile) def loadString(self, doc: Document, text: bytes) -> None: self.load(doc, text, _tidy.ParseString) def create(self, **kwargs: OPTION_TYPE) -> Document: enc = kwargs.get("char_encoding", "utf8") if "output_encoding" not in kwargs: kwargs["output_encoding"] = enc if "input_encoding" not in kwargs: kwargs["input_encoding"] = enc doc = Document(kwargs) ref = weakref.ref(doc, self.releaseDoc) FactoryDict._setitem(self, ref, doc.cdoc) # noqa: SLF001 return doc def parse(self, filename: str, **kwargs: OPTION_TYPE) -> Document: """ Open and process filename as an HTML file. Returning a processed document object. :param kwargs: named options to pass to TidyLib for processing the input file. :param filename: the name of a file to process :return: a :class:`Document` object """ doc = self.create(**kwargs) self.loadFile(doc, filename) return doc def parseString(self, text: bytes | str, **kwargs: OPTION_TYPE) -> Document: """ Use text as an HTML file. Returning a processed document object. :param kwargs: named options to pass to TidyLib for processing the input file. :param text: the string to parse :return: a :class:`Document` object """ doc = self.create(**kwargs) if isinstance(text, str): input_encoding = doc.options["input_encoding"] assert isinstance(input_encoding, str) text = text.encode(input_encoding) self.loadString(doc, text) return doc def releaseDoc(self, ref: weakref.ReferenceType) -> None: _tidy.Release(self[ref]) docfactory = DocumentFactory() parse = docfactory.parse parseString = docfactory.parseString def getTidyVersion() -> str: version = _tidy.lib.tidyLibraryVersion() assert isinstance(version, bytes) return version.decode() utidylib-0.10/tidy/py.typed000066400000000000000000000000001457160221600157210ustar00rootroot00000000000000utidylib-0.10/tidy/test_data/000077500000000000000000000000001457160221600162045ustar00rootroot00000000000000utidylib-0.10/tidy/test_data/test.html000066400000000000000000000001521457160221600200470ustar00rootroot00000000000000

woot


é utidylib-0.10/tidy/test_tidy.py000066400000000000000000000124331457160221600166210ustar00rootroot00000000000000from __future__ import annotations import io import os import pathlib import unittest import tidy import tidy.lib DATA_STORAGE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_data") class TidyTestCase(unittest.TestCase): input1 = "" input2 = "\n" + "

asdkfjhasldkfjhsldjas\n" * 100 test_file = os.path.join(DATA_STORAGE, "test.html") def default_docs(self) -> tuple[tidy.Document, tidy.Document, tidy.Document]: doc1 = tidy.parseString(self.input1) doc2 = tidy.parseString(self.input2) doc3 = tidy.parse(self.test_file, char_encoding="ascii") return (doc1, doc2, doc3) def test_bad_options(self) -> None: badopts = [{"foo": 1}] for opts in badopts: with self.assertRaisesRegex( tidy.InvalidOptionError, "not a valid Tidy option", ): tidy.parseString(self.input2, **opts) def test_bad_option_values(self) -> None: badopts: list[tidy.lib.OPTION_DICT_TYPE] = [ {"indent": "---"}, {"indent_spaces": None}, ] for opts in badopts: with self.assertRaisesRegex( tidy.OptionArgError, "missing or malformed argument", ): tidy.parseString(self.input2, **opts) def test_encodings(self) -> None: text = ( pathlib.Path(self.test_file) .read_bytes() .decode("utf8") .encode("ascii", "xmlcharrefreplace") ) doc1u = tidy.parseString(text, input_encoding="ascii", output_encoding="latin1") self.assertTrue(doc1u.getvalue().find(b"\xe9") >= 0) doc2u = tidy.parseString(text, input_encoding="ascii", output_encoding="utf8") self.assertTrue(doc2u.getvalue().find(b"\xc3\xa9") >= 0) def test_error_lines(self) -> None: for doc in self.default_docs(): self.assertEqual(doc.errors[0].line, 1) def test_nonexisting(self) -> None: os.environ.pop("IGNORE_MISSING_TIDY", None) doc = tidy.parse(os.path.join(DATA_STORAGE, "missing.html")) self.assertEqual(str(doc).strip(), "") self.assertIn("missing.html", doc.errors[0].message) if doc.errors[0].severity == "E": self.assertEqual(doc.errors[0].severity, "E") self.assertTrue(str(doc.errors[0]).startswith("Error")) else: # Tidy 5.5.19 and newer self.assertEqual(doc.errors[0].severity, "D") self.assertTrue(str(doc.errors[0]).startswith("Document")) def test_options(self) -> None: doc1 = tidy.parseString( self.input1, add_xml_decl=1, show_errors=1, newline="CR", output_xhtml=True, ) self.assertIn("CDATA", str(doc1)) doc2 = tidy.parseString( "", add_xml_decl=1, show_errors=1, newline="CR", output_xhtml=True, ) self.assertTrue(str(doc2).startswith(" None: doc1, doc2, doc3 = self.default_docs() self.assertIn("", str(doc1)) self.assertIn("", str(doc2)) self.assertIn("", doc3.gettext()) def test_big(self) -> None: text = "x" * 16384 doc = tidy.parseString(f"{text}") self.assertIn(text, str(doc)) def test_unicode(self) -> None: doc = tidy.parseString("zkouška") self.assertIn("zkouška", doc.gettext()) def test_write(self) -> None: doc = tidy.parseString(self.input1) handle = io.BytesIO() doc.write(handle) self.assertEqual(doc.getvalue(), handle.getvalue()) def test_errors(self) -> None: doc = tidy.parseString(self.input1) for error in doc.errors: self.assertTrue(str(error).startswith("line")) self.assertTrue(repr(error).startswith("ReportItem")) def test_report_item(self) -> None: item = tidy.ReportItem("Invalid: error") self.assertEqual(item.get_severity(), "Invalid") def test_missing_load(self) -> None: with self.assertRaises(OSError): tidy.lib.Loader(libnames=("not-existing-library",)) def test_lib_from_environ(self) -> None: os.environ["TIDY_LIBRARY_FULL_PATH"] = "/foo/bar/tidy" loader = tidy.lib.Loader() expected_libnames = ( "/foo/bar/tidy", "libtidy.so", "libtidy.dylib", "tidy", "cygtidy-0-99-0", "libtidy-0.99.so.0", "libtidy-0.99.so.0.0.0", "libtidy.so.5", "libtidy.so.58", "libtidy.so.5deb1", "libtidy", "tidylib", ) self.assertEqual(loader.libnames, expected_libnames) def test_lib_version(self) -> None: self.assertEqual(len(tidy.lib.getTidyVersion().split(".")), 3) if __name__ == "__main__": unittest.main()