pyglossary-5.0.9 (git commit ac17fde8a825ac24e55a4b0fc92848c0486ecbab)

==> pyglossary-5.0.9/.github/ISSUE_TEMPLATE/feature-request.md <==

---
name: Feature request
about: Suggest/request a feature (new format, option, parameter etc)
title: ''
labels:
---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is.

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Provide links and sample file(s)**
Provide links to the official website and/or download page of the related software or format.
Provide sample file(s) for the format/feature you want to be supported. Attach the file(s) if you can.
If no sample file is publicly downloadable due to copyright, please mention and explain.

==> pyglossary-5.0.9/.github/scripts/create-release.sh <==

VERSION=$(./scripts/version-core)
if pip index versions pyglossary --pre --ignore-requires-python | grep "$VERSION,"; then
	echo "Package version $VERSION already exists on pypi"
	echo "skipnext=true" >>$GITHUB_OUTPUT
	exit 0
fi
sudo rm -rf dist/* build/* || true
python3 setup.py sdist bdist_wheel

==> pyglossary-5.0.9/.github/scripts/get-ruff.sh <==

wget -c https://github.com/astral-sh/ruff/releases/download/0.11.0/ruff-x86_64-unknown-linux-gnu.tar.gz
tar -xzf ruff-*.tar.gz
mv ruff-x86_64-unknown-linux-gnu/ruff .
ls -l ruff
chmod a+x ruff

==> pyglossary-5.0.9/.github/scripts/no-diff.sh <==

CHANGES=$(git diff --name-only HEAD --)
if [ -n "$CHANGES" ]; then
	echo "There are changes after running gen.sh:"
	echo "$CHANGES"
	git diff
	exit 1
fi

==> pyglossary-5.0.9/.github/scripts/test.sh <==

set -x
set +e
export NO_CLEANUP=1
set -o pipefail
bash ./scripts/test.sh 2>&1 | tee test.out
STATUS=$?
set +o pipefail
mkdir artifacts
cp test.out artifacts
grep -o "'/tmp/pyglossary/[^']*'" test.out | sed "s/'//g" | xargs '-I{}' cp '{}' artifacts
ls -l artifacts
set -e
exit $STATUS

==> pyglossary-5.0.9/.github/workflows/codeql.yml <==

# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL Advanced"

on:
  push:
    branches: ["master", "dev", "github-action"]
  pull_request:
    branches: ["master"]
  schedule:
    - cron: "16 7 * * 6"

jobs:
  analyze:
    name: Analyze (${{ matrix.language }})
    # Runner size impacts CodeQL analysis time. To learn more, please see:
    #   - https://gh.io/recommended-hardware-resources-for-running-codeql
    #   - https://gh.io/supported-runners-and-hardware-resources
    #   - https://gh.io/using-larger-runners (GitHub.com only)
    # Consider using larger runners or machines with greater resources for possible analysis time improvements.
    runs-on: ubuntu-latest
    permissions:
      # required for all workflows
      security-events: write
      # required to fetch internal or private CodeQL packs
      packages: read
      # only required for workflows in private repositories
      actions: read
      contents: read
    strategy:
      fail-fast: false
      matrix:
        include:
          - language: python
            build-mode: none
    # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
    # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
    # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
    # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}
          build-mode: ${{ matrix.build-mode }}
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.
          # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
          # queries: security-extended,security-and-quality

      # If the analyze step fails for one of the languages you are analyzing with
      # "We were unable to automatically build your code", modify the matrix above
      # to set the build mode to "manual" for that language. Then modify this step
      # to build your code.
      # ℹ️ Command-line programs to run using the OS shell.
      # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
      - if: matrix.build-mode == 'manual'
        shell: bash
        run: |
          echo 'If you are using a "manual" build mode for one or more of the' \
            'languages you are analyzing, replace this with the commands to build' \
            'your code, for example:'
          echo '  make bootstrap'
          echo '  make release'
          exit 1

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          category: "/language:${{matrix.language}}"

==> pyglossary-5.0.9/.github/workflows/gen.yml <==

name: "Gen"

on:
  push:
  pull_request:
  # The branches below must be a subset of the branches above
  schedule:
    - cron: "33 1 * * 3"

jobs:
  gen:
    name: "Generated files check"
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: 3.13
      - name: List files
        run: ls -l
      - name: Download dependencies
        run: python -m pip install mako lxml
      - name: Generate
        run: ./scripts/gen.sh
      - name: Check for changes
        run: ./.github/scripts/no-diff.sh

==> pyglossary-5.0.9/.github/workflows/mdformat.yml <==

name: "mdformat"

on:
  push:
  pull_request:
  # The branches below must be a subset of the branches above
  schedule:
    - cron: "33 1 * * 3"

jobs:
  mdformat:
    name: "Markdown format checking"
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: 3.13
      - name: List files
        run: ls -l
      - name: Download dependencies
        run: python -m pip install mdformat==0.7.18
      - name: Format .md files
        run: find . -name '*.md' | grep -v '/__' | xargs mdformat
      - name: Check for changes
        run: ./.github/scripts/no-diff.sh

==> pyglossary-5.0.9/.github/workflows/plugin-validate.yml <==

name: "Validate Plugins"

on:
  push:
    branches: ["master", "dev", "github-action"]
  pull_request:
  schedule:
    - cron: "33 1 * * 3"

jobs:
  ubuntu:
    name: Ubuntu
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.13"]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: List files
        run: ls -l
      - name: Validate Plugins
        run: python ./scripts/plugin-validate.py

==> pyglossary-5.0.9/.github/workflows/pypi-release.yml <==

name: pypi-publish

on:
  push:
    branches: ["master", "dev"]

jobs:
  pypi-publish:
    name: Upload release to PyPI
    runs-on: ubuntu-latest
    # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
    environment:
      name: pypi
      url: https://pypi.org/p/pyglossary
    permissions:
      id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Create release
        id: create-release
        run: ./.github/scripts/create-release.sh
      - name: Publish package distributions to PyPI
        if: ( steps.create-release.outputs.skipnext != 'true' )
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          skip-existing: true
          verbose: true
          password: ${{ secrets.PYPI_API_TOKEN }}

==> pyglossary-5.0.9/.github/workflows/ruff.yml <==

name: "Ruff"

on:
  push:
  pull_request:
  # The branches below must be a subset of the branches above
  schedule:
    - cron: "33 1 * * 3"

jobs:
  ruff:
    name: "See: docs.astral.sh/ruff"
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: List files
        run: ls -l
      - name: Download ruff
        run: ./.github/scripts/get-ruff.sh
      - name: Run ruff
        run: ./ruff check ./pyglossary ./tests/

==> pyglossary-5.0.9/.github/workflows/test.yml <==

name: "Tests"

on:
  push:
    branches: ["master", "dev", "github-action"]
  pull_request:
  schedule:
    - cron: "33 1 * * 3"

jobs:
  ubuntu:
    name: Ubuntu
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: List files
        run: ls -l
      - name: Install dependencies
        run: sh ./scripts/test-deps.sh
      - name: Remove test cache
        run: rm -rf /home/runner/.cache/pyglossary/test || true
      - name: Run tests
        run: bash ./.github/scripts/test.sh
      - name: Upload test artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: pyglossary-test-ubuntu-py${{ matrix.python-version }}
          path: artifacts

==> pyglossary-5.0.9/.gitignore <==

import-analyzer/
.pyre
#.vscode
*~
*.py[oc]
/build
/dist
/bin
/pyglossary.egg-info
/.mypy_cache/
/plugins
/ui
*.cover*
*,cover
htmlcov
*.htmlcov
vulture.*
imports_from_set.json
imports_set.json
module-attrs.json
dist.*/
build.*/
.idea/
**/.DS_Store

==> pyglossary-5.0.9/.sh-list <==

run-with-docker.sh
scripts/autofix-plugin-types
scripts/check-missing-types
scripts/check-style
scripts/check-style-slow
scripts/docker-deb-setup.sh
scripts/doc-pypi-links.sh
scripts/format-code
scripts/gen.sh
scripts/get-unlisted-formats.sh
scripts/mypy-deps.sh
scripts/test-cover-html-plugin.sh
scripts/test-cover-html.sh
scripts/test-deps.sh
scripts/test-glossary.sh
scripts/test.sh
scripts/version
scripts/release-new-version.sh

==> pyglossary-5.0.9/.vscode/extensions.json <==

{
	"recommendations": [
		"tamasfe.even-better-toml",
		"ms-python.python"
	]
}

==> pyglossary-5.0.9/.vscode/settings.json <==

{
	"python.testing.unittestArgs": [
		"-v",
		"-s",
		"./tests",
		"-p",
		"*_test.py"
	],
	"python.testing.pytestEnabled": false,
	"python.testing.unittestEnabled": true
}

==> pyglossary-5.0.9/AUTHORS <==

⚫︎ Saeed Rasooli (ilius)

Thanks to:

⚫︎ Kubtek for contributions in codebase and StarDict and BGL plugins
⚫︎ Xiaoqiang Wang for codes / contributions in MDict, DSL and AppleDict plugins
⚫︎ Thomas Vogt for fixing several bugs
⚫︎ Raul Fernandes and Karl Grill for reverse engineering on BGL format
⚫︎ Nilton Volpato for https://github.com/niltonvolpato/python-progressbar
⚫︎ Jeff Quast for https://github.com/jquast/wcwidth

See the res/resources.xml file for authors of icons / images.

PyGlossary is not associated with the Python project or the Python Software Foundation.
==> pyglossary-5.0.9/CODE_OF_CONDUCT.md <==

The PyGlossary code of conduct is derived from [The Ruby Community Conduct Guideline](https://www.ruby-lang.org/en/conduct/).

- Participants will be tolerant of opposing views.
- Participants must ensure that their language and actions are free of personal attacks and disparaging personal remarks.
- When interpreting the words and actions of others, participants should always assume good intentions.
- Behavior that can be reasonably considered harassment will not be tolerated.

==> pyglossary-5.0.9/Dockerfile <==

FROM bitnami/minideb
MAINTAINER Saeed Rasooli saeed.gnu@gmail.com
LABEL Description="Dockerfile to run PyGlossary inside a Debian-based Docker image"
COPY . /opt/pyglossary
RUN /opt/pyglossary/scripts/docker-deb-setup.sh
CMD python3 /opt/pyglossary/main.py --cmd

==> pyglossary-5.0.9/LICENSE <==

GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>

Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.

Preamble

The GNU General Public License is a free, copyleft license for software and other kinds of works.

The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too.

When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things.

To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others.

For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.

Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it.

For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions.

Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so.
This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. 
The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. 
When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. 
If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. 
Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. 
If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. 
The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. 
It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found.

<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>

This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode:

<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box".

You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <https://www.gnu.org/licenses/>.

The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <https://www.gnu.org/licenses/why-not-lgpl.html>.

==> pyglossary-5.0.9/README.md <==

# PyGlossary

[![PyPI](https://img.shields.io/pypi/v/pyglossary.svg)](https://pypi.org/project/pyglossary/)
[![Supported Python versions](https://img.shields.io/pypi/pyversions/pyglossary)](https://pypi.org/project/pyglossary/)
[![tests](https://github.com/ilius/pyglossary/actions/workflows/test.yml/badge.svg?branch=master)](https://github.com/ilius/pyglossary/actions/workflows/test.yml?query=branch%3Amaster)

A tool for converting dictionary files aka glossaries. The primary purpose is to be able to use our offline glossaries in any Open Source dictionary we like on any OS/device.

There are countless formats, and my time is limited, so I implement formats that seem most useful for myself or for the Open Source community. Also, diversity of languages is taken into account. Pull requests are welcome.

## Screenshots

Linux - Gtk3-based interface

______________________________________________________________________

Windows - Tkinter-based interface

______________________________________________________________________

Linux - command-line interface

______________________________________________________________________

Android Termux - interactive command-line interface

______________________________________________________________________

Web interface

## Supported formats

| Format                                                  |     |    Extension    | Read | Write |
| ------------------------------------------------------- | :-: | :-------------: | :--: | :---: |
| [Aard 2 (slob)](./doc/p/aard2_slob.md)                  | 🔢 | .slob | ✅ | ✅ |
| [AppleDict Binary](./doc/p/appledict_bin.md)            | 📁 | .dictionary | ✅ | ❌ |
| [AppleDict Source](./doc/p/appledict.md)                | 📁 | | | ✅ |
| [Babylon BGL](./doc/p/babylon_bgl.md)                   | 🔢 | .bgl | ✅ | ❌ |
| [CSV](./doc/p/csv.md)                                   | 📝 | .csv | ✅ | ✅ |
| [DICT.org / Dictd server](./doc/p/dict_org.md)          | 📁 | (📝.index) | ✅ | ✅ |
| [DICT.org / dictfmt source](./doc/p/dict_org_source.md) | 📝 | (.dtxt) | | ✅ |
| [dictunformat output file](./doc/p/dictunformat.md)     | 📝 | (.dictunformat) | ✅ | |
| [DictionaryForMIDs](./doc/p/dicformids.md)              | 📁 | (📁.mids) | ✅ | ✅ |
| [DIKT JSON](./doc/p/dikt_json.md)                       | 📝 | (.json) | | ✅ |
| [EPUB-2 E-Book](./doc/p/epub2.md)                       | 📦 | .epub | ❌ | ✅ |
| [FreeDict](./doc/p/freedict.md)                         | 📝 | .tei | ✅ | ❌ |
| [Gettext Source](./doc/p/gettext_po.md)                 | 📝 | .po | ✅ | ✅ |
| [HTML Directory (by file size)](./doc/p/html_dir.md)    | 📁 | | ❌ | ✅ |
| [JSON](./doc/p/json.md)                                 | 📝 | .json | | ✅ |
| [Kobo E-Reader Dictionary](./doc/p/kobo.md)             | 📦 | .kobo.zip | ❌ | ✅ |
| [Kobo E-Reader Dictfile](./doc/p/kobo_dictfile.md)      | 📝 | .df | ✅ | ✅ |
| [Lingoes Source](./doc/p/lingoes_ldf.md)                | 📝 | .ldf | ✅ | ✅ |
| [Mobipocket E-Book](./doc/p/mobi.md)                    | 🔢 | .mobi | ❌ | ✅ |
| [Octopus MDict](./doc/p/octopus_mdict.md)               | 🔢 | .mdx | ✅ | ❌ |
| [QuickDic version 6](./doc/p/quickdic6.md)              | 🔢 | .quickdic | ✅ | ✅ |
| [SQL](./doc/p/sql.md)                                   | 📝 | .sql | ❌ | ✅ |
| [StarDict](./doc/p/stardict.md)                         | 📁 | (📝.ifo) | ✅ | ✅ |
| [StarDict Textual File](./doc/p/stardict_textual.md)    | 📝 | (.xml) | ✅ | ✅ |
| [Tabfile](./doc/p/tabfile.md)                           | 📝 | .txt, .tab | ✅ | ✅ |
| [Wiktextract](./doc/p/wiktextract.md)                   | 📝 | .jsonl | ✅ | ❌ |
| [XDXF](./doc/p/xdxf.md)                                 | 📝 | .xdxf | ✅ | ❌ |
| [Zim (Kiwix)](./doc/p/zim.md)                           | 🔢 | .zim | ✅ | |
| [ABBYY Lingvo DSL](./doc/p/dsl.md) 🇷🇺                  | 📝 | .dsl | ✅ | ❌ |
| [Almaany.com](./doc/p/almaany.md) (Arabic)              | 🛢️ | .db | ✅ | ❌ |
| [cc-kedict](./doc/p/cc_kedict.md) 🇰🇷                   | 📝 | | ✅ | ❌ |
| [Dict.cc](./doc/p/dict_cc.md) 🇩🇪                       | 🛢️ | .db | ✅ | |
| [DigitalNK](./doc/p/digitalnk.md) 🇰🇵                   | 🛢️ | .db | ✅ | |
| [EDICT2 (CEDICT)](./doc/p/edict2.md) 🇨🇳                | 📝 | (.u8) | ✅ | ❌ |
| [JMDict](./doc/p/jmdict.md) 🇯🇵                         | 📝 | | ✅ | ❌ |
| [JMnedict](./doc/p/jmnedict.md) 🇯🇵                     | 📝 | | ✅ | ❌ |
| [WordNet](./doc/p/wordnet.md) 🇬🇧                       | 📁 | | ✅ | ❌ |
| [@wordset dictionary](./doc/p/wordset.md) 🇬🇧           | 📁 | | ✅ | |
| [Yomichan / Yomitan](./doc/p/yomichan.md) 🇯🇵           | 📦 | (.zip) | | ✅ |

Legend:

- 📁 Directory
- 📝 Text file
- 📦 Package/archive file
- 🛢️ SQLite file
- 🔢 Binary file
- ✅ Supported
- ❌ Will not be supported

**Note**: SQLite-based formats are not detected by extension (`.db`); so you need to select the format (with the UI or the `--read-format` flag). **Also, don't confuse SQLite-based formats with [SQLite mode](#sqlite-mode).**

## Requirements

PyGlossary requires **Python 3.10 or higher**, and works in practically all modern operating systems. While primarily designed for *GNU/Linux*, it works on *Windows*, *Mac OS X* and other Unix-based operating systems as well.

As shown in the screenshots, there are multiple user interface types (multiple ways to use the program):

- **Gtk3-based interface**, uses [PyGI](http://pygobject.readthedocs.io/en/latest/getting_started.html)+Gtk3. See [doc/gtk3.md](./doc/gtk3.md) for how to install it on Linux and Mac OS X.
- **Gtk4-based interface**, uses [PyGI](http://pygobject.readthedocs.io/en/latest/getting_started.html)+Gtk4. See [doc/gtk4.md](./doc/gtk4.md). This is still not as complete as the Gtk3 interface.
- **Tkinter-based interface**, meant to be used in the absence of Gtk, especially on Windows, where the Tkinter library is installed with Python itself. You can [install Tkinter](./doc/tkinter.md) on Linux or Mac OS X.
- **Command-line interface**, works in all operating systems without any specific requirements; just type `./main.py --help` or `pyglossary --help`
- **Interactive command-line interface**
  - Requires: `pip install prompt_toolkit`
  - Perfect for mobile devices (like Termux on Android) where no GUI is available
  - Automatically selected if the output file argument is not passed **and** one of these holds:
    - On Linux and the `$DISPLAY` environment variable is empty or not set
      - For example when you are using a remote Linux machine over SSH
    - On Mac and no `tkinter` module is found
  - Manually select with `--cmd` or `--ui=cmd`
    - Minimally: `./main.py --cmd`
    - You can still pass the input file, or any flag/option
  - If both input and output files are passed, the non-interactive cmd UI will be the default.
    - Pass `--interactive` to change it.
    - If you are writing a script, you can pass `--no-interactive` to force-disable the interactive UI
      - Then you have to pass both input and output file arguments
  - Don't forget to use *Up/Down* or *Tab* keys in prompts!
    - Up/Down keys show you recent values you have used
    - Tab key shows available values/options
  - You can press Control+C (on Linux/Windows) at any prompt to exit

## UI (User Interface) selection

When you run PyGlossary without any command-line arguments or options/flags, PyGlossary will try to run the first available interface:

- It tries to find PyGI+Gtk3 and open the **Gtk3-based** interface.
- It tries to find PyGI+Gtk4 and open the **Gtk4-based** interface.
- It tries to find Tkinter and open the **Tkinter-based** interface.
- If it's run in a command line (with stdin connected to a terminal), it tries to find `prompt_toolkit` and run the **interactive command-line** interface.
- It runs an HTTP server and opens the **web interface** in your browser.

The order depends on the operating system. Currently on Mac OS and Windows, Tkinter is checked before Gtk.
You can explicitly select the user interface type using `--ui`:

- `./main.py --ui=gtk3`
- `./main.py --ui=gtk4`
- `./main.py --ui=gtk` which currently selects `gtk3`
- `./main.py --ui=tk`
- `./main.py --ui=web`
- `./main.py --ui=cmd`

## Installation on Windows

- [Download and install Python](https://www.python.org/downloads/windows/) (3.10 or above)
- Open Start -> type Command -> right-click on Command Prompt -> Run as administrator
- To ensure you have `pip`, run: `python -m ensurepip --upgrade`
- To install, run: `pip install --upgrade pyglossary`
- Now you should be able to run the `pyglossary` command
- If the command was not found, make sure Python environment variables are set up:

## Feature-specific requirements

- Using the [Sort by Locale](#sorting) feature requires [PyICU](./doc/pyicu.md)
- Using the `--remove-html-all` flag requires: `pip install lxml beautifulsoup4`

Some formats have additional requirements. If you have trouble with any format, please check the [link given for that format](#supported-formats) to see its documentation.

**Using Termux on Android?** See [doc/termux.md](./doc/termux.md)

## Configuration

See [doc/config.rst](./doc/config.rst).

## Direct and indirect modes

Indirect mode means that the input glossary is completely read and loaded into RAM, then converted into the output format. This was the only method available in old versions (before [3.0.0](https://github.com/ilius/pyglossary/releases/tag/3.0.0)).

Direct mode means entries are read, processed and written into the output glossary one at a time.

Direct mode was added to limit memory usage for large glossaries; but it may reduce the conversion time in most cases as well.

Converting glossaries into these formats requires [sorting](#sorting) entries:

- [StarDict](./doc/p/stardict.md)
- [EPUB-2](./doc/p/epub2.md)
- [Mobipocket E-Book](./doc/p/mobi.md)
- [Yomichan](./doc/p/yomichan.md)
- [DictionaryForMIDs](./doc/p/dicformids.md)

That's why direct mode will not work for these formats, and PyGlossary has to switch to indirect mode (or it previously had to, see [SQLite mode](#sqlite-mode)). For other formats, direct mode is the default. You may override this with the `--indirect` flag.

## SQLite mode

As mentioned above, converting glossaries to some specific formats requires loading them into RAM. This can be problematic if the glossary is too big to fit into RAM. That's when you should try adding the `--sqlite` flag to your command. Then SQLite3 is used as intermediate storage for storing, sorting and then fetching entries. This fixes the memory issue, and may even reduce the running time of the conversion (depending on your home directory storage).

The temporary SQLite file is stored in the [cache directory](#cache-directory) and then deleted after conversion (unless you pass the `--no-cleanup` flag).

SQLite mode is automatically enabled for writing these formats if the `auto_sqlite` [config parameter](./doc/config.rst) is `true` (which is the default). This also applies when you pass the `--sort` flag for any format. You may use `--no-sqlite` to override this and switch to indirect mode.

Currently you cannot disable alternates in SQLite mode (`--no-alts` is ignored).

## Sorting

There are two things that can activate sorting entries:

- The output format requires sorting (as explained [above](#direct-and-indirect-modes))
- You pass the `--sort` flag in the command line.
In the case of passing `--sort`, you can also pass:

- `--sort-key` to select the sort key aka sorting order (including locale), see [doc/sort-key.md](./doc/sort-key.md)
- `--sort-encoding` to change the encoding used for sorting
  - UTF-8 is the default encoding for all sort keys and all output formats (unless mentioned otherwise)
  - This will only affect the order of entries, and will not corrupt words / definitions
  - Non-encodable characters are replaced with the `?` byte (*only for sorting*)

## Cache directory

The cache directory is used for storing temporary files which are either moved or deleted after conversion. You can pass the `--no-cleanup` flag in order to keep them.

The path of the cache directory:

- Linux or BSD: `~/.cache/pyglossary/`
- Mac: `~/Library/Caches/PyGlossary/`
- Windows: `C:\Users\USERNAME\AppData\Local\PyGlossary\Cache\`

## User plugins

If you want to add your own plugin without adding it to the source code directory, or you want to use a plugin that has been removed from the repository, you can place it in this directory:

- Linux or BSD: `~/.pyglossary/plugins/`
- Mac: `~/Library/Preferences/PyGlossary/plugins/`
- Windows: `C:\Users\USERNAME\AppData\Roaming\PyGlossary\plugins\`

## Linux packaging status

[![Packaging status](https://repology.org/badge/vertical-allrepos/pyglossary.svg?columns=3&header=PyGlossary)](https://repology.org/project/pyglossary/versions)

## Using PyGlossary as a library

See [doc/lib-usage.md](./doc/lib-usage.md) for how to use PyGlossary as a Python library.

## Internals

See [doc/internals.md](./doc/internals.md) for information about the internal glossary structure and entry filters.

pyglossary-5.0.9/__init__.py000066400000000000000000000000001476751035500160550ustar00rootroot00000000000000pyglossary-5.0.9/_license-dialog000066400000000000000000000013731476751035500167230ustar00rootroot00000000000000PyGlossary - A tool for converting dictionary files
Copyright © 2008-2025 Saeed Rasooli

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. Or on Debian systems, from /usr/share/common-licenses/GPL. If not, see http://www.gnu.org/licenses/gpl.txt

pyglossary-5.0.9/about000066400000000000000000000007551476751035500150200ustar00rootroot00000000000000PyGlossary is a tool for converting dictionary files aka glossaries, from/to various formats used by different dictionary applications

Copyleft © 2008-2025 Saeed Rasooli

This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

PyGlossary is not associated with the Python project or the Python Software Foundation.
pyglossary-5.0.9/check-shell-scripts.sh000077500000000000000000000001021476751035500201550ustar00rootroot00000000000000#!/bin/sh
xargs -n1 shellcheck --exclude=SC2046,SC2148 <.sh-list

pyglossary-5.0.9/config.json000066400000000000000000000017251476751035500161230ustar00rootroot00000000000000{
	"log_time": false,
	"cleanup": true,
	"auto_sqlite": true,
	"lower": false,
	"utf8_check": false,
	"enable_alts": true,
	"skip_resources": false,
	"rtl": false,
	"remove_html": "",
	"remove_html_all": false,
	"normalize_html": false,
	"save_info_json": false,
	"skip_duplicate_headword": false,
	"trim_arabic_diacritics": false,
	"unescape_word_links": false,
	"color.enable.cmd.unix": true,
	"color.enable.cmd.windows": false,
	"color.cmd.critical": 196,
	"color.cmd.error": 1,
	"color.cmd.warning": 208,
	"cmdi.prompt.indent.str": ">",
	"cmdi.prompt.indent.color": 2,
	"cmdi.prompt.msg.color": -1,
	"cmdi.msg.color": -1,
	"ui_autoSetFormat": true,
	"tk.progressbar.color.fill": "blue",
	"tk.progressbar.color.background": "gray",
	"tk.progressbar.color.text": "yellow",
	"tk.progressbar.font": "Sans",
	"reverse_matchWord": true,
	"reverse_showRel": "Percent",
	"reverse_saveStep": 1000,
	"reverse_minRel": 0.3,
	"reverse_maxNum": -1,
	"reverse_includeDefs": false
}

pyglossary-5.0.9/doc/000077500000000000000000000000001476751035500145235ustar00rootroot00000000000000pyglossary-5.0.9/doc/apple.md000066400000000000000000000044101476751035500161450ustar00rootroot00000000000000### Required Python libraries for AppleDict

- **Reading from AppleDict Binary (.dictionary)**
  `sudo pip3 install lxml`
- **Writing to AppleDict**
  `sudo pip3 install lxml beautifulsoup4 html5lib`

### Requirements for AppleDict on Mac OS X

If you want to convert glossaries into AppleDict format on Mac OS X, you also need:

- GNU make as part of [Command Line Tools for Xcode](http://developer.apple.com/downloads).
- Dictionary Development Kit as part of [Additional Tools for Xcode](http://developer.apple.com/downloads). Extract to `/Applications/Utilities/Dictionary Development Kit`

### Convert Babylon (bgl) to Mac OS X dictionary

Let's assume the Babylon dict is at `~/Documents/Duden_Synonym/Duden_Synonym.BGL`:

```sh
cd ~/Documents/Duden_Synonym/
python3 ~/Software/pyglossary/main.py --write-format=AppleDict Duden_Synonym.BGL Duden_Synonym-apple
cd Duden_Synonym-apple
make
make install
```

Launch Dictionary.app and test.

### Convert Octopus Mdict to Mac OS X dictionary

Let's assume the MDict dict is at `~/Documents/Duden-Oxford/Duden-Oxford DEED ver.20110408.mdx`. Run the following command:

```sh
cd ~/Documents/Duden-Oxford/
python3 ~/Software/pyglossary/main.py --write-format=AppleDict "Duden-Oxford DEED ver.20110408.mdx" "Duden-Oxford DEED ver.20110408-apple"
cd "Duden-Oxford DEED ver.20110408-apple"
make
make install
```

Launch Dictionary.app and test.

Let's assume the MDict dict is at `~/Downloads/oald8/oald8.mdx`, along with the image/audio resources file `oald8.mdd`. Run the following commands:

```sh
cd ~/Downloads/oald8/
python3 ~/Software/pyglossary/main.py --write-format=AppleDict oald8.mdx oald8-apple
cd oald8-apple
```

This extracts the dictionary into `oald8.xml` and the data resources into the folder `OtherResources`. Hyperlinks use relative paths.

```sh
sed -i "" 's:src="/:src=":g' oald8.xml
```

Convert the audio files from SPX format to WAV format.
You need the package `speex` from [MacPorts](https://www.macports.org):

```sh
find OtherResources -name "*.spx" -execdir sh -c 'spx={};speexdec $spx ${spx%.*}.wav' \;
sed -i "" 's|sound://\([/_a-zA-Z0-9]*\).spx|\1.wav|g' oald8.xml
```

But be warned that the decoded WAVE audio can consume ~5 times more disk space!

Compile and install:

```sh
make
make install
```

Launch Dictionary.app and test.

pyglossary-5.0.9/doc/babylon/000077500000000000000000000000001476751035500161515ustar00rootroot00000000000000pyglossary-5.0.9/doc/babylon/BGL.svgz000066400000000000000000000147551476751035500175040ustar00rootroot000000000000002_BGL.svg (gzip-compressed SVG diagram of the BGL file format; binary data omitted)

pyglossary-5.0.9/doc/babylon/bgl_info.md000066400000000000000000000061211476751035500202520ustar00rootroot00000000000000## bgl_numEntries (0x0c)

`bgl_numEntries` does not always match the number of entries in the dictionary, but it's close to it. The difference is usually +-1 or 2, in rare cases 9, 29 or more.

## bgl_length (0x43)

The length of the substring match in a term. For example, if your glossary contains the term "Dog" and the substring length is 2, searching for the substrings "Do" or "og" will retrieve the term "Dog". Use substring length 0 for exact match.

## bgl_contractions (0x3b)

Contains a value like this:

```
V-0#Verb|V-0.0#|V-0.1#Infinitive|V-0.1.1#|V-1.0#|V-1.1#|V-1.1.1#Present Simple|V-1.1.2#Present Simple (3rd pers.
sing.)|V-2.0#|V-2.1#|V-2.1.1#Past Simple|V-3.0#|V-3.1#|V-3.1.1#Present Participle|V-4.0#|V-4.1#|V-4.1.1#Past Participle|V-5.0#|V-5.1#|V-5.1.1#Future|V2-0#|V2-0.0#|V2-0.1#Infinitive|V2-0.1.1#|V2-1.0#|V2-1.1#|V2-1.1.1#Present Simple (1st pers. sing.)|V2-1.1.2#Present Simple (2nd pers. sing. & plural forms)|V2-1.1.3#Present Simple (3rd pers. sing.)|V2-2.0#|V2-2.1#|V2-2.1.1#Past Simple (1st & 3rd pers. sing.)|V2-2.1.2#Past Simple (2nd pers. sing. & plural forms)|V2-3.0#|V2-3.1#|V2-3.1.1#Present Participle|V2-4.0#|V2-4.1#|V2-4.1.1#Past Participle|V2-5.0#|V2-5.1#|V2-5.1.1#Future||N-0#Noun|N-1.0#|N-1.1#|N-1.1.1#Singular|N-2.0#|N-2.1#|N-2.1.1#Plural|N4-1.0#|N4-1.1#|N4-1.1.1#Singular Masc.|N4-1.1.2#Singular Fem.|N4-2.0#|N4-2.1#|N4-2.1.1#Plural Masc.|N4-2.1.2#Plural Fem.||ADJ-0#Adjective|ADJ-1.0#|ADJ-1.1#|ADJ-1.1.1#Adjective|ADJ-1.1.2#Comparative|ADJ-1.1.3#Superlative||
```

Value format: `( "#" [] "|")+`

The value is in the second language; that is, for `Babylon Russian-English.BGL` the value is in Russian, for `Babylon English-Spanish.BGL` the value is in Spanish (presumably), etc.

## bgl_about: Glossary manual file (0x41)

Additional information about the dictionary. In `.txt` format this may be short info like this:

```
Biology Glossary
Author name: Hafez Divandari
Author email: hafezdivandari@gmail.com
-------------------------------------------
A functional glossary for translating
English biological articles
to fluent Farsi
-------------------------------------------
Copyright (c) 2009
All rights reserved.
```

In `.pdf` format this may be a quite large document (about 30 pages), an introduction to the dictionary, describing the structure of an article, the editors, and how to use the dictionary.

Format: ` "\x00" `

File extension may be: ".txt", ".pdf"

## bgl_purchaseLicenseMsg (0x2c)

Contains a value like this:

```
In order to view this glossary, you must purchase a license.
Click here to purchase.
```

## bgl_licenseExpiredMsg (0x2d)

Contains a value like this:

```
Your license for this glossary has expired. In order to view this glossary, you must have a valid license.
Renew your license today. ``` ## bgl_purchaseAddress (0x2e) Contains a value like this: ``` http://www.babylon.com/redirects/purchase.cgi?type=169&trid=BPCOT or mailto:larousse@babylon.com ``` pyglossary-5.0.9/doc/config.rst000066400000000000000000000360071476751035500165300ustar00rootroot00000000000000Configuration Parameters ------------------------ +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | Name | Command Flags | Type | Default | Comment | +=====================================+===============================+=======+===============+=============================================================================+ | ``log_time`` | | ``--log-time`` | bool | ``false`` | Show date and time in logs | | | | ``--no-log-time`` | | | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``cleanup`` | | ``--cleanup`` | bool | ``true`` | Cleanup cache or temporary files after conversion | | | | ``--no-cleanup`` | | | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``auto_sqlite`` | | bool | ``true`` | Auto-enable ``--sqlite`` to limit RAM usage when direct | | | | | | mode is not possible. Can override with ``--no-sqlite`` | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``enable_alts`` | | ``--alts`` | bool | ``true`` | Enable alternates | | | | ``--no-alts`` | | | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``skip_resources`` | ``--skip-resources`` | bool | ``false`` | Skip resources (images, audio, css, etc) | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``save_info_json`` | ``--info`` | bool | ``false`` | Save .info file alongside output file(s) | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``lower`` | | ``--lower`` | bool | ``false`` | Lowercase word(s) | | | | ``--no-lower`` | | | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``utf8_check`` | | ``--utf8-check`` | bool | ``false`` | Fix Unicode in word(s) and definition | | | | ``--no-utf8-check`` | | | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``rtl`` | ``--rtl`` | bool | ``false`` | Make definition right-to-left | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``remove_html`` | ``--remove-html`` | str | ``""`` | Remove given comma-separated HTML tags (not their contents) from definition | 
+-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``remove_html_all`` | ``--remove-html-all`` | bool | ``false`` | Remove all HTML tags (not their contents) from definition | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``normalize_html`` | ``--normalize-html`` | bool | ``false`` | Normalize HTML tags in definition (WIP) | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``skip_duplicate_headword`` | ``--skip-duplicate-headword`` | bool | ``false`` | Skip entries with a duplicate headword | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``trim_arabic_diacritics`` | ``--trim-arabic-diacritics`` | bool | ``false`` | Trim Arabic diacritics from headword | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``unescape_word_links`` | ``--unescape-word-links`` | bool | ``false`` | Unescape Word Links | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``color.enable.cmd.unix`` | ``--no-color`` | bool | ``true`` | Enable colors in Linux/Unix command line | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``color.enable.cmd.windows`` | ``--no-color`` | bool | ``false`` | Enable colors in Windows command line | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``color.cmd.critical`` | | int | ``196`` | | Color code for critical errors in command line | | | | | |image0| | | See `term-colors.md <./term-colors.md/>`_ | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``color.cmd.error`` | | int | ``1`` | | Color code for errors in command line | | | | | |image1| | | See `term-colors.md <./term-colors.md/>`_ | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``color.cmd.warning`` | | int | ``208`` | | Color code for warnings in command line | | | | | |image2| | | See `term-colors.md <./term-colors.md/>`_ | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``cmdi.prompt.indent.str`` | | str | ``">"`` | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``cmdi.prompt.indent.color`` | | int | ``2`` | | 
+-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``cmdi.prompt.msg.color`` | | int | ``-1`` | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``cmdi.msg.color`` | | int | ``-1`` | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``ui_autoSetFormat`` | | bool | ``true`` | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``tk.progressbar.color.fill`` | | str | ``"blue"`` | Tkinter: progressbar fill color | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``tk.progressbar.color.background`` | | str | ``"gray"`` | Tkinter: progressbar background color | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``tk.progressbar.color.text`` | | str | ``"yellow"`` | Tkinter: progressbar text color | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``tk.progressbar.font`` | | str | ``"Sans"`` | Tkinter: progressbar text font. Example: "Sans", "Sans 15" | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``reverse_matchWord`` | | bool | ``true`` | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``reverse_showRel`` | | str | ``"Percent"`` | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``reverse_saveStep`` | | int | ``1000`` | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``reverse_minRel`` | | float | ``0.3`` | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``reverse_maxNum`` | | int | ``-1`` | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ | ``reverse_includeDefs`` | | bool | ``false`` | | +-------------------------------------+-------------------------------+-------+---------------+-----------------------------------------------------------------------------+ Configuration Files ------------------- The default configuration values are stored in `config.json <./../config.json/>`_ file in source/installation directory. The user configuration file - if exists - will override default configuration values. 
The location of this file depends on the operating system:

- Linux or BSD: ``~/.pyglossary/config.json``
- Mac: ``~/Library/Preferences/PyGlossary/config.json``
- Windows: ``C:\Users\USERNAME\AppData\Roaming\PyGlossary\config.json``

Using as library
----------------

When you use PyGlossary as a library, neither of the ``config.json`` files is loaded. So if you want to change the config, you should set the ``glos.config`` property (which you can do only once for each instance of ``Glossary``). For example:

.. code:: python

   glos = Glossary()
   glos.config = {
       "lower": True,
   }

.. |image0| image:: https://via.placeholder.com/20/ff0000/000000?text=+
.. |image1| image:: https://via.placeholder.com/20/aa0000/000000?text=+
.. |image2| image:: https://via.placeholder.com/20/ff8700/000000?text=+

pyglossary-5.0.9/doc/dsl/000077500000000000000000000000001476751035500153055ustar00rootroot00000000000000pyglossary-5.0.9/doc/dsl/README.rst000066400000000000000000000022171476751035500167760ustar00rootroot00000000000000{{COMMENT}}..{{/COMMENT}}

MAINENTRY: entry word
MULTIWORD: entry words
STYLE-LEVEL: spoken
DEFINITION: definition
PRON: pronunciation
PART-OF-SPEECH: word class
INFLECTION: list of inflection types
INFLECTION-TYPE: singular/plural for noun, comparative/superlative for adj
INFLECTION-ENTRY:
SENSE-NUM: meaning number
HOMO-NUM: meaning number
SYNTAX-CODING:
Thesaurus:
PATTERNS-COLLOCATIONS:
EXAMPLE:
Main entry:
DIALECT:
See also:
Phrase:
Phrasal Verb:

[m#] Indent Level
[*]...[/*] Optional Text. Show only in full mode
[p]...[/p] Label defined in abbrev.dsl
[s]...[/s] Sound/Picture File

Text Format
===========

[c color_name]...[/c] Color Name, e.g. red, orange
[b]...[/b] Bold
[']...[/']
[u]...[/u] Underline
[i]...[/i] Italic
[sup]...[/sup] Superscript
[sub]...[/sub] Subscript

Text Zone
=========

[trn]...[/trn] Translation
[ex]...[/ex] Example
[com]...[/com] Comment
[!trs]...[/!trs] text between these tags will not be indexed
[t]...[/t] Unknown
[url]...[/url] URL Link
<<...>> Reference

pyglossary-5.0.9/doc/entry-filters.md000066400000000000000000000067321476751035500176620ustar00rootroot00000000000000## Entry Filters

| Name | Default Enabled | Command Flags | Description |
| ---- | --------------- | ------------- | ----------- |
| `trim_whitespaces` | Yes | | Remove leading/trailing whitespaces from word(s) and definition |
| `non_empty_word` | Yes | | Skip entries with empty word |
| `skip_resources` | No | `--skip-resources` | Skip resources / data files |
| `utf8_check` | No | `--utf8-check`
`--no-utf8-check` | Fix Unicode in word(s) and definition |
| `lower` | No | `--lower`
`--no-lower` | Lowercase word(s) |
| `skip_duplicate_headword` | No | `--skip-duplicate-headword` | Skip entries with a duplicate headword |
| `trim_arabic_diacritics` | No | `--trim-arabic-diacritics` | Trim Arabic diacritics from headword |
| `rtl` | No | `--rtl` | Make definition right-to-left |
| `remove_html_all` | No | `--remove-html-all` | Remove all HTML tags (not their contents) from definition |
| `remove_html` | No | `--remove-html` | Remove given comma-separated HTML tags (not their contents) from definition |
| `normalize_html` | No | `--normalize-html` | Normalize HTML tags in definition (WIP) |
| `unescape_word_links` | No | `--unescape-word-links` | Unescape Word Links |
| `lang` | Yes | | Language-specific cleanup/fixes |
| `non_empty_word` | Yes | | Skip entries with empty word |
| `non_empty_defi` | Yes | | Skip entries with empty definition |
| `remove_empty_dup_alt_words` | Yes | | Remove empty and duplicate alternate words |
| `prevent_duplicate_words` | No | | Prevent duplicate words |
| `strip_full_html` | No | | Replace a full HTML document with its body |
| `max_memory_usage` | No | | Show Max Memory Usage |

pyglossary-5.0.9/doc/gtk3.md000066400000000000000000000004551476751035500157210ustar00rootroot00000000000000## Gtk3 interface requirements

- Debian/Ubuntu: `apt install python3-gi python3-gi-cairo gir1.2-gtk-3.0`
- openSUSE: `zypper install python3-gobject gtk3`
- Fedora: `dnf install pygobject3 python3-gobject gtk3`
- ArchLinux: `pacman -S python-gobject gtk3`
- Mac OS X: `brew install pygobject3 gtk+3`

pyglossary-5.0.9/doc/gtk4.md000066400000000000000000000004541476751035500157210ustar00rootroot00000000000000## Gtk4 interface requirements

- Debian/Ubuntu: `apt install python3-gi python3-gi-cairo gir1.2-gtk-4.0`
- openSUSE: `zypper install python3-gobject gtk4`
- Fedora: `dnf install pygobject3 python3-gobject gtk4`
- ArchLinux: `pacman -S python-gobject gtk4`
- Mac OS X: `brew install pygobject3 gtk4`

pyglossary-5.0.9/doc/internals.md000066400000000000000000000031031476751035500170410ustar00rootroot00000000000000## Internal glossary structure

A glossary contains a number of entries. Each entry contains:

- Headword (title or main phrase for lookup)
- Alternates (some alternative phrases for lookup)
- Definition

In PyGlossary, the headword and alternates together are accessible as a single Python list, `entry.l_word`.

`entry.defi` is the definition as a Python Unicode `str`. Also, `entry.b_defi` is the definition as a UTF-8 byte string.

`entry.defiFormat` is the definition format. If the definition is plaintext (not rich text), the value is `m`. And if it's in HTML (contains any HTML tag), then `defiFormat` is `h`. The value `x` is also allowed for XDXF, but XDXF is not widely supported in dictionary applications.

There is another type of entry, called a **Data Entry**, which generally contains an image, audio, css, or any other file that was included in the input glossary. For data entries:

- `entry.s_word` is the file name (and `l_word` is still a list containing this string),
- `entry.defiFormat` is `b`
- `entry.data` gives the content of the file as `bytes`.

## Entry filters

Entry filters are internal objects that modify the words/definition of entries, or remove entries (in some special cases). They act like several filters in a pipe connecting a `reader` object to a `writer` object (with both of their classes defined in plugins and instantiated in the `Glossary` class).
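To make the pipe picture concrete, here is a minimal sketch of the idea (illustrative only; the `Entry` alias and the `non_empty_word`/`lower_headwords` generators here are simplified stand-ins, not PyGlossary's actual classes):

```python
from collections.abc import Iterator

Entry = tuple[list[str], str]  # hypothetical: (word list, definition)

def non_empty_word(entries: Iterator[Entry]) -> Iterator[Entry]:
	# Drop entries whose headword list is empty: a "removing" filter.
	for l_word, defi in entries:
		if l_word:
			yield l_word, defi

def lower_headwords(entries: Iterator[Entry]) -> Iterator[Entry]:
	# Lowercase every headword: a "modifying" filter.
	for l_word, defi in entries:
		yield [w.lower() for w in l_word], defi

# reader -> filters -> writer, evaluated lazily one entry at a time:
# writer.write(lower_headwords(non_empty_word(reader.read())))
```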
You can enable/disable some of these filters using config parameters / command-line flags, which are documented in [doc/config.rst](./config.rst).

The full list of entry filters is also documented in [doc/entry-filters.md](./entry-filters.md).

pyglossary-5.0.9/doc/lib-examples/000077500000000000000000000000001476751035500171055ustar00rootroot00000000000000pyglossary-5.0.9/doc/lib-examples/any_to_txt.py000077500000000000000000000007251476751035500216560ustar00rootroot00000000000000#!/usr/bin/env python3
import sys

from pyglossary import Glossary

# Glossary.init() must be called only once, so make sure you put it
# in the right place
Glossary.init()

glos = Glossary()
glos.convert(
	inputFilename=sys.argv[1],
	outputFilename=f"{sys.argv[1]}.txt",
	# although it can detect format for *.txt, you can still pass outputFormat
	outputFormat="Tabfile",
	# you can pass readOptions or writeOptions as a dict
	# writeOptions={"encoding": "utf-8"},
)

pyglossary-5.0.9/doc/lib-examples/oxford.py000066400000000000000000000017741476751035500207650ustar00rootroot00000000000000from pyglossary import Glossary


def takePhonetic_oxford_gb(glos):
	phonGlos = Glossary()  # phonetic glossary
	phonGlos.setInfo("name", glos.getInfo("name") + "_phonetic")
	for entry in glos:
		word = entry.s_word
		defi = entry.defi
		if not defi.startswith("/"):
			continue
		# Now set the phonetic to the `ph` variable.
		ph = ""
		for s in (
			"/ adj",
			"/ v",
			"/ n",
			"/ adv",
			"/adj",
			"/v",
			"/n",
			"/adv",
			"/ n",
			"/ the",
		):
			i = defi.find(s, 2, 85)
			if i == -1:
				continue
			ph = defi[: i + 1]
			break
		ph = (
			ph.replace(";", "\t")
			.replace(",", "\t")
			.replace(" ", "\t")
			.replace(" ", "\t")
			.replace(" ", "\t")
			.replace("//", "/")
			.replace("\t/\t", "\t")
			.replace("US\t", "\tUS: ")
			.replace("US", "\tUS: ")
			.replace("\t\t\t", "\t")
			.replace("\t\t", "\t")
		)
		# .replace("/", "")
		# .replace("\\n ", "\\n")
		# .replace("\\n ", "\\n")
		if ph:
			phonGlos.addEntryObj(phonGlos.newEntry(word, ph))
	return phonGlos

pyglossary-5.0.9/doc/lib-examples/py-to-stardict.py000066400000000000000000000007111476751035500223410ustar00rootroot00000000000000from pyglossary.glossary import Glossary

Glossary.init()

glos = Glossary()
defiFormat = "m"  # "m" for plain text, "h" for HTML

mydict = {
	"a": "test1",
	"b": "test2",
	"c": "test3",
	"d": "test4",
	"e": "test5",
	"f": "test6",
}
for word, defi in mydict.items():
	glos.addEntryObj(glos.newEntry(word, defi, defiFormat=defiFormat))

glos.setInfo("title", "My Test StarDict")
glos.setInfo("author", "John Doe")
glos.write("test.ifo", format="Stardict")

pyglossary-5.0.9/doc/lib-usage.md000066400000000000000000000051001476751035500167140ustar00rootroot00000000000000# Using PyGlossary as a Python library

There are a few examples in the [doc/lib-examples](./doc/lib-examples) directory.
Here is a basic script that converts any supported glossary format to [Tabfile](./doc/p/tabfile.md):

```python
import sys

from pyglossary import Glossary

# Glossary.init() should be called only once, so make sure you put it
# in the right place
Glossary.init()

glos = Glossary()
glos.convert(
	inputFilename=sys.argv[1],
	outputFilename=f"{sys.argv[1]}.txt",
	# although it can detect format for *.txt, you can still pass outputFormat
	outputFormat="Tabfile",
	# you can pass readOptions or writeOptions as a dict
	# writeOptions={"encoding": "utf-8"},
)
```

And if you choose to use `glossary_v2`:

```python
import sys

from pyglossary.glossary_v2 import ConvertArgs, Glossary

# Glossary.init() should be called only once, so make sure you put it
# in the right place
Glossary.init()

glos = Glossary()
glos.convert(ConvertArgs(
	inputFilename=sys.argv[1],
	outputFilename=f"{sys.argv[1]}.txt",
	# although it can detect format for *.txt, you can still pass outputFormat
	outputFormat="Tabfile",
	# you can pass readOptions or writeOptions as a dict
	# writeOptions={"encoding": "utf-8"},
))
```

You may look at the docstring of `Glossary.convert` for the full list of keyword arguments.

If you need to add entries inside your Python program (rather than converting one glossary into another), then use `write` instead of `convert`; here is an example:

```python
from pyglossary import Glossary

Glossary.init()

glos = Glossary()
mydict = {
	"a": "test1",
	"b": "test2",
	"c": "test3",
}
for word, defi in mydict.items():
	glos.addEntryObj(glos.newEntry(
		word,
		defi,
		defiFormat="m",  # "m" for plain text, "h" for HTML
	))

glos.setInfo("title", "My Test StarDict")
glos.setInfo("author", "John Doe")
glos.write("test.ifo", format="Stardict")
```

**Note:** `addEntryObj` is renamed to `addEntry` in `pyglossary.glossary_v2`.

**Note:** Switching to `glossary_v2` is optional but recommended.

And if you need to read a glossary from file into a `Glossary` object in RAM (without immediately converting it), you can use `glos.read(filename, format=inputFormat)`. Be wary of RAM usage in this case.

If you want to include images, css, js or other files in a glossary that you are creating, you need to add them as **Data Entries**, for example:

```python
import os

# open in binary mode: resource files must be read as bytes
with open(os.path.join(imageDir, "a.jpeg"), "rb") as fp:
	glos.addEntry(glos.newDataEntry("img/a.jpeg", fp.read()))
```

The first argument to `newDataEntry` must be the relative path (the path that the HTML of your definitions generally points to).

pyglossary-5.0.9/doc/lzo.md000066400000000000000000000011761476751035500156560ustar00rootroot00000000000000## Install `python-lzo`

- **On Linux**
  - Make sure `liblzo2-dev` or `liblzo2-devel` is installed.
  - Run `sudo pip3 install python-lzo`
- **On Android with Termux**
  - `apt install liblzo`
  - `pip install python-lzo`
- **On Windows**:
  - Open this page: https://www.lfd.uci.edu/~gohlke/pythonlibs/#python-lzo
  - If you are using Python 3.7 (32 bit) for example, click on `python_lzo‑1.12‑cp37‑cp37m‑win32.whl`
  - Open Start -> type Command -> right-click on Command Prompt -> Run as administrator
  - Run the `pip install C:\....\python_lzo‑1.12‑cp37‑cp37m‑win32.whl` command, giving the path of the downloaded file

pyglossary-5.0.9/doc/octopus_mdict/000077500000000000000000000000001476751035500173775ustar00rootroot00000000000000pyglossary-5.0.9/doc/octopus_mdict/MDD.svgz000066400000000000000000000066641476751035500207300ustar00rootroot00000000000000_MDD.svg.new (gzip-compressed SVG diagram of the MDD file format; binary data omitted)

pyglossary-5.0.9/doc/octopus_mdict/README.md000066400000000000000000000045521476751035500206630ustar00rootroot00000000000000# An Analysis of MDX/MDD File Format

> MDict is a multi-platform open dictionary

Both "multi-platform" and "open" are questionable claims.
It is not available for every platform, e.g. OS X, Linux. Its dictionary file format is not open. But this has not hindered its popularity, and many dictionaries have been created for it. This is an attempt to reveal the MDX/MDD file format, so that my favorite dictionaries, created by MDict users, could be used elsewhere.

# MDict Files

MDict stores the dictionary definitions, i.e. (key word, explanation) pairs, in the MDX file, and the dictionary reference data, e.g. images, pronunciations and stylesheets, in the MDD file. Although holding different contents, these two file formats share the same structure.

# MDX and MDD File Formats

See [MDX.svgz](./MDX.svgz) and [MDD.svgz](./MDD.svgz)

# Example Programs

## readmdict.py

readmdict.py is an example implementation in Python. This program can read/extract mdx/mdd files.

**NOTE:** python-lzo is required to read mdx files created with engine 1.2. Get the Windows version from http://www.lfd.uci.edu/~gohlke/pythonlibs/#python-lzo

It can be used as a command line tool. Suppose one has oald8.mdx and oald8.mdd:

```
$ python readmdict.py -x oald8.mdx
```

This will create an *oald8.txt* dictionary file and a folder *data* for images and pronunciation audio files. On Windows, one can also double-click it and select the file in the popup dialog.

Or as a module:

```
In [1]: from readmdict import MDX, MDD
```

Read an MDX file and print the first entry:

```
In [2]: mdx = MDX('oald8.mdx')
In [3]: items = mdx.items()
In [4]: items.next()
Out[4]: ('A', '.........')
```

`mdx` is an object holding all info from an MDX file. `items` is an iterator producing 2-item tuples. In each tuple, the first element is the entry text and the second is the explanation. Both are UTF-8 encoded strings.

Read an MDD file and print the first entry:

```
In [5]: mdd = MDD('oald8.mdd')
In [6]: items = mdd.items()
In [7]: items.next()
Out[7]: ('\\pic\\accordion_concertina.jpg', '\xff\xd8\xff\xe0\x00\x10JFIF...........')
```

`mdd` is an object holding all info from an MDD file. `items` is an iterator producing 2-item tuples. In each tuple, the first element is the file name and the second element is the corresponding file content. The file name is encoded in UTF-8. The file content is a plain bytes array.
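The interactive sessions above translate directly into a small extraction script. A sketch of the idea (assuming a Python 3 environment where the `MDX`/`MDD` classes behave as shown above but yield `bytes` for both tuple elements; the file names are placeholders):

```python
import os

from readmdict import MDD, MDX

# Dump all (headword, definition) pairs into a tab-separated text file.
# Note: this is naive; real definitions may contain tabs/newlines that
# would need escaping for a proper Tabfile.
mdx = MDX("oald8.mdx")
with open("oald8.txt", "wb") as out:
	for key, value in mdx.items():
		out.write(key + b"\t" + value + b"\n")

# Extract every resource stored in the MDD file into a local "data" folder.
mdd = MDD("oald8.mdd")
for fname, content in mdd.items():
	# MDD paths look like '\pic\accordion_concertina.jpg'
	rel = fname.decode("utf-8").replace("\\", os.sep).lstrip(os.sep)
	dest = os.path.join("data", rel)
	os.makedirs(os.path.dirname(dest), exist_ok=True)
	with open(dest, "wb") as f:
		f.write(content)
```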
pyglossary-5.0.9/doc/p/000077500000000000000000000000001476751035500147625ustar00rootroot00000000000000pyglossary-5.0.9/doc/p/__index__.md000066400000000000000000000063451476751035500172170ustar00rootroot00000000000000| Description | Name | Doc Link | | ----------- | ---- | -------- | | Aard 2 (.slob) | Aard2Slob | [aard2_slob.md](./aard2_slob.md) | | Almaany.com (SQLite3) | Almaany | [almaany.md](./almaany.md) | | AppleDict Source | AppleDict | [appledict.md](./appledict.md) | | AppleDict Binary | AppleDictBin | [appledict_bin.md](./appledict_bin.md) | | AyanDict SQLite | AyanDictSQLite | [ayandict_sqlite.md](./ayandict_sqlite.md) | | Babylon (.BGL) | BabylonBgl | [babylon_bgl.md](./babylon_bgl.md) | | cc-kedict | cc-kedict | [cc_kedict.md](./cc_kedict.md) | | Crawler Directory | CrawlerDir | [crawler_dir.md](./crawler_dir.md) | | CSV (.csv) | Csv | [csv.md](./csv.md) | | DictionaryForMIDs | Dicformids | [dicformids.md](./dicformids.md) | | Dict.cc (SQLite3) | Dictcc | [dict_cc.md](./dict_cc.md) | | Dict.cc (SQLite3) - Split | Dictcc_split | [dict_cc_split.md](./dict_cc_split.md) | | DICT.org file format (.index) | DictOrg | [dict_org.md](./dict_org.md) | | DICT.org dictfmt source file | DictOrgSource | [dict_org_source.md](./dict_org_source.md) | | dictunformat output file | Dictunformat | [dictunformat.md](./dictunformat.md) | | DigitalNK (SQLite3, N-Korean) | DigitalNK | [digitalnk.md](./digitalnk.md) | | DIKT JSON (.json) | DiktJson | [dikt_json.md](./dikt_json.md) | | ABBYY Lingvo DSL (.dsl) | ABBYYLingvoDSL | [dsl.md](./dsl.md) | | EPUB-2 E-Book | Epub2 | [epub2.md](./epub2.md) | | Kobo E-Reader Dictionary | Kobo | [kobo.md](./kobo.md) | | Kobo E-Reader Dictfile (.df) | Dictfile | [kobo_dictfile.md](./kobo_dictfile.md) | | Mobipocket (.mobi) E-Book | Mobi | [mobi.md](./mobi.md) | | EDICT2 (CEDICT) (.u8) | EDICT2 | [edict2.md](./edict2.md) | | EDLIN | Edlin | [edlin.md](./edlin.md) | | FreeDict (.tei) | FreeDict | [freedict.md](./freedict.md) | | Gettext Source (.po) | GettextPo | [gettext_po.md](./gettext_po.md) | | HTML Directory | HtmlDir | [html_dir.md](./html_dir.md) | | Glossary Info (.info) | Info | [info.md](./info.md) | | JMDict (xml) | JMDict | [jmdict.md](./jmdict.md) | | JMnedict | JMnedict | [jmnedict.md](./jmnedict.md) | | JSON (.json) | Json | [json.md](./json.md) | | Lingoes Source (.ldf) | LingoesLDF | [lingoes_ldf.md](./lingoes_ldf.md) | | Makindo Medical Reference (SQLite3) | MakindoMedical | [makindo_medical.md](./makindo_medical.md) | | Octopus MDict (.mdx) | OctopusMdict | [octopus_mdict.md](./octopus_mdict.md) | | QuickDic version 6 (.quickdic) | QuickDic6 | [quickdic6.md](./quickdic6.md) | | SQL (.sql) | Sql | [sql.md](./sql.md) | | StarDict (.ifo) | Stardict | [stardict.md](./stardict.md) | | StarDict (Merge Syns) | StardictMergeSyns | [stardict_merge_syns.md](./stardict_merge_syns.md) | | StarDict Textual File (.xml) | StardictTextual | [stardict_textual.md](./stardict_textual.md) | | Tabfile (.txt, .dic) | Tabfile | [tabfile.md](./tabfile.md) | | Wiktextract (.jsonl) | Wiktextract | [wiktextract.md](./wiktextract.md) | | WordNet | Wordnet | [wordnet.md](./wordnet.md) | | Wordset.org JSON directory | Wordset | [wordset.md](./wordset.md) | | XDXF (.xdxf) | Xdxf | [xdxf.md](./xdxf.md) | | XDXF with CSS and JS | XdxfCss | [xdxf_css.md](./xdxf_css.md) | | XDXF Lax (.xdxf) | XdxfLax | [xdxf_lax.md](./xdxf_lax.md) | | Yomichan (.zip) | Yomichan | [yomichan.md](./yomichan.md) | | Zim (.zim, for Kiwix) | Zim | [zim.md](./zim.md) | 
pyglossary-5.0.9/doc/p/aard2_slob.md000066400000000000000000000063011476751035500173140ustar00rootroot00000000000000## Aard 2 (.slob) ### General Information | Attribute | Value | | --------------- | -------------------------------------------------------- | | Name | Aard2Slob | | snake_case_name | aard2_slob | | Description | Aard 2 (.slob) | | Extensions | `.slob` | | Read support | Yes | | Write support | Yes | | Single-file | Yes | | Kind | 🔢 binary | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | [@itkach/slob/wiki](https://github.com/itkach/slob/wiki) | | Website | [aarddict.org](http://aarddict.org/) | ### Write options | Name | Default | Type | Comment | | ---------------------------------- | ------- | ---- | --------------------------------------------------------------- | | compression | `zlib` | str | Compression Algorithm | | content_type | | str | Content Type | | file_size_approx | `0` | int | split up by given approximate file size
examples: 100m, 1g | | file_size_approx_check_num_entries | `100` | int | for file_size_approx, check every `[?]` entries | | separate_alternates | `False` | bool | add alternate headwords as separate entries to slob | | word_title | `False` | bool | add headwords title to beginning of definition | | version_info | `False` | bool | add version info tags to slob file | | audio_goldendict | `False` | bool | Convert audio links for GoldenDict (desktop) | ### Dependencies for reading and writing PyPI Links: [PyICU](https://pypi.org/project/PyICU) To install, run: ```sh pip3 install PyICU ``` ### PyICU See [doc/pyicu.md](../pyicu.md) file for more detailed instructions on how to install PyICU. ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ------------------------------------------ | ---------------------------------------------------------------- | ------- | --------- | -------- | | [Aard 2 for Android](http://aarddict.org/) | [@itkach/aard2-android](https://github.com/itkach/aard2-android) | GPL | Android | Java | | [Aard2 for Web](http://aarddict.org/) | [@itkach/aard2-web](https://github.com/itkach/aard2-web) | MPL | Web | Java | pyglossary-5.0.9/doc/p/almaany.md000066400000000000000000000041071476751035500167300ustar00rootroot00000000000000## Almaany.com (SQLite3) ### General Information | Attribute | Value | | --------------- | ------------------------------------------------------------------------------------------------------------- | | Name | Almaany | | snake_case_name | almaany | | Description | Almaany.com (SQLite3) | | Extensions | | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 🔢 binary | | Wiki | ― | | Website | [Almaany.com Arabic Dictionary - Google Play](https://play.google.com/store/apps/details?id=com.almaany.arar) | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ----------------------------------------------------------------------------------------------- | ----------- | ------- | --------- | -------- | | [Almaany.com Arabic Dictionary](https://play.google.com/store/apps/details?id=com.almaany.arar) | ― | Unknown | Android | | pyglossary-5.0.9/doc/p/appledict.md000066400000000000000000000063761476751035500172650ustar00rootroot00000000000000## AppleDict Source ### General Information | Attribute | Value | | --------------- | --------------------------------------------------------------------------------------------- | | Name | AppleDict | | snake_case_name | appledict | | Description | AppleDict Source | | Extensions | `.apple` | | Read support | No | | Write support | Yes | | Single-file | No | | Kind | 📁 directory | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | ― | | Website | [Dictionary User Guide for Mac](https://support.apple.com/en-gu/guide/dictionary/welcome/mac) | ### Write options | Name | Default | Type | Comment | | ----------------- | ------- | ---- | ---------------------------------------- | | clean_html | `True` | bool | use BeautifulSoup parser | | css | | str | custom .css file path | | xsl | | str | custom XSL transformations file path | | default_prefs | `None` | dict | default prefs in python dict format | | prefs_html | | str | preferences XHTML file path | | front_back_matter | | str | XML file path with top-level tag | | jing | `False` | bool | run Jing check on generated XML | | indexes | | str | Additional indexes to dictionary entries | ### Dependencies for writing PyPI 
Links: [lxml](https://pypi.org/project/lxml), [beautifulsoup4](https://pypi.org/project/beautifulsoup4), [html5lib](https://pypi.org/project/html5lib) To install, run ```sh pip3 install lxml beautifulsoup4 html5lib ``` ### Also see: See [doc/apple.md](../apple.md) for additional AppleDict instructions. ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ------------------------------------------------------------------------------------------- | ----------- | ------- | --------- | -------- | | [Dictionary Development Kit](https://github.com/SebastianSzturo/Dictionary-Development-Kit) | ― | Unknown | Mac | | pyglossary-5.0.9/doc/p/appledict_bin.md000066400000000000000000000045751476751035500201140ustar00rootroot00000000000000## AppleDict Binary ### General Information | Attribute | Value | | --------------- | --------------------------------------------------------------------------------------------- | | Name | AppleDictBin | | snake_case_name | appledict_bin | | Description | AppleDict Binary | | Extensions | `.dictionary`, `.data` | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 🔢 binary | | Wiki | ― | | Website | [Dictionary User Guide for Mac](https://support.apple.com/en-gu/guide/dictionary/welcome/mac) | ### Read options | Name | Default | Type | Comment | | --------- | ------- | ---- | --------------------------------------------------- | | html | `True` | bool | Entries are HTML | | html_full | `True` | bool | Turn every entry's definition into an HTML document | ### Dependencies for reading PyPI Links: [lxml](https://pypi.org/project/lxml), [biplist](https://pypi.org/project/biplist) To install, run: ```sh pip3 install lxml biplist ``` ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | -------------------------------------------------------------------------------- | ----------- | ----------- | --------- | -------- | | [Apple Dictionary](https://support.apple.com/en-gu/guide/dictionary/welcome/mac) | ― | Proprietary | Mac | | pyglossary-5.0.9/doc/p/ayandict_sqlite.md000066400000000000000000000033241476751035500204630ustar00rootroot00000000000000## AyanDict SQLite ### General Information | Attribute | Value | | --------------- | --------------------------------------------------- | | Name | AyanDictSQLite | | snake_case_name | ayandict_sqlite | | Description | AyanDict SQLite | | Extensions | | | Read support | Yes | | Write support | Yes | | Single-file | Yes | | Kind | 🔢 binary | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | ― | | Website | [ilius/ayandict](https://github.com/ilius/ayandict) | ### Write options | Name | Default | Type | Comment | | ----- | ------- | ---- | ------------------------ | | fuzzy | `True` | bool | Create fuzzy search data | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | --------------------------------------------- | ---------------------------------------------------- | ------- | ------------------- | -------- | | [AyanDict](https://github.com/ilius/ayandict) | [@ilius/ayandict](https://github.com/ilius/ayandict) | GPL | Linux, Windows, Mac | Go | pyglossary-5.0.9/doc/p/babylon_bgl.md000066400000000000000000000042461476751035500175640ustar00rootroot00000000000000## Babylon (.BGL) ### General Information | Attribute | Value | | --------------- | -------------- | | Name | BabylonBgl | | snake_case_name | babylon_bgl | | Description | 
Babylon (.BGL) | | Extensions | `.bgl` | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 🔢 binary | | Wiki | ― | | Website | ― | ### Read options | Name | Default | Type | Comment | | --------------------------- | -------- | ---- | ------------------------------------------- | | default_encoding_overwrite | | str | Default encoding (overwrite) | | source_encoding_overwrite | | str | Source encoding (overwrite) | | target_encoding_overwrite | | str | Target encoding (overwrite) | | part_of_speech_color | `007000` | str | Color for Part of Speech | | no_control_sequence_in_defi | `False` | bool | No control sequence in definitions | | strict_string_conversion | `False` | bool | Strict string conversion | | process_html_in_key | `True` | bool | Process HTML in (entry or info) key | | key_rstrip_chars | | str | Characters to strip from right-side of keys | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ------------------------------------------------------- | ----------- | ----------- | -------------- | -------- | | [Babylon Translator](https://www.babylon-software.com/) | ― | Freemium | Windows | | | [GoldenDict](http://goldendict.org/) | ― | GPL | Linux, Windows | | | [GoldenDict Mobile (Free)](http://goldendict.mobi/) | ― | Freeware | Android | | | [GoldenDict Mobile (Full)](http://goldendict.mobi/) | ― | Proprietary | Android | | pyglossary-5.0.9/doc/p/cc_kedict.md000066400000000000000000000023321476751035500172140ustar00rootroot00000000000000## cc-kedict ### General Information | Attribute | Value | | --------------- | -------------------------------------------------------------- | | Name | cc-kedict | | snake_case_name | cc_kedict | | Description | cc-kedict | | Extensions | | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 📝 text | | Wiki | ― | | Website | [@mhagiwara/cc-kedict](https://github.com/mhagiwara/cc-kedict) | ### Dependencies for reading PyPI Links: [PyYAML](https://pypi.org/project/PyYAML), [lxml](https://pypi.org/project/lxml) To install, run: ```sh pip3 install PyYAML lxml ``` pyglossary-5.0.9/doc/p/crawler_dir.md000066400000000000000000000014631476751035500176050ustar00rootroot00000000000000## Crawler Directory ### General Information | Attribute | Value | | --------------- | ------------------ | | Name | CrawlerDir | | snake_case_name | crawler_dir | | Description | Crawler Directory | | Extensions | `.crawler` | | Read support | Yes | | Write support | Yes | | Single-file | Yes | | Kind | 📁 directory | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | ― | | Website | ― | ### Write options | Name | Default | Type | Comment | | ----------- | ------- | ---- | --------------------- | | compression | | str | Compression Algorithm | pyglossary-5.0.9/doc/p/csv.md000066400000000000000000000060421476751035500161010ustar00rootroot00000000000000## CSV (.csv) ### General Information | Attribute | Value | | --------------- | ------------------------------------------------------------------------------ | | Name | Csv | | snake_case_name | csv | | Description | CSV (.csv) | | Extensions | `.csv` | | Read support | Yes | | Write support | Yes | | Single-file | Yes | | Kind | 📝 text | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | [Comma-separated values](https://en.wikipedia.org/wiki/Comma-separated_values) | | Website | ― | ### Read options | Name | Default | Type | Comment | | --------- | ------- | ---- | 
---------------- | | encoding | `utf-8` | str | Encoding/charset | | newline | `\n` | str | Newline string | | delimiter | `,` | str | Column delimiter | ### Write options | Name | Default | Type | Comment | | --------------- | ------- | ---- | ---------------------------------------------- | | encoding | `utf-8` | str | Encoding/charset | | newline | `\n` | str | Newline string | | resources | `True` | bool | Enable resources / data files | | delimiter | `,` | str | Column delimiter | | add_defi_format | `False` | bool | enable adding defiFormat (m/h/x) | | enable_info | `True` | bool | Enable glossary info / metedata | | word_title | `False` | bool | add headwords title to beginning of definition | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ---------------------------------------------------------------------- | ----------- | ----------- | ------------------- | -------- | | [LibreOffice Calc](https://www.libreoffice.org/discover/calc/) | ― | MPL/GPL | Linux, Windows, Mac | | | [Microsoft Excel](https://www.microsoft.com/en-us/microsoft-365/excel) | ― | Proprietary | Windows | | pyglossary-5.0.9/doc/p/dicformids.md000066400000000000000000000034361476751035500174350ustar00rootroot00000000000000## DictionaryForMIDs ### General Information | Attribute | Value | | --------------- | ------------------------------------------------------------------------ | | Name | Dicformids | | snake_case_name | dicformids | | Description | DictionaryForMIDs | | Extensions | `.mids` | | Read support | Yes | | Write support | Yes | | Single-file | No | | Kind | 📁 directory | | Sort-on-write | Always | | Sort key | `dicformids` | | Wiki | ― | | Website | [DictionaryForMIDs - SourceForge](http://dictionarymid.sourceforge.net/) | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ---------------------------------------------------------- | ----------- | ------- | --------------------------------- | -------- | | [DictionaryForMIDs](http://dictionarymid.sourceforge.net/) | ― | GPL | Android, Web, Windows, Linux, Mac | Java | pyglossary-5.0.9/doc/p/dict_cc.md000066400000000000000000000036121476751035500166760ustar00rootroot00000000000000## Dict.cc (SQLite3) ### General Information | Attribute | Value | | --------------- | ------------------------------------------------------------------------------------------------ | | Name | Dictcc | | snake_case_name | dict_cc | | Description | Dict.cc (SQLite3) | | Extensions | | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 🔢 binary | | Wiki | [Dict.cc](https://en.wikipedia.org/wiki/Dict.cc) | | Website | [dict.cc dictionary - Google Play](https://play.google.com/store/apps/details?id=cc.dict.dictcc) | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ---------------------------------------------------------------------------------- | ----------- | ----------- | --------- | -------- | | [dict.cc dictionary](https://play.google.com/store/apps/details?id=cc.dict.dictcc) | ― | Proprietary | Android | | pyglossary-5.0.9/doc/p/dict_cc_split.md000066400000000000000000000036221476751035500201120ustar00rootroot00000000000000## Dict.cc (SQLite3) - Split ### General Information | Attribute | Value | | --------------- | ------------------------------------------------------------------------------------------------ | | Name | Dictcc_split | | snake_case_name | dict_cc_split | | Description | 
Dict.cc (SQLite3) - Split | | Extensions | | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 🔢 binary | | Wiki | [Dict.cc](https://en.wikipedia.org/wiki/Dict.cc) | | Website | [dict.cc dictionary - Google Play](https://play.google.com/store/apps/details?id=cc.dict.dictcc) | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ---------------------------------------------------------------------------------- | ----------- | ----------- | --------- | -------- | | [dict.cc dictionary](https://play.google.com/store/apps/details?id=cc.dict.dictcc) | ― | Proprietary | Android | | pyglossary-5.0.9/doc/p/dict_org.md000066400000000000000000000046661476751035500171100ustar00rootroot00000000000000## DICT.org file format (.index) ### General Information | Attribute | Value | | --------------- | ---------------------------------------------------------------------------- | | Name | DictOrg | | snake_case_name | dict_org | | Description | DICT.org file format (.index) | | Extensions | `.index` | | Read support | Yes | | Write support | Yes | | Single-file | No | | Kind | 📁 directory | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | [DICT#DICT file format](https://en.wikipedia.org/wiki/DICT#DICT_file_format) | | Website | [The DICT Development Group](http://dict.org/bin/Dict) | ### Write options | Name | Default | Type | Comment | | ------- | ------- | ---- | --------------------------------------- | | dictzip | `False` | bool | Compress .dict file to .dict.dz | | install | `True` | bool | Install dictionary to /usr/share/dictd/ | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | --------------------------------------------------------------- | ----------- | ------- | --------- | -------- | | [Dictd](https://directory.fsf.org/wiki/Dictd) | ― | GPL | Linux | | | [GNOME Dictionary](https://wiki.gnome.org/Apps/Dictionary) | ― | GPL | Linux | | | [Xfce4 Dictionary](https://docs.xfce.org/apps/xfce4-dict/start) | ― | GPL | Linux | | | [Ding](https://www-user.tu-chemnitz.de/~fri/ding/) | ― | GPL | Linux | | pyglossary-5.0.9/doc/p/dict_org_source.md000066400000000000000000000031171476751035500204600ustar00rootroot00000000000000## DICT.org dictfmt source file ### General Information | Attribute | Value | | --------------- | -------------------------------------------------- | | Name | DictOrgSource | | snake_case_name | dict_org_source | | Description | DICT.org dictfmt source file | | Extensions | `.dtxt` | | Read support | No | | Write support | Yes | | Single-file | Yes | | Kind | 📝 text | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | [DICT](https://en.wikipedia.org/wiki/DICT) | | Website | [@cheusov/dictd](https://github.com/cheusov/dictd) | ### Write options | Name | Default | Type | Comment | | --------------- | ------- | ---- | -------------------- | | remove_html_all | `True` | bool | Remove all HTML tags | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ---------------------------------------------- | ----------- | ------- | --------- | -------- | | [dictfmt](https://linux.die.net/man/1/dictfmt) | ― | GPL | Linux | | pyglossary-5.0.9/doc/p/dictunformat.md000066400000000000000000000044021476751035500200030ustar00rootroot00000000000000## dictunformat output file ### General Information | Attribute | Value | | --------------- | 
---------------------------------------------------------------------------------------------------------- | | Name | Dictunformat | | snake_case_name | dictunformat | | Description | dictunformat output file | | Extensions | `.dictunformat` | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 📝 text | | Wiki | [Dictd](https://directory.fsf.org/wiki/Dictd) | | Website | [dictd/dictunformat.1.in - @cheusov/dictd](https://github.com/cheusov/dictd/blob/master/dictunformat.1.in) | ### Read options | Name | Default | Type | Comment | | ------------------ | ------- | ---- | ------------------------------------- | | encoding | `utf-8` | str | Encoding/charset | | headword_separator | `; ` | str | Separator for headword and alternates | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | -------------------------------------------------------- | ----------- | ------- | --------- | -------- | | [dictunformat](https://linux.die.net/man/1/dictunformat) | ― | GPL | Linux | | pyglossary-5.0.9/doc/p/digitalnk.md000066400000000000000000000025141476751035500172540ustar00rootroot00000000000000## DigitalNK (SQLite3, N-Korean) ### General Information | Attribute | Value | | --------------- | -------------------------------------------------------- | | Name | DigitalNK | | snake_case_name | digitalnk | | Description | DigitalNK (SQLite3, N-Korean) | | Extensions | | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 🔢 binary | | Wiki | ― | | Website | [@digitalprk/dicrs](https://github.com/digitalprk/dicrs) | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | --------------------------------------------- | ----------- | ------------ | --------- | -------- | | [Dic.rs](https://github.com/digitalprk/dicrs) | ― | BSD-2-Clause | Linux | | pyglossary-5.0.9/doc/p/dikt_json.md000066400000000000000000000025561476751035500172760ustar00rootroot00000000000000## DIKT JSON (.json) ### General Information | Attribute | Value | | --------------- | ------------------------------------ | | Name | DiktJson | | snake_case_name | dikt_json | | Description | DIKT JSON (.json) | | Extensions | | | Read support | No | | Write support | Yes | | Single-file | Yes | | Kind | 📝 text | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | ― | | Website | [@maxim-saplin/dikt](https://github.com/maxim-saplin/dikt) | ### Write options | Name | Default | Type | Comment | | ----------- | ------- | ---- | ---------------------------------------------- | | encoding | `utf-8` | str | Encoding/charset | | enable_info | `True` | bool | Enable glossary info / metadata | | resources | `True` | bool | Enable resources / data files | | word_title | `False` | bool | Add headwords title to beginning of definition | pyglossary-5.0.9/doc/p/dsl.md000066400000000000000000000037451476751035500160770ustar00rootroot00000000000000## ABBYY Lingvo DSL (.dsl) ### General Information | Attribute | Value | | --------------- | ---------------------------------------------------------- | | Name | ABBYYLingvoDSL | | snake_case_name | dsl | | Description | ABBYY Lingvo DSL (.dsl) | | Extensions | `.dsl` | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 📝 text | | Wiki | [ABBYY Lingvo](https://ru.wikipedia.org/wiki/ABBYY_Lingvo) | | Website | [www.lingvo.ru](https://www.lingvo.ru/) | ### Read options | Name | Default | Type | Comment | | ------------- | ----------- | ---- | 
---------------------------------------------- | | encoding | | str | Encoding/charset | | audio | `True` | bool | Enable audio objects | | example_color | `steelblue` | str | Examples color | | abbrev | `hover` | str | Load and apply abbreviation file (`_abrv.dsl`) | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | -------------------------------------- | ----------- | ----------- | --------------------------------------------------- | -------- | | [ABBYY Lingvo](https://www.lingvo.ru/) | ― | Proprietary | Windows, Mac, Android, iOS, Windows Mobile, Symbian | | pyglossary-5.0.9/doc/p/edict2.md000066400000000000000000000026721476751035500164650ustar00rootroot00000000000000## EDICT2 (CEDICT) (.u8) ### General Information | Attribute | Value | | --------------- | ---------------------------------------------- | | Name | EDICT2 | | snake_case_name | edict2 | | Description | EDICT2 (CEDICT) (.u8) | | Extensions | `.u8` | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 📝 text | | Wiki | [CEDICT](https://en.wikipedia.org/wiki/CEDICT) | | Website | ― | ### Read options | Name | Default | Type | Comment | | ----------------- | ------- | ---- | --------------------------------------------- | | encoding | `utf-8` | str | Encoding/charset | | traditional_title | `False` | bool | Use traditional Chinese for entry titles/keys | | colorize_tones | `True` | bool | Set to false to disable tones coloring | ### Dependencies for reading PyPI Links: [lxml](https://pypi.org/project/lxml) To install, run: ```sh pip3 install lxml ``` pyglossary-5.0.9/doc/p/edlin.md000066400000000000000000000020361476751035500164000ustar00rootroot00000000000000## EDLIN ### General Information | Attribute | Value | | --------------- | ------------------ | | Name | Edlin | | snake_case_name | edlin | | Description | EDLIN | | Extensions | `.edlin` | | Read support | Yes | | Write support | Yes | | Single-file | No | | Kind | 📁 directory | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | ― | | Website | ― | ### Read options | Name | Default | Type | Comment | | -------- | ------- | ---- | ---------------- | | encoding | `utf-8` | str | Encoding/charset | ### Write options | Name | Default | Type | Comment | | --------- | ------- | ---- | ----------------------------- | | encoding | `utf-8` | str | Encoding/charset | | prev_link | `True` | bool | Enable link to previous entry | pyglossary-5.0.9/doc/p/epub2.md000066400000000000000000000052521476751035500163250ustar00rootroot00000000000000## EPUB-2 E-Book ### General Information | Attribute | Value | | --------------- | ------------------------------------------ | | Name | Epub2 | | snake_case_name | epub2 | | Description | EPUB-2 E-Book | | Extensions | `.epub` | | Read support | No | | Write support | Yes | | Single-file | Yes | | Kind | 📦 package | | Sort-on-write | Always | | Sort key | `ebook` | | Wiki | [EPUB](https://en.wikipedia.org/wiki/EPUB) | | Website | ― | ### Write options | Name | Default | Type | Comment | | ---------------------- | ------- | ---- | -------------------------- | | keep | `False` | bool | Keep temp files | | group_by_prefix_length | `2` | int | Prefix length for grouping | | include_index_page | `False` | bool | Include index page | | compress | `True` | bool | Enable compression | | css | | str | Path to css file | | cover_path | | str | Path to cover file | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms 
| Language | | -------------------------------------------------------------------------- | ----------- | ----------- | ------------------- | -------- | | [calibre](https://calibre-ebook.com/) | ― | GPL | Linux, Windows, Mac | | | [Okular](https://okular.kde.org/) | ― | GPL | Linux, Windows, Mac | | | [Book Reader](https://f-droid.org/en/packages/com.github.axet.bookreader/) | ― | GPL | Android | | | [Kobo eReader](https://www.kobo.com) | ― | Proprietary | Kobo eReader | | | [Icecream Ebook Reader](https://icecreamapps.com/Ebook-Reader/) | ― | Proprietary | Windows | | | [Aldiko](https://www.demarque.com/aldiko) | ― | Proprietary | Android, iOS | | pyglossary-5.0.9/doc/p/freedict.md000066400000000000000000000044371476751035500171010ustar00rootroot00000000000000## FreeDict (.tei) ### General Information | Attribute | Value | | --------------- | ---------------------------------------------------------------------------------- | | Name | FreeDict | | snake_case_name | freedict | | Description | FreeDict (.tei) | | Extensions | `.tei` | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 📝 text | | Wiki | [@freedict/fd-dictionaries/wiki](https://github.com/freedict/fd-dictionaries/wiki) | | Website | [FreeDict.org](https://freedict.org/) | ### Read options | Name | Default | Type | Comment | | --------------- | ------- | ---- | ------------------------------------------------ | | discover | `False` | bool | Find and show unsupported tags | | auto_rtl | `None` | bool | Auto-detect and mark Right-to-Left text | | auto_comma | `True` | bool | Auto-detect comma sign based on text | | comma | `, ` | str | Comma sign (following space) to use as separator | | word_title | `False` | bool | Add headwords title to beginning of definition | | pron_color | `gray` | str | Pronunciation color | | gram_color | `green` | str | Grammar color | | example_padding | `10` | int | Padding for examples (in px) | ### Dependencies for reading PyPI Links: [lxml](https://pypi.org/project/lxml) To install, run: ```sh pip3 install lxml ``` pyglossary-5.0.9/doc/p/gettext_po.md000066400000000000000000000040551476751035500174720ustar00rootroot00000000000000## Gettext Source (.po) ### General Information | Attribute | Value | | --------------- | ------------------------------------------------------------- | | Name | GettextPo | | snake_case_name | gettext_po | | Description | Gettext Source (.po) | | Extensions | `.po` | | Read support | Yes | | Write support | Yes | | Single-file | Yes | | Kind | 📝 text | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | [Gettext](https://en.wikipedia.org/wiki/Gettext) | | Website | [gettext - GNU Project](https://www.gnu.org/software/gettext) | ### Write options | Name | Default | Type | Comment | | --------- | ------- | ---- | ----------------------------- | | resources | `True` | bool | Enable resources / data files | ### Dependencies for reading and writing PyPI Links: [polib](https://pypi.org/project/polib) To install, run: ```sh pip3 install polib ``` ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ------------------------------------------------ | ----------- | --------------- | ------------------- | -------- | | [gettext](https://www.gnu.org/software/gettext/) | ― | GPL | Linux, Windows | | | [poEdit](https://github.com/vslavik/poedit) | ― | MIT / Shareware | Linux, Windows, Mac | | 
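For reference, converting a glossary to Gettext source from the command line might look like the minimal sketch below. The file names are hypothetical, and the `--write-options` key=value syntax assumed here is the semicolon-separated form that `pyglossary --help` describes:

```sh
# Convert a tabfile glossary to a Gettext .po file,
# skipping the extraction of resource/data files.
pyglossary mydict.txt mydict.po --write-options='resources=False'
```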
pyglossary-5.0.9/doc/p/html_dir.md000066400000000000000000000030471476751035500171120ustar00rootroot00000000000000## HTML Directory ### General Information | Attribute | Value | | --------------- | ------------------ | | Name | HtmlDir | | snake_case_name | html_dir | | Description | HTML Directory | | Extensions | `.hdir` | | Read support | No | | Write support | Yes | | Single-file | No | | Kind | 📁 directory | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | ― | | Website | ― | ### Write options | Name | Default | Type | Comment | | --------------- | -------------- | ---- | ---------------------------------------------- | | encoding | `utf-8` | str | Encoding/charset | | resources | `True` | bool | Enable resources / data files | | max_file_size | `102400` | int | Maximum file size in bytes | | filename_format | `{n:05d}.html` | str | Filename format, default: {n:05d}.html | | escape_defi | `False` | bool | Escape definitions | | dark | `True` | bool | Use dark style | | css | | str | Path to css file | | word_title | `True` | bool | Add headwords title to beginning of definition | pyglossary-5.0.9/doc/p/info.md000066400000000000000000000012421476751035500162360ustar00rootroot00000000000000## Glossary Info (.info) ### General Information | Attribute | Value | | --------------- | --------------------- | | Name | Info | | snake_case_name | info | | Description | Glossary Info (.info) | | Extensions | `.info` | | Read support | Yes | | Write support | Yes | | Single-file | Yes | | Kind | 📝 text | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | ― | | Website | ― | pyglossary-5.0.9/doc/p/jmdict.md000066400000000000000000000031321476751035500165570ustar00rootroot00000000000000## JMDict (xml) ### General Information | Attribute | Value | | --------------- | ---------------------------------------------------------------- | | Name | JMDict | | snake_case_name | jmdict | | Description | JMDict (xml) | | Extensions | | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 📝 text | | Wiki | [JMdict](https://en.wikipedia.org/wiki/JMdict) | | Website | [The JMDict Project](https://www.edrdg.org/jmdict/j_jmdict.html) | ### Read options | Name | Default | Type | Comment | | --------------- | ------- | ---- | ------------------------------------------ | | example_padding | `10` | int | Padding for examples (in px) | | example_color | | str | Examples color | | translitation | `False` | bool | Add transliteration (romaji) of keywords | ### Dependencies for reading PyPI Links: [lxml](https://pypi.org/project/lxml) To install, run: ```sh pip3 install lxml ``` pyglossary-5.0.9/doc/p/jmnedict.md000066400000000000000000000022151476751035500171010ustar00rootroot00000000000000## JMnedict ### General Information | Attribute | Value | | --------------- | ------------------------------------------------------------ | | Name | JMnedict | | snake_case_name | jmnedict | | Description | JMnedict | | Extensions | | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 📝 text | | Wiki | [JMdict](https://en.wikipedia.org/wiki/JMdict) | | Website | [EDRDG Wiki](https://www.edrdg.org/wiki/index.php/Main_Page) | ### Dependencies for reading PyPI Links: [lxml](https://pypi.org/project/lxml) To install, run: ```sh pip3 install lxml ``` pyglossary-5.0.9/doc/p/json.md000066400000000000000000000030351476751035500162560ustar00rootroot00000000000000## JSON (.json) ### General Information | Attribute | Value | | --------------- | 
------------------------------------------------- | | Name | Json | | snake_case_name | json | | Description | JSON (.json) | | Extensions | `.json` | | Read support | No | | Write support | Yes | | Single-file | Yes | | Kind | 📝 text | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | [JSON](https://en.wikipedia.org/wiki/JSON) | | Website | [www.json.org](https://www.json.org/json-en.html) | ### Write options | Name | Default | Type | Comment | | ----------- | ------- | ---- | ---------------------------------------------- | | encoding | `utf-8` | str | Encoding/charset | | enable_info | `True` | bool | Enable glossary info / metadata | | resources | `True` | bool | Enable resources / data files | | word_title | `False` | bool | Add headwords title to beginning of definition | pyglossary-5.0.9/doc/p/kobo.md000066400000000000000000000031761476751035500162450ustar00rootroot00000000000000## Kobo E-Reader Dictionary ### General Information | Attribute | Value | | --------------- | ---------------------------------------------------------- | | Name | Kobo | | snake_case_name | kobo | | Description | Kobo E-Reader Dictionary | | Extensions | `.kobo` | | Read support | No | | Write support | Yes | | Single-file | No | | Kind | 📦 package | | Sort-on-write | Never | | Sort key | (`headword_lower`) | | Wiki | [Kobo eReader](https://en.wikipedia.org/wiki/Kobo_eReader) | | Website | [www.kobo.com](https://www.kobo.com) | ### Dependencies for writing PyPI Links: [marisa-trie](https://pypi.org/project/marisa-trie) To install, run ```sh pip3 install marisa-trie ``` ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ------------------------------------ | ----------- | ----------- | ------------ | -------- | | [Kobo eReader](https://www.kobo.com) | ― | Proprietary | Kobo eReader | | pyglossary-5.0.9/doc/p/kobo_dictfile.md000066400000000000000000000045551476751035500201100ustar00rootroot00000000000000## Kobo E-Reader Dictfile (.df) ### General Information | Attribute | Value | | --------------- | --------------------------------------------------------------------------- | | Name | Dictfile | | snake_case_name | kobo_dictfile | | Description | Kobo E-Reader Dictfile (.df) | | Extensions | `.df` | | Read support | Yes | | Write support | Yes | | Single-file | Yes | | Kind | 📝 text | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | ― | | Website | [dictgen - dictutil](https://pgaskin.net/dictutil/dictgen/#dictfile-format) | ### Read options | Name | Default | Type | Comment | | --------------------- | ------- | ---- | --------------------- | | encoding | `utf-8` | str | Encoding/charset | | extract_inline_images | `True` | bool | Extract inline images | ### Write options | Name | Default | Type | Comment | | -------- | ------- | ---- | ---------------- | | encoding | `utf-8` | str | Encoding/charset | ### Dependencies for reading PyPI Links: [mistune 3.0.1](https://pypi.org/project/mistune/3.0.1) To install, run: ```sh pip3 install mistune==3.0.1 ``` ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ------------------------------------------------ | ----------- | ------- | ------------------- | -------- | | [dictgen](https://pgaskin.net/dictutil/dictgen/) | ― | MIT | Linux, Windows, Mac | | pyglossary-5.0.9/doc/p/lingoes_ldf.md000066400000000000000000000042201476751035500175700ustar00rootroot00000000000000## Lingoes Source (.ldf) ### General 
Information | Attribute | Value | | --------------- | ------------------------------------------------------------------- | | Name | LingoesLDF | | snake_case_name | lingoes_ldf | | Description | Lingoes Source (.ldf) | | Extensions | `.ldf` | | Read support | Yes | | Write support | Yes | | Single-file | Yes | | Kind | 📝 text | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | [Lingoes](https://en.wikipedia.org/wiki/Lingoes) | | Website | [Lingoes.net](http://www.lingoes.net/en/dictionary/dict_format.php) | ### Read options | Name | Default | Type | Comment | | -------- | ------- | ---- | ---------------- | | encoding | `utf-8` | str | Encoding/charset | ### Write options | Name | Default | Type | Comment | | --------- | ------- | ---- | ----------------------------- | | newline | `\n` | str | Newline string | | resources | `True` | bool | Enable resources / data files | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ---------------------------------------------------------------------------------- | ----------- | ------- | --------- | -------- | | [Lingoes Dictionary Creator](http://www.lingoes.net/en/dictionary/dict_format.php) | ― | Unknown | | | pyglossary-5.0.9/doc/p/makindo_medical.md000066400000000000000000000037211476751035500204070ustar00rootroot00000000000000## Makindo Medical Reference (SQLite3) ### General Information | Attribute | Value | | --------------- | ----------------------------------------------------------------------------------------------- | | Name | MakindoMedical | | snake_case_name | makindo_medical | | Description | Makindo Medical Reference (SQLite3) | | Extensions | | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 🔢 binary | | Wiki | ― | | Website | [Makindo.co.uk Comprehensive Medical Encyclopedia](https://www.makindo.co.uk/topics/_index.php) | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ----------------------------------------------------------------------------------------------------------- | ----------- | ------- | --------- | -------- | | [Makindo Medical Reference](https://play.google.com/store/apps/details?id=com.pocketmednotes2014.secondapp) | ― | Unknown | Android | | pyglossary-5.0.9/doc/p/mobi.md000066400000000000000000000064561476751035500162450ustar00rootroot00000000000000## Mobipocket (.mobi) E-Book ### General Information | Attribute | Value | | --------------- | ------------------------------------------------------ | | Name | Mobi | | snake_case_name | mobi | | Description | Mobipocket (.mobi) E-Book | | Extensions | `.mobi` | | Read support | No | | Write support | Yes | | Single-file | No | | Kind | 📦 package | | Sort-on-write | Yes (by default) | | Sort key | `ebook` | | Wiki | [Mobipocket](https://en.wikipedia.org/wiki/Mobipocket) | | Website | ― | ### Write options | Name | Default | Type | Comment | | ---------------------- | -------- | ---- | -------------------------------------------------------------- | | keep | `False` | bool | Keep temp files | | group_by_prefix_length | `2` | int | Prefix length for grouping | | css | | str | Path to css file | | cover_path | | str | Path to cover file | | kindlegen_path | | str | Path to kindlegen executable | | file_size_approx | `271360` | int | Approximate size of each xhtml file (example: 200kb) | | hide_word_index | `False` | bool | Hide headword in tap-to-check interface | | spellcheck | `True` | bool | Enable 
wildcard search and spell correction during word lookup | | exact | `False` | bool | Exact-match Parameter | ### Other Requirements Install [KindleGen](https://wiki.mobileread.com/wiki/KindleGen) for creating Mobipocket e-books. ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | -------------------------------------------------------------------------- | ----------- | ----------- | ------------------- | -------- | | [Amazon Kindle](https://www.amazon.com/kindle) | ― | Proprietary | Amazon Kindle | | | [calibre](https://calibre-ebook.com/) | ― | GPL | Linux, Windows, Mac | | | [Okular](https://okular.kde.org/) | ― | GPL | Linux, Windows, Mac | | | [Book Reader](https://f-droid.org/en/packages/com.github.axet.bookreader/) | ― | GPL | Android | | pyglossary-5.0.9/doc/p/octopus_mdict.md000066400000000000000000000042131476751035500201600ustar00rootroot00000000000000## Octopus MDict (.mdx) ### General Information | Attribute | Value | | --------------- | -------------------------------------------------------------------- | | Name | OctopusMdict | | snake_case_name | octopus_mdict | | Description | Octopus MDict (.mdx) | | Extensions | `.mdx` | | Read support | Yes | | Write support | No | | Single-file | No | | Kind | 🔢 binary | | Wiki | ― | | Website | [Download - MDict.cn](https://www.mdict.cn/wp/?page_id=5325&lang=en) | ### Read options | Name | Default | Type | Comment | | ------------------- | ------- | ---- | ----------------------------------- | | encoding | | str | Encoding/charset | | substyle | `True` | bool | Enable substyle | | same_dir_data_files | `False` | bool | Read data files from same directory | | audio | `False` | bool | Enable audio objects | ### `python-lzo` is required for **some** MDX glossaries. First try converting your MDX file; if that fails (probably with an `AssertionError`), then install the [LZO library and Python binding](../lzo.md). 
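As a quick illustration, the read options listed above can be passed on the command line. The following is a minimal sketch with hypothetical file names, assuming pyglossary's usual semicolon-separated `--read-options` key=value syntax:

```sh
# Convert an MDX dictionary to StarDict format, reading the
# data files (e.g. .mdd) that sit in the same directory as the .mdx file.
pyglossary dict.mdx dict.ifo --read-options='same_dir_data_files=True;audio=True'
```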
### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ------------------------------ | ----------- | ----------- | -------------------------- | -------- | | [MDict](https://www.mdict.cn/) | ― | Proprietary | Android, iOS, Windows, Mac | | pyglossary-5.0.9/doc/p/quickdic6.md000066400000000000000000000053241476751035500171720ustar00rootroot00000000000000## QuickDic version 6 (.quickdic) ### General Information | Attribute | Value | | --------------- | ------------------------------------------------------------------------------ | | Name | QuickDic6 | | snake_case_name | quickdic6 | | Description | QuickDic version 6 (.quickdic) | | Extensions | `.quickdic`, `.quickdic.v006.zip` | | Read support | Yes | | Write support | Yes | | Single-file | Yes | | Kind | 🔢 binary | | Sort-on-write | Never | | Sort key | (`headword_lower`) | | Wiki | ― | | Website | [github.com/rdoeffinger/Dictionary](https://github.com/rdoeffinger/Dictionary) | ### Write options | Name | Default | Type | Comment | | ---------------- | ------- | ---- | --------------------------------------------- | | normalizer_rules | | str | ICU normalizer rules to use for index sorting | ### Dependencies for reading PyPI Links: [PyICU](https://pypi.org/project/PyICU) To install, run: ```sh pip3 install PyICU ``` ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------------ | ------------------ | --------- | -------- | | [Dictionary](https://play.google.com/store/apps/details?id=de.reimardoeffinger.quickdic) | [@rdoeffinger/Dictionary](https://github.com/rdoeffinger/Dictionary) | Apache License 2.0 | Android | Java | | [DictionaryPC](https://github.com/rdoeffinger/DictionaryPC) | [@rdoeffinger/DictionaryPC](https://github.com/rdoeffinger/DictionaryPC) | Apache License 2.0 | Windows | Java | pyglossary-5.0.9/doc/p/sql.md000066400000000000000000000026101476751035500161020ustar00rootroot00000000000000## SQL (.sql) ### General Information | Attribute | Value | | --------------- | ---------------------------------------- | | Name | Sql | | snake_case_name | sql | | Description | SQL (.sql) | | Extensions | `.sql` | | Read support | No | | Write support | Yes | | Single-file | Yes | | Kind | 📝 text | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | [SQL](https://en.wikipedia.org/wiki/SQL) | | Website | ― | ### Write options | Name | Default | Type | Comment | | -------------- | ------- | ---- | ---------------------------- | | encoding | `utf-8` | str | Encoding/charset | | info_keys | `None` | list | List of dbinfo table columns | | add_extra_info | `True` | bool | Create dbinfo_extra table | | newline | `
` | str | Newline string | | transaction | `False` | bool | Use TRANSACTION | pyglossary-5.0.9/doc/p/stardict.md000066400000000000000000000154141476751035500171260ustar00rootroot00000000000000## StarDict (.ifo) ### General Information | Attribute | Value | | --------------- | ---------------------------------------------------- | | Name | Stardict | | snake_case_name | stardict | | Description | StarDict (.ifo) | | Extensions | `.ifo` | | Read support | Yes | | Write support | Yes | | Single-file | No | | Kind | 📁 directory | | Sort-on-write | Always | | Sort key | `stardict` | | Wiki | [StarDict](https://en.wikipedia.org/wiki/StarDict) | | Website | [huzheng.org/stardict](http://huzheng.org/stardict/) | ### Read options | Name | Default | Type | Comment | | -------------- | -------- | ---- | --------------------------------------- | | xdxf_to_html | `True` | bool | Convert XDXF entries to HTML | | xsl | `False` | bool | Use XSL transformation | | unicode_errors | `strict` | str | What to do with Unicode decoding errors | ### Write options | Name | Default | Type | Comment | | ---------------- | ------- | ---- | ------------------------------------------------------------------------ | | large_file | `False` | bool | Use idxoffsetbits=64 bits, for large files only | | dictzip | `True` | bool | Compress .dict file to .dict.dz | | sametypesequence | | str | Definition format: h=html, m=plaintext, x=xdxf | | stardict_client | `False` | bool | Modify html entries for StarDict 3.0 | | audio_goldendict | `False` | bool | Convert audio links for GoldenDict (desktop) | | audio_icon | `True` | bool | Add glossary's audio icon | | sqlite | `None` | bool | Use SQLite to limit memory usage. Default depends on global SQLite mode. | ### For sdcv and KOReader users Use [StarDict (Merge Syns)](./stardict_merge_syns.md) plugin (instead of this one) to create glossaries for using in [sdcv](https://dushistov.github.io/sdcv/) or [KOReader](http://koreader.rocks/) ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ----------------------------------------------------------------------------------------- | ------------------------------------------------------------------------ | ----------- | ----------------------------------------------------------- | -------- | | [AyanDict](https://github.com/ilius/ayandict) | [@ilius/ayandict](https://github.com/ilius/ayandict) | GPL | Linux, Windows, Mac | Go | | [GoldenDict-NG by @xiaoyifang](https://xiaoyifang.github.io/goldendict-ng/) | [@xiaoyifang/goldendict-ng](https://github.com/xiaoyifang/goldendict-ng) | GPL | Linux, Windows, Mac | C++ | | [GoldenDict](http://goldendict.org/) | [@goldendict/goldendict](https://github.com/goldendict/goldendict) | GPL | Linux, Windows, Mac | C++ | | [StarDict](http://huzheng.org/stardict/) | [@huzheng001/stardict-3](https://github.com/huzheng001/stardict-3) | GPL | Linux, Windows, Mac | C++ | | [QStarDict](https://github.com/a-rodin/qstardict) | [@a-rodin/qstardict](https://github.com/a-rodin/qstardict) | GPLv2 | Linux, Windows, Mac | C++ | | [GoldenDict Mobile (Free)](http://goldendict.mobi/) | ― | Freemium | Android | | | [GoldenDict Mobile (Full)](http://goldendict.mobi/) | ― | Proprietary | Android | | | [Twinkle Star Dictionary](https://play.google.com/store/apps/details?id=com.qtier.dict) | ― | Unknown | Android | | | [WordMateX](https://apkcombo.com/wordmatex/org.d1scw0rld.wordmatex/) | ― | Proprietary | Android | | | 
[QDict](https://play.google.com/store/apps/details?id=com.annie.dictionary) | [@namndev/QDict](https://github.com/namndev/QDict) | Apache 2.0 | Android | Java | | [Fora Dictionary](https://play.google.com/store/apps/details?id=com.ngc.fora) | ― | Freemium | Android | | | [Fora Dictionary Pro](https://play.google.com/store/apps/details?id=com.ngc.fora.android) | ― | Proprietary | Android | | | [KOReader](http://koreader.rocks/) | [@koreader/koreader](https://github.com/koreader/koreader) | AGPLv3 | Android, Amazon Kindle, Kobo eReader, PocketBook, Cervantes | Lua | | [sdcv](https://dushistov.github.io/sdcv/) | [@Dushistov/sdcv](https://github.com/Dushistov/sdcv) | GPLv2 | Linux, Windows, Mac, Android | C++ | pyglossary-5.0.9/doc/p/stardict_merge_syns.md000066400000000000000000000052271476751035500213620ustar00rootroot00000000000000## StarDict (Merge Syns) ### General Information | Attribute | Value | | --------------- | ---------------------------------------------------- | | Name | StardictMergeSyns | | snake_case_name | stardict_merge_syns | | Description | StarDict (Merge Syns) | | Extensions | | | Read support | No | | Write support | Yes | | Single-file | No | | Kind | 📁 directory | | Sort-on-write | Always | | Sort key | `stardict` | | Wiki | [StarDict](https://en.wikipedia.org/wiki/StarDict) | | Website | [huzheng.org/stardict](http://huzheng.org/stardict/) | ### Write options | Name | Default | Type | Comment | | ---------------- | ------- | ---- | ------------------------------------------------------------------------ | | large_file | `False` | bool | Use idxoffsetbits=64 bits, for large files only | | dictzip | `True` | bool | Compress .dict file to .dict.dz | | sametypesequence | | str | Definition format: h=html, m=plaintext, x=xdxf | | audio_icon | `True` | bool | Add glossary's audio icon | | sqlite | `None` | bool | Use SQLite to limit memory usage. Default depends on global SQLite mode. 
| ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ----------------------------------------- | ---------------------------------------------------------- | ------- | ----------------------------------------------------------- | -------- | | [KOReader](http://koreader.rocks/) | [@koreader/koreader](https://github.com/koreader/koreader) | AGPLv3 | Android, Amazon Kindle, Kobo eReader, PocketBook, Cervantes | Lua | | [sdcv](https://dushistov.github.io/sdcv/) | [@Dushistov/sdcv](https://github.com/Dushistov/sdcv) | GPLv2 | Linux, Windows, Mac, Android | C++ | pyglossary-5.0.9/doc/p/stardict_textual.md000066400000000000000000000063621476751035500206760ustar00rootroot00000000000000## StarDict Textual File (.xml) ### General Information | Attribute | Value | | --------------- | ------------------------------------------------------------------------------------------------------------------------ | | Name | StardictTextual | | snake_case_name | stardict_textual | | Description | StarDict Textual File (.xml) | | Extensions | | | Read support | Yes | | Write support | Yes | | Single-file | Yes | | Kind | 📝 text | | Sort-on-write | No (by default) | | Sort key | `stardict` | | Wiki | ― | | Website | [TextualDictionaryFileFormat](https://github.com/huzheng001/stardict-3/blob/master/dict/doc/TextualDictionaryFileFormat) | ### Read options | Name | Default | Type | Comment | | ------------ | ------- | ---- | ---------------------------- | | encoding | `utf-8` | str | Encoding/charset | | xdxf_to_html | `True` | bool | Convert XDXF entries to HTML | ### Write options | Name | Default | Type | Comment | | -------- | ------- | ---- | ---------------- | | encoding | `utf-8` | str | Encoding/charset | ### Dependencies for reading and writing PyPI Links: [lxml](https://pypi.org/project/lxml) To install, run: ```sh pip3 install lxml ``` ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------ | ------- | ------------------- | -------- | | [StarDict-Editor (Tools)](https://github.com/huzheng001/stardict-3/blob/master/tools/README) | [@huzheng001/stardict-3](https://github.com/huzheng001/stardict-3) | GPL | Linux, Windows, Mac | C | pyglossary-5.0.9/doc/p/tabfile.md000066400000000000000000000056561476751035500167260ustar00rootroot00000000000000## Tabfile (.txt, .dic) ### General Information | Attribute | Value | | --------------- | -------------------------------------------------------------------------- | | Name | Tabfile | | snake_case_name | tabfile | | Description | Tabfile (.txt, .dic) | | Extensions | `.txt`, `.tab`, `.tsv` | | Read support | Yes | | Write support | Yes | | Single-file | Yes | | Kind | 📝 text | | Sort-on-write | No (by default) | | Sort key | (`headword_lower`) | | Wiki | [Tab-separated values](https://en.wikipedia.org/wiki/Tab-separated_values) | | Website | ― | ### Read options | Name | Default | Type | Comment | | -------- | ------- | ---- | ---------------- | | encoding | `utf-8` | str | Encoding/charset | ### Write options | Name | Default | Type | Comment | | ---------------- | ------- | ---- | --------------------------------------------------------------- | | encoding | `utf-8` | str | Encoding/charset | | enable_info | `True` | bool | Enable glossary info / metadata | | resources | `True` | bool | Enable resources / data files | | file_size_approx | `0` | int | Split up by given approximate file size;
examples: 100m, 1g | | word_title | `False` | bool | Add headwords title to beginning of definition | ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------ | ------- | ------------------- | -------- | | [StarDict-Editor (Tools)](https://github.com/huzheng001/stardict-3/blob/master/tools/README) | [@huzheng001/stardict-3](https://github.com/huzheng001/stardict-3) | GPL | Linux, Windows, Mac | C | pyglossary-5.0.9/doc/p/wiktextract.md000066400000000000000000000041451476751035500176610ustar00rootroot00000000000000## Wiktextract (.jsonl) ### General Information | Attribute | Value | | --------------- | -------------------------------------------------------------------- | | Name | Wiktextract | | snake_case_name | wiktextract | | Description | Wiktextract (.jsonl) | | Extensions | `.jsonl` | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 📝 text | | Wiki | ― | | Website | [@tatuylonen/wiktextract](https://github.com/tatuylonen/wiktextract) | ### Read options | Name | Default | Type | Comment | | --------------- | ---------------- | ---- | ---------------------------------------------- | | word_title | `False` | bool | Add headwords title to beginning of definition | | pron_color | `gray` | str | Pronunciation color | | gram_color | `green` | str | Grammar color | | example_padding | `10px 20px` | str | Padding for examples (css value) | | audio | `True` | bool | Enable audio | | audio_formats | `['ogg', 'mp3']` | list | List of audio formats to use | | categories | `False` | bool | Enable categories | ### Dependencies for reading PyPI Links: [lxml](https://pypi.org/project/lxml) To install, run: ```sh pip3 install lxml ``` pyglossary-5.0.9/doc/p/wordnet.md000066400000000000000000000022641476751035500167720ustar00rootroot00000000000000## WordNet ### General Information | Attribute | Value | | --------------- | -------------------------------------------------------------------------- | | Name | Wordnet | | snake_case_name | wordnet | | Description | WordNet | | Extensions | | | Read support | Yes | | Write support | No | | Single-file | No | | Kind | 📁 directory | | Wiki | [WordNet](https://en.wikipedia.org/wiki/WordNet) | | Website | [WordNet - A Lexical Database for English](https://wordnet.princeton.edu/) | pyglossary-5.0.9/doc/p/wordset.md000066400000000000000000000026071476751035500170000ustar00rootroot00000000000000## Wordset.org JSON directory ### General Information | Attribute | Value | | --------------- | ---------------------------------------------------------------------------- | | Name | Wordset | | snake_case_name | wordset | | Description | Wordset.org JSON directory | | Extensions | | | Read support | Yes | | Write support | No | | Single-file | No | | Kind | 📁 directory | | Wiki | ― | | Website | [@wordset/wordset-dictionary](https://github.com/wordset/wordset-dictionary) | ### Read options | Name | Default | Type | Comment | | -------- | ------- | ---- | ---------------- | | encoding | `utf-8` | str | Encoding/charset | pyglossary-5.0.9/doc/p/xdxf.md000066400000000000000000000063201476751035500162560ustar00rootroot00000000000000## XDXF (.xdxf) ### General Information | Attribute | Value | | --------------- | -------------------------------------------------------------------------------------------------------------- | | Name | Xdxf | | 
snake_case_name | xdxf | | Description | XDXF (.xdxf) | | Extensions | `.xdxf` | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 📝 text | | Wiki | [XDXF](https://en.wikipedia.org/wiki/XDXF) | | Website | [XDXF standard - @soshial/xdxf_makedict](https://github.com/soshial/xdxf_makedict/tree/master/format_standard) | ### Read options | Name | Default | Type | Comment | | ---- | ------- | ---- | ---------------------- | | html | `True` | bool | Entries are HTML | | xsl | `False` | bool | Use XSL transformation | ### Dependencies for reading PyPI Links: [lxml](https://pypi.org/project/lxml) To install, run: ```sh pip3 install lxml ``` ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | --------------------------------------------------------------------------- | ------------------------------------------------------------------------ | -------- | ---------------------------- | -------- | | [GoldenDict-NG by @xiaoyifang](https://xiaoyifang.github.io/goldendict-ng/) | [@xiaoyifang/goldendict-ng](https://github.com/xiaoyifang/goldendict-ng) | GPL | Linux, Windows, Mac | C++ | | [GoldenDict](http://goldendict.org/) | [@goldendict/goldendict](https://github.com/goldendict/goldendict) | GPL | Linux, Windows, Mac | C++ | | [QTranslate](https://qtranslate.en.lo4d.com/windows) | ― | Freeware | Windows | C++ | | [Alpus](https://alpusapp.com/) | ― | Freeware | Windows, Mac, Linux, Android | Java | pyglossary-5.0.9/doc/p/xdxf_css.md000066400000000000000000000036131476751035500171300ustar00rootroot00000000000000## XDXF with CSS and JS ### General Information | Attribute | Value | | --------------- | -------------------------------------------------------------------------------------------------------------- | | Name | XdxfCss | | snake_case_name | xdxf_css | | Description | XDXF with CSS and JS | | Extensions | | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 📝 text | | Wiki | [XDXF](https://en.wikipedia.org/wiki/XDXF) | | Website | [XDXF standard - @soshial/xdxf_makedict](https://github.com/soshial/xdxf_makedict/tree/master/format_standard) | ### Read options | Name | Default | Type | Comment | | ---- | ------- | ---- | ---------------- | | html | `True` | bool | Entries are HTML | ### Dependencies for reading PyPI Links: [lxml](https://pypi.org/project/lxml) To install, run: ```sh pip3 install lxml ``` pyglossary-5.0.9/doc/p/xdxf_lax.md000066400000000000000000000037141476751035500171260ustar00rootroot00000000000000## XDXF Lax (.xdxf) ### General Information | Attribute | Value | | --------------- | -------------------------------------------------------------------------------------------------------------- | | Name | XdxfLax | | snake_case_name | xdxf_lax | | Description | XDXF Lax (.xdxf) | | Extensions | | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 📝 text | | Wiki | [XDXF](https://en.wikipedia.org/wiki/XDXF) | | Website | [XDXF standard - @soshial/xdxf_makedict](https://github.com/soshial/xdxf_makedict/tree/master/format_standard) | ### Read options | Name | Default | Type | Comment | | ---- | ------- | ---- | ---------------------- | | html | `True` | bool | Entries are HTML | | xsl | `False` | bool | Use XSL transformation | ### Dependencies for reading PyPI Links: [lxml](https://pypi.org/project/lxml) To install, run: ```sh pip3 install lxml ``` 
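Since this plugin registers no file extension of its own, the input format has to be selected explicitly. A minimal sketch follows (file names are hypothetical; it assumes the `--read-format` flag accepts the plugin's `Name` value, here `XdxfLax`):

```sh
# Laxly parse a non-well-formed XDXF file and convert it to Tabfile.
pyglossary --read-format=XdxfLax input.xdxf output.txt
```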
pyglossary-5.0.9/doc/p/yomichan.md000066400000000000000000000245461476751035500171240ustar00rootroot00000000000000## Yomichan (.zip) ### General Information | Attribute | Value | | --------------- | ----------------------------------------------------- | | Name | Yomichan | | snake_case_name | yomichan | | Description | Yomichan (.zip) | | Extensions | `.zip` | | Read support | No | | Write support | Yes | | Single-file | Yes | | Kind | 📦 package | | Sort-on-write | Always | | Sort key | `headword` | | Wiki | ― | | Website | [foosoft.net](https://foosoft.net/projects/yomichan/) | ### Write options | Name | Default | Type | Comment | | ---------------------------- | ------- | ---- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | term_bank_size | `10000` | int | The number of terms in each term bank json file. | | term_from_headword_only | `True` | bool | If set to true, only create a term for the headword for each entry, as opposed to creating one term for each alternate word. If the headword is ignored by the `ignore_word_with_pattern` option, the next word in the alternate list that is not ignored is used as the headword. | | no_term_from_reading | `True` | bool | When there are multiple alternate words, don't create a term for the one that is the same as the reading form, which is chosen to be the first alternate form that consists solely of Hiragana and Katakana. For example, an entry could contain both 'だいがく' and '大学' as alternate words. Setting this option to true would prevent a term from being created for the former. | | delete_word_pattern | | str | When given, all non-overlapping matches of this regular expression are removed from word strings. For example, if an entry has the word 'あま·い', setting the pattern to `·` removes all center dots; more precisely, use `·(?=[\u3040-\u309F])` to only remove center dots that precede Hiragana characters. Either way, the original word is replaced with 'あまい'. | | ignore_word_with_pattern | | str | When given, don't create terms for a word if any of its substrings matches this regular expression. For example, an entry could contain both 'だいがく【大学】' and '大学' as alternate words. Setting this option to `r'【.+】'` would prevent a term from being created for the former. | | alternates_from_word_pattern | | str | When given, the regular expression is used to find additional alternate words for the same entry from matching substrings in the original words. If there are no capturing groups in the regular expression, then all matched substrings are added to the list of alternate words. If there are capturing groups, then substrings matching the groups are added to the alternate words list instead. For example, if an entry has 'だいがく【大学】' as a word, then `\w+(?=【)` adds 'だいがく' as an additional word, while `(\w+)【(\w+)】` adds both 'だいがく' and '大学'. | | alternates_from_defi_pattern | | str | When given, the regular expression is used to find additional alternate words for the same entry from matching substrings in the definition. `^` and `$` can be used to match start and end of lines, respectively. If there are no capturing groups in the regular expression, then all matched substrings are added to the list of alternate words. If there are capturing groups, then substrings matching the groups are added to the alternate words list instead. For example, if an entry has 'だいがく【大学】' in its definition, then `\w+【(\w+)】` adds '大学' as an additional word. | | rule_v1_defi_pattern | | str | When given, if any substring of an entry's definition matches this regular expression, then the term(s) created from the entry are labeled as ichidan verbs. Yomichan uses this information to match conjugated forms of words. `^` and `$` can be used to match start and end of lines, respectively. For example, setting this option to `^\(動[上下]一\)$` identifies entries where there's a line of '(動上一)' or '(動下一)'. | | rule_v5_defi_pattern | | str | When given, if any substring of an entry's definition matches this regular expression, then the term(s) created from the entry are labeled as godan verbs. Yomichan uses this information to match conjugated forms of words. `^` and `$` can be used to match start and end of lines, respectively. For example, setting this option to `^\(動五\)$` identifies entries where there's a line of '(動五)'. | | rule_vs_defi_pattern | | str | When given, if any substring of an entry's definition matches this regular expression, then the term(s) created from the entry are labeled as suru verbs. Yomichan uses this information to match conjugated forms of words. `^` and `$` can be used to match start and end of lines, respectively. For example, setting this option to `^スル$` identifies entries where there's a line of 'スル'. | | rule_vk_defi_pattern | | str | When given, if any substring of an entry's definition matches this regular expression, then the term(s) created from the entry are labeled as kuru verbs. Yomichan uses this information to match conjugated forms of words. `^` and `$` can be used to match start and end of lines, respectively. For example, setting this option to `^\(動カ変\)$` identifies entries where there's a line of '(動カ変)'. | | rule_adji_defi_pattern | | str | When given, if any substring of an entry's definition matches this regular expression, then the term(s) created from the entry are labeled as i-adjectives. Yomichan uses this information to match conjugated forms of words. `^` and `$` can be used to match start and end of lines, respectively. For example, setting this option to `r'^\(形\)$'` identifies entries where there's a line of '(形)'. 
| ### Dependencies for writing PyPI Links: [beautifulsoup4](https://pypi.org/project/beautifulsoup4) To install, run ```sh pip3 install beautifulsoup4 ``` ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | -------------------------------- | -------------------------------------------------------- | ------- | ---------------------------- | ---------- | | [Yomitan](https://yomitan.wiki/) | [@yomidevs/yomitan](https://github.com/yomidevs/yomitan) | GPL | Chrome, Firefox, Edge, Brave | JavaScript | pyglossary-5.0.9/doc/p/zim.md000066400000000000000000000051321476751035500161040ustar00rootroot00000000000000## Zim (.zim, for Kiwix) ### General Information | Attribute | Value | | --------------- | ---------------------------------------------------------------------- | | Name | Zim | | snake_case_name | zim | | Description | Zim (.zim, for Kiwix) | | Extensions | `.zim` | | Read support | Yes | | Write support | No | | Single-file | Yes | | Kind | 🔢 binary | | Wiki | [ZIM (file format)](https://en.wikipedia.org/wiki/ZIM_(file_format)) | | Website | [OpenZIM](https://wiki.openzim.org/wiki/OpenZIM) | ### Read options | Name | Default | Type | Comment | | ------------------- | --------- | ---- | ------------------------------------------------------------------- | | text_unicode_errors | `replace` | str | Unicode Errors for plaintext, values: `strict`, `ignore`, `replace` | | html_unicode_errors | `replace` | str | Unicode Errors for HTML, values: `strict`, `ignore`, `replace` | ### Dependencies for reading PyPI Links: [libzim>=1.0](https://pypi.org/project/libzim%3E=1.0) To install, run: ```sh pip3 install 'libzim>=1.0' ``` ### Dictionary Applications/Tools | Name & Website | Source code | License | Platforms | Language | | ----------------------------------------------------------- | ----------- | ------- | -------------- | -------- | | [Kiwix Desktop](https://github.com/kiwix/kiwix-desktop) | ― | GPL | Linux, Windows | | | [Kiwix JS](https://github.com/kiwix/kiwix-js) | ― | GPL | Windows | | | [Kiwix Serve](https://github.com/kiwix/kiwix-tools) | ― | GPL | Linux, Windows | | | [Kiwix for Apple Mac OS X](https://macos.kiwix.org) | ― | | Mac | | | [Kiwix for Android](https://github.com/kiwix/kiwix-android) | ― | GPL | Android | | pyglossary-5.0.9/doc/pyicu.md000066400000000000000000000026431476751035500162030ustar00rootroot00000000000000# [PyICU](https://pyicu.org) ## Installation on Linux - Debian: `sudo apt-get install python3-icu` - Ubuntu: `sudo apt install pyicu` - openSUSE: `sudo zypper install python3-PyICU` - Fedora: `sudo dnf install python3-pyicu` - Other distros: - Install [ICU](https://icu.unicode.org/) >= 4.8 - Run `sudo pip3 install PyICU` or `pip3 install PyICU --user` ## Installation on Android with Termux - Run `pkg install libicu` - Run `pip install PyICU` ## Installation on Mac OS ```sh brew install pkg-config icu4c export PATH="$(brew --prefix)/opt/icu4c/bin:$(brew --prefix)/opt/icu4c/sbin:$PATH" export PKG_CONFIG_PATH="$PKG_CONFIG_PATH:/usr/local/opt/icu4c/lib/pkgconfig" # ensure system clang is used for proper libstdc++ # https://github.com/ovalhub/pyicu/issues/5#issuecomment-291631507 unset CC CXX python3 -m pip install PyICU ``` ## Installation on Windows - Open https://github.com/cgohlke/pyicu-build/releases - Download the latest file that matches your system: - `cp39` for Python 3.9, `cp38` for Python 3.8, etc. - `win_amd64` for Windows 64-bit, `win32` for Windows 32-bit. 
For example: - `PyICU‑2.6‑cp39‑cp39‑win_amd64.whl` for 64-bit with Python 3.9 - `PyICU‑2.6‑cp39‑cp39‑win32.whl` for 32-bit with Python 3.9 - Open Start -> type Command -> right-click on Command Prompt -> Run as administrator - Type `pip install ` then drag-and-drop downloaded file into Command Prompt and press Enter. pyglossary-5.0.9/doc/releases/000077500000000000000000000000001476751035500163265ustar00rootroot00000000000000pyglossary-5.0.9/doc/releases/3.0.0.md000066400000000000000000000137671476751035500173220ustar00rootroot00000000000000# Changes since version 2016.03.18 ## New versioning - Using *date* as the version was a mistake I made 7 years ago - From now on, versions are in **X.Y.Z** format (*major.minor.patch*) - While X, Y and Z are digits (0-9) for simplicity (version strings can be compared alphabetically) - Starting from 3.0.0 - Take it for migrating to Python 3.x, or Gtk 3.x, or being alphabetically larger than previous versions (date string) Since I believe this is the first *standard version*, I'm not sure which code revision I should compare it with. So I just write the most important recent changes, in both application-view and library-view. ## Breaking Compatibility - **Config migration** - Config file becomes a **config directory** containing config file - Config file format changes from Python (loaded by `exec`) to **JSON** - Remove some obsolete / unused config parameters, and rename some - Remove permanent `sort` boolean flag - Must give `--sort` in command line to enable sorting for most output formats - Load user-defined plugins from a directory named `plugins` inside config directory - **Glossary class** - Remove some obsolete / unused methods - `copy`, `attach`, `merge`, `deepMerge`, `takeWords`, `getInputList`, `getOutputList` - Rename some methods: - `reverseDic` -> `reverse` - Make some public attributes private: - `data` -> `_data` - `info` -> `_info` - `filename` -> `_filename` - Clear (reset) the Glossary instance (data, info, etc) after `write` operation - Glossary class is for converting from file(s) to file, not keeping data in memory - New methods: - `convert`: - `convert` method is added to be used instead of `read` and then `write` - Not just for convenience, but it's also recommended, - and lets the Glossary class have a better default behavior - for example it enables *direct* mode by default (stay tuned) if sorting is not enabled (by user or plugin) - all UI modules (Command line, Gtk3, Tkinter) use Glossary.convert method now - Sorting policy - `sort` boolean flag is now an argument to `write` method - sort=True if user gives `--sort` in command line - sort=False if user gives `--no-sort` in command line - sort=None if user does not give either, so `write` method itself decides what to do - Now we allow plugins to specify sorting policy based on output format - By `sortOnWrite` variable in plugin, with allowed values: - `ALWAYS`: force sorting even if sort=False (user gives `--no-sort`), used only for writing StarDict - `DEFAULT_YES`: enable sorting unless sort=False (user gives `--no-sort`) - `DEFAULT_NO`: disable sorting unless sort=True (user gives `--sort`) - `NEVER`: disable sorting even if sort=True (user gives `--sort`) - The default and common value is: `sortOnWrite = DEFAULT_NO` - Plugin can also have a global `sortKey` function to be used for sorting - (like the `key` argument to `list.sort` method, see `pydoc list.sort`) - New way of interacting with Glossary instance in plugins: - `glos.data.append((word, defi))` -> 
`glos.addEntry(word, defi)` - `for item in glos.data:` -> `for entry in glos:` - `for key, value in glos.info.items():` -> `for key, value in glos.iterInfo():` ## Gtk2 to Gtk3 - Replace obsolete PyGTK-based interface with a simpler PyGI-based (Gtk3) interface ## Migrating to Python 3 - Even though `master` branch was based on Python 3 since 2016 Apr 29, there was some problem that are fixed in this release - If you are still forced need to use Python 2.7, you can use branch `python2.7` ## Introducing Direct mode - `--direct` command line option - reads and writes at the same time, without loading the whole data into memory - Partial sorting is supported - `--sort` in command line - `--sort-cache-size=1000` is optional - If plugin defines sortOnWrite=ALWAYS, it means output format requires full sorting, so direct mode will be disabled - As mentioned above (using `Glossary.convert` method), direct mode is enabled by default if sorting is not enabled (by user or plugin) - Of course user can manually disable direct mode by giving `--indirect` option in command line ## Progress Bar Automatic command line Progress Bar for all input / output formats is now supported - Implemented based on plugins Reader classes - Works both for direct mode and indirect mode - Only one progress bar for direct mode - Two progress bars for indirect mode (one while reading, one while writing) - Plugins must not update the progress bar anymore - Still no progress bar when both `--direct` and `--sort` flags are given, will be fixed later - User can disable progress bar by giving `--no-progress-bar` option (recommended for Windows users) ## BGL Plugin - BGL plugin works better now (comparing to latest Python 2.7 code), and it's much cleaner too - I totally refactored the code, made it fully Python3-compatible, and much more easier to understand - This fixes bytes/str bugs (like Bug [#54](https://github.com/ilius/pyglossary/issues/54)), and CRC check problem for some glossaries (Bug [#55](https://github.com/ilius/pyglossary/issues/55)) - I'm a fan of micro-commits and I usually hate single-commit refactoring, but this time I had no choice! 
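Putting the new entry-based API together, a plugin or script that used to manipulate `glos.data` and `glos.info` directly would now look roughly like this (a minimal sketch; entry accessor names follow the 3.x API, and the import path is an assumption):

```python
from pyglossary.glossary import Glossary

glos = Glossary()

# old: glos.data.append((word, defi))
glos.addEntry("hello", "a greeting")

# old: for item in glos.data: ...
for entry in glos:
    print(entry.getWord(), entry.getDefi())

# old: for key, value in glos.info.items(): ...
for key, value in glos.iterInfo():
    print(key, value)
```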
## Other Changes **Feature**: Add `encoding` option to read and write drivers of some plain-text formats **Feature**: SQL and SQLite: read/write extra information from/to a new table dbinfo_extra, backward compatible **New format** invented and implemented for *later implementation of a Glossary Editor* - `edlin.py` (*Editable Linked List of Entries*) is optimized for adding/modifying/removing one entry at a time - while we can save the changes instantly after each modification - Using the ideas of Doubly Linked List, and Git's hash-based object database Rewrite non-working **Reverse** functionality - The old code was messy, not working by default, slow, and language-dependent - It's much faster and cleaner now Improve and complete command line help (`-h` or `--help`) pyglossary-5.0.9/doc/releases/3.0.1.md000066400000000000000000000002521476751035500173060ustar00rootroot00000000000000# Changes since [3.0.0](./3.0.0.md) - Fix some minor bugs in Glossary class - Fix wrong exist status in command line from `pyglossary.pyw` - Fix exception in BGL plugin pyglossary-5.0.9/doc/releases/3.0.2.md000066400000000000000000000004021476751035500173040ustar00rootroot00000000000000# Changes since [3.0.1](./3.0.1.md) - Fix a bug in `setup.py`, making it not to work - Fix a bug in logger class, occurring when pyglossary is imported as a library - Fix a few bugs in Octopus MDict reader - Fix a minor bug in BGL reader - Update README.md pyglossary-5.0.9/doc/releases/3.0.3.md000066400000000000000000000005421476751035500173120ustar00rootroot00000000000000# Changes since [3.0.2](./3.0.2.md) - Fixes in AppleDict plugin - Improve Tkinter interface: fix Not Responding bug, make window icon colorful - Fix visual bug in command line Progress Bar (percentage did not become 100.0%) - BGL reader: add support for `Python < 3.5`, with a warning to install Python 3.5 - Fixes in Reverse feature - Update README.md pyglossary-5.0.9/doc/releases/3.0.4.md000066400000000000000000000042511476751035500173140ustar00rootroot00000000000000# Changes since [3.0.3](./3.0.3.md) ## Changes in `Glossary` code base - Fix critical bug in Glossary: `ZeroDivisionError` if `wordCount < 500`, [#61](https://github.com/ilius/pyglossary/issues/61) - Bug fix in Glossary.progress: make sure ui.progress is not called with a number more than 1.0 - Fix non-working write to SQL, [#67](https://github.com/ilius/pyglossary/issues/67) - Bug fix & Feature: add newline argument to `Glossary.writeTxt` Because Python's `open` converts (modifies) newlines automatically, [#66](https://github.com/ilius/pyglossary/issues/66) - Break compatibility about using `Glossary.writeTxt` method Replace argument `sep` which was a tuple of length two, with two mandatory arguments: `sep1` and `sep2` ## Changes in plugins - Fix in StarDict plugin: fix some Python3-related errors, [#71](https://github.com/ilius/pyglossary/issues/71) - Fix in Dict.org plugin: `install` was not working - Fix in DSL plugin: replace backslash at the end of line with `
`, [#61](https://github.com/ilius/pyglossary/issues/61) - Fix in SQL plugin: specify `encoding='utf-8'` while opening file for write, [#67](https://github.com/ilius/pyglossary/issues/67) - Fix in Octopus Mdict Source plugin: specify `encoding='utf-8'` while opening file for read, [#78](https://github.com/ilius/pyglossary/issues/78) - Fix (probable) bugs of bad newlines in 4 plugins (use `newline` argument to `Glossary.writeTxt`), [#66](https://github.com/ilius/pyglossary/issues/66) - Octopus MDict Source - Babylon Source (gls) - Lingoes Source (LDF) - Sdictionary Source (sdct) - Feature in Lingoes Source plugin: add `newline` write option - Minor fix in AppleDict plugin: fix beautifulsoup4 error message, [#72](https://github.com/ilius/pyglossary/issues/72) - BGL plugin: better compatibility with Python 3.4 Fix `CRC check failed` error for some (rare) glossaries with Python 3.4 ## Other Changes - Bug fix in parsing command line read options`--read-options` and `--write-options` (happened in very rare cases) - Fix wrong shebang line in setup.py: must run with python3, fix [#75](https://github.com/ilius/pyglossary/issues/75) - Update `pyglossary.spec` - Change Categories for `pyglossary.desktop` pyglossary-5.0.9/doc/releases/3.1.0.md000066400000000000000000000042171476751035500173130ustar00rootroot00000000000000# Changes since [3.0.4](./3.0.4.md) - Refactor StarDict plugin, and improve the performance - Detect HTML definitions when reading, and mark them as HTML when converting to StarDict - Fix [#135](https://github.com/ilius/pyglossary/issues/135) in StarDict writer: - Alternates were pointing at a wrong word in case there are resource/image files - Refactor AppleDict plugin - Refactor and improve BGL plugin - Style fixes including pep-8 fixes - Change indentations to tabs, and single quote to double quotes - Allow `--ui=none` flag - Allow `--skip-resources` flag - SQL plugin: add `encoding` write option - Octopus MDict Source plugin: add `encoding` read option - Drop sqlite3 support, xFarDic support, and read support for Omnidic - Improvement and cleaning in the code base and different plugins - Introduce DataEntry - Allowing to access resource files when iterating over entries (words) of Glossary - Glossary: `write` and `convert` methods return absolute path of output file, or None - Changes in master branch since [3.0.4](./3.0.4.md): - Update README.md - Update pyglossary.spec - Fixes in setup.py - BGL: add `gzip_no_crc.py` for Python 36 (required for some non-standard BGL files) - AppleDict: give `encoding='utf8'` while opening xml file, fix for [#84](https://github.com/ilius/pyglossary/issues/84) - Avoid lines that require trailing backslash, to avoid bugs like [#67](https://github.com/ilius/pyglossary/issues/67) - babylon_source.py: remove extra %s, fix [#92](https://github.com/ilius/pyglossary/issues/92) - AppleDict: force encoding="utf-8" for plist file, fix [#94](https://github.com/ilius/pyglossary/issues/94) - Fix str/bytes bug in stardict.py (fix [#98](https://github.com/ilius/pyglossary/issues/98)) and some renames for clarification - Fix [#102](https://github.com/ilius/pyglossary/issues/102): exception in dict_org.py - Fix wrong path of static files when running from dist-packages - readmdict.py: change by Xiaoqiang Wang: no encryption if Encrypted is not in header - Fix [#118](https://github.com/ilius/pyglossary/issues/118), SyntaxError (`return` with argument inside generator) in Glossary.reverse with Python 3.6 
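To illustrate the new return value of `write`/`convert` described above, here is a minimal sketch (file names are placeholders, and the exact keyword arguments are assumptions based on the notes above):

```python
from pyglossary.glossary import Glossary

glos = Glossary()

# `convert` now returns the absolute path of the output file,
# or None if the conversion failed:
outputPath = glos.convert("input.bgl", outputFilename="output.txt")
if outputPath is None:
    print("conversion failed")
```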
pyglossary-5.0.9/doc/releases/3.2.0.md000066400000000000000000000015221476751035500173100ustar00rootroot00000000000000## Changes since [3.1.0](./3.1.0.md) - Add read support for CC-CEDICT plugin - Pull request [#140](https://github.com/ilius/pyglossary/pull/140), with some fixes and improvements by me - Fixes in DSL (ABBYY Lingvo) plugin: - Fix [#136](https://github.com/ilius/pyglossary/issues/136), removing one extra character after `#CONTENTS_LANGUAGE:` - Fix [#137](https://github.com/ilius/pyglossary/issues/137), regexp for re_lang_open - Improvement in Gtk interface: - Avoid changing Format combobox based on file extension if a format is already selected, [#141](https://github.com/ilius/pyglossary/issues/141) - Fix encoding problem with non-UTF-8 system locales - Fix [#147](https://github.com/ilius/pyglossary/issues/147), give encoding="utf-8" when opening text files, for non-UTF-8 system locales - Improvements in `Glossary` class pyglossary-5.0.9/doc/releases/3.2.1.md000066400000000000000000000016551476751035500173200ustar00rootroot00000000000000# Changes since [3.2.0](./3.2.0.md) - Changes in StarDict plugin: - Add sametypesequence write option (PR [#162](https://github.com/ilius/pyglossary/pull/162)) - Fix some bugs - Cleaning - Disable gzip CRC check for BGL files with Python 3.7 - Fix a bug in octopus_mdict.py - Fix Gtk warnings in ui_gtk - Allow seeing/customizing warnings by setting environment variable WARNINGS - Fix not being able to run the program when installed inside virtualenv ([#168](https://github.com/ilius/pyglossary/issues/168)) - Show a tip about -h when no UI were found, [#169](https://github.com/ilius/pyglossary/issues/169) - octopus_mdict_source.py: fix [#68](https://github.com/ilius/pyglossary/issues/68), add support for inconsecutive links with --read-options=links=True - Auto-detect UTF-16 encoding of DSL files - Update README.md (fix Archlinux pkg name, add AUR, add instructions for installing python-lzo on Windows, etc) - Some clean up pyglossary-5.0.9/doc/releases/3.3.0.md000066400000000000000000000107121476751035500173120ustar00rootroot00000000000000# Changes since [3.2.1](./3.2.1.md) - Require Python 3.6 or higher (mainly because of f-strings) - New format support - Add support to write Kobo dictionary, [#205](https://github.com/ilius/pyglossary/issues/205) - Add support to write EPUB-2 - Add support to read AppleDict Binary (.dictionary) - Add support to read and write Aard 2 (slob), [#116](https://github.com/ilius/pyglossary/issues/116) - Glossary: detect and load Writer class from plugins - Remove write function from plugin if it has Writer class - Glossary: call `gc.collect()` on indirect mode after reading/writing each 128 entries - To free up memory and avoid running out of RAM for large glossaries - Glossary: remove empty and duplicate alternate words when converting, using Entry Filter, [#188](https://github.com/ilius/pyglossary/issues/188) - Add command line options to remove html tags: - `--remove-html=tag1,tag2,tag3` - `--remove-html-all` - Re-design format-specific options - Allow specifying format-specific read/write options in ui_gtk and ui_tk - Add much better and cleaner codebase for handling options in `option.py` - Implement validation of options in command line, GTK and Tkinter interfaces - Add tests for `option.py` in `option_test.py` - Avoid using None as default value of option argument - Check default value of plugin options and show warning if invalid - Add IntOption class, use it in Omnidic plugin - Add DictOption, use it for 
appledict defaultPrefs - And `optionsProp` to all plugins - Containing value type, allowed values and optional comment - Remove `readOptions` and `writeOptions` from all plugins - Detect options from functions' signature and `optionsProp` variables - Avoid using `**kwargs` in plugin `read`, `Reader.open` or `write` functions - Add `depends` variable to plugins - To let GUI install plugin dependencies - Type: `dict`, keys are module names, values are pip's package name - Add `Glossary.formatsDepends` - Minor fixes and improvements in Glossary class: - Return with error if output file path is an existing directory - Fix empty zip when creating `DIRECTORY.zip` as output glossary - Do not uncompress gz/bz2/zip input files automatically - Ignore "read" function of plugin if "Reader" class is present - Cleaning: Add Glossary.init() classmethod to initialize the class, can be called multiple times - Some refactoring and cleaning, and add some logs - Small optimization: `index % 100` -> `index & 0x7f` - Allow having progressbar by position in file and size of file - use for `appledict_bin.py` - Do not write resource file names as entries to text file in `Glossary.writeTxt` - StarDict plugin - Always open `.ifo` file as UTF-8 - Fix output filenames without .ifo extension creating hidden files, [#187](https://github.com/ilius/pyglossary/issues/187) - Babylon BGL plugin - Fix bytes metedata values `b'...'` and some refactoring in readType3 - Skip empty info values - Fix non-string info values written as empty - Prefix 3 info keys with `bgl_` - Fix NameError in debug mode in `stripHtmlTags` - Some refactoring - Octopus MDict plugin - Fix Python 3 bug in `readmdict.py`: https://bitbucket.org/xwang/mdict-analysis/commits/8f66c30 - Support multiple mdd files ([#203](https://github.com/ilius/pyglossary/issues/203)) - Change yes/no options in AppleDict and ABBYY Lingvo DSL plugins to boolean - To keep compatibility of command line flags, fix yes/no manually in ui_cmd.py - AppleDict plugin: - Fix `echo` problem in `Makefile` ([#177](https://github.com/ilius/pyglossary/issues/177)) - Add dark mode support for AppleDict output ([#177](https://github.com/ilius/pyglossary/issues/177)) - Add comments for `optionsProp` - Use keyword argument `features=` and fix a warning about from_encoding= - Fix misspelled "extension" (as "extension") in plugins - Detect entries with `span` tag as html, [#193](https://github.com/ilius/pyglossary/issues/193) - Refactoring in ui_gtk and ui_tk - Fix some deprecated API in ui_gtk - Fix minor bugs and improvements in ui_tk and ui_gtk - Update setup.py to adapt packaging with wheel, [#189](https://github.com/ilius/pyglossary/issues/189) - Add type hints to codebase and plugins - Refactoring and style changes: - rename `pyglossary.pyw` to main.py, add a small `pyglossary.pyw` for compatibility - Switch to f-strings in glossary.py and freedict.py - main.py: replace single quotes with double quotes - PEP-8 style fixes pyglossary-5.0.9/doc/releases/4.0.0.md000066400000000000000000000261041476751035500173120ustar00rootroot00000000000000# Changes since [3.3.0](./3.3.0.md) - Require Python 3.7 or 3.8, drop support for Python 3.4, 3.5 and 3.6 - Fix / rewrite `setup.py` - Fix `python3 setup.py sdist bdist_wheel`, and pypi package - Had to move `ui/` directory into `pyglossary/` - Switch from `distutils` to `setuptools` - Remove `py2exe` - Add interactive command line user interface - Automatically selected if input & output file arguments are not passed **and** one of these: - On Linux and no 
`$DISPLAY` is not set - On Mac and no `tkinter` module is found - `--ui=cmd` flag is passed - New format support: - Add read support for FreeDict, [#206](https://github.com/ilius/pyglossary/issues/206) - Add read support for Zim (Kiwix) - Add read and write support for Kobo E-Reader Dictfile (.df) - Add write support for DICT.org `dictfmt` source file - Add read support for [dictunformat](https://linux.die.net/man/1/dictunformat) output file - Add write support for JSON - Add read support for Dict.cc (SQLite3) - Add read support for [JMDict](https://www.edrdg.org/jmdict/j_jmdict.html), [#239](https://github.com/ilius/pyglossary/issues/239) - Add basic read support for Wiktionary Dump (.xml) - Add read support for [cc-kedict](https://github.com/mhagiwara/cc-kedict) - Add read support for [DigitalNK](https://github.com/digitalprk/dicrs) (SQLite3) - Add read support for [Wordset.org](https://github.com/wordset/wordset-dictionary) JSON directory - Remove Omnidic write support (Unmaintained J2ME dictionary) - Remove Octopus MDict Source plugin - Remove Babylon Source plugin - BGL Weader: improvements - DictionaryForMIDs Writer: fix non-working code - Gettext Source (po) Writer: fix info header - MOBI E-Book Writer: fix sort order, fix and test kindlegen codes, add `kindlegen_path` option, [#112](https://github.com/ilius/pyglossary/issues/112) - EPUB-2 E-Book Writer: fix sort order - XDXF Reader: rewrite with `etree.iterparse` to avoid using too much RAM - Lingoes Source (LDF) Reader: fix ignoring info/metadata header - dict_org.py: rewrite broken plugin (Reader and Writer) - DSL Reader: fix losing metadata/info - Aard 2 (slob) Reader: - Fix adding css/js files as normal entries - Add `bword://` prefix to entry links - Fix duplicate entries issue by keeping a set of blob IDs, [#224](https://github.com/ilius/pyglossary/issues/224) - Detect and pass defiFormat - Aard 2 (slob) Writer: - Fix content_type detection - Remove `bword://` prefix from entry links - Add resource files / data entries, [#243](https://github.com/ilius/pyglossary/issues/243) - Fix replacing image paths - Show log events from `slob.py` in debug mode - Change default `compression` to `zlib` - Allow passing empty `compression` - Octopus MDict Reader: - Read MDX file twice to load links - Count data entries as part of `len(reader)` for progressbar - StarDict Writer: - Copy "copyright" and "publisher" values to "description" - Add source and target language codes to the end of bookname - Add write-option `stardict_client: bool` Set `True` to make glossary more compatible with StarDict 3.x - Fix broken result when `sametypesequence` option is given and a definitions contains `|` - Allow `sametypesequence=x` for xdxf - Add `merge_syns` option - Allow `sametypesequence=None` option - XDXF Reader: - Fix/improve xdxf to html transformation - Kobo Writer: - Fix get_prefix algorithm and sorting order, with tests, [#219](https://github.com/ilius/pyglossary/issues/219) - Replace ` "Generator[None, BaseEntry, None]"` - Entries must be fetched with `entry = yield` in a `while True` loop: ```python while True: entry = yield if entry is None: break # process and write entry into file(s) ``` - `finish(self)` - Read options and write options must be set to their default values as class attributes - See `pyglossary/plugins/csv_pyg.py` plugin for example - `sortKey` must be an instance method of Writer, instead of a function outside any class - Only for plugins that need sorting before write - Refactor and cleanup `Glossary` class - Removed or 
replaced most of class/static attributes of `Glossary` - To see the diff, run `git diff [3.3.0](./3.3.0.md)..master -- pyglossary/glossary.py` - Removed `glos.addEntry` method - If you use it in your program, replace with `glos.addEntryObj(glos.newEntry(word, defi, defiFormat))` - Removed instance methods: - `getMostUsedDefiFormats` - `iterEntryBuckets` - `zipOutDir` and `archiveOutDir` - Moved to `pyglossary/glossary_utils.py` - `archiveOutDir` renamed to `compressOutDir` - `writeDict` - `iterSqlLines` -> moved to `pyglossary/plugins/sql.py` - `reverse`, `takeOutputWords`, `searchWordInDef` -> moved to `pyglossary/reverse.py` - Values of `Glossary.plugins` is changed to `plugin_prop.PluginProp` instances - Change `glos.writeTxt` arguments - Replace `sep1` and `sep2` with `entryFmt` - Replace `rplList` with `defiEscapeFunc`, `wordEscapeFunc` and `tail` - Remove `iterEntries`, `entryFilterFunc` - Method returns `Generator[None, BaseEntry, None]` instead of `bool` - See for usage example: - `pyglossary/glossary.py` -> `def writeTabfile` - `pyglossary/plugins/dict_org_source.py` - `pyglossary/plugins/json_plugin.py` - `pyglossary/plugins/lingoes_ldf.py` - `pyglossary/plugins/sdict_source.py` - Refactor, cleanup and fixes in `Entry` and `DataEntry` classes - Replace `entry.getWord()` with `entry.word` - Replace `entry.getWords()` with `entry.l_word` - Replace `entry.getDefi()` with `entry.defi` - Remove `entry.getDefis()` - Drop handling alternate definitions in `Entry` objects - Replace `entry.getDefiFormat()` with `entry.defiFormat` - Add `entry.b_word` and `entry.b_defi` shortcuts that give `bytes` (UTF-8) - Replace `dataEntry.getData()` with `dataEntry.data` - Add `__slots__` to Entry and DataEntry classes - Fix `DataEntry` in indirect mode - Mistaken for Entry with defi=DATA, and file content discarded - Save resource files in user's cache directory when loading input glossary into memory - Move file to output glossary on `dataEntry.save(...)` - Fix `Entry.getRawEntrySortKey` not being alternates-aware, broke StarDict Writer - `DataEntry`: save: use `shutil.copy` if has `_tmpPath`, and set `_tmpPath` - New features of `Entry` - `entry.stripFullHtml()`, remove `......` - Used in Kobo and Kobo Dictfile writers - Add tests - Fix `glos.writeTabfile`: - Remove `\r` from definitions and info values - Fix not escaping word - Fix/improve html detection in definitions - Switch to lazy imports of non-standard modules in plugins - Optimize RAM usage of indirect conversion - To write StarDict, EPUB and DictionaryForMIDs glossaries, we need to load all entries into RAM to sort them - Other new features of Glossary class - `glos.getAuthor()` to get "author", or "publisher" (as fallback) - `glos.removeHtmlTagsAll()` method, can be called by plugins' writer - `glos.collectDefiFormat(maxCount)` extract defiFormat counts - by reading first `maxCount` entries. 
(then iterator will be reset) - Used in StarDict Writer - Show memory usage in trace mode - Bug fixes and improvements in code base - Apply entry filter when iterating over reader, fix [#251](https://github.com/ilius/pyglossary/issues/251) - Fixes wrong sort order for some glossaries (converting to StarDict or other formats that need sort) - Fixes and improvements in `TextGlossaryReader` class - Fix ignoring glossary defaultDefiFormat - Fix evaluating `None` value in read/write options - Support reading multi-file Tabfile or other text formats - Example: `file.txt`, `file.txt.1`, `file.txt.2` - Need to add `file_count` info key, for example: `##file_count 3` - Fixes in Tabfile Writer - Fix not escaping "" - Add/update documentation - Update README.md - Add Termux guides in `doc/termux.md` - Move AppleDict guides to `doc/apple.md` - Move LZO notes to `doc/lzo.md` - Minify and compress `.svg` files in `doc/` folder - Switch to f-strings, pep8 fixes, add types, style changes and refactoring - New command line flags: - `--log-time` to show datetime in logs (override `log_time` in config.json) - `--no-alts` to disable alternates handling - `--normalize-html` to lowercase tags (for now) - `--cleanup` and `--no-cleanup` - `--info` to save `.info` file alongside output file pyglossary-5.0.9/doc/releases/4.1.0.md000066400000000000000000000306041476751035500173130ustar00rootroot00000000000000# Changes since [4.0.0](./4.0.0.md) There are a lot of changes since last release, but here is what I could gather and organize! Please see the commit list for more! - Improvements in ui_gtk - Improvements in ui_tk - Improvements in ui_cmd_interactive - Refactoring and improvements in ui-related codebase - Fix not loading config with `--ui=none` - Code style fixes and cleanup - Documentation - Update most documentations. - Add comments for read/write options. 
- Generate documentation for all formats - Placed in [doc/p](../p), linked to in `README.md` - Generating with `scripts/plugin-doc-gen.py` script - Read list of dictionary tools/applications from TOML files in [plugins-meta/tools](../../plugins-meta/tools) - Add `Dockerfile` and `run-with-docker.sh` script - New command-line flags: - `--json-read-options` and `--json-write-options` - To allow using `;` in option values - Example: `'--json-write-options={"delimiter": ";"}'` - `--gtk`, `--tk` and `--cmd` as shortcut for `--ui=gtk` etc - `--rtl` to change direction of definitions, [#268](https://github.com/ilius/pyglossary/issues/268), also added to `config.json` - Fix non-working `--remove-html` flag - Changes in `Glossary` class - Rename `glos.getPref` to `glos.getConfig` - Change `formatsReadOptions` and `formatsWriteOptions` to `Dict[str, OrderedDict[str, Any]]` - to include default values - remove `glos.writeTabfile`, replace with a func in `pyglossary/text_writer.py` - `Glossary.init`: avoid showing error if user plugin directory does not exist - Fixes and improvements code base - Prevent `dataEntry.save()` from raising exception because of invalid filename or permission - Avoid exception if removing temp file/folder failed - Avoid `mktemp` and more improvements - use `~/.cache/pyglossary/` directory instead of `/tmp/` - Fixes and improvements in `runDictzip` - Raise `RuntimeError` instead of `StopIteration` when iterating over a non-open reader - Avoid exception if no zip command was found, fix [#294](https://github.com/ilius/pyglossary/issues/294) - Remove directory after creating .zip, and some refactoring, [#294](https://github.com/ilius/pyglossary/issues/294) - `DataEntry`: replace `inTmp` argument with `tmpPath` argument - `Entry`: fix html pattern for hyperlinks, [#330](https://github.com/ilius/pyglossary/issues/330) - Fix incorrect virtual env directory detection - Refactor `dataDir` detection, [#307](https://github.com/ilius/pyglossary/issues/307) [#316](https://github.com/ilius/pyglossary/issues/316) - Show warning if failed to create user plugins directory - fix possible exception in `log.emit` - Add support for Conda in `dataDir` detection, [#321](https://github.com/ilius/pyglossary/issues/321) - Fix f-string in `StdLogHandler.emit` - Fixes and improvements in Windows - Fix bad `dataDir` on Windows, [#307](https://github.com/ilius/pyglossary/issues/307) - Fix `shutil.rmtree` exception on Windows - Support creating .zip on Windows 10, [#294](https://github.com/ilius/pyglossary/issues/294) - Check zip command before tar on Windows, [#294](https://github.com/ilius/pyglossary/issues/294) - Show graphical error on exceptions on Windows - Fix dataDir detection on Windows, [#323](https://github.com/ilius/pyglossary/issues/323) $324 - Changes in Config: - Rename config key `skipResources` to `skip_resources` - Add it to config.json and configDefDict - Rename config key `utf8Check` to `utf8_check` - User should edit ~/.pyglossary/config.json manually - Implement direct compression and uncompression, and some refactoring - change glos.detectInputFormat to return (filename, format, compression) or None - remove Glossary.formatsReadFileObj and Glossary.formatsWriteFileObj - remove `fileObj=` argument from `glos.writeTxt` - use optional 'compressions' list/tuple from Writer or Reader classes for direct compression/uncompression - refactoring in glossary_utils.py - Update `setup.py` - Show version from 'git describe --always' on `--version` - `FileSize` option (used in many formats): - 
Switch to metric (powers of 1000) for `K`, `M`, `G` units - Add `KiB`, `MiB`, `GiB` for powers of 1024 - Add `extensionCreate` variable (str) to plugins and plugin API - Use it to improve ui_tk - Text-based glossary code-base (effecting Tabfile, Kobo Dictfile, LDF) - Optimize TextGlossaryReader - Change multi-file text glossary file names from `.N.txt` to `.txt.N` (where `N>=1`) - Enable reading pyglossary-written multi-file text glossary by adding `file_count=-1` to metadata - because the number of files is not known when creating the first txt file - Tabfile - Rename option `writeInfo` to `enable_info` - Reader: read resource files from `*.txt_res` directory if exists - Add `*.txt_res` directory to \*.zip file - Zim Reader: - Migrate to libzim 1.0 - Add mimetype `image/webp`, fix [#329](https://github.com/ilius/pyglossary/issues/329) - Slob and Tabfile Writer: add `file_size_approx` option to allow writing multi-part output - support values like: `5500k`, `100m`, `1.2g` - Add `word_title=False` option to some writers - Slob Writer: add `word_title=False` option - Tabfile Writer: add `word_title=False` option - CSV Writer: add `word_title=False` option - JSON Writer: add `word_title=False` option - Dict.cc Reader: do not add word title - FreeDict Reader: rename `keywords_header` option to `word_title` - Add `glos.wordTitleStr`, used in plugins with `word_title` option - Add `definition_has_headwords=True` info key to avoid adding the title next time we read the glossary - Aard2 (slob) - Writer: add option `separate_alternates=False`, [#270](https://github.com/ilius/pyglossary/issues/270) - Writer: fix handling `content_type` option - Writer: use `~/.cache/pyglossary/` instead of `/tmp` - Writer: add mp3 to mime types, [#289](https://github.com/ilius/pyglossary/issues/289) - Writer: add support for .ini data file, [#289](https://github.com/ilius/pyglossary/issues/289) - Writer: support .webp files, [#329](https://github.com/ilius/pyglossary/issues/329) - Writer: supoort .tiff and .tif files - Reader: read glossary name/title and creation time from tags - Reader: extract all metedata / tags - `slob.py` library: Refactoring and cleanup - StarDict: - Reader: add option unicode_errors for invalid UTF-8 data, [#309](https://github.com/ilius/pyglossary/issues/309) - Writer: add bool write-option `audio_goldendict`, [#327](https://github.com/ilius/pyglossary/issues/327) - Writer: add option `audio_icon=True`, and add option comment, [#327](https://github.com/ilius/pyglossary/issues/327) - FreeDict Reader - Fix two slashes before and after `pron` - Avoid running `unescape_unicode` by `encoding="utf-8"` arg to `ET.htmlfile` - Fix exception if `edition` is missing in header, and few other fixes - Support `` with `` inside it - Support `` inside nested second-level(nested) `` - Add `"lang"` attribute to html elements - Add option "example_padding" - Fix rendering ``, refactoring and improvement - Handle `` inside `` - Support `` in `` - Mark external refs with `` - Support comment in `` - Support `` inside `` - Implement many tags under `` - Improvements and refactoring - XDXF - Fix not finding `xdxf.xsl` in installed mode - Effecting XDXF and StarDict formats - `xdxf.xsl`: generate `` instead of `` - StarDict Reader: Add `xdxf_to_html=True` option, [#258](https://github.com/ilius/pyglossary/issues/258) - StarDict Reader: Import `xdxf_transform` lazily - Remove forced dependency to `lxml`, [#261](https://github.com/ilius/pyglossary/issues/261) - XDXF plugin: fix glos.setDefaultDefiFormat call * 
`xdxf_transform.py`: remove warnings for , [#322](https://github.com/ilius/pyglossary/issues/322) - Merge PR [#317](https://github.com/ilius/pull/issues/317) - Parse `sr`, `gr`, `ex_orig`, `ex_transl` tags and `audio` - Remove `None` attribute from `audio` tag - Use unicode symbols for audio and external link - Use another speaker symbol for audio - Add audio controls - Use plain link without an audio tag - Mobi - Update ebook_mobi.py and README.md, [#299](https://github.com/ilius/pyglossary/issues/299) - Add PR [#335](https://github.com/ilius/pyglossary/pull/335) with some modifications - Changes in `ebook_base.py` (Mobi and EPUB) - Avoid exception if removing tmpDir failed - Use `style.css` dataEntry, [#299](https://github.com/ilius/pyglossary/issues/299) - DSL Reader: - Strip whitespaces around language names, [#264](https://github.com/ilius/pyglossary/issues/264) - Add progressbar support, [#264](https://github.com/ilius/pyglossary/issues/264) - Run `html.escape` on text before adding html tags, [#265](https://github.com/ilius/pyglossary/issues/265) - Strip and unquote glossary name - Generate `` and `` instead of `` - Avoid adding html comment - Remove `\ufeff` from header lines, [#306](https://github.com/ilius/pyglossary/issues/306) - AppleDict Source - Change path of Dictionary Development Kit, [#300](https://github.com/ilius/pyglossary/issues/300) - Open all text files with `encoding="utf-8"` - Some refactporing * Rename 4 options: - cleanHTML -> clean_html - defaultPrefs -> default_prefs - prefsHTML -> prefs_html - frontBackMatter -> front_back_matter - AppleDict Binary - Improvements, [#299](https://github.com/ilius/pyglossary/issues/299) - Read `DefaultStyle.css` file, add as `style.css`, [#299](https://github.com/ilius/pyglossary/issues/299) - Change default value of option: `html=True` - Octopus MDict (MDX) - Fix image links - Do not set empty title - Minor improvement in `readmdict.py` - Handle exception when reading from a corrupt MDD file - Add bool flag same_dir_data_files, [#289](https://github.com/ilius/pyglossary/issues/289) - Add read-option: `audio=True` (default: `False`), [#327](https://github.com/ilius/pyglossary/issues/327) - `audio`: remove extra attrs and add comments - DICT.org plugin: - `installToDictd`: skip if target directory does not exist - Make rendering dictd files a bit clear in pure txt - Fix indentation issue and add bword prefix as url - Fixes and improvements in Dict.cc (SQLite3) plugin: - Fix typo, and avoid iterating over cur, use `fetchall()`, [#296](https://github.com/ilius/pyglossary/issues/296) - Remove gender from headword, add it to definition, [#296](https://github.com/ilius/pyglossary/issues/296) - Avoid running `unescape_unicode` - JMDict - Support reading compressed file directly - Show pos before gloss (translations) - Avoid running `unescape_unicode` - DigitalNK: work around Python's sqlite bug, [#282](https://github.com/ilius/pyglossary/issues/282) - Changes in `dict_org.py` plugin, By Justin Yang - Use
to replace newline - Replace words with {} around to true web link - CC-CEDICT Reader: - Fix import error in `conv.py` - Switch from jinja2 to lxml - Fix not escaping `<`, `>` and `&` - Note: lxml inserts `&[#160](https://github.com/ilius/pyglossary/issues/160);` instead of ` ` - Use `` instead of `` - add option to use Traditional Chinese for entry name * Avoid colorizing if tones count does not match `len(syllables)`, [#328](https://github.com/ilius/pyglossary/issues/328) * Add `` for each syllable in case of mismatch tones, [#328](https://github.com/ilius/pyglossary/issues/328) - Rename read/write options: - DSL: rename option onlyFixMarkUp to only_fix_markup - SQL: rename 2 options: - `infoKeys` -> `info_keys` - `addExtraInfo` -> `add_extra_info` - EDLIN: rename option `havePrevLink` to `prev_link` - CSV: rename option `writeInfo` to `enable_info` - JSON: rename option `writeInfo` to `enable_info` - BGL: rename all read/write options (to cameCase to snake_case) - New formats: - Read "ABC Medical Notes (SQLite3)", `plugins/abc_medical_notes.py`, [#267](https://github.com/ilius/pyglossary/issues/267) - Read "Almaany.com (SQLite3)", `plugins/almaany.py`, [#267](https://github.com/ilius/pyglossary/issues/267) [#268](https://github.com/ilius/pyglossary/issues/268) - Remove TreeDict plugin, `plugins/treedict.py` - Remove FreeDict writer pyglossary-5.0.9/doc/releases/4.2.0.md000066400000000000000000000025671476751035500173230ustar00rootroot00000000000000# Changes since [4.1.0](./4.1.0.md) - Breaking changes: - Replace `glos.getAuthor()` with `glos.author` - This looks for "author" and then "publisher" keys in info/metadata - Rename option `apply_css` to `css` for mobi and epub2 - `glos.getInfo` and `glos.setInfo` only accept `str` as key (or a subclass of `str`) - Bug fixes: - Indirect mode: Fix handling '|' character in words. - Escape/unescape `|` in words when converting `entry` \<-> `rawEntry` - Escape/unescape `|` in words when writing/reading text-based file formats - JSON: Prevent duplicate keys in json output, [#344](https://github.com/ilius/pyglossary/issues/344) - Add new method `glos.preventDuplicateWords()` - Features and improvements - Add SQLite mode with `--sqlite` flag for converting to StarDict. - Eliminates the need to load all entries into RAM, limiting RAM usage. - You can add `--sqlite` to you command, even for running GUI. - For example: `python3 main.py --tk --sqlite` - See [README.md](../../README.md#sqlite-mode) for more details. 
- Add `--source-lang` and `--target-lang` flags - XDXF: support more tags and improvements - Add unit tests for `Glossary` class, and some functions in `text_utils.py` - Windows: change cache directory to `%LOCALAPPDATA%` - Some refactoring and optimization - Update, improve and re-format documentations pyglossary-5.0.9/doc/releases/4.2.1.md000066400000000000000000000017021476751035500173120ustar00rootroot00000000000000# Changes since version [4.2.0](./4.2.0.md) ### Minor bug fixes and improvements: - `text_utils.py` - Minor bug: fix legacy function `urlToPath` using `urllib.parse.unquote` - Minor bug: `replacePostSpaceChar`: remove trailing space from the output str - Cleanup: - Remove unused function `isControlChar` - Remove unused function `formatByteStr` - Remove argument `exclude` from function `isASCII` - Add unit tests - `ui_cmd_interactive.py`: fix a minor bug and some small refactoring - Command line: Override input glossary info with `--source-lang` and `--target-lang` flags - Add unit tests for CSV -> Tabfile conversion - CSV plugin: some refactoring, and rename the module to `csv_plugin.py` - Update `setup.py`: add `python_requires=">=3.7.0"`, update `extras_require` - Update README.md ### Fearures: - Command line: Add `--name` flag for changing glossary name - `Glossary`: `convert`: add `infoOverride` optional argument pyglossary-5.0.9/doc/releases/4.3.0.md000066400000000000000000000071411476751035500173150ustar00rootroot00000000000000# Changes since [4.2.1](./4.2.1.md) ## Bug fixes - Tabfile writer: fix replacing `\` with `\\` - `--remove-html` flag: fix bad regex - ui_cmd_interactive: fix a few bugs - Lowercase word/entry links (`
<a href=...>`)
- Replace `<br>` with `\n`, #394 by @tomtung
- Treat `<br/>` the same way `<br>
` is treated. - Mobi: add `mobi7-forcing` switch to `kindlegen` command, #374 by @holyspiritomb - Octopus MDict: ignore directories with `same_dir_data_files`, #362 - StarDict reader: handle definitions with mixed types/formats - Dictfile: strip whitespaces from word and defi before going through entry filters - BGL: strip whitespaces from word and defi before going through entry filters - Improvement in `glos.write`: avoid printing exception for invalid encoding - Remove empty logs in `glos.convert` - StarDict reader: fix validating `sametypesequence`, and add test - `glos.convert`: Allow an existing empty directory as output path - `TextGlossaryReader`: replace `nextPair` method with `nextBlock` which returns resource files as third item - ui_cmd_interactive: allow converting several times before exiting - Change title tag for Greek from `` to `` - Update language data set (`langs.json`) - `ui/main.py`: print 1-line error instead of full exception on `ImportError` - `ui/main.py`: Windows: try Tkinter before Gtk - `ebook_base.py`: avoid `shutil.move` on Windows, #368 - `TextGlossaryReader`: fix loading info and some refactoring, #370 36b9cd83d4c79b32e34bf64c3101cb89093b2a4e - `Entry`: Allow `word` to be `tuple` in `Entry(word=...)` - `glos.iterInfo()` return `Iterator` rather than `Iterable` - Zim: change dependency to `libzim>=1.0`, and some comments - Mobi: work with kindlegen executable in `PATH` directories, #401 - ui: limit the length of option comments in Format Options dialog - ui_gtk: improvement: show (last) critical error on status bar - ui_gtk: set intial focus - ui_gtk: improvements in About tab - ui_tk: revert most `ttk` widgets to `tk` because the theme doesn't match - Add SVG icon, #414 by @proletarius101 - Prevent exception/traceback on Ctrl+C - Optimize progress bar - Aard2 slob: show info log before and after `slobWriter.finalize()`, #437 ## Removed features - Remove read support for Wiktiomary Dump, #48 - Remove support for Sdictionary Binary and Source ## Octopus MDict MDX: features and improvements - Support MDict V3 fomrat by updating `readmdict`, #385 by @xiaoqiangwang - Fix files created without UUID in header, #387 by @xiaoqiangwang - MdxBuilder 4.0 RC2 and before creates files without UUID header - Decode mdict title & description if they're bytes, #393 by @tomtung - `readmdict`: Skip zlib decompress exceptions, #384 - `readmdict`: Use `__name__` as logger name, and add 2 debug logs, #384 - `readmdict`: improve exception msg for xxhash, #385 ## XDXF: fixes / imrovements, issue #376 - Support `` - Support embedded tags in `` - Fix ignoring `` - Fix extra newlines - Get rid of warning for `` - Fix/improve newline and space issues - Fix and improve tests - Update url for format description - Support any tag/string in ``, #396 - Support reading compressed files directly (`.xdxf.gz`, `.xdxf.bz2`, `.xdxf.lzma`) - Allow using XSL using `--write-options=xsl=True` - Update XSL - Other improvements in XDXF to HTML transformation ## AppleDict Binary: features, bug fixes, improvements, refactoring - Fix css name on `html_full=True` - Fix using `self._encoding` when should use `utf-8` - Fix internal links, #343 - Remove `x-dictionary:d:` prefix from `href` - First fix for `x-dictionary:r:`: use title if present - Add `bword://` prefix to `href` (unless it points to http/https) - Read entry IDs on open and fix links with `x-dictionary:r:` - Add plistlib to dependencies - Add tests - Replace `` with `

` - Fix bad exception formatting - Fixes from PR #436 - Support morphology (alternates): #434 by @soshial - Support different AppleDict offsets, #417 by @soshial - Extract AppleDict meta-info (langs, title, author), #418 by @soshial - Progress Bar on `open()` / loading `KeyText.data` - Improve memory usage of loading `KeyText.data` - Replace `appledict_bin.py` with `appledict_bin` directory and more refactoring ## Glossary class (`glossary.py`) - Lots of refactoring in `glossary.py` - Improve the design and readability - Reduce complexity of methods - Move some code into new classes that `Glossary` inherits from - Improve error messages - Introduce `glossary_v2.py`, and maintain API backward-compatibility for `glossary.py` (as far as documented) - See [README.md](../../README.md#using-pyglossary-as-a-python-library) for sample code. ## Refactoring - Fix style errors using `ruff` based on [pyproject.toml](../../pyproject.toml) configuration - Remove all usages of pyglossary.plugins.formats_common - Use `str.startswith(tuple)` and `str.endswith(tuple)` - Reduce complexity of `Glossary` methods - Rename entry filter `strip` to `trim_whitespaces` - Some refactoring in StarDict reader - Use [f-string equal syntax](https://github.com/python/cpython/issues/80998) added in Python 3.8 - Use `str.removeprefix` and `str.removesuffix` added in Python 3.9 - `langs/writing_system.py`: - Change `iso` field to list - Add new scripts - Add `getAllWritingSystemsFromText` - More refactoring - Split up `TextGlossaryReader.loadInfo` method - `plugin_manager.py`: make some methods private ## Documentation - Update plugins' documentation - Glossary: add comments about `entryFilters` - Update `config.rst` - Update `doc/entry-filters.md` - Update `README.md` - Update `doc/sort-key.md` - Update `doc/pyicu.md` - Update `plugins/testformat.py` - Add types for arguments and result of all functions/methods - Add types for r/w options in reader/writer classes - Fix a few incorrect type annotations - `README.md`: Add document for adding data entries, #412 - `README.md`: Fix -> nixos command, #400 by @srghma - Update [bgl_info.md](../babylon/bgl_info.md) and move it from `pyglossary/plugins/babylon_bgl/` to `doc/babylon/` ## Testing - Add test for DSL -> Tabfile conversion - `dsl_test.py`: fix method names not starting with `test_` - StarDict reader: better testing for handling definitions with mixed types - StarDict writer: much better testing, coverage of `stardict.py`: from %62 to %83 - Refactoring and improvements in tests of Glossary, along with new tests - Add test for dictunformat -> Tabfile - AppleDict (source) tests: validate plist file contents - Allow forking and branching `pyglossary-test` repo - See [tests/glossary_v2_test.py](../../tests/glossary_v2_test.py#L28) - Fix some failing tests on Windows - Slob: test `file_size_approx` - Test Tabfile -> SQL conversion - Test StarDict error/warning for sortKeyName with and without locale - Print useful messages for unhandled warnings - Improve logs - Add `showDiff=False` arg to `compareTextFiles` and `convert` ## Packaging - Update and refactor `Dockerfile` and `run-with-docker.sh` - `Dockerfile`: change `WORKDIR` to `/root/home` which is mapped to host's home dir - `run-with-docker.sh`: create `confDir` before docker build (to check the owner later) - `run-with-docker.sh`: accept version (image tag) as argument - Use host's (non-root) user in docker run - Map host user's `$HOME` to docker's user home - Re-use existing docker image with same tag - Update `setup.py` 
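As a quick reference for the new `glossary_v2` module mentioned above, a basic conversion looks like this (a minimal sketch following the README sample referenced above; file names are placeholders):

```python
from pyglossary.glossary_v2 import ConvertArgs, Glossary

Glossary.init()  # load plugins; should be called once at startup

glos = Glossary()
glos.convert(ConvertArgs(
    inputFilename="test.txt",
    # output format is detected from the file extension
    outputFilename="test.ifo",
))
```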
pyglossary-5.0.9/doc/releases/4.6.1.md000066400000000000000000000027561476751035500173300ustar00rootroot00000000000000# Changes since [4.6.0](./4.6.0.md) ## Bug fixes - Fix a bug causing broken installation if `~/.local/lib` is a symbolic link - or `site-packages` or any of its parents are a symbolic link - Fix incompatibilty with Python 3.9 (despite documentation) - Fix `scripts/entry-filters-doc.py`, `scripts/plugin-doc.py` and `doc/entry-filters.md` - AppleDict: Fix typos in Chinese language module ## Features: - Use environment variable `VERBOSITY` as default (a number from 0 to 5) ## Improvements - AppleDict Binary: set `html_full=True` by default - Update `wcwidth` to `0.2.6` ## Refactoring - Add `glos.stripFullHtml(errorHandler)` and use it in 3 plugins - Add entry filter `StripFullHtml` and change `entry.stripFullHtml()` to return error - Refactor `entryFiltersRules` - Remove empty plugin gettext_mo.py - Remove `glos.titleElement` from `glossary_v2.Glossary` - Add to `glossary.Glossary` for compatibility - `glossary.Glossary` is a wrapper (child class) on top on `glossary_v2.Glossary` ## Documentation - Update `doc/entry-filters.md` to list some entry filters that were enabled conditionally (besides config) - Remove `sdict.md` and `sdict_source.md` (removed plugins) ## Type checking - Add missing method in `GlossaryType` class - Fix `mypy` errors on most of code base and some of plugins - Use builtin types `list, dict, tuple, set` for type annotations - Replace `Optional[X]` with `X or None` - will not effect runtime, but type checking now only works with Python 3.10+ pyglossary-5.0.9/doc/releases/4.7.0.md000066400000000000000000000037221476751035500173220ustar00rootroot00000000000000# Changes since `4.6.1` - Update README.md by @ilius in https://github.com/ilius/pyglossary/pull/460 - Update pyicu docs for Debian by @master-bob in https://github.com/ilius/pyglossary/pull/461 - AppleDict-bin: support UTF16 4-byte symbols #473 by @soshial in https://github.com/ilius/pyglossary/pull/476 - AppleDict-bin: fix crash for multidirectional dicts by @soshial in https://github.com/ilius/pyglossary/pull/480 - DSL: support substitution of tilda ~ by @soshial in https://github.com/ilius/pyglossary/pull/485 - AppleDict-bin: fix crash for Korean glossary by @soshial in https://github.com/ilius/pyglossary/pull/484 - Update copyright name for ratijas by @ratijas in https://github.com/ilius/pyglossary/pull/488 - Improve Python typing in dsl plugin by @ratijas in https://github.com/ilius/pyglossary/pull/489 - New DSL plugin by @ilius in https://github.com/ilius/pyglossary/pull/492 - stardict xdxf: handle text within

by @Crissium in https://github.com/ilius/pyglossary/pull/511 - ruff by @ilius in https://github.com/ilius/pyglossary/pull/514 - format .github/workflows/\*.yml by @ilius in https://github.com/ilius/pyglossary/pull/515 - Replace dictzip with idzip by @bergentroll in https://github.com/ilius/pyglossary/pull/512 - fix: unable to select files from within flatpak by @proletarius101 in https://github.com/ilius/pyglossary/pull/531 - slob: make audio work in AARD2 for android and aard2-web (breaks audio in goldendict desktop) by @glowinthedark in https://github.com/ilius/pyglossary/pull/534 # New Contributors - @master-bob made their first contribution in https://github.com/ilius/pyglossary/pull/461 - @Crissium made their first contribution in https://github.com/ilius/pyglossary/pull/511 - @bergentroll made their first contribution in https://github.com/ilius/pyglossary/pull/512 - @glowinthedark made their first contribution in https://github.com/ilius/pyglossary/pull/534 **Full Changelog**: https://github.com/ilius/pyglossary/compare/4.6.1...4.7.0 pyglossary-5.0.9/doc/releases/4.7.1.md000066400000000000000000000036251476751035500173250ustar00rootroot00000000000000# Changes since `4.7.0` ## Breaking changes: 4c78aa4f replace `CC-CEDICT` plugin with `EDICT2` plugin ## Bug fixes and improvements: f5a420c2 Bugfix: Glossary: removeHtmlTagsAll was ineffective with --sort same for preventDuplicateWords 01b56060 Yomichan: merge entries with same headword, #574 5fe93f4b Yomichan: add beautifulsoup4 to dependencies, #577 2a239662 use `python3` in scripts/view-glossary and scripts/diff-glossary to bypass pyenv c878cbd5 zimfile: replace OSError on Windows with a warning, #580 1573d5cf Wiktextract: rewrite writeSenseExample and fix #572 - Fix TypeError: got invalid input value of type `` - Create a list of examples - Add the example type as prefix in bold 7f64af55 Wiktextract: keep warnings in a Counter, remove duplicate messages and show at end ## New Features aa6765b9 add new plugin xdxf_css (XdxfCss) based on PR #570 by @soshial 0e9d2210 add read_options to .info file fea2223b StarDict Textual writer: save resource files in res/ folder, #558 3800fac9 add Dyula language, #575 08c41dad add glos.readOptions property ## Refactoring, linting and testing 6786880c fix ruff preview error in appledict_bin/__init__.py fd09e166 github actions: switch to ruff 0.5.2 019740eb fix ruff error 69bcbf9b fix ruff preview error: B909 Mutation to loop iterable during iteration 5596b7f1 switch to ruff 0.6.4 03a509b5 fix ruff preview errors, use str.removesuffix 6ca99022 fix some mypy errors eac286b9 github test: use lxml==5.2 to fix jmdict test f2eb39de move info writer out of plugins 578c8540 fix tests: test_save_info_json 0f4d885a update pyproject.toml 1e20a1ab format pyglossary/glossary_v2.py e231b64c update scripts/format-code 4aa4f099 github action test: remove test cache acdbedeb github test: upload failed test files 1f095ada fix test action 9df1ed6f update jmdict test and switch to lxml==5.3 **Full Changelog**: https://github.com/ilius/pyglossary/compare/4.7.0...4.7.1 pyglossary-5.0.9/doc/releases/5.0.0.md000066400000000000000000000032511476751035500173110ustar00rootroot00000000000000## Breaking changes for library users - 38f8f917 `glossary_v2.Glossary` class raises `Error` exception if operation failed, instead of `log.critical` and `return None` - Applies to these methods: `convert`, `read`, `write` - `glossary.Glossary` (and `pyglossary.Glossary`) still behaves the same way (return `None` if failed) - a5204bb3 
Breaking changes in `Glossary.detectInputFormat` and `Glossary.detectOutputFormat` methods: - `format` argument is renamed to `formatName` - `quiet` argument is removed (must handle `Error` exception instead) - 9cc2887f `Glossary.wordTitleStr`: rename `_class` argument to `class_` - Remove `toBytes` and `replaceStringTable` functions from `text_utils.py` and `plugins/formats_common.py` Breaking change for plugins outside this repo ## Deprecated API for library users - `glossary.Glossary` is deprecated, use `glossary_v2.Glossary` - `format` variable in plugins is deprecated, rename it to `name` - `info` argument to `Glossary()` is deprecated. Use `glos.setInfo(key, value)` - `Glossary`: `format` arguments to `read`, `directRead` and `write` methods are deprecated, rename them to `formatName` ## What's changed since last version? We have a web-based user interface by @glowinthedark, a new plugin [StardictMergeSyns](https://github.com/ilius/pyglossary/blob/master/doc/p/stardict_merge_syns.md), new options in various plugins/formats, lots of improvements, refactoring and cleanup. **Full Changelog**: https://github.com/ilius/pyglossary/compare/4.7.1...5.0.0 ## New Contributors - @a1ess made their first contribution in https://github.com/ilius/pyglossary/pull/590 [PyPI package is released](https://pypi.org/project/pyglossary/) pyglossary-5.0.9/doc/releases/5.0.1.md000066400000000000000000000011531476751035500173110ustar00rootroot00000000000000## What's Changed - Glossary info: map "creationTime" metadata to/from "date" metadata (used by StarDict) - Gettext `.po`: fix broken syntax due to missing quotations, unescape `|`, and fix duplicate msgids - Wiktextract: improvements and better testing - Disable categories by default with an option to enable it - FreeDict: refactoring - Web UI: add `setup.py `metadata by @glowinthedark in #609 - Allow disabling in-memory SQLite with an environment variable - Better testing, fix/add type annotations and (as usual) some refactoring **Full Changelog**: https://github.com/ilius/pyglossary/compare/5.0.0...5.0.1 pyglossary-5.0.9/doc/releases/5.0.2.md000066400000000000000000000010351476751035500173110ustar00rootroot00000000000000## What's Changed - New PyGlossary icon logo - Zimfile: fix possible `NameError` - Web UI: add glossary preview buttons by @glowinthedark in https://github.com/ilius/pyglossary/pull/610 - Remove plugin: IUPAC goldbook (.xml) - Replace all usages of `OrderedDict` with `dict` which is ordered since python 3.6 - FreeDict: improve test coverage - Refactor Yomichan and Web UI, and cleanup in BGL - Remove `pyglossary.pyw`, `pkg` directory and `res/resize-16.png` **Full Changelog**: https://github.com/ilius/pyglossary/compare/5.0.1...5.0.2 pyglossary-5.0.9/doc/releases/5.0.3.md000066400000000000000000000013131476751035500173110ustar00rootroot00000000000000## What's Changed - Fix in PyGlossary icon / logo, visible in light background - Web UI: update `favicon.ico` - Mobipocket: refactor, run kindlegen with relative file path, #613 - Add back `pkg/pyglossary.desktop` for flathub build, #614 - Rename plugin `ABCMedicalNotes` to `MakindoMedical` (#267) - Make plugins' documentation tidier - Update `project.urls` in `pyproject.toml` according to [packaging.python.org](https://packaging.python.org/en/latest/tutorials/packaging-projects/) - Add or update `__all__` in imported modules - Fewer uses of `sys.exit` - Refactor `pyglossary/ui/main.py` and add `mainNoExit` function (c19ba565) **Full Changelog**: 
https://github.com/ilius/pyglossary/compare/5.0.2...5.0.3 pyglossary-5.0.9/doc/releases/5.0.4.md000066400000000000000000000014001476751035500173070ustar00rootroot00000000000000## What's Changed - Fix regression in `glossary_v2.py` effecting deprecated `Glossary` usage - Fix docstring for `glossary_v2.Glossary.write` method - Fix broken script `scripts/view-glossary-plaintext` - Feature: include `write_options` in `.info` file with `--info` flag - Testing: fix `scripts/test.sh` not testing deprecated stuff - Testing: fix deprecated tests - Testing: add `SKIP_MISSING` env var to skip testing plugins with missing dependencies - Fix / update automation scripts - Add recent releases' doc - Improve and refactor type annotations - Break up all plugins into directories (with `reader.py` and/or `writer.py`) - Fix ruff 0.8.5 errors - Some refactoring (as usual) **Full Changelog**: https://github.com/ilius/pyglossary/compare/5.0.3...5.0.4 pyglossary-5.0.9/doc/releases/5.0.6.md000066400000000000000000000012321476751035500173140ustar00rootroot00000000000000## What's Changed since `5.0.4` **Skipping `5.0.5` because of a packaging bug** - Fix bug in `ui/main.py` interpreting `sys.argv[0]` as input filename - Fix `UnicodeEncodeError` with non-utf8 stdout in frozen envs by @glowinthedark in #617 - Epub and Kobo: fix unclosed `
` tags in `.xhtml` files - Make `.epub` and `.zip` files reproducable for testing (WIP) - Move tools `.toml` files (used to generate docs) into plugin directories - Update docs - Fix ruff 0.9.0 errors and re-format - Add `useByteProgress` attribute to all Reader classes - Some refactoring in code base and UI **Full Changelog**: https://github.com/ilius/pyglossary/compare/5.0.4...5.0.6 pyglossary-5.0.9/doc/releases/5.0.7.md000066400000000000000000000017661476751035500173310ustar00rootroot00000000000000## What's Changed ### Plugins - Aard2 slob writer: add opus, oga extensions (#618) by @glowinthedark - Aard2 slob writer: change "already exists" exception to `WriteError` to avoid showing stacktrace - AppleDict writer: Russian index: switch to `pymorphy3`, #620 - Refactoring in many plugins ### User interface - Respect `NO_COLOR` environment variable in command line - Interactive command line interface: fixes / improvements - Fix 2 bugs: bad formatting (missing colon+space) and extra colon+space - Add colors to checkbox prompt - Avoid skipping empty string as config / rw option value - Gtk4 UI: fix "Browse" buttons not working - Web UI: get rid of a warning - Tkinter UI: improvements: - Layout improvements for About, Authors, License tabs - Update progress bar text position on window resize - Optimize progress bar - Add missing type annotations - Refactoring in gtk4, tkinter and interactive cmd interface **Full Changelog**: https://github.com/ilius/pyglossary/compare/5.0.6...5.0.7 pyglossary-5.0.9/doc/releases/5.0.8.md000066400000000000000000000012621476751035500173210ustar00rootroot00000000000000## What's Changed - Fix critical bug when reading multi-part text files - `text_reader.py`: infinite loop when `file_count` info is `-1` - Tkinter: add config params to customize progress bar appearances - Prefer Tkinter over Gtk on Mac - Update `doc/tkinter.md` - `text_writer.py`: change glossary name/title when splitting glossary, add log - Optimizations in text reader and text writer: - `text_reader.py`: avoid reading res dir with --skip-resources - `text_reader.py`: avoid calculating file size if progressbar is disabled - `text_writer.py`: avoid creating res dir with --skip-resources **Full Changelog**: https://github.com/ilius/pyglossary/compare/5.0.7...5.0.8 pyglossary-5.0.9/doc/sort-key.md000066400000000000000000000036201476751035500166230ustar00rootroot00000000000000# Sort Key ## Supported `sortKey` names / `--sort-key` argument values | Name/Value | Description | Default for formats | Supports locale | | ---------------------- | ------------------------- | ------------------------------------------------- | :-------------: | | `headword` | Headword | | Yes | | `headword_lower` | Lowercase Headword | All other formats (given `--sort`) | Yes | | `headword_bytes_lower` | ASCII-Lowercase Headword | | No | | `stardict` | StarDict | [StarDict](./p/stardict.md) | No | | `ebook` | E-Book (prefix length: 2) | [EPUB-2](./p/epub2.md), [Mobipocket](./p/mobi.md) | No | | `ebook_length3` | E-Book (prefix length: 3) | | No | | `dicformids` | DictionaryForMIDs | [DictionaryForMIDs](./p/dicformids.md) | No | | `random` | Random | | Yes | ## Sort Locale You can pass an [ICU Locale name/identifier](https://unicode-org.github.io/icu/userguide/locale/) as part of `sortKey` / `--sort-key` value, after a `:` symbol. 
pyglossary-5.0.9/doc/stardict/000077500000000000000000000000001476751035500163405ustar00rootroot00000000000000pyglossary-5.0.9/doc/stardict/stardict_sametypesequence.md000066400000000000000000000101201476751035500241310ustar00rootroot00000000000000To convert to a StarDict dictionary with the `sametypesequence` option, use the `sametypesequence=[type of definitions]` write option. If the sametypesequence option is set, it tells StarDict that each word's data in the .dict file will have the same sequence of datatypes. Suppose a dictionary contains phonetic information and a meaning for each word. The sametypesequence option for this dictionary would be: ``` sametypesequence=tm ``` # Examples: Definitions type is plain text: ``` pyglossary mydic.txt mydic.ifo --write-options=sametypesequence=m ``` Definitions type is HTML: ``` pyglossary mydic.txt mydic.ifo --write-options=sametypesequence=h ``` # Type identifiers Here are the single-character type identifiers that may be used with the "sametypesequence" option in the .ifo file, or may appear in the dict file itself if the "sametypesequence" option is not used. Lower-case characters signify that a field's size is determined by a terminating `\0`, while upper-case characters indicate that the data begins with a network byte-ordered guint32 that gives the length of the following data (NOT the whole field size, which is 4 bytes bigger). ## `m` Word's pure text meaning. The data should be a utf-8 string ending with `\0`. ## `l` Word's pure text meaning.
The data is NOT a utf-8 string, but is instead a string in locale encoding, ending with `\0`. Sometimes using this type will save disk space, but its use is discouraged. This is only an idea. ## `g` A utf-8 string which is marked up with the Pango text markup language.
For more information about this markup language, see the [Pango Reference Manual](http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html).
You might have it installed locally [here](file:///usr/share/gtk-doc/html/pango/PangoMarkupFormat.html). ## `t` English phonetic string.
The data should be a utf-8 string ending with `\0`. Here are some utf-8 phonetic characters:
`θʃŋʧðʒæıʌʊɒɛəɑɜɔˌˈːˑṃṇḷ`
`æɑɒʌәєŋvθðʃʒɚːɡˏˊˋ` ## `x` A utf-8 string which is marked up with the [xdxf language](https://github.com/soshial/xdxf_makedict).
StarDict has these extensions: - `<rref>` can have a "type" attribute, which can be "image", "sound", "video" or "attach". - `<kref>` can have a "k" attribute. ## `y` Chinese YinBiao or Japanese KANA.
The data should be a utf-8 string ending with `\0`. ## `k` [KingSoft](https://en.wikipedia.org/wiki/Kingsoft) [PowerWord](https://en.wikipedia.org/wiki/PowerWord)'s data. The data is a utf-8 string ending with `\0`, in XML format. ## `w` [MediaWiki markup language](https://www.mediawiki.org/wiki/Help:Formatting). ## `h` HTML code. ## `n` WordNet data. ## `r` Resource file list.
The content can be: - `img:pic/example.jpg` Image file - `snd:apple.wav` Sound file - `vdo:film.avi` Video file - `att:file.bin` Attachment file More than one line is supported as a list of available files.
StarDict will find the files in the Resource Storage.
The image will be shown, and the sound file will have a play button.
You can "save as" the attachment file and so on.
The file list must be a utf-8 string ending with `\0`.
Use `\n` to separate lines.
Use the `/` character as the directory separator.
## `W` `.wav` audio file.
The data begins with a network byte-ordered guint32 to identify the wav file's size, immediately followed by the file's content. This is only an idea; it is better to use the `r` Resource file list in most cases. ## `P` Picture file.
The data begins with a network byte-ordered guint32 to identify the picture file's size, immediately followed by the file's content. This feature is implemented, as stardict-advertisement-plugin needs it. Anyway, it is better to use the `r` Resource file list in most cases. ## `X` This type identifier is reserved for experimental extensions.
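To make the field-layout rules above concrete, here is a minimal Python sketch, an illustration rather than PyGlossary's actual reader, that splits one entry's `.dict` data block according to a `sametypesequence` value. It assumes the usual StarDict convention that the final field omits its terminating `\0` or size prefix, since its extent is implied by the record size:

```
import struct

def split_fields(data: bytes, sametypesequence: str) -> list[tuple[str, bytes]]:
    """Split one entry's .dict data block into (type, payload) pairs."""
    fields: list[tuple[str, bytes]] = []
    pos = 0
    for i, type_id in enumerate(sametypesequence):
        if i == len(sametypesequence) - 1:
            # Last field: runs to the end of the block; its terminator
            # or size prefix is omitted under sametypesequence.
            fields.append((type_id, data[pos:]))
            break
        if type_id.islower():
            # Lower-case type: the field ends at a terminating NUL byte.
            end = data.index(b"\x00", pos)
            fields.append((type_id, data[pos:end]))
            pos = end + 1
        else:
            # Upper-case type: network byte-ordered guint32 size prefix.
            (size,) = struct.unpack(">I", data[pos:pos + 4])
            fields.append((type_id, data[pos + 4:pos + 4 + size]))
            pos += 4 + size
    return fields

# For sametypesequence=tm: a phonetic field, then the plain-text meaning.
print(split_fields(b"fOnEtIk\x00plain text meaning", "tm"))
```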
# For more information Refer to the StarDict documentation at: [https://github.com/huzheng001/stardict-3/blob/master/dict/doc/StarDictFileFormat](https://github.com/huzheng001/stardict-3/blob/master/dict/doc/StarDictFileFormat) pyglossary-5.0.9/doc/term-colors.md000066400000000000000000000740111476751035500173160ustar00rootroot00000000000000## Terminal / ANSI Colors | Sample | Code | Hex | RGB | HSL | | ----------------------------------------------------------- | ---- | --------- | ------------- | ----------------- | | ![](https://via.placeholder.com/60x30/000000/000000?text=+) | 0 | `#000000` | 0, 0, 0 | 0, 0, 0 | | ![](https://via.placeholder.com/60x30/aa0000/000000?text=+) | 1 | `#aa0000` | 170, 0, 0 | 0, 1, 0.333 | | ![](https://via.placeholder.com/60x30/00aa00/000000?text=+) | 2 | `#00aa00` | 0, 170, 0 | 120, 1, 0.333 | | ![](https://via.placeholder.com/60x30/aa5500/000000?text=+) | 3 | `#aa5500` | 170, 85, 0 | 30, 1, 0.333 | | ![](https://via.placeholder.com/60x30/0000aa/000000?text=+) | 4 | `#0000aa` | 0, 0, 170 | 240, 1, 0.333 | | ![](https://via.placeholder.com/60x30/aa00aa/000000?text=+) | 5 | `#aa00aa` | 170, 0, 170 | 300, 1, 0.333 | | ![](https://via.placeholder.com/60x30/00aaaa/000000?text=+) | 6 | `#00aaaa` | 0, 170, 170 | 180, 1, 0.333 | | ![](https://via.placeholder.com/60x30/b9b9b9/000000?text=+) | 7 | `#b9b9b9` | 185, 185, 185 | 0, 0, 0.725 | | ![](https://via.placeholder.com/60x30/555555/000000?text=+) | 8 | `#555555` | 85, 85, 85 | 0, 0, 0.333 | | ![](https://via.placeholder.com/60x30/ff5555/000000?text=+) | 9 | `#ff5555` | 255, 85, 85 | 0, 1, 0.667 | | ![](https://via.placeholder.com/60x30/55ff55/000000?text=+) | 10 | `#55ff55` | 85, 255, 85 | 120, 1, 0.667 | | ![](https://via.placeholder.com/60x30/ffff55/000000?text=+) | 11 | `#ffff55` | 255, 255, 85 | 60, 1, 0.667 | | ![](https://via.placeholder.com/60x30/5555ff/000000?text=+) | 12 | `#5555ff` | 85, 85, 255 | 240, 1, 0.667 | | ![](https://via.placeholder.com/60x30/ff55ff/000000?text=+) | 13 | `#ff55ff` | 255, 85, 255 | 300, 1, 0.667 | | ![](https://via.placeholder.com/60x30/55ffff/000000?text=+) | 14 | `#55ffff` | 85, 255, 255 | 180, 1, 0.667 | | ![](https://via.placeholder.com/60x30/ffffff/000000?text=+) | 15 | `#ffffff` | 255, 255, 255 | 0, 0, 1 | | ![](https://via.placeholder.com/60x30/000000/000000?text=+) | 16 | `#000000` | 0, 0, 0 | 0, 0, 0 | | ![](https://via.placeholder.com/60x30/00005f/000000?text=+) | 17 | `#00005f` | 0, 0, 95 | 240, 1, 0.186 | | ![](https://via.placeholder.com/60x30/000087/000000?text=+) | 18 | `#000087` | 0, 0, 135 | 240, 1, 0.265 | | ![](https://via.placeholder.com/60x30/0000af/000000?text=+) | 19 | `#0000af` | 0, 0, 175 | 240, 1, 0.343 | | ![](https://via.placeholder.com/60x30/0000d7/000000?text=+) | 20 | `#0000d7` | 0, 0, 215 | 240, 1, 0.422 | | ![](https://via.placeholder.com/60x30/0000ff/000000?text=+) | 21 | `#0000ff` | 0, 0, 255 | 240, 1, 0.5 | | ![](https://via.placeholder.com/60x30/005f00/000000?text=+) | 22 | `#005f00` | 0, 95, 0 | 120, 1, 0.186 | | ![](https://via.placeholder.com/60x30/005f5f/000000?text=+) | 23 | `#005f5f` | 0, 95, 95 | 180, 1, 0.186 | | ![](https://via.placeholder.com/60x30/005f87/000000?text=+) | 24 | `#005f87` | 0, 95, 135 | 197.778, 1, 0.265 | | ![](https://via.placeholder.com/60x30/005faf/000000?text=+) | 25 | `#005faf` | 0, 95, 175 | 207.429, 1, 0.343 | 
| ![](https://via.placeholder.com/60x30/005fd7/000000?text=+) | 26 | `#005fd7` | 0, 95, 215 | 213.488, 1, 0.422 | | ![](https://via.placeholder.com/60x30/005fff/000000?text=+) | 27 | `#005fff` | 0, 95, 255 | 217.647, 1, 0.5 | | ![](https://via.placeholder.com/60x30/008700/000000?text=+) | 28 | `#008700` | 0, 135, 0 | 120, 1, 0.265 | | ![](https://via.placeholder.com/60x30/00875f/000000?text=+) | 29 | `#00875f` | 0, 135, 95 | 162.222, 1, 0.265 | | ![](https://via.placeholder.com/60x30/008787/000000?text=+) | 30 | `#008787` | 0, 135, 135 | 180, 1, 0.265 | | ![](https://via.placeholder.com/60x30/0087af/000000?text=+) | 31 | `#0087af` | 0, 135, 175 | 193.714, 1, 0.343 | | ![](https://via.placeholder.com/60x30/0087d7/000000?text=+) | 32 | `#0087d7` | 0, 135, 215 | 202.326, 1, 0.422 | | ![](https://via.placeholder.com/60x30/0087ff/000000?text=+) | 33 | `#0087ff` | 0, 135, 255 | 208.235, 1, 0.5 | | ![](https://via.placeholder.com/60x30/00af00/000000?text=+) | 34 | `#00af00` | 0, 175, 0 | 120, 1, 0.343 | | ![](https://via.placeholder.com/60x30/00af5f/000000?text=+) | 35 | `#00af5f` | 0, 175, 95 | 152.571, 1, 0.343 | | ![](https://via.placeholder.com/60x30/00af87/000000?text=+) | 36 | `#00af87` | 0, 175, 135 | 166.286, 1, 0.343 | | ![](https://via.placeholder.com/60x30/00afaf/000000?text=+) | 37 | `#00afaf` | 0, 175, 175 | 180, 1, 0.343 | | ![](https://via.placeholder.com/60x30/00afd7/000000?text=+) | 38 | `#00afd7` | 0, 175, 215 | 191.163, 1, 0.422 | | ![](https://via.placeholder.com/60x30/00afff/000000?text=+) | 39 | `#00afff` | 0, 175, 255 | 198.824, 1, 0.5 | | ![](https://via.placeholder.com/60x30/00d700/000000?text=+) | 40 | `#00d700` | 0, 215, 0 | 120, 1, 0.422 | | ![](https://via.placeholder.com/60x30/00d75f/000000?text=+) | 41 | `#00d75f` | 0, 215, 95 | 146.512, 1, 0.422 | | ![](https://via.placeholder.com/60x30/00d787/000000?text=+) | 42 | `#00d787` | 0, 215, 135 | 157.674, 1, 0.422 | | ![](https://via.placeholder.com/60x30/00d7af/000000?text=+) | 43 | `#00d7af` | 0, 215, 175 | 168.837, 1, 0.422 | | ![](https://via.placeholder.com/60x30/00d7d7/000000?text=+) | 44 | `#00d7d7` | 0, 215, 215 | 180, 1, 0.422 | | ![](https://via.placeholder.com/60x30/00d7ff/000000?text=+) | 45 | `#00d7ff` | 0, 215, 255 | 189.412, 1, 0.5 | | ![](https://via.placeholder.com/60x30/00ff00/000000?text=+) | 46 | `#00ff00` | 0, 255, 0 | 120, 1, 0.5 | | ![](https://via.placeholder.com/60x30/00ff5f/000000?text=+) | 47 | `#00ff5f` | 0, 255, 95 | 142.353, 1, 0.5 | | ![](https://via.placeholder.com/60x30/00ff87/000000?text=+) | 48 | `#00ff87` | 0, 255, 135 | 151.765, 1, 0.5 | | ![](https://via.placeholder.com/60x30/00ffaf/000000?text=+) | 49 | `#00ffaf` | 0, 255, 175 | 161.176, 1, 0.5 | | ![](https://via.placeholder.com/60x30/00ffd7/000000?text=+) | 50 | `#00ffd7` | 0, 255, 215 | 170.588, 1, 0.5 | | ![](https://via.placeholder.com/60x30/00ffff/000000?text=+) | 51 | `#00ffff` | 0, 255, 255 | 180, 1, 0.5 | | ![](https://via.placeholder.com/60x30/5f0000/000000?text=+) | 52 | `#5f0000` | 95, 0, 0 | 0, 1, 0.186 | | ![](https://via.placeholder.com/60x30/5f005f/000000?text=+) | 53 | `#5f005f` | 95, 0, 95 | 300, 1, 0.186 | | ![](https://via.placeholder.com/60x30/5f0087/000000?text=+) | 54 | `#5f0087` | 95, 0, 135 | 282.222, 1, 0.265 | | ![](https://via.placeholder.com/60x30/5f00af/000000?text=+) | 55 | `#5f00af` | 95, 0, 175 | 272.571, 1, 0.343 | | ![](https://via.placeholder.com/60x30/5f00d7/000000?text=+) | 56 | `#5f00d7` | 95, 0, 215 | 266.512, 1, 0.422 | | ![](https://via.placeholder.com/60x30/5f00ff/000000?text=+) | 57 | 
`#5f00ff` | 95, 0, 255 | 262.353, 1, 0.5 | | ![](https://via.placeholder.com/60x30/5f5f00/000000?text=+) | 58 | `#5f5f00` | 95, 95, 0 | 60, 1, 0.186 | | ![](https://via.placeholder.com/60x30/5f5f5f/000000?text=+) | 59 | `#5f5f5f` | 95, 95, 95 | 0, 0, 0.373 | | ![](https://via.placeholder.com/60x30/5f5f87/000000?text=+) | 60 | `#5f5f87` | 95, 95, 135 | 240, 0.174, 0.451 | | ![](https://via.placeholder.com/60x30/5f5faf/000000?text=+) | 61 | `#5f5faf` | 95, 95, 175 | 240, 0.333, 0.529 | | ![](https://via.placeholder.com/60x30/5f5fd7/000000?text=+) | 62 | `#5f5fd7` | 95, 95, 215 | 240, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/5f5fff/000000?text=+) | 63 | `#5f5fff` | 95, 95, 255 | 240, 1, 0.686 | | ![](https://via.placeholder.com/60x30/5f8700/000000?text=+) | 64 | `#5f8700` | 95, 135, 0 | 77.778, 1, 0.265 | | ![](https://via.placeholder.com/60x30/5f875f/000000?text=+) | 65 | `#5f875f` | 95, 135, 95 | 120, 0.174, 0.451 | | ![](https://via.placeholder.com/60x30/5f8787/000000?text=+) | 66 | `#5f8787` | 95, 135, 135 | 180, 0.174, 0.451 | | ![](https://via.placeholder.com/60x30/5f87af/000000?text=+) | 67 | `#5f87af` | 95, 135, 175 | 210, 0.333, 0.529 | | ![](https://via.placeholder.com/60x30/5f87d7/000000?text=+) | 68 | `#5f87d7` | 95, 135, 215 | 220, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/5f87ff/000000?text=+) | 69 | `#5f87ff` | 95, 135, 255 | 225, 1, 0.686 | | ![](https://via.placeholder.com/60x30/5faf00/000000?text=+) | 70 | `#5faf00` | 95, 175, 0 | 87.429, 1, 0.343 | | ![](https://via.placeholder.com/60x30/5faf5f/000000?text=+) | 71 | `#5faf5f` | 95, 175, 95 | 120, 0.333, 0.529 | | ![](https://via.placeholder.com/60x30/5faf87/000000?text=+) | 72 | `#5faf87` | 95, 175, 135 | 150, 0.333, 0.529 | | ![](https://via.placeholder.com/60x30/5fafaf/000000?text=+) | 73 | `#5fafaf` | 95, 175, 175 | 180, 0.333, 0.529 | | ![](https://via.placeholder.com/60x30/5fafd7/000000?text=+) | 74 | `#5fafd7` | 95, 175, 215 | 200, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/5fafff/000000?text=+) | 75 | `#5fafff` | 95, 175, 255 | 210, 1, 0.686 | | ![](https://via.placeholder.com/60x30/5fd700/000000?text=+) | 76 | `#5fd700` | 95, 215, 0 | 93.488, 1, 0.422 | | ![](https://via.placeholder.com/60x30/5fd75f/000000?text=+) | 77 | `#5fd75f` | 95, 215, 95 | 120, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/5fd787/000000?text=+) | 78 | `#5fd787` | 95, 215, 135 | 140, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/5fd7af/000000?text=+) | 79 | `#5fd7af` | 95, 215, 175 | 160, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/5fd7d7/000000?text=+) | 80 | `#5fd7d7` | 95, 215, 215 | 180, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/5fd7ff/000000?text=+) | 81 | `#5fd7ff` | 95, 215, 255 | 195, 1, 0.686 | | ![](https://via.placeholder.com/60x30/5fff00/000000?text=+) | 82 | `#5fff00` | 95, 255, 0 | 97.647, 1, 0.5 | | ![](https://via.placeholder.com/60x30/5fff5f/000000?text=+) | 83 | `#5fff5f` | 95, 255, 95 | 120, 1, 0.686 | | ![](https://via.placeholder.com/60x30/5fff87/000000?text=+) | 84 | `#5fff87` | 95, 255, 135 | 135, 1, 0.686 | | ![](https://via.placeholder.com/60x30/5fffaf/000000?text=+) | 85 | `#5fffaf` | 95, 255, 175 | 150, 1, 0.686 | | ![](https://via.placeholder.com/60x30/5fffd7/000000?text=+) | 86 | `#5fffd7` | 95, 255, 215 | 165, 1, 0.686 | | ![](https://via.placeholder.com/60x30/5fffff/000000?text=+) | 87 | `#5fffff` | 95, 255, 255 | 180, 1, 0.686 | | ![](https://via.placeholder.com/60x30/870000/000000?text=+) | 88 | `#870000` | 135, 0, 0 | 0, 1, 0.265 | | 
![](https://via.placeholder.com/60x30/87005f/000000?text=+) | 89 | `#87005f` | 135, 0, 95 | 317.778, 1, 0.265 | | ![](https://via.placeholder.com/60x30/870087/000000?text=+) | 90 | `#870087` | 135, 0, 135 | 300, 1, 0.265 | | ![](https://via.placeholder.com/60x30/8700af/000000?text=+) | 91 | `#8700af` | 135, 0, 175 | 286.286, 1, 0.343 | | ![](https://via.placeholder.com/60x30/8700d7/000000?text=+) | 92 | `#8700d7` | 135, 0, 215 | 277.674, 1, 0.422 | | ![](https://via.placeholder.com/60x30/8700ff/000000?text=+) | 93 | `#8700ff` | 135, 0, 255 | 271.765, 1, 0.5 | | ![](https://via.placeholder.com/60x30/875f00/000000?text=+) | 94 | `#875f00` | 135, 95, 0 | 42.222, 1, 0.265 | | ![](https://via.placeholder.com/60x30/875f5f/000000?text=+) | 95 | `#875f5f` | 135, 95, 95 | 0, 0.174, 0.451 | | ![](https://via.placeholder.com/60x30/875f87/000000?text=+) | 96 | `#875f87` | 135, 95, 135 | 300, 0.174, 0.451 | | ![](https://via.placeholder.com/60x30/875faf/000000?text=+) | 97 | `#875faf` | 135, 95, 175 | 270, 0.333, 0.529 | | ![](https://via.placeholder.com/60x30/875fd7/000000?text=+) | 98 | `#875fd7` | 135, 95, 215 | 260, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/875fff/000000?text=+) | 99 | `#875fff` | 135, 95, 255 | 255, 1, 0.686 | | ![](https://via.placeholder.com/60x30/878700/000000?text=+) | 100 | `#878700` | 135, 135, 0 | 60, 1, 0.265 | | ![](https://via.placeholder.com/60x30/87875f/000000?text=+) | 101 | `#87875f` | 135, 135, 95 | 60, 0.174, 0.451 | | ![](https://via.placeholder.com/60x30/878787/000000?text=+) | 102 | `#878787` | 135, 135, 135 | 0, 0, 0.529 | | ![](https://via.placeholder.com/60x30/8787af/000000?text=+) | 103 | `#8787af` | 135, 135, 175 | 240, 0.2, 0.608 | | ![](https://via.placeholder.com/60x30/8787d7/000000?text=+) | 104 | `#8787d7` | 135, 135, 215 | 240, 0.5, 0.686 | | ![](https://via.placeholder.com/60x30/8787ff/000000?text=+) | 105 | `#8787ff` | 135, 135, 255 | 240, 1, 0.765 | | ![](https://via.placeholder.com/60x30/87af00/000000?text=+) | 106 | `#87af00` | 135, 175, 0 | 73.714, 1, 0.343 | | ![](https://via.placeholder.com/60x30/87af5f/000000?text=+) | 107 | `#87af5f` | 135, 175, 95 | 90, 0.333, 0.529 | | ![](https://via.placeholder.com/60x30/87af87/000000?text=+) | 108 | `#87af87` | 135, 175, 135 | 120, 0.2, 0.608 | | ![](https://via.placeholder.com/60x30/87afaf/000000?text=+) | 109 | `#87afaf` | 135, 175, 175 | 180, 0.2, 0.608 | | ![](https://via.placeholder.com/60x30/87afd7/000000?text=+) | 110 | `#87afd7` | 135, 175, 215 | 210, 0.5, 0.686 | | ![](https://via.placeholder.com/60x30/87afff/000000?text=+) | 111 | `#87afff` | 135, 175, 255 | 220, 1, 0.765 | | ![](https://via.placeholder.com/60x30/87d700/000000?text=+) | 112 | `#87d700` | 135, 215, 0 | 82.326, 1, 0.422 | | ![](https://via.placeholder.com/60x30/87d75f/000000?text=+) | 113 | `#87d75f` | 135, 215, 95 | 100, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/87d787/000000?text=+) | 114 | `#87d787` | 135, 215, 135 | 120, 0.5, 0.686 | | ![](https://via.placeholder.com/60x30/87d7af/000000?text=+) | 115 | `#87d7af` | 135, 215, 175 | 150, 0.5, 0.686 | | ![](https://via.placeholder.com/60x30/87d7d7/000000?text=+) | 116 | `#87d7d7` | 135, 215, 215 | 180, 0.5, 0.686 | | ![](https://via.placeholder.com/60x30/87d7ff/000000?text=+) | 117 | `#87d7ff` | 135, 215, 255 | 200, 1, 0.765 | | ![](https://via.placeholder.com/60x30/87ff00/000000?text=+) | 118 | `#87ff00` | 135, 255, 0 | 88.235, 1, 0.5 | | ![](https://via.placeholder.com/60x30/87ff5f/000000?text=+) | 119 | `#87ff5f` | 135, 255, 95 | 105, 1, 0.686 | | 
![](https://via.placeholder.com/60x30/87ff87/000000?text=+) | 120 | `#87ff87` | 135, 255, 135 | 120, 1, 0.765 | | ![](https://via.placeholder.com/60x30/87ffaf/000000?text=+) | 121 | `#87ffaf` | 135, 255, 175 | 140, 1, 0.765 | | ![](https://via.placeholder.com/60x30/87ffd7/000000?text=+) | 122 | `#87ffd7` | 135, 255, 215 | 160, 1, 0.765 | | ![](https://via.placeholder.com/60x30/87ffff/000000?text=+) | 123 | `#87ffff` | 135, 255, 255 | 180, 1, 0.765 | | ![](https://via.placeholder.com/60x30/af0000/000000?text=+) | 124 | `#af0000` | 175, 0, 0 | 0, 1, 0.343 | | ![](https://via.placeholder.com/60x30/af005f/000000?text=+) | 125 | `#af005f` | 175, 0, 95 | 327.429, 1, 0.343 | | ![](https://via.placeholder.com/60x30/af0087/000000?text=+) | 126 | `#af0087` | 175, 0, 135 | 313.714, 1, 0.343 | | ![](https://via.placeholder.com/60x30/af00af/000000?text=+) | 127 | `#af00af` | 175, 0, 175 | 300, 1, 0.343 | | ![](https://via.placeholder.com/60x30/af00d7/000000?text=+) | 128 | `#af00d7` | 175, 0, 215 | 288.837, 1, 0.422 | | ![](https://via.placeholder.com/60x30/af00ff/000000?text=+) | 129 | `#af00ff` | 175, 0, 255 | 281.176, 1, 0.5 | | ![](https://via.placeholder.com/60x30/af5f00/000000?text=+) | 130 | `#af5f00` | 175, 95, 0 | 32.571, 1, 0.343 | | ![](https://via.placeholder.com/60x30/af5f5f/000000?text=+) | 131 | `#af5f5f` | 175, 95, 95 | 0, 0.333, 0.529 | | ![](https://via.placeholder.com/60x30/af5f87/000000?text=+) | 132 | `#af5f87` | 175, 95, 135 | 330, 0.333, 0.529 | | ![](https://via.placeholder.com/60x30/af5faf/000000?text=+) | 133 | `#af5faf` | 175, 95, 175 | 300, 0.333, 0.529 | | ![](https://via.placeholder.com/60x30/af5fd7/000000?text=+) | 134 | `#af5fd7` | 175, 95, 215 | 280, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/af5fff/000000?text=+) | 135 | `#af5fff` | 175, 95, 255 | 270, 1, 0.686 | | ![](https://via.placeholder.com/60x30/af8700/000000?text=+) | 136 | `#af8700` | 175, 135, 0 | 46.286, 1, 0.343 | | ![](https://via.placeholder.com/60x30/af875f/000000?text=+) | 137 | `#af875f` | 175, 135, 95 | 30, 0.333, 0.529 | | ![](https://via.placeholder.com/60x30/af8787/000000?text=+) | 138 | `#af8787` | 175, 135, 135 | 0, 0.2, 0.608 | | ![](https://via.placeholder.com/60x30/af87af/000000?text=+) | 139 | `#af87af` | 175, 135, 175 | 300, 0.2, 0.608 | | ![](https://via.placeholder.com/60x30/af87d7/000000?text=+) | 140 | `#af87d7` | 175, 135, 215 | 270, 0.5, 0.686 | | ![](https://via.placeholder.com/60x30/af87ff/000000?text=+) | 141 | `#af87ff` | 175, 135, 255 | 260, 1, 0.765 | | ![](https://via.placeholder.com/60x30/afaf00/000000?text=+) | 142 | `#afaf00` | 175, 175, 0 | 60, 1, 0.343 | | ![](https://via.placeholder.com/60x30/afaf5f/000000?text=+) | 143 | `#afaf5f` | 175, 175, 95 | 60, 0.333, 0.529 | | ![](https://via.placeholder.com/60x30/afaf87/000000?text=+) | 144 | `#afaf87` | 175, 175, 135 | 60, 0.2, 0.608 | | ![](https://via.placeholder.com/60x30/afafaf/000000?text=+) | 145 | `#afafaf` | 175, 175, 175 | 0, 0, 0.686 | | ![](https://via.placeholder.com/60x30/afafd7/000000?text=+) | 146 | `#afafd7` | 175, 175, 215 | 240, 0.333, 0.765 | | ![](https://via.placeholder.com/60x30/afafff/000000?text=+) | 147 | `#afafff` | 175, 175, 255 | 240, 1, 0.843 | | ![](https://via.placeholder.com/60x30/afd700/000000?text=+) | 148 | `#afd700` | 175, 215, 0 | 71.163, 1, 0.422 | | ![](https://via.placeholder.com/60x30/afd75f/000000?text=+) | 149 | `#afd75f` | 175, 215, 95 | 80, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/afd787/000000?text=+) | 150 | `#afd787` | 175, 215, 135 | 90, 0.5, 0.686 | | 
![](https://via.placeholder.com/60x30/afd7af/000000?text=+) | 151 | `#afd7af` | 175, 215, 175 | 120, 0.333, 0.765 | | ![](https://via.placeholder.com/60x30/afd7d7/000000?text=+) | 152 | `#afd7d7` | 175, 215, 215 | 180, 0.333, 0.765 | | ![](https://via.placeholder.com/60x30/afd7ff/000000?text=+) | 153 | `#afd7ff` | 175, 215, 255 | 210, 1, 0.843 | | ![](https://via.placeholder.com/60x30/afff00/000000?text=+) | 154 | `#afff00` | 175, 255, 0 | 78.824, 1, 0.5 | | ![](https://via.placeholder.com/60x30/afff5f/000000?text=+) | 155 | `#afff5f` | 175, 255, 95 | 90, 1, 0.686 | | ![](https://via.placeholder.com/60x30/afff87/000000?text=+) | 156 | `#afff87` | 175, 255, 135 | 100, 1, 0.765 | | ![](https://via.placeholder.com/60x30/afffaf/000000?text=+) | 157 | `#afffaf` | 175, 255, 175 | 120, 1, 0.843 | | ![](https://via.placeholder.com/60x30/afffd7/000000?text=+) | 158 | `#afffd7` | 175, 255, 215 | 150, 1, 0.843 | | ![](https://via.placeholder.com/60x30/afffff/000000?text=+) | 159 | `#afffff` | 175, 255, 255 | 180, 1, 0.843 | | ![](https://via.placeholder.com/60x30/d70000/000000?text=+) | 160 | `#d70000` | 215, 0, 0 | 0, 1, 0.422 | | ![](https://via.placeholder.com/60x30/d7005f/000000?text=+) | 161 | `#d7005f` | 215, 0, 95 | 333.488, 1, 0.422 | | ![](https://via.placeholder.com/60x30/d70087/000000?text=+) | 162 | `#d70087` | 215, 0, 135 | 322.326, 1, 0.422 | | ![](https://via.placeholder.com/60x30/d700af/000000?text=+) | 163 | `#d700af` | 215, 0, 175 | 311.163, 1, 0.422 | | ![](https://via.placeholder.com/60x30/d700d7/000000?text=+) | 164 | `#d700d7` | 215, 0, 215 | 300, 1, 0.422 | | ![](https://via.placeholder.com/60x30/d700ff/000000?text=+) | 165 | `#d700ff` | 215, 0, 255 | 290.588, 1, 0.5 | | ![](https://via.placeholder.com/60x30/d75f00/000000?text=+) | 166 | `#d75f00` | 215, 95, 0 | 26.512, 1, 0.422 | | ![](https://via.placeholder.com/60x30/d75f5f/000000?text=+) | 167 | `#d75f5f` | 215, 95, 95 | 0, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/d75f87/000000?text=+) | 168 | `#d75f87` | 215, 95, 135 | 340, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/d75faf/000000?text=+) | 169 | `#d75faf` | 215, 95, 175 | 320, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/d75fd7/000000?text=+) | 170 | `#d75fd7` | 215, 95, 215 | 300, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/d75fff/000000?text=+) | 171 | `#d75fff` | 215, 95, 255 | 285, 1, 0.686 | | ![](https://via.placeholder.com/60x30/d78700/000000?text=+) | 172 | `#d78700` | 215, 135, 0 | 37.674, 1, 0.422 | | ![](https://via.placeholder.com/60x30/d7875f/000000?text=+) | 173 | `#d7875f` | 215, 135, 95 | 20, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/d78787/000000?text=+) | 174 | `#d78787` | 215, 135, 135 | 0, 0.5, 0.686 | | ![](https://via.placeholder.com/60x30/d787af/000000?text=+) | 175 | `#d787af` | 215, 135, 175 | 330, 0.5, 0.686 | | ![](https://via.placeholder.com/60x30/d787d7/000000?text=+) | 176 | `#d787d7` | 215, 135, 215 | 300, 0.5, 0.686 | | ![](https://via.placeholder.com/60x30/d787ff/000000?text=+) | 177 | `#d787ff` | 215, 135, 255 | 280, 1, 0.765 | | ![](https://via.placeholder.com/60x30/d7af00/000000?text=+) | 178 | `#d7af00` | 215, 175, 0 | 48.837, 1, 0.422 | | ![](https://via.placeholder.com/60x30/d7af5f/000000?text=+) | 179 | `#d7af5f` | 215, 175, 95 | 40, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/d7af87/000000?text=+) | 180 | `#d7af87` | 215, 175, 135 | 30, 0.5, 0.686 | | ![](https://via.placeholder.com/60x30/d7afaf/000000?text=+) | 181 | `#d7afaf` | 215, 175, 175 | 0, 0.333, 0.765 | | 
![](https://via.placeholder.com/60x30/d7afd7/000000?text=+) | 182 | `#d7afd7` | 215, 175, 215 | 300, 0.333, 0.765 | | ![](https://via.placeholder.com/60x30/d7afff/000000?text=+) | 183 | `#d7afff` | 215, 175, 255 | 270, 1, 0.843 | | ![](https://via.placeholder.com/60x30/d7d700/000000?text=+) | 184 | `#d7d700` | 215, 215, 0 | 60, 1, 0.422 | | ![](https://via.placeholder.com/60x30/d7d75f/000000?text=+) | 185 | `#d7d75f` | 215, 215, 95 | 60, 0.6, 0.608 | | ![](https://via.placeholder.com/60x30/d7d787/000000?text=+) | 186 | `#d7d787` | 215, 215, 135 | 60, 0.5, 0.686 | | ![](https://via.placeholder.com/60x30/d7d7af/000000?text=+) | 187 | `#d7d7af` | 215, 215, 175 | 60, 0.333, 0.765 | | ![](https://via.placeholder.com/60x30/d7d7d7/000000?text=+) | 188 | `#d7d7d7` | 215, 215, 215 | 0, 0, 0.843 | | ![](https://via.placeholder.com/60x30/d7d7ff/000000?text=+) | 189 | `#d7d7ff` | 215, 215, 255 | 240, 1, 0.922 | | ![](https://via.placeholder.com/60x30/d7ff00/000000?text=+) | 190 | `#d7ff00` | 215, 255, 0 | 69.412, 1, 0.5 | | ![](https://via.placeholder.com/60x30/d7ff5f/000000?text=+) | 191 | `#d7ff5f` | 215, 255, 95 | 75, 1, 0.686 | | ![](https://via.placeholder.com/60x30/d7ff87/000000?text=+) | 192 | `#d7ff87` | 215, 255, 135 | 80, 1, 0.765 | | ![](https://via.placeholder.com/60x30/d7ffaf/000000?text=+) | 193 | `#d7ffaf` | 215, 255, 175 | 90, 1, 0.843 | | ![](https://via.placeholder.com/60x30/d7ffd7/000000?text=+) | 194 | `#d7ffd7` | 215, 255, 215 | 120, 1, 0.922 | | ![](https://via.placeholder.com/60x30/d7ffff/000000?text=+) | 195 | `#d7ffff` | 215, 255, 255 | 180, 1, 0.922 | | ![](https://via.placeholder.com/60x30/ff0000/000000?text=+) | 196 | `#ff0000` | 255, 0, 0 | 0, 1, 0.5 | | ![](https://via.placeholder.com/60x30/ff005f/000000?text=+) | 197 | `#ff005f` | 255, 0, 95 | 337.647, 1, 0.5 | | ![](https://via.placeholder.com/60x30/ff0087/000000?text=+) | 198 | `#ff0087` | 255, 0, 135 | 328.235, 1, 0.5 | | ![](https://via.placeholder.com/60x30/ff00af/000000?text=+) | 199 | `#ff00af` | 255, 0, 175 | 318.824, 1, 0.5 | | ![](https://via.placeholder.com/60x30/ff00d7/000000?text=+) | 200 | `#ff00d7` | 255, 0, 215 | 309.412, 1, 0.5 | | ![](https://via.placeholder.com/60x30/ff00ff/000000?text=+) | 201 | `#ff00ff` | 255, 0, 255 | 300, 1, 0.5 | | ![](https://via.placeholder.com/60x30/ff5f00/000000?text=+) | 202 | `#ff5f00` | 255, 95, 0 | 22.353, 1, 0.5 | | ![](https://via.placeholder.com/60x30/ff5f5f/000000?text=+) | 203 | `#ff5f5f` | 255, 95, 95 | 0, 1, 0.686 | | ![](https://via.placeholder.com/60x30/ff5f87/000000?text=+) | 204 | `#ff5f87` | 255, 95, 135 | 345, 1, 0.686 | | ![](https://via.placeholder.com/60x30/ff5faf/000000?text=+) | 205 | `#ff5faf` | 255, 95, 175 | 330, 1, 0.686 | | ![](https://via.placeholder.com/60x30/ff5fd7/000000?text=+) | 206 | `#ff5fd7` | 255, 95, 215 | 315, 1, 0.686 | | ![](https://via.placeholder.com/60x30/ff5fff/000000?text=+) | 207 | `#ff5fff` | 255, 95, 255 | 300, 1, 0.686 | | ![](https://via.placeholder.com/60x30/ff8700/000000?text=+) | 208 | `#ff8700` | 255, 135, 0 | 31.765, 1, 0.5 | | ![](https://via.placeholder.com/60x30/ff875f/000000?text=+) | 209 | `#ff875f` | 255, 135, 95 | 15, 1, 0.686 | | ![](https://via.placeholder.com/60x30/ff8787/000000?text=+) | 210 | `#ff8787` | 255, 135, 135 | 0, 1, 0.765 | | ![](https://via.placeholder.com/60x30/ff87af/000000?text=+) | 211 | `#ff87af` | 255, 135, 175 | 340, 1, 0.765 | | ![](https://via.placeholder.com/60x30/ff87d7/000000?text=+) | 212 | `#ff87d7` | 255, 135, 215 | 320, 1, 0.765 | | 
![](https://via.placeholder.com/60x30/ff87ff/000000?text=+) | 213 | `#ff87ff` | 255, 135, 255 | 300, 1, 0.765 | | ![](https://via.placeholder.com/60x30/ffaf00/000000?text=+) | 214 | `#ffaf00` | 255, 175, 0 | 41.176, 1, 0.5 | | ![](https://via.placeholder.com/60x30/ffaf5f/000000?text=+) | 215 | `#ffaf5f` | 255, 175, 95 | 30, 1, 0.686 | | ![](https://via.placeholder.com/60x30/ffaf87/000000?text=+) | 216 | `#ffaf87` | 255, 175, 135 | 20, 1, 0.765 | | ![](https://via.placeholder.com/60x30/ffafaf/000000?text=+) | 217 | `#ffafaf` | 255, 175, 175 | 0, 1, 0.843 | | ![](https://via.placeholder.com/60x30/ffafd7/000000?text=+) | 218 | `#ffafd7` | 255, 175, 215 | 330, 1, 0.843 | | ![](https://via.placeholder.com/60x30/ffafff/000000?text=+) | 219 | `#ffafff` | 255, 175, 255 | 300, 1, 0.843 | | ![](https://via.placeholder.com/60x30/ffd700/000000?text=+) | 220 | `#ffd700` | 255, 215, 0 | 50.588, 1, 0.5 | | ![](https://via.placeholder.com/60x30/ffd75f/000000?text=+) | 221 | `#ffd75f` | 255, 215, 95 | 45, 1, 0.686 | | ![](https://via.placeholder.com/60x30/ffd787/000000?text=+) | 222 | `#ffd787` | 255, 215, 135 | 40, 1, 0.765 | | ![](https://via.placeholder.com/60x30/ffd7af/000000?text=+) | 223 | `#ffd7af` | 255, 215, 175 | 30, 1, 0.843 | | ![](https://via.placeholder.com/60x30/ffd7d7/000000?text=+) | 224 | `#ffd7d7` | 255, 215, 215 | 0, 1, 0.922 | | ![](https://via.placeholder.com/60x30/ffd7ff/000000?text=+) | 225 | `#ffd7ff` | 255, 215, 255 | 300, 1, 0.922 | | ![](https://via.placeholder.com/60x30/ffff00/000000?text=+) | 226 | `#ffff00` | 255, 255, 0 | 60, 1, 0.5 | | ![](https://via.placeholder.com/60x30/ffff5f/000000?text=+) | 227 | `#ffff5f` | 255, 255, 95 | 60, 1, 0.686 | | ![](https://via.placeholder.com/60x30/ffff87/000000?text=+) | 228 | `#ffff87` | 255, 255, 135 | 60, 1, 0.765 | | ![](https://via.placeholder.com/60x30/ffffaf/000000?text=+) | 229 | `#ffffaf` | 255, 255, 175 | 60, 1, 0.843 | | ![](https://via.placeholder.com/60x30/ffffd7/000000?text=+) | 230 | `#ffffd7` | 255, 255, 215 | 60, 1, 0.922 | | ![](https://via.placeholder.com/60x30/ffffff/000000?text=+) | 231 | `#ffffff` | 255, 255, 255 | 0, 0, 1 | | ![](https://via.placeholder.com/60x30/080808/000000?text=+) | 232 | `#080808` | 8, 8, 8 | 0, 0, 0.031 | | ![](https://via.placeholder.com/60x30/121212/000000?text=+) | 233 | `#121212` | 18, 18, 18 | 0, 0, 0.071 | | ![](https://via.placeholder.com/60x30/1c1c1c/000000?text=+) | 234 | `#1c1c1c` | 28, 28, 28 | 0, 0, 0.11 | | ![](https://via.placeholder.com/60x30/262626/000000?text=+) | 235 | `#262626` | 38, 38, 38 | 0, 0, 0.149 | | ![](https://via.placeholder.com/60x30/303030/000000?text=+) | 236 | `#303030` | 48, 48, 48 | 0, 0, 0.188 | | ![](https://via.placeholder.com/60x30/3a3a3a/000000?text=+) | 237 | `#3a3a3a` | 58, 58, 58 | 0, 0, 0.227 | | ![](https://via.placeholder.com/60x30/444444/000000?text=+) | 238 | `#444444` | 68, 68, 68 | 0, 0, 0.267 | | ![](https://via.placeholder.com/60x30/4e4e4e/000000?text=+) | 239 | `#4e4e4e` | 78, 78, 78 | 0, 0, 0.306 | | ![](https://via.placeholder.com/60x30/585858/000000?text=+) | 240 | `#585858` | 88, 88, 88 | 0, 0, 0.345 | | ![](https://via.placeholder.com/60x30/626262/000000?text=+) | 241 | `#626262` | 98, 98, 98 | 0, 0, 0.384 | | ![](https://via.placeholder.com/60x30/6c6c6c/000000?text=+) | 242 | `#6c6c6c` | 108, 108, 108 | 0, 0, 0.424 | | ![](https://via.placeholder.com/60x30/767676/000000?text=+) | 243 | `#767676` | 118, 118, 118 | 0, 0, 0.463 | | ![](https://via.placeholder.com/60x30/808080/000000?text=+) | 244 | `#808080` | 128, 128, 128 | 0, 0, 0.502 
| | ![](https://via.placeholder.com/60x30/8a8a8a/000000?text=+) | 245 | `#8a8a8a` | 138, 138, 138 | 0, 0, 0.541 | | ![](https://via.placeholder.com/60x30/949494/000000?text=+) | 246 | `#949494` | 148, 148, 148 | 0, 0, 0.58 | | ![](https://via.placeholder.com/60x30/9e9e9e/000000?text=+) | 247 | `#9e9e9e` | 158, 158, 158 | 0, 0, 0.62 | | ![](https://via.placeholder.com/60x30/a8a8a8/000000?text=+) | 248 | `#a8a8a8` | 168, 168, 168 | 0, 0, 0.659 | | ![](https://via.placeholder.com/60x30/b2b2b2/000000?text=+) | 249 | `#b2b2b2` | 178, 178, 178 | 0, 0, 0.698 | | ![](https://via.placeholder.com/60x30/bcbcbc/000000?text=+) | 250 | `#bcbcbc` | 188, 188, 188 | 0, 0, 0.737 | | ![](https://via.placeholder.com/60x30/c6c6c6/000000?text=+) | 251 | `#c6c6c6` | 198, 198, 198 | 0, 0, 0.776 | | ![](https://via.placeholder.com/60x30/d0d0d0/000000?text=+) | 252 | `#d0d0d0` | 208, 208, 208 | 0, 0, 0.816 | | ![](https://via.placeholder.com/60x30/dadada/000000?text=+) | 253 | `#dadada` | 218, 218, 218 | 0, 0, 0.855 | | ![](https://via.placeholder.com/60x30/e4e4e4/000000?text=+) | 254 | `#e4e4e4` | 228, 228, 228 | 0, 0, 0.894 | | ![](https://via.placeholder.com/60x30/eeeeee/000000?text=+) | 255 | `#eeeeee` | 238, 238, 238 | 0, 0, 0.933 | pyglossary-5.0.9/doc/termux.md000066400000000000000000000014211476751035500163670ustar00rootroot00000000000000## Feature-specific Requirements on [Termux](https://github.com/termux/termux-app) - **Using `--remove-html-all` flag** - `apt install libxml2 libxslt` - `pip install lxml beautifulsoup4` - **Reading from FreeDict, XDXF, JMDict, AppleDict Binary (.dictionary) or CC-CEDICT** - `apt install libxml2 libxslt` - `pip install lxml` - **Reading from cc-kedict** - `apt install libxml2 libxslt` - `pip install lxml PyYAML` - **Reading or writing Aard 2 (.slob)** - `pkg install libicu` - `pip install PyICU` - **Writing to Kobo E-Reader Dictionary** - `pip install marisa-trie` - **Reading from Zim** - `apt install libzim` - `pip install libzim` - **Writing to AppleDict** - `apt install libxml2 libxslt` - `pip install lxml beautifulsoup4 html5lib` pyglossary-5.0.9/doc/tkinter.md000066400000000000000000000003721476751035500165270ustar00rootroot00000000000000## Tkinter installation - Debian/Ubuntu: `apt-get install python3-tk` - openSUSE: `zypper install python3-tk` - Fedora: `yum install python3-tkinter` - Mac OS X: `brew install tcl-tk python-tk` - Nix / NixOS: `nix-shell -p python38Packages.tkinter` pyglossary-5.0.9/help000066400000000000000000000105341476751035500146340ustar00rootroot00000000000000PyGlossary is a tool for working with dictionary databases (glossaries) Basic Usage: PyGI (Gtk3) Interface: To open PyGlossary window: ${CMD} PyGI is the default interface (so you never need to use "--ui=gtk" or --gtk option) If PyGI was not found (not installed), then PyGlossary will fallback to Tkinter. Tkinter Interface: To open PyGlossary window: ${CMD} --tk Or ${CMD} --ui=tk Usually good for Windows and Mac OS X Web Interface: To open PyGlossary window: ${CMD} --web Or ${CMD} --ui=web For OS's with a modern web browser Command-line interface: To show this help: ${CMD} --help To show program version: ${CMD} --version To Convert: ${CMD} INPUT_FILE OUTPUT_FILE To Reverse: ${CMD} INPUT_FILE OUTPUT_FILE.txt --reverse Input and output formats will be detected from extensions if possible. 
If not, you need to specify input or output format, for example: ${CMD} test.utf8 test.ifo --read-format=tabfile ${CMD} test.utf8 test.ifo --read-format tabfile ${CMD} test.ifo test.utf8 --write-format=tabfile ${CMD} test.ifo test.utf8 --write-format tabfile Interactive command-line interface: Minimal command: ${CMD} --cmd Or ${CMD} --ui=cmd Additionally you can pass any flag to act as the default General Options: Verbosity: -v0 or '--verbosity 0' for critical errors only -v1 or '--verbosity 1' for errors only -v2 or '--verbosity 2' for errors and warnings -v3 or '--verbosity 3' for errors, warnings and info -v4 or '--verbosity 4' for debug mode -v5 or '--verbosity 5' for trace mode Appearance: --no-progress-bar and --no-color, useful for scripts Full Convert Usage: ${CMD} INPUT_FILE OUTPUT_FILE [-vN] [--read-format=FORMAT] [--write-format=FORMAT] [--direct|--indirect|--sqlite] [--no-alts] [--sort|--no-sort] [--sort-cache-size=2000] [--utf8-check|--no-utf8-check] [--lower|--no-lower] [--read-options=READ_OPTIONS] [--write-options=WRITE_OPTIONS] [--source-lang=LANGUAGE] [--target-lang=LANGUAGE] ['--name=GLOSSARY NAME'] Direct and indirect modes Indirect mode means the input glossary is completely read and loaded into RAM, then converted into the output format. This was the only method available in old versions (before 3.0.0). Direct mode means entries are read, processed and written into the output glossary one at a time. Direct mode was added to limit the memory usage for large glossaries; but it may reduce the conversion time in most cases as well. Converting glossaries into some formats like StarDict and EPUB-2 requires sorting entries. That's why direct mode will not work for these formats, and PyGlossary will use indirect mode. Otherwise direct mode will be the default. You may override this with the --indirect flag. SQLite mode: As mentioned above, converting glossaries into some formats like StarDict requires them to be loaded into RAM. This can be problematic if the glossary is too big to fit into RAM. That's when you should try adding the --sqlite flag to your command. Then it uses SQLite as intermediate storage for sorting and then fetching entries. This fixes the memory issue, and may even reduce the running time of the conversion (depending on your home directory's storage). The temporary SQLite file is stored in ~/.cache/pyglossary/ then deleted after conversion, unless you pass the --no-cleanup flag. Currently you cannot disable alternates in SQLite mode (--no-alts is ignored).
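For example, to convert a glossary that is too big to fit into RAM: ${CMD} mydic.txt mydic.ifo --sqlite Command line arguments and options (and arguments for options) are parsed with the GNU getopt method Compressing with gz, bz2 and zip is supported.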
Just append these extension to the file name, for example: ${CMD} mydic.ifo mydic.txt.gz And if the input file has these extensions (gz, bz2, zip), it will be extracted before loading pyglossary-5.0.9/main.py000077500000000000000000000002311476751035500152530ustar00rootroot00000000000000#!/usr/bin/env -S python3 -O import sys from os.path import dirname sys.path.insert(0, dirname(__file__)) from pyglossary.ui.main import main main() pyglossary-5.0.9/pkg/000077500000000000000000000000001476751035500145375ustar00rootroot00000000000000pyglossary-5.0.9/pkg/pyglossary.desktop000077500000000000000000000004021476751035500203450ustar00rootroot00000000000000#!/usr/bin/env xdg-open [Desktop Entry] Name=PyGlossary GenericName=Glossary Converter Comment=Working on glossaries Exec=pyglossary Terminal=false Type=Application StartupNotify=true Icon=pyglossary Categories=Education; X-GNOME-FullName=Glossary Converter pyglossary-5.0.9/plugins-meta/000077500000000000000000000000001476751035500163635ustar00rootroot00000000000000pyglossary-5.0.9/plugins-meta/index.json000066400000000000000000001347231476751035500203770ustar00rootroot00000000000000[ { "module": "aard2_slob", "lname": "aard2_slob", "name": "Aard2Slob", "description": "Aard 2 (.slob)", "extensions": [ ".slob" ], "singleFile": true, "optionsProp": { "compression": { "class": "StrOption", "type": "str", "customValue": false, "values": [ "", "bz2", "zlib", "lzma2" ], "comment": "Compression Algorithm" }, "content_type": { "class": "StrOption", "type": "str", "customValue": true, "values": [ "text/plain; charset=utf-8", "text/html; charset=utf-8" ], "comment": "Content Type" }, "file_size_approx": { "class": "FileSizeOption", "type": "int", "customValue": true, "comment": "split up by given approximate file size\nexamples: 100m, 1g" }, "file_size_approx_check_num_entries": { "class": "IntOption", "type": "int", "customValue": true, "comment": "for file_size_approx, check every `[?]` entries" }, "separate_alternates": { "class": "BoolOption", "type": "bool", "comment": "add alternate headwords as separate entries to slob" }, "word_title": { "class": "BoolOption", "type": "bool", "comment": "add headwords title to beginning of definition" }, "version_info": { "class": "BoolOption", "type": "bool", "comment": "add version info tags to slob file" }, "audio_goldendict": { "class": "BoolOption", "type": "bool", "comment": "Convert audio links for GoldenDict (desktop)" } }, "canRead": true, "canWrite": true, "readOptions": {}, "writeOptions": { "compression": "zlib", "content_type": "", "file_size_approx": 0, "file_size_approx_check_num_entries": 100, "separate_alternates": false, "word_title": false, "version_info": false, "audio_goldendict": false }, "readDepends": { "icu": "PyICU" }, "writeDepends": { "icu": "PyICU" } }, { "module": "almaany", "lname": "almaany", "name": "Almaany", "description": "Almaany.com (SQLite3)", "extensions": [], "singleFile": true, "optionsProp": {}, "canRead": true, "canWrite": false, "readOptions": {} }, { "module": "appledict", "lname": "appledict", "name": "AppleDict", "description": "AppleDict Source", "extensions": [ ".apple" ], "singleFile": false, "optionsProp": { "clean_html": { "class": "BoolOption", "type": "bool", "comment": "use BeautifulSoup parser" }, "css": { "class": "StrOption", "type": "str", "customValue": true, "comment": "custom .css file path" }, "xsl": { "class": "StrOption", "type": "str", "customValue": true, "comment": "custom XSL transformations file path" }, "default_prefs": { "class": 
"DictOption", "type": "dict", "comment": "default prefs in python dict format" }, "prefs_html": { "class": "StrOption", "type": "str", "customValue": true, "comment": "preferences XHTML file path" }, "front_back_matter": { "class": "StrOption", "type": "str", "customValue": true, "comment": "XML file path with top-level tag" }, "jing": { "class": "BoolOption", "type": "bool", "comment": "run Jing check on generated XML" }, "indexes": { "class": "StrOption", "type": "str", "customValue": false, "values": [ "", "ru", "zh" ], "comment": "Additional indexes to dictionary entries" } }, "canRead": false, "canWrite": true, "writeOptions": { "clean_html": true, "css": "", "xsl": "", "default_prefs": null, "prefs_html": "", "front_back_matter": "", "jing": false, "indexes": "" }, "writeDepends": { "lxml": "lxml", "bs4": "beautifulsoup4", "html5lib": "html5lib" } }, { "module": "appledict_bin", "lname": "appledict_bin", "name": "AppleDictBin", "description": "AppleDict Binary", "extensions": [ ".dictionary", ".data" ], "singleFile": true, "optionsProp": { "html": { "class": "BoolOption", "type": "bool", "comment": "Entries are HTML" }, "html_full": { "class": "BoolOption", "type": "bool", "comment": "Turn every entry's definition into an HTML document" } }, "canRead": true, "canWrite": false, "readOptions": { "html": true, "html_full": true }, "readDepends": { "lxml": "lxml", "biplist": "biplist" } }, { "module": "ayandict_sqlite", "lname": "ayandict_sqlite", "name": "AyanDictSQLite", "description": "AyanDict SQLite", "extensions": [], "singleFile": true, "optionsProp": { "fuzzy": { "class": "BoolOption", "type": "bool", "comment": "Create fuzzy search data" } }, "canRead": true, "canWrite": true, "readOptions": {}, "writeOptions": { "fuzzy": true } }, { "module": "babylon_bdc", "lname": "babylon_bdc", "name": "BabylonBdc", "description": "Babylon (bdc)", "extensions": [ ".bdc" ], "singleFile": true, "optionsProp": {}, "canRead": false, "canWrite": false, "enable": false }, { "module": "babylon_bgl", "lname": "babylon_bgl", "name": "BabylonBgl", "description": "Babylon (.BGL)", "extensions": [ ".bgl" ], "singleFile": true, "optionsProp": { "default_encoding_overwrite": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Default encoding (overwrite)" }, "source_encoding_overwrite": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Source encoding (overwrite)" }, "target_encoding_overwrite": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Target encoding (overwrite)" }, "part_of_speech_color": { "class": "HtmlColorOption", "type": "str", "comment": "Color for Part of Speech" }, "no_control_sequence_in_defi": { "class": "BoolOption", "type": "bool", "comment": "No control sequence in definitions" }, "strict_string_conversion": { "class": "BoolOption", "type": "bool", "comment": "Strict string conversion" }, "process_html_in_key": { "class": "BoolOption", "type": "bool", "comment": "Process HTML in (entry or info) key" }, "key_rstrip_chars": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Characters to strip from right-side of keys" }, "search_char_samples": { "class": "BoolOption", "type": "bool", "comment": "(debug) Search character samples" }, "collect_metadata2": { "class": "BoolOption", "type": "bool", "comment": "(debug) Collect second pass metadata from definitions" }, "write_gz": { "class": "BoolOption", "type": "bool", "comment": "(debug) Create a file named *-data.gz" }, 
"char_samples_path": { "class": "StrOption", "type": "str", "customValue": true, "comment": "(debug) File path for character samples" }, "msg_log_path": { "class": "StrOption", "type": "str", "customValue": true, "comment": "(debug) File path for message log" }, "raw_dump_path": { "class": "StrOption", "type": "str", "customValue": true, "comment": "(debug) File path for writing raw blocks" }, "unpacked_gzip_path": { "class": "StrOption", "type": "str", "customValue": true, "comment": "(debug) Path to create unzipped file" } }, "canRead": true, "canWrite": false, "readOptions": { "default_encoding_overwrite": "", "source_encoding_overwrite": "", "target_encoding_overwrite": "", "part_of_speech_color": "007000", "no_control_sequence_in_defi": false, "strict_string_conversion": false, "process_html_in_key": true, "key_rstrip_chars": "" } }, { "module": "cc_kedict", "lname": "cc_kedict", "name": "cc-kedict", "description": "cc-kedict", "extensions": [], "singleFile": true, "optionsProp": {}, "canRead": true, "canWrite": false, "readOptions": {}, "readDepends": { "yaml": "PyYAML", "lxml": "lxml" } }, { "module": "crawler_dir", "lname": "crawler_dir", "name": "CrawlerDir", "description": "Crawler Directory", "extensions": [ ".crawler" ], "singleFile": true, "optionsProp": { "compression": { "class": "StrOption", "type": "str", "customValue": false, "values": [ "", "gz", "bz2", "lzma" ], "comment": "Compression Algorithm" } }, "canRead": true, "canWrite": true, "readOptions": {}, "writeOptions": { "compression": "" } }, { "module": "csv_plugin", "lname": "csv", "name": "Csv", "description": "CSV (.csv)", "extensions": [ ".csv" ], "singleFile": true, "optionsProp": { "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" }, "newline": { "class": "NewlineOption", "type": "str", "customValue": true, "values": [ "\r\n", "\n", "\r" ], "comment": "Newline string" }, "resources": { "class": "BoolOption", "type": "bool", "comment": "Enable resources / data files" }, "delimiter": { "class": "Option", "type": "str", "customValue": true, "values": [ ",", ";", "@" ], "comment": "Column delimiter" }, "add_defi_format": { "class": "BoolOption", "type": "bool", "comment": "enable adding defiFormat (m/h/x)" }, "enable_info": { "class": "BoolOption", "type": "bool", "comment": "Enable glossary info / metedata" }, "word_title": { "class": "BoolOption", "type": "bool", "comment": "add headwords title to beginning of definition" } }, "canRead": true, "canWrite": true, "readOptions": { "encoding": "utf-8", "newline": "\n", "delimiter": "," }, "writeOptions": { "encoding": "utf-8", "newline": "\n", "resources": true, "delimiter": ",", "add_defi_format": false, "enable_info": true, "word_title": false }, "readCompressions": [ "gz", "bz2", "lzma" ] }, { "module": "dicformids", "lname": "dicformids", "name": "Dicformids", "description": "DictionaryForMIDs", "extensions": [ ".mids" ], "singleFile": false, "optionsProp": {}, "canRead": true, "canWrite": true, "sortOnWrite": "always", "sortKeyName": "dicformids", "readOptions": {}, "writeOptions": {} }, { "module": "dict_cc", "lname": "dict_cc", "name": "Dictcc", "description": "Dict.cc (SQLite3)", "extensions": [], "singleFile": true, "optionsProp": {}, "canRead": true, "canWrite": false, "readOptions": {} }, { "module": "dict_cc_split", "lname": "dict_cc_split", "name": "Dictcc_split", "description": "Dict.cc (SQLite3) - Split", "extensions": [], "singleFile": true, "optionsProp": {}, "canRead": true, "canWrite": false, 
"readOptions": {} }, { "module": "dict_org", "lname": "dict_org", "name": "DictOrg", "description": "DICT.org file format (.index)", "extensions": [ ".index" ], "singleFile": false, "optionsProp": { "dictzip": { "class": "BoolOption", "type": "bool", "comment": "Compress .dict file to .dict.dz" }, "install": { "class": "BoolOption", "type": "bool", "comment": "Install dictionary to /usr/share/dictd/" } }, "canRead": true, "canWrite": true, "readOptions": {}, "writeOptions": { "dictzip": false, "install": true } }, { "module": "dict_org_source", "lname": "dict_org_source", "name": "DictOrgSource", "description": "DICT.org dictfmt source file", "extensions": [ ".dtxt" ], "singleFile": true, "optionsProp": { "remove_html_all": { "class": "BoolOption", "type": "bool", "comment": "Remove all HTML tags" } }, "canRead": false, "canWrite": true, "writeOptions": { "remove_html_all": true } }, { "module": "dictunformat", "lname": "dictunformat", "name": "Dictunformat", "description": "dictunformat output file", "extensions": [ ".dictunformat" ], "singleFile": true, "optionsProp": { "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" }, "headword_separator": { "class": "StrOption", "type": "str", "customValue": true, "comment": "separator for headword and alternates" } }, "canRead": true, "canWrite": false, "readOptions": { "encoding": "utf-8", "headword_separator": "; " }, "readCompressions": [ "gz", "bz2", "lzma" ] }, { "module": "digitalnk", "lname": "digitalnk", "name": "DigitalNK", "description": "DigitalNK (SQLite3, N-Korean)", "extensions": [], "singleFile": true, "optionsProp": {}, "canRead": true, "canWrite": false, "readOptions": {} }, { "module": "dikt_json", "lname": "dikt_json", "name": "DiktJson", "description": "DIKT JSON (.json)", "extensions": [], "singleFile": true, "optionsProp": { "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" }, "enable_info": { "class": "BoolOption", "type": "bool", "comment": "Enable glossary info / metedata" }, "resources": { "class": "BoolOption", "type": "bool", "comment": "Enable resources / data files" }, "word_title": { "class": "BoolOption", "type": "bool", "comment": "add headwords title to beginning of definition" } }, "canRead": false, "canWrite": true, "writeOptions": { "encoding": "utf-8", "enable_info": true, "resources": true, "word_title": false } }, { "module": "dsl", "lname": "dsl", "name": "ABBYYLingvoDSL", "description": "ABBYY Lingvo DSL (.dsl)", "extensions": [ ".dsl" ], "singleFile": true, "optionsProp": { "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" }, "audio": { "class": "BoolOption", "type": "bool", "comment": "Enable audio objects" }, "example_color": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Examples color" }, "abbrev": { "class": "StrOption", "type": "str", "customValue": false, "values": [ "", "hover" ], "comment": "Load and apply abbreviation file (`_abrv.dsl`)" } }, "canRead": true, "canWrite": false, "readOptions": { "encoding": "", "audio": true, "example_color": "steelblue", "abbrev": "hover" }, "readCompressions": [ "gz", "bz2", "lzma", "dz" ] }, { "module": "ebook_epub2", "lname": "epub2", "name": "Epub2", "description": "EPUB-2 E-Book", "extensions": [ ".epub" ], "singleFile": true, "optionsProp": { "group_by_prefix_length": { "class": "IntOption", "type": "int", "customValue": true, "comment": "Prefix length for 
grouping" }, "compress": { "class": "BoolOption", "type": "bool", "comment": "Enable compression" }, "keep": { "class": "BoolOption", "type": "bool", "comment": "Keep temp files" }, "include_index_page": { "class": "BoolOption", "type": "bool", "comment": "Include index page" }, "css": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Path to css file" }, "cover_path": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Path to cover file" } }, "canRead": false, "canWrite": true, "sortOnWrite": "always", "sortKeyName": "ebook", "writeOptions": { "keep": false, "group_by_prefix_length": 2, "include_index_page": false, "compress": true, "css": "", "cover_path": "" } }, { "module": "ebook_kobo", "lname": "kobo", "name": "Kobo", "description": "Kobo E-Reader Dictionary", "extensions": [ ".kobo" ], "singleFile": false, "optionsProp": {}, "canRead": false, "canWrite": true, "sortOnWrite": "never", "writeOptions": {}, "writeDepends": { "marisa_trie": "marisa-trie" } }, { "module": "ebook_kobo_dictfile", "lname": "kobo_dictfile", "name": "Dictfile", "description": "Kobo E-Reader Dictfile (.df)", "extensions": [ ".df" ], "singleFile": true, "optionsProp": { "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" }, "extract_inline_images": { "class": "BoolOption", "type": "bool", "comment": "Extract inline images" } }, "canRead": true, "canWrite": true, "readOptions": { "encoding": "utf-8", "extract_inline_images": true }, "writeOptions": { "encoding": "utf-8" }, "readDepends": { "mistune": "mistune==3.0.1" }, "readCompressions": [ "gz", "bz2", "lzma" ] }, { "module": "ebook_mobi", "lname": "mobi", "name": "Mobi", "description": "Mobipocket (.mobi) E-Book", "extensions": [ ".mobi" ], "singleFile": false, "optionsProp": { "group_by_prefix_length": { "class": "IntOption", "type": "int", "customValue": true, "comment": "Prefix length for grouping" }, "kindlegen_path": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Path to kindlegen executable" }, "compress": { "class": "BoolOption", "type": "bool", "comment": "Enable compression", "disabled": true }, "keep": { "class": "BoolOption", "type": "bool", "comment": "Keep temp files" }, "include_index_page": { "class": "BoolOption", "type": "bool", "comment": "Include index page", "disabled": true }, "css": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Path to css file" }, "cover_path": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Path to cover file" }, "file_size_approx": { "class": "FileSizeOption", "type": "int", "customValue": true, "comment": "Approximate size of each xhtml file (example: 200kb)" }, "hide_word_index": { "class": "BoolOption", "type": "bool", "comment": "Hide headword in tap-to-check interface" }, "spellcheck": { "class": "BoolOption", "type": "bool", "comment": "Enable wildcard search and spell correction during word lookup" }, "exact": { "class": "BoolOption", "type": "bool", "comment": "Exact-match Parameter" } }, "canRead": false, "canWrite": true, "sortOnWrite": "default_yes", "sortKeyName": "ebook", "writeOptions": { "keep": false, "group_by_prefix_length": 2, "css": "", "cover_path": "", "kindlegen_path": "", "file_size_approx": 271360, "hide_word_index": false, "spellcheck": true, "exact": false } }, { "module": "edict2", "lname": "edict2", "name": "EDICT2", "description": "EDICT2 (CEDICT) (.u8)", "extensions": [ ".u8" ], "singleFile": true, "optionsProp": { 
"encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" }, "traditional_title": { "class": "BoolOption", "type": "bool", "comment": "Use traditional Chinese for entry titles/keys" }, "colorize_tones": { "class": "BoolOption", "type": "bool", "comment": "Set to false to disable tones coloring" } }, "canRead": true, "canWrite": false, "readOptions": { "encoding": "utf-8", "traditional_title": false, "colorize_tones": true }, "readDepends": { "lxml": "lxml" } }, { "module": "edlin", "lname": "edlin", "name": "Edlin", "description": "EDLIN", "extensions": [ ".edlin" ], "singleFile": false, "optionsProp": { "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" }, "prev_link": { "class": "BoolOption", "type": "bool", "comment": "Enable link to previous entry" } }, "canRead": true, "canWrite": true, "readOptions": { "encoding": "utf-8" }, "writeOptions": { "encoding": "utf-8", "prev_link": true } }, { "module": "freedict", "lname": "freedict", "name": "FreeDict", "description": "FreeDict (.tei)", "extensions": [ ".tei" ], "singleFile": true, "optionsProp": { "resources": { "class": "BoolOption", "type": "bool", "comment": "Enable resources / data files" }, "discover": { "class": "BoolOption", "type": "bool", "comment": "Find and show unsupported tags" }, "auto_rtl": { "class": "BoolOption", "type": "bool", "comment": "Auto-detect and mark Right-to-Left text" }, "auto_comma": { "class": "BoolOption", "type": "bool", "comment": "Auto-detect comma sign based on text" }, "comma": { "class": "StrOption", "type": "str", "customValue": true, "values": [ ", ", "\u060c " ], "comment": "Comma sign (following space) to use as separator" }, "word_title": { "class": "BoolOption", "type": "bool", "comment": "Add headwords title to beginning of definition" }, "pron_color": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Pronunciation color" }, "gram_color": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Grammar color" }, "example_padding": { "class": "IntOption", "type": "int", "customValue": true, "comment": "Padding for examples (in px)" } }, "canRead": true, "canWrite": false, "readOptions": { "discover": false, "auto_rtl": null, "auto_comma": true, "comma": ", ", "word_title": false, "pron_color": "gray", "gram_color": "green", "example_padding": 10 }, "readDepends": { "lxml": "lxml" }, "readCompressions": [ "gz", "bz2", "lzma" ] }, { "module": "gettext_po", "lname": "gettext_po", "name": "GettextPo", "description": "Gettext Source (.po)", "extensions": [ ".po" ], "singleFile": true, "optionsProp": { "resources": { "class": "BoolOption", "type": "bool", "comment": "Enable resources / data files" } }, "canRead": true, "canWrite": true, "readOptions": {}, "writeOptions": { "resources": true }, "readDepends": { "polib": "polib" }, "writeDepends": { "polib": "polib" } }, { "module": "html_dir", "lname": "html_dir", "name": "HtmlDir", "description": "HTML Directory", "extensions": [ ".hdir" ], "singleFile": false, "optionsProp": { "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" }, "resources": { "class": "BoolOption", "type": "bool", "comment": "Enable resources / data files" }, "max_file_size": { "class": "IntOption", "type": "int", "customValue": true, "comment": "Maximum file size in bytes" }, "filename_format": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Filename format, 
default: {n:05d}.html" }, "escape_defi": { "class": "BoolOption", "type": "bool", "comment": "Escape definitions" }, "dark": { "class": "BoolOption", "type": "bool", "comment": "Use dark style" }, "css": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Path to css file" }, "word_title": { "class": "BoolOption", "type": "bool", "comment": "Add headwords title to beginning of definition" } }, "canRead": false, "canWrite": true, "writeOptions": { "encoding": "utf-8", "resources": true, "max_file_size": 102400, "filename_format": "{n:05d}.html", "escape_defi": false, "dark": true, "css": "", "word_title": true } }, { "module": "info_plugin", "lname": "info", "name": "Info", "description": "Glossary Info (.info)", "extensions": [ ".info" ], "singleFile": true, "optionsProp": {}, "canRead": true, "canWrite": true, "readOptions": {}, "writeOptions": {} }, { "module": "jmdict", "lname": "jmdict", "name": "JMDict", "description": "JMDict (xml)", "extensions": [], "singleFile": true, "optionsProp": { "example_color": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Examples color" }, "example_padding": { "class": "IntOption", "type": "int", "customValue": true, "comment": "Padding for examples (in px)" }, "translitation": { "class": "BoolOption", "type": "bool", "comment": "Add transliteration (romaji) of keywords" } }, "canRead": true, "canWrite": false, "readOptions": { "example_padding": 10, "example_color": "", "translitation": false }, "readDepends": { "lxml": "lxml" }, "readCompressions": [ "gz", "bz2", "lzma" ] }, { "module": "jmnedict", "lname": "jmnedict", "name": "JMnedict", "description": "JMnedict", "extensions": [], "singleFile": true, "optionsProp": {}, "canRead": true, "canWrite": false, "readOptions": {}, "readDepends": { "lxml": "lxml" }, "readCompressions": [ "gz", "bz2", "lzma" ] }, { "module": "json_plugin", "lname": "json", "name": "Json", "description": "JSON (.json)", "extensions": [ ".json" ], "singleFile": true, "optionsProp": { "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" }, "enable_info": { "class": "BoolOption", "type": "bool", "comment": "Enable glossary info / metadata" }, "resources": { "class": "BoolOption", "type": "bool", "comment": "Enable resources / data files" }, "word_title": { "class": "BoolOption", "type": "bool", "comment": "Add headwords title to beginning of definition" } }, "canRead": false, "canWrite": true, "writeOptions": { "encoding": "utf-8", "enable_info": true, "resources": true, "word_title": false } }, { "module": "lingoes_ldf", "lname": "lingoes_ldf", "name": "LingoesLDF", "description": "Lingoes Source (.ldf)", "extensions": [ ".ldf" ], "singleFile": true, "optionsProp": { "newline": { "class": "NewlineOption", "type": "str", "customValue": true, "values": [ "\r\n", "\n", "\r" ], "comment": "Newline string" }, "resources": { "class": "BoolOption", "type": "bool", "comment": "Enable resources / data files" }, "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" } }, "canRead": true, "canWrite": true, "readOptions": { "encoding": "utf-8" }, "writeOptions": { "newline": "\n", "resources": true }, "readCompressions": [ "gz", "bz2", "lzma" ] }, { "module": "makindo_medical", "lname": "makindo_medical", "name": "MakindoMedical", "description": "Makindo Medical Reference (SQLite3)", "extensions": [], "singleFile": true, "optionsProp": {}, "canRead": true, "canWrite": false, "readOptions": {} },
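Each record in this index follows the same schema: a plugin `module` name, display metadata, an `optionsProp` map describing every option, `canRead`/`canWrite` capability flags, default `readOptions`/`writeOptions`, and optional pip dependencies (`readDepends`/`writeDepends`). A minimal sketch of consuming the index from Python follows — the file name `plugins-meta/index.json` is an assumption for illustration, not something this data dictates:

import json

# Load the plugin index (the path here is an assumption in this sketch).
with open("plugins-meta/index.json", encoding="utf-8") as fp:
    plugins = json.load(fp)

# List the formats that can be written, with their default write options.
for plugin in plugins:
    if plugin.get("canWrite"):
        print(plugin["name"], plugin.get("extensions", []), plugin.get("writeOptions", {}))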
{ "module": "octopus_mdict_new", "lname": "octopus_mdict", "name": "OctopusMdict", "description": "Octopus MDict (.mdx)", "extensions": [ ".mdx" ], "singleFile": false, "optionsProp": { "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" }, "substyle": { "class": "BoolOption", "type": "bool", "comment": "Enable substyle" }, "same_dir_data_files": { "class": "BoolOption", "type": "bool", "comment": "Read data files from same directory" }, "audio": { "class": "BoolOption", "type": "bool", "comment": "Enable audio objects" } }, "canRead": true, "canWrite": false, "readOptions": { "encoding": "", "substyle": true, "same_dir_data_files": false, "audio": false } }, { "module": "quickdic6", "lname": "quickdic6", "name": "QuickDic6", "description": "QuickDic version 6 (.quickdic)", "extensions": [ ".quickdic", ".quickdic.v006.zip" ], "singleFile": true, "optionsProp": { "normalizer_rules": { "class": "StrOption", "type": "str", "customValue": true, "comment": "ICU normalizer rules to use for index sorting" } }, "canRead": true, "canWrite": true, "sortOnWrite": "never", "readOptions": {}, "writeOptions": { "normalizer_rules": "" }, "readDepends": { "icu": "PyICU" } }, { "module": "sql", "lname": "sql", "name": "Sql", "description": "SQL (.sql)", "extensions": [ ".sql" ], "singleFile": true, "optionsProp": { "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" }, "info_keys": { "class": "ListOption", "type": "list", "comment": "List of dbinfo table columns" }, "add_extra_info": { "class": "BoolOption", "type": "bool", "comment": "Create dbinfo_extra table" }, "newline": { "class": "NewlineOption", "type": "str", "customValue": true, "values": [ "\r\n", "\n", "\r" ], "comment": "Newline string" }, "transaction": { "class": "BoolOption", "type": "bool", "comment": "Use TRANSACTION" } }, "canRead": false, "canWrite": true, "writeOptions": { "encoding": "utf-8", "info_keys": null, "add_extra_info": true, "newline": "
", "transaction": false } }, { "module": "stardict", "lname": "stardict", "name": "Stardict", "description": "StarDict (.ifo)", "extensions": [ ".ifo" ], "singleFile": false, "optionsProp": { "large_file": { "class": "BoolOption", "type": "bool", "comment": "Use idxoffsetbits=64 bits, for large files only" }, "stardict_client": { "class": "BoolOption", "type": "bool", "comment": "Modify html entries for StarDict 3.0" }, "dictzip": { "class": "BoolOption", "type": "bool", "comment": "Compress .dict file to .dict.dz" }, "sametypesequence": { "class": "StrOption", "type": "str", "customValue": false, "values": [ "", "h", "m", "x", null ], "comment": "Definition format: h=html, m=plaintext, x=xdxf" }, "xdxf_to_html": { "class": "BoolOption", "type": "bool", "comment": "Convert XDXF entries to HTML" }, "xsl": { "class": "BoolOption", "type": "bool", "comment": "Use XSL transformation" }, "unicode_errors": { "class": "StrOption", "type": "str", "customValue": false, "values": [ "strict", "ignore", "replace", "backslashreplace" ], "comment": "What to do with Unicode decoding errors" }, "audio_goldendict": { "class": "BoolOption", "type": "bool", "comment": "Convert audio links for GoldenDict (desktop)" }, "audio_icon": { "class": "BoolOption", "type": "bool", "comment": "Add glossary's audio icon" }, "sqlite": { "class": "BoolOption", "type": "bool", "comment": "Use SQLite to limit memory usage. Default depends on global SQLite mode." } }, "canRead": true, "canWrite": true, "sortOnWrite": "always", "sortKeyName": "stardict", "readOptions": { "xdxf_to_html": true, "xsl": false, "unicode_errors": "strict" }, "writeOptions": { "large_file": false, "dictzip": true, "sametypesequence": "", "stardict_client": false, "audio_goldendict": false, "audio_icon": true, "sqlite": null } }, { "module": "stardict_merge_syns", "lname": "stardict_merge_syns", "name": "StardictMergeSyns", "description": "StarDict (Merge Syns)", "extensions": [], "singleFile": false, "optionsProp": { "large_file": { "class": "BoolOption", "type": "bool", "comment": "Use idxoffsetbits=64 bits, for large files only" }, "dictzip": { "class": "BoolOption", "type": "bool", "comment": "Compress .dict file to .dict.dz" }, "sametypesequence": { "class": "StrOption", "type": "str", "customValue": false, "values": [ "", "h", "m", "x", null ], "comment": "Definition format: h=html, m=plaintext, x=xdxf" }, "xdxf_to_html": { "class": "BoolOption", "type": "bool", "comment": "Convert XDXF entries to HTML" }, "xsl": { "class": "BoolOption", "type": "bool", "comment": "Use XSL transformation" }, "unicode_errors": { "class": "StrOption", "type": "str", "customValue": false, "values": [ "strict", "ignore", "replace", "backslashreplace" ], "comment": "What to do with Unicode decoding errors" }, "audio_icon": { "class": "BoolOption", "type": "bool", "comment": "Add glossary's audio icon" }, "sqlite": { "class": "BoolOption", "type": "bool", "comment": "Use SQLite to limit memory usage. Default depends on global SQLite mode." 
} }, "canRead": false, "canWrite": true, "sortOnWrite": "always", "sortKeyName": "stardict", "writeOptions": { "large_file": false, "dictzip": true, "sametypesequence": "", "audio_icon": true, "sqlite": null } }, { "module": "stardict_textual", "lname": "stardict_textual", "name": "StardictTextual", "description": "StarDict Textual File (.xml)", "extensions": [], "singleFile": true, "optionsProp": { "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" }, "xdxf_to_html": { "class": "BoolOption", "type": "bool", "comment": "Convert XDXF entries to HTML" } }, "canRead": true, "canWrite": true, "sortKeyName": "stardict", "readOptions": { "encoding": "utf-8", "xdxf_to_html": true }, "writeOptions": { "encoding": "utf-8" }, "readDepends": { "lxml": "lxml" }, "writeDepends": { "lxml": "lxml" }, "readCompressions": [ "gz", "bz2", "lzma" ] }, { "module": "tabfile", "lname": "tabfile", "name": "Tabfile", "description": "Tabfile (.txt, .dic)", "extensions": [ ".txt", ".tab", ".tsv" ], "singleFile": true, "optionsProp": { "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" }, "enable_info": { "class": "BoolOption", "type": "bool", "comment": "Enable glossary info / metedata" }, "resources": { "class": "BoolOption", "type": "bool", "comment": "Enable resources / data files" }, "file_size_approx": { "class": "FileSizeOption", "type": "int", "customValue": true, "comment": "Split up by given approximate file size\nexamples: 100m, 1g" }, "word_title": { "class": "BoolOption", "type": "bool", "comment": "Add headwords title to beginning of definition" } }, "canRead": true, "canWrite": true, "readOptions": { "encoding": "utf-8" }, "writeOptions": { "encoding": "utf-8", "enable_info": true, "resources": true, "file_size_approx": 0, "word_title": false }, "readCompressions": [ "gz", "bz2", "lzma" ] }, { "module": "testformat", "lname": "testformat", "name": "Test", "description": "Test Format File(.test)", "extensions": [ ".test", ".tst" ], "singleFile": true, "optionsProp": {}, "canRead": true, "canWrite": true, "readOptions": {}, "writeOptions": {}, "enable": false }, { "module": "wiktextract", "lname": "wiktextract", "name": "Wiktextract", "description": "Wiktextract (.jsonl)", "extensions": [ ".jsonl" ], "singleFile": true, "optionsProp": { "resources": { "class": "BoolOption", "type": "bool", "comment": "Enable resources / data files" }, "word_title": { "class": "BoolOption", "type": "bool", "comment": "Add headwords title to beginning of definition" }, "pron_color": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Pronunciation color" }, "gram_color": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Grammar color" }, "example_padding": { "class": "StrOption", "type": "str", "customValue": true, "comment": "Padding for examples (css value)" }, "audio": { "class": "BoolOption", "type": "bool", "comment": "Enable audio" }, "audio_formats": { "class": "ListOption", "type": "list", "comment": "List of audio formats to use" }, "categories": { "class": "BoolOption", "type": "bool", "comment": "Enable categories" } }, "canRead": true, "canWrite": false, "readOptions": { "word_title": false, "pron_color": "gray", "gram_color": "green", "example_padding": "10px 20px", "audio": true, "audio_formats": [ "ogg", "mp3" ], "categories": false }, "readDepends": { "lxml": "lxml" }, "readCompressions": [ "gz", "bz2", "lzma" ] }, { "module": "wordnet", "lname": 
"wordnet", "name": "Wordnet", "description": "WordNet", "extensions": [], "singleFile": false, "optionsProp": {}, "canRead": true, "canWrite": false, "readOptions": {} }, { "module": "wordset", "lname": "wordset", "name": "Wordset", "description": "Wordset.org JSON directory", "extensions": [], "singleFile": false, "optionsProp": { "encoding": { "class": "EncodingOption", "type": "str", "customValue": true, "comment": "Encoding/charset" } }, "canRead": true, "canWrite": false, "readOptions": { "encoding": "utf-8" } }, { "module": "xdxf", "lname": "xdxf", "name": "Xdxf", "description": "XDXF (.xdxf)", "extensions": [ ".xdxf" ], "singleFile": true, "optionsProp": { "html": { "class": "BoolOption", "type": "bool", "comment": "Entries are HTML" }, "xsl": { "class": "BoolOption", "type": "bool", "comment": "Use XSL transformation" } }, "canRead": true, "canWrite": false, "readOptions": { "html": true, "xsl": false }, "readDepends": { "lxml": "lxml" }, "readCompressions": [ "gz", "bz2", "lzma" ] }, { "module": "xdxf_css", "lname": "xdxf_css", "name": "XdxfCss", "description": "XDXF with CSS and JS", "extensions": [], "singleFile": true, "optionsProp": { "html": { "class": "BoolOption", "type": "bool", "comment": "Entries are HTML" } }, "canRead": true, "canWrite": false, "readOptions": { "html": true }, "readDepends": { "lxml": "lxml" }, "readCompressions": [ "gz", "bz2", "lzma" ] }, { "module": "xdxf_lax", "lname": "xdxf_lax", "name": "XdxfLax", "description": "XDXF Lax (.xdxf)", "extensions": [], "singleFile": true, "optionsProp": { "html": { "class": "BoolOption", "type": "bool", "comment": "Entries are HTML" }, "xsl": { "class": "BoolOption", "type": "bool", "comment": "Use XSL transformation" } }, "canRead": true, "canWrite": false, "readOptions": { "html": true, "xsl": false }, "readDepends": { "lxml": "lxml" }, "readCompressions": [ "gz", "bz2", "lzma" ] }, { "module": "yomichan", "lname": "yomichan", "name": "Yomichan", "description": "Yomichan (.zip)", "extensions": [ ".zip" ], "singleFile": true, "optionsProp": { "term_bank_size": { "class": "IntOption", "type": "int", "customValue": true, "comment": "The number of terms in each term bank json file." }, "term_from_headword_only": { "class": "BoolOption", "type": "bool", "comment": "If set to true, only create a term for the headword for each entry, as opposed to create one term for each alternate word. If the headword is ignored by the `ignore_word_with_pattern` option, the next word in the alternate list that is not ignored is used as headword." }, "no_term_from_reading": { "class": "BoolOption", "type": "bool", "comment": "When there are multiple alternate words, don't create term for the one that is the same as the the reading form, which is chosen to be the first alternate forms that consists solely of Hiragana and Katakana. For example, an entry could contain both '\u3060\u3044\u304c\u304f' and '\u5927\u5b66' as alternate words. Setting this option to true would prevent a term to be created for the former." }, "delete_word_pattern": { "class": "StrOption", "type": "str", "customValue": true, "comment": "When given, all non-overlapping matches of this regular expression are removed from word strings. For example, if an entry has word '\u3042\u307e\u00b7\u3044', setting the pattern to `\u00b7` removes all center dots, or more precisely use `\u00b7(?=[\\u3040-\\u309F])` to only remove center dots that precede Hiragana characters. Either way, the original word is replaced with '\u3042\u307e\u3044'." 
}, "ignore_word_with_pattern": { "class": "StrOption", "type": "str", "customValue": true, "comment": "When given, don't create terms for a word if any of its substrings matches this regular expression. For example, an entry could contain both '\u3060\u3044\u304c\u304f\u3010\u5927\u5b66\u3011' and '\u5927\u5b66' as alternate words. Setting this option with value `r'\u3010.+\u3011'` would prevent a term to be created for the former." }, "alternates_from_word_pattern": { "class": "StrOption", "type": "str", "customValue": true, "comment": "When given, the regular expression is used to find additional alternate words for the same entry from matching substrings in the original words. If there are no capturing groups in the regular expression, then all matched substrings are added to the list of alternate words. If there are capturing groups, then substrings matching the groups are added to the alternate words list instead. For example, if an entry has '\u3060\u3044\u304c\u304f\u3010\u5927\u5b66\u3011' as a word, then `\\w+(?=\u3010)` adds '\u3060\u3044\u304c\u304f' as an additional word, while `(\\w+)\u3010(\\w+)\u3011` adds both '\u3060\u3044\u304c\u304f' and '\u5927\u5b66'." }, "alternates_from_defi_pattern": { "class": "StrOption", "type": "str", "customValue": true, "comment": "When given, the regular expression is used to find additional alternate words for the same entry from matching substrings in the definition. `^` and `$` can be used to match start and end of lines, respectively. If there are no capturing groups in the regular expression, then all matched substrings are added to the list of alternate words. If there are capturing groups, then substrings matching the groups are added to the alternate words list instead. For example, if an entry has '\u3060\u3044\u304c\u304f\u3010\u5927\u5b66\u3011' in its definition, then `\\w+\u3010(\\w+)\u3011` adds '\u5927\u5b66' as an additional word." }, "rule_v1_defi_pattern": { "class": "StrOption", "type": "str", "customValue": true, "comment": "When given, if any substring of an entry's definition matches this regular expression, then the term(s) created from entry are labeled as ichidan verb. Yomichan uses this information to match conjugated forms of words. `^` and `$` can be used to match start and end of lines, respectively. For example, setting this option to `^\\(\u52d5[\u4e0a\u4e0b]\u4e00\\)$` identifies entries where there's a line of '(\u52d5\u4e0a\u4e00)' or '(\u52d5\u4e0b\u4e00)'." }, "rule_v5_defi_pattern": { "class": "StrOption", "type": "str", "customValue": true, "comment": "When given, if any substring of an entry's definition matches this regular expression, then the term(s) created from entry are labeled as godan verb. Yomichan uses this information to match conjugated forms of words. `^` and `$` can be used to match start and end of lines, respectively. For example, setting this option to `^\\(\u52d5\u4e94\\)$` identifies entries where there's a line of '(\u52d5\u4e94)'." }, "rule_vs_defi_pattern": { "class": "StrOption", "type": "str", "customValue": true, "comment": "When given, if any substring of an entry's definition matches this regular expression, then the term(s) created from entry are labeled as suru verb. Yomichan uses this information to match conjugated forms of words. `^` and `$` can be used to match start and end of lines, respectively. For example, setting this option to `^\u30b9\u30eb$` identifies entries where there's a line of '\u30b9\u30eb'." 
}, "rule_vk_defi_pattern": { "class": "StrOption", "type": "str", "customValue": true, "comment": "When given, if any substring of an entry's definition matches this regular expression, then the term(s) created from entry are labeled as kuru verb. Yomichan uses this information to match conjugated forms of words. `^` and `$` can be used to match start and end of lines, respectively. For example, setting this option to `^\\(\u52d5\u30ab\u5909\\)$` identifies entries where there's a line of '(\u52d5\u30ab\u5909)'." }, "rule_adji_defi_pattern": { "class": "StrOption", "type": "str", "customValue": true, "comment": "When given, if any substring of an entry's definition matches this regular expression, then the term(s) created from entry are labeled as i-adjective. Yomichan uses this information to match conjugated forms of words. `^` and `$` can be used to match start and end of lines, respectively. For example, setting this option to `r'^\\(\u5f62\\)$'` identify entries where there's a line of '(\u5f62)'." } }, "canRead": false, "canWrite": true, "sortOnWrite": "always", "sortKeyName": "headword", "writeOptions": { "term_bank_size": 10000, "term_from_headword_only": true, "no_term_from_reading": true, "delete_word_pattern": "", "ignore_word_with_pattern": "", "alternates_from_word_pattern": "", "alternates_from_defi_pattern": "", "rule_v1_defi_pattern": "", "rule_v5_defi_pattern": "", "rule_vs_defi_pattern": "", "rule_vk_defi_pattern": "", "rule_adji_defi_pattern": "" }, "writeDepends": { "bs4": "beautifulsoup4" } }, { "module": "zimfile", "lname": "zim", "name": "Zim", "description": "Zim (.zim, for Kiwix)", "extensions": [ ".zim" ], "singleFile": true, "optionsProp": { "text_unicode_errors": { "class": "UnicodeErrorsOption", "type": "str", "comment": "Unicode Errors for plaintext, values: `strict`, `ignore`, `replace`" }, "html_unicode_errors": { "class": "UnicodeErrorsOption", "type": "str", "comment": "Unicode Errors for HTML, values: `strict`, `ignore`, `replace`" } }, "canRead": true, "canWrite": false, "readOptions": { "text_unicode_errors": "replace", "html_unicode_errors": "replace" }, "readDepends": { "libzim": "libzim>=1.0" } } ]pyglossary-5.0.9/pyglossary/000077500000000000000000000000001476751035500161725ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/__init__.py000066400000000000000000000001741476751035500203050ustar00rootroot00000000000000from .core import VERSION from .glossary import Glossary __version__ = VERSION __all__ = [ "Glossary", "__version__", ] pyglossary-5.0.9/pyglossary/apple_utils.py000066400000000000000000000113441476751035500210700ustar00rootroot00000000000000# list of css params that are defined in Apple's WebKit and used in # Apple dictionary files (binary and source formats) # but to make the css work in other dictionaries, we have to substitute them # default / system font of Mac OS X is Helvetica Neue / Neue Helvetica # https://en.wikipedia.org/wiki/Helvetica # list of fonts that are shipped with Mac OS X # https://en.wikipedia.org/wiki/List_of_typefaces_included_with_macOS # but we actually prefer to set font that are free and more widely available # in all operating systems # also see: # https://github.com/servo/servo/blob/master/components/style/properties/counted_unknown_properties.py from __future__ import annotations import re from .core import log __all__ = ["substituteAppleCSS"] # remove these keys along with their value cssKeyRemove = { b"-webkit-text-combine", # ^ value: horizontal b"-apple-color-filter", # ^ value: 
apple-invert-lightness() b"-webkit-overflow-scrolling", # ^ controls whether or not touch devices use momentum-based scrolling # https://developer.mozilla.org/en-US/docs/Web/CSS/-webkit-overflow-scrolling # values: touch, auto } cssKeyRemovePattern = re.compile( rb"[ \t]*(" + b"|".join(cssKeyRemove) + rb")\s*:[^;}]*;\s*", ) cssMapping: dict[str, str] = { # I didn't actually find these font values: "-apple-system-body": '"Helvetica Neue"', "-apple-system": '"Helvetica Neue"', "-webkit-link": "rgb(0, 0, 238)", # value, color of
links "-webkit-control": "normal normal normal normal 13px/normal system-ui", "-webkit-mini-control": "normal normal normal normal 9px/normal system-ui", "-webkit-small-control": "normal normal normal normal 11px/normal system-ui", "-webkit-isolate": "isolate", # value for "unicode-bidi" "-webkit-isolate-override": "isolate-override", # value for "unicode-bidi" "-webkit-border-bottom-left-radius": "border-bottom-left-radius", # key "-webkit-border-bottom-right-radius": "border-bottom-right-radius", # key "-webkit-border-radius": "border-radius", # key "-webkit-border-top-left-radius": "border-top-left-radius", # key "-webkit-border-top-right-radius": "border-top-right-radius", # key "-webkit-hyphens": "hyphens", # key "-webkit-writing-mode": "writing-mode", # key "-webkit-column-width": "column-width", # key "-webkit-column-rule-color": "column-rule-color", # key "-webkit-column-rule-style": "column-rule-style", # key "-webkit-column-rule-width": "column-rule-width", # key "-webkit-ruby-position": "ruby-position", # key # not so sure about this: "-webkit-padding-start": "padding-inline-start", # key "-apple-system-alternate-selected-text": "rgb(255, 255, 255)", "-apple-system-blue": "rgb(0, 122, 255)", "-apple-system-brown": "rgb(162, 132, 94)", "-apple-system-container-border": "rgba(0, 0, 0, 0.247)", "-apple-system-control-accent": "rgb(0, 122, 255)", "-apple-system-control-background": "rgb(255, 255, 255)", "-apple-system-even-alternating-content-background": "rgb(255, 255, 255)", "-apple-system-find-highlight-background": "rgb(255, 255, 0)", "-apple-system-gray": "rgb(142, 142, 147)", "-apple-system-green": "rgb(40, 205, 65)", "-apple-system-grid": "rgb(230, 230, 230)", "-apple-system-header-text": "rgba(0, 0, 0, 0.847)", "-apple-system-label": "rgba(0, 0, 0, 0.847)", "-apple-system-odd-alternating-content-background": "rgb(244, 245, 245)", "-apple-system-orange": "rgb(255, 149, 0)", "-apple-system-pink": "rgb(255, 45, 85)", "-apple-system-placeholder-text": "rgba(0, 0, 0, 0.247)", "-apple-system-purple": "rgb(175, 82, 222)", "-apple-system-quaternary-label": "rgba(0, 0, 0, 0.098)", "-apple-system-red": "rgb(255, 59, 48)", "-apple-system-secondary-label": "rgba(0, 0, 0, 0.498)", "-apple-system-selected-content-background": "rgb(0, 99, 225)", "-apple-system-selected-text": "rgb(0, 0, 0)", "-apple-system-selected-text-background": "rgba(128, 188, 254, 0.6)", "-apple-system-separator": "rgba(0, 0, 0, 0.098)", "-apple-system-tertiary-label": "rgba(0, 0, 0, 0.26)", "-apple-system-text-background": "rgb(255, 255, 255)", "-apple-system-unemphasized-selected-content-background": "rgb(220, 220, 220)", "-apple-system-unemphasized-selected-text": "rgb(0, 0, 0)", "-apple-system-unemphasized-selected-text-background": "rgb(220, 220, 220)", "-apple-system-yellow": "rgb(255, 204, 0)", "-apple-wireless-playback-target-active": "rgb(0, 122, 255)", } cssParamPattern = re.compile( rb"(-(apple|webkit)-[a-z\-]+)", ) def _subCSS(m: re.Match) -> bytes: b_key = m.group(0) value = cssMapping.get(b_key.decode("ascii")) if value is None: log.warning(f"unrecognized CSS param: {b_key.decode('ascii')!r}") return b_key return value.encode("ascii") def substituteAppleCSS(css: bytes) -> bytes: css = cssKeyRemovePattern.sub(b"", css) return cssParamPattern.sub(_subCSS, css) pyglossary-5.0.9/pyglossary/arabic_utils.py000066400000000000000000000000301476751035500211760ustar00rootroot00000000000000# -*- coding: utf-8 -*- 
pyglossary-5.0.9/pyglossary/compression.py000066400000000000000000000073611476751035500211140ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import logging import os from os.path import join from typing import TYPE_CHECKING if TYPE_CHECKING: import io from collections.abc import Callable stdCompressions = ("gz", "bz2", "lzma") log = logging.getLogger("pyglossary") __all__ = [ "compress", "compressionOpen", "compressionOpenFunc", "stdCompressions", "uncompress", "zipFileOrDir", ] def compressionOpenFunc(c: str) -> Callable | None: if not c: return open if c == "gz": import gzip return gzip.open if c == "bz2": import bz2 return bz2.open if c == "lzma": import lzma return lzma.open if c == "dz": import gzip return gzip.open return None def compressionOpen( filename: str, dz: bool = False, **kwargs, # noqa: ANN003 ) -> io.IOBase: from os.path import splitext filenameNoExt, ext = splitext(filename) ext = ext.lower().lstrip(".") try: int(ext) except ValueError: pass else: _, ext = splitext(filenameNoExt) ext = ext.lower().lstrip(".") if ext in stdCompressions or (dz and ext == "dz"): openFunc = compressionOpenFunc(ext) if not openFunc: raise RuntimeError(f"no compression found for {ext=}") file = openFunc(filename, **kwargs) file.compression = ext return file return open(filename, **kwargs) # noqa: SIM115 def zipFileOrDir(filename: str) -> None: import shutil from os.path import ( isdir, isfile, split, ) from pyglossary.repro_zipfile import ReproducibleZipFile as ZipFile from .os_utils import indir def _zipFileAdd(zf: ZipFile, filename: str) -> None: if isfile(filename): zf.write(filename) return if not isdir(filename): raise OSError(f"Not a file or directory: {filename}") for subFname in os.listdir(filename): _zipFileAdd(zf, join(filename, subFname)) with ZipFile(f"{filename}.zip", mode="w") as zf: if isdir(filename): dirn, name = split(filename) with indir(filename): for subFname in os.listdir(filename): _zipFileAdd(zf, subFname) shutil.rmtree(filename) return dirn, name = split(filename) files = [name] if isdir(f"{filename}_res"): files.append(f"{name}_res") with indir(dirn): for fname in files: _zipFileAdd(zf, fname) def compress(filename: str, compression: str) -> str: """ Filename is the existing file path. supported compressions: "gz", "bz2", "lzma", "zip". """ import shutil from os.path import isfile log.info(f"Compressing {filename!r} with {compression!r}") compFilename = f"{filename}.{compression}" if compression in stdCompressions: openFunc = compressionOpenFunc(compression) if not openFunc: raise RuntimeError(f"invalid {compression=}") with openFunc(compFilename, mode="wb") as dest: with open(filename, mode="rb") as source: shutil.copyfileobj(source, dest) return compFilename if compression == "zip": try: os.remove(compFilename) except OSError: pass try: zipFileOrDir(filename) except Exception as e: log.error( f'{e}\nFailed to compress file "{filename}"', ) else: raise ValueError(f"unexpected {compression=}") if isfile(compFilename): return compFilename return filename def uncompress(srcFilename: str, dstFilename: str, compression: str) -> None: """ Filename is the existing file path. supported compressions: "gz", "bz2", "lzma". 
""" import shutil log.info(f"Uncompressing {srcFilename!r} to {dstFilename!r}") if compression in stdCompressions: openFunc = compressionOpenFunc(compression) if not openFunc: raise RuntimeError(f"invalid {compression=}") with openFunc(srcFilename, mode="rb") as source: with open(dstFilename, mode="wb") as dest: shutil.copyfileobj(source, dest) return # TODO: if compression == "zip": raise ValueError(f"unsupported compression {compression!r}") pyglossary-5.0.9/pyglossary/core.py000066400000000000000000000123001476751035500174700ustar00rootroot00000000000000from __future__ import annotations import os import platform import sys from os.path import ( abspath, dirname, exists, isdir, isfile, join, ) from . import logger from .logger import TRACE, trace def exc_note(e: Exception, note: str) -> Exception: try: e.add_note(note) # pyright: ignore[reportAttributeAccessIssue] except AttributeError: if hasattr(e, "msg"): e.msg += "\n" + note # pyright: ignore[reportAttributeAccessIssue] return e __all__ = [ "TRACE", "VERSION", "appResDir", "cacheDir", "checkCreateConfDir", "confDir", "confJsonFile", "dataDir", "getDataDir", "homeDir", "homePage", "isDebug", "log", "noColor", "pip", "pluginsDir", "rootConfJsonFile", "rootDir", "sysName", "tmpDir", "trace", "uiDir", "userPluginsDir", ] VERSION = "5.0.9" homePage = "https://github.com/ilius/pyglossary" noColor = False def checkCreateConfDir() -> None: if not isdir(confDir): if exists(confDir): # file, or anything other than directory os.rename(confDir, confDir + ".bak") # we do not import old config os.mkdir(confDir) if not exists(userPluginsDir): try: os.mkdir(userPluginsDir) except Exception as e: log.warning(f"failed to create user plugins directory: {e}") if not isfile(confJsonFile): with ( open(rootConfJsonFile, encoding="utf-8") as srcF, open(confJsonFile, "w", encoding="utf-8") as usrF, ): usrF.write(srcF.read()) def _in_virtualenv() -> bool: if hasattr(sys, "real_prefix"): return True return hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix def getDataDir() -> str: if _in_virtualenv(): pass # TODO # print(f"prefix={sys.prefix}, base_prefix={sys.base_prefix}") # return join( # dirname(dirname(dirname(rootDir))), # os.getenv("VIRTUAL_ENV"), "share", "pyglossary", # ) if not rootDir.endswith(("dist-packages", "site-packages")): return rootDir parent3 = dirname(dirname(dirname(rootDir))) if os.sep == "/": return join(parent3, "share", "pyglossary") direc = join( parent3, f"Python{sys.version_info.major}{sys.version_info.minor}", "share", "pyglossary", ) if isdir(direc): return direc direc = join(parent3, "Python3", "share", "pyglossary") if isdir(direc): return direc direc = join(parent3, "Python", "share", "pyglossary") if isdir(direc): return direc direc = join(sys.prefix, "share", "pyglossary") if isdir(direc): return direc if CONDA_PREFIX := os.getenv("CONDA_PREFIX"): direc = join(CONDA_PREFIX, "share", "pyglossary") if isdir(direc): return direc raise OSError("failed to detect dataDir") # __________________________________________________________________________ # log = logger.setupLogging() def isDebug() -> bool: return log.getVerbosity() >= 4 # noqa: PLR2004 sysName = platform.system().lower() # platform.system() is in ["Linux", "Windows", "Darwin", "FreeBSD"] # sysName is in ["linux", "windows", "darwin', "freebsd"] # can set env var WARNINGS to: # "error", "ignore", "always", "default", "module", "once" if WARNINGS := os.getenv("WARNINGS"): if WARNINGS in {"default", "error", "ignore", "always", "module", "once"}: import 
warnings warnings.filterwarnings(WARNINGS) # type: ignore # noqa: PGH003 else: log.error(f"invalid env var {WARNINGS = }") if getattr(sys, "frozen", False): # PyInstaller frozen executable log.info(f"sys.frozen = {getattr(sys, 'frozen', False)}") rootDir = dirname(sys.executable) uiDir = join(rootDir, "pyglossary", "ui") else: _srcDir = dirname(abspath(__file__)) uiDir = join(_srcDir, "ui") rootDir = dirname(_srcDir) dataDir = getDataDir() appResDir = join(dataDir, "res") if os.sep == "/": # Operating system is Unix-Like homeDir = os.getenv("HOME", "/") tmpDir = os.getenv("TMPDIR", "/tmp") # noqa: S108 if sysName == "darwin": # MacOS X _libDir = join(homeDir, "Library") confDir = join(_libDir, "Preferences", "PyGlossary") # or maybe: join(_libDir, "PyGlossary") # os.environ["OSTYPE"] == "darwin10.0" # os.environ["MACHTYPE"] == "x86_64-apple-darwin10.0" # platform.dist() == ("", "", "") # platform.release() == "10.3.0" cacheDir = join(_libDir, "Caches", "PyGlossary") pip = "pip3" else: # GNU/Linux, Termux, FreeBSD, etc # should switch to "$XDG_CONFIG_HOME/pyglossary" in version 6.0.0 # which generally means ~/.config/pyglossary confDir = join(homeDir, ".pyglossary") cacheDir = join(homeDir, ".cache", "pyglossary") pip = "pip3" if "/com.termux/" in homeDir else "sudo pip3" elif os.sep == "\\": # Operating system is Windows # FIXME: default values _homeDrive = os.getenv("HOMEDRIVE", "") _homePath = os.getenv("HOMEPATH", "") homeDir = join(_homeDrive, _homePath) tmpDir = os.getenv("TEMP", "") _appData = os.getenv("APPDATA", "") confDir = join(_appData, "PyGlossary") # Windows Vista or older do not have LOCALAPPDATA _localAppData = os.getenv("LOCALAPPDATA") or abspath(join(_appData, "..", "Local")) cacheDir = join(_localAppData, "PyGlossary", "Cache") pip = "pip3" else: raise RuntimeError( f"Unknown path separator(os.sep=={os.sep!r}), unknown operating system!", ) pluginsDir = join(rootDir, "pyglossary", "plugins") confJsonFile = join(confDir, "config.json") rootConfJsonFile = join(dataDir, "config.json") userPluginsDir = join(confDir, "plugins") pyglossary-5.0.9/pyglossary/core_test.py000066400000000000000000000033301476751035500205320ustar00rootroot00000000000000from __future__ import annotations import logging __all__ = [ "MockLogHandler", "getMockLogger", ] class MockLogHandler(logging.Handler): def __init__(self) -> None: logging.Handler.__init__(self) self.clear() def clear(self) -> None: self.recordsByLevel: dict[int, list[logging.LogRecord]] = {} def emit(self, record: logging.LogRecord) -> None: level = record.levelno if level in self.recordsByLevel: self.recordsByLevel[level].append(record) else: self.recordsByLevel[level] = [record] def popLog(self, level: int, msg: str, partial=False) -> logging.LogRecord | None: if level not in self.recordsByLevel: return None records = self.recordsByLevel[level] for index, record in list(enumerate(records)): rec_msg = record.getMessage() if msg == rec_msg or (msg in rec_msg and partial): return records.pop(index) return None def printRemainingLogs(self, level, method: str = "") -> int: if level not in self.recordsByLevel: return 0 count = 0 for record in self.recordsByLevel[level]: count += 1 msg = self.format(record) print(f"{method}: {msg!r}") return count def printRemainingErrors(self, method: str = "") -> int: count = self.printRemainingLogs(logging.CRITICAL, method) count += self.printRemainingLogs(logging.ERROR, method) return count def printRemainingwWarnings(self, method: str = "") -> int: return self.printRemainingLogs(logging.WARNING, 
method) mockLog = None def getMockLogger(): global mockLog if mockLog is not None: return mockLog log = logging.getLogger("pyglossary") for handler in log.handlers: log.removeHandler(handler) mockLog = MockLogHandler() mockLog.setLevel(logging.WARNING) log.addHandler(mockLog) return mockLog pyglossary-5.0.9/pyglossary/ebook_base.py000066400000000000000000000313141476751035500206370ustar00rootroot00000000000000# -*- coding: utf-8 -*- # The MIT License (MIT) # Copyright © 2012-2016 Alberto Pettarin (alberto@albertopettarin.it) # Copyright © 2016-2019 Saeed Rasooli # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from __future__ import annotations import logging import os import shutil import tempfile import zipfile from datetime import datetime from os.path import join from typing import TYPE_CHECKING, cast from pyglossary.os_utils import indir, rmtree from pyglossary.repro_zipfile import ReproducibleZipFile as ZipFile if TYPE_CHECKING: import io from collections.abc import Generator from typing import Any from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["EbookWriter"] log = logging.getLogger("pyglossary") class GroupState: def __init__(self, writer: EbookWriter) -> None: self.writer = writer self.last_prefix = "" self.group_index = -1 self.reset() def reset(self) -> None: self.first_word = "" self.last_word = "" self.group_contents: list[str] = [] def is_new(self, prefix: str) -> bool: return bool(self.last_prefix) and prefix != self.last_prefix def add(self, entry: EntryType, prefix: str) -> None: word = entry.s_word defi = entry.defi.replace("
", "
").replace("
", "
") if not self.first_word: self.first_word = word self.last_word = word self.last_prefix = prefix self.group_contents.append(self.writer.format_group_content(word, defi)) class EbookWriter: """ A class representing a generic ebook containing a dictionary. It can be used to output a MOBI or an EPUB 2 container. The ebook must have an OPF, and one or more group XHTML files. Optionally, it can have a cover image, an NCX TOC, an index XHTML file. The actual file templates are provided by the caller. """ _keep: bool = False _group_by_prefix_length: int = 2 _include_index_page: bool = False _compress: bool = True _css: str = "" # path to css file, or "" _cover_path: str = "" # path to cover file, or "" CSS_CONTENTS = b"" GROUP_XHTML_TEMPLATE = "" GROUP_XHTML_INDEX_LINK = "" GROUP_XHTML_WORD_DEFINITION_TEMPLATE = "" GROUP_XHTML_WORD_DEFINITION_JOINER = "\n" MIMETYPE_CONTENTS = "" CONTAINER_XML_CONTENTS = "" GROUP_START_INDEX = 2 COVER_TEMPLATE = "{cover}" INDEX_XHTML_TEMPLATE = """ {title}

{indexTitle}

{links}

""" INDEX_XHTML_LINK_TEMPLATE = ( '
{label}' ) INDEX_XHTML_LINK_JOINER = " •\n" OPF_MANIFEST_ITEM_TEMPLATE = ( ' ' ) OPF_SPINE_ITEMREF_TEMPLATE = ' ' OPF_TEMPLATE = "" def __init__( self, glos: WriterGlossaryType, escape_strings: bool = False, # ignore_synonyms=False, # flatten_synonyms=False, ) -> None: self._glos = glos self._filename = "" self._escape_strings = escape_strings # self._ignore_synonyms = ignore_synonyms # self._flatten_synonyms = flatten_synonyms # Penelope's extra options: # "bookeen_collation_function": None, # bookeen format # "bookeen_install_file": False, # bookeen format # "group_by_prefix_merge_across_first": False, # "group_by_prefix_merge_min_size": 0, self._tmpDir = tempfile.mkdtemp() self.cover = "" self.files: list[dict[str, Any]] = [] self.manifest_files: list[dict[str, str]] = [] self._group_labels: list[str] = [] def finish(self) -> None: self._filename = "" def myOpen(self, fname: str, mode: str) -> io.IOBase: return cast( "io.IOBase", open( join(self._tmpDir, fname), mode=mode, ), ) def add_file( self, relative_path: str, contents: bytes, mode: int | None = None, ) -> None: if mode is None: mode = zipfile.ZIP_DEFLATED file_path = os.path.join(self._tmpDir, relative_path) with self.myOpen(file_path, "wb") as file_obj: file_obj.write(contents) self.files.append( { "path": relative_path, "mode": mode, }, ) def write_cover(self, cover_path: str) -> None: if not cover_path: return basename = os.path.basename(cover_path) with self.myOpen(cover_path, "rb") as cover_obj: cover = cover_obj.read() b = basename.lower() mimetype = "image/jpeg" if b.endswith(".png"): mimetype = "image/png" elif b.endswith(".gif"): mimetype = "image/gif" self.add_file_manifest("OEBPS/" + basename, basename, cover, mimetype) self.cover = basename def write_css(self, custom_css_path_absolute: str) -> None: css = self.CSS_CONTENTS if custom_css_path_absolute: try: with self.myOpen(custom_css_path_absolute, "rb") as css_obj: css = css_obj.read() # NESTED 4 except Exception: log.exception("") if not css: return self.add_file_manifest("OEBPS/style.css", "style.css", css, "text/css") def add_file_manifest( self, relative_path: str, id_: str, contents: bytes, mimetype: str, ) -> None: self.add_file(relative_path, contents) self.manifest_files.append( { "path": relative_path, "id": id_, "mimetype": mimetype, }, ) def get_group_xhtml_file_name_from_index(self, index: int) -> str: if index < self.GROUP_START_INDEX: # or index >= groupCount + self.GROUP_START_INDEX: # number of groups are not known, FIXME # so we can not say if the current group is the last or not return "#groupPage" return f"g{index:06d}.xhtml" def _add_group( self, group_labels: list[str], state: GroupState, ) -> None: if not state.last_prefix: return state.group_index += 1 index = state.group_index + self.GROUP_START_INDEX group_label = state.last_prefix if group_label != "SPECIAL": group_label = state.first_word + "–" + state.last_word log.debug(f"add_group: {state.group_index}, {state.last_prefix!r}") group_labels.append(group_label) previous_link = self.get_group_xhtml_file_name_from_index(index - 1) next_link = self.get_group_xhtml_file_name_from_index(index + 1) group_xhtml_path = self.get_group_xhtml_file_name_from_index(index) contents = self.GROUP_XHTML_TEMPLATE.format( title=group_label, group_title=group_label, previous_link=previous_link, index_link=( self.GROUP_XHTML_INDEX_LINK if self._include_index_page else "" ), next_link=next_link, group_contents=self.GROUP_XHTML_WORD_DEFINITION_JOINER.join( state.group_contents, ), ).encode("utf-8") 
self.add_file_manifest( "OEBPS/" + group_xhtml_path, group_xhtml_path, contents, "application/xhtml+xml", ) def write_data_entry(self, entry: EntryType) -> None: if entry.getFileName() == "style.css": self.add_file_manifest( "OEBPS/style.css", "style.css", entry.data, "text/css", ) def get_prefix(self, word: str) -> str: raise NotImplementedError def write_groups(self) -> Generator[None, EntryType, None]: # TODO: rtl=False option # TODO: handle alternates better (now shows word1|word2... in title) group_labels: list[str] = [] state = GroupState(self) while True: entry = yield if entry is None: break if entry.isData(): self.write_data_entry(entry) continue prefix = self.get_prefix(entry.s_word) if state.is_new(prefix): self._add_group(group_labels, state) state.reset() state.add(entry, prefix) self._add_group(group_labels, state) self._group_labels = group_labels def format_group_content( self, word: str, defi: str, variants: list[str] | None = None, # noqa: ARG002 ) -> str: return self.GROUP_XHTML_WORD_DEFINITION_TEMPLATE.format( headword=self.escape_if_needed(word), definition=self.escape_if_needed(defi), ) def escape_if_needed(self, string: str) -> str: if not self._escape_strings: return string return ( string.replace("&", "&") .replace('"', """) .replace("'", "'") .replace(">", ">") .replace("<", "<") ) def write_index(self, group_labels: list[str]) -> None: """group_labels: a list of labels.""" links = [] for label_i, label in enumerate(group_labels): ref = self.get_group_xhtml_file_name_from_index( self.GROUP_START_INDEX + label_i, ) links.append( self.INDEX_XHTML_LINK_TEMPLATE.format( ref=ref, label=label, ), ) links_str = self.INDEX_XHTML_LINK_JOINER.join(links) title = self._glos.getInfo("name") contents = self.INDEX_XHTML_TEMPLATE.format( title=title, indexTitle=title, links=links_str, ).encode("utf-8") self.add_file_manifest( "OEBPS/index.xhtml", "index.xhtml", contents, "application/xhtml+xml", ) def get_opf_contents( # noqa: F811 self, manifest_contents: str, spine_contents: str, ) -> bytes: cover = "" if self.cover: cover = self.COVER_TEMPLATE.format(cover=self.cover) creationDate = datetime.now().strftime("%Y-%m-%d") return self.OPF_TEMPLATE.format( identifier=self._glos.getInfo("uuid"), sourceLang=self._glos.sourceLangName, targetLang=self._glos.targetLangName, title=self._glos.getInfo("name"), creator=self._glos.author, copyright=self._glos.getInfo("copyright"), creationDate=creationDate, cover=cover, manifest=manifest_contents, spine=spine_contents, ).encode("utf-8") def write_opf(self) -> None: manifest_lines = [] spine_lines = [] for mi in self.manifest_files: manifest_lines.append( self.OPF_MANIFEST_ITEM_TEMPLATE.format( ref=mi["id"], id=mi["id"], mediaType=mi["mimetype"], ), ) if mi["mimetype"] == "application/xhtml+xml": spine_lines.append( self.OPF_SPINE_ITEMREF_TEMPLATE.format( id=mi["id"], # NESTED 4 ), ) manifest_contents = "\n".join(manifest_lines) spine_contents = "\n".join(spine_lines) opf_contents = self.get_opf_contents( manifest_contents, spine_contents, ) self.add_file("OEBPS/content.opf", opf_contents) def write_ncx(self, group_labels: list[str]) -> None: """ write_ncx. only for epub. 
""" def open(self, filename: str) -> None: self._filename = filename def _doZip(self) -> None: with ZipFile( self._filename, mode="w", compression=zipfile.ZIP_DEFLATED, ) as zipFp: for fileDict in self.files: zipFp.write( fileDict["path"], compress_type=fileDict["mode"], ) if not self._keep: rmtree(self._tmpDir) def write(self) -> Generator[None, EntryType, None]: filename = self._filename # self._group_by_prefix_length # self._include_index_page css = self._css cover_path = self._cover_path with indir(self._tmpDir): if cover_path: cover_path = os.path.abspath(cover_path) if css: css = os.path.abspath(css) os.makedirs("META-INF") os.makedirs("OEBPS") if self.MIMETYPE_CONTENTS: self.add_file( "mimetype", self.MIMETYPE_CONTENTS.encode("utf-8"), mode=zipfile.ZIP_STORED, ) if self.CONTAINER_XML_CONTENTS: self.add_file( "META-INF/container.xml", self.CONTAINER_XML_CONTENTS.encode("utf-8"), ) try: self.write_cover(cover_path) except Exception: log.exception("") self.write_css(css) yield from self.write_groups() group_labels = self._group_labels if self._include_index_page: self.write_index(group_labels) self.write_ncx(group_labels) self.write_opf() if self._compress: self._doZip() return if self._keep: shutil.copytree(self._tmpDir, filename) return if os.sep == "\\": shutil.copytree(self._tmpDir, filename) self._glos.addCleanupPath(self._tmpDir) return shutil.move(self._tmpDir, filename) pyglossary-5.0.9/pyglossary/entry.py000066400000000000000000000225001476751035500177040ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import logging import os import re import shutil from os.path import ( dirname, getsize, join, ) from typing import TYPE_CHECKING from .entry_base import BaseEntry, MultiStr from .iter_utils import unique_everseen from .text_utils import joinByBar if TYPE_CHECKING: from collections.abc import Callable from typing import ( Any, ) from .glossary_types import RawEntryType __all__ = ["DataEntry", "Entry"] log = logging.getLogger("pyglossary") # aka Resource class DataEntry(BaseEntry): # noqa: PLR0904 __slots__ = [ "_byteProgress", "_data", "_fname", "_tmpPath", ] @classmethod def isData(cls) -> bool: return True def __init__( self, fname: str, data: bytes = b"", tmpPath: str | None = None, byteProgress: tuple[int, int] | None = None, ) -> None: if data and tmpPath: os.makedirs(dirname(tmpPath), mode=0o755, exist_ok=True) with open(tmpPath, "wb") as toFile: toFile.write(data) data = b"" self._fname = fname self._data = data # bytes instance self._tmpPath = tmpPath self._byteProgress = byteProgress # tuple[int, int] | None def getFileName(self) -> str: return self._fname @property def data(self) -> bytes: if self._tmpPath: with open(self._tmpPath, "rb") as _file: return _file.read() else: return self._data def size(self) -> int: if self._tmpPath: return getsize(self._tmpPath) return len(self._data) def save(self, directory: str) -> str: fname = self._fname fpath = join(directory, fname) fdir = dirname(fpath) try: os.makedirs(fdir, mode=0o755, exist_ok=True) if self._tmpPath: shutil.move(self._tmpPath, fpath) self._tmpPath = fpath else: with open(fpath, "wb") as toFile: toFile.write(self._data) # NESTED 4 except FileNotFoundError as e: log.error(f"error in DataEntry.save: {e}") except Exception: log.exception(f"error while saving {fpath}") return "" return fpath @property def s_word(self) -> str: return self._fname @property def l_word(self) -> list[str]: return [self._fname] @property def lb_word(self) -> list[bytes]: return 
[self._fname.encode("utf-8")] @property def defi(self) -> str: return f"File: {self._fname}" def byteProgress(self) -> tuple[int, int] | None: return self._byteProgress @property def defiFormat(self) -> str: return "b" @defiFormat.setter def defiFormat(self, defiFormat: str) -> None: pass def detectDefiFormat(self, default: str = "") -> str: # noqa: ARG002, PLR6301 return "b" def addAlt(self, alt: str) -> None: pass def editFuncWord(self, func: Callable[[str], str]) -> None: pass def editFuncDefi(self, func: Callable[[str], str]) -> None: pass def strip(self) -> None: pass def replaceInWord(self, source: str, target: str) -> None: pass def replaceInDefi(self, source: str, target: str) -> None: pass def replace(self, source: str, target: str) -> None: pass def removeEmptyAndDuplicateAltWords(self) -> None: pass def stripFullHtml(self) -> str | None: pass # Too many public methods (21 > 20) class Entry(BaseEntry): # noqa: PLR0904 xdxfPattern = re.compile("^<k>[^<>]*</k>", re.DOTALL | re.IGNORECASE) htmlPattern = re.compile( ".*(?:" + "|".join( [ r"]", r"", r"]", r"]", r"]", r"", r"]*href=" r"]", r"]", r"]", r"]", r"]", r"]", r"]", r"]", r"]", r"]", r"]", ], ) + "|&[a-z]{2,8};|&#x?[0-9]{2,5};)", re.DOTALL | re.IGNORECASE, ) __slots__ = [ "_byteProgress", "_defi", "_defiFormat", "_word", ] @classmethod def isData(cls) -> bool: return False @staticmethod def getRawEntrySortKey( key: Callable[[list[str]], Any], ) -> Callable[[RawEntryType], Any]: def newKey(x: RawEntryType) -> Any: # noqa: ANN401 # x is rawEntry, so x[2:] is list[bytes]: list of words in bytes return key([b.decode("utf-8") for b in x[2:]]) # type: ignore return newKey def __init__( self, word: MultiStr, defi: str, defiFormat: str = "m", byteProgress: tuple[int, int] | None = None, ) -> None: """ Create a new Entry. word: string or a list of strings (including alternate words) defi: string or a list of strings (including alternate definitions) defiFormat (optional): definition format: "m": plain text "h": html "x": xdxf.
""" # TODO: type: Literal["m", "h", "x"] return self._defiFormat @defiFormat.setter def defiFormat(self, defiFormat: str) -> None: """ Set definition format. defiFormat: "m": plain text "h": html "x": xdxf. """ self._defiFormat = defiFormat def detectDefiFormat(self, default: str = "") -> str: if self._defiFormat == "h": return "h" if self._defiFormat == "x": return "x" if self._defiFormat == "m": if Entry.xdxfPattern.match(self.defi): self._defiFormat = "x" return "x" if Entry.htmlPattern.match(self.defi): self._defiFormat = "h" return "h" return "m" log.error(f"invalid defiFormat={self._defiFormat}, using {default!r}") return default def byteProgress(self) -> tuple[int, int] | None: return self._byteProgress def addAlt(self, alt: str) -> None: l_word = self.l_word l_word.append(alt) self._word = l_word def editFuncWord(self, func: Callable[[str], str]) -> None: """ Run function `func` on all the words. `func` must accept only one string as argument and return the modified string. """ if isinstance(self._word, str): self._word = func(self._word) return self._word = [func(st) for st in self._word] def editFuncDefi(self, func: Callable[[str], str]) -> None: """ Run function `func` on all the definitions. `func` must accept only one string as argument and return the modified string. """ self._defi = func(self._defi) @classmethod def _stripTrailingBR(cls, s: str) -> str: while s.endswith(("
", "
")): s = s[:-4] return s def strip(self) -> None: """Strip whitespaces from all words and definitions.""" self.editFuncWord(str.strip) self.editFuncDefi(str.strip) self.editFuncDefi(self._stripTrailingBR) def replaceInWord(self, source: str, target: str) -> None: """Replace string `source` with `target` in all words.""" if isinstance(self._word, str): self._word = self._word.replace(source, target) return self._word = [st.replace(source, target) for st in self._word] def replaceInDefi(self, source: str, target: str) -> None: """Replace string `source` with `target` in all definitions.""" self._defi = self._defi.replace(source, target) def replace(self, source: str, target: str) -> None: """Replace string `source` with `target` in all words and definitions.""" self.replaceInWord(source, target) self.replaceInDefi(source, target) def removeEmptyAndDuplicateAltWords(self) -> None: l_word = self.l_word if len(l_word) == 1: return l_word = [word for word in l_word if word] l_word = list(unique_everseen(l_word)) self._word = l_word def stripFullHtml(self) -> str | None: """Remove tags and returns error.""" defi = self._defi if not defi.startswith("<"): return None if defi.startswith(""): defi = defi[len("") :].strip() if not defi.startswith(" but no " elif not defi.startswith(""): return None i = defi.find("") if i == -1: return "'>' after None: self._word: str | list[str] def isData(self) -> bool: ... def getFileName(self) -> str: raise NotImplementedError @property def data(self) -> bytes: raise NotImplementedError def size(self) -> int: raise NotImplementedError def save(self, directory: str) -> str: raise NotImplementedError @property def s_word(self) -> str: raise NotImplementedError @property def l_word(self) -> list[str]: raise NotImplementedError @property def lb_word(self) -> list[bytes]: raise NotImplementedError @property def defi(self) -> str: raise NotImplementedError @property def b_word(self) -> bytes: """Returns bytes of word and all the alternate words separated by b"|".""" return self.s_word.encode("utf-8") @property def b_defi(self) -> bytes: """Returns definition in bytes.""" return self.defi.encode("utf-8") @property def defiFormat(self) -> str: # TODO: type: Literal["m", "h", "x", "b"] ... @defiFormat.setter def defiFormat(self, defiFormat: str) -> None: # TODO: type: Literal["m", "h", "x", "b"] ... def detectDefiFormat(self, default: str = "") -> str: ... def addAlt(self, alt: str) -> None: ... def editFuncWord(self, func: Callable[[str], str]) -> None: ... def editFuncDefi(self, func: Callable[[str], str]) -> None: ... def strip(self) -> None: ... def replaceInWord(self, source: str, target: str) -> None: ... def replaceInDefi(self, source: str, target: str) -> None: ... def replace(self, source: str, target: str) -> None: ... def byteProgress(self) -> tuple[int, int] | None: ... def removeEmptyAndDuplicateAltWords(self) -> None: ... def stripFullHtml(self) -> str | None: ... pyglossary-5.0.9/pyglossary/entry_filters.py000066400000000000000000000305111476751035500214350ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import logging import re import typing from typing import TYPE_CHECKING from . 
import core from .text_utils import ( fixUtf8, ) if TYPE_CHECKING: from pyglossary.langs import Lang from .glossary_types import Callable, EntryType __all__ = [ "EntryFilterType", "PreventDuplicateWords", "RemoveHtmlTagsAll", "ShowMaxMemoryUsage", "StripFullHtml", "entryFiltersRules", ] log = logging.getLogger("pyglossary") class _GlossaryType(typing.Protocol): @property def sourceLang(self) -> Lang | None: ... @property def targetLang(self) -> Lang | None: ... def progress(self, pos: int, total: int, unit: str = "entries") -> None: ... def __len__(self) -> int: ... class EntryFilterType(typing.Protocol): name: str = "" desc: str = "" falseComment: str = "" def __init__(self, glos: _GlossaryType) -> None: raise NotImplementedError def prepare(self) -> None: raise NotImplementedError def run(self, entry: EntryType) -> EntryType | None: raise NotImplementedError class EntryFilter: name: str = "" desc: str = "" falseComment: str = "" def __init__(self, glos: _GlossaryType) -> None: self.glos = glos def prepare(self) -> None: """Run this after glossary info is set and ready.""" def run(self, entry: EntryType) -> EntryType | None: # noqa: PLR6301 """ Return an Entry object, or None to skip. may return the same `entry`, or modify and return it, or return a new Entry object. """ return entry class TrimWhitespaces(EntryFilter): name = "trim_whitespaces" desc = "Remove leading/trailing whitespaces from word(s) and definition" def run(self, entry: EntryType) -> EntryType | None: # noqa: PLR6301 entry.strip() entry.replace("\r", "") return entry class NonEmptyWordFilter(EntryFilter): name = "non_empty_word" desc = "Skip entries with empty word" def run(self, entry: EntryType) -> EntryType | None: # noqa: PLR6301 if not entry.s_word: return None return entry class NonEmptyDefiFilter(EntryFilter): name = "non_empty_defi" desc = "Skip entries with empty definition" def run(self, entry: EntryType) -> EntryType | None: # noqa: PLR6301 if not entry.defi: return None return entry class RemoveEmptyAndDuplicateAltWords(EntryFilter): name = "remove_empty_dup_alt_words" desc = "Remove empty and duplicate alternate words" def run(self, entry: EntryType) -> EntryType | None: # noqa: PLR6301 entry.removeEmptyAndDuplicateAltWords() if not entry.l_word: return None return entry class FixUnicode(EntryFilter): name = "utf8_check" desc = "Fix Unicode in word(s) and definition" falseComment = "Do not fix Unicode in word(s) and definition" def run(self, entry: EntryType) -> EntryType | None: # noqa: PLR6301 entry.editFuncWord(fixUtf8) entry.editFuncDefi(fixUtf8) return entry class LowerWord(EntryFilter): name = "lower" desc = "Lowercase word(s)" falseComment = "Do not lowercase words before writing" def __init__(self, glos: _GlossaryType) -> None: EntryFilter.__init__(self, glos) self._re_word_ref = re.compile("href=[\"'](bword://[^\"']+)[\"']") def lowerWordRefs(self, defi: str) -> str: return self._re_word_ref.sub( lambda m: m.group(0).lower(), defi, ) def run(self, entry: EntryType) -> EntryType | None: entry.editFuncWord(str.lower) entry.editFuncDefi(self.lowerWordRefs) return entry class RTLDefi(EntryFilter): name = "rtl" desc = "Make definition right-to-left" def run(self, entry: EntryType) -> EntryType | None: # noqa: PLR6301 entry.editFuncDefi(lambda defi: f'
<div dir="rtl">{defi}</div>')
		return entry


class RemoveHtmlTagsAll(EntryFilter):
	name = "remove_html_all"
	desc = "Remove all HTML tags (not their contents) from definition"

	def __init__(
		self,
		glos: _GlossaryType,  # noqa: ARG002
	) -> None:
		self._p_pattern = re.compile(
			"<p( [^<>]*?)?>(.*?)</p>",
			re.DOTALL,
		)
		self._div_pattern = re.compile(
			"<div( [^<>]*?)?>(.*?)</div>",
			re.DOTALL,
		)
		self._br_pattern = re.compile(
			"<br[ /]*>",
			re.IGNORECASE,
		)

	def run(self, entry: EntryType) -> EntryType | None:
		from bs4 import BeautifulSoup

		def fixStr(st: str) -> str:
			st = self._p_pattern.sub("\\2\n", st)
			# replace any orphan </p> (left without an opening tag) with a newline
			st = st.replace("</p>", "\n")
			st = self._div_pattern.sub("\\2\n", st)
			# replace any orphan </div> (left without an opening tag) with a newline
			st = st.replace("</div>", "\n")
			st = self._br_pattern.sub("\n", st)
			st = BeautifulSoup(st, "lxml").text
			st = st.strip()
			return st  # noqa: RET504

		entry.editFuncDefi(fixStr)
		return entry


class RemoveHtmlTags(EntryFilter):
	name = "remove_html"
	desc = "Remove given comma-separated HTML tags (not their contents) from definition"

	def __init__(self, glos: _GlossaryType, tagsStr: str) -> None:
		tags = tagsStr.split(",")
		self.glos = glos
		self.tags = tags
		tagsRE = "|".join(self.tags)
		self.pattern = re.compile(f"</?({tagsRE})( [^>]*)?>")

	def run(self, entry: EntryType) -> EntryType | None:
		def fixStr(st: str) -> str:
			return self.pattern.sub("", st)

		entry.editFuncDefi(fixStr)
		return entry


class StripFullHtml(EntryFilter):
	name = "strip_full_html"
	desc = "Replace a full HTML document with its body"

	def __init__(
		self,
		glos: _GlossaryType,  # noqa: ARG002
		errorHandler: Callable[[EntryType, str], None] | None,
	) -> None:
		self._errorHandler = errorHandler

	def run(self, entry: EntryType) -> EntryType | None:
		err = entry.stripFullHtml()
		if err and self._errorHandler:
			self._errorHandler(entry, err)
		return entry


# FIXME: it is not safe to lowercase everything between < and >
# including class names, element ids/names, scripts,
# etc. How can we fix that?
class NormalizeHtml(EntryFilter):
	name = "normalize_html"
	desc = "Normalize HTML tags in definition (WIP)"

	_tags = (
		"a", "font", "i", "b", "u", "p", "sup",
		"div", "span",
		"table", "tr", "th", "td",
		"ul", "ol", "li",
		"img", "br", "hr",
	)

	def __init__(
		self,
		glos: _GlossaryType,  # noqa: ARG002
	) -> None:
		log.info("Normalizing HTML tags")
		self._pattern = re.compile(
			"(" + "|".join(rf"</?{tag}[^<>]*?>" for tag in self._tags) + ")",
			re.DOTALL | re.IGNORECASE,
		)

	@staticmethod
	def _subLower(m: re.Match) -> str:
		return m.group(0).lower()

	def _fixDefi(self, st: str) -> str:
		return self._pattern.sub(self._subLower, st)

	def run(self, entry: EntryType) -> EntryType | None:
		entry.editFuncDefi(self._fixDefi)
		return entry


class SkipDataEntry(EntryFilter):
	name = "skip_resources"
	desc = "Skip resources / data files"

	def run(self, entry: EntryType) -> EntryType | None:  # noqa: PLR6301
		if entry.isData():
			return None
		return entry


class LanguageCleanup(EntryFilter):
	name = "lang"
	desc = "Language-specific cleanup/fixes"

	def __init__(self, glos: _GlossaryType) -> None:
		EntryFilter.__init__(self, glos)
		self._run_func: Callable[[EntryType], EntryType | None] | None = None

	def prepare(self) -> None:
		langCodes = {
			lang.code
			for lang in (self.glos.sourceLang, self.glos.targetLang)
			if lang is not None
		}
		if "fa" in langCodes:
			self._run_func = self.run_fa
			log.info("Using Persian filter")

	def run_fa(self, entry: EntryType) -> EntryType | None:  # noqa: PLR6301
		from .persian_utils import faEditStr

		entry.editFuncWord(faEditStr)
		entry.editFuncDefi(faEditStr)
		return entry

	def run(self, entry: EntryType) -> EntryType | None:
		if self._run_func:
			return self._run_func(entry)
		return entry


class TextListSymbolCleanup(EntryFilter):
	"""
	Symbols like ♦ (diamond) ● (black circle) or * (star) are used in
	some plaintext or even html glossaries to represent items of a list
	(like <li> in proper html). This EntryFilter cleans up
	spacing/newline issues around them.
	"""

	name = "text_list_symbol_cleanup"
	desc = "Text List Symbol Cleanup"

	winNewlinePattern = re.compile("[\r\n]+")
	spacesNewlinePattern = re.compile(" *\n *")
	blocksNewlinePattern = re.compile("♦\n+♦")

	def cleanDefi(self, st: str) -> str:
		st = st.replace("♦ ", "♦ ")
		st = self.winNewlinePattern.sub("\n", st)
		st = self.spacesNewlinePattern.sub("\n", st)
		st = self.blocksNewlinePattern.sub("♦", st)
		st = st.removesuffix("<br")
		return st

	def run(self, entry: EntryType) -> EntryType | None:
		entry.editFuncDefi(self.cleanDefi)
		return entry


class PreventDuplicateWords(EntryFilter):
	name = "prevent_duplicate_words"
	desc = "Prevent duplicate words"

	def __init__(self, glos: _GlossaryType) -> None:
		EntryFilter.__init__(self, glos)
		self._wordSet: set[str] = set()

	def run(self, entry: EntryType) -> EntryType | None:
		if entry.isData():
			return entry
		wordSet = self._wordSet
		word = entry.s_word
		if word not in wordSet:
			wordSet.add(word)
			return entry
		n = 2
		while f"{word} ({n})" in wordSet:
			n += 1
		word = f"{word} ({n})"
		wordSet.add(word)
		entry._word = word  # type: ignore  # use entry.editFuncWord?
		return entry


class SkipEntriesWithDuplicateHeadword(EntryFilter):
	name = "skip_duplicate_headword"
	desc = "Skip entries with a duplicate headword"

	def __init__(self, glos: _GlossaryType) -> None:
		EntryFilter.__init__(self, glos)
		self._wset: set[str] = set()

	def run(self, entry: EntryType) -> EntryType | None:
		word = entry.l_word[0]
		if word in self._wset:
			return None
		self._wset.add(word)
		return entry


class TrimArabicDiacritics(EntryFilter):
	name = "trim_arabic_diacritics"
	desc = "Trim Arabic diacritics from headword"

	def __init__(self, glos: _GlossaryType) -> None:
		EntryFilter.__init__(self, glos)
		self._pat = re.compile("[\u064b-\u065f]")

	def run(self, entry: EntryType) -> EntryType | None:
		words = list(entry.l_word)
		hw = words[0]
		hw_t = self._pat.sub("", hw)
		hw_t = hw_t.replace("\u0622", "\u0627").replace("\u0623", "\u0627")
		if hw_t == hw or not hw_t:
			return entry
		entry._word = [hw_t, *words]  # type: ignore
		return entry


class UnescapeWordLinks(EntryFilter):
	name = "unescape_word_links"
	desc = "Unescape Word Links"

	def __init__(self, glos: _GlossaryType) -> None:
		from pyglossary.html_utils import unescape_unicode

		EntryFilter.__init__(self, glos)
		self._pat = re.compile(
			r'href="bword://[^<>"]*&#?\w+;[^<>"]*"',
			re.IGNORECASE,
		)
		self._unescape = unescape_unicode

	def _sub(self, m: re.Match) -> str:
		return self._unescape(m.group(0))

	def run(self, entry: EntryType) -> EntryType | None:
		if entry.isData():
			return entry
		entry._defi = self._pat.sub(self._sub, entry.defi)  # type: ignore
		return entry


class ShowMaxMemoryUsage(EntryFilter):
	name = "max_memory_usage"
	desc = "Show Max Memory Usage"

	MAX_WORD_LEN = 30

	def __init__(self, glos: _GlossaryType) -> None:
		import os

		import psutil

		EntryFilter.__init__(self, glos)
		self._process = psutil.Process(os.getpid())
		self._max_mem_usage = 0

	def run(self, entry: EntryType) -> EntryType | None:
		usage = self._process.memory_info().rss // 1024
		if usage > self._max_mem_usage:
			self._max_mem_usage = usage
			word = entry.s_word
			if len(word) > self.MAX_WORD_LEN:
				word = word[: self.MAX_WORD_LEN - 3] + "..."
core.trace(log, f"MaxMemUsage: {usage:,}, {word=}") return entry entryFiltersRules = [ (None, True, TrimWhitespaces), (None, True, NonEmptyWordFilter), ("skip_resources", False, SkipDataEntry), ("utf8_check", False, FixUnicode), ("lower", False, LowerWord), ("skip_duplicate_headword", False, SkipEntriesWithDuplicateHeadword), ("trim_arabic_diacritics", False, TrimArabicDiacritics), ("rtl", False, RTLDefi), ("remove_html_all", False, RemoveHtmlTagsAll), ("remove_html", "", RemoveHtmlTags), ("normalize_html", False, NormalizeHtml), ("unescape_word_links", False, UnescapeWordLinks), (None, True, LanguageCleanup), # ------------------------------------- # TODO # ("text_list_symbol_cleanup", False, TextListSymbolCleanup), # ------------------------------------- (None, True, NonEmptyWordFilter), (None, True, NonEmptyDefiFilter), (None, True, RemoveEmptyAndDuplicateAltWords), # ------------------------------------- # filters that are enabled by plugins using glossary methods: (None, False, PreventDuplicateWords), (None, False, StripFullHtml), # ------------------------------------- # filters are added conditionally (other than with config or glossary methods): (None, False, ShowMaxMemoryUsage), ] pyglossary-5.0.9/pyglossary/entry_list.py000066400000000000000000000051611476751035500207430ustar00rootroot00000000000000# -*- coding: utf-8 -*- # entry_list.py # # Copyright © 2020-2023 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . 
from __future__ import annotations import logging from time import perf_counter as now from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Callable, Iterator from typing import Any from .glossary_types import EntryType, RawEntryType from .sort_keys import NamedSortKey from .entry import Entry __all__ = ["EntryList"] log = logging.getLogger("pyglossary") class EntryList: def __init__( self, entryToRaw: Callable[[EntryType], RawEntryType], entryFromRaw: Callable[[RawEntryType], EntryType], ) -> None: self._l: list[RawEntryType] = [] self._entryToRaw = entryToRaw self._entryFromRaw = entryFromRaw self._sortKey: Callable[[RawEntryType], Any] | None = None def append(self, entry: EntryType) -> None: self._l.append(self._entryToRaw(entry)) def clear(self) -> None: self._l.clear() def __len__(self) -> int: return len(self._l) def __iter__(self) -> Iterator[EntryType]: entryFromRaw = self._entryFromRaw for rawEntry in self._l: yield entryFromRaw(rawEntry) def hasSortKey(self) -> bool: return bool(self._sortKey) def setSortKey( self, namedSortKey: NamedSortKey, sortEncoding: str | None, writeOptions: dict[str, Any], ) -> None: if namedSortKey.normal is None: raise NotImplementedError( f"sort key {namedSortKey.name!r} is not supported", ) kwargs = writeOptions.copy() if sortEncoding: kwargs["sortEncoding"] = sortEncoding sortKey = namedSortKey.normal(**kwargs) self._sortKey = Entry.getRawEntrySortKey( key=sortKey, ) def sort(self) -> None: if self._sortKey is None: raise ValueError("EntryList.sort: sortKey is not set") t0 = now() self._l.sort(key=self._sortKey) log.info(f"Sorting took {now() - t0:.1f} seconds") def close(self) -> None: pass pyglossary-5.0.9/pyglossary/entry_merge.py000066400000000000000000000043011476751035500210620ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING from pyglossary.entry import Entry from pyglossary.xdxf.transform import XdxfTransformer if TYPE_CHECKING: from collections.abc import Iterator from pyglossary.glossary_types import EntryType _xdxfTr: XdxfTransformer | None = None def xdxf_transform(text: str) -> str: global _xdxfTr if _xdxfTr is None: # if self._xsl: # self._xdxfTr = XslXdxfTransformer(encoding="utf-8") # return _xdxfTr = XdxfTransformer(encoding="utf-8") return _xdxfTr.transformByInnerString(text) # type: ignore def getHtmlDefi(entry: EntryType) -> str: if entry.defiFormat == "m": return f"
<pre>{entry.defi}</pre>"
	if entry.defiFormat == "x":
		return xdxf_transform(entry.defi)
	# now assume it's html
	defi = entry.defi
	if len(entry.l_word) > 1:
		defi = "".join(f"<b>{word}</b><br/>" for word in entry.l_word) + defi
	return defi


def mergeHtmlEntriesWithSameHeadword(
	entryIter: Iterator[EntryType],
) -> Iterator[EntryType]:
	try:
		last: EntryType | None = next(entryIter)
	except StopIteration:
		return
	last.detectDefiFormat()
	for entry in entryIter:
		if entry.isData():
			if last is not None:
				yield last
			last = None
			continue
		entry.detectDefiFormat()
		if last is None:
			last = entry
			continue
		if entry.l_word[0] != last.l_word[0]:
			yield last
			last = entry
			continue
		defi = getHtmlDefi(last) + "\n<hr/>
    \n" + getHtmlDefi(entry) last = Entry( # pyright: ignore entry.l_word[0], defi, defiFormat="h", ) if last is not None: yield last def mergePlaintextEntriesWithSameHeadword( entryIter: Iterator[EntryType], ) -> Iterator[EntryType]: try: last: EntryType | None = next(entryIter) except StopIteration: return for entry in entryIter: if entry.isData(): if last is not None: yield last last = None continue if last is None: last = entry continue if entry.l_word[0] != last.l_word[0]: yield last last = entry continue defi = ( last.defi + "\n\n" + "-" * 40 + "\n" + ", ".join(entry.l_word) + "\n" + entry.defi ) last = Entry( # pyright: ignore entry.l_word[0], defi, defiFormat="m", ) if last is not None: yield last pyglossary-5.0.9/pyglossary/file_utils.py000066400000000000000000000010141476751035500206770ustar00rootroot00000000000000from __future__ import annotations import sys from itertools import ( repeat, takewhile, ) __all__ = ["fileCountLines"] def fileCountLines(filename: str, newline: bytes = b"\n") -> int: with open(filename, "rb") as _file: bufgen = takewhile( lambda x: x, # predicate (_file.read(1024 * 1024) for _ in repeat(None)), # iterable ) return sum(buf.count(newline) for buf in bufgen if buf) if __name__ == "__main__": for filename in sys.argv[1:]: print(fileCountLines(filename), filename) # noqa: T201 pyglossary-5.0.9/pyglossary/flags.py000066400000000000000000000013351476751035500176420ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from typing import TypeAlias __all__ = [ "ALWAYS", "DEFAULT_NO", "DEFAULT_YES", "NEVER", "StrWithDesc", "YesNoAlwaysNever", "flagsByName", ] flagsByName = {} class StrWithDesc(str): desc: str __slots__ = ["desc"] def __new__(cls: type, name: str, desc: str) -> StrWithDesc: s: StrWithDesc = str.__new__(cls, name) s.desc = desc flagsByName[name] = s return s ALWAYS = StrWithDesc("always", "Always") DEFAULT_YES = StrWithDesc("default_yes", "Yes (by default)") DEFAULT_NO = StrWithDesc("default_no", "No (by default)") NEVER = StrWithDesc("never", "Never") # to satisfy mypy: YesNoAlwaysNever: TypeAlias = StrWithDesc pyglossary-5.0.9/pyglossary/glossary.py000066400000000000000000000137671476751035500204250ustar00rootroot00000000000000# -*- coding: utf-8 -*- # glossary.py # # Copyright © 2008-2022 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . 
from __future__ import annotations import warnings from os.path import relpath from time import perf_counter as now from typing import TYPE_CHECKING from pyglossary.plugin_handler import PluginHandler from .core import log from .glossary_v2 import ConvertArgs, Error, GlossaryCommon, ReadError, WriteError from .sort_keys import lookupSortKey if TYPE_CHECKING: from typing import Any from .glossary_types import EntryType from .plugin_handler import DetectedFormat from .ui_type import UIType __all__ = ["Glossary"] class Glossary(GlossaryCommon, PluginHandler): GLOSSARY_API_VERSION = "1.0" def __init__( self, info: dict[str, str] | None = None, ui: UIType | None = None, # noqa: F821 ) -> None: """ info: dict instance, or None no need to copy dict instance before passing here we will not reference to it. """ warnings.warn( "This class is deprecated. Use glossary_v2.Glossary", category=DeprecationWarning, stacklevel=2, ) GlossaryCommon.__init__(self, ui=ui) if info: if not isinstance(info, dict): raise TypeError( "Glossary: `info` has invalid type, dict or OrderedDict expected", ) for key, value in info.items(): self.setInfo(key, value) def titleElement( # noqa: ANN201 self, hf, # noqa: ANN001, type: ignore sample: str = "", ): # type: ignore return hf.element(self.titleTag(sample)) def read( self, filename: str, direct: bool = False, progressbar: bool = True, **kwargs, # noqa: ANN003 ) -> bool: """ Read from a given glossary file. Parameters ---------- filename (str): name/path of input file formatName or format (str): name of input format, or "" to detect from file extension direct (bool): enable direct mode progressbar (bool): enable progressbar. read-options can be passed as additional keyword arguments """ if type(filename) is not str: raise TypeError("filename must be str") # don't allow direct=False when there are readers # (read is called before with direct=True) if self._readers and not direct: raise ValueError( f"there are already {len(self._readers)} readers" ", you can not read with direct=False mode", ) self._setTmpDataDir(filename) self._progressbar = progressbar self._read( filename=filename, direct=direct, **kwargs, ) return True def addEntryObj(self, entry: EntryType) -> None: self._data.append(entry) @staticmethod def updateIter() -> None: log.warning("calling glos.updateIter() is no longer needed.") def sortWords( self, sortKeyName: str = "headword_lower", sortEncoding: str = "utf-8", writeOptions: dict[str, Any] | None = None, ) -> None: """sortKeyName: see doc/sort-key.md.""" if self._readers: raise NotImplementedError( "can not use sortWords in direct mode", ) if self._sqlite: raise NotImplementedError( "can not use sortWords in SQLite mode", ) namedSortKey = lookupSortKey(sortKeyName) if namedSortKey is None: log.critical(f"invalid {sortKeyName = }") return if not sortEncoding: sortEncoding = "utf-8" if writeOptions is None: writeOptions = {} t0 = now() self._data.setSortKey( namedSortKey=namedSortKey, sortEncoding=sortEncoding, writeOptions=writeOptions, ) self._data.sort() log.info(f"Sorting took {now() - t0:.1f} seconds") self._sort = True self._iter = self._loadedEntryGen() @classmethod def detectInputFormat( # type: ignore # pyright: ignore[reportIncompatibleMethodOverride] cls, *args, **kwargs, ) -> DetectedFormat | None: try: return PluginHandler.detectInputFormat(*args, **kwargs) except Error as e: log.critical(str(e)) return None @classmethod def detectOutputFormat( # type: ignore # pyright: ignore[reportIncompatibleMethodOverride] cls, *args, **kwargs, ) -> 
DetectedFormat | None: try: return PluginHandler.detectOutputFormat(*args, **kwargs) except Error as e: log.critical(str(e)) return None def convert( # noqa: PLR0913 self, inputFilename: str, inputFormat: str = "", direct: bool | None = None, progressbar: bool = True, outputFilename: str = "", outputFormat: str = "", sort: bool | None = None, sortKeyName: str | None = None, sortEncoding: str | None = None, readOptions: dict[str, Any] | None = None, writeOptions: dict[str, Any] | None = None, sqlite: bool | None = None, infoOverride: dict[str, str] | None = None, ) -> str | None: self.progressbar = progressbar try: return GlossaryCommon.convertV2( self, ConvertArgs( inputFilename=inputFilename, inputFormat=inputFormat, direct=direct, outputFilename=outputFilename, outputFormat=outputFormat, sort=sort, sortKeyName=sortKeyName, sortEncoding=sortEncoding, readOptions=readOptions, writeOptions=writeOptions, sqlite=sqlite, infoOverride=infoOverride, ), ) except ReadError as e: log.critical(str(e)) log.critical(f"Reading file {relpath(inputFilename)!r} failed.") except WriteError as e: log.critical(str(e)) log.critical(f"Writing file {relpath(outputFilename)!r} failed.") except Error as e: log.critical(str(e)) self.cleanup() return None pyglossary-5.0.9/pyglossary/glossary_info.py000066400000000000000000000126661476751035500214350ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2008-2022 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . from __future__ import annotations import logging from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Iterator from .info import ( c_author, c_name, c_publisher, c_sourceLang, c_targetLang, infoKeysAliasDict, ) from .langs import Lang, langDict from .text_utils import ( fixUtf8, ) __all__ = ["GlossaryInfo"] log = logging.getLogger("pyglossary") class GlossaryInfo: def __init__(self) -> None: self._info: dict[str, str] = {} def infoKeys(self) -> list[str]: return list(self._info) def iterInfo(self) -> Iterator[tuple[str, str]]: return iter(self._info.items()) def getInfo(self, key: str) -> str: if not isinstance(key, str): raise TypeError(f"invalid {key=}, must be str") return self._info.get( infoKeysAliasDict.get(key.lower(), key), "", ) def setInfo(self, key: str, value: str | None) -> None: if value is None: try: del self._info[key] except KeyError: pass return if not isinstance(key, str): raise TypeError(f"invalid {key=}, must be str") key = fixUtf8(key) value = fixUtf8(str(value)) key = infoKeysAliasDict.get(key.lower(), key) self._info[key] = value def getExtraInfos(self, excludeKeys: list[str]) -> dict[str, str]: """ excludeKeys: a list of (basic) info keys to be excluded returns a dict including the rest of info keys, with associated values. 
""" excludeKeySet = set() for key in excludeKeys: excludeKeySet.add(key) key2 = infoKeysAliasDict.get(key.lower()) if key2: excludeKeySet.add(key2) extra = {} for key, value in self._info.items(): if key in excludeKeySet: continue extra[key] = value return extra @property def author(self) -> str: for key in (c_author, c_publisher): value = self._info.get(key, "") if value: return value return "" @staticmethod def _getLangByStr(st: str) -> Lang | None: lang = langDict[st] if lang: return lang log.error(f"unknown language {st!r}") return None def _getLangByInfoKey(self, key: str) -> Lang | None: st = self._info.get(key, "") if not st: return None return self._getLangByStr(st) @property def sourceLang(self) -> Lang | None: return self._getLangByInfoKey(c_sourceLang) @sourceLang.setter def sourceLang(self, lang: Lang) -> None: if not isinstance(lang, Lang): raise TypeError(f"invalid {lang=}, must be a Lang object") self._info[c_sourceLang] = lang.name @property def targetLang(self) -> Lang | None: return self._getLangByInfoKey(c_targetLang) @targetLang.setter def targetLang(self, lang: Lang) -> None: if not isinstance(lang, Lang): raise TypeError(f"invalid {lang=}, must be a Lang object") self._info[c_targetLang] = lang.name @property def sourceLangName(self) -> str: lang = self.sourceLang if lang is None: return "" return lang.name @sourceLangName.setter def sourceLangName(self, langName: str) -> None: if not langName: self._info[c_sourceLang] = "" return lang = self._getLangByStr(langName) if lang is None: return self._info[c_sourceLang] = lang.name @property def targetLangName(self) -> str: lang = self.targetLang if lang is None: return "" return lang.name @targetLangName.setter def targetLangName(self, langName: str) -> None: if not langName: self._info[c_targetLang] = "" return lang = self._getLangByStr(langName) if lang is None: return self._info[c_targetLang] = lang.name def titleTag(self, sample: str) -> str: from .langs.writing_system import getWritingSystemFromText ws = getWritingSystemFromText(sample) if ws and ws.name != "Latin": return ws.titleTag sourceLang = self.sourceLang if sourceLang: return sourceLang.titleTag return "b" def detectLangsFromName(self) -> None: """Extract sourceLang and targetLang from glossary name/title.""" import re name = self._info.get(c_name) if not name: return if self._info.get(c_sourceLang): return langNames = [] def checkPart(part: str) -> None: for match in re.findall(r"\w\w\w*", part): # print(f"{match = }") lang = langDict[match] if lang is None: continue langNames.append(lang.name) for part in re.split("-| to ", name): # print(f"{part = }") checkPart(part) if len(langNames) >= 2: # noqa: PLR2004 break if len(langNames) < 2: # noqa: PLR2004 return if len(langNames) > 2: # noqa: PLR2004 log.info(f"detectLangsFromName: {langNames = }") log.info( f"Detected sourceLang={langNames[0]!r}, " f"targetLang={langNames[1]!r} " f"from glossary name {name!r}", ) self.sourceLangName = langNames[0] self.targetLangName = langNames[1] pyglossary-5.0.9/pyglossary/glossary_progress.py000066400000000000000000000043521476751035500223370ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING from .core import log if TYPE_CHECKING: from collections.abc import Iterable, Iterator from typing import Protocol from pyglossary.glossary_types import EntryType from .ui_type import UIType class ReaderType(Protocol): def __iter__(self) -> Iterator[EntryType]: ... def __len__(self) -> int: ... 
__all__ = ["GlossaryProgress"] class GlossaryProgress: def __init__( self, ui: UIType | None = None, # noqa: F821 ) -> None: self._ui = ui self._progressbar = True def clear(self) -> None: self._progressbar = True @property def progressbar(self) -> bool: return self._ui is not None and self._progressbar @progressbar.setter def progressbar(self, enabled: bool) -> None: self._progressbar = enabled def progressInit( self, *args, # noqa: ANN002 ) -> None: if self._ui and self._progressbar: self._ui.progressInit(*args) def progress(self, pos: int, total: int, unit: str = "entries") -> None: if total == 0: log.warning(f"{pos=}, {total=}") return if self._ui is None: return self._ui.progress( min(pos + 1, total) / total, f"{pos:,} / {total:,} {unit}", ) def progressEnd(self) -> None: if self._ui and self._progressbar: self._ui.progressEnd() def _byteProgressIter( self, iterable: Iterable[EntryType], ) -> Iterator[EntryType]: lastPos = 0 for entry in iterable: if entry is None: continue yield entry if (bp := entry.byteProgress()) and bp[0] > lastPos + 100_000: self.progress(bp[0], bp[1], unit="bytes") lastPos = bp[0] def _wordCountProgressIter( self, iterable: Iterable[EntryType], wordCount: int, ) -> Iterator[EntryType]: wordCountThreshold = max( 1, min( 500, wordCount // 200, ), ) for index, entry in enumerate(iterable): yield entry if index % wordCountThreshold == 0: self.progress(index, wordCount) def _progressIter(self, reader: ReaderType) -> Iterable[EntryType]: if not self.progressbar: return reader if getattr(reader, "useByteProgress", False): return self._byteProgressIter(reader) if (wordCount := len(reader)) > 0: return self._wordCountProgressIter(reader, wordCount) return self._byteProgressIter(reader) pyglossary-5.0.9/pyglossary/glossary_types.py000066400000000000000000000110401476751035500216270ustar00rootroot00000000000000from __future__ import annotations import typing from collections.abc import ( Callable, Iterator, Sequence, ) # -*- coding: utf-8 -*- from typing import ( TYPE_CHECKING, Any, ) if TYPE_CHECKING: from typing import TypeAlias from .langs import Lang from .sort_keys import NamedSortKey __all__ = [ "Callable", "EntryListType", "EntryType", "RawEntryType", "ReaderGlossaryType", "WriterGlossaryType", ] MultiStr: TypeAlias = "str | list[str]" # str(rawEntry[0]): defiFormat or "" # rawEntry[1]: b_defi # rawEntry[2:]: b_word_list RawEntryType: TypeAlias = Sequence[bytes] class EntryType(typing.Protocol): # noqa: PLR0904 # def __init__(self) -> None: ... def isData(self) -> bool: ... def getFileName(self) -> str: ... @property def data(self) -> bytes: ... def size(self) -> int: ... def save(self, directory: str) -> str: ... @property def s_word(self) -> str: ... @property def l_word(self) -> list[str]: ... @property def lb_word(self) -> list[bytes]: ... @property def defi(self) -> str: ... @property def b_word(self) -> bytes: ... @property def b_defi(self) -> bytes: ... @property def defiFormat(self) -> str: # TODO: type: Literal["m", "h", "x", "b"] ... @defiFormat.setter def defiFormat(self, defiFormat: str) -> None: # TODO: type: Literal["m", "h", "x", "b"] ... def detectDefiFormat(self, default: str = "") -> str: ... def addAlt(self, alt: str) -> None: ... def editFuncWord(self, func: Callable[[str], str]) -> None: ... def editFuncDefi(self, func: Callable[[str], str]) -> None: ... def strip(self) -> None: ... def replaceInWord(self, source: str, target: str) -> None: ... def replaceInDefi(self, source: str, target: str) -> None: ... 
def replace(self, source: str, target: str) -> None: ... def byteProgress(self) -> tuple[int, int] | None: ... def removeEmptyAndDuplicateAltWords(self) -> None: ... def stripFullHtml(self) -> str | None: ... class EntryListType(typing.Protocol): def __init__( self, entryToRaw: Callable[[EntryType], RawEntryType], entryFromRaw: Callable[[RawEntryType], EntryType], ) -> None: ... def append(self, entry: EntryType) -> None: ... def clear(self) -> None: ... def __len__(self) -> int: ... def __iter__(self) -> Iterator[EntryType]: ... def hasSortKey(self) -> bool: ... def setSortKey( self, namedSortKey: NamedSortKey, sortEncoding: str | None, writeOptions: dict[str, Any], ) -> None: ... def sort(self) -> None: ... def close(self) -> None: ... class GlossaryInfoCommonType(typing.Protocol): def getInfo(self, key: str) -> str: ... def setInfo(self, key: str, value: str) -> None: ... @property def sourceLang(self) -> Lang | None: ... @property def targetLang(self) -> Lang | None: ... @property def sourceLangName(self) -> str: ... @sourceLangName.setter def sourceLangName(self, langName: str) -> None: ... @property def targetLangName(self) -> str: ... @targetLangName.setter def targetLangName(self, langName: str) -> None: ... @property def author(self) -> str: ... class ReaderGlossaryType(GlossaryInfoCommonType): def newEntry( self, word: MultiStr, defi: str, defiFormat: str = "", byteProgress: tuple[int, int] | None = None, ) -> EntryType: ... def newDataEntry(self, fname: str, data: bytes) -> EntryType: ... @property def progressbar(self) -> bool: ... def setDefaultDefiFormat(self, defiFormat: str) -> None: ... def titleTag(self, sample: str) -> str: ... @property def alts(self) -> bool: ... def getConfig(self, name: str, default: str | None) -> str | None: ... class WriterGlossaryType(GlossaryInfoCommonType): # def __len__(self) -> int: ... # @property # def filename(self) -> str: ... def __iter__(self) -> Iterator[EntryType]: ... def collectDefiFormat( self, maxCount: int, ) -> dict[str, float] | None: ... def iterInfo(self) -> Iterator[tuple[str, str]]: ... def getExtraInfos(self, excludeKeys: list[str]) -> dict[str, str]: ... def wordTitleStr( self, word: str, sample: str = "", class_: str = "", ) -> str: ... @property def tmpDataDir(self) -> str: ... def addCleanupPath(self, path: str) -> None: ... @property def readOptions(self) -> dict | None: ... @property def sqlite(self) -> bool: ... def stripFullHtml( self, errorHandler: Callable[[EntryType, str], None] | None = None, ) -> None: ... def preventDuplicateWords(self) -> None: ... def mergeEntriesWithSameHeadwordPlaintext(self) -> None: ... def removeHtmlTagsAll(self) -> None: ... def getConfig(self, name: str, default: str | None) -> str | None: ... pyglossary-5.0.9/pyglossary/glossary_utils.py000066400000000000000000000034311476751035500216300ustar00rootroot00000000000000# -*- coding: utf-8 -*- # glossary_utils.py # # Copyright © 2008-2022 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . from __future__ import annotations import logging from os.path import ( splitext, ) from .compression import ( stdCompressions, ) __all__ = ["Error", "ReadError", "WriteError", "splitFilenameExt"] log = logging.getLogger("pyglossary") MAX_EXT_LEN = 4 # FIXME class Error(Exception): pass class ReadError(Error): pass class WriteError(Error): pass def splitFilenameExt( filename: str = "", ) -> tuple[str, str, str, str]: """Return (filenameNoExt, filename, ext, compression).""" compression = "" filenameNoExt, ext = splitext(filename) ext = ext.lower() if not ext and len(filenameNoExt) <= MAX_EXT_LEN: filenameNoExt, ext = "", filenameNoExt if not ext: return filename, filename, "", "" if ext[1:] in {*stdCompressions, "zip", "dz"}: compression = ext[1:] filename = filenameNoExt filenameNoExt, ext = splitext(filename) ext = ext.lower() return filenameNoExt, filename, ext, compression pyglossary-5.0.9/pyglossary/glossary_v2.py000066400000000000000000001002231476751035500210140ustar00rootroot00000000000000# -*- coding: utf-8 -*- # glossary.py # # Copyright © 2008-2022 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . from __future__ import annotations import os import os.path import warnings from contextlib import suppress from dataclasses import dataclass from os.path import ( isdir, isfile, join, relpath, ) from time import perf_counter as now from typing import TYPE_CHECKING, cast from uuid import uuid1 from pyglossary.queued_iter import QueuedIterator from . 
import core from .core import ( cacheDir, log, ) from .entry import DataEntry, Entry from .entry_filters import ( EntryFilterType, PreventDuplicateWords, RemoveHtmlTagsAll, ShowMaxMemoryUsage, StripFullHtml, entryFiltersRules, ) from .entry_list import EntryList from .flags import ( ALWAYS, DEFAULT_YES, NEVER, ) from .glossary_info import GlossaryInfo from .glossary_progress import GlossaryProgress from .glossary_utils import Error, ReadError, WriteError, splitFilenameExt from .info import c_name from .os_utils import rmtree, showMemoryUsage from .plugin_handler import PluginHandler from .sort_keys import defaultSortKeyName, lookupSortKey from .sq_entry_list import SqEntryList if TYPE_CHECKING: from collections.abc import Callable, Iterable, Iterator from typing import ( Any, ) from .entry_base import MultiStr from .glossary_types import ( EntryListType, EntryType, RawEntryType, ) from .plugin_prop import PluginProp from .sort_keys import NamedSortKey from .ui_type import UIType __all__ = [ "ConvertArgs", "Error", "Glossary", "GlossaryCommon", "ReadError", "WriteError", ] # SortKeyType = Callable[ # [[list[str]], # Any, # ] @dataclass(slots=True, frozen=True) class ConvertArgs: inputFilename: str inputFormat: str = "" direct: bool | None = None outputFilename: str = "" outputFormat: str = "" sort: bool | None = None sortKeyName: str | None = None sortEncoding: str | None = None readOptions: dict[str, Any] | None = None writeOptions: dict[str, Any] | None = None sqlite: bool | None = None infoOverride: dict[str, str] | None = None class GlossaryCommon(GlossaryInfo, GlossaryProgress): # noqa: PLR0904 """ The signature of 'convert' method is different in glossary_v2.py See help(Glossary.convert). addEntryObj is renamed to addEntry in glossary_v2.py These methods do not exist in glossary_v2.py (but still exist in glossary.py) - read(): you can use directRead() then iterate over glossary - sortWords(): you have to sort entries yourself (when adding or after directRead) - updateIter(): no longer needed, and does't do anything in glossary.py """ def _closeReaders(self) -> None: for reader in self._readers: try: reader.close() except Exception: # noqa: PERF203 log.exception("") def initVars(self) -> None: GlossaryProgress.clear(self) self._info = {} readers = getattr(self, "_readers", []) for reader in readers: try: reader.close() except Exception: # noqa: PERF203 log.exception("") self._readers: list[Any] = [] self._defiHasWordTitle = False self._iter: Iterator[EntryType] | None = None self._entryFilters: list[EntryFilterType] = [] self._entryFiltersExtra: list[EntryFilterType] = [] self._entryFiltersName: set[str] = set() self._sort = False self._filename = "" self._defaultDefiFormat = "m" self._tmpDataDir = "" self._entryFiltersAreSet = False def clear(self) -> None: self.initVars() self._data.clear() def _newInMemorySqEntryList(self) -> SqEntryList: return SqEntryList( entryToRaw=self._entryToRaw, entryFromRaw=self._entryFromRaw, database="file::memory:", # or "file::memory:?cache=shared" create=True, ) def __init__( self, info: dict[str, str] | None = None, ui: UIType | None = None, # noqa: F821 ) -> None: """ info: dict instance, or None no need to copy dict instance before passing here we will not reference to it. 
""" GlossaryInfo.__init__(self) GlossaryProgress.__init__(self, ui=ui) self._config: dict[str, Any] = {} self._data: EntryListType = EntryList( entryToRaw=self._entryToRaw, entryFromRaw=self._entryFromRaw, ) self._sqlite = False self._cleanupPathList: set[str] = set() self._readOptions: dict[str, Any] | None = None self.initVars() if info: if not isinstance(info, dict): raise TypeError( "Glossary: `info` has invalid type, dict or OrderedDict expected", ) warnings.warn( "info= argument is deprecated. Use glos.setInfo(key, value)", category=DeprecationWarning, stacklevel=2, ) for key, value in info.items(): self.setInfo(key, value) def addCleanupPath(self, path: str) -> None: self._cleanupPathList.add(path) def cleanup(self) -> None: self._closeReaders() if not self._cleanupPathList: return if not self._config.get("cleanup", True): log.info("Not cleaning up files:") log.info("\n".join(self._cleanupPathList)) return self._data.close() for cleanupPath in self._cleanupPathList: if isfile(cleanupPath): log.debug(f"Removing file {cleanupPath}") try: os.remove(cleanupPath) except Exception: log.exception(f"error removing {cleanupPath}") elif isdir(cleanupPath): log.debug(f"Removing directory {cleanupPath}") rmtree(cleanupPath) else: log.error(f"no such file or directory: {cleanupPath}") self._cleanupPathList = set() def _dataEntryToRaw(self, entry: DataEntry) -> RawEntryType: b_fpath = b"" if self.tmpDataDir: b_fpath = entry.save(self.tmpDataDir).encode("utf-8") return (b"b", b_fpath, entry.getFileName().encode("utf-8")) def _entryToRaw(self, entry: EntryType) -> RawEntryType: """ Return a tuple (word, defi) or (word, defi, defiFormat) where both word and defi might be string or list of strings. """ if entry.isData(): return self._dataEntryToRaw(cast("DataEntry", entry)) defiFormat = entry.defiFormat if defiFormat is None or defiFormat == self._defaultDefiFormat: defiFormat = "" return [defiFormat.encode("ascii"), entry.b_defi] + entry.lb_word def _entryFromRaw(self, rawEntry: RawEntryType) -> EntryType: defiFormat = rawEntry[0].decode("ascii") or self._defaultDefiFormat defi = rawEntry[1].decode("utf-8") if defiFormat == "b": fname = rawEntry[2].decode("utf-8") if isinstance(fname, list): fname = fname[0] # NESTED 4 return DataEntry(fname, tmpPath=defi) return Entry( [b.decode("utf-8") for b in rawEntry[2:]], defi, defiFormat=defiFormat, ) @property def rawEntryCompress(self) -> bool: warnings.warn( "rawEntryCompress is not supported anymore, this propery returns False", stacklevel=2, ) return False def setRawEntryCompress(self, _enable: bool) -> None: # noqa: PLR6301 warnings.warn( "rawEntryCompress is not supported anymore, this method does nothing", stacklevel=2, ) def updateEntryFilters(self) -> None: entryFilters = [] config = self._config glosArg = self for configParam, default, filterClass in entryFiltersRules: args = [] value = default if configParam is None else config.get(configParam, default) if not value: continue if not isinstance(default, bool): args = [value] entryFilters.append(filterClass(glosArg, *tuple(args))) if log.level <= core.TRACE: try: import psutil # noqa: F401 except ModuleNotFoundError: pass else: entryFilters.append(ShowMaxMemoryUsage(glosArg)) self._entryFilters = entryFilters self._entryFiltersName = {entryFilter.name for entryFilter in entryFilters} self._entryFiltersAreSet = True def prepareEntryFilters(self) -> None: """ Call .prepare() method on all _entryFilters run this after glossary info is set and ready for most entry filters, it won't do anything. 
""" for entryFilter in self._entryFilters: entryFilter.prepare() def _addExtraEntryFilter(self, cls: type[EntryFilterType]) -> None: if cls.name in self._entryFiltersName: return self._entryFilters.append(cls(self)) self._entryFiltersExtra.append(cls(self)) self._entryFiltersName.add(cls.name) def removeHtmlTagsAll(self) -> None: """ Remove all HTML tags from definition. This should only be called from a plugin's Writer.__init__ method. Does not apply on entries added with glos.addEntry """ self._addExtraEntryFilter(RemoveHtmlTagsAll) def stripFullHtml( self, errorHandler: Callable[[EntryType, str], None] | None = None, ) -> None: """ Add entry filter "strip_full_html" to replace a full HTML document with it's body in entry definition. """ name = StripFullHtml.name if name in self._entryFiltersName: return self._entryFilters.append( StripFullHtml( # pyright: ignore[reportArgumentType] self, errorHandler=errorHandler, ), ) self._entryFiltersName.add(name) def preventDuplicateWords(self) -> None: """ Add entry filter to prevent duplicate `entry.s_word`. This should only be called from a plugin's Writer.__init__ method. Does not apply on entries added with glos.addEntry Note: there may be still duplicate headwords or alternate words but we only care about making the whole `entry.s_word` (aka entry key) unique """ self._addExtraEntryFilter(PreventDuplicateWords) # def mergeEntriesWithSameHeadwordHTML(self): # """ # Merge consequtive entries that have the same word list. # Currently this convert all non-html entries to html. # Should be only called in writer.open. # """ # from pyglossary.entry_merge import mergeHtmlEntriesWithSameHeadword # self._iter = mergeHtmlEntriesWithSameHeadword(self._iter) def mergeEntriesWithSameHeadwordPlaintext(self) -> None: """ Merge consequtive entries that have the same word list. Currently this assume all entries are plaintext Should be only called in writer.open. """ from pyglossary.entry_merge import mergePlaintextEntriesWithSameHeadword assert self._iter self._iter = mergePlaintextEntriesWithSameHeadword(self._iter) def __str__(self) -> str: return ( "Glossary{" f"filename: {self._filename!r}" f", name: {self._info.get('name')!r}" "}" ) def _loadedEntryGen(self) -> Iterator[EntryType]: if not self.progressbar: yield from self._data return iterable = self._progressIter(self._data) filters = self._entryFiltersExtra if not filters: self.progressInit("Writing") yield from iterable self.progressEnd() return self.progressInit("Writing") for _entry in iterable: entry = _entry for f in filters: entry = f.run(entry) # type: ignore # pyright: ignore[reportArgumentType] # assert entry # TODO: measure running time in non-optimized mode yield entry # pyright: ignore[reportReturnType] self.progressEnd() def _readersEntryGen(self) -> Iterator[EntryType]: for reader in self._readers: self.progressInit("Converting") iterator = self._progressIter(reader) iterator = self._applyEntryFiltersGen(iterator) # turn iterator into background-queued, like buffered channel in Go queueSize = os.getenv("PYGLOSSARY_ASYNC_ITER_SIZE") if queueSize: iterator = QueuedIterator(iterator, int(queueSize)) try: yield from iterator finally: reader.close() self.progressEnd() # This iterator/generator does not give None entries. # And Entry is not falsable, so bool(entry) is always True. # Since ProgressBar is already handled with an EntryFilter, there is # no point of returning None entries anymore. 
	def _applyEntryFiltersGen(
		self,
		iterable: Iterable[EntryType],
	) -> Iterator[EntryType]:
		entry: EntryType | None
		for entry in iterable:
			if entry is None:
				continue
			for entryFilter in self._entryFilters:
				entry = entryFilter.run(entry)  # noqa: PLW2901
				if entry is None:
					break
			else:
				yield entry

	def __iter__(self) -> Iterator[EntryType]:
		if self._iter is not None:
			return self._iter
		if not self._readers:
			return self._loadedEntryGen()
		log.error("Glossary: iterator is not set in direct mode")
		return iter([])

	# TODO: switch to @property defaultDefiFormat
	def setDefaultDefiFormat(self, defiFormat: str) -> None:
		"""
		DefiFormat must be empty or one of these:
		"m": plain text
		"h": html
		"x": xdxf.
		"""
		self._defaultDefiFormat = defiFormat

	def getDefaultDefiFormat(self) -> str:
		return self._defaultDefiFormat

	def collectDefiFormat(
		self,
		maxCount: int,
	) -> dict[str, float] | None:
		"""
		Collect definition format.

		Example return value: {"h": 0.91, "m": 0.09}.
		"""
		from collections import Counter

		readers = self._readers
		if readers:
			log.error("collectDefiFormat: not supported in direct mode")
			return None
		counter: dict[str, int] = Counter()
		count = 0
		for entry in self:
			if entry.isData():
				continue
			entry.detectDefiFormat()
			counter[entry.defiFormat] += 1
			count += 1
			if count >= maxCount:
				break
		result = {
			defiFormat: itemCount / count
			for defiFormat, itemCount in counter.items()
		}
		for defiFormat in ("h", "m", "x"):
			if defiFormat not in result:
				result[defiFormat] = 0
		self._iter = self._loadedEntryGen()
		return result

	def __len__(self) -> int:
		return len(self._data) + sum(len(reader) for reader in self._readers)

	@property
	def config(self) -> dict[str, Any]:
		raise NotImplementedError

	@config.setter
	def config(self, config: dict[str, Any]) -> None:
		if self._config:
			log.error("glos.config is set more than once")
			return
		self._config = config

	@property
	def alts(self) -> bool:
		return self._config.get("enable_alts", True)

	@property
	def filename(self) -> str:
		return self._filename

	@property
	def tmpDataDir(self) -> str:
		if not self._tmpDataDir:
			self._setTmpDataDir(self._filename)
		return self._tmpDataDir

	@property
	def readOptions(self) -> dict | None:
		return self._readOptions

	@property
	def sqlite(self) -> bool:
		return self._sqlite

	def wordTitleStr(
		self,
		word: str,
		sample: str = "",
		class_: str = "",
	) -> str:
		"""
		Return title tag for words.

		Notes
		-----
		- `word` needs to be escaped before passing
		- `word` can contain html code (multiple words, colors, etc)
		- if input format (reader) indicates that words are already included
		  in definition (as title), this method will return empty string
		- depending on glossary's `sourceLang` or writing system of `word`
		  (or sample if given), either '<b>' or '<big>' will be used
		"""
		if self._defiHasWordTitle:
			return ""
		if not word:
			return ""
		if not sample:
			sample = word
		tag = self.titleTag(sample)
		if class_:
			return f'<{tag} class="{class_}">{word}</{tag}>'
		return f"<{tag}>{word}</{tag}>
    " def getConfig(self, name: str, default: str | None) -> str | None: return self._config.get(name, default) def addEntry(self, entry: EntryType) -> None: self._data.append(entry) def newEntry( self, word: MultiStr, defi: str, defiFormat: str = "", byteProgress: tuple[int, int] | None = None, ) -> Entry: """ Create and return a new entry object. defiFormat must be empty or one of these: "m": plain text "h": html "x": xdxf """ if not defiFormat: defiFormat = self._defaultDefiFormat return Entry( word, defi, defiFormat=defiFormat, byteProgress=byteProgress, ) def newDataEntry(self, fname: str, data: bytes) -> EntryType: if self._readers: return DataEntry(fname, data) # pyright: ignore[reportReturnType] if self._tmpDataDir: return DataEntry( # pyright: ignore[reportReturnType] fname, data, tmpPath=join(self._tmpDataDir, fname.replace("/", "_")), ) tmpDir = join(cacheDir, "tmp") os.makedirs(tmpDir, mode=0o700, exist_ok=True) self._cleanupPathList.add(tmpDir) return DataEntry( # pyright: ignore[reportReturnType] fname, data, tmpPath=join(tmpDir, uuid1().hex), ) # ________________________________________________________________________# # def _hasWriteAccessToDir(self, dirPath: str) -> None: # if isdir(dirPath): # return os.access(dirPath, os.W_OK) # return os.access(os.path.dirname(dirPath), os.W_OK) # TODO: add ReaderType with Protocol def _createReader( self, formatName: str, options: dict[str, Any], ) -> Any: # noqa: ANN401 readerClass = PluginHandler.plugins[formatName].readerClass if readerClass is None: raise ReadError("_createReader: readerClass is None") reader = readerClass(self) for name, value in options.items(): setattr(reader, f"_{name}", value) return reader def _setTmpDataDir(self, filename: str) -> None: # good thing about cacheDir is that we don't have to clean it up after # conversion is finished. # specially since dataEntry.save(...) will move the file from cacheDir # to the new directory (associated with output glossary path) # And we don't have to check for write access to cacheDir because it's # inside user's home dir. But input glossary might be in a directory # that we don't have write access to. 
# still maybe add a config key to decide if we should always use cacheDir # if self._hasWriteAccessToDir(f"{filename}_res", os.W_OK): # self._tmpDataDir = f"{filename}_res" # else: if not filename: filename = uuid1().hex self._tmpDataDir = join(cacheDir, os.path.basename(filename) + "_res") log.debug(f"tmpDataDir = {self._tmpDataDir}") os.makedirs(self._tmpDataDir, mode=0o700, exist_ok=True) self._cleanupPathList.add(self._tmpDataDir) @staticmethod def _validateReadoptions( formatName: str, options: dict[str, Any], ) -> None: validOptionKeys = set(PluginHandler.formatsReadOptions[formatName]) for key in list(options): if key not in validOptionKeys: log.error( f"Invalid read option {key!r} given for {formatName} format", ) del options[key] def _openReader(self, reader: Any, filename: str) -> None: # noqa: ANN401 # reader.open returns "Iterator[tuple[int, int]] | None" progressbar: bool = self.progressbar try: openResult = reader.open(filename) if openResult is not None: self.progressInit("Reading metadata") lastPos = -100_000 for pos, total in openResult: if progressbar and pos - lastPos > 100_000: # noqa: PLR2004 self.progress(pos, total, unit="bytes") lastPos = pos self.progressEnd() except (FileNotFoundError, LookupError) as e: raise ReadError(str(e)) from e hasTitleStr = self._info.get("definition_has_headwords", "") if hasTitleStr: if hasTitleStr.lower() == "true": self._defiHasWordTitle = True else: log.error(f"bad info value: definition_has_headwords={hasTitleStr!r}") def directRead( self, filename: str, **options, # noqa: ANN003 ) -> bool: self._read( filename=filename, direct=True, **options, ) return True # these keyword arguments are also used by `directRead` # so renaming them would be a breaking change def _read( self, filename: str, format: str | None = None, # to be removed in 6.0.0 # noqa: A002 formatName: str = "", direct: bool = False, **options, # noqa: ANN003 ) -> None: if format: warnings.warn( "format= argument is deprecated and will be removed in 6.0.0" f". Use formatName={format}", category=DeprecationWarning, stacklevel=3, ) formatName = formatName or format del format filenameAbs = os.path.abspath(filename) self._setTmpDataDir(filename) filenameUC, formatName, compression = PluginHandler.detectInputFormat( filenameAbs, formatName=formatName ) # filenameUC is the uncompressed file's absolute path if compression: from .compression import uncompress uncompress(filenameAbs, filenameUC, compression) self._validateReadoptions(formatName, options) filenameBase, ext = os.path.splitext(filenameUC) if ext.lower() not in PluginHandler.plugins[formatName].extensions: filenameBase = filenameUC self._filename = filenameBase if not self._info.get(c_name): self._info[c_name] = os.path.split(filenameUC)[1] if not self._entryFiltersAreSet: self.updateEntryFilters() reader = self._createReader(formatName, options) self._openReader(reader, filenameUC) self._readOptions = options self.prepareEntryFilters() if not direct: self.loadReader(reader) self._iter = self._loadedEntryGen() return self._readers.append(reader) self._iter = self._readersEntryGen() def loadReader(self, reader: Any) -> None: # noqa: ANN401 """ Iterate over `reader` object and loads the whole data into self._data must call `reader.open(filename)` before calling this function. 
""" showMemoryUsage() self.progressInit("Reading") iterator = self._progressIter(reader) iterator = self._applyEntryFiltersGen(iterator) try: for entry in iterator: self.addEntry(entry) finally: reader.close() self.progressEnd() core.trace(log, f"Loaded {len(self._data)} entries") showMemoryUsage() # TODO: add WriterType with Protocol def _createWriter( self, formatName: str, options: dict[str, Any], ) -> Any: # noqa: ANN401 validOptions = PluginHandler.formatsWriteOptions.get(formatName) if validOptions is None: raise WriteError(f"No write support for {formatName!r} format") validOptionKeys = list(validOptions) for key in list(options): if key not in validOptionKeys: log.error( f"Invalid write option {key!r} given for {formatName} format", ) del options[key] writerClass = PluginHandler.plugins[formatName].writerClass if writerClass is None: raise WriteError("_createWriter: writerClass is None") writer = writerClass(self) for name, value in options.items(): setattr(writer, f"_{name}", value) return writer def write( self, filename: str, format: str | None = None, # to be removed in 6.0.0 # noqa: A002 formatName: str = "", **kwargs, # noqa: ANN003 ) -> str: """ Write to a given glossary file, with given formatName (optional). Return absolute path of output file. Raises Error exception if failed. Parameters ---------- filename (str): file name or path to write. formatName (str): format name You can pass write-options (of given format) as keyword arguments """ if type(filename) is not str: raise TypeError("filename must be str") if format is not None: warnings.warn( "format= argument is deprecated and will be removed in 6.0.0" f". Use formatName={format}", category=DeprecationWarning, stacklevel=2, ) formatName = formatName or format del format if formatName is not None and type(formatName) is not str: raise TypeError("formatName must be str") return self._write( filename=filename, formatName=formatName, **kwargs, ) def _writeEntries( self, writerList: list[Any], filename: str, options: dict[str, Any], ) -> None: writer = writerList[0] genList = [] gen = writer.write() if gen is None: log.error(f"{format} write function is not a generator") else: genList.append(gen) if self._config.get("save_info_json", False): from pyglossary.info_writer import InfoWriter infoWriter = InfoWriter(self) # pyright: ignore infoWriter.setWriteOptions(options) filenameNoExt, _, _, _ = splitFilenameExt(filename) infoWriter.open(f"{filenameNoExt}.info") genList.append(infoWriter.write()) writerList.append(infoWriter) for gen in genList: gen.send(None) for entry in self: for gen in genList: gen.send(entry) # suppress() on the whole for-loop does not work for gen in genList: with suppress(StopIteration): gen.send(None) @staticmethod def _openWriter( writer: Any, # noqa: ANN401 filename: str, ) -> None: try: writer.open(filename) except (FileNotFoundError, LookupError) as e: raise WriteError(str(e)) from e def _write( self, filename: str, formatName: str, sort: bool = False, **options, # noqa: ANN003 ) -> str: filename = os.path.abspath(filename) if formatName not in PluginHandler.plugins: raise WriteError(f"No plugin {formatName!r} was found") if not PluginHandler.plugins[formatName].canWrite: raise WriteError(f"No Writer class found for plugin {formatName}") if self._readers and sort: log.warning( "Full sort enabled, falling back to indirect mode", ) for reader in self._readers: self.loadReader(reader) self._readers = [] log.info(f"Writing to {formatName} file {filename!r}") writer = self._createWriter(formatName, 
	def _write(
		self,
		filename: str,
		formatName: str,
		sort: bool = False,
		**options,  # noqa: ANN003
	) -> str:
		filename = os.path.abspath(filename)

		if formatName not in PluginHandler.plugins:
			raise WriteError(f"No plugin {formatName!r} was found")

		if not PluginHandler.plugins[formatName].canWrite:
			raise WriteError(f"No Writer class found for plugin {formatName}")

		if self._readers and sort:
			log.warning(
				"Full sort enabled, falling back to indirect mode",
			)
			for reader in self._readers:
				self.loadReader(reader)
			self._readers = []

		log.info(f"Writing to {formatName} file {filename!r}")

		writer = self._createWriter(formatName, options)

		self._sort = sort

		if sort:
			self._data.sort()

		if self._readers:
			self._iter = self._readersEntryGen()
		else:
			self._iter = self._loadedEntryGen()

		self._openWriter(writer, filename)

		showMemoryUsage()

		writerList = [writer]
		try:
			self._writeEntries(writerList, filename, options)
		except FileNotFoundError as e:
			# catching LookupError also catches IndexError:
			# issubclass(IndexError, LookupError)
			raise WriteError(str(e)) from e
		finally:
			showMemoryUsage()
			log.debug("Running writer.finish()")
			for writer in writerList:
				writer.finish()
			self.clear()

		showMemoryUsage()

		return filename

	@staticmethod
	def _compressOutput(filename: str, compression: str) -> str:
		from .compression import compress

		return compress(filename, compression)

	def _switchToSQLite(
		self,
		inputFilename: str,
	) -> None:
		sq_fpath = join(cacheDir, f"{os.path.basename(inputFilename)}.db")
		if isfile(sq_fpath):
			log.info(f"Removing and re-creating {sq_fpath!r}")
			os.remove(sq_fpath)

		self._data = SqEntryList(  # pyright: ignore[reportAttributeAccessIssue]
			entryToRaw=self._entryToRaw,
			entryFromRaw=self._entryFromRaw,
			database=sq_fpath,
			create=True,
		)
		self._cleanupPathList.add(sq_fpath)

		if not self.alts:
			log.warning(
				"SQLite mode only works with enable_alts=True, force-enabling it.",
			)
		self._config["enable_alts"] = True
		self._sqlite = True

	@staticmethod
	def _checkSortFlag(
		plugin: PluginProp,
		sort: bool | None,
	) -> bool:
		sortOnWrite = plugin.sortOnWrite
		if sortOnWrite == ALWAYS:
			if sort is False:
				log.warning(
					f"Writing {plugin.name} requires sorting"
					", ignoring user sort=False option",
				)
			return True
		if sortOnWrite == NEVER:
			if sort:
				log.warning(
					"Plugin prevents sorting before write"
					", ignoring user sort=True option",
				)
			return False
		if sortOnWrite == DEFAULT_YES:
			return sort or sort is None
		# if sortOnWrite == DEFAULT_NO:
		return bool(sort)

	def _resolveSortParams(
		self,
		args: ConvertArgs,
		plugin: PluginProp,
	) -> tuple[bool, bool]:
		"""
		sortKeyName: see doc/sort-key.md.

		returns (direct, sort)
		"""
		if args.direct and args.sqlite:
			raise ValueError(
				f"Conflicting arguments: direct={args.direct}, sqlite={args.sqlite}",
			)

		sort = self._checkSortFlag(plugin, args.sort)
		if not sort:
			if args.direct is None:
				return True, False
			return args.direct, False

		# from this point we can assume sort == True and direct == False

		sqlite = args.sqlite
		if sqlite is None:
			sqlite = self._config.get("auto_sqlite", True)
			if sqlite:
				log.info(
					f"Automatically switching to SQLite mode for writing {plugin.name}",
				)

		sortKeyTuple = self._checkSortKey(
			plugin,
			args.sortKeyName,
			args.sortEncoding,
		)
		namedSortKey, sortEncoding = sortKeyTuple

		if sqlite:
			self._switchToSQLite(
				inputFilename=args.inputFilename,
			)
		elif not os.getenv("NO_SQLITE"):
			self._data = self._newInMemorySqEntryList()  # pyright: ignore

		self._data.setSortKey(
			namedSortKey=namedSortKey,
			sortEncoding=sortEncoding,
			writeOptions=args.writeOptions or {},
		)

		return False, True
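	# Informative summary of _checkSortFlag above (derived from the code, for
	# quick reference):
	#
	#     plugin.sortOnWrite    sort=None   sort=True        sort=False
	#     ALWAYS                True        True             True  (warns)
	#     NEVER                 False       False (warns)    False
	#     DEFAULT_YES           True        True             False
	#     DEFAULT_NO            False       True             False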
""" writerSortKeyName = plugin.sortKeyName writerSortEncoding = getattr(plugin, "sortEncoding", None) if plugin.sortOnWrite == ALWAYS: if not writerSortKeyName: raise Error("No sortKeyName was found in plugin") if sortKeyName and sortKeyName != writerSortKeyName: log.warning( f"Ignoring user-defined sort order {sortKeyName!r}" f", and using sortKey function from {plugin.name} plugin", ) sortKeyName = writerSortKeyName if writerSortEncoding: sortEncoding = writerSortEncoding elif not sortKeyName: sortKeyName = writerSortKeyName or defaultSortKeyName namedSortKey = lookupSortKey(sortKeyName) if namedSortKey is None: raise Error(f"invalid {sortKeyName = }") log.info(f"Using sortKeyName = {namedSortKey.name!r}") if not sortEncoding: sortEncoding = "utf-8" return namedSortKey, sortEncoding @staticmethod def _convertValidateArgs(args: ConvertArgs) -> None: if type(args.inputFilename) is not str: raise TypeError("inputFilename must be str") if type(args.outputFilename) is not str: raise TypeError("outputFilename must be str") if args.inputFormat is not None and type(args.inputFormat) is not str: raise TypeError("inputFormat must be str") if args.outputFormat is not None and type(args.outputFormat) is not str: raise TypeError("outputFormat must be str") if args.outputFilename == args.inputFilename: raise Error("Input and output files are the same") def _convertPrepare( self, args: ConvertArgs, outputFilename: str = "", outputFormat: str = "", ) -> bool: if isdir(outputFilename) and os.listdir(outputFilename): raise Error( f"Directory already exists and not empty: {relpath(outputFilename)}", ) direct, sort = self._resolveSortParams( args=args, plugin=PluginHandler.plugins[outputFormat], ) showMemoryUsage() readOptions = args.readOptions or {} self._read( args.inputFilename, formatName=args.inputFormat, direct=direct, **readOptions, ) self.detectLangsFromName() return sort def convertV2(self, args: ConvertArgs) -> str: """ Return absolute path of output file. Raises Error exception if failed. sortKeyName: name of sort key/algorithm defaults to `defaultSortKeyName` in sort_keys.py see doc/sort-key.md or sort_keys.py for other possible values This can also include sort locale after a colon sign, for example: sortKeyName=":fa_IR.UTF-8" sortKeyName="headword:fa_IR.UTF-8" sortEncoding: encoding/charset for sorting, default to utf-8 """ self._convertValidateArgs(args) tm0 = now() outputFilename, outputFormat, compression = PluginHandler.detectOutputFormat( filename=args.outputFilename, formatName=args.outputFormat, inputFilename=args.inputFilename, ) sort = self._convertPrepare( args=args, outputFilename=outputFilename, outputFormat=outputFormat, ) if args.infoOverride: for key, value in args.infoOverride.items(): self.setInfo(key, value) if compression and not PluginHandler.plugins[outputFormat].singleFile: os.makedirs(outputFilename, mode=0o700, exist_ok=True) writeOptions = args.writeOptions or {} finalOutputFile = self._write( outputFilename, formatName=outputFormat, sort=sort, **writeOptions, ) if compression: finalOutputFile = self._compressOutput(finalOutputFile, compression) log.info(f"Writing file {finalOutputFile!r} done.") log.info(f"Running time of convert: {now() - tm0:.1f} seconds") showMemoryUsage() self.cleanup() return finalOutputFile # ________________________________________________________________________# class Glossary(GlossaryCommon, PluginHandler): """ init method is inherited from PluginHandler arguments: usePluginsJson: bool = True skipDisabledPlugins: bool = True. 
	# ________________________________________________________________________#


class Glossary(GlossaryCommon, PluginHandler):
	"""
	init method is inherited from PluginHandler.

	arguments:
		usePluginsJson: bool = True
		skipDisabledPlugins: bool = True

	init() must be called only once, so make sure you put it in the right
	place. Probably in the top of your program's main function or module.
	"""

	GLOSSARY_API_VERSION = "2.0"

	def convert(self, args: ConvertArgs) -> str | None:
		return self.convertV2(args)
pyglossary-5.0.9/pyglossary/gregorian.py000066400000000000000000000047131476751035500205260ustar00rootroot00000000000000
# -*- coding: utf-8 -*-
#
# Copyright © 2008-2019 Saeed Rasooli
# Copyright © 2007 Mehdi Bayazee
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
# Also available in /usr/share/common-licenses/GPL on Debian systems
# or /usr/share/licenses/common/GPL3/license.txt on ArchLinux

# Gregorian calendar:
# http://en.wikipedia.org/wiki/Gregorian_calendar

from __future__ import annotations

from datetime import datetime

__all__ = ["isLeap", "jd_to", "to_jd"]

name = "gregorian"
desc = "Gregorian"

epoch = 1721426
options = ()


def save() -> None:
	pass


def isLeap(y: int) -> bool:
	return y % 4 == 0 and not (y % 100 == 0 and y % 400 != 0)


def to_jd(year: int, month: int, day: int) -> int:
	if 0 < year < 10000:  # > 1.5x faster  # noqa: PLR2004
		return datetime(year, month, day).toordinal() + 1721425
	if month <= 2:  # noqa: PLR2004
		tm = 0
	elif isLeap(year):
		tm = -1
	else:
		tm = -2
	return (
		epoch
		- 1
		+ 365 * (year - 1)
		+ (year - 1) // 4
		+ -((year - 1) // 100)
		+ (year - 1) // 400
		+ (367 * month - 362) // 12
		+ tm
		+ day
	)


def jd_to(jd: int) -> tuple[int, int, int]:
	ordinal = jd - 1721425
	if 0 < ordinal < 3652060:  # > 4x faster  # noqa: PLR2004
		# datetime(9999, 12, 31).toordinal() == 3652059
		dt = datetime.fromordinal(ordinal)
		return (dt.year, dt.month, dt.day)
	# wjd = floor(jd - 0.5) + 0.5
	qc, dqc = divmod(jd - epoch, 146097)  # qc ~~ quadricent
	cent, dcent = divmod(dqc, 36524)
	quad, dquad = divmod(dcent, 1461)
	yindex = dquad // 365  # divmod(dquad, 365)[0]
	year = (
		qc * 400
		+ cent * 100
		+ quad * 4
		+ yindex
		+ (cent != 4 and yindex != 4)  # noqa: PLR2004
	)
	yearday = jd - to_jd(year, 1, 1)
	if jd < to_jd(year, 3, 1):
		leapadj = 0
	elif isLeap(year):
		leapadj = 1
	else:
		leapadj = 2
	month = ((yearday + leapadj) * 12 + 373) // 367
	day = jd - to_jd(year, month, 1) + 1
	return year, month, day
pyglossary-5.0.9/pyglossary/html_utils.py000066400000000000000000000205751476751035500207410ustar00rootroot00000000000000
# -*- coding: utf-8 -*-
from __future__ import annotations

import logging
import re

__all__ = ["name2codepoint", "unescape_unicode"]

log = logging.getLogger("pyglossary")

re_entity = re.compile(
	r"&#?\w+;",
)


special_chars = {
	"<",
	">",
	"&",
	'"',
	"'",
	"\xa0",  # "&nbsp;" or "&#160;"
}

# these are not included in html.entities.name2codepoint
name2codepoint_extra = {
	"itilde": 0x0129,  # ĩ
	"utilde": 0x0169,  # ũ
	"uring": 0x016F,  # ů
	"ycirc": 0x0177,  # ŷ
	"wring": 0x1E98,  # ẘ
	"yring": 0x1E99,  # ẙ
	"etilde": 0x1EBD,  # ẽ
	"ygrave": 0x1EF3,  # ỳ
	"ytilde": 0x1EF9,  # ỹ
	"ldash": 0x2013,  # –
	"frac13": 0x2153,  # ⅓
	"xfrac13": 0x2153,  # ⅓
	"frac23": 0x2154,  # ⅔
}

# Use build_name2codepoint_dict function to update this dictionary
name2codepoint = {
"Aacute": 0x00C1, # Á "aacute": 0x00E1, # á "Acirc": 0x00C2, #  "acirc": 0x00E2, # â "acute": 0x00B4, # ´ "AElig": 0x00C6, # Æ "aelig": 0x00E6, # æ "Agrave": 0x00C0, # À "agrave": 0x00E0, # à "alefsym": 0x2135, # ℵ "Alpha": 0x0391, # Α "alpha": 0x03B1, # α "amp": 0x0026, # & "and": 0x2227, # ∧ "ang": 0x2220, # ∠ "Aring": 0x00C5, # Å "aring": 0x00E5, # å "asymp": 0x2248, # ≈ "Atilde": 0x00C3, # à "atilde": 0x00E3, # ã "Auml": 0x00C4, # Ä "auml": 0x00E4, # ä "bdquo": 0x201E, # „ "Beta": 0x0392, # Β "beta": 0x03B2, # β "brvbar": 0x00A6, # ¦ "bull": 0x2022, # • "cap": 0x2229, # ∩ "Ccedil": 0x00C7, # Ç "ccedil": 0x00E7, # ç "cedil": 0x00B8, # ¸ "cent": 0x00A2, # ¢ "Chi": 0x03A7, # Χ "chi": 0x03C7, # χ "circ": 0x02C6, # ˆ "clubs": 0x2663, # ♣ "cong": 0x2245, # ≅ "copy": 0x00A9, # © "crarr": 0x21B5, # ↵ "cup": 0x222A, # ∪ "curren": 0x00A4, # ¤ "Dagger": 0x2021, # ‡ "dagger": 0x2020, # † "dArr": 0x21D3, # ⇓ "darr": 0x2193, # ↓ "deg": 0x00B0, # ° "Delta": 0x0394, # Δ "delta": 0x03B4, # δ "diams": 0x2666, # ♦ "divide": 0x00F7, # ÷ "Eacute": 0x00C9, # É "eacute": 0x00E9, # é "Ecirc": 0x00CA, # Ê "ecirc": 0x00EA, # ê "Egrave": 0x00C8, # È "egrave": 0x00E8, # è "empty": 0x2205, # ∅ "emsp": 0x2003, "ensp": 0x2002, "Epsilon": 0x0395, # Ε "epsilon": 0x03B5, # ε "equiv": 0x2261, # ≡ "Eta": 0x0397, # Η "eta": 0x03B7, # η "ETH": 0x00D0, # Ð "eth": 0x00F0, # ð "etilde": 0x1EBD, # ẽ "Euml": 0x00CB, # Ë "euml": 0x00EB, # ë "euro": 0x20AC, # € "exist": 0x2203, # ∃ "fnof": 0x0192, # ƒ "forall": 0x2200, # ∀ "frac12": 0x00BD, # ½ "frac13": 0x2153, # ⅓ "frac14": 0x00BC, # ¼ "frac23": 0x2154, # ⅔ "frac34": 0x00BE, # ¾ "frasl": 0x2044, # ⁄ "Gamma": 0x0393, # Γ "gamma": 0x03B3, # γ "ge": 0x2265, # ≥ "gt": 0x003E, # > "hArr": 0x21D4, # ⇔ "harr": 0x2194, # ↔ "hearts": 0x2665, # ♥ "hellip": 0x2026, # … "Iacute": 0x00CD, # Í "iacute": 0x00ED, # í "Icirc": 0x00CE, # Î "icirc": 0x00EE, # î "iexcl": 0x00A1, # ¡ "Igrave": 0x00CC, # Ì "igrave": 0x00EC, # ì "image": 0x2111, # ℑ "infin": 0x221E, # ∞ "int": 0x222B, # ∫ "Iota": 0x0399, # Ι "iota": 0x03B9, # ι "iquest": 0x00BF, # ¿ "isin": 0x2208, # ∈ "itilde": 0x0129, # ĩ "Iuml": 0x00CF, # Ï "iuml": 0x00EF, # ï "Kappa": 0x039A, # Κ "kappa": 0x03BA, # κ "Lambda": 0x039B, # Λ "lambda": 0x03BB, # λ "lang": 0x2329, # 〈 "laquo": 0x00AB, # « "lArr": 0x21D0, # ⇐ "larr": 0x2190, # ← "lceil": 0x2308, # ⌈ "ldash": 0x2013, # – "ldquo": 0x201C, # “ "le": 0x2264, # ≤ "lfloor": 0x230A, # ⌊ "lowast": 0x2217, # ∗ "loz": 0x25CA, # ◊ "lrm": 0x200E, # ‎ "lsaquo": 0x2039, # ‹ "lsquo": 0x2018, # ‘ "lt": 0x003C, # < "macr": 0x00AF, # ¯ "mdash": 0x2014, # — "micro": 0x00B5, # µ "middot": 0x00B7, # · "minus": 0x2212, # − "Mu": 0x039C, # Μ "mu": 0x03BC, # μ "nabla": 0x2207, # ∇ "nbsp": 0x00A0, # space "ndash": 0x2013, # – "ne": 0x2260, # ≠ "ni": 0x220B, # ∋ "not": 0x00AC, # ¬ "notin": 0x2209, # ∉ "nsub": 0x2284, # ⊄ "Ntilde": 0x00D1, # Ñ "ntilde": 0x00F1, # ñ "Nu": 0x039D, # Ν "nu": 0x03BD, # ν "Oacute": 0x00D3, # Ó "oacute": 0x00F3, # ó "Ocirc": 0x00D4, # Ô "ocirc": 0x00F4, # ô "OElig": 0x0152, # Œ "oelig": 0x0153, # œ "Ograve": 0x00D2, # Ò "ograve": 0x00F2, # ò "oline": 0x203E, # ‾ "Omega": 0x03A9, # Ω "omega": 0x03C9, # ω "Omicron": 0x039F, # Ο "omicron": 0x03BF, # ο "oplus": 0x2295, # ⊕ "or": 0x2228, # ∨ "ordf": 0x00AA, # ª "ordm": 0x00BA, # º "Oslash": 0x00D8, # Ø "oslash": 0x00F8, # ø "Otilde": 0x00D5, # Õ "otilde": 0x00F5, # õ "otimes": 0x2297, # ⊗ "Ouml": 0x00D6, # Ö "ouml": 0x00F6, # ö "para": 0x00B6, # ¶ "part": 0x2202, # ∂ "permil": 0x2030, # ‰ "perp": 0x22A5, # ⊥ "Phi": 0x03A6, # Φ "phi": 
0x03C6, # φ "Pi": 0x03A0, # Π "pi": 0x03C0, # π "piv": 0x03D6, # ϖ "plusmn": 0x00B1, # ± "pound": 0x00A3, # £ "Prime": 0x2033, # ″ "prime": 0x2032, # ′ "prod": 0x220F, # ∏ "prop": 0x221D, # ∝ "Psi": 0x03A8, # Ψ "psi": 0x03C8, # ψ "quot": 0x0022, # " "radic": 0x221A, # √ "rang": 0x232A, # 〉 "raquo": 0x00BB, # » "rArr": 0x21D2, # ⇒ "rarr": 0x2192, # → "rceil": 0x2309, # ⌉ "rdquo": 0x201D, # ” "real": 0x211C, # ℜ "reg": 0x00AE, # ® "rfloor": 0x230B, # ⌋ "Rho": 0x03A1, # Ρ "rho": 0x03C1, # ρ "rlm": 0x200F, # U+200F "rsaquo": 0x203A, # › "rsquo": 0x2019, # ’ "sbquo": 0x201A, # ‚ "Scaron": 0x0160, # Š "scaron": 0x0161, # š "sdot": 0x22C5, # ⋅ "sect": 0x00A7, # § "shy": 0x00AD, # ­ "Sigma": 0x03A3, # Σ "sigma": 0x03C3, # σ "sigmaf": 0x03C2, # ς "sim": 0x223C, # ∼ "spades": 0x2660, # ♠ "sub": 0x2282, # ⊂ "sube": 0x2286, # ⊆ "sum": 0x2211, # ∑ "sup": 0x2283, # ⊃ "sup1": 0x00B9, # ¹ "sup2": 0x00B2, # ² "sup3": 0x00B3, # ³ "supe": 0x2287, # ⊇ "szlig": 0x00DF, # ß "Tau": 0x03A4, # Τ "tau": 0x03C4, # τ "there4": 0x2234, # ∴ "Theta": 0x0398, # Θ "theta": 0x03B8, # θ "thetasym": 0x03D1, # ϑ "thinsp": 0x2009, "THORN": 0x00DE, # Þ "thorn": 0x00FE, # þ "tilde": 0x02DC, # ˜ "times": 0x00D7, # × "trade": 0x2122, # ™ "Uacute": 0x00DA, # Ú "uacute": 0x00FA, # ú "uArr": 0x21D1, # ⇑ "uarr": 0x2191, # ↑ "Ucirc": 0x00DB, # Û "ucirc": 0x00FB, # û "Ugrave": 0x00D9, # Ù "ugrave": 0x00F9, # ù "uml": 0x00A8, # ¨ "upsih": 0x03D2, # ϒ "Upsilon": 0x03A5, # Υ "upsilon": 0x03C5, # υ "uring": 0x016F, # ů "utilde": 0x0169, # ũ "Uuml": 0x00DC, # Ü "uuml": 0x00FC, # ü "weierp": 0x2118, # ℘ "wring": 0x1E98, # ẘ "xfrac13": 0x2153, # ⅓ "Xi": 0x039E, # Ξ "xi": 0x03BE, # ξ "Yacute": 0x00DD, # Ý "yacute": 0x00FD, # ý "ycirc": 0x0177, # ŷ "yen": 0x00A5, # ¥ "ygrave": 0x1EF3, # ỳ "yring": 0x1E99, # ẙ "ytilde": 0x1EF9, # ỹ "Yuml": 0x0178, # Ÿ "yuml": 0x00FF, # ÿ "Zeta": 0x0396, # Ζ "zeta": 0x03B6, # ζ "zwj": 0x200D, # ‍ "zwnj": 0x200C, # ‌
}


def build_name2codepoint_dict() -> None:
	"""
	Build name -> codepoint dictionary.

	copy and paste the output to the name2codepoint dictionary
	name2str - name to utf-8 string dictionary
	"""
	import html.entities

	name2str = {}
	for k, v in name2codepoint_extra.items():
		name2str[k] = chr(v)
	for k, v in html.entities.name2codepoint.items():
		name2str[k] = chr(v)
	for key in sorted(name2str, key=lambda s: (s.lower(), s)):
		value = name2str[key]
		if len(value) > 1:
			raise ValueError(f"{value = }")
		print(f'\t"{key}": 0x{ord(value):0>4x},  # {value}')  # noqa: T201


def _sub_unescape_unicode(m: re.Match) -> str:
	text = m.group(0)
	if text[:2] == "&#":
		# character reference
		code = int(text[3:-1], 16) if text.startswith("&#x") else int(text[2:-1])
		try:
			char = chr(code)
		except ValueError:
			return text
		if char not in special_chars:
			return char
		return text
	# named entity
	name = text[1:-1]
	if name in name2codepoint:
		char = chr(name2codepoint[name])
		if char not in special_chars:
			return char
	return text


def unescape_unicode(text: str) -> str:
	"""
	Unescape unicode entities, but not "&lt;", "&gt;" and "&amp;".

	Leave these 3 special entities alone, since unescaping them
	creates invalid html.
	We also ignore quotations: "&quot;" and "&#x27;".
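
	Illustrative examples (not from the original source; the results follow
	from the name2codepoint and special_chars tables above):

	>>> unescape_unicode("caf&eacute;")
	'café'
	>>> unescape_unicode("&lt;b&gt;bold&lt;/b&gt;")
	'&lt;b&gt;bold&lt;/b&gt;'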
""" return re_entity.sub(_sub_unescape_unicode, text) if __name__ == "__main__": build_name2codepoint_dict() pyglossary-5.0.9/pyglossary/icu_types.py000066400000000000000000000025661476751035500205610ustar00rootroot00000000000000from __future__ import annotations import typing from collections.abc import Callable from typing import AnyStr __all__ = ["T_Collator", "T_Locale"] class T_Locale(typing.Protocol): def __init__(self, _id: str) -> None: ... def getName(self) -> str: ... class T_Collator(typing.Protocol): PRIMARY: int = 0 SECONDARY: int = 1 TERTIARY: int = 2 QUATERNARY: int = 3 IDENTICAL: int = 15 # mypy: error: Self argument missing for a non-static method # (or an invalid type for self) [misc] @classmethod # pyright: ignore[reportArgumentType] def createInstance(cls: T_Locale) -> T_Collator: ... # type: ignore @property def getSortKey(self) -> Callable[[AnyStr], bytes]: ... def setStrength(self, strength: int) -> None: ... def setAttribute(self, attr: int, value: int) -> None: ... class T_UCollAttribute(typing.Protocol): ALTERNATE_HANDLING: int = 1 CASE_FIRST: int = 2 CASE_LEVEL: int = 3 DECOMPOSITION_MODE: int = 4 FRENCH_COLLATION: int = 0 HIRAGANA_QUATERNARY_MODE: int = 6 NORMALIZATION_MODE: int = 4 NUMERIC_COLLATION: int = 7 STRENGTH: int = 5 class T_UCollAttributeValue(typing.Protocol): DEFAULT: int = -1 DEFAULT_STRENGTH: int = 2 IDENTICAL: int = 15 LOWER_FIRST: int = 24 NON_IGNORABLE: int = 21 OFF: int = 16 ON: int = 17 PRIMARY: int = 0 QUATERNARY: int = 3 SECONDARY: int = 1 SHIFTED: int = 20 TERTIARY: int = 2 UPPER_FIRST: int = 25 pyglossary-5.0.9/pyglossary/image_utils.py000066400000000000000000000024211476751035500210450ustar00rootroot00000000000000from __future__ import annotations import base64 import logging import re from os.path import join from pyglossary.text_utils import crc32hex __all__ = ["extractInlineHtmlImages"] log = logging.getLogger("pyglossary") re_inline_image = re.compile('src="(data:image/[^<>"]*)"') def extractInlineHtmlImages( defi: str, outDir: str, fnamePrefix: str = "", ) -> tuple[str, list[tuple[str, str]]]: imageDataDict: dict[str, bytes] = {} def subFunc(m: re.Match[str]) -> str: src = m.group(1)[len("data:image/") :] i = src.find(";") if i < 0: log.error(f"no semicolon, bad inline img src: {src[:60]}...") return "" imgFormat, src = src[:i], src[i + 1 :] if not src.startswith("base64,"): log.error(f"no 'base64,', bad inline img src: {src[:60]}...") return "" imgDataB64 = src[len("base64,") :] imgData = base64.b64decode(imgDataB64) imgFname = f"{fnamePrefix}{crc32hex(imgData)}.{imgFormat}" imageDataDict[imgFname] = imgData return f'src="./{imgFname}"' defi = re_inline_image.sub(subFunc, defi) images: list[tuple[str, str]] = [] for imgFname, imgData in imageDataDict.items(): imgPath = join(outDir, imgFname) with open(imgPath, mode="wb") as _file: _file.write(imgData) del imgData images.append((imgFname, imgPath)) return defi, images pyglossary-5.0.9/pyglossary/info.py000066400000000000000000000012231476751035500174750ustar00rootroot00000000000000__all__ = [ "c_author", "c_name", "c_publisher", "c_sourceLang", "c_targetLang", "infoKeysAliasDict", ] c_name = "name" c_sourceLang = "sourceLang" c_targetLang = "targetLang" c_copyright = "copyright" c_author = "author" c_publisher = "publisher" infoKeysAliasDict = { "title": c_name, "bookname": c_name, "dbname": c_name, ## "sourcelang": c_sourceLang, "inputlang": c_sourceLang, "origlang": c_sourceLang, ## "targetlang": c_targetLang, "outputlang": c_targetLang, "destlang": c_targetLang, ## "license": 
c_copyright, ## # do not map "publisher" to "author" ## "date": "creationTime", # are there alternatives to "lastUpdated"? } pyglossary-5.0.9/pyglossary/info_writer.py000066400000000000000000000076341476751035500211050ustar00rootroot00000000000000from __future__ import annotations from os.path import splitext from typing import TYPE_CHECKING, Any from pyglossary.core import log from pyglossary.io_utils import nullTextIO if TYPE_CHECKING: import io from collections.abc import Generator from pyglossary.glossary_types import ( EntryType, WriterGlossaryType, ) class InfoWriter: def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" self._file: io.TextIOBase = nullTextIO self._writeOptions: dict[str, Any] | None = None def setWriteOptions(self, options: dict[str, Any]) -> None: self._writeOptions = options def open(self, filename: str) -> None: self._filename = filename self._file = open(filename, mode="w", encoding="utf-8") def finish(self) -> None: self._filename = "" self._file.close() self._file = nullTextIO def write(self) -> Generator[None, EntryType, None]: # noqa: PLR0912, PLR0915, C901 import re from collections import Counter from pyglossary.json_utils import dataToPrettyJson from pyglossary.langs.writing_system import getWritingSystemFromText glos = self._glos re_possible_html = re.compile( r"<[a-z1-6]+[ />]", re.IGNORECASE, ) re_style = re.compile( r"<([a-z1-6]+)[^<>]* style=", re.IGNORECASE | re.DOTALL, ) wordCount = 0 bwordCount = 0 nonLowercaseWordCount = 0 styleByTagCounter: dict[str, int] = Counter() defiFormatCounter: dict[str, int] = Counter() firstTagCounter: dict[str, int] = Counter() allTagsCounter: dict[str, int] = Counter() sourceScriptCounter: dict[str, int] = Counter() dataEntryExtCounter: dict[str, int] = Counter() while True: entry = yield if entry is None: break defi = entry.defi wordCount += 1 bwordCount += defi.count("bword://") for word in entry.l_word: if word.lower() != word: nonLowercaseWordCount += 1 for m in re_style.finditer(defi): tag = m.group(1) styleByTagCounter[tag] += 1 defiFormat = entry.detectDefiFormat("") defiFormatCounter[defiFormat] += 1 if defiFormat == "m": if re_possible_html.match(defi): log.warning(f"undetected html defi: {defi}") elif defiFormat == "h": match = re_possible_html.search(defi) if match is not None: firstTagCounter[match.group().strip("< />").lower()] += 1 for tag in re_possible_html.findall(defi): allTagsCounter[tag.strip("< />").lower()] += 1 elif defiFormat == "b": _filenameNoExt, ext = splitext(entry.s_word) ext = ext.lstrip(".") dataEntryExtCounter[ext] += 1 ws = getWritingSystemFromText(entry.s_word) if ws: wsName = ws.name else: log.debug(f"No script detected for word: {entry.s_word}") wsName = "None" sourceScriptCounter[wsName] += 1 data_entry_count = defiFormatCounter["b"] del defiFormatCounter["b"] info: dict[str, Any] = dict(glos.iterInfo()) info["word_count"] = wordCount info["bword_count"] = bwordCount info["non_lowercase_word_count"] = nonLowercaseWordCount info["data_entry_count"] = data_entry_count info["data_entry_extension_count"] = ", ".join( f"{ext}={count}" for ext, count in dataEntryExtCounter.most_common() ) info["defi_format"] = ", ".join( f"{defiFormat}={count}" for defiFormat, count in sorted(defiFormatCounter.items()) ) info["defi_tag"] = ", ".join( f"{defiFormat}={count}" for defiFormat, count in allTagsCounter.most_common() ) info["defi_first_tag"] = ", ".join( f"{defiFormat}={count}" for defiFormat, count in firstTagCounter.most_common() ) info["style"] = ", 
".join( f"{defiFormat}={count}" for defiFormat, count in styleByTagCounter.most_common() ) info["source_script"] = ", ".join( f"{defiFormat}={count}" for defiFormat, count in sourceScriptCounter.most_common() ) if self._writeOptions is not None: info["write_options"] = self._writeOptions info["read_options"] = glos.readOptions self._file.write(dataToPrettyJson(info) + "\n") pyglossary-5.0.9/pyglossary/io_utils.py000066400000000000000000000070711476751035500204000ustar00rootroot00000000000000from __future__ import annotations import io from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Iterator __all__ = ["nullBinaryIO", "nullTextIO"] class _NullBinaryIO(io.BufferedIOBase): # noqa: PLR0904 def __enter__(self, *args): raise NotImplementedError def __exit__(self, *args): raise NotImplementedError def close(self) -> None: pass def fileno(self) -> int: raise NotImplementedError def flush(self) -> None: raise NotImplementedError def isatty(self) -> bool: raise NotImplementedError def readable(self) -> bool: raise NotImplementedError def seek(self, pos: int, whence: int = 0) -> int: raise NotImplementedError def seekable(self) -> bool: raise NotImplementedError def tell(self) -> int: raise NotImplementedError def truncate(self, pos: int | None = None) -> int: raise NotImplementedError def writable(self) -> bool: raise NotImplementedError def detach(self) -> io.RawIOBase: raise NotImplementedError def read(self, n: int | None = None) -> bytes: raise NotImplementedError def read1(self, n: int | None = None) -> bytes: raise NotImplementedError def readinto(self, buffer) -> int: raise NotImplementedError def readinto1(self, buffer) -> int: raise NotImplementedError # data: "bytearray|memoryview|array[Any]|io.mmap|io._CData|io.PickleBuffer" def write(self, data: bytes) -> int: # type: ignore raise NotImplementedError def __iter__(self) -> Iterator[bytes]: raise NotImplementedError def __next__(self) -> bytes: raise NotImplementedError def readline(self, size: int | None = -1) -> bytes: raise NotImplementedError def readlines(self, hint: int = -1) -> list[bytes]: raise NotImplementedError def writelines(self, lines: list[bytes]) -> None: # type: ignore raise NotImplementedError class _NullTextIO(io.TextIOBase): # noqa: PLR0904 def __enter__(self, *args): raise NotImplementedError def __exit__(self, *args): raise NotImplementedError def close(self) -> None: pass def fileno(self) -> int: raise NotImplementedError def flush(self) -> None: raise NotImplementedError def isatty(self) -> bool: raise NotImplementedError def readable(self) -> bool: raise NotImplementedError def seek(self, pos: int, whence: int = 0) -> int: raise NotImplementedError def seekable(self) -> bool: raise NotImplementedError def tell(self) -> int: raise NotImplementedError def truncate(self, pos: int | None = None) -> int: raise NotImplementedError def writable(self) -> bool: raise NotImplementedError def detach(self) -> io.IOBase: # type: ignore raise NotImplementedError def read(self, n: int | None = None) -> str: raise NotImplementedError def read1(self, n: int | None = None) -> str: raise NotImplementedError def readinto(self, buffer) -> io.BufferedIOBase: raise NotImplementedError def readinto1(self, buffer) -> io.BufferedIOBase: raise NotImplementedError # data: "bytearray|memoryview|array[Any]|io.mmap|io._CData|io.PickleBuffer" def write(self, data: bytes) -> int: # type: ignore raise NotImplementedError def __iter__(self) -> Iterator[str]: # type: ignore raise NotImplementedError def __next__(self) 
-> str: # type: ignore raise NotImplementedError def readline(self, size: int | None = -1) -> str: # type: ignore raise NotImplementedError def readlines(self, hint: int = -1) -> list[str]: # type: ignore raise NotImplementedError def writelines(self, lines: list[str]) -> None: # type: ignore raise NotImplementedError nullBinaryIO = _NullBinaryIO() nullTextIO = _NullTextIO() pyglossary-5.0.9/pyglossary/iter_utils.py000066400000000000000000000032661476751035500207360ustar00rootroot00000000000000# Copyright (c) 2019 Saeed Rasooli # Copyright (c) 2012 Erik Rose # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Iterable, Iterator from typing import Any __all__ = ["unique_everseen"] # from https://github.com/erikrose/more-itertools def unique_everseen(iterable: Iterable) -> Iterator: """List unique elements, preserving order. 
Remember all elements ever seen.""" from itertools import filterfalse # unique_everseen('AAAABBBCCDAABBB') --> A B C D seen: set[Any] = set() seen_add = seen.add for element in filterfalse(seen.__contains__, iterable): seen_add(element) yield element pyglossary-5.0.9/pyglossary/json_utils.py000066400000000000000000000007671476751035500207470ustar00rootroot00000000000000from __future__ import annotations import json from typing import TYPE_CHECKING if TYPE_CHECKING: from typing import AnyStr, TypeAlias __all__ = ["dataToPrettyJson", "jsonToData"] JsonEncodable: TypeAlias = dict | list def dataToPrettyJson( data: JsonEncodable, ensure_ascii: bool = False, sort_keys: bool = False, ) -> str: return json.dumps( data, sort_keys=sort_keys, indent="\t", ensure_ascii=ensure_ascii, ) def jsonToData(st: AnyStr) -> JsonEncodable: return json.loads(st) pyglossary-5.0.9/pyglossary/langs/000077500000000000000000000000001476751035500172765ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/langs/__init__.py000066400000000000000000000036351476751035500214160ustar00rootroot00000000000000from __future__ import annotations import json import logging from os.path import join from pyglossary.core import rootDir log = logging.getLogger("pyglossary") class Lang: def __init__( self, codes: list[str], names: list[str], titleTag: str = "b", rtl: int = 0, ) -> None: self._codes = codes self._names = names self._titleTag = titleTag self._rtl = rtl def __repr__(self) -> str: return ( "Lang(" f"codes={self._codes!r}, " f"names={self._names!r}, " f"titleTag={self._titleTag!r}" ")" ) def __str__(self) -> str: return f"Lang({self._codes + self._names})" @property def codes(self) -> list[str]: return self._codes @property def names(self) -> list[str]: return self._names @property def name(self) -> str: return self._names[0] @property def code(self) -> str: return self._codes[0] @property def titleTag(self) -> str: return self._titleTag @property def rtl(self) -> int: return self._rtl class LangDict(dict): def _addLang(self, lang: Lang) -> None: for key in lang.codes: if key in self: log.error(f"duplicate language code: {key}") self[key] = lang for name in lang.names: if name in self: log.error(f"duplicate language name: {name}") self[name.lower()] = lang def load(self) -> None: from time import perf_counter as now if len(self) > 0: return t0 = now() filename = join(rootDir, "pyglossary", "langs", "langs.json") with open(filename, encoding="utf-8") as _file: data = json.load(_file) for row in data: self._addLang( Lang( codes=row["codes"], names=[row["name"]] + row["alt_names"], titleTag=row["title_tag"], rtl=row.get("rtl", 0), ), ) log.debug( f"LangDict: loaded, {len(self)} keys, took {(now() - t0) * 1000:.1f} ms", ) def __getitem__(self, key: str) -> Lang | None: self.load() return self.get(key.lower(), None) langDict = LangDict() pyglossary-5.0.9/pyglossary/langs/langs.json000066400000000000000000001352001476751035500212760ustar00rootroot00000000000000[ { "codes": ["aa", "aar"], "name": "Afar", "alt_names": ["Qafaraf", "’Afar Af", "Afaraf", "Qafar af"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Afar_language", "title_tag": "b" }, { "codes": ["ab", "abk"], "name": "Abkhaz", "alt_names": ["Abkhazian", "Abxaz", "Аҧсуа", "Аҧсуа бызшәа"], "script": ["Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Abkhaz_language", "title_tag": "b" }, { "codes": ["ae", "ave"], "name": "Avestan", "alt_names": ["Zend", "اوستایی"], "rtl": 1, "script": ["Avestan"], "wiki": "https://en.wikipedia.org/wiki/Avestan", 
"title_tag": "b" }, { "codes": ["af", "afr"], "name": "Afrikaans", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Afrikaans", "title_tag": "b" }, { "codes": ["ain"], "name": "Ainu", "alt_names": ["Ainuic", "Aynu", "itak", "アィヌ・イタㇰ"], "script": ["CJK", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Ainu_language", "title_tag": "big" }, { "codes": ["aib"], "name": "Äynu", "alt_names": ["Aynu", "Ainu", "Aini", "Eynu", "Abdal", "Äynú", "ئەينۇ‎", "ئابدال"], "script": ["Arabic"], "wiki": "https://en.wikipedia.org/wiki/%C3%84ynu_language", "title_tag": "big" }, { "codes": ["ak", "aka"], "name": "Akan", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Akan_language", "title_tag": "b" }, { "codes": ["alg"], "name": "Algonquian", "alt_names": ["Algonkian"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Algonquian_languages", "title_tag": "b" }, { "codes": ["am", "amh"], "name": "Amharic", "alt_names": ["አማርኛ", "Amarəñña"], "script": ["Ge'ez"], "wiki": "https://en.wikipedia.org/wiki/Amharic", "title_tag": "big" }, { "codes": ["an", "arg"], "name": "Aragonese", "alt_names": ["Aragonés"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Aragonese_language", "title_tag": "b" }, { "codes": ["ar", "ara"], "name": "Arabic", "alt_names": ["اَلْعَرَبِيَّةُ", "العربیه", "عَرَبِيّ", "عربی"], "rtl": 1, "script": ["Arabic"], "wiki": "https://en.wikipedia.org/wiki/Arabic", "title_tag": "b" }, { "codes": ["arc", "syc"], "name": "Aramaic", "alt_names": ["Classical Syriac", "ܐܪܡܝܐ"], "rtl": 1, "script": ["Syriac"], "wiki": "https://en.wikipedia.org/wiki/Aramaic", "title_tag": "b" }, { "codes": ["arn"], "name": "Mapuche", "alt_names": ["Mapudungun"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Mapuche_language", "title_tag": "b" }, { "codes": ["as", "asm"], "name": "Assamese", "alt_names": ["Asamiya", "অসমীয়া"], "script": ["Bengali-Assamese"], "wiki": "https://en.wikipedia.org/wiki/Assamese_language", "title_tag": "big" }, { "codes": ["av", "ava", "aya"], "name": "Avar", "alt_names": ["Avaric", "Авар", "Awar"], "script": ["Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Avar_language", "title_tag": "b" }, { "codes": ["ay", "aym"], "name": "Aymara", "alt_names": ["Aymar aru", "Aymaran"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Aymara_language", "title_tag": "b" }, { "codes": ["az", "aze"], "name": "Azerbaijani", "alt_names": ["Azeri"], "script": ["Latin", "Arabic"], "wiki": "https://en.wikipedia.org/wiki/Azerbaijani_language", "title_tag": "b" }, { "codes": ["ba", "bak"], "name": "Bashkir", "alt_names": ["Башҡортса‎", "Башҡорт теле", "Башорца", "Башкирский"], "script": ["Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Bashkir_language", "title_tag": "b" }, { "codes": ["be", "bel"], "name": "Belarusian", "alt_names": ["Беларуская", "Белорусский"], "script": ["Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Belarusian_language", "title_tag": "b" }, { "codes": ["bg", "bul"], "name": "Bulgarian", "alt_names": ["български", "български език", "Bǎlgarski"], "script": ["Cyrillic", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Bulgarian_language", "title_tag": "b" }, { "codes": ["bh", "bih"], "name": "Bihari", "alt_names": ["Bihari languages", "बिहारी"], "script": ["Devanagari"], "wiki": "https://en.wikipedia.org/wiki/Bihari_languages", "title_tag": "big" }, { "codes": ["bi", "bis"], "name": "Bislama", "alt_names": ["Bichelamar"], "script": ["Latin"], "wiki": 
"https://en.wikipedia.org/wiki/Bislama", "title_tag": "b" }, { "codes": ["bm", "bam"], "name": "Bambara", "alt_names": ["Bamanankan", "ߓߡߊߣߊ߲", "ߓߡߊߣߊ߲ߞߊ߲"], "script": ["Latin", "N'Ko"], "wiki": "https://en.wikipedia.org/wiki/Bambara_language", "title_tag": "big" }, { "codes": ["bn", "ben"], "name": "Bengali", "alt_names": ["বাংলা"], "script": ["Bengali-Assamese"], "wiki": "https://en.wikipedia.org/wiki/Bengali_language", "title_tag": "big" }, { "codes": ["bnt"], "name": "Bantu", "alt_names": ["*bantʊ̀", "bantʊ̀"], "script": ["Latin", "Arabic", "Mandombe"], "wiki": "https://en.wikipedia.org/wiki/Bantu_languages", "title_tag": "b" }, { "codes": ["bo", "tib", "bod"], "name": "Tibetan", "alt_names": ["Standard Tibetan"], "script": ["Tibetan"], "wiki": "https://en.wikipedia.org/wiki/Standard_Tibetan", "title_tag": "b" }, { "codes": ["br", "bre"], "name": "Breton", "alt_names": ["Brezhoneg"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Breton_language", "title_tag": "b" }, { "codes": ["bs", "bos"], "name": "Bosnian", "alt_names": ["Bosanski", "Босански"], "script": ["Cyrillic", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Bosnian_language", "title_tag": "b" }, { "codes": ["ca", "cat"], "name": "Catalan", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Catalan_language", "title_tag": "b" }, { "codes": ["ce", "che"], "name": "Chechen", "alt_names": ["нохчийн", "нохчийн мотт"], "script": ["Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Chechen_language", "title_tag": "b" }, { "codes": ["ch", "cha"], "name": "Chamorro", "alt_names": ["Chamoru", "CHamoru"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Chamorro_language", "title_tag": "b" }, { "codes": ["chn"], "name": "Chinook Jargon", "alt_names": ["Chinuk Wawa", "Chinook Wawa", "wawa", "chinook lelang", "lelang", "chinook"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Chinook_Jargon", "title_tag": "b" }, { "codes": ["co", "cos"], "name": "Corsican", "alt_names": ["Corsa"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Corsican_language", "title_tag": "b" }, { "codes": ["cr", "cre"], "name": "Cree", "alt_names": [], "script": ["Canadian syllabic"], "wiki": "https://en.wikipedia.org/wiki/Cree", "title_tag": "b" }, { "codes": ["cs", "cze", "ces"], "name": "Czech", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Czech_language", "title_tag": "b" }, { "codes": ["cu", "chu"], "name": "Church Slavonic", "alt_names": ["Church Slavic", "New Church Slavonic", "New Church Slavic", "црькъвьнословѣньскъ ѩзыкъ"], "script": ["Glagolitic", "Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Church_Slavonic", "title_tag": "b" }, { "codes": ["cv", "chv"], "name": "Chuvash", "alt_names": ["Căvašla", "Çovaşla", "Чӑвашла"], "script": ["Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Chuvash_language", "title_tag": "b" }, { "codes": ["cy", "wel", "cym"], "name": "Welsh", "alt_names": ["Cymraeg", "y Gymraeg"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Welsh_language", "title_tag": "b" }, { "codes": ["da", "dan"], "name": "Danish", "alt_names": ["dansk"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Danish_language", "title_tag": "b" }, { "codes": ["de", "ger", "deu"], "name": "German", "alt_names": ["Deutsch"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/German_language", "title_tag": "b" }, { "codes": ["dv", "div"], "name": "Maldivian", "alt_names": ["Dhivehi", "Divehi", "ދިވެހި"], "rtl": 1, 
"script": ["Thaana"], "wiki": "https://en.wikipedia.org/wiki/Maldivian_language", "title_tag": "b" }, { "codes": ["dyu"], "name": "Dyula", "alt_names": ["Jula", "Dioula", "ߖߎ߬ߟߊ߬ߞߊ߲)"], "script": ["N'Ko", "Latin", "Ajami"], "wiki": "https://en.wikipedia.org/wiki/Dyula_language", "title_tag": "b" }, { "codes": ["dz", "dzo"], "name": "Dzongkha", "alt_names": ["རྫོང་ཁ་"], "script": ["Tibetan"], "wiki": "https://en.wikipedia.org/wiki/Dzongkha", "title_tag": "big" }, { "codes": ["ee", "ewe"], "name": "Ewe", "alt_names": ["Èʋe", "Èʋegbe"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Ewe_language", "title_tag": "b" }, { "codes": ["el", "gre", "ell", "gr"], "name": "Greek", "alt_names": ["ελληνικά", "Elliniká"], "script": ["Greek"], "wiki": "https://en.wikipedia.org/wiki/Greek_language", "title_tag": "b" }, { "codes": ["grc"], "name": "Ancient Greek", "alt_names": ["Ἑλληνική", "Hellēnikḗ"], "script": ["Greek"], "wiki": "https://en.wikipedia.org/wiki/Ancient_Greek", "title_tag": "b" }, { "codes": ["en", "eng"], "name": "English", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/English_language", "title_tag": "b" }, { "codes": ["enm"], "name": "Middle English", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Middle_English", "title_tag": "b" }, { "codes": ["eo", "epo"], "name": "Esperanto", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Esperanto", "title_tag": "b" }, { "codes": ["es", "spa"], "name": "Spanish", "alt_names": ["español", "española"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Spanish_language", "title_tag": "b" }, { "codes": ["et", "est"], "name": "Estonian", "alt_names": ["eesti keel", "eesti"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Estonian_language", "title_tag": "b" }, { "codes": ["eu", "baq", "eus"], "name": "Basque", "alt_names": ["Euskara"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Basque_language", "title_tag": "b" }, { "codes": ["fa", "per", "fas", "prp"], "name": "Persian", "alt_names": ["Farsi", "فارسی", "Parsi", "Fārsī", "форсӣ", "Forsī", "Porsī"], "rtl": 1, "script": ["Arabic"], "wiki": "https://en.wikipedia.org/wiki/Persian_language", "title_tag": "b" }, { "codes": ["ff", "ful", "fuc", "fuf"], "name": "Fula", "alt_names": ["Fulani", "Fulah", "Fulfulde", "𞤊𞤵𞤤𞤬𞤵𞤤𞤣𞤫", "Pulaar", "𞤆𞤵𞤤𞤢𞥄𞤪", "Pular", "𞤆𞤵𞤤𞤢𞤪", "Peul"], "script": ["Latin", "Arabic", "Adlam"], "wiki": "https://en.wikipedia.org/wiki/Fula_language", "title_tag": "big" }, { "codes": ["fi", "fin"], "name": "Finnish", "alt_names": ["suomi", "suomen kieli"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Finnish_language", "title_tag": "b" }, { "codes": ["fil"], "name": "Filipino", "alt_names": ["Pilipino", "Wikang Filipino"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Filipino_language", "title_tag": "b" }, { "codes": ["fj", "fij"], "name": "Fijian", "alt_names": ["Na Vosa Vakaviti", "Na vosa vaka-Viti"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Fijian_language", "title_tag": "b" }, { "codes": ["fo", "fao"], "name": "Faroese", "alt_names": ["føroyskt mál"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Faroese_language", "title_tag": "b" }, { "codes": ["fr", "fre", "fra"], "name": "French", "alt_names": ["français", "langue française"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/French_language", "title_tag": "b" }, { "codes": ["fy", "fry"], "name": "West Frisian", "alt_names": 
["Western Frisian", "Frisian"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/West_Frisian_languages", "title_tag": "b" }, { "codes": ["ga", "gle"], "name": "Irish", "alt_names": ["Gaeilge"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Irish_language", "title_tag": "b" }, { "codes": ["gd", "gla"], "name": "Scottish Gaelic", "alt_names": ["Gaelic", "Gàidhlig", "Scots Gaelic"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Scottish_Gaelic", "title_tag": "b" }, { "codes": ["gju"], "name": "Gurjari", "alt_names": ["Gujri", "गुर्जरी", "گُوجَری"], "rtl": 1, "script": ["Takri", "Arabic", "Devanagari"], "wiki": "https://en.wikipedia.org/wiki/Gujari_language", "title_tag": "big" }, { "codes": ["gl", "glg"], "name": "Galician", "alt_names": ["galego"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Galician_language", "title_tag": "b" }, { "codes": ["gn", "grn"], "name": "Guarani", "alt_names": ["Paraguayan Guarani", "avañeʼẽ"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Guarani_language", "title_tag": "b" }, { "codes": ["gu", "guj"], "name": "Gujarati", "alt_names": ["ગુજરાતી", "Gujarātī"], "script": ["Gujarati"], "wiki": "https://en.wikipedia.org/wiki/Gujarati_language", "title_tag": "big" }, { "codes": ["gv", "glv"], "name": "Manx", "alt_names": ["Manx Gaelic", "Gaelg", "Gailck"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Manx_language", "title_tag": "b" }, { "codes": ["gwc"], "name": "Kalami", "alt_names": ["Gawri", "Garwi", "Bashkarik", "کالامي", "ګاوری"], "rtl": 1, "script": ["Arabic"], "wiki": "https://en.wikipedia.org/wiki/Kalami_language", "title_tag": "b" }, { "codes": ["ha", "hau"], "name": "Hausa", "alt_names": ["Harshen Hausa", "Halshen Hausa", "هَرْشَن هَوْسَ‎"], "rtl": 2, "script": ["Latin", "Arabic"], "wiki": "https://en.wikipedia.org/wiki/Hausa_language", "title_tag": "b" }, { "codes": [], "name": "Haitian French", "alt_names": ["français haïtien"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Haitian_French", "title_tag": "b" }, { "codes": ["haw"], "name": "Hawaiian", "alt_names": ["ʻŌlelo Hawaiʻi"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Hawaiian_language", "title_tag": "b" }, { "codes": ["he", "heb"], "name": "Hebrew", "alt_names": ["Ivrit", "עִבְרִית"], "rtl": 1, "script": ["Hebrew"], "wiki": "https://en.wikipedia.org/wiki/Hebrew_language", "title_tag": "b" }, { "codes": ["hi", "hin"], "name": "Hindi", "alt_names": ["Hindī", "हिंदी"], "script": ["Devanagari", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Hindi", "title_tag": "big" }, { "codes": [], "name": "Hindko", "alt_names": ["ہندکو"], "rtl": 1, "script": ["Arabic"], "wiki": "https://en.wikipedia.org/wiki/Hindko", "title_tag": "b" }, { "codes": ["hnd"], "name": "Southern Hindko", "alt_names": [], "rtl": 1, "script": ["Arabic"], "wiki": "https://en.wikipedia.org/wiki/Hindko", "title_tag": "b" }, { "codes": ["hno"], "name": "Northern Hindko", "alt_names": [], "rtl": 1, "script": ["Arabic"], "wiki": "https://en.wikipedia.org/wiki/Hindko", "title_tag": "b" }, { "codes": ["ho", "hmo"], "name": "Hiri Motu", "alt_names": ["Police Motu", "Pidgin Motu", "Hiri"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Hiri_Motu", "title_tag": "b" }, { "codes": ["hr", "hrv", "scr"], "name": "Croatian", "alt_names": ["hrvatski"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Croatian_language", "title_tag": "b" }, { "codes": ["ht", "hat"], "name": "Haitian Creole", "alt_names": ["Haitian", 
"kreyòl ayisyen", "kreyòl", "créole haïtien"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Haitian_Creole", "title_tag": "b" }, { "codes": ["hu", "hun"], "name": "Hungarian", "alt_names": ["magyar nyelv"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Hungarian_language", "title_tag": "b" }, { "codes": ["hy", "arm", "hye"], "name": "Armenian", "alt_names": ["հայերէն", "հայերեն", "hayeren"], "script": ["Armenian"], "wiki": "https://en.wikipedia.org/wiki/Armenian_language", "title_tag": "b" }, { "codes": ["hz", "her"], "name": "Herero", "alt_names": ["Otjiherero"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Herero_language", "title_tag": "b" }, { "codes": ["ia", "ina"], "name": "Interlingua", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Interlingua", "title_tag": "b" }, { "codes": ["id", "ind"], "name": "Indonesian", "alt_names": ["bahasa Indonesia"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Indonesian_language", "title_tag": "b" }, { "codes": ["ie", "ile"], "name": "Interlingue", "alt_names": ["Occidental", "Interlingue"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Interlingue", "title_tag": "b" }, { "codes": ["ig", "ibo"], "name": "Igbo", "alt_names": ["Ásụ̀sụ̀ Ìgbò"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Igbo_language", "title_tag": "b" }, { "codes": ["ii", "iii"], "name": "Nuosu", "alt_names": ["Nosu", "Northern Yi", "Liangshan Yi", "Sichuan Yi", "ꆈꌠꉙ", "Nuosuhxop", "彝語", "诺苏语"], "script": ["CJK"], "wiki": "https://en.wikipedia.org/wiki/Nuosu_language", "title_tag": "big" }, { "codes": ["ik", "ipk"], "name": "Inupiaq", "alt_names": ["Inupiat", "Inupiatun", "Alaskan Inuit", "Iñupiatun", "Inupiaqtun"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Inupiaq_language", "title_tag": "b" }, { "codes": ["io", "ido"], "name": "Ido", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Ido", "title_tag": "b" }, { "codes": ["is", "ice", "isl"], "name": "Icelandic", "alt_names": ["íslenska"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Icelandic_language", "title_tag": "b" }, { "codes": ["it", "ita"], "name": "Italian", "alt_names": ["italiano", "lingua italiana"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Italian_language", "title_tag": "b" }, { "codes": ["iu", "iku", "ike"], "name": "Inuktitut", "alt_names": ["Eastern Canadian Inuktitut", "ᐃᓄᒃᑎᑐᑦ"], "script": ["Canadian syllabic", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Inuktitut", "title_tag": "b" }, { "codes": ["ja", "jpn"], "name": "Japanese", "alt_names": ["日本語", "にほんご", "Nihongo"], "script": ["CJK"], "wiki": "https://en.wikipedia.org/wiki/Japanese_language", "title_tag": "big" }, { "codes": ["jb", "jbo"], "name": "Lojban", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Lojban", "title_tag": "b" }, { "codes": ["jv", "jav"], "name": "Javanese", "alt_names": ["ꦧꦱꦗꦮ", "ꦕꦫꦗꦮ", "باسا جاوا", "Basa Jawa", "Båså Jåwå", "Cara Jawa"], "script": ["Latin", "Javanese", "Arabic"], "wiki": "https://en.wikipedia.org/wiki/Javanese_language", "title_tag": "big" }, { "codes": ["ka", "geo", "kat"], "name": "Georgian", "alt_names": ["Kartuli", "ქართული"], "script": ["Georgian"], "wiki": "https://en.wikipedia.org/wiki/Georgian_language", "title_tag": "big" }, { "codes": ["kea"], "name": "Cape Verdean Creole", "alt_names": ["Cape Verdean"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Cape_Verdean_Creole", 
"title_tag": "b" }, { "codes": ["kg", "kon"], "name": "Kongo", "alt_names": ["Kikongo"], "script": ["Latin", "Mandombe"], "wiki": "https://en.wikipedia.org/wiki/Kongo_language", "title_tag": "b" }, { "codes": ["kha"], "name": "Khasi", "alt_names": ["Ka Ktien Khasi", "ক ক্ত্যেন খসি"], "script": ["Latin", "Bengali-Assamese"], "wiki": "https://en.wikipedia.org/wiki/Khasi_language", "title_tag": "big" }, { "codes": ["ki", "kik"], "name": "Kikuyu", "alt_names": ["Gĩkũyũ"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Kikuyu_language", "title_tag": "b" }, { "codes": ["kj", "kua"], "name": "Kwanyama", "alt_names": ["Kuanyama", "Cuanhama"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Kwanyama_dialect", "title_tag": "b" }, { "codes": ["kk", "kaz"], "name": "Kazakh", "alt_names": ["qazaqşa", "qazaq tili", "қазақша", "қазақ тілі", "قازاقشا", "قازاق تىلى"], "script": ["Cyrillic", "Latin", "Arabic"], "wiki": "https://en.wikipedia.org/wiki/Kazakh_language", "title_tag": "b" }, { "codes": ["kl", "kal"], "name": "Greenlandic", "alt_names": ["kalaallisut", "grønlandsk"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Greenlandic_language", "title_tag": "b" }, { "codes": ["km", "khm"], "name": "Khmer", "alt_names": ["Cambodian", "ភាសាខ្មែរ", "phiăsaa khmae", "ខ្មែរ", "khmae"], "script": ["Khmer"], "wiki": "https://en.wikipedia.org/wiki/Khmer_language", "title_tag": "big" }, { "codes": ["kn", "kan"], "name": "Kannada", "alt_names": ["Kanarese", "ಕನ್ನಡ"], "script": ["Kannada"], "wiki": "https://en.wikipedia.org/wiki/Kannada", "title_tag": "b" }, { "codes": ["ko", "kor"], "name": "Korean", "alt_names": ["한국어", "韓國語", "조선말", "朝鮮말"], "script": ["CJK"], "wiki": "https://en.wikipedia.org/wiki/Korean_language", "title_tag": "big" }, { "codes": ["kr", "kau"], "name": "Kanuri", "alt_names": ["Kànùrí"], "script": ["Arabic", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Kanuri_language", "title_tag": "b" }, { "codes": ["ks", "kas"], "name": "Kashmiri", "alt_names": ["Koshur", "कॉशुर", "كٲشُر"], "script": ["Arabic", "Devanagari"], "wiki": "https://en.wikipedia.org/wiki/Kashmiri_language", "title_tag": "big" }, { "codes": ["ku", "kur"], "name": "Kurdish", "alt_names": ["Kurdî", "کوردی"], "script": ["Arabic", "Latin", "Armenian"], "wiki": "https://en.wikipedia.org/wiki/Kurdish_languages", "title_tag": "b" }, { "codes": ["kv", "kom"], "name": "Komi", "alt_names": ["Komi-Zyryan", "Коми", "Коми кыв"], "script": ["Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Komi-Zyryan_language", "title_tag": "b" }, { "codes": ["kw", "cor"], "name": "Cornish", "alt_names": ["Kernewek", "Kernowek"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Cornish_language", "title_tag": "b" }, { "codes": ["ky", "kir"], "name": "Kyrgyz", "alt_names": ["Kirghiz", "Kirgiz", "Qirghiz", "Кыргызча", "Qırğızça"], "script": ["Cyrillic", "Arabic", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Kyrgyz_language", "title_tag": "b" }, { "codes": ["la", "lat"], "name": "Latin", "alt_names": ["latine", "latīne"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Latin", "title_tag": "b" }, { "codes": ["lb", "ltz"], "name": "Luxembourgish", "alt_names": ["Luxemburgish", "Letzeburgesch", "Lëtzebuergesch", "Luxembourgian"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Luxembourgish", "title_tag": "b" }, { "codes": ["lg", "lug"], "name": "Luganda", "alt_names": ["Ganda", "Oluganda"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Luganda", "title_tag": "b" }, { 
"codes": ["li", "lim"], "name": "Limburgish", "alt_names": ["Limburgan", "Limburgian", "Limburgic", "Lèmburgs", "Limburgs", "Limburgisch", "Limbourgeois"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Limburgish", "title_tag": "b" }, { "codes": ["ln", "lin"], "name": "Lingala", "alt_names": ["lingála"], "script": ["Latin", "Mandombe"], "wiki": "https://en.wikipedia.org/wiki/Lingala", "title_tag": "b" }, { "codes": ["lo", "lao"], "name": "Lao", "alt_names": ["Laotian", "ລາວ", "ພາສາລາວ"], "script": ["Lao", "Thai"], "wiki": "https://en.wikipedia.org/wiki/Lao_language", "title_tag": "big" }, { "codes": ["lt", "lit"], "name": "Lithuanian", "alt_names": ["lietuvių kalba"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Lithuanian_language", "title_tag": "b" }, { "codes": ["lu", "lub"], "name": "Luba-Katanga", "alt_names": ["Luba-Shaba", "Kiluba"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Luba-Katanga_language", "title_tag": "b" }, { "codes": ["lv", "lav"], "name": "Latvian", "alt_names": ["Lettish", "latviešu valoda"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Latvian_language", "title_tag": "b" }, { "codes": ["mas", "cma"], "name": "Maasai", "alt_names": ["Masai", "Maa", "ɔl"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Maasai_language", "title_tag": "b" }, { "codes": ["mg", "mlg"], "name": "Malagasy", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Malagasy_language", "title_tag": "b" }, { "codes": ["mh", "mah"], "name": "Marshallese", "alt_names": ["Ebon", "Kajin M̧ajeļ", "Kajin Majōl"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Marshallese_language", "title_tag": "b" }, { "codes": ["mi", "mao", "mri"], "name": "Maori", "alt_names": ["Māori", "Te reo Māori"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/M%C4%81ori_language", "title_tag": "b" }, { "codes": ["mk", "mac", "mkd"], "name": "Macedonian", "alt_names": ["македонски", "македонски јазик"], "script": ["Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Macedonian_language", "title_tag": "b" }, { "codes": ["ml", "mal"], "name": "Malayalam", "alt_names": ["മലയാളം", "Malayāḷam"], "script": ["Malayalam"], "wiki": "https://en.wikipedia.org/wiki/Malayalam", "title_tag": "big" }, { "codes": ["mnc"], "name": "Manchu", "alt_names": ["manju gisun", "ᠮᠠᠨᠵᡠᡤᡳᠰᡠᠨ"], "script": ["Mongolian"], "wiki": "https://en.wikipedia.org/wiki/Manchu_language", "title_tag": "big" }, { "codes": ["mn", "mon"], "name": "Mongolian", "alt_names": ["монгол хэл", "ᠮᠣᠩᠭᠣᠯ ᠬᠡᠯᠡ"], "script": ["Mongolian", "Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Mongolian_language", "title_tag": "b" }, { "codes": ["mo", "mol"], "name": "Moldovan", "alt_names": ["Moldavian", "limba moldovenească", "лимба молдовеняскэ", "лимба Молдовенѣскъ"], "script": ["Latin", "Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Moldovan_language", "title_tag": "b" }, { "codes": ["mr", "mar"], "name": "Marathi", "alt_names": ["मराठी", "Marāṭhī"], "script": ["Devanagari"], "wiki": "https://en.wikipedia.org/wiki/Marathi_language", "title_tag": "b" }, { "codes": ["ms", "may"], "name": "Malay", "alt_names": ["bahasa Melayu", "بهاس ملايو", "ꤷꥁꤼ ꤸꥍꤾꤿꥈ"], "script": ["Latin", "Arabic", "Thai"], "wiki": "https://en.wikipedia.org/wiki/Malay_language", "title_tag": "b" }, { "codes": ["mt", "mlt"], "name": "Maltese", "alt_names": ["Malti"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Maltese_language", "title_tag": "b" }, { "codes": ["my", "bur", 
"mya"], "name": "Burmese", "alt_names": ["မြန်မာစာ", "မြန်မာစကား"], "script": ["Burmese"], "wiki": "https://en.wikipedia.org/wiki/Burmese_language", "title_tag": "big" }, { "codes": ["na", "nau"], "name": "Nauruan", "alt_names": ["Nauru", "dorerin Naoero", "Ekaiairũ Naoero"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Nauruan_language", "title_tag": "b" }, { "codes": ["nb", "nob"], "name": "Bokmal", "alt_names": ["Bokmål", "Norwegian Bokmal", "Norwegian Bokmål"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Bokm%C3%A5l", "title_tag": "b" }, { "codes": ["nd", "nde"], "name": "North Ndebele", "alt_names": ["Ndebele", "amaNdebele", "Zimbabwean Ndebele", "North Ndebele"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Northern_Ndebele_language", "title_tag": "b" }, { "codes": ["ne", "nep"], "name": "Nepali", "alt_names": ["Gorkhali", "Khaskura", "Nepalese", "Parbate", "नेपाली", "खस कुरा"], "script": ["Devanagari"], "wiki": "https://en.wikipedia.org/wiki/Nepali_language", "title_tag": "big" }, { "codes": ["ng", "ndo"], "name": "Ndonga", "alt_names": ["Oshindonga"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Ndonga_dialect", "title_tag": "b" }, { "codes": ["nl", "dut", "nld"], "name": "Dutch", "alt_names": ["Flemish", "Nederlands"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Dutch_language", "title_tag": "b" }, { "codes": ["nn", "nno"], "name": "Norwegian Nynorsk", "alt_names": ["nynorsk"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Nynorsk", "title_tag": "b" }, { "codes": ["no", "nor"], "name": "Norwegian", "alt_names": ["norsk"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Norwegian_language", "title_tag": "b" }, { "codes": ["nr", "nbl"], "name": "Southern Ndebele", "alt_names": ["South Ndebele", "Transvaal Ndebele", "isiNdebele seSewula"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Southern_Ndebele_language", "title_tag": "b" }, { "codes": ["nv", "nav"], "name": "Navajo", "alt_names": ["Navaho", "Diné bizaad", "Naabeehó bizaad"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Navajo_language", "title_tag": "b" }, { "codes": ["ny", "nya"], "name": "Chewa", "alt_names": ["Nyanja", "Chichewa", "Chinyanja"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Chewa_language", "title_tag": "b" }, { "codes": ["oc", "oci"], "name": "Occitan", "alt_names": ["lenga d'òc", "provençal"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Occitan_language", "title_tag": "b" }, { "codes": ["oj", "oji"], "name": "Ojibwe", "alt_names": ["Ojibwa", "Ojibway", "Otchipwe", "Anishinaabemowin"], "script": ["Latin", "Canadian syllabic"], "wiki": "https://en.wikipedia.org/wiki/Ojibwe_language", "title_tag": "b" }, { "codes": ["om", "orm"], "name": "Oromo", "alt_names": ["Afaan Oromoo"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Oromo_language", "title_tag": "b" }, { "codes": ["or", "ori", "ory"], "name": "Odia", "alt_names": ["Oriya", "ଓଡ଼ିଆ", "Oṛiā"], "script": ["Odia"], "wiki": "https://en.wikipedia.org/wiki/Odia_language", "title_tag": "big" }, { "codes": ["os", "oss"], "name": "Ossetian", "alt_names": ["Ossetic", "Ossete", "ирон ӕвзаг", "дигорон ӕвзаг"], "script": ["Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Ossetian_language", "title_tag": "b" }, { "codes": ["pa", "pan"], "name": "Punjabi", "alt_names": ["Panjabi", "ਪੰਜਾਬੀ", "پن٘جابی"], "script": ["Gurmukhi", "Arabic"], "wiki": "https://en.wikipedia.org/wiki/Punjabi_language", 
"title_tag": "big" }, { "codes": ["pi", "pli"], "name": "Pali", "alt_names": ["Magadhan"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Pali", "title_tag": "b" }, { "codes": ["pl", "pol"], "name": "Polish", "alt_names": ["język polski", "polszczyzna", "polski"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Polish_language", "title_tag": "b" }, { "codes": ["ps", "pus"], "name": "Pashto", "alt_names": ["پښتو", "Pax̌tō"], "script": ["Arabic"], "wiki": "https://en.wikipedia.org/wiki/Pashto", "title_tag": "b" }, { "codes": ["pt", "por"], "name": "Portuguese", "alt_names": ["português", "língua portuguesa"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Portuguese_language", "title_tag": "b" }, { "codes": ["qu", "que"], "name": "Quechua", "alt_names": ["Runasimi", "Kechua", "Runa Simi"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Quechuan_languages", "title_tag": "b" }, { "codes": ["rm", "roh"], "name": "Rhaeto-Romance", "alt_names": ["Rheto-Romance", "Rhaetian", "Raeto-Romance"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Rhaeto-Romance_languages", "title_tag": "b" }, { "codes": ["rn", "run"], "name": "Kirundi", "alt_names": ["Rundi", "Ikirundi"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Kirundi", "title_tag": "b" }, { "codes": ["ro", "rum", "ron"], "name": "Romanian", "alt_names": ["Rumanian", "Roumanian", "Daco-Romanian", "limba română"], "script": ["Latin", "Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Romanian_language", "title_tag": "b" }, { "codes": ["ru", "rus"], "name": "Russian", "alt_names": ["русский", "русский язык"], "script": ["Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Russian_language", "title_tag": "b" }, { "codes": ["rw", "kin"], "name": "Kinyarwanda", "alt_names": ["Ikinyarwanda"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Kinyarwanda", "title_tag": "b" }, { "codes": ["sa", "san"], "name": "Sanskrit", "alt_names": ["संस्कृतम्", "saṃskṛtam"], "script": ["Devanagari"], "wiki": "https://en.wikipedia.org/wiki/Sanskrit", "title_tag": "big" }, { "codes": ["sc", "srd"], "name": "Sardinian", "alt_names": ["Sard", "sardu", "sadru", "limba sarda", "lìngua sarda"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Sardinian_language", "title_tag": "b" }, { "codes": ["sd", "snd"], "name": "Sindhi", "alt_names": ["سنڌي", "सिंधी", "ਸਿੰਧੀ", "𑈩𑈭𑈴𑈝𑈮", "𑋝𑋡𑋟𑋐𑋢"], "script": ["Arabic", "Devanagari", "Gurmukhi", "Khojki", "Khudabadi"], "wiki": "https://en.wikipedia.org/wiki/Sindhi_language", "title_tag": "big" }, { "codes": ["se", "sme"], "name": "Northern Sami", "alt_names": ["North Sami", "Sami", "davvisámegiella"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Northern_Sami", "title_tag": "b" }, { "codes": ["sg", "sag"], "name": "Sango", "alt_names": ["Sangho", "yângâ tî sängö"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Sango_language", "title_tag": "b" }, { "codes": ["sh", "shr", "hbs"], "name": "Serbo-Croatian", "alt_names": ["Serbo-Croat", "Serbo-Croat-Bosnian", "Bosnian-Croatian-Serbian", "Bosnian-Croatian-Montenegrin-Serbian", "srpskohrvatski", "hrvatskosrpski", "српскохрватски", "хрватскосрпски", "naš jezik", "наш језик"], "script": ["Latin", "Cyrillic"], "wiki": "https://en.wikipedia.org/wiki/Serbo-Croatian", "title_tag": "b" }, { "codes": ["si", "sin"], "name": "Sinhala", "alt_names": ["Sinhalese", "සිංහල", "Siṁhala"], "script": ["Sinhala"], "wiki": "https://en.wikipedia.org/wiki/Sinhala_language", "title_tag": "big" 
}, { "codes": ["sk", "slo", "slk"], "name": "Slovak", "alt_names": ["slovenčina", "slovenský jazyk"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Slovak_language", "title_tag": "b" }, { "codes": ["sl", "slv"], "name": "Slovene", "alt_names": ["Slovenian", "slovenski jezik", "slovenščina"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Slovene_language", "title_tag": "b" }, { "codes": ["sm", "smo"], "name": "Samoan", "alt_names": ["Gagana faʻa Sāmoa", "Gagana Sāmoa"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Samoan_language", "title_tag": "b" }, { "codes": ["sn", "sna"], "name": "Shona", "alt_names": ["chiShona"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Shona_language", "title_tag": "b" }, { "codes": ["so", "som"], "name": "Somali", "alt_names": ["Af Soomaali"], "script": ["Latin", "Arabic"], "wiki": "https://en.wikipedia.org/wiki/Somali_language", "title_tag": "b" }, { "codes": ["sq", "alb", "sqi"], "name": "Albanian", "alt_names": ["shqip", "gjuha shqipe"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Albanian_language", "title_tag": "b" }, { "codes": ["sr", "srp"], "name": "Serbian", "alt_names": ["српски", "srpski"], "script": ["Cyrillic", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Serbian_language", "title_tag": "b" }, { "codes": ["ss", "ssw"], "name": "Swazi", "alt_names": ["Swati", "siSwati"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Swazi_language", "title_tag": "b" }, { "codes": ["st", "sot"], "name": "Sotho", "alt_names": ["Sesotho", "Southern Sotho"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Sotho_language", "title_tag": "b" }, { "codes": ["su", "sun"], "name": "Sundanese", "alt_names": ["Basa Sunda", "ᮘᮞ ᮞᮥᮔ᮪ᮓ"], "script": ["Latin", "Sundanese", "Arabic"], "wiki": "https://en.wikipedia.org/wiki/Sundanese_language", "title_tag": "b" }, { "codes": ["sv", "swe"], "name": "Swedish", "alt_names": ["svenska"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Swedish_language", "title_tag": "b" }, { "codes": ["sw", "swa"], "name": "Swahili", "alt_names": ["Kiswahili"], "script": ["Latin", "Arabic"], "wiki": "https://en.wikipedia.org/wiki/Swahili_language", "title_tag": "b" }, { "codes": ["ta", "tam"], "name": "Tamil", "alt_names": ["தமிழ்", "Tamiḻ"], "script": ["Tamil", "Arabic", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Tamil_language", "title_tag": "big" }, { "codes": ["te", "tel"], "name": "Telugu", "alt_names": ["తెలుగు"], "script": ["Telugu"], "wiki": "https://en.wikipedia.org/wiki/Telugu_language", "title_tag": "big" }, { "codes": ["tg", "tgk"], "name": "Tajik", "alt_names": ["Tajiki", "тоҷик", "Тоҷикӣ", "tojikī", "забо́ни тоҷикӣ́", "zaboni tojikī"], "script": ["Cyrillic", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Tajik_language", "title_tag": "b" }, { "codes": ["th", "tha"], "name": "Thai", "alt_names": ["Central Thai", "Siamese", "ภาษาไทย", "Phasa Thai"], "script": ["Thai"], "wiki": "https://en.wikipedia.org/wiki/Thai_language", "title_tag": "big" }, { "codes": ["ti", "tir"], "name": "Tigrinya", "alt_names": ["Tigrigna", "ትግርኛ", "tigriññā"], "script": ["Ge'ez"], "wiki": "https://en.wikipedia.org/wiki/Tigrinya_language", "title_tag": "big" }, { "codes": ["tk", "tuk"], "name": "Turkmen", "alt_names": ["Türkmençe", "Türkmen", "Türkmen dili", "Түркменче Түркмен дили", "تۆرکمن ديلی", "تۆرکمنچه‎", "تۆرکمن"], "script": ["Latin", "Cyrillic", "Arabic"], "wiki": "https://en.wikipedia.org/wiki/Turkmen_language", "title_tag": "b" }, { 
"codes": ["tl", "tgl"], "name": "Tagalog", "alt_names": ["Wikang Tagalog"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Tagalog_language", "title_tag": "b" }, { "codes": ["tn", "tsn"], "name": "Tswana", "alt_names": ["Setswana"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Tswana_language", "title_tag": "b" }, { "codes": ["to", "ton"], "name": "Tongan", "alt_names": ["Tonga", "lea fakatonga", "lea faka-Tonga"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Tongan_language", "title_tag": "b" }, { "codes": ["tr", "tur"], "name": "Turkish", "alt_names": ["Türkçe", "Türk dili", "Istanbul Turkish", "Turkey Turkish"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Turkish_language", "title_tag": "b" }, { "codes": ["ota"], "name": "Ottoman Turkish", "alt_names": ["لسان عثمانى‎", "lisân-ı Osmânî", "Osmanlı Türkçesi"], "rtl": 1, "script": ["Arabic", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Ottoman_Turkish", "title_tag": "b" }, { "codes": ["trw"], "name": "Torwali", "alt_names": ["توروالی"], "script": ["Arabic"], "wiki": "https://en.wikipedia.org/wiki/Torwali_language", "title_tag": "b" }, { "codes": ["ts", "tso"], "name": "Tsonga", "alt_names": ["Xitsonga", "Xitsonga"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Tsonga_language", "title_tag": "b" }, { "codes": ["tt", "tat"], "name": "Tatar", "alt_names": ["татар", "تاتار", "татар теле", "tatar tele", "تاتار تلی‎"], "script": ["Cyrillic", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Tatar_language", "title_tag": "b" }, { "codes": ["tw", "twi"], "name": "Twi", "alt_names": ["Akan Kasa"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Twi", "title_tag": "b" }, { "codes": ["ty", "tah"], "name": "Tahitian", "alt_names": ["Reo Tahiti", "Reo Māꞌohi"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Tahitian_language", "title_tag": "b" }, { "codes": ["ug", "uig"], "name": "Uyghur", "alt_names": ["Uighur", "Uyƣur", "Uyğur", "ئۇيغۇر", "Уйғур", "ئۇيغۇر تىلى", "Уйғур тили", "Uyghur tili", "Uyƣur tili", "Uyğur tili"], "script": ["Arabic", "Cyrillic", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Uyghur_language", "title_tag": "b" }, { "codes": ["uk", "ukr"], "name": "Ukrainian", "alt_names": ["українська", "українська мова", "ukrayins'ka mova"], "script": ["Cyrillic", "Latin"], "wiki": "https://en.wikipedia.org/wiki/Ukrainian_language", "title_tag": "b" }, { "codes": ["ur", "urd"], "name": "Urdu", "alt_names": ["Urdū", "اُردُو", "اردو", "Lashkari", "لشکری", "Laškarī", "Modern Standard Urdu"], "rtl": 1, "script": ["Arabic"], "wiki": "https://en.wikipedia.org/wiki/Urdu", "title_tag": "b" }, { "codes": ["uz", "uzb"], "name": "Uzbek", "alt_names": ["O‘zbekcha", "o‘zbek tili", "Ўзбекча", "ўзбек тили", "اۉزبېکچه", "اۉزبېک تیلی", "Özbekçä", "Özbek Tili"], "script": ["Latin", "Cyrillic", "Arabic"], "wiki": "https://en.wikipedia.org/wiki/Uzbek_language", "title_tag": "b" }, { "codes": ["ve", "ven"], "name": "Venda", "alt_names": ["Tshivenda", "Tshivenḓa", "Luvenḓa"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Venda_language", "title_tag": "b" }, { "codes": ["vi", "vie"], "name": "Vietnamese", "alt_names": ["Tiếng Việt", "Việt"], "script": ["Latin", "CJK"], "wiki": "https://en.wikipedia.org/wiki/Vietnamese_language", "title_tag": "b" }, { "codes": ["vo", "vol"], "name": "Volapuk", "alt_names": ["Volapük", "Volapük nulik"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Volap%C3%BCk", "title_tag": "b" }, { "codes": ["wa", 
"wln"], "name": "Walloon", "alt_names": ["walon"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Walloon_language", "title_tag": "b" }, { "codes": ["wo", "wol"], "name": "Wolof", "alt_names": [], "script": ["Latin", "Arabic"], "wiki": "https://en.wikipedia.org/wiki/Wolof_language", "title_tag": "b" }, { "codes": ["xh", "xho"], "name": "Xhosa", "alt_names": ["isiXhosa"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Xhosa_language", "title_tag": "b" }, { "codes": ["yi", "yid"], "name": "Yiddish", "alt_names": ["ייִדיש", "יידיש", "אידיש", "yidish", "idish"], "script": ["Hebrew"], "wiki": "https://en.wikipedia.org/wiki/Yiddish", "title_tag": "b" }, { "codes": ["yo", "yor"], "name": "Yoruba", "alt_names": ["Èdè Yorùbá"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Yoruba_language", "title_tag": "b" }, { "codes": ["za", "zha"], "name": "Zhuang", "alt_names": ["Vahcuengh", "話僮", "壮语", "壯語", "Zhuàngyǔ"], "script": ["CJK"], "wiki": "https://en.wikipedia.org/wiki/Zhuang_languages", "title_tag": "big" }, { "codes": ["zh", "chi", "zho", "cmn"], "name": "Chinese", "alt_names": ["汉语", "漢語", "Mandarin", "Standard Chinese", "Modern Standard Mandarin", "Standard Mandarin", "Mandarin Chinese", "普通话", "普通話", "国语", "國語", "华语", "華語"], "script": ["CJK"], "wiki": "https://en.wikipedia.org/wiki/Standard_Chinese", "title_tag": "big" }, { "codes": ["zu", "zul"], "name": "Zulu", "alt_names": ["isiZulu"], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Zulu_language", "title_tag": "b" }, { "codes": ["ast"], "name": "Asturian", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Asturian_language", "title_tag": "b" }, { "codes": ["ceb"], "name": "Cebuano", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Cebuano_language", "title_tag": "b" }, { "codes": ["got"], "name": "Gothic", "alt_names": [], "script": ["Gothic"], "wiki": "https://en.wikipedia.org/wiki/Gothic_language", "title_tag": "b" }, { "codes": ["ang"], "name": "Old English", "alt_names": [], "script": ["Latin", "Runic"], "wiki": "https://en.wikipedia.org/wiki/Old_English", "title_tag": "b" }, { "codes": ["non"], "name": "Old Norse", "alt_names": [], "script": ["Latin", "Runic"], "wiki": "https://en.wikipedia.org/wiki/Old_Norse", "title_tag": "b" }, { "codes": ["nrf"], "name": "Norman", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Norman_language", "title_tag": "b" }, { "codes": ["yue"], "name": "Cantonese", "alt_names": [], "script": ["CJK"], "wiki": "https://en.wikipedia.org/wiki/Cantonese", "title_tag": "big" }, { "codes": ["fro"], "name": "Old French", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Old_French", "title_tag": "b" }, { "codes": ["lld"], "name": "Ladin", "alt_names": [], "script": ["Latin"], "wiki": "https://en.wikipedia.org/wiki/Ladin_language", "title_tag": "b" }, { "codes": ["ady"], "name": "Adyghe", "alt_names": [], "script": ["Cyrillic", "Latin", "Arabic"], "wiki": "https://en.wikipedia.org/wiki/Adyghe_language", "title_tag": "b" }, { "codes": ["xcl"], "name": "Classical Armenian", "alt_names": ["Old Armenian"], "script": ["Armenian"], "wiki": "https://en.wikipedia.org/wiki/Classical_Armenian", "title_tag": "b" } ] pyglossary-5.0.9/pyglossary/langs/writing_system.py000066400000000000000000000216471476751035500227510ustar00rootroot00000000000000from __future__ import annotations import unicodedata from typing import Literal, NamedTuple __all__ = [ "WritingSystem", 
"getAllWritingSystemsFromText", "getWritingSystemFromText", # 'unicodeNextWord', "writingSystemByLowercaseName", "writingSystemByName", "writingSystemByUnicode", "writingSystemList", ] class WritingSystem(NamedTuple): name: str iso: list[tuple[int, str]] | list[tuple[int, str, str]] = [] unicode: list = [] titleTag: str = "b" direction: Literal["ltr", "rtl", "ttb"] = "ltr" comma: str = ", " pop: int | float = 0 # population in millions # digits and FULLWIDTH DIGITs are considered neutral/ignored, not Latin # scripts are separated into multiple groups based on their popularity # (usage in multiple live languages, and number of native speakers) writingSystemList = [ WritingSystem( name="Latin", iso=[(215, "Latn")], unicode=[ "LATIN", ], titleTag="b", comma=", ", pop=4900, ), WritingSystem( name="Arabic", iso=[(160, "Arab")], unicode=["ARABIC"], titleTag="b", direction="rtl", comma="، ", pop=670, ), WritingSystem( name="Cyrillic", iso=[(220, "Cyrl")], unicode=["CYRILLIC"], titleTag="b", comma=", ", pop=250, ), WritingSystem( name="CJK", iso=[ (285, "Bopo", "BOPOMOFO"), (286, "Hang", "HANGUL"), (410, "Hira", "HIRAGANA"), (411, "Kana", "KATAKANA"), (412, "Hrkt", "KATAKANA OR HIRAGANA"), (460, "Yiii", "Yi"), (499, "Nshu", "NUSHU"), (500, "Hani", "HAN"), # aka Hanzi, Kanji, Hanja (501, "Hans", "SIMPLIFIED HAN"), (502, "Hant", "TRADITIONAL HAN"), ], unicode=[ "CJK", "HIRAGANA", "KATAKANA", "IDEOGRAPHIC", # Ideographic Description Characters "DITTO", # Ditto mark "HANGUL", # Korean alphabet "HALFWIDTH KATAKANA", "HALFWIDTH HANGUL", "YI", # https://en.wikipedia.org/wiki/Yi_script "FULLWIDTH LATIN", "BOPOMOFO", "NUSHU", ], titleTag="big", comma="、", pop=1540, # Chinese=1340, Kana=120, Hangul=78.7 ), WritingSystem( name="Devanagari", iso=[(315, "Deva")], unicode=["DEVANAGARI"], titleTag="big", comma=", ", pop=610, ), # _____________________________________________________ WritingSystem( name="Armenian", iso=[(230, "Armn")], unicode=["ARMENIAN"], titleTag="big", comma=", ", pop=12, ), WritingSystem( name="Bengali-Assamese", iso=[(325, "Beng")], unicode=["BENGALI"], titleTag="big", comma=", ", pop=270, ), WritingSystem( name="Burmese", iso=[(350, "Mymr")], unicode=["MYANMAR"], titleTag="big", comma=", ", # almost not used except in English phrases pop=39, ), WritingSystem( name="Ge'ez", iso=[(430, "Ethi")], unicode=["ETHIOPIC"], titleTag="big", comma=", ", pop=21, ), WritingSystem( name="Greek", iso=[(200, "Grek")], unicode=["GREEK"], titleTag="b", comma=", ", pop=11, ), WritingSystem( name="Gujarati", iso=[(320, "Gujr")], unicode=["GUJARATI"], titleTag="big", comma=", ", pop=48, ), WritingSystem( name="Gurmukhi", iso=[(310, "Guru")], unicode=["GURMUKHI"], titleTag="big", comma=", ", pop=22, ), WritingSystem( name="Hebrew", iso=[(125, "Hebr")], unicode=["HEBREW"], titleTag="big", direction="rtl", comma=", ", pop=14, ), WritingSystem( name="Kannada", iso=[(345, "Knda")], unicode=["KANNADA"], titleTag="big", comma=", ", pop=45, ), WritingSystem( name="Khmer", iso=[(355, "Khmr")], unicode=["KHMER"], titleTag="big", comma=", ", pop=11.4, ), WritingSystem( name="Lao", iso=[(356, "Laoo")], unicode=["LAO"], titleTag="big", comma=", ", pop=22, ), WritingSystem( name="Malayalam", iso=[(347, "Mlym")], unicode=["MALAYALAM"], titleTag="big", comma=", ", pop=38, ), WritingSystem( name="Odia", iso=[(327, "Orya")], unicode=["ORIYA"], titleTag="big", comma=", ", pop=21, ), WritingSystem( name="Sinhala", iso=[(348, "Sinh")], unicode=["SINHALA"], titleTag="big", comma=", ", pop=14.4, ), WritingSystem( name="Sundanese", 
iso=[(362, "Sund")], unicode=["SUNDANESE"], titleTag="big", comma=", ", pop=38, ), WritingSystem( name="Brahmi", iso=[ (300, "Brah", "Brahmi"), ], unicode=["BRAHMI"], titleTag="big", comma=", ", ), WritingSystem( name="Tamil", iso=[ (346, "Taml", "Tamil"), ], unicode=["TAMIL"], titleTag="big", # Parent scripts: Brahmi, Tamil-Brahmi, Pallava comma=", ", pop=70, ), WritingSystem( name="Telugu", iso=[(340, "Telu")], unicode=["TELUGU"], titleTag="big", comma=", ", pop=74, ), WritingSystem( name="Thai", iso=[(352, "Thai")], unicode=["THAI"], titleTag="big", comma=", ", pop=38, ), # _____________________________________________________ WritingSystem( name="Syriac", iso=[(135, "Syrc")], unicode=["SYRIAC"], titleTag="b", direction="rtl", comma="، ", pop=8, # Syriac=0.4, Lontara=7.6 # Lontara is a separate script according to Wikipedia # but not according to Unicode ), WritingSystem( name="Tibetan", iso=[(330, "Tibt")], unicode=["TIBETAN"], titleTag="big", comma=", ", # almost not used except in numbers! pop=5, ), WritingSystem( name="Georgian", iso=[(240, "Geor")], unicode=["GEORGIAN"], titleTag="big", comma=", ", pop=4.5, ), WritingSystem( name="Mongolian", iso=[(145, "Mong")], unicode=["MONGOLIAN"], titleTag="big", direction="ltr", # historically ttb? comma=", ", pop=2, ), WritingSystem( name="Thaana", iso=[(170, "Thaa")], unicode=["THAANA"], titleTag="big", direction="rtl", comma="، ", pop=0.35, ), # _____________________________________________________ WritingSystem( name="Javanese", iso=[(361, "Java")], unicode=["JAVANESE"], titleTag="big", # Since around 1945 Javanese script has largely been # supplanted by Latin script to write Javanese. ), WritingSystem( # aka CANADIAN ABORIGINAL or UCAS name="Canadian syllabic", iso=[(440, "Cans")], unicode=["CANADIAN SYLLABICS"], titleTag="big", comma=", ", ), WritingSystem( name="Takri", iso=[(321, "Takr")], unicode=["TAKRI"], titleTag="b", comma=", ", ), # _____________________________________________________ WritingSystem( name="SignWriting", iso=[(95, "Sgnw")], unicode=["SIGNWRITING"], titleTag="big", direction="ttb", comma="𝪇", ), # _____________________________________________________ WritingSystem( name="Adlam", iso=[(166, "Adlm")], unicode=["ADLAM"], titleTag="big", direction="rtl", ), WritingSystem( name="Avestan", iso=[(134, "Avst")], unicode=["AVESTAN"], titleTag="b", direction="rtl", ), WritingSystem( name="Glagolitic", iso=[(225, "Glag")], unicode=["GLAGOLITIC"], # Unicode 4.1 titleTag="b", ), WritingSystem( name="Khojki", iso=[(322, "Khoj")], unicode=["KHOJKI"], titleTag="big", ), WritingSystem( name="Khudabadi", # aka: Khudawadi, "Sindhi" iso=[(318, "Sind")], unicode=["KHUDAWADI"], titleTag="big", ), WritingSystem( name="N'Ko", iso=[(165, "Nkoo")], unicode=["NKO"], titleTag="big", ), # _____________________________________________________ # WritingSystem( # name="Baybayin", # iso=[(370, "Tglg")], # unicode=["TAGALOG"], # added in Unicode 3.2 # ), # WritingSystem( # name="Rejang", # iso=[(363, "Rjng")], # unicode=["REJANG"], # ), # WritingSystem( # name="Mandombe", # unicode=[], # ), # WritingSystem( # name="Mwangwego", # unicode=[], # ), ] for _ws in writingSystemList: if not _ws.name: raise ValueError(f"empty name in {_ws}") writingSystemByUnicode = {uni: ws for ws in writingSystemList for uni in ws.unicode} writingSystemByName = {ws.name: ws for ws in writingSystemList} writingSystemByLowercaseName = {ws.name.lower(): ws for ws in writingSystemList} unicodeNextWord = { "HALFWIDTH", "FULLWIDTH", "CANADIAN", } def
_getWritingSystemFromChar(char: str) -> WritingSystem | None: try: unicodeWords = unicodedata.name(char).split(" ") except ValueError: # if c not in string.whitespace: # print(f"{c=}, {e}") return None alias = unicodeWords[0] ws = writingSystemByUnicode.get(alias) if ws: return ws if alias in unicodeNextWord: return writingSystemByUnicode.get(" ".join(unicodeWords[:2])) return None def _getWritingSystemFromText( st: str, start: int, end: int, ) -> WritingSystem | None: for char in st[start:end]: ws = _getWritingSystemFromChar(char) if ws: return ws return None def getWritingSystemFromText( st: str, beginning: bool = False, ) -> WritingSystem | None: st = st.strip() if not st: return None # some special first words in unicodedata.name(c): # "RIGHT", "ASTERISK", "MODIFIER" k = 0 if beginning else (len(st) + 1) // 2 - 1 ws = _getWritingSystemFromText(st, k, len(st)) if ws: return ws return _getWritingSystemFromText(st, 0, k) def getAllWritingSystemsFromText( st: str, ) -> set[str]: st = st.strip() if not st: return set() wsSet = set() for char in st: ws = _getWritingSystemFromChar(char) if ws: wsSet.add(ws.name) return wsSet pyglossary-5.0.9/pyglossary/logger.py000066400000000000000000000145261476751035500200330ustar00rootroot00000000000000from __future__ import annotations import inspect import logging import os import sys import traceback from typing import TYPE_CHECKING, cast if TYPE_CHECKING: from collections.abc import Callable from types import TracebackType from typing import ( Any, TypeAlias, ) ExcInfoType: TypeAlias = ( tuple[type[BaseException], BaseException, TracebackType] | tuple[None, None, None] ) __all__ = [ "TRACE", "StdLogHandler", "format_exception", "trace", ] TRACE = 5 logging.addLevelName(TRACE, "TRACE") def trace(log: logging.Logger, msg: str) -> None: func = getattr(log, "trace", None) if func is None: log.error(f"Logger {log} has no 'trace' method") return func(msg) class _Formatter(logging.Formatter): def __init__(self, *args, **kwargs) -> None: # noqa: ANN002, ANN003 logging.Formatter.__init__(self, *args, **kwargs) self.fill: Callable[[str], str] | None = None def formatMessage( self, record: logging.LogRecord, ) -> str: msg = logging.Formatter.formatMessage(self, record) if self.fill is not None: msg = self.fill(msg) return msg # noqa: RET504 class Logger(logging.Logger): levelsByVerbosity = ( logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG, TRACE, logging.NOTSET, ) levelNamesCap = ( "Critical", "Error", "Warning", "Info", "Debug", "Trace", "All", # "Not-Set", ) def __init__(self, *args) -> None: # noqa: ANN101, ANN002 logging.Logger.__init__(self, *args) self._verbosity = 3 self._timeEnable = False def setVerbosity(self, verbosity: int) -> None: self.setLevel(self.levelsByVerbosity[verbosity]) self._verbosity = verbosity def getVerbosity(self) -> int: return self._verbosity def trace(self, msg: str) -> None: self.log(TRACE, msg) def pretty(self, data: Any, header: str = "") -> None: # noqa: ANN401 from pprint import pformat self.debug(header + pformat(data)) def newFormatter(self) -> _Formatter: timeEnable = self._timeEnable if timeEnable: fmt = "%(asctime)s [%(levelname)s] %(message)s" else: fmt = "[%(levelname)s] %(message)s" return _Formatter(fmt) def setTimeEnable(self, timeEnable: bool) -> None: self._timeEnable = timeEnable formatter = self.newFormatter() for handler in self.handlers: handler.setFormatter(formatter) def addHandler(self, hdlr: logging.Handler) -> None: # if want to add separate format (new config keys and flags) 
for ui_gtk # and ui_tk, you need to remove this function and run handler.setFormatter # in ui_gtk and ui_tk logging.Logger.addHandler(self, hdlr) hdlr.setFormatter(self.newFormatter()) def _formatVarDict( dct: dict[str, Any], indent: int = 4, max_width: int = 80, ) -> str: lines = [] pre = " " * indent for key, value in dct.items(): line = pre + key + " = " + repr(value) if len(line) > max_width: line = line[: max_width - 3] + "..." try: value_len = len(value) except TypeError: pass else: line += f"\n{pre}len({key}) = {value_len}" lines.append(line) return "\n".join(lines) def format_exception( exc_info: ExcInfoType | None = None, add_locals: bool = False, add_globals: bool = False, ) -> str: if exc_info is None: exc_info = sys.exc_info() type_, value, tback = exc_info text = "".join(traceback.format_exception(type_, value, tback)) if tback is None: return text if add_locals or add_globals: try: frame = inspect.getinnerframes(tback, context=0)[-1][0] except IndexError: pass else: if add_locals: text += f"Traceback locals:\n{_formatVarDict(frame.f_locals)}\n" if add_globals: text += f"Traceback globals:\n{_formatVarDict(frame.f_globals)}\n" return text class StdLogHandler(logging.Handler): colorsConfig = { "CRITICAL": ("color.cmd.critical", 196), "ERROR": ("color.cmd.error", 1), "WARNING": ("color.cmd.warning", 208), } # 1: dark red (like 31m), 196: real red, 9: light red # 15: white, 229: light yellow (#ffffaf), 226: real yellow (#ffff00) def __init__(self, noColor: bool = False) -> None: logging.Handler.__init__(self) self.set_name("std") self.noColor = noColor self.config: dict[str, Any] = {} @property def endFormat(self) -> str: if self.noColor: return "" return "\x1b[0;0;0m" def emit(self, record: logging.LogRecord) -> None: msg = "" if record.getMessage(): msg = self.format(record) ### if record.exc_info: type_, value, tback = record.exc_info if type_ and tback and value: # to fix mypy error tback_text = format_exception( exc_info=(type_, value, tback), add_locals=(self.level <= logging.DEBUG), add_globals=False, ) if not msg: msg = "unhandled exception:" msg += "\n" msg += tback_text ### levelname = record.levelname fp = sys.stderr if levelname in {"CRITICAL", "ERROR"} else sys.stdout if not self.noColor and levelname in self.colorsConfig: key, default = self.colorsConfig[levelname] colorCode = self.config.get(key, default) startColor = f"\x1b[38;5;{colorCode}m" msg = startColor + msg + self.endFormat ### if fp is None: print(f"fp=None, levelname={record.levelname}") # noqa: T201 print(msg) # noqa: T201 return encoding = getattr(fp, "encoding", "utf-8") or "utf-8" try: fp.write(msg + "\n") except UnicodeEncodeError: fp.write( (msg + "\n") .encode(encoding, errors="xmlcharrefreplace") .decode(encoding) ) fp.flush() def setupLogging() -> Logger: logging.setLoggerClass(Logger) log = cast("Logger", logging.getLogger("pyglossary")) if os.sep == "\\": def _windows_show_exception( type_: type[BaseException], exc: BaseException, tback: TracebackType | None, ) -> None: if not (type_ and exc and tback): return import ctypes msg = format_exception( exc_info=(type_, exc, tback), add_locals=(log.level <= logging.DEBUG), add_globals=False, ) log.critical(msg) ctypes.windll.user32.MessageBoxW(0, msg, "PyGlossary Error", 0) # type: ignore sys.excepthook = _windows_show_exception else: def _unix_show_exception( type_: type[BaseException], exc: BaseException, tback: TracebackType | None, ) -> None: if not (type_ and exc and tback): return log.critical( format_exception( exc_info=(type_, exc, tback), 
add_locals=(log.level <= logging.DEBUG), add_globals=False, ), ) sys.excepthook = _unix_show_exception return log pyglossary-5.0.9/pyglossary/lxml_types.py000066400000000000000000000056421476751035500207530ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2023 Saeed Rasooli (ilius) # # based on https://github.com/abelcheung/types-lxml # under Apache License, Version 2.0, January 2004 # http://www.apache.org/licenses/ from __future__ import annotations import typing from collections.abc import Mapping from contextlib import ( AbstractAsyncContextManager as AsyncContextManager, ) from contextlib import ( AbstractContextManager as ContextManager, ) from typing import ( AnyStr, Literal, TypeAlias, ) from lxml.etree import QName, _Element # noqa: PLC2701 __all__ = ["Element", "T_htmlfile"] _TextArg: TypeAlias = str | bytes | QName _TagName: TypeAlias = _TextArg _OutputMethodArg = Literal[ "html", "text", "xml", "HTML", "TEXT", "XML", ] # Element type can not be a protocol or interface or even TypeAlias # it's stupid! # And now pyright complains at every usage of it: # error: Variable not allowed in type expression (reportInvalidTypeForm) Element = _Element class IncrementalFileWriter(typing.Protocol): def write_declaration( self, version: AnyStr | None = ..., standalone: bool | None = ..., doctype: AnyStr | None = ..., ) -> None: ... def write_doctype( self, doctype: AnyStr | None, ) -> None: ... def write( self, *args: AnyStr | Element, with_tail: bool = ..., pretty_print: bool = ..., method: _OutputMethodArg | None = ..., ) -> None: ... def flush(self) -> None: ... def method( self, method: _OutputMethodArg | None, ) -> ContextManager[None]: raise NotImplementedError def element( self, tag: _TagName, attrib: Mapping[str, AnyStr] | None = ..., nsmap: dict[str | None, AnyStr] | None = ..., method: _OutputMethodArg | None = ..., **_extra: AnyStr, ) -> ContextManager[None]: raise NotImplementedError class AsyncIncrementalFileWriter(typing.Protocol): async def write_declaration( self, version: AnyStr | None = ..., standalone: bool | None = ..., doctype: AnyStr | None = ..., ) -> None: ... async def write_doctype( self, doctype: AnyStr | None, ) -> None: ... async def write( self, *args: AnyStr | Element | None, with_tail: bool = ..., pretty_print: bool = ..., method: _OutputMethodArg | None = ..., ) -> None: ... async def flush(self) -> None: ... 
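# --------------------------------------------------------------------------
# Editor's note (not part of the original lxml_types.py): the sync and async
# Protocol classes in this module model lxml's incremental serializer, i.e.
# the writer object yielded by lxml.etree.xmlfile / lxml.etree.htmlfile.
# A minimal usage sketch of the sync variant, using only public lxml API:
#
#     from lxml import etree
#
#     with open("out.html", "wb") as f, etree.htmlfile(f, encoding="utf-8") as hf:
#         with hf.element("b"):      # element() returns a context manager
#             hf.write("headword")   # write() accepts str/bytes/Element args
#
# T_htmlfile (defined below) is the name the rest of pyglossary uses for the
# type of `hf` in code like the above.
# --------------------------------------------------------------------------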
def method( self, method: _OutputMethodArg | None, ) -> AsyncContextManager[None]: raise NotImplementedError def element( self, tag: _TagName, attrib: Mapping[str, AnyStr] | None = ..., nsmap: dict[str | None, AnyStr] | None = ..., method: _OutputMethodArg | None = ..., **_extra: AnyStr, ) -> AsyncContextManager[None]: raise NotImplementedError class T_htmlfile( # type: ignore # noqa: PGH003 IncrementalFileWriter, ContextManager[IncrementalFileWriter], # AsyncIncrementalFileWriter, # AsyncContextManager[AsyncIncrementalFileWriter], ): pass # typing.AsyncContextManager # is generic version of contextlib.AbstractAsyncContextManager pyglossary-5.0.9/pyglossary/option.py000066400000000000000000000245641476751035500200670ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import logging import re from typing import Any __all__ = [ "BoolOption", "DictOption", "EncodingOption", "FileSizeOption", "FloatOption", "HtmlColorOption", "IntOption", "ListOption", "NewlineOption", "Option", "StrOption", "optionFromDict", ] log = logging.getLogger("pyglossary") def optionFromDict(data: dict[str, Any]) -> Option: className = data.pop("class") optClass: type if className == "Option": data["typ"] = data.pop("type") optClass = Option else: data.pop("type") optClass = Option.classes[className] return optClass(**data) class Option: classes: dict[str, type] = {} @classmethod def register(cls: type[Option], optClass: type) -> type: cls.classes[optClass.__name__] = optClass return optClass def __init__( # noqa: PLR0913 self, typ: str, customValue: bool = False, values: list[Any] | None = None, allowNone: bool = False, comment: str = "", multiline: bool = False, disabled: bool = False, hasFlag: bool = False, customFlag: str = "", falseComment: str = "", ) -> None: if values is None: # otherwise there would not be any valid value customValue = True self.typ = typ self.values = values self.allowNone = allowNone self.customValue = customValue self.comment = comment self.multiline = multiline self.disabled = disabled self.hasFlag = hasFlag self.customFlag = customFlag self.falseComment = falseComment @property def typeDesc(self) -> str: return self.typ @property def longComment(self) -> str: comment = self.typeDesc if self.comment: if comment: comment += ", " comment += self.comment return comment def toDict(self) -> dict[str, Any]: data = { "class": self.__class__.__name__, "type": self.typ, "customValue": self.customValue, } if self.values: data["values"] = self.values if self.comment: data["comment"] = self.comment if self.disabled: data["disabled"] = True if self.hasFlag: data["hasFlag"] = True data["customFlag"] = self.customFlag if self.falseComment: data["falseComment"] = self.falseComment return data @classmethod def evaluate(cls, raw: str) -> tuple[Any, bool]: """Return (value, isValid).""" if raw == "None": return None, True return raw, True def validate(self, value: Any) -> bool: # noqa: ANN401 if not self.customValue: if not self.values: log.error( f"invalid option: customValue={self.customValue!r}" f", values={self.values!r}", ) return False return value in self.values if value is None: return self.allowNone valueType = type(value).__name__ return self.typ == valueType def validateRaw(self, raw: str) -> bool: """Return isValid.""" value, isValid = self.evaluate(raw) if not isValid: return False return self.validate(value) def groupValues(self) -> dict[str, Any] | None: # noqa: PLR6301 return None @Option.register class BoolOption(Option): def __init__( self, allowNone: 
bool = False, **kwargs, # noqa: ANN003 ) -> None: values: list[bool | None] = [False, True] if allowNone: values.append(None) Option.__init__( self, typ="bool", customValue=False, values=values, allowNone=allowNone, **kwargs, # noqa: ANN003 ) def toDict(self) -> dict[str, Any]: data = Option.toDict(self) del data["customValue"] del data["values"] return data @classmethod def evaluate( cls, raw: str | bool, ) -> tuple[bool | None, bool]: if raw is None: return None, True if isinstance(raw, bool): return raw, True if isinstance(raw, str): raw = raw.lower() if raw == "none": return None, True if raw in {"yes", "true", "1"}: return True, True if raw in {"no", "false", "0"}: return False, True return None, False # not valid @Option.register class StrOption(Option): def __init__( self, **kwargs, # noqa: ANN003 ) -> None: Option.__init__( self, typ="str", **kwargs, ) def validate(self, value: Any) -> bool: # noqa: ANN401 if not self.customValue: if not self.values: log.error( f"invalid option: customValue={self.customValue!r}" f", values={self.values!r}", ) return False return value in self.values return type(value).__name__ == "str" def groupValues(self) -> dict[str, Any] | None: # noqa: PLR6301 return None @Option.register class IntOption(Option): def __init__( self, **kwargs, # noqa: ANN003 ) -> None: Option.__init__( self, typ="int", **kwargs, ) @classmethod def evaluate(cls, raw: str | int) -> tuple[int | None, bool]: """Return (value, isValid).""" try: value = int(raw) except ValueError: return None, False return value, True @Option.register class FileSizeOption(IntOption): factors = { "KiB": 1024, "kib": 1024, "Ki": 1024, "ki": 1024, # ------------ "MiB": 1048576, "mib": 1048576, "Mi": 1048576, "mi": 1048576, # ------------ "GiB": 1073741824, "gib": 1073741824, "Gi": 1073741824, "gi": 1073741824, # ------------ "kB": 1000, "kb": 1000, "KB": 1000, "k": 1000, "K": 1000, # ------------ "MB": 1000000, "mb": 1000000, "mB": 1000000, "M": 1000000, "m": 1000000, # ------------ "GB": 1000000000, "gb": 1000000000, "gB": 1000000000, "G": 1000000000, "g": 1000000000, } validPattern = "^([0-9.]+)([kKmMgG]i?[bB]?)$" @property def typeDesc(self) -> str: return "" @classmethod def evaluate(cls, raw: str | int) -> tuple[int | None, bool]: if not raw: return 0, True factor = 1 if isinstance(raw, str): m = re.match(cls.validPattern, raw) if m is not None: raw, unit = m.groups() factorTmp = cls.factors.get(unit) if factorTmp is None: return None, False factor = factorTmp try: value = float(raw) except ValueError: return None, False if value < 0: return None, False return int(value * factor), True @Option.register class FloatOption(Option): def __init__( self, **kwargs, # noqa: ANN003 ) -> None: Option.__init__( self, typ="float", **kwargs, ) @classmethod def evaluate( cls, raw: str | float, ) -> tuple[float | None, bool]: """Return (value, isValid).""" try: value = float(raw) except ValueError: return None, False return value, True @Option.register class DictOption(Option): def __init__( self, **kwargs, # noqa: ANN003 ) -> None: Option.__init__( self, typ="dict", customValue=True, allowNone=True, multiline=True, **kwargs, ) def toDict(self) -> dict[str, Any]: data = Option.toDict(self) del data["customValue"] return data @classmethod def evaluate( cls, raw: str | dict, ) -> tuple[dict | None, bool]: import ast if isinstance(raw, dict): return raw, True if raw == "": # noqa: PLC1901 return None, True # valid try: value = ast.literal_eval(raw) except SyntaxError: return None, False # not valid if 
type(value).__name__ != "dict": return None, False # not valid return value, True # valid @Option.register class ListOption(Option): def __init__(self, **kwargs) -> None: # noqa: ANN003 Option.__init__( self, typ="list", customValue=True, allowNone=True, multiline=True, **kwargs, # noqa: ANN003 ) def toDict(self) -> dict[str, Any]: data = Option.toDict(self) del data["customValue"] return data @classmethod def evaluate(cls, raw: str) -> tuple[list | None, bool]: import ast if raw == "": # noqa: PLC1901 return None, True # valid try: value = ast.literal_eval(raw) except SyntaxError: return None, False # not valid if type(value).__name__ != "list": return None, False # not valid return value, True # valid @Option.register class EncodingOption(Option): re_category = re.compile("^[a-z]+") def __init__( self, customValue: bool = True, values: list[str] | None = None, comment: str | None = None, **kwargs, # noqa: ANN003 ) -> None: if values is None: values = [ "utf-8", "utf-16", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258", "mac_cyrillic", "mac_greek", "mac_iceland", "mac_latin2", "mac_roman", "mac_turkish", "cyrillic", "arabic", "greek", "hebrew", "latin2", "latin3", "latin4", "latin5", "latin6", ] if comment is None: comment = "Encoding/charset" Option.__init__( self, typ="str", customValue=customValue, values=values, comment=comment, **kwargs, # noqa: ANN003 ) def toDict(self) -> dict[str, Any]: data = Option.toDict(self) del data["values"] return data def groupValues(self) -> dict[str, Any] | None: groups: dict[str, list[str]] = {} others: list[str] = [] for value in self.values or []: cats = self.re_category.findall(value) if not cats: others.append(value) continue cat = cats[0] if len(cat) == len(value): others.append(value) continue if cat not in groups: groups[cat] = [] groups[cat].append(value) if others: groups["other"] = others return groups @Option.register class NewlineOption(Option): def __init__( self, customValue: bool = True, values: list[str] | None = None, comment: str | None = None, **kwargs, # noqa: ANN003 ) -> None: if values is None: values = [ "\r\n", "\n", "\r", ] if comment is None: comment = "Newline string" Option.__init__( self, typ="str", customValue=customValue, values=values, multiline=True, comment=comment, **kwargs, # noqa: ANN003 ) @Option.register class UnicodeErrorsOption(Option): def __init__( self, comment: str | None = None, ) -> None: if comment is None: comment = "Unicode Errors, values: `strict`, `ignore`, `replace`" Option.__init__( self, typ="str", customValue=False, values=["strict", "ignore", "replace"], multiline=False, comment=comment, ) def toDict(self) -> dict[str, Any]: return { "class": "UnicodeErrorsOption", "type": "str", "comment": self.comment, } @Option.register class HtmlColorOption(Option): def toDict(self) -> dict[str, Any]: data = Option.toDict(self) del data["customValue"] return data def __init__(self, **kwargs) -> None: # noqa: ANN003 Option.__init__( self, typ="str", customValue=True, **kwargs, # noqa: ANN003 ) # TODO: use a specific type? 
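# --------------------------------------------------------------------------
# Editor's note (not part of the original option.py): a minimal,
# self-contained sketch of the (value, isValid) contract shared by the
# evaluate() classmethods above. The expected values follow directly from
# the code in this module (e.g. FileSizeOption.factors).

from pyglossary.option import BoolOption, FileSizeOption, IntOption

assert BoolOption.evaluate("yes") == (True, True)
assert BoolOption.evaluate("maybe") == (None, False)  # unrecognized -> invalid
assert IntOption.evaluate("42") == (42, True)
assert FileSizeOption.evaluate("10MiB") == (10485760, True)  # 10 * 1024**2
assert FileSizeOption.evaluate("10k") == (10000, True)  # decimal kilo
assert FileSizeOption.evaluate("10x") == (None, False)  # unknown unit suffix

# validate() then checks the evaluated value's type (or its membership in
# `values`); validateRaw() above simply chains evaluate() and validate().
# --------------------------------------------------------------------------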
pyglossary-5.0.9/pyglossary/os_utils.py000066400000000000000000000076751476751035500204240ustar00rootroot00000000000000from __future__ import annotations import logging import os import shutil import sys from pathlib import Path from typing import TYPE_CHECKING from pyglossary import core if TYPE_CHECKING: from collections.abc import Callable from types import TracebackType __all__ = ["indir", "rmtree", "runDictzip", "showMemoryUsage"] log = logging.getLogger("pyglossary") class indir: """ mkdir + chdir shortcut to use with `with` statement. >>> print(os.getcwd()) # -> "~/projects" >>> with indir('my_directory', create=True): >>> print(os.getcwd()) # -> "~/projects/my_directory" >>> # do some work inside new 'my_directory'... >>> print(os.getcwd()) # -> "~/projects" >>> # automatically return to previous directory. """ def __init__( self, directory: str, create: bool = False, clear: bool = False, ) -> None: self.old_pwd: str | None = None self.dir = directory self.create = create self.clear = clear def __enter__(self) -> None: self.old_pwd = os.getcwd() if os.path.exists(self.dir): if self.clear: shutil.rmtree(self.dir) os.makedirs(self.dir) elif self.create: os.makedirs(self.dir) os.chdir(self.dir) def __exit__( self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None, ) -> None: if self.old_pwd: os.chdir(self.old_pwd) self.old_pwd = None def _idzip(filename: str | Path) -> bool: try: from idzip import compressor except ModuleNotFoundError: return False filename = Path(filename) destination = filename.parent / (filename.name + ".dz") try: with open(filename, "rb") as inp_file, open(destination, "wb") as out_file: inputInfo = os.fstat(inp_file.fileno()) log.debug("compressing %s to %s with idzip", filename, destination) compressor.compress( inp_file, inputInfo.st_size, out_file, filename.name, int(inputInfo.st_mtime), ) filename.unlink() except OSError as error: log.error(str(error)) return True def _dictzip(filename: str | Path) -> bool: import subprocess dictzipCmd = shutil.which("dictzip") if not dictzipCmd: return False log.debug(f"dictzip command: {dictzipCmd!r}") try: subprocess.run( [dictzipCmd, filename], check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) except subprocess.CalledProcessError as proc_err: err_msg = proc_err.output.decode("utf-8").replace("\n", ";") retcode = proc_err.returncode log.error(f"dictzip exit {retcode}: {err_msg}") return True def runDictzip(filename: str | Path, method: str = "") -> None: """Compress file into dictzip format.""" res = None if method in {"", "idzip"}: res = _idzip(filename) if not res and method in {"", "dictzip"}: res = _dictzip(filename) if not res: log.warning( "Dictzip compression requires idzip module or dictzip utility," f" run `{core.pip} install python-idzip` to install or make sure" " dictzip is in your $PATH", ) def _rmtreeError( _func: Callable, _direc: str, exc_info: tuple[type[BaseException], BaseException, TracebackType], ) -> None: if exc_info is None: return _, exc_val, _ = exc_info log.error(exc_val) def _rmtreeException( _func: Callable, _direc: str, exc_val: BaseException, ) -> None: log.error(exc_val) def _rmtree(direc: str) -> None: # in Python 3.12, onexc is added and onerror is deprecated # https://github.com/python/cpython/blob/main/Lib/shutil.py if sys.version_info < (3, 12): shutil.rmtree(direc, onerror=_rmtreeError) return shutil.rmtree(direc, onexc=_rmtreeException) def rmtree(direc: str) -> None: from os.path import isdir try: for _ in range(2): if not 
isdir(direc): break _rmtree(direc) except Exception: log.exception(f"error removing directory: {direc}") def showMemoryUsage() -> None: if log.level > core.TRACE: return try: import psutil except ModuleNotFoundError: return usage = psutil.Process(os.getpid()).memory_info().rss // 1024 core.trace(log, f"Memory Usage: {usage:,} kB") pyglossary-5.0.9/pyglossary/persian_utils.py000066400000000000000000000004451476751035500214300ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from .text_utils import replacePostSpaceChar __all__ = ["faEditStr"] def faEditStr(st: str) -> str: return replacePostSpaceChar( st.replace("ي", "ی").replace("ك", "ک").replace("ۂ", "هٔ").replace("ہ", "ه"), "،", ) pyglossary-5.0.9/pyglossary/plugin_handler.py000066400000000000000000000210421476751035500215360ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2008-2022 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . from __future__ import annotations import logging import os import sys from os.path import isdir, join from typing import Any, NamedTuple from . import core from .core import ( cacheDir, dataDir, pluginsDir, userPluginsDir, ) from .glossary_utils import ( Error, splitFilenameExt, ) from .plugin_prop import PluginProp __all__ = ["DetectedFormat", "PluginHandler"] log = logging.getLogger("pyglossary") class DetectedFormat(NamedTuple): filename: str formatName: str compression: str class PluginLoader: loadedModules: set[str] = set() @staticmethod def fromModule(moduleName: str, skipDisabled: bool) -> PluginProp | None: log.debug(f"importing {moduleName} in loadPlugin") try: module = __import__(moduleName) except ModuleNotFoundError as e: log.warning(f"Module {e.name!r} not found, skipping plugin {moduleName!r}") return None except Exception: log.exception(f"Error while importing plugin {moduleName}") return None enable = getattr(module, "enable", False) if skipDisabled and not enable: # log.debug(f"Plugin disabled or not a module: {moduleName}") return None return PluginProp.fromModule(module) @staticmethod def loadPluginsFromJson(jsonPath: str) -> list[PluginProp]: import json with open(jsonPath, encoding="utf-8") as _file: data = json.load(_file) plugins: list[PluginProp] = [] for attrs in data: prop = PluginProp.fromDict( attrs=attrs, modulePath=join(pluginsDir, attrs["module"]), ) if prop is None: continue plugins.append(prop) return plugins @classmethod def loadPlugins( cls: type[PluginLoader], directory: str, skipDisabled: bool = True, ) -> list[PluginProp]: """ Load plugins from directory on startup. Skip importing plugin modules that are already loaded. 
""" import pkgutil # log.debug(f"Loading plugins from directory: {directory!r}") if not isdir(directory): log.critical(f"Invalid plugin directory: {directory!r}") return [] moduleNames = [ moduleName for _, moduleName, _ in pkgutil.iter_modules([directory]) if moduleName not in cls.loadedModules and moduleName != "formats_common" ] moduleNames.sort() sys.path.append(directory) plugins: list[PluginProp] = [] for moduleName in moduleNames: cls.loadedModules.add(moduleName) prop = cls.fromModule(moduleName, skipDisabled) if prop is None: continue plugins.append(prop) sys.path.pop() return plugins class PluginHandler: plugins: dict[str, PluginProp] = {} pluginByExt: dict[str, PluginProp] = {} formatsReadOptions: dict[str, dict[str, Any]] = {} formatsWriteOptions: dict[str, dict[str, Any]] = {} # for example formatsReadOptions[format][optName] gives you the default value readFormats: list[str] = [] writeFormats: list[str] = [] @classmethod def loadPluginsFromJson(cls: type[PluginHandler], jsonPath: str) -> None: for prop in PluginLoader.loadPluginsFromJson(jsonPath): cls._addPlugin(prop) @classmethod def loadPlugins( cls: type[PluginHandler], directory: str, skipDisabled: bool = True, ) -> None: """ Load plugins from directory on startup. Skip importing plugin modules that are already loaded. """ for prop in PluginLoader.loadPlugins(directory, skipDisabled): cls._addPlugin(prop) @classmethod def _addPlugin( cls: type[PluginHandler], prop: PluginProp, ) -> None: name = prop.name cls.plugins[name] = prop if not prop.enable: return for ext in prop.extensions: if ext.lower() != ext: log.error(f"non-lowercase extension={ext!r} in {prop.name} plugin") cls.pluginByExt[ext.lstrip(".")] = prop cls.pluginByExt[ext] = prop if prop.canRead: cls.formatsReadOptions[name] = prop.getReadOptions() cls.readFormats.append(name) if prop.canWrite: cls.formatsWriteOptions[name] = prop.getWriteOptions() cls.writeFormats.append(name) if log.level <= core.TRACE: prop.module # noqa: B018, to make sure importing works @classmethod def _findPlugin( cls: type[PluginHandler], query: str, ) -> PluginProp | None: """Find plugin by name or extension.""" plugin = cls.plugins.get(query) if plugin: return plugin plugin = cls.pluginByExt.get(query) if plugin: return plugin return None @classmethod def detectInputFormat( cls: type[PluginHandler], filename: str, formatName: str = "", ) -> DetectedFormat: filenameOrig = filename _, filename, ext, compression = splitFilenameExt(filename) plugin = None if formatName: plugin = cls.plugins.get(formatName) if plugin is None: raise Error(f"Invalid format {formatName!r}") else: plugin = cls.pluginByExt.get(ext) if not plugin: plugin = cls._findPlugin(filename) if not plugin: raise Error("Unable to detect input format!") if not plugin.canRead: raise Error(f"plugin {plugin.name} does not support reading") if compression in plugin.readCompressions: compression = "" filename = filenameOrig return DetectedFormat(filename, plugin.name, compression) @classmethod def _outputPluginByFormat( cls: type[PluginHandler], formatName: str, ) -> tuple[PluginProp | None, str]: if not formatName: return None, "" plugin = cls.plugins.get(formatName, None) if not plugin: return None, f"Invalid format {formatName}" if not plugin.canWrite: return None, f"plugin {plugin.name} does not support writing" return plugin, "" # C901 `detectOutputFormat` is too complex (16 > 13) # PLR0912 Too many branches (14 > 12) @classmethod def detectOutputFormat( # noqa: PLR0912, PLR0913, C901 cls: type[PluginHandler], filename: str 
= "", formatName: str = "", inputFilename: str = "", addExt: bool = False, ) -> DetectedFormat: from os.path import splitext plugin, err = cls._outputPluginByFormat(formatName) if err: raise Error(err) if not filename: # FIXME: not covered in tests if not inputFilename: raise Error(f"Invalid filename {filename!r}") # type: ignore if not plugin: raise Error( "No filename nor format is given for output file", ) # type: ignore filename = splitext(inputFilename)[0] + plugin.ext return DetectedFormat(filename, plugin.name, "") filenameOrig = filename filenameNoExt, filename, ext, compression = splitFilenameExt(filename) if not plugin: plugin = cls.pluginByExt.get(ext) if not plugin: plugin = cls._findPlugin(filename) if not plugin: raise Error("Unable to detect output format!") # type: ignore if not plugin.canWrite: raise Error( f"plugin {plugin.name} does not support writing", ) # type: ignore if compression in getattr(plugin.writerClass, "compressions", []): compression = "" filename = filenameOrig if addExt: if not filenameNoExt: if inputFilename: ext = plugin.ext filename = splitext(inputFilename)[0] + ext else: log.error("inputFilename is empty") if not ext and plugin.ext: filename += plugin.ext return DetectedFormat(filename, plugin.name, compression) @classmethod def init( cls: type[PluginHandler], usePluginsJson: bool = True, skipDisabledPlugins: bool = True, ) -> None: """ Initialize the glossary class (not an insatnce). Must be called only once, so make sure you put it in the right place. Probably in the top of your program's main function or module. """ cls.readFormats = [] cls.writeFormats = [] pluginsJsonPath = join(dataDir, "plugins-meta", "index.json") # even if usePluginsJson, we should still call loadPlugins to load # possible new plugins that are not in json file if usePluginsJson: cls.loadPluginsFromJson(pluginsJsonPath) cls.loadPlugins(pluginsDir, skipDisabled=skipDisabledPlugins) if isdir(userPluginsDir): cls.loadPlugins(userPluginsDir) os.makedirs(cacheDir, mode=0o700, exist_ok=True) pyglossary-5.0.9/pyglossary/plugin_lib/000077500000000000000000000000001476751035500203165ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugin_lib/__init__.py000066400000000000000000000000001476751035500224150ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugin_lib/dictdlib.py000066400000000000000000000256011476751035500224520ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Dictionary creation library # Copyright (C) 2002 John Goerzen # Copyright (C) 2020 Saeed Rasooli # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA from __future__ import annotations import gzip import os import string import sys import typing if typing.TYPE_CHECKING: import io from collections.abc import Iterable __all__ = ["DictDB"] b64_list = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" url_headword = "00-database-url" short_headword = "00-database-short" info_headword = "00-database-info" validdict = set( string.ascii_letters + string.digits + " \t", ) def b64_encode(val: int) -> str: """ Takes as input an integer val and returns a string of it encoded with the base64 algorithm used by dict indexes. """ startfound = 0 retval = "" for i in range(5, -1, -1): thispart = (val >> (6 * i)) & ((2**6) - 1) if (not startfound) and (not thispart): # Both zero -- keep going. continue startfound = 1 retval += b64_list[thispart] if retval: return retval return b64_list[0] def b64_decode(text: str) -> int: """ Takes as input a string and returns an integer value of it decoded with the base64 algorithm used by dict indexes. """ if not text: return 0 retval = 0 shiftval = 0 for i in range(len(text) - 1, -1, -1): val = b64_list.index(text[i]) retval |= val << shiftval shiftval += 6 return retval def sortNormalize(inp: str) -> str: """ Returns a value such that inp is mapped to a format that sorts properly with standard comparison. """ st2 = "" for char in inp: if char in validdict: st2 += char return st2.upper() + "\0" + inp.upper() def sortKey(x: str) -> list[str]: """Emulate sort -df.""" return x.split("\0") class DictDB: def __init__( self, basename: str, mode: str = "read", quiet: int = 0, ) -> None: # url = 'unknown', shortname = 'unknown', # longinfo = 'unknown', quiet = 0): """ Initialize a DictDB object. Mode must be one of: read -- read-only access write -- write-only access, truncates existing files, does not work with .dz. dict created if nonexistent. update -- read/write access, dict created if nonexistent. Does not work with .dz. Read can read dict or dict.dz files. Write and update will NOT work with dict.dz files. If quiet is nonzero, status messages will be suppressed.
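Example (a minimal sketch -- the "mydict" basename and the sample
entry below are hypothetical, not shipped with this library):

    db = DictDB("mydict", mode="write")
    db.addEntry("hello\n  a greeting", ["hello"])
    db.finish()  # required after "write" or "update" mode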
""" self.mode = mode self.quiet = quiet self.indexEntries: dict[str, list[tuple[int, int]]] = {} # indexEntries[word] is a list of (start: int, size: int) self.count = 0 self.basename = basename self.indexFilename = self.basename + ".index" if mode == "read" and os.path.isfile(self.basename + ".dict.dz"): self.useCompression = 1 else: self.useCompression = 0 self.dictFilename = ( self.basename + ".dict" + (".dz" if self.useCompression else "") ) self.dictFile: io.IOBase self.indexFile: io.IOBase self._open(mode) # self.writeentry(url_headword + "\n " + url, [url_headword]) # self.writeentry(short_headword + "\n " + shortname, [short_headword]) # self.writeentry(info_headword + "\n" + longinfo, [info_headword]) def _open(self, mode: str) -> None: if mode == "read": self.indexFile = open(self.indexFilename, "rb") if self.useCompression: self.dictFile = gzip.GzipFile(self.dictFilename, "rb") else: self.dictFile = open(self.dictFilename, "rb") self._initIndex() elif mode == "write": self.indexFile = open(self.indexFilename, "wb") if self.useCompression: raise ValueError("'write' mode incompatible with .dz files") self.dictFile = open(self.dictFilename, "wb") elif mode == "update": self._openForUpdate() else: raise ValueError("mode must be 'read', 'write', or 'update'") def _openForUpdate(self) -> None: try: self.indexFile = open(self.indexFilename, "r+b") except OSError: self.indexFile = open(self.indexFilename, "w+b") if self.useCompression: # Open it read-only since we don't support mods. self.dictFile = gzip.GzipFile(self.dictFilename, "rb") else: try: self.dictFile = open(self.dictFilename, "r+b") except OSError: self.dictFile = open(self.dictFilename, "w+b") self._initIndex() def __len__(self) -> int: return len(self.indexEntries) def _initIndex(self) -> None: """Load the entire index off disk into memory.""" self.indexFile.seek(0) for line in self.indexFile: parts = line.decode("utf-8").rstrip().split("\t") if parts[0] not in self.indexEntries: self.indexEntries[parts[0]] = [] self.indexEntries[parts[0]].append( ( b64_decode(parts[1]), b64_decode(parts[2]), ), ) def addIndexEntry( self, word: str, start: int, size: int, ) -> None: """ Adds an entry to the index. word is the relevant word. start is the starting position in the dictionary and size is the size of the definition; both are integers. """ if word not in self.indexEntries: self.indexEntries[word] = [] self.indexEntries[word].append((start, size)) def deleteIndexEntry( self, word: str, start: int | None = None, size: int | None = None, ) -> int: """ Removes an entry from the index; word is the word to search for. start and size are optional. If they are specified, only index entries matching the specified values will be removed. For instance, if word is "foo" and start and size are not specified, all index entries for the word foo will be removed. If start and size are specified, only those entries matching all criteria will be removed. This function does not actually remove the data from the .dict file. Therefore, information removed by this function will still exist on-disk in the .dict file, but the dict server will just not "see" it -- there will be no way to get to it anymore. Returns a count of the deleted entries. """ if word not in self.indexEntries: return 0 retval = 0 entrylist = self.indexEntries[word] for i in range(len(entrylist) - 1, -1, -1): # Go backwards so the del doesn't effect the index. 
if (start is None or start == entrylist[i][0]) and ( size is None or size == entrylist[i][1] ): del entrylist[i] retval += 1 if not entrylist: # if we emptied it, del it completely del self.indexEntries[word] return retval def update(self, text: str) -> None: """Writes string out, if not quiet.""" if not self.quiet: sys.stdout.write(text) sys.stdout.flush() def setUrl(self, url: str) -> None: """ Sets the URL attribute of this database. If there was already a URL specified, we will use deleteIndexEntry() on it first. """ self.deleteIndexEntry(url_headword) self.addEntry(url_headword + "\n " + url, [url_headword]) def setShortName(self, shortname: str) -> None: """ Sets the shortname for this database. If there was already a shortname specified, we will use deleteIndexEntry() on it first. """ self.deleteIndexEntry(short_headword) self.addEntry( short_headword + "\n " + shortname, [short_headword], ) def setLongInfo(self, longinfo: str) -> None: """ Sets the extended information for this database. If there was already long info specified, we will use deleteIndexEntry() on it first. """ self.deleteIndexEntry(info_headword) self.addEntry(info_headword + "\n" + longinfo, [info_headword]) def addEntry( self, s_defi: str, headwords: list[str], ) -> None: r""" Writes an entry. defstr holds the content of the definition. headwords is a list specifying one or more words under which this definition should be indexed. This function always adds \n to the end of defstr. """ self.dictFile.seek(0, 2) # Seek to end of file start = self.dictFile.tell() s_defi += "\n" b_defi = s_defi.encode("utf-8") self.dictFile.write(b_defi) for word in headwords: self.addIndexEntry(word, start, len(b_defi)) self.count += 1 if self.count % 1000 == 0: self.update(f"Processed {self.count} records\r") def finish(self, dosort: bool = True) -> None: """ Called to finish the writing process. **REQUIRED IF OPENED WITH 'update' OR 'write' MODES**. This will write the index and close the files. dosort is optional and defaults to true. If set to false, dictlib will not sort the index file. In this case, you MUST manually sort it through "sort -df" before it can be used. """ self.update(f"Processed {self.count} records.\n") if dosort: self.update("Sorting index: converting") indexList: list[str] = [ f"{word}\t{b64_encode(thisdef[0])}\t{b64_encode(thisdef[1])}" for word, defs in self.indexEntries.items() for thisdef in defs ] self.update(" mapping") sortmap: dict[str, list[str]] = {} for entry in indexList: norm = sortNormalize(entry) if norm in sortmap: sortmap[norm].append(entry) sortmap[norm].sort(key=sortKey) else: sortmap[norm] = [entry] self.update(" listing") normalizedentries = list(sortmap) self.update(" sorting") normalizedentries.sort() self.update(" re-mapping") indexList = [] for normentry in normalizedentries: for entry in sortmap[normentry]: indexList.append(entry) self.update(", done.\n") self.update("Writing index...\n") self.indexFile.seek(0) for entry in indexList: self.indexFile.write(entry.encode("utf-8") + b"\n") if self.mode == "update": # In case things were deleted self.indexFile.truncate() self.close() self.update("Complete.\n") def close(self) -> None: self.indexFile.close() self.dictFile.close() def getDefList(self) -> Iterable[str]: """ Returns a list of strings naming all definitions contained in this dictionary. 
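Example (a sketch; assumes "mydict.index" and "mydict.dict" exist):

    db = DictDB("mydict")  # mode defaults to "read"
    for word in db.getDefList():
        print(word, db.getDef(word)[0][:40])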
""" return self.indexEntries.keys() def hasDef(self, word: str) -> bool: return word in self.indexEntries def getDef(self, word: str) -> list[bytes]: """ Given a definition name, returns a list of strings with all matching definitions. This is an *exact* match, not a case-insensitive one. Returns [] if word is not in the dictionary. """ retval: list[bytes] = [] if not self.hasDef(word): return retval for start, length in self.indexEntries[word]: self.dictFile.seek(start) retval.append(self.dictFile.read(length)) return retval # print("------------------------ ", __name__) if __name__ == "__main__": db = DictDB("test") print(db) # noqa: T201 pyglossary-5.0.9/pyglossary/plugin_lib/mutf8.py000066400000000000000000000104741476751035500217410ustar00rootroot00000000000000# Copyright (c) 2012-2015 Tyler Kennedy . All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. # # -- # # Code to convert Python strings to/from Java's modified UTF-8 encoding. # The code is from https://github.com/TkTech/mutf8 (MIT License) with fixes # by @gentlegiantJGC (https://github.com/TkTech/mutf8/pull/7). __all__ = ["decode_modified_utf8", "encode_modified_utf8"] def decode_modified_utf8(s: bytes) -> str: """ Decodes a bytestring containing modified UTF-8 as defined in section 4.4.7 of the JVM specification. :param s: bytestring to be converted. :returns: A unicode representation of the original string. """ s_out = [] s_len = len(s) s_ix = 0 while s_ix < s_len: b1 = s[s_ix] s_ix += 1 if b1 == 0: raise UnicodeDecodeError( "mutf-8", s, s_ix - 1, s_ix, "Embedded NULL byte in input.", ) if b1 < 0x80: # ASCII/one-byte codepoint. s_out.append(chr(b1)) elif (b1 & 0xE0) == 0xC0: # Two-byte codepoint. if s_ix >= s_len: raise UnicodeDecodeError( "mutf-8", s, s_ix - 1, s_ix, "2-byte codepoint started, but input too short to finish.", ) s_out.append( chr( (b1 & 0x1F) << 0x06 | (s[s_ix] & 0x3F), ), ) s_ix += 1 elif (b1 & 0xF0) == 0xE0: # Three-byte codepoint. if s_ix + 1 >= s_len: raise UnicodeDecodeError( "mutf-8", s, s_ix - 1, s_ix, "3-byte or 6-byte codepoint started, but input too" " short to finish.", ) b2 = s[s_ix] b3 = s[s_ix + 1] if b1 == 0xED and (b2 & 0xF0) == 0xA0: # Possible six-byte codepoint. if s_ix + 4 >= s_len: raise UnicodeDecodeError( "mutf-8", s, s_ix - 1, s_ix, "3-byte or 6-byte codepoint started, but input too" " short to finish.", ) b4 = s[s_ix + 2] b5 = s[s_ix + 3] b6 = s[s_ix + 4] if b4 == 0xED and (b5 & 0xF0) == 0xB0: # Definite six-byte codepoint. 
s_out.append( chr( 0x10000 + ( (b2 & 0x0F) << 0x10 | (b3 & 0x3F) << 0x0A | (b5 & 0x0F) << 0x06 | (b6 & 0x3F) ), ), ) s_ix += 5 continue s_out.append( chr( (b1 & 0x0F) << 0x0C | (b2 & 0x3F) << 0x06 | (b3 & 0x3F), ), ) s_ix += 2 else: raise RuntimeError return "".join(s_out) def encode_modified_utf8(u: str) -> bytes: """ Encodes a unicode string as modified UTF-8 as defined in section 4.4.7 of the JVM specification. :param u: unicode string to be converted. :returns: The encoded bytes. """ final_string = bytearray() for c in (ord(char) for char in u): if c == 0x00: # NULL byte encoding shortcircuit. final_string.extend([0xC0, 0x80]) elif c <= 0x7F: # ASCII final_string.append(c) elif c <= 0x7FF: # Two-byte codepoint. final_string.extend( [ (0xC0 | (0x1F & (c >> 0x06))), (0x80 | (0x3F & c)), ], ) elif c <= 0xFFFF: # Three-byte codepoint. final_string.extend( [ (0xE0 | (0x0F & (c >> 0x0C))), (0x80 | (0x3F & (c >> 0x06))), (0x80 | (0x3F & c)), ], ) else: # Six-byte codepoint. final_string.extend( [ 0xED, 0xA0 | ((c >> 0x10) - 1 & 0x0F), 0x80 | ((c >> 0x0A) & 0x3F), 0xED, 0xB0 | ((c >> 0x06) & 0x0F), 0x80 | (c & 0x3F), ], ) return bytes(final_string) pyglossary-5.0.9/pyglossary/plugin_lib/pureSalsa20.py000066400000000000000000000305541476751035500230000ustar00rootroot00000000000000# coding: utf-8 # mypy: ignore-errors # Copyright (C) 2016-2023 Saeed Rasooli on https://github.com/ilius/pyglossary/ # Copyright (C) 2015 Z. H. Liu on https://github.com/zhansliu/writemdict # pureSalsa20.py -- a pure Python implementation of the Salsa20 cipher, # ported to Python 3 # v4.0: Added Python 3 support, dropped support for Python <= 2.5. # // zhansliu # Original comments below. # ==================================================================== # There are comments here by two authors about three pieces of software: # comments by Larry Bugbee about # Salsa20, the stream cipher by Daniel J. Bernstein # (including comments about the speed of the C version) and # pySalsa20, Bugbee's own Python wrapper for salsa20.c # (including some references), and # comments by Steve Witham about # pureSalsa20, Witham's pure Python 2.5 implementation of Salsa20, # which follows pySalsa20's API, and is in this file. # Salsa20: a Fast Streaming Cipher (comments by Larry Bugbee) # ----------------------------------------------------------- # Salsa20 is a fast stream cipher written by Daniel Bernstein # that basically uses a hash function and XOR making for fast # encryption. (Decryption uses the same function.) Salsa20 # is simple and quick. # Some Salsa20 parameter values... # design strength 128 bits # key length 128 or 256 bits, exactly # IV, aka nonce 64 bits, always # chunk size must be in multiples of 64 bytes # Salsa20 has two reduced versions, 8 and 12 rounds each. # One benchmark (10 MB): # 1.5GHz PPC G4 102/97/89 MB/sec for 8/12/20 rounds # AMD Athlon 2500+ 77/67/53 MB/sec for 8/12/20 rounds # (no I/O and before Python GC kicks in) # Salsa20 is a Phase 3 finalist in the EU eSTREAM competition # and appears to be one of the fastest ciphers. It is well # documented so I will not attempt any injustice here. Please # see "References" below. # ...and Salsa20 is "free for any use". # pySalsa20: a Python wrapper for Salsa20 (Comments by Larry Bugbee) # ------------------------------------------------------------------ # pySalsa20.py is a simple ctypes Python wrapper. Salsa20 is # as its name implies, 20 rounds, but there are two reduced # versions, 8 and 12 rounds each.
Because the APIs are # identical, pySalsa20 is capable of wrapping all three # versions (number of rounds hardcoded), including a special # version that allows you to set the number of rounds with a # set_rounds() function. Compile the version of your choice # as a shared library (not as a Python extension), name and # install it as libsalsa20.so. # Sample usage: # from pySalsa20 import Salsa20 # s20 = Salsa20(key, IV) # dataout = s20.encryptBytes(datain) # same for decrypt # This is EXPERIMENTAL software and intended for educational # purposes only. To make experimentation less cumbersome, # pySalsa20 is also free for any use. # THIS PROGRAM IS PROVIDED WITHOUT WARRANTY OR GUARANTEE OF # ANY KIND. USE AT YOUR OWN RISK. # Enjoy, # Larry Bugbee # bugbee@seanet.com # April 2007 # References: # ----------- # http://en.wikipedia.org/wiki/Salsa20 # http://en.wikipedia.org/wiki/Daniel_Bernstein # http://cr.yp.to/djb.html # http://www.ecrypt.eu.org/stream/salsa20p3.html # http://www.ecrypt.eu.org/stream/p3ciphers/salsa20/salsa20_p3source.zip # Prerequisites for pySalsa20: # ---------------------------- # - Python 2.5 (haven't tested in 2.4) # pureSalsa20: Salsa20 in pure Python 2.5 (comments by Steve Witham) # ------------------------------------------------------------------ # pureSalsa20 is the stand-alone Python code in this file. # It implements the underlying Salsa20 core algorithm # and emulates pySalsa20's Salsa20 class API (minus a bug(*)). # pureSalsa20 is MUCH slower than libsalsa20.so wrapped with pySalsa20-- # about 1/1000 the speed for Salsa20/20 and 1/500 the speed for Salsa20/8, # when encrypting 64k-byte blocks on my computer. # pureSalsa20 is for cases where portability is much more important than # speed. I wrote it for use in a "structured" random number generator. # There are comments about the reasons for this slowness in # http://www.tiac.net/~sw/2010/02/PureSalsa20 # Sample usage: # from pureSalsa20 import Salsa20 # s20 = Salsa20(key, IV) # dataout = s20.encryptBytes(datain) # same for decrypt # I took the test code from pySalsa20, added a bunch of tests including # rough speed tests, and moved them into the file testSalsa20.py. # To test both pySalsa20 and pureSalsa20, type # python testSalsa20.py # (*)The bug (?) in pySalsa20 is this. The rounds variable is global to the # libsalsa20.so library and not switched when switching between instances # of the Salsa20 class. # s1 = Salsa20( key, IV, 20 ) # s2 = Salsa20( key, IV, 8 ) # In this example, # with pySalsa20, both s1 and s2 will do 8 rounds of encryption. # with pureSalsa20, s1 will do 20 rounds and s2 will do 8 rounds. # Perhaps giving each instance its own nRounds variable, which # is passed to the salsa20wordtobyte() function, is insecure. I'm not a # cryptographer. # pureSalsa20.py and testSalsa20.py are EXPERIMENTAL software and # intended for educational purposes only. To make experimentation less # cumbersome, pureSalsa20.py and testSalsa20.py are free for any use. # Revisions: # ---------- # p3.2 Fixed bug that initialized the output buffer with plaintext! # Saner ramping of nreps in speed test. # Minor changes and print statements. # p3.1 Took timing variability out of add32() and rot32(). # Made the internals more like pySalsa20/libsalsa . # Put the semicolons back in the main loop! # In encryptBytes(), modify a byte array instead of appending. # Fixed speed calculation bug. # Used subclasses instead of patches in testSalsa20.py . # Added 64k-byte messages to speed test to be fair to pySalsa20. 
# p3 First version, intended to parallel pySalsa20 version 3. # More references: # ---------------- # http://www.seanet.com/~bugbee/crypto/salsa20/ [pySalsa20] # http://cr.yp.to/snuffle.html [The original name of Salsa20] # http://cr.yp.to/snuffle/salsafamily-20071225.pdf [ Salsa20 design] # http://www.tiac.net/~sw/2010/02/PureSalsa20 # THIS PROGRAM IS PROVIDED WITHOUT WARRANTY OR GUARANTEE OF # ANY KIND. USE AT YOUR OWN RISK. # Cheers, # Steve Witham sw at remove-this tiac dot net # February, 2010 import operator from struct import Struct __all__ = ["Salsa20"] little_u64 = Struct("<Q") little2_i32 = Struct("<2i") little4_i32 = Struct("<4i") little16_i32 = Struct("<16i") class Salsa20: def __init__(self, key=None, IV=None, rounds=20) -> None: self._lastChunk64 = True self._IVbitlen = 64 # must be 64 bits self.ctx = [0] * 16 if key: self.setKey(key) if IV: self.setIV(IV) self.setRounds(rounds) def setKey(self, key): assert isinstance(key, bytes) ctx = self.ctx if len(key) == 32: # recommended constants = b"expand 32-byte k" ctx[1], ctx[2], ctx[3], ctx[4] = little4_i32.unpack(key[0:16]) ctx[11], ctx[12], ctx[13], ctx[14] = little4_i32.unpack(key[16:32]) elif len(key) == 16: constants = b"expand 16-byte k" ctx[1], ctx[2], ctx[3], ctx[4] = little4_i32.unpack(key[0:16]) ctx[11], ctx[12], ctx[13], ctx[14] = little4_i32.unpack(key[0:16]) else: raise ValueError("key length isn't 32 or 16 bytes.") ctx[0], ctx[5], ctx[10], ctx[15] = little4_i32.unpack(constants) def setIV(self, IV): assert isinstance(IV, bytes) assert len(IV) * 8 == 64, "nonce (IV) not 64 bits" self.IV = IV ctx = self.ctx ctx[6], ctx[7] = little2_i32.unpack(IV) ctx[8], ctx[9] = 0, 0 # Reset the block counter. setNonce = setIV # support an alternate name def setCounter(self, counter): assert isinstance(counter, int) assert 0 <= counter < 1 << 64, "counter < 0 or >= 2**64" ctx = self.ctx ctx[8], ctx[9] = little2_i32.unpack(little_u64.pack(counter)) def getCounter(self): return little_u64.unpack(little2_i32.pack(*self.ctx[8:10]))[0] def setRounds(self, rounds, testing=False): assert testing or rounds in {8, 12, 20}, "rounds must be 8, 12, 20" self.rounds = rounds def encryptBytes(self, data: bytes) -> bytes: assert isinstance(data, bytes), "data must be byte string" assert self._lastChunk64, "previous chunk not multiple of 64 bytes" lendata = len(data) munged = bytearray(lendata) for i in range(0, lendata, 64): h = salsa20_wordtobyte(self.ctx, self.rounds, checkRounds=False) self.setCounter((self.getCounter() + 1) % 2**64) # Stopping at 2^70 bytes per nonce is user's responsibility. for j in range(min(64, lendata - i)): munged[i + j] = data[i + j] ^ h[j] self._lastChunk64 = not lendata % 64 return bytes(munged) decryptBytes = encryptBytes # encrypt and decrypt use same function # -------------------------------------------------------------------------- def salsa20_wordtobyte(input_, nRounds=20, checkRounds=True): """ Do nRounds Salsa20 rounds on a copy of input: list or tuple of 16 ints treated as little-endian unsigneds. Returns a 64-byte string. """ assert isinstance(input_, list | tuple) and len(input_) == 16 assert not checkRounds or nRounds in {8, 12, 20} x = list(input_) XOR = operator.xor ROTATE = rot32 PLUS = add32 for _ in range(nRounds // 2): # These ...XOR...ROTATE...PLUS...
lines are from ecrypt-linux.c # unchanged except for indents and the blank line between rounds: x[4] = XOR(x[4], ROTATE(PLUS(x[0], x[12]), 7)) x[8] = XOR(x[8], ROTATE(PLUS(x[4], x[0]), 9)) x[12] = XOR(x[12], ROTATE(PLUS(x[8], x[4]), 13)) x[0] = XOR(x[0], ROTATE(PLUS(x[12], x[8]), 18)) x[9] = XOR(x[9], ROTATE(PLUS(x[5], x[1]), 7)) x[13] = XOR(x[13], ROTATE(PLUS(x[9], x[5]), 9)) x[1] = XOR(x[1], ROTATE(PLUS(x[13], x[9]), 13)) x[5] = XOR(x[5], ROTATE(PLUS(x[1], x[13]), 18)) x[14] = XOR(x[14], ROTATE(PLUS(x[10], x[6]), 7)) x[2] = XOR(x[2], ROTATE(PLUS(x[14], x[10]), 9)) x[6] = XOR(x[6], ROTATE(PLUS(x[2], x[14]), 13)) x[10] = XOR(x[10], ROTATE(PLUS(x[6], x[2]), 18)) x[3] = XOR(x[3], ROTATE(PLUS(x[15], x[11]), 7)) x[7] = XOR(x[7], ROTATE(PLUS(x[3], x[15]), 9)) x[11] = XOR(x[11], ROTATE(PLUS(x[7], x[3]), 13)) x[15] = XOR(x[15], ROTATE(PLUS(x[11], x[7]), 18)) x[1] = XOR(x[1], ROTATE(PLUS(x[0], x[3]), 7)) x[2] = XOR(x[2], ROTATE(PLUS(x[1], x[0]), 9)) x[3] = XOR(x[3], ROTATE(PLUS(x[2], x[1]), 13)) x[0] = XOR(x[0], ROTATE(PLUS(x[3], x[2]), 18)) x[6] = XOR(x[6], ROTATE(PLUS(x[5], x[4]), 7)) x[7] = XOR(x[7], ROTATE(PLUS(x[6], x[5]), 9)) x[4] = XOR(x[4], ROTATE(PLUS(x[7], x[6]), 13)) x[5] = XOR(x[5], ROTATE(PLUS(x[4], x[7]), 18)) x[11] = XOR(x[11], ROTATE(PLUS(x[10], x[9]), 7)) x[8] = XOR(x[8], ROTATE(PLUS(x[11], x[10]), 9)) x[9] = XOR(x[9], ROTATE(PLUS(x[8], x[11]), 13)) x[10] = XOR(x[10], ROTATE(PLUS(x[9], x[8]), 18)) x[12] = XOR(x[12], ROTATE(PLUS(x[15], x[14]), 7)) x[13] = XOR(x[13], ROTATE(PLUS(x[12], x[15]), 9)) x[14] = XOR(x[14], ROTATE(PLUS(x[13], x[12]), 13)) x[15] = XOR(x[15], ROTATE(PLUS(x[14], x[13]), 18)) for idx, item in enumerate(input_): x[idx] = PLUS(x[idx], item) return little16_i32.pack(*x) # --------------------------- 32-bit ops ------------------------------- def trunc32(w): """ Return the bottom 32 bits of w as a Python int. This creates longs temporarily, but returns an int. """ w = int((w & 0x7FFFFFFF) | -(w & 0x80000000)) assert isinstance(w, int) return w def add32(a, b): """ Add two 32-bit words discarding carry above 32nd bit, and without creating a Python long. Timing shouldn't vary. """ lo = (a & 0xFFFF) + (b & 0xFFFF) hi = (a >> 16) + (b >> 16) + (lo >> 16) return (-(hi & 0x8000) | (hi & 0x7FFF)) << 16 | (lo & 0xFFFF) def rot32(w, nLeft): """ Rotate 32-bit word left by nLeft or right by -nLeft without creating a Python long. Timing depends on nLeft but not on w. """ nLeft &= 31 # which makes nLeft >= 0 if nLeft == 0: return w # Note: now 1 <= nLeft <= 31. # RRRsLLLLLL There are nLeft RRR's, (31-nLeft) LLLLLL's, # => sLLLLLLRRR and one s which becomes the sign bit. RRR = ((w >> 1) & 0x7FFFFFFF) >> (31 - nLeft) sLLLLLL = -((1 << (31 - nLeft)) & w) | (0x7FFFFFFF >> nLeft) & w return RRR | (sLLLLLL << nLeft) # --------------------------------- end ----------------------------------- pyglossary-5.0.9/pyglossary/plugin_lib/readmdict.py000066400000000000000000000507451476751035500226370ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # readmdict.py from https://bitbucket.org/xwang/mdict-analysis # Octopus MDict Dictionary File (.mdx) and Resource File (.mdd) Analyser # # Copyright (C) 2016-2023 Saeed Rasooli on https://github.com/ilius/pyglossary/ # Copyright (C) 2012, 2013, 2015, 2022 Xiaoqiang Wang # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. 
# # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. import logging import re import sys # zlib compression is used for engine version >=2.0 import zlib from io import BytesIO from struct import pack, unpack from .pureSalsa20 import Salsa20 from .ripemd128 import ripemd128 # LZO compression is used for engine version < 2.0 try: import lzo except ImportError: lzo = None # xxhash is used for engine version >= 3.0 try: import xxhash except ImportError: xxhash = None __all__ = ["MDD", "MDX"] log = logging.getLogger(__name__) def _unescape_entities(text): """Unescape offending tags < > " &.""" text = text.replace(b"&lt;", b"<") text = text.replace(b"&gt;", b">") text = text.replace(b"&quot;", b'"') text = text.replace(b"&amp;", b"&") return text # noqa: RET504 def _fast_decrypt(data, key): """XOR decryption.""" b = bytearray(data) key = bytearray(key) previous = 0x36 for i, bi in enumerate(b): t = (bi >> 4 | bi << 4) & 0xFF t = t ^ previous ^ (i & 0xFF) ^ key[i % len(key)] previous = bi b[i] = t return bytes(b) def _salsa_decrypt(ciphertext, encrypt_key): """salsa20 (8 rounds) decryption.""" s20 = Salsa20(key=encrypt_key, IV=b"\x00" * 8, rounds=8) return s20.encryptBytes(ciphertext) def _decrypt_regcode_by_userid(reg_code: bytes, userid: bytes) -> bytes: userid_digest = ripemd128(userid) s20 = Salsa20(key=userid_digest, IV=b"\x00" * 8, rounds=8) return s20.encryptBytes(reg_code) class MDict: """ Base class which reads in header and key block. It is mainly a code-sharing base class; typical use goes through the MDX/MDD subclasses defined further below.
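For example (a sketch; "example.mdx" is a hypothetical file):

    mdx = MDX("example.mdx")
    for key, value in mdx.items():
        ...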
""" def __init__( self, fname: str, encoding: str = "", passcode: "tuple[bytes, bytes] | None" = None, ) -> None: self._fname = fname self._encoding = encoding.upper() self._encrypted_key = None self._passcode = passcode self.header = self._read_header() # decrypt regcode to get the encrypted key if passcode is not None: regcode, userid = passcode if isinstance(userid, str): userid = userid.encode("utf8") self._encrypted_key = _decrypt_regcode_by_userid(regcode, userid) # MDict 3.0 encryption key derives from UUID if present elif self._version >= 3.0: uuid = self.header.get(b"UUID") if uuid: if xxhash is None: raise RuntimeError( "xxhash module is needed to read MDict 3.0 format" "\n" "Run `pip3 install xxhash` to install", ) mid = (len(uuid) + 1) // 2 self._encrypted_key = xxhash.xxh64_digest( uuid[:mid], ) + xxhash.xxh64_digest(uuid[mid:]) self._key_list = self._read_keys() def __repr__(self): return ( f"MDict({self._fname!r}, " f"encoding={self._encoding!r}, " f"passcode={self._passcode})" ) @property def filename(self): return self._fname def __len__(self): return self._num_entries def __iter__(self): return self.keys() def keys(self): """Return an iterator over dictionary keys.""" return (key_value for key_id, key_value in self._key_list) def _read_number(self, f): return unpack(self._number_format, f.read(self._number_width))[0] @staticmethod def _read_int32(f): return unpack(">I", f.read(4))[0] @staticmethod def _parse_header(header): """Extract attributes from .""" return { key: _unescape_entities(value) for key, value in re.findall(rb'(\w+)="(.*?)"', header, re.DOTALL) } def _decode_block(self, block, decompressed_size): # block info: compression, encryption info = unpack("> 4) & 0xF encryption_size = (info >> 8) & 0xFF # adler checksum of the block data used as the encryption key if none given adler32 = unpack(">I", block[4:8])[0] encrypted_key = self._encrypted_key if encrypted_key is None: encrypted_key = ripemd128(block[4:8]) # block data data = block[8:] # decrypt if encryption_method == 0: decrypted_block = data elif encryption_method == 1: decrypted_block = ( _fast_decrypt(data[:encryption_size], encrypted_key) + data[encryption_size:] ) elif encryption_method == 2: decrypted_block = ( _salsa_decrypt(data[:encryption_size], encrypted_key) + data[encryption_size:] ) else: raise ValueError(f"encryption method {encryption_method} not supported") # check adler checksum over decrypted data if self._version >= 3: assert hex(adler32) == hex(zlib.adler32(decrypted_block) & 0xFFFFFFFF) # decompress if compression_method == 0: decompressed_block = decrypted_block elif compression_method == 1: if lzo is None: raise RuntimeError("LZO compression is not supported") header = b"\xf0" + pack(">I", decompressed_size) decompressed_block = lzo.decompress(header + decrypted_block) elif compression_method == 2: decompressed_block = zlib.decompress(decrypted_block) else: raise ValueError(f"compression method {compression_method} not supported") # check adler checksum over decompressed data if self._version < 3: assert hex(adler32) == hex(zlib.adler32(decompressed_block) & 0xFFFFFFFF) return decompressed_block def _decode_key_block_info(self, key_block_info_compressed): if self._version >= 2: # zlib compression assert key_block_info_compressed[:4] == b"\x02\x00\x00\x00" # decrypt if needed if self._encrypt & 0x02: key = ripemd128(key_block_info_compressed[4:8] + pack(b"I", key_block_info_compressed[4:8])[0] assert adler32 == zlib.adler32(key_block_info) & 0xFFFFFFFF else: # no compression 
key_block_info = key_block_info_compressed # decode key_block_info_list = [] num_entries = 0 i = 0 if self._version >= 2: byte_format = ">H" byte_width = 2 text_term = 1 else: byte_format = ">B" byte_width = 1 text_term = 0 while i < len(key_block_info): # number of entries in current key block num_entries += unpack( self._number_format, key_block_info[i : i + self._number_width], )[0] i += self._number_width # text head size text_head_size = unpack(byte_format, key_block_info[i : i + byte_width])[0] i += byte_width # text head if self._encoding != "UTF-16": i += text_head_size + text_term else: i += (text_head_size + text_term) * 2 # text tail size text_tail_size = unpack(byte_format, key_block_info[i : i + byte_width])[0] i += byte_width # text tail if self._encoding != "UTF-16": i += text_tail_size + text_term else: i += (text_tail_size + text_term) * 2 # key block compressed size key_block_compressed_size = unpack( self._number_format, key_block_info[i : i + self._number_width], )[0] i += self._number_width # key block decompressed size key_block_decompressed_size = unpack( self._number_format, key_block_info[i : i + self._number_width], )[0] i += self._number_width key_block_info_list.append( (key_block_compressed_size, key_block_decompressed_size), ) # assert num_entries == self._num_entries return key_block_info_list def _decode_key_block(self, key_block_compressed, key_block_info_list): key_list = [] i = 0 for compressed_size, decompressed_size in key_block_info_list: key_block = self._decode_block( key_block_compressed[i : i + compressed_size], decompressed_size, ) # extract one single key block into a key list key_list += self._split_key_block(key_block) i += compressed_size return key_list def _split_key_block(self, key_block): key_list = [] key_start_index = 0 while key_start_index < len(key_block): # the corresponding record's offset in record block key_id = unpack( self._number_format, key_block[key_start_index : key_start_index + self._number_width], )[0] # key text ends with '\x00' if self._encoding == "UTF-16": delimiter = b"\x00\x00" width = 2 else: delimiter = b"\x00" width = 1 i = key_start_index + self._number_width key_end_index = None while i < len(key_block): if key_block[i : i + width] == delimiter: key_end_index = i break i += width assert key_end_index is not None key_text = ( key_block[key_start_index + self._number_width : key_end_index] .decode(self._encoding, errors="ignore") .encode("utf-8") .strip() ) key_start_index = key_end_index + width key_list += [(key_id, key_text)] return key_list def _read_header(self): f = open(self._fname, "rb") # number of bytes of header text header_bytes_size = unpack(">I", f.read(4))[0] header_bytes = f.read(header_bytes_size) # 4 bytes: adler32 checksum of header, in little endian adler32 = unpack("<I", f.read(4))[0] assert adler32 == zlib.adler32(header_bytes) & 0xFFFFFFFF # mark down key block offset self._key_block_offset = f.tell() f.close() # header text in utf-16 encoding ending with '\x00\x00' header_text = header_bytes[:-4].decode("utf-16").encode("utf-8") header_tag = self._parse_header(header_text) if not self._encoding: encoding = header_tag.get(b"Encoding", b"") if sys.hexversion >= 0x03000000: encoding = encoding.decode("utf-8") # GB18030 > GBK > GB2312 if encoding in {"GBK", "GB2312"}: encoding = "GB18030" self._encoding = encoding # encryption flag # 0x00 - no encryption, "Allow export to text" is checked in MdxBuilder 3. # 0x01 - encrypt record block, "Encryption Key" is given in MdxBuilder 3. # 0x02 - encrypt key info block, # "Allow export to text" is unchecked in MdxBuilder 3.
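# For example, Encrypted="2" means only the key info block is encrypted.
# The two bits are tested independently later on (a sketch mirroring the
# checks in _read_keys and _decode_key_block_info):
#     record_block_encrypted = bool(self._encrypt & 0x01)
#     key_info_encrypted = bool(self._encrypt & 0x02)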
if b"Encrypted" not in header_tag or header_tag[b"Encrypted"] == b"No": self._encrypt = 0 elif header_tag[b"Encrypted"] == b"Yes": self._encrypt = 1 else: self._encrypt = int(header_tag[b"Encrypted"]) # stylesheet attribute if present takes form of: # style_number # 1-255 # style_begin # or '' # style_end # or '' # store stylesheet in dict in the form of # {'number' : ('style_begin', 'style_end')} self._stylesheet = {} if header_tag.get("StyleSheet"): lines = header_tag["StyleSheet"].splitlines() self._stylesheet = { lines[i]: (lines[i + 1], lines[i + 2]) for i in range(0, len(lines), 3) } # before version 2.0, number is 4 bytes integer # version 2.0 and above uses 8 bytes self._version = float(header_tag[b"GeneratedByEngineVersion"]) if self._version < 2.0: self._number_width = 4 self._number_format = ">I" else: self._number_width = 8 self._number_format = ">Q" # version 3.0 uses UTF-8 only if self._version >= 3: self._encoding = "UTF-8" return header_tag def _read_keys(self): if self._version >= 3: return self._read_keys_v3() # if no regcode is given, try brute-force (only for engine <= 2) if (self._encrypt & 0x01) and self._encrypted_key is None: log.warning("Trying brute-force on encrypted key blocks") return self._read_keys_brutal() return self._read_keys_v1v2() def _read_keys_v3(self): f = open(self._fname, "rb") f.seek(self._key_block_offset) # find all blocks offset while True: block_type = self._read_int32(f) block_size = self._read_number(f) block_offset = f.tell() # record data if block_type == 0x01000000: self._record_block_offset = block_offset # record index elif block_type == 0x02000000: self._record_index_offset = block_offset # key data elif block_type == 0x03000000: self._key_data_offset = block_offset # key index elif block_type == 0x04000000: self._key_index_offset = block_offset else: raise RuntimeError(f"Unknown block type {block_type}") f.seek(block_size, 1) # test the end of file if f.read(4): f.seek(-4, 1) else: break # read key data f.seek(self._key_data_offset) number = self._read_int32(f) self._read_number(f) # total_size key_list = [] for _ in range(number): decompressed_size = self._read_int32(f) compressed_size = self._read_int32(f) block_data = f.read(compressed_size) decompressed_block_data = self._decode_block(block_data, decompressed_size) key_list.extend(self._split_key_block(decompressed_block_data)) f.close() self._num_entries = len(key_list) return key_list def _read_keys_v1v2(self): f = open(self._fname, "rb") f.seek(self._key_block_offset) # the following numbers could be encrypted num_bytes = 8 * 5 if self._version >= 2.0 else 4 * 4 block = f.read(num_bytes) if self._encrypt & 1: block = _salsa_decrypt(block, self._encrypted_key) # decode this block sf = BytesIO(block) # number of key blocks num_key_blocks = self._read_number(sf) # number of entries self._num_entries = self._read_number(sf) # number of bytes of key block info after decompression if self._version >= 2.0: self._read_number(sf) # key_block_info_decomp_size # number of bytes of key block info key_block_info_size = self._read_number(sf) # number of bytes of key block key_block_size = self._read_number(sf) # 4 bytes: adler checksum of previous 5 numbers if self._version >= 2.0: adler32 = unpack(">I", f.read(4))[0] assert adler32 == (zlib.adler32(block) & 0xFFFFFFFF) # read key block info, which indicates key block's compressed # and decompressed size key_block_info = f.read(key_block_info_size) key_block_info_list = self._decode_key_block_info(key_block_info) assert num_key_blocks == 
len(key_block_info_list) # read key block key_block_compressed = f.read(key_block_size) # extract key block key_list = self._decode_key_block(key_block_compressed, key_block_info_list) self._record_block_offset = f.tell() f.close() return key_list def _read_keys_brutal(self): f = open(self._fname, "rb") f.seek(self._key_block_offset) # the following numbers could be encrypted, disregard them! if self._version >= 2.0: num_bytes = 8 * 5 + 4 key_block_type = b"\x02\x00\x00\x00" else: num_bytes = 4 * 4 key_block_type = b"\x01\x00\x00\x00" f.read(num_bytes) # block # key block info # 4 bytes '\x02\x00\x00\x00' # 4 bytes adler32 checksum # unknown number of bytes follows until '\x02\x00\x00\x00' # which marks the beginning of key block key_block_info = f.read(8) if self._version >= 2.0: assert key_block_info[:4] == b"\x02\x00\x00\x00" while True: fpos = f.tell() t = f.read(1024) index = t.find(key_block_type) if index != -1: key_block_info += t[:index] f.seek(fpos + index) break key_block_info += t key_block_info_list = self._decode_key_block_info(key_block_info) key_block_size = sum(list(zip(*key_block_info_list, strict=False))[0]) # read key block key_block_compressed = f.read(key_block_size) # extract key block key_list = self._decode_key_block(key_block_compressed, key_block_info_list) self._record_block_offset = f.tell() f.close() self._num_entries = len(key_list) return key_list def items(self): """ Return a generator which in turn produce tuples in the form of (filename, content). """ return self._read_records() def _read_records(self): if self._version >= 3: yield from self._read_records_v3() else: yield from self._read_records_v1v2() def _read_records_v3(self): f = open(self._fname, "rb") f.seek(self._record_block_offset) offset = 0 i = 0 size_counter = 0 num_record_blocks = self._read_int32(f) self._read_number(f) # num_bytes for _ in range(num_record_blocks): decompressed_size = self._read_int32(f) compressed_size = self._read_int32(f) record_block = self._decode_block( f.read(compressed_size), decompressed_size, ) # split record block according to the offset info from key block while i < len(self._key_list): record_start, key_text = self._key_list[i] # reach the end of current record block if record_start - offset >= len(record_block): break # record end index if i < len(self._key_list) - 1: record_end = self._key_list[i + 1][0] else: record_end = len(record_block) + offset i += 1 data = record_block[record_start - offset : record_end - offset] yield key_text, self._treat_record_data(data) offset += len(record_block) size_counter += compressed_size def _read_records_v1v2(self): f = open(self._fname, "rb") f.seek(self._record_block_offset) num_record_blocks = self._read_number(f) num_entries = self._read_number(f) assert num_entries == self._num_entries record_block_info_size = self._read_number(f) self._read_number(f) # record_block_size # record block info section record_block_info_list = [] size_counter = 0 for _ in range(num_record_blocks): compressed_size = self._read_number(f) decompressed_size = self._read_number(f) record_block_info_list += [(compressed_size, decompressed_size)] size_counter += self._number_width * 2 assert size_counter == record_block_info_size # actual record block offset = 0 i = 0 size_counter = 0 for compressed_size, decompressed_size in record_block_info_list: record_block_compressed = f.read(compressed_size) try: record_block = self._decode_block( record_block_compressed, decompressed_size, ) except zlib.error: log.error("zlib decompress error") 
log.debug(f"record_block_compressed = {record_block_compressed!r}") continue # split record block according to the offset info from key block while i < len(self._key_list): record_start, key_text = self._key_list[i] # reach the end of current record block if record_start - offset >= len(record_block): break # record end index if i < len(self._key_list) - 1: record_end = self._key_list[i + 1][0] else: record_end = len(record_block) + offset i += 1 data = record_block[record_start - offset : record_end - offset] yield key_text, self._treat_record_data(data) offset += len(record_block) size_counter += compressed_size # assert size_counter == record_block_size f.close() def _treat_record_data(self, data): # noqa: PLR6301 return data class MDD(MDict): """ MDict resource file format (*.MDD) reader. >>> mdd = MDD("example.mdd") >>> len(mdd) 208 >>> for filename,content in mdd.items(): ... print(filename, content[:10]) """ def __init__( self, fname: str, passcode: "tuple[bytes, bytes] | None" = None, ) -> None: MDict.__init__(self, fname, encoding="UTF-16", passcode=passcode) class MDX(MDict): """ MDict dictionary file format (*.MDD) reader. >>> mdx = MDX("example.mdx") >>> len(mdx) 42481 >>> for key,value in mdx.items(): ... print(key, value[:10]) """ def __init__( self, fname: str, encoding: str = "", substyle: bool = False, passcode: "tuple[bytes, bytes] | None" = None, ) -> None: MDict.__init__(self, fname, encoding, passcode) self._substyle = substyle def _substitute_stylesheet(self, txt): # substitute stylesheet definition txt_list = re.split(r"`\d+`", txt) txt_tag = re.findall(r"`\d+`", txt) txt_styled = txt_list[0] for j, p in enumerate(txt_list[1:]): key = txt_tag[j][1:-1] try: style = self._stylesheet[key] except KeyError: log.error(f'invalid stylesheet key "{key}"') continue if p and p[-1] == "\n": txt_styled = txt_styled + style[0] + p.rstrip() + style[1] + "\r\n" else: txt_styled = txt_styled + style[0] + p + style[1] return txt_styled def _treat_record_data(self, data): # convert to utf-8 data = ( data.decode(self._encoding, errors="ignore").strip("\x00").encode("utf-8") ) # substitute styles if self._substyle and self._stylesheet: data = self._substitute_stylesheet(data) return data # noqa: RET504 pyglossary-5.0.9/pyglossary/plugin_lib/ripemd128.py000066400000000000000000000072551476751035500224140ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright (C) 2016-2023 Saeed Rasooli on https://github.com/ilius/pyglossary/ # Copyright (C) 2015 Z. H. Liu on https://github.com/zhansliu/writemdict # # ripemd128.py - A simple ripemd128 library in pure Python. # # Supports both Python 2 (versions >= 2.6) and Python 3. 
# # Usage: # from ripemd128 import ripemd128 # digest = ripemd128(b"The quick brown fox jumps over the lazy dog") # assert( # digest == b"\x3f\xa9\xb5\x7f\x05\x3c\x05\x3f\xbe\x27\x35\xb2\x38\x0d\xb5\x96" # ) import struct __all__ = ["ripemd128"] # follows this description: http://homes.esat.kuleuven.be/~bosselae/ripemd/rmd128.txt def f(j, x, y, z): assert 0 <= j < 64 if j < 16: return x ^ y ^ z if j < 32: return (x & y) | (z & ~x) if j < 48: return (x | (0xFFFFFFFF & ~y)) ^ z return (x & z) | (y & ~z) def K(j): assert 0 <= j < 64 if j < 16: return 0x00000000 if j < 32: return 0x5A827999 if j < 48: return 0x6ED9EBA1 return 0x8F1BBCDC def Kp(j): assert 0 <= j < 64 if j < 16: return 0x50A28BE6 if j < 32: return 0x5C4DD124 if j < 48: return 0x6D703EF3 return 0x00000000 def padandsplit(message: bytes): """ returns a two-dimensional array X[i][j] of 32-bit integers, where j ranges from 0 to 15. First pads the message to length in bytes is congruent to 56 (mod 64), by first adding a byte 0x80, and then padding with 0x00 bytes until the message length is congruent to 56 (mod 64). Then adds the little-endian 64-bit representation of the original length. Finally, splits the result up into 64-byte blocks, which are further parsed as 32-bit integers. """ origlen = len(message) padlength = 64 - ((origlen - 56) % 64) # minimum padding is 1! message += b"\x80" message += b"\x00" * (padlength - 1) message += struct.pack("<Q", origlen * 8) return [ [ struct.unpack("<L", message[i + j : i + j + 4])[0] for j in range(0, 64, 4) ] for i in range(0, len(message), 64) ] def add(*args): return sum(args) & 0xFFFFFFFF def rol(s, x): assert s < 32 return (x << s | x >> (32 - s)) & 0xFFFFFFFF r = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8, 3, 10, 14, 4, 9, 15, 8, 1, 2, 7, 0, 6, 13, 11, 5, 12, 1, 9, 11, 10, 0, 8, 12, 4, 13, 3, 7, 15, 14, 5, 6, 2, ] rp = [ 5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, 6, 11, 3, 7, 0, 13, 5, 10, 14, 15, 8, 12, 4, 9, 1, 2, 15, 5, 1, 3, 7, 14, 6, 9, 11, 8, 12, 2, 10, 0, 4, 13, 8, 6, 4, 1, 3, 11, 15, 0, 5, 12, 2, 13, 9, 7, 10, 14, ] s = [ 11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8, 7, 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12, 11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5, 11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12, ] sp = [ 8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6, 9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11, 9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5, 15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8, ] def ripemd128(message: bytes) -> bytes: h0 = 0x67452301 h1 = 0xEFCDAB89 h2 = 0x98BADCFE h3 = 0x10325476 X = padandsplit(message) for Xi in X: A, B, C, D = h0, h1, h2, h3 Ap, Bp, Cp, Dp = h0, h1, h2, h3 for j in range(64): T = rol( s[j], add( A, f(j, B, C, D), Xi[r[j]], K(j), ), ) A, D, C, B = D, C, B, T T = rol( sp[j], add( Ap, f(63 - j, Bp, Cp, Dp), Xi[rp[j]], Kp(j), ), ) Ap, Dp, Cp, Bp = Dp, Cp, Bp, T T = add(h1, C, Dp) h1 = add(h2, D, Ap) h2 = add(h3, A, Bp) h3 = add(h0, B, Cp) h0 = T return struct.pack("<LLLL", h0, h1, h2, h3) pyglossary-5.0.9/pyglossary/plugin_prop.py # -*- coding: utf-8 -*- # # Copyright © Saeed Rasooli <saeed.gnu@gmail.com> (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program.
Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see <http://www.gnu.org/licenses/>. from __future__ import annotations import logging import warnings from typing import TYPE_CHECKING if TYPE_CHECKING: import pathlib from typing import Any from .flags import StrWithDesc from . import core from .flags import ( DEFAULT_NO, YesNoAlwaysNever, flagsByName, ) from .option import Option, optionFromDict __all__ = ["PluginProp"] log = logging.getLogger("pyglossary") def optionsPropFromDict( optionsPropDict: dict[str, Any], ) -> dict[str, Option]: props: dict[str, Option] = {} for name, propDict in optionsPropDict.items(): try: prop = optionFromDict(propDict) except Exception: log.exception(f"{name=}, {propDict=}\n") continue props[name] = prop return props def sortOnWriteFromStr(sortOnWriteStr: str | None) -> StrWithDesc: if sortOnWriteStr is None: return DEFAULT_NO return flagsByName[sortOnWriteStr] class PluginCheckError(Exception): pass class PluginProp: # noqa: PLR0904 __slots__ = [ "_Reader", "_ReaderLoaded", "_Writer", "_WriterLoaded", "_canRead", "_canWrite", "_description", "_enable", "_extensionCreate", "_extensions", "_lname", "_mod", "_moduleName", "_modulePath", "_name", "_optionsProp", "_readCompressions", "_readDepends", "_readOptions", "_singleFile", "_sortKeyName", "_sortOnWrite", "_writeDepends", "_writeOptions", ] def __init__(self) -> None: self._mod: Any self._Reader: Any self._ReaderLoaded: bool self._Writer: Any self._WriterLoaded: bool self._moduleName: str self._modulePath: str self._enable: bool self._lname: str self._name: str self._description: str self._extensions: list[str] self._extensionCreate: str self._singleFile: bool self._optionsProp: dict[str, Option] self._sortOnWrite: YesNoAlwaysNever self._sortKeyName: str | None self._canRead: bool self._canWrite: bool self._readOptions: dict[str, Any] self._writeOptions: dict[str, Any] self._readCompressions: list[str] self._readDepends: dict[str, str] self._writeDepends: dict[str, str] @classmethod def fromDict( cls: type, attrs: dict[str, Any], modulePath: str, ) -> PluginProp: self = cls() self._mod = None self._Reader = None self._ReaderLoaded = False self._Writer = None self._WriterLoaded = False self._moduleName = attrs["module"] self._modulePath = modulePath self._enable = attrs.get("enable", True) self._lname = attrs["lname"] self._name = attrs["name"] self._description = attrs["description"] self._extensions = attrs["extensions"] self._extensionCreate = attrs.get("extensionCreate", "") self._singleFile = attrs["singleFile"] self._optionsProp = optionsPropFromDict(attrs["optionsProp"]) self._sortOnWrite = sortOnWriteFromStr(attrs.get("sortOnWrite")) self._sortKeyName = attrs.get("sortKeyName") self._canRead = attrs["canRead"] self._canWrite = attrs["canWrite"] self._readOptions = attrs.get("readOptions", {}) self._writeOptions = attrs.get("writeOptions", {}) self._readCompressions = attrs.get("readCompressions", []) self._readDepends = attrs.get("readDepends", {}) self._writeDepends = attrs.get("writeDepends", {}) return self @classmethod def fromModule(cls: type, mod: Any) -> PluginProp: # noqa: ANN401 self = cls() self._mod = mod self._Reader = None self._ReaderLoaded = False self._Writer = None self._WriterLoaded = False self._moduleName = mod.__name__ self._modulePath = mod.__file__ if self._modulePath.endswith("__init__.py"): self._modulePath = self._modulePath[: -len("/__init__.py")] elif self._modulePath.endswith(".py"): self._modulePath = self._modulePath[:-3] self._enable = getattr(mod, "enable", True) self._lname =
mod.lname if hasattr(mod, "name"): self._name = mod.name else: self._name = mod.format warnings.warn( "`format` variable in plugin is deprecated, rename it to `name`", category=DeprecationWarning, stacklevel=2, ) self._description = mod.description self._extensions = list(mod.extensions) self._extensionCreate = getattr(mod, "extensionCreate", "") self._singleFile = getattr(mod, "singleFile", False) self._optionsProp = getattr(mod, "optionsProp", {}) self._sortOnWrite = getattr(mod, "sortOnWrite", DEFAULT_NO) self._sortKeyName = getattr(mod, "sortKeyName", None) self._canRead = hasattr(mod, "Reader") self._canWrite = hasattr(mod, "Writer") self._readOptions = None self._writeOptions = None self._readCompressions = None self._readDepends = None self._writeDepends = None if core.isDebug(): self.checkModule(mod) return self @property def enable(self) -> bool: return self._enable @property def module(self) -> Any: # noqa: ANN401 if self._mod is not None: return self._mod moduleName = self._moduleName log.debug(f"importing {moduleName} in DictPluginProp") try: mod = __import__( f"pyglossary.plugins.{moduleName}", fromlist=moduleName, ) except ModuleNotFoundError as e: log.warning( f"Module {e.name!r} not found in {self._modulePath}" f", skipping plugin {moduleName!r}", ) return None except Exception: log.exception(f"Error while importing plugin {moduleName}") return None # self._mod = _mod if core.isDebug(): self.checkModule(mod) return mod @property def lname(self) -> str: return self._lname @property def name(self) -> str: return self._name @property def moduleName(self) -> str: return self._moduleName @property def description(self) -> str: return self._description @property def extensions(self) -> list[str]: return self._extensions @property def ext(self) -> str: extensions = self.extensions if extensions: return extensions[0] return "" @property def extensionCreate(self) -> str: return self._extensionCreate @property def singleFile(self) -> bool: return self._singleFile @property def optionsProp(self) -> dict[str, Option]: return self._optionsProp @property def sortOnWrite(self) -> YesNoAlwaysNever: return self._sortOnWrite @property def sortKeyName(self) -> str | None: return self._sortKeyName @property def path(self) -> pathlib.Path: from pathlib import Path return Path(self._modulePath) @property def readerClass(self) -> type | None: if self._ReaderLoaded: return self._Reader cls = getattr(self.module, "Reader", None) self._Reader = cls self._ReaderLoaded = True if cls is not None and core.isDebug(): self.checkReaderClass() return cls @property def writerClass(self) -> type | None: if self._WriterLoaded: return self._Writer cls = getattr(self.module, "Writer", None) self._Writer = cls self._WriterLoaded = True if cls is not None and core.isDebug(): self.checkWriterClass() return cls @property def canRead(self) -> bool: return self._canRead @property def canWrite(self) -> bool: return self._canWrite @staticmethod def _getOptionAttrNamesFromClass(rwclass: type) -> list[str]: nameList = [] for cls in (*rwclass.__bases__, rwclass): for _name in cls.__dict__: if not _name.startswith("_") or _name.startswith("__"): # and _name not in ("_open",) continue nameList.append(_name) # rwclass.__dict__ does not include attributes of parent/base class # and dir(rwclass) is sorted by attribute name alphabetically # using rwclass.__bases__ solves the problem return nameList def _getOptionsFromClass(self, rwclass: type | None) -> dict[str, Any]: if rwclass is None: return {} optionsProp = self.optionsProp 
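# The loop below maps private class attributes such as "_encoding" to
# option names like "encoding", keeping only names declared in
# optionsProp, skipping disabled options, and warning when a default
# value fails prop.validate().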
options: dict[str, Any] = {} for attrName in self._getOptionAttrNamesFromClass(rwclass): name = attrName[1:] default = getattr(rwclass, attrName) if name not in optionsProp: continue prop = optionsProp[name] if prop.disabled: core.trace( log, f"skipping disabled option {name} in {self.name} plugin", ) continue if not prop.validate(default): log.warning( "invalid default value for option: " f"{name} = {default!r} in plugin {self.name}", ) options[name] = default return options def getReadOptions(self) -> dict[str, Any]: if self._readOptions is None: self._readOptions = self._getOptionsFromClass(self.readerClass) return self._readOptions def getWriteOptions(self) -> dict[str, Any]: if self._writeOptions is None: self._writeOptions = self._getOptionsFromClass(self.writerClass) return self._writeOptions @property def readCompressions(self) -> list[str]: if self._readCompressions is None: self._readCompressions = getattr(self.readerClass, "compressions", []) return self._readCompressions @property def readDepends(self) -> dict[str, str]: if self._readDepends is None: self._readDepends = getattr(self.readerClass, "depends", {}) return self._readDepends @property def writeDepends(self) -> dict[str, str]: if self._writeDepends is None: self._writeDepends = getattr(self.writerClass, "depends", {}) return self._writeDepends def checkModule(self, module: Any) -> None: # noqa: ANN401 name = self.name if hasattr(module, "write"): log.error( f"plugin {name!r} has write function, must migrate to Writer class", ) extensions = module.extensions if not isinstance(extensions, tuple): msg = f"{name} plugin: extensions must be tuple" if isinstance(extensions, list): extensions = tuple(extensions) log.error(msg) else: raise TypeError(msg) if not isinstance(self.readDepends, dict): log.error( f"invalid depends={self.readDepends} in {self.name!r}.Reader class", ) if not isinstance(self.writeDepends, dict): log.error( f"invalid depends={self.writeDepends} in {self.name!r}.Reader class", ) for name, opt in self.optionsProp.items(): if name.lower() != name: suggestName = "".join( "_" + x.lower() if x.isupper() else x for x in name ) log.debug( f"{self.name}: please rename option {name} to {suggestName}", ) if not opt.comment: log.debug( f"{self.name}: please add comment for option {name}", ) valid__all__ = [ "enable", "lname", "name", "description", "extensions", "extensionCreate", "singleFile", "kind", "wiki", "website", "optionsProp", "Reader", "Writer", ] # only run this on CI to do extra validation def checkModuleMore(self, module: Any) -> None: name = self.name if not hasattr(module, "__all__"): raise PluginCheckError(f"Please add __all__ to plugin {name!r}") all_ = module.__all__ for attr in all_: if not hasattr(module, attr): raise PluginCheckError( f"Undefined name {attr!r} in __all__ in plugin {name!r}" f": {module.__file__}", ) if attr not in self.valid__all__: raise PluginCheckError( f"Unnecessary name {attr!r} in __all__ in plugin {name!r}" f": {module.__file__}", ) def checkReaderClass(self) -> bool: cls = self._Reader for attr in ( "__init__", "open", "close", "__len__", "__iter__", ): if not hasattr(cls, attr): log.error( f"Invalid Reader class in {self.name!r} plugin, no {attr!r} method", ) self._Reader = None return False return True def checkWriterClass(self) -> bool: cls = self._Writer for attr in ( "__init__", "open", "write", "finish", ): if not hasattr(cls, attr): log.error( f"Invalid Writer class in {self.name!r} plugin, no {attr!r} method", ) self._Writer = None return False return True # def 
_getReadExtraOptions(self) -> list[str]: # noqa: F811 # cls = self.readerClass # if cls is None: # return [] # return self.__class__.getExtraOptionsFromFunc(cls.open, self.name) # def _getWriteExtraOptions(self) -> list[str]: # noqa: F811 # cls = self.writerClass # if cls is None: # return [] # return self.__class__.getExtraOptionsFromFunc(cls.write, self.name) # @classmethod # def getExtraOptionsFromFunc( # cls: type, # func: Callable, # format: str, # ) -> list[str]: # import inspect # extraOptNames = [] # for name, param in inspect.signature(func).parameters.items(): # if name == "self": # continue # if str(param.default) != "": # extraOptNames.append(name) # continue # if name not in {"filename", "dirname"}: # extraOptNames.append(name) # if extraOptNames: # log.warning(f"{format}: {extraOptNames = }") # return extraOptNames pyglossary-5.0.9/pyglossary/plugins/000077500000000000000000000000001476751035500176535ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/__init__.py000066400000000000000000000000001476751035500217520ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/aard2_slob/000077500000000000000000000000001476751035500216635ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/aard2_slob/__init__.py000066400000000000000000000033101476751035500237710ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, FileSizeOption, IntOption, Option, StrOption, ) from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "aard2_slob" name = "Aard2Slob" description = "Aard 2 (.slob)" extensions = (".slob",) extensionCreate = ".slob" singleFile = True kind = "binary" wiki = "https://github.com/itkach/slob/wiki" website = ( "http://aarddict.org/", "aarddict.org", ) optionsProp: dict[str, Option] = { "compression": StrOption( values=["", "bz2", "zlib", "lzma2"], comment="Compression Algorithm", ), "content_type": StrOption( customValue=True, values=[ "text/plain; charset=utf-8", "text/html; charset=utf-8", ], comment="Content Type", ), # "encoding": EncodingOption(), "file_size_approx": FileSizeOption( comment="split up by given approximate file size\nexamples: 100m, 1g", ), "file_size_approx_check_num_entries": IntOption( comment="for file_size_approx, check every `[?]` entries", ), "separate_alternates": BoolOption( comment="add alternate headwords as separate entries to slob", ), "word_title": BoolOption( comment="add headwords title to beginning of definition", ), "version_info": BoolOption( comment="add version info tags to slob file", ), "audio_goldendict": BoolOption( comment="Convert audio links for GoldenDict (desktop)", ), } extraDocs = [ ( "PyICU", "See [doc/pyicu.md](./doc/pyicu.md) file for more detailed" " instructions on how to install PyICU.", ), ] pyglossary-5.0.9/pyglossary/plugins/aard2_slob/reader.py000066400000000000000000000070551476751035500235060ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import re from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Iterator from pyglossary import slob from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.core import exc_note, log, pip from pyglossary.plugins.aard2_slob.tags import ( supported_tags, t_copyright, t_created_at, t_created_by, 
t_edition, t_label, t_license_name, t_license_url, t_uri, ) __all__ = ["Reader"] class Reader: useByteProgress = False depends = { "icu": "PyICU", # >=1.5 } def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._clear() self._re_bword = re.compile( "(
<a href=[^<>]+?>)", re.IGNORECASE, ) def close(self) -> None: if self._slobObj is not None: self._slobObj.close() self._clear() def _clear(self) -> None: self._filename = "" self._slobObj: slob.Slob | None = None # TODO: PLR0912 Too many branches (13 > 12) def open(self, filename: str) -> None: # noqa: PLR0912 try: import icu # type: ignore # noqa: F401 except ModuleNotFoundError as e: exc_note(e, f"Run `{pip} install PyICU` to install") raise from pyglossary import slob self._filename = filename self._slobObj = slob.open(filename) tags = dict(self._slobObj.tags.items()) if t_label in tags: self._glos.setInfo("name", tags[t_label]) if t_created_at in tags: self._glos.setInfo("creationTime", tags[t_created_at]) if t_created_by in tags: self._glos.setInfo("author", tags[t_created_by]) copyrightLines: list[str] = [] for key in (t_copyright, t_license_name, t_license_url): try: value = tags.pop(key) except KeyError: continue copyrightLines.append(value) if copyrightLines: self._glos.setInfo("copyright", "\n".join(copyrightLines)) if t_uri in tags: self._glos.setInfo("website", tags[t_uri]) if t_edition in tags: self._glos.setInfo("edition", tags[t_edition]) for key, value in tags.items(): if key in supported_tags: continue self._glos.setInfo(f"slob.{key}", value) def __len__(self) -> int: if self._slobObj is None: log.error("called len() on a reader which is not open") return 0 return len(self._slobObj) @staticmethod def _href_sub(m: re.Match) -> str: st = m.group(0) if "//" in st: return st return st.replace('href="', 'href="bword://').replace( "href='", "href='bword://", ) def __iter__(self) -> Iterator[EntryType | None]: from pyglossary.slob import MIME_HTML, MIME_TEXT if self._slobObj is None: raise RuntimeError("iterating over a reader while it's not open") slobObj = self._slobObj blobSet = set() # slob library gives duplicate blobs when iterating over slobObj # even keeping the last id is not enough, since duplicate blobs # are not all consecutive.
so we have to keep a set of blob IDs for blob in slobObj: id_ = blob.identity if id_ in blobSet: yield None # update progressbar continue blobSet.add(id_) # blob.key is str, blob.content is bytes word = blob.key ctype = blob.content_type.split(";")[0] if ctype not in {MIME_HTML, MIME_TEXT}: log.debug(f"unknown {blob.content_type=} in {word=}") word = word.removeprefix("~/") yield self._glos.newDataEntry(word, blob.content) continue defiFormat = "" if ctype == MIME_HTML: defiFormat = "h" elif ctype == MIME_TEXT: defiFormat = "m" defi = blob.content.decode("utf-8") defi = self._re_bword.sub(self._href_sub, defi) yield self._glos.newEntry(word, defi, defiFormat=defiFormat) pyglossary-5.0.9/pyglossary/plugins/aard2_slob/tags.py000066400000000000000000000007031476751035500231730ustar00rootroot00000000000000t_created_at = "created.at" t_label = "label" t_created_by = "created.by" t_copyright = "copyright" t_license_name = "license.name" t_license_url = "license.url" t_uri = "uri" t_edition = "edition" supported_tags = { t_label, t_created_at, t_created_by, t_copyright, t_uri, t_edition, } __all__ = [ "supported_tags", "t_copyright", "t_created_at", "t_created_by", "t_edition", "t_label", "t_license_name", "t_license_url", "t_uri", ] pyglossary-5.0.9/pyglossary/plugins/aard2_slob/tools.toml000066400000000000000000000006121476751035500237170ustar00rootroot00000000000000["Aard 2 for Android"] web = "http://aarddict.org/" source = "https://github.com/itkach/aard2-android" platforms = [ "Android",] license = "GPL" plang = "Java" # no auto-RTL (in plaintext or html) ["Aard2 for Web"] web = "http://aarddict.org/" source = "https://github.com/itkach/aard2-web" platforms = [ "Web",] license = "MPL" plang = "Java" # auto-RTL works in plaintext mode, but not html pyglossary-5.0.9/pyglossary/plugins/aard2_slob/writer.py000066400000000000000000000157151476751035500235620ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import os import re import shutil from os.path import isfile, splitext from typing import TYPE_CHECKING from pyglossary.glossary_utils import WriteError if TYPE_CHECKING: from collections.abc import Generator from pyglossary import slob from pyglossary.glossary_types import EntryType, WriterGlossaryType from pyglossary.core import cacheDir, exc_note, log, pip from pyglossary.plugins.aard2_slob.tags import ( t_created_at, t_created_by, t_label, t_uri, ) __all__ = ["Writer"] class Writer: depends = { "icu": "PyICU", } _compression: str = "zlib" _content_type: str = "" _file_size_approx: int = 0 _file_size_approx_check_num_entries = 100 _separate_alternates: bool = False _word_title: bool = False _version_info: bool = False _audio_goldendict: bool = False resourceMimeTypes = { "png": "image/png", "jpeg": "image/jpeg", "jpg": "image/jpeg", "gif": "image/gif", "svg": "image/svg+xml", "webp": "image/webp", "tiff": "image/tiff", "tif": "image/tiff", "bmp": "image/bmp", "css": "text/css", "js": "application/javascript", "json": "application/json", "woff": "application/font-woff", "woff2": "application/font-woff2", "ttf": "application/x-font-ttf", "otf": "application/x-font-opentype", "mp3": "audio/mpeg", "ogg": "audio/ogg", "opus": "audio/ogg", "oga": "audio/ogg", "spx": "audio/x-speex", "wav": "audio/wav", "ini": "text/plain", # "application/octet-stream+xapian", "eot": "application/vnd.ms-fontobject", "pdf": "application/pdf", "mp4": "video/mp4", } def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" self._resPrefix = "" 
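# Note: resourceMimeTypes above is consulted by addDataEntry() below;
# e.g. a resource entry named "style.css" would be stored with
# content_type "text/css", while an extension missing from the map is
# logged as an error and skipped rather than guessed.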
self._slobWriter: slob.Writer | None = None @staticmethod def _slobObserver( event: slob.WriterEvent, # noqa: F401, F821 ) -> None: log.debug(f"slob: {event.name}{': ' + event.data if event.data else ''}") def _open(self, filepath: str, namePostfix: str) -> slob.Writer: from pyglossary import slob if isfile(filepath): shutil.move(filepath, f"{filepath}.bak") log.warning(f"renamed existing {filepath!r} to {filepath + '.bak'!r}") self._slobWriter = slobWriter = slob.Writer( filepath, observer=self._slobObserver, workdir=cacheDir, compression=self._compression, version_info=self._version_info, ) # "label" tag is a dictionary name shown in UI slobWriter.tag(t_label, self._glos.getInfo("name") + namePostfix) createdAt = self._glos.getInfo("creationTime") if createdAt is not None: slobWriter.tag(t_created_at, createdAt) createdBy = self._glos.getInfo("author") if createdBy is not None: slobWriter.tag(t_created_by, createdBy) filename = os.path.basename(filepath) dic_uri = re.sub(r"[^A-Za-z0-9_-]+", "_", filename) # "uri" tag is not web url, it's a part of gloss addressing ID: uri + article ID # setting the tag allows bookmark & history migration, if dict file is updated # we use source filename as "uri", since it is stable (most likely) slobWriter.tag(t_uri, dic_uri) return slobWriter def open(self, filename: str) -> None: try: import icu # noqa: F401 except ModuleNotFoundError as e: exc_note(e, f"Run `{pip} install PyICU` to install") raise if isfile(filename): raise WriteError(f"File '{filename}' already exists") namePostfix = "" if self._file_size_approx > 0: namePostfix = " (part 1)" self._open(filename, namePostfix) self._filename = filename def finish(self) -> None: from time import perf_counter self._filename = "" if self._slobWriter is None: return log.info("Finalizing slob file...") t0 = perf_counter() self._slobWriter.finalize() log.info(f"Finalizing slob file took {perf_counter() - t0:.1f} seconds") self._slobWriter = None def addDataEntry(self, entry: EntryType) -> None: slobWriter = self._slobWriter if slobWriter is None: raise ValueError("slobWriter is None") rel_path = entry.s_word _, ext = splitext(rel_path) ext = ext.lstrip(os.path.extsep).lower() content_type = self.resourceMimeTypes.get(ext) if not content_type: log.error(f"Aard2 slob: unknown content type for {rel_path!r}") return content = entry.data key = self._resPrefix + rel_path try: key.encode(slobWriter.encoding) except UnicodeEncodeError: log.error(f"Failed to add, broken unicode in key: {key!a}") return slobWriter.add(content, key, content_type=content_type) def addEntry(self, entry: EntryType) -> None: words = entry.l_word b_defi = entry.defi.encode("utf-8") ctype = self._content_type writer = self._slobWriter if writer is None: raise ValueError("slobWriter is None") entry.detectDefiFormat() defiFormat = entry.defiFormat if self._word_title and defiFormat in {"h", "m"}: if defiFormat == "m": defiFormat = "h" title = self._glos.wordTitleStr( words[0], ) b_defi = title.encode("utf-8") + b_defi if defiFormat == "h": b_defi = b_defi.replace(b'"bword://', b'"') b_defi = b_defi.replace(b"'bword://", b"'") if not self._audio_goldendict: b_defi = b_defi.replace( b"""href="sound://""", b'''onclick="new Audio(this.href).play(); return false;" href="''', ) b_defi = b_defi.replace( b"""href='sound://""", b"""onclick="new Audio(this.href).play(); return false;" href='""", ) b_defi = b_defi.replace(b""" Generator[None, EntryType, None]: slobWriter = self._slobWriter if slobWriter is None: raise ValueError("slobWriter is None") 
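# Sketch of the file_size_approx behavior implemented in write() below:
# every `file_size_approx_check_num_entries` entries, the accumulated
# data size is compared to ~95% of the requested size; once reached,
# the current part is finalized and a new one is opened, producing
# (hypothetical filename) "dict.slob" tagged "(part 1)", then
# "dict.1.slob" tagged "(part 2)", and so on.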
file_size_approx = int(self._file_size_approx * 0.95) entryCount = 0 sumBlobSize = 0 fileIndex = 0 filenameNoExt, _ = splitext(self._filename) while True: entry = yield if entry is None: break if entry.isData(): self.addDataEntry(entry) else: self.addEntry(entry) if file_size_approx <= 0: continue # handle file_size_approx check_every = self._file_size_approx_check_num_entries entryCount += 1 if entryCount % check_every == 0: sumBlobSize = slobWriter.size_data() if sumBlobSize >= file_size_approx: slobWriter.finalize() fileIndex += 1 slobWriter = self._open( f"{filenameNoExt}.{fileIndex}.slob", f" (part {fileIndex + 1})", ) sumBlobSize = 0 entryCount = 0 pyglossary-5.0.9/pyglossary/plugins/almaany/000077500000000000000000000000001476751035500212755ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/almaany/__init__.py000066400000000000000000000012531476751035500234070ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from pyglossary.option import Option from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "almaany" name = "Almaany" description = "Almaany.com (SQLite3)" extensions = () extensionCreate = ".db" singleFile = True kind = "binary" wiki = "" website = ( "https://play.google.com/store/apps/details?id=com.almaany.arar", "Almaany.com Arabic Dictionary - Google Play", ) optionsProp: dict[str, Option] = {} pyglossary-5.0.9/pyglossary/plugins/almaany/reader.py000066400000000000000000000043671476751035500231230ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import html from typing import TYPE_CHECKING if TYPE_CHECKING: import sqlite3 from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] class Reader: useByteProgress = False def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._clear() def _clear(self) -> None: self._filename = "" self._con: sqlite3.Connection | None = None self._cur: sqlite3.Cursor | None = None def open(self, filename: str) -> None: from sqlite3 import connect self._filename = filename self._con = connect(filename) self._cur = self._con.cursor() self._glos.setDefaultDefiFormat("h") def __len__(self) -> int: if self._cur is None: raise ValueError("cur is None") self._cur.execute("select count(*) from WordsTable") return self._cur.fetchone()[0] def __iter__(self) -> Iterator[EntryType]: if self._cur is None: raise ValueError("cur is None") from pyglossary.langs.writing_system import getWritingSystemFromText alternateDict: dict[str, list[str]] = {} self._cur.execute("select wordkey, searchwordkey from Keys") for row in self._cur.fetchall(): if row[0] in alternateDict: alternateDict[row[0]].append(row[1]) else: alternateDict[row[0]] = [row[1]] self._cur.execute( "select word, searchword, root, meaning from WordsTable order by id", ) # FIXME: iteration over self._cur stops after one entry # and self._cur.fetchone() returns None # for row in self._cur: for row in self._cur.fetchall(): word = row[0] searchword = row[1] root = row[2] meaning = row[3] definition = meaning definition = definition.replace("|", "
<br/>") if root: definition += ( f'<br/>Root:<br/>{root}' ) ws = getWritingSystemFromText(meaning) if ws and ws.direction == "rtl": definition = f'<div dir="rtl">{definition}</div>
    ' words = [word, searchword] if word in alternateDict: words += alternateDict[word] yield self._glos.newEntry( words, definition, defiFormat="h", ) def close(self) -> None: if self._cur: self._cur.close() if self._con: self._con.close() self._clear() pyglossary-5.0.9/pyglossary/plugins/almaany/tools.toml000066400000000000000000000002271476751035500233330ustar00rootroot00000000000000["Almaany.com Arabic Dictionary"] web = "https://play.google.com/store/apps/details?id=com.almaany.arar" platforms = [ "Android",] license = "Unknown" pyglossary-5.0.9/pyglossary/plugins/appledict/000077500000000000000000000000001476751035500216205ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/appledict/__init__.py000066400000000000000000000045071476751035500237370ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Output to Apple Dictionary xml sources for Dictionary Development Kit. # # Copyright © 2016-2023 Saeed Rasooli (ilius) # Copyright © 2016 ivan tkachenko # Copyright © 2012-2015 Xiaoqiang Wang # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations from pyglossary.option import ( BoolOption, DictOption, Option, StrOption, ) from .writer import Writer __all__ = [ "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "appledict" name = "AppleDict" description = "AppleDict Source" extensions = (".apple",) extensionCreate = ".apple/" singleFile = False kind = "directory" wiki = "" website = ( "https://support.apple.com/en-gu/guide/dictionary/welcome/mac", "Dictionary User Guide for Mac", ) # FIXME: rename indexes arg/option to indexes_lang? optionsProp: dict[str, Option] = { "clean_html": BoolOption(comment="use BeautifulSoup parser"), "css": StrOption( comment="custom .css file path", ), "xsl": StrOption( comment="custom XSL transformations file path", ), "default_prefs": DictOption( comment="default prefs in python dict format", # example: {"key": "value", "version": "1"} ), "prefs_html": StrOption( comment="preferences XHTML file path", ), "front_back_matter": StrOption( comment="XML file path with top-level tag", ), "jing": BoolOption(comment="run Jing check on generated XML"), "indexes": StrOption( customValue=False, values=["", "ru", "zh"], comment="Additional indexes to dictionary entries", ), } extraDocs = [ ( "Also see:", "See [doc/apple.md](./doc/apple.md) for additional AppleDict instructions.", ), ] pyglossary-5.0.9/pyglossary/plugins/appledict/_content.py000066400000000000000000000174271476751035500240160ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2016-2019 Saeed Rasooli (ilius) # Copyright © 2016 ivan tkachenko me@ratijas.tk # Copyright © 2012-2015 Xiaoqiang Wang # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. 
# # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # FIXME: # MDX-specific parts should be isolated and moved to MDX Reader # and parts that are specific to one glossary # (like Oxford_Advanced_English-Chinese_Dictionary_9th_Edition.mdx) # should be moved to separate modules (like content processors) and enabled # per-glossary (by title or something else) from __future__ import annotations import logging import re from typing import TYPE_CHECKING, Any from xml.sax.saxutils import quoteattr, unescape if TYPE_CHECKING: import bs4 as BeautifulSoup import bs4.element from pyglossary.text_utils import toStr __all__ = ["prepare_content"] log = logging.getLogger("pyglossary") _re_brhr = re.compile("<(BR|HR)>", re.IGNORECASE) _re_nonprintable = re.compile("[\x00-\x07\x0e-\x1f]") _re_img = re.compile("<img (.*?)>", re.IGNORECASE) _re_div_margin_em = re.compile(r'<div style="margin-left:(\d)em">') _sub_div_margin_em = r'<div class="m\1">' _re_div_margin_em_ex = re.compile( r'<div class="ex" style="margin-left:(\d)em;color:steelblue">', ) _sub_div_margin_em_ex = r'<div class="m\1 ex">
' _re_href = re.compile(r"""href=(["'])(.*?)\1""") _re_margin = re.compile(r"margin-left:(\d)em") def prepare_content( title: str | None, body: str, BeautifulSoup: Any, ) -> str: # heavily integrated with output of dsl reader plugin! # and with xdxf also. """:param title: str | None""" # class="sec" => d:priority="2" # style="color:steelblue" => class="ex" # class="p" style="color:green" => class="p" # style="color:green" => class="c" # style="margin-left:{}em" => class="m{}" # => # xhtml is strict if BeautifulSoup: content = prepare_content_with_soup(title, body, BeautifulSoup) else: content = prepare_content_without_soup(title, body) content = content.replace(" ", " ") content = _re_nonprintable.sub("", content) return content # noqa: RET504 def prepare_content_without_soup( title: str | None, body: str, ) -> str: # somewhat analogous to what BeautifulSoup is supposed to do body = _re_div_margin_em.sub(_sub_div_margin_em, body) body = _re_div_margin_em_ex.sub(_sub_div_margin_em_ex, body) body = _re_href.sub(_href_sub, body) body = ( body.replace( '<i style="color:green">', '<i class="c">', ) .replace( '<i class="p" style="color:green">', '<i class="p">', ) .replace( '<span class="ex" style="color:steelblue">', '<span class="ex">', ) .replace( '<span class="sec ex" style="color:steelblue">', '<span class="sec ex">', ) .replace("<u>", '<span class="u">') .replace("</u>", "</span>") .replace("<s>", "<del>") .replace("</s>", "</del>") ) # nice header to display content = f"<h1>{title}</h1>
    {body}" if title else body content = _re_brhr.sub(r"<\g<1> />", content) content = _re_img.sub(r"/>", content) return content # noqa: RET504 def _prepare_href(tag: bs4.element.Tag) -> None: href = tag["href"] href = _cleanup_link_target(href) if href.startswith("sound:"): _fix_sound_link(href, tag) elif href.startswith(("phonetics", "help:phonetics")): # for oxford9 log.debug(f"phonetics: {tag=}") if tag.audio and "name" in tag.audio.attrs: tag["onmousedown"] = "this.lastChild.play(); return false;" src_name = tag.audio["name"].replace("#", "_") tag.audio["src"] = f"{src_name}.mp3" elif not _link_is_url(href): tag["href"] = f"x-dictionary:d:{href}" def _prepare_onclick(soup: BeautifulSoup.BeautifulSoup) -> None: for thumb in soup.find_all("div", "pic_thumb"): thumb["onclick"] = ( 'this.setAttribute("style", "display:none"); ' 'this.nextElementSibling.setAttribute("style", "display:block")' ) for pic in soup.find_all("div", "big_pic"): pic["onclick"] = ( 'this.setAttribute("style", "display:none"), ' 'this.previousElementSibling.setAttribute("style", "display:block")' ) # to unfold(expand) and fold(collapse) blocks for pos in soup.find_all("pos", onclick="toggle_infl(this)"): # TODO: simplify this! pos["onclick"] = ( r"var e = this.parentElement.parentElement.parentElement" r'.querySelector("res-g vp-gs"); style = window.' r"getComputedStyle(e), display = style.getPropertyValue" r'("display"), "none" === e.style.display || "none" === display' r' ? e.style.display = "block" : e.style.display = "none", ' r"this.className.match(/(?:^|\s)Clicked(?!\S)/) ? this." r"className = this.className.replace(" r'/(?:^|\s)Clicked(?!\S)/g, "") : this.setAttribute(' r'"class", "Clicked")' ) # TODO: PLR0912 Too many branches (18 > 12) def prepare_content_with_soup( # noqa: PLR0912 title: str | None, body: str, BeautifulSoup: BeautifulSoup, ) -> str: soup = BeautifulSoup.BeautifulSoup(body, features="lxml") # difference between "lxml" and "html.parser" if soup.body: soup = soup.body for tag in soup(class_="sec"): tag["class"].remove("sec") if not tag["class"]: del tag["class"] tag["d:priority"] = "2" for tag in soup(lambda x: "color:steelblue" in x.get("style", "")): _remove_style(tag, "color:steelblue") if "ex" not in tag.get("class", []): tag["class"] = tag.get("class", []) + ["ex"] for tag in soup(_is_green): _remove_style(tag, "color:green") if "p" not in tag.get("class", ""): tag["class"] = tag.get("class", []) + ["c"] for tag in soup(True): if "style" in tag.attrs: m = _re_margin.search(tag["style"]) if m: _remove_style(tag, m.group(0)) tag["class"] = tag.get("class", []) + ["m" + m.group(1)] for tag in soup(lambda x: "xhtml:" in x.name): old_tag_name = tag.name tag.name = old_tag_name[len("xhtml:") :] if tag.string: tag.string = f"{tag.string} " for tag in soup.select("[href]"): _prepare_href(tag) _prepare_onclick(soup) for tag in soup.select("[src]"): src = tag["src"] if src.startswith("/"): tag["src"] = src[1:] for tag in soup("u"): tag.name = "span" tag["class"] = tag.get("class", []) + ["u"] for tag in soup("s"): tag.name = "del" if title and " str: return href.removeprefix("bword://") def _href_sub(x: re.Match) -> str: href = x.groups()[1] if href.startswith("http"): return x.group() href = _cleanup_link_target(href) return "href=" + quoteattr( "x-dictionary:d:" + unescape( href, {""": '"'}, ), ) def _is_green(x: dict) -> bool: return "color:green" in x.get("style", "") def _remove_style(tag: dict, line: str) -> None: s = "".join(tag["style"].replace(line, "").split(";")) if s: tag["style"] = 
s else: del tag["style"] def _fix_sound_link(href: str, tag: dict[str, Any]) -> None: tag["href"] = f'javascript:new Audio("{href[len("sound://") :]}").play();' def _link_is_url(href: str) -> bool: for prefix in ( "http:", "https:", "addexample:", "addid:", "addpv:", "help:", "helpg:", "helpp:", "helpr:", "helpxr:", "xi:", "xid:", "xp:", "sd:", "#", ): if href.startswith(prefix): return True return False pyglossary-5.0.9/pyglossary/plugins/appledict/_dict.py000066400000000000000000000066311476751035500232620ustar00rootroot00000000000000# -*- coding: utf-8 -*- # appledict/_dict.py # Output to Apple Dictionary xml sources for Dictionary Development Kit. # # Copyright © 2016-2019 Saeed Rasooli (ilius) # Copyright © 2016 ivan tkachenko me@ratijas.tk # Copyright © 2012-2015 Xiaoqiang Wang # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations import logging import string from typing import TYPE_CHECKING from ._normalize import title as normalize_title from ._normalize import title_long as normalize_title_long from ._normalize import title_short as normalize_title_short if TYPE_CHECKING: from collections.abc import Callable, Iterator from typing import Any __all__ = ["id_generator", "indexes_generator", "quote_string"] log = logging.getLogger("pyglossary") _digs = string.digits + string.ascii_letters def _base36(x: int) -> str: """ Simplified version of int2base http://stackoverflow.com/questions/2267362/convert-integer-to-a-string-in-a-given-numeric-base-in-python#2267446. """ digits: list[str] = [] while x: digits.append(_digs[x % 36]) x //= 36 digits.reverse() return "".join(digits) def id_generator() -> Iterator[str]: cnt = 1 while True: yield "_" + _base36(cnt) cnt += 1 def quote_string(value: str, BeautifulSoup: Any) -> str: if BeautifulSoup: return BeautifulSoup.dammit.EntitySubstitution.substitute_xml( value, make_quoted_attribute=True, ) return '"' + value.replace(">", ">").replace('"', """) + '"' def indexes_generator( indexes_lang: str, ) -> Callable[ [str, list[str], str, Any], str, ]: """Generate indexes according to glossary language.""" indexer = None """Callable[[Sequence[str], str], Sequence[str]]""" if indexes_lang: from .indexes import languages indexer = languages.get(indexes_lang, None) if not indexer: keys_str = ", ".join(languages) msg = ( "extended indexes not supported for the" f" specified language: {indexes_lang}.\n" f"following languages available: {keys_str}." 
) log.error(msg) raise ValueError(msg) def generate_indexes( title: str, alts: list[str], content: str, BeautifulSoup: Any, ) -> str: indexes = [title] indexes.extend(alts) quoted_title = quote_string(title, BeautifulSoup) if indexer: indexes = list(set(indexer(indexes, content))) normal_indexes = set() for idx in indexes: normal = normalize_title(idx, BeautifulSoup) normal_indexes.add(normalize_title_long(normal)) normal_indexes.add(normalize_title_short(normal)) normal_indexes.discard(title) s = f"" for idx in normal_indexes: if not idx.strip(): # skip empty titles. everything could happen. continue quoted_idx = quote_string(idx, BeautifulSoup) s += f"" return s return generate_indexes pyglossary-5.0.9/pyglossary/plugins/appledict/_normalize.py000066400000000000000000000070171476751035500243360ustar00rootroot00000000000000# -*- coding: utf-8 -*- # appledict/_normalize.py # Output to Apple Dictionary xml sources for Dictionary Development Kit. # # Copyright © 2016-2019 Saeed Rasooli (ilius) # Copyright © 2016 ivan tkachenko me@ratijas.tk # Copyright © 2012-2015 Xiaoqiang Wang # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations import re from typing import Any __all__ = ["title", "title_long", "title_short"] _re_spaces = re.compile(r"[ \t\n]{2,}") _re_title = re.compile('<[^<]+?>|"|[<>]|\xef\xbb\xbf') _re_title_short = re.compile(r"\[.*?\]") _re_whitespace = re.compile("(\t|\n|\r)") # FIXME: rename all/most functions here, add a 'fix_' prefix def _spaces(s: str) -> str: """ Strip off leading and trailing whitespaces and replace contiguous whitespaces with just one space. """ return _re_spaces.sub(" ", s.strip()) _brackets_sub = ( ( re.compile(r"( *)\{( *)\\\[( *)"), # { \[ r"\1\2\3[", ), ( re.compile(r"( *)\\\]( *)\}( *)"), # \] } r"]\1\2\3", ), ( re.compile(r"( *)\{( *)\(( *)\}( *)"), # { ( } r"\1\2\3\4[", ), ( re.compile(r"( *)\{( *)\)( *)\}( *)"), # { ) } r"]\1\2\3\4", ), ( re.compile(r"( *)\{( *)\(( *)"), # { ( r"\1\2\3[", ), ( re.compile(r"( *)\)( *)\}( *)"), # ) } r"]\1\2\3", ), ( re.compile(r"( *)\{( *)"), # { r"\1\2[", ), ( re.compile(r"( *)\}( *)"), # } r"]\1\2", ), ( re.compile(r"{.*?}"), r"", ), ) def _brackets(s: str) -> str: r""" Replace all crazy brackets with square ones []. following combinations are to replace: { \[ ... \] } { ( } ... { ) } { ( ... ) } { ... } """ if "{" in s: for exp, sub in _brackets_sub: s = exp.sub(sub, s) return _spaces(s) def _truncate(text: str, length: int = 449) -> str: """ Trunct a string to given length :param str text: :return: truncated text :rtype: str. 
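Example (illustrative): _truncate("hello world example", 13)
returns "hello world" - the text is cut at the last space before
the limit so that no word is broken in the middle.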
""" content = _re_whitespace.sub(" ", text) if len(text) > length: # find the next space after max_len chars (do not break inside a word) pos = content[:length].rfind(" ") if pos == -1: pos = length text = text[:pos] return text # noqa: RET504 def title(title: str, BeautifulSoup: Any) -> str: """Strip double quotes and html tags.""" if BeautifulSoup: title = title.replace("\xef\xbb\xbf", "") if len(title) > 1: # BeautifulSoup has a bug when markup <= 1 char length title = BeautifulSoup.BeautifulSoup( title, features="lxml", # FIXME: html or lxml? gives warning unless it's lxml ).get_text(strip=True) else: title = _re_title.sub("", title) title = title.replace("&", "&") title = _brackets(title) title = _truncate(title, 1126) return title # noqa: RET504 def title_long(s: str) -> str: """ Return long title line. Example: ------- title_long("str[ing]") -> string. """ return s.replace("[", "").replace("]", "") def title_short(s: str) -> str: """ Return short title line. Example: ------- title_short("str[ing]") -> str. """ return _spaces(_re_title_short.sub("", s)) pyglossary-5.0.9/pyglossary/plugins/appledict/indexes/000077500000000000000000000000001476751035500232575ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/appledict/indexes/__init__.py000066400000000000000000000026741476751035500254010ustar00rootroot00000000000000# -*- coding: utf-8 -*- # appledict/indexes/__init__.py # # Copyright © 2016 ivan tkachenko me@ratijas.tk # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations """extended indexes generation with respect to source language.""" import os import pkgutil from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Callable, Sequence __all__ = ["languages"] languages: dict[str, Callable[[Sequence[str], str], set[str]]] = {} """ submodules must register languages by adding (language name -> function) pairs to the mapping. function must follow signature below: :param titles: flat iterable of title and altenrative titles :param content: cleaned entry content :return: iterable of indexes (str). """ here = os.path.dirname(os.path.abspath(__file__)) for _, module, _ in pkgutil.iter_modules([here]): # type: ignore # noqa: PGH003 __import__(f"{__name__}.{module}") pyglossary-5.0.9/pyglossary/plugins/appledict/indexes/ru.py000066400000000000000000000050251476751035500242610ustar00rootroot00000000000000# -*- coding: utf-8 -*- # appledict/indexes/ru.py # # Copyright © 2016 ivan tkachenko me@ratijas.tk # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. 
# # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. """Russian indexes based on pymorphy.""" from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Sequence from pyglossary.core import log, pip from . import languages try: import pymorphy3 # type: ignore except ImportError: log.error( f"""module pymorphy3 is required to build extended Russian indexes. You can download it here: https://github.com/no-plagiarism/pymorphy3 Or by running: {pip} install pymorphy3""", ) raise morphy = pymorphy3.MorphAnalyzer() def ru(titles: Sequence[str], _: str) -> set[str]: """ Give a set of all declines, cases and other forms of word `title`. note that it works only if title is one word. """ indexes: set[str] = set() indexes_norm: set[str] = set() for title in titles: # in-place modification _ru(title, indexes, indexes_norm) return indexes def _ru(title: str, a: set[str], a_norm: set[str]) -> None: # uppercase abbreviature if title.isupper(): return title_norm = normalize(title) # feature: put dot at the end to match only this word a.add(title) a.add(title + ".") a_norm.add(title_norm) # decline only one-word titles if len(title.split()) == 1: normal_forms = morphy.parse(title) if len(normal_forms) > 0: # forms of most probable match normal_form = normal_forms[0] for x in normal_form.lexeme: word = x.word # Apple Dictionary Services see no difference between # "й" and "и", "ё" and "е", so we're trying to avoid # "* Duplicate index. Skipped..." warning. # new: return indexes with original letters but check for # occurrence against "normal forms". word_norm = normalize(word) if word_norm not in a_norm: a.add(word) a_norm.add(word_norm) def normalize(word: str) -> str: return word.lower().replace("й", "и").replace("ё", "е").replace("-", " ") languages["ru"] = ru pyglossary-5.0.9/pyglossary/plugins/appledict/indexes/zh.py000066400000000000000000000063041476751035500242550ustar00rootroot00000000000000# -*- coding: utf-8 -*- # appledict/indexes/zh.py # # Copyright © 2016 ivan tkachenko me@ratijas.tk # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations """Chinese wildcard and pinyin indexes.""" import re import bs4 from pyglossary.core import log, pip try: import colorize_pinyin as color # type: ignore except ImportError: log.error( "module colorize_pinyin is required to build extended Chinese" " indexes. You can install it by running: " f"{pip} install colorize-pinyin", ) raise from typing import TYPE_CHECKING from . 
import languages if TYPE_CHECKING: from collections.abc import Sequence pinyinPattern = re.compile(r",|;") nonHieroglyphPattern = re.compile(r"[^\u4e00-\u9fff]") def zh(titles: Sequence[str], content: str) -> set[str]: """ Chinese indexes. assuming that content is HTML and pinyin is inside second tag (first is

    ), we can try to parse pinyin and generate indexes with pinyin subwords separated by whitespaces - pinyin itself - pinyin with diacritics replaced by tone numbers multiple pronunciations separated by comma or semicolon are supported. """ indexes = set() for title in titles: # feature: put dot at the end to match only this word indexes.update({title, title + "。"}) # remove all non hieroglyph indexes.add(nonHieroglyphPattern.sub("", title)) indexes.update(pinyin_indexes(content)) return indexes def pinyin_indexes(content: str) -> set[str]: pinyin = find_pinyin(content) # assert type(pinyin) == unicode if not pinyin or pinyin == "_": return set() indexes = set() # multiple pronunciations for pinyinPart in pinyinPattern.split(pinyin): # find all pinyin ranges, use them to rip pinyin out py = [ r._slice(pinyinPart) for r in color.ranges_of_pinyin_in_string(pinyinPart) ] # maybe no pinyin here if not py: return set() # just pinyin, with diacritics, separated by whitespace indexes.add(color.utf(" ".join(py)) + ".") # pinyin with diacritics replaced by tone numbers indexes.add( color.utf( " ".join( [ color.lowercase_string_by_removing_pinyin_tones(p) + str(color.determine_tone(p)) for p in py ], ), ) + ".", ) return indexes def find_pinyin(content: str) -> str | None: # assume that content is HTML and pinyin is inside second tag # (first is

    ) soup = bs4.BeautifulSoup(content.splitlines()[0], features="lxml") if soup.body: soup = soup.body # type: ignore # noqa: PGH003 children = soup.children try: next(children) # type: ignore # noqa: PGH003 pinyin = next(children) # type: ignore # noqa: PGH003 except StopIteration: return None return pinyin.text languages["zh"] = zh pyglossary-5.0.9/pyglossary/plugins/appledict/jing/000077500000000000000000000000001476751035500225475ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/appledict/jing/DictionarySchema/000077500000000000000000000000001476751035500257755ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/appledict/jing/DictionarySchema/AppleDictionarySchema.rng000066400000000000000000000042141476751035500327160ustar00rootroot00000000000000 pyglossary-5.0.9/pyglossary/plugins/appledict/jing/DictionarySchema/modules/000077500000000000000000000000001476751035500274455ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/appledict/jing/DictionarySchema/modules/dict-struct.rng000066400000000000000000000060311476751035500324220ustar00rootroot00000000000000 1 pyglossary-5.0.9/pyglossary/plugins/appledict/jing/__init__.py000066400000000000000000000005021476751035500246550ustar00rootroot00000000000000""" checking XML files with Apple Dictionary Schema. this module can be run from command line with only argument -- file to be checked. otherwise, you need to import this module and call `run` function with the filename as its only argument. """ __all__ = ["JingTestError", "run"] from .main import JingTestError, run pyglossary-5.0.9/pyglossary/plugins/appledict/jing/__main__.py000066400000000000000000000006551476751035500246470ustar00rootroot00000000000000"""main entry point.""" import logging import os import sys sys.path.append(os.path.abspath(os.path.dirname(__file__))) # noqa: E402 from .main import main log = logging.getLogger("root") console_output_handler = logging.StreamHandler(sys.stderr) console_output_handler.setFormatter( logging.Formatter( "%(asctime)s: %(message)s", ), ) log.addHandler(console_output_handler) log.setLevel(logging.INFO) sys.exit(main()) pyglossary-5.0.9/pyglossary/plugins/appledict/jing/jing/000077500000000000000000000000001476751035500234765ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/appledict/jing/jing/readme.html000066400000000000000000000052601476751035500256240ustar00rootroot00000000000000 Jing version 20091111

    Jing version 20091111

    Copyright © 2001, 2002, 2003, 2008 Thai Open Source Software Center Ltd. Jing can be freely copied subject to these conditions.

    This directory contains version 20091111 of Jing, a validator for RELAX NG and other schema languages.

    The directory bin contains jing.jar, which contains the code for Jing, ready to use with a Java runtime. For more information on how to use Jing, see this document.

    Apart from jing.jar, the bin directory contains some third-party jar files, which are used for XML parsing (under a pre-1.4 JRE that does not provide the Java XML parsing extension) and for validating with schema languages other than RELAX NG:

    saxon.jar
    Comes from the Saxon 6.5.5 distribution. Used for Schematron 1.5 validation.
    xercesImpl.jar
    xml-apis.jar
    Come from the Xerces2 Java 2.9.1 distribution. Used for W3C XML Schema validation and for XML parsing. Xerces2 Java is under the Apache License Version 2.0, which requires the following notice:
       Apache Xerces Java
       Copyright 1999-2007 The Apache Software Foundation
    
       This product includes software developed at
       The Apache Software Foundation (http://www.apache.org/).
    
       Portions of this software were originally based on the following:
         - software copyright (c) 1999, IBM Corporation., http://www.ibm.com.
         - software copyright (c) 1999, Sun Microsystems., http://www.sun.com.
         - voluntary contributions made by Paul Eng on behalf of the 
           Apache Software Foundation that were originally developed at iClick, Inc.,
           software copyright (c) 1999.
    isorelax.jar
    Comes from ISO RELAX 2004/11/11 distribution. Provides a bridge to validators that use the JARV interface.

    The file src.zip contains the Java source code. This is for reference purposes, and doesn't contain the supporting files, such as build scripts and test cases, that are needed for working conveniently with the source code. If you want to make changes to Jing, you should check out the source code and supporting files from the project's Subversion repository.

    pyglossary-5.0.9/pyglossary/plugins/appledict/jing/main.py000066400000000000000000000042571476751035500240550ustar00rootroot00000000000000from __future__ import annotations """Jing, a validator for RELAX NG and other schema languages.""" import logging import subprocess import sys from os import path __all__ = ["JingTestError", "main", "run"] log = logging.getLogger("pyglossary") log.setLevel(logging.DEBUG) class JingTestError(subprocess.CalledProcessError): """ A exception that is raised when jing test failed, e.g. returned non-zero. the exit status will be stored in the `returncode` attribute. the `output` attribute also will store the output. """ def __init__( self, returncode: int, cmd: list[str], output: bytes, ) -> None: super().__init__(returncode, cmd, output) def __str__(self) -> str: return "\n".join( [ f"Jing check failed with exit code {self.returncode}:", "-" * 80, self.output, ], ) def run(filename: str) -> None: """ Check whether the file named `filename` conforms to `AppleDictionarySchema.rng`. :returns: None :raises: JingTestError """ here = path.abspath(path.dirname(__file__)) filename = path.abspath(filename) jing_jar_path = path.join(here, "jing", "bin", "jing.jar") rng_path = path.join(here, "DictionarySchema", "AppleDictionarySchema.rng") # -Xmxn Specifies the maximum size, in bytes, of the memory allocation pool # -- from `man 1 java` cmd = ["java", "-Xmx2G", "-jar", jing_jar_path, rng_path, filename] log.info("running Jing check:") log.info(str(cmd)) log.info("...") pipe = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) returncode = pipe.wait() output = pipe.communicate()[0] if returncode != 0: if returncode < 0: log.error(f"Jing was terminated by signal {-returncode}") elif returncode > 0: log.error(f"Jing returned {returncode}") raise JingTestError(returncode, cmd, output) log.info("Jing check successfully passed!") def main() -> int: """ Run Jing test on given dictionary XML file with Apple Dictionary Schema. It's a command-line utility. 
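Usage (sketch; module path assumed from the package layout):

    python -m pyglossary.plugins.appledict.jing somefile.xml

Exits 0 on success, 1 when no filename is given, and Jing's own
non-zero exit code when validation fails.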
""" if len(sys.argv) < 2: prog_name = path.basename(sys.argv[0]) log.info(f"usage:\n {prog_name} filename") return 1 try: run(sys.argv[1]) return 0 except JingTestError as e: log.fatal(str(e)) return e.returncode pyglossary-5.0.9/pyglossary/plugins/appledict/templates/000077500000000000000000000000001476751035500236165ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/appledict/templates/Dictionary.css000066400000000000000000000023101476751035500264310ustar00rootroot00000000000000@charset "UTF-8"; @namespace d url(http://www.apple.com/DTDs/DictionaryService-1.0.rng); @media (prefers-color-scheme: dark) { html { -apple-color-filter: apple-invert-lightness(); } img { filter: invert(0%); } } d|entry { } h1 { font-size: 150%; } h3 { font-size: 100%; } .ex, .m, .m0, .m1, .m2, .m3, .m4, .m5, .m6, .m7, .m8, .m9 { display: block; } .m { margin-left: 0em; } .m0 { margin-left: 0em; } .m1 { margin-left: 1em; } .m2 { margin-left: 2em; } .m3 { margin-left: 3em; } .m4 { margin-left: 4em; } .m5 { margin-left: 5em; } .m6 { margin-left: 6em; } .m7 { margin-left: 7em; } .m8 { margin-left: 8em; } .m9 { margin-left: 9em; } .ex + br, .k + br { display: none; } .c { color: green; } .p { font-style: italic; color: green; } .ex { color: #666; } .u { text-decoration: underline; } /* xdxf support */ .k { color: black; font-weight: bold; display: block; } .tr { color: black; } .abr { color: #008000; font-style: italic; } .hideextra .extra { display: none; } .stress { color: #FF0000; } .kref { color: #000080; text-decoration: none; } .pr { color: #000080; white-space: nowrap; text-decoration: none; overflow: hidden; text-overflow: ellipsis; padding-right: 1ex; } pyglossary-5.0.9/pyglossary/plugins/appledict/templates/Info.plist000066400000000000000000000016711476751035500255730ustar00rootroot00000000000000 CFBundleDevelopmentRegion English CFBundleIdentifier {CFBundleIdentifier} CFBundleDisplayName {CFBundleDisplayName} CFBundleName {CFBundleName} CFBundleShortVersionString 1.0 DCSDictionaryCopyright {DCSDictionaryCopyright}. DCSDictionaryManufacturerName {DCSDictionaryManufacturerName}. DCSDictionaryXSL {DCSDictionaryXSL} DCSDictionaryDefaultPrefs {DCSDictionaryDefaultPrefs} DCSDictionaryPrefsHTML {DCSDictionaryPrefsHTML} {DCSDictionaryFrontMatterReferenceID} pyglossary-5.0.9/pyglossary/plugins/appledict/templates/Makefile000066400000000000000000000024121476751035500252550ustar00rootroot00000000000000# # Makefile # # # ########################### # You need to edit these values. DICT_NAME = "{dict_name}" DICT_SRC_PATH = "{dict_name}.xml" CSS_PATH = "{dict_name}.css" PLIST_PATH = "{dict_name}.plist" DICT_BUILD_OPTS = # Suppress adding supplementary key. # DICT_BUILD_OPTS = -s 0 # Suppress adding supplementary key. ########################### # The DICT_BUILD_TOOL_DIR value is used also in "build_dict.sh" script. # You need to set it when you invoke the script directly. DICT_BUILD_TOOL_DIR = "/Applications/Utilities/Dictionary Development Kit" DICT_BUILD_TOOL_BIN = "$(DICT_BUILD_TOOL_DIR)/bin" ########################### DICT_DEV_KIT_OBJ_DIR = ./objects export DICT_DEV_KIT_OBJ_DIR DESTINATION_FOLDER = ~/Library/Dictionaries RM = /bin/rm ########################### all: "$(DICT_BUILD_TOOL_BIN)/build_dict.sh" $(DICT_BUILD_OPTS) $(DICT_NAME) $(DICT_SRC_PATH) $(CSS_PATH) $(PLIST_PATH) @echo "Done." install: @echo "Installing into $(DESTINATION_FOLDER)". 
mkdir -p $(DESTINATION_FOLDER) ditto --noextattr --norsrc $(DICT_DEV_KIT_OBJ_DIR)/$(DICT_NAME).dictionary $(DESTINATION_FOLDER)/$(DICT_NAME).dictionary touch $(DESTINATION_FOLDER) @echo "Done." @echo "To test the new dictionary, try Dictionary.app." clean: $(RM) -rf $(DICT_DEV_KIT_OBJ_DIR) pyglossary-5.0.9/pyglossary/plugins/appledict/tools.toml000066400000000000000000000002171476751035500236550ustar00rootroot00000000000000["Dictionary Development Kit"] web = "https://github.com/SebastianSzturo/Dictionary-Development-Kit" platforms = [ "Mac",] license = "Unknown" pyglossary-5.0.9/pyglossary/plugins/appledict/writer.py000066400000000000000000000225751476751035500235210ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Output to Apple Dictionary xml sources for Dictionary Development Kit. # # Copyright © 2016-2023 Saeed Rasooli (ilius) # Copyright © 2016 ivan tkachenko # Copyright © 2012-2015 Xiaoqiang Wang # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations import os import pkgutil import shutil import sys from os.path import basename, isdir, join from typing import TYPE_CHECKING, Any from pyglossary.core import log, pip from pyglossary.text_utils import toStr from ._content import prepare_content from ._dict import ( id_generator, indexes_generator, quote_string, ) from ._normalize import title as normalize_title from ._normalize import title_long as normalize_title_long if TYPE_CHECKING: import io from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] sys.setrecursionlimit(10000) BeautifulSoup = None def _loadBeautifulSoup() -> None: global BeautifulSoup try: import bs4 as BeautifulSoup except ImportError: try: import BeautifulSoup # type: ignore except ImportError: return version: str = BeautifulSoup.__version__ # type: ignore if int(version.split(".")[0]) < 4: raise ImportError( "BeautifulSoup is too old, required at least version 4, " f"{version!r} found.\n" f"Please run `{pip} install lxml beautifulsoup4 html5lib`", ) def _abspath_or_None(path: str | None) -> str | None: if not path: return None return os.path.abspath(os.path.expanduser(path)) def _write_header( toFile: io.TextIOBase, front_back_matter: str | None, ) -> None: # write header toFile.write( '\n' '\n', ) if front_back_matter: with open( front_back_matter, encoding="utf-8", ) as _file: toFile.write(_file.read()) def _format_default_prefs(default_prefs: dict[str, Any] | None) -> str: """ :type default_prefs: dict or None as by 14th of Jan 2016, it is highly recommended that prefs should contain {"version": "1"}, otherwise Dictionary.app does not keep user changes between restarts. """ if not default_prefs: return "" if not isinstance(default_prefs, dict): raise TypeError(f"default_prefs not a dictionary: {default_prefs!r}") if str(default_prefs.get("version", None)) != "1": log.error( "default prefs does not contain {'version': '1'}. 
prefs " "will not be persistent between Dictionary.app restarts.", ) return "\n".join( f"\t\t{key}\n\t\t{value}" for key, value in sorted(default_prefs.items()) ).strip() def _write_css(fname: str, css_file: str) -> None: with open(fname, mode="wb") as toFile: if css_file: with open(css_file, mode="rb") as fromFile: toFile.write(fromFile.read()) else: data = pkgutil.get_data( __name__, "templates/Dictionary.css", ) if data is None: raise RuntimeError("failed to load templates/Dictionary.css") toFile.write(data) """ write glossary to Apple dictionary .xml and supporting files. :param dirname: directory path, must not have extension :param clean_html: pass True to use BeautifulSoup parser. :param css: path to custom .css file :param xsl: path to custom XSL transformations file. :param default_prefs: Default prefs in python dictionary literal format, i.e. {"key1": "value1", "key2": "value2", ...}. All keys and values must be quoted strings; not allowed characters (e.g. single/double quotes,equal sign "=", semicolon) must be escaped as hex code according to python string literal rules. :param prefs_html: path to XHTML file with user interface for dictionary's preferences. refer to Apple's documentation for details. :param front_back_matter: path to XML file with top-level tag your front/back matter entry content :param jing: pass True to run Jing check on generated XML. # FIXME: rename to indexes_lang? :param indexes: Dictionary.app is dummy and by default it don't know how to perform flexible search. we can help it by manually providing additional indexes to dictionary entries. """ class Writer: depends = { "lxml": "lxml", "bs4": "beautifulsoup4", "html5lib": "html5lib", } _clean_html: bool = True _css: str = "" _xsl: str = "" _default_prefs: dict | None = None _prefs_html: str = "" _front_back_matter: str = "" _jing: bool = False _indexes: str = "" # FIXME: rename to indexes_lang? def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._dirname = "" def finish(self) -> None: self._dirname = "" def open(self, dirname: str) -> None: self._dirname = dirname if not isdir(dirname): os.mkdir(dirname) # TODO: PLR0915 Too many statements (74 > 50) def write(self) -> Generator[None, EntryType, None]: # noqa: PLR0912, PLR0915 global BeautifulSoup from pyglossary.xdxf.transform import XdxfTransformer glos = self._glos clean_html = self._clean_html css: str | None = self._css xsl: str | None = self._xsl default_prefs = self._default_prefs prefs_html: str | None = self._prefs_html front_back_matter: str | None = self._front_back_matter jing = self._jing indexes = self._indexes xdxf_to_html = XdxfTransformer(encoding="utf-8") if clean_html: if BeautifulSoup is None: _loadBeautifulSoup() if BeautifulSoup is None: log.warning( "clean_html option passed but BeautifulSoup not found. 
" "to fix this run " f"`{pip} install lxml beautifulsoup4 html5lib`", ) else: BeautifulSoup = None dirname = self._dirname fileNameBase = basename(dirname).replace(".", "_") filePathBase = join(dirname, fileNameBase) # before chdir (outside indir block) css = _abspath_or_None(css) xsl = _abspath_or_None(xsl) prefs_html = _abspath_or_None(prefs_html) front_back_matter = _abspath_or_None(front_back_matter) generate_id = id_generator() generate_indexes = indexes_generator(indexes) myResDir = join(dirname, "OtherResources") if not isdir(myResDir): os.mkdir(myResDir) with open(filePathBase + ".xml", mode="w", encoding="utf-8") as toFile: _write_header(toFile, front_back_matter) while True: entry = yield if entry is None: break if entry.isData(): entry.save(myResDir) continue words = entry.l_word word, alts = words[0], words[1:] defi = entry.defi long_title = normalize_title_long( normalize_title(word, BeautifulSoup), ) if not long_title: continue id_ = next(generate_id) quoted_title = quote_string(long_title, BeautifulSoup) content_title: str | None = long_title if entry.defiFormat == "x": defi = xdxf_to_html.transformByInnerString(defi) content_title = None content = prepare_content(content_title, defi, BeautifulSoup) toFile.write( f'\n' + generate_indexes(long_title, alts, content, BeautifulSoup) + content + "\n\n", ) toFile.write("\n") if xsl: shutil.copy(xsl, myResDir) if prefs_html: shutil.copy(prefs_html, myResDir) _write_css(filePathBase + ".css", css) with open(join(dirname, "Makefile"), mode="w", encoding="utf-8") as toFile: toFile.write( toStr( pkgutil.get_data( __name__, "templates/Makefile", ), ).format(dict_name=fileNameBase), ) copyright_ = glos.getInfo("copyright") if BeautifulSoup: # strip html tags copyright_ = str( BeautifulSoup.BeautifulSoup( copyright_, features="lxml", ).text, ) # if DCSDictionaryXSL provided but DCSDictionaryDefaultPrefs not # present in Info.plist, Dictionary.app will crash. 
with open(filePathBase + ".plist", mode="w", encoding="utf-8") as toFile: frontMatterReferenceID = ( "DCSDictionaryFrontMatterReferenceID\n" "\tfront_back_matter" if front_back_matter else "" ) bundle_id = glos.getInfo("CFBundleIdentifier") if not bundle_id: bundle_id = fileNameBase.replace(" ", "") toFile.write( toStr( pkgutil.get_data( __name__, "templates/Info.plist", ), ).format( # identifier must be unique CFBundleIdentifier=bundle_id, CFBundleDisplayName=glos.getInfo("name"), CFBundleName=fileNameBase, DCSDictionaryCopyright=copyright_, DCSDictionaryManufacturerName=glos.author, DCSDictionaryXSL=basename(xsl) if xsl else "", DCSDictionaryDefaultPrefs=_format_default_prefs(default_prefs), DCSDictionaryPrefsHTML=basename(prefs_html) if prefs_html else "", DCSDictionaryFrontMatterReferenceID=frontMatterReferenceID, ), ) if jing: from .jing import run as jing_run jing_run(filePathBase + ".xml") pyglossary-5.0.9/pyglossary/plugins/appledict_bin/000077500000000000000000000000001476751035500224505ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/appledict_bin/__init__.py000066400000000000000000000014341476751035500245630ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import BoolOption, Option from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "appledict_bin" name = "AppleDictBin" description = "AppleDict Binary" extensions = (".dictionary", ".data") extensionCreate = "" singleFile = True kind = "binary" wiki = "" website = ( "https://support.apple.com/en-gu/guide/dictionary/welcome/mac", "Dictionary User Guide for Mac", ) optionsProp: dict[str, Option] = { "html": BoolOption(comment="Entries are HTML"), "html_full": BoolOption( comment="Turn every entry's definition into an HTML document", ), } pyglossary-5.0.9/pyglossary/plugins/appledict_bin/appledict_file_tools.py000066400000000000000000000035511476751035500272120ustar00rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2023 soshial (soshial) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
from __future__ import annotations from struct import unpack from typing import TYPE_CHECKING if TYPE_CHECKING: import io __all__ = [ "APPLEDICT_FILE_OFFSET", "guessFileOffsetLimit", "readInt", "read_2_bytes_here", "read_x_bytes_as_word", ] APPLEDICT_FILE_OFFSET = 0x40 # addressing of AppleDict binary files always ignores first 0x40 bytes def _readIntPair(buffer: io.BufferedIOBase) -> tuple[int, int]: # to satisfy mypy, put them in vars with declared type a: int b: int a, b = unpack("ii", buffer.read(8)) return a, b def readInt(buffer: io.BufferedIOBase) -> int: return unpack("i", buffer.read(4))[0] def read_x_bytes_as_word(buffer: io.BufferedIOBase, x: int) -> str: return buffer.read(x).decode("UTF-16LE") def read_2_bytes_here(buffer: io.BufferedIOBase) -> int: lower_byte = buffer.read(1) higher_byte = buffer.read(1) return ord(higher_byte) * 0x100 + ord(lower_byte) def guessFileOffsetLimit(file: io.BufferedIOBase) -> tuple[int, int]: """Returns address offset to start parsing from and EOF address.""" file.seek(APPLEDICT_FILE_OFFSET) limit = readInt(file) intPair = _readIntPair(file) if intPair == (0, -1): # 0000 0000 FFFF FFFF return 0x20, limit return 0x4, limit pyglossary-5.0.9/pyglossary/plugins/appledict_bin/appledict_properties.py000066400000000000000000000065001476751035500272440ustar00rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2023 soshial (soshial) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details.
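# A sketch of driving the helpers above against a Body.data file (the path
# below is hypothetical); guessFileOffsetLimit() skips the fixed 0x40-byte
# preamble and reports where parsable data starts and where it ends:
from pyglossary.plugins.appledict_bin.appledict_file_tools import (
	APPLEDICT_FILE_OFFSET,
	guessFileOffsetLimit,
	readInt,
)

with open("Foo.dictionary/Contents/Resources/Body.data", "rb") as body:
	dataOffset, limit = guessFileOffsetLimit(body)
	body.seek(dataOffset + APPLEDICT_FILE_OFFSET)
	# each section starts with its jump length, as the reader expects
	next_section_jump = readInt(body)
# note: read_2_bytes_here() is little-endian, so b"\x01\x02" decodes to 0x0201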
from __future__ import annotations from dataclasses import dataclass from typing import Any __all__ = ["AppleDictProperties", "from_metadata"] @dataclass(slots=True, frozen=True) class AppleDictProperties: # in plist file: IDXDictionaryVersion # values := (1 | 2 | 3) format_version: int # in plist file: HeapDataCompressionType values := (absent | 1 | 2) body_compression_type: int # in plist file: for field with "IDXDataFieldName" equal "DCSExternalBodyID" # "IDXDataSize" value = 4 or 8 body_has_sections: bool # in plist file for key_text_metadata: # 'TrieAuxiliaryDataOptions' -> 'HeapDataCompressionType' key_text_compression_type: int # in plist file: IDXIndexDataFields / "IDXFixedDataFields" # Example: ["DCSPrivateFlag"] key_text_fixed_fields: list[str] # in plist file: IDXIndexDataFields / "IDXVariableDataFields" # Example: ["DCSKeyword", "DCSHeadword", "DCSEntryTitle", # "DCSAnchor", "DCSYomiWord"] key_text_variable_fields: list[str] # DCSDictionaryCSS, generally "DefaultStyle.css" css_name: str | None def from_metadata(metadata: dict[str, Any]) -> AppleDictProperties: format_version: int = metadata.get("IDXDictionaryVersion", -1) dictionaryIndexes: list[dict[str, Any]] | None = metadata.get( "IDXDictionaryIndexes", ) key_text_metadata: dict[str, Any] = ( dictionaryIndexes[0] if dictionaryIndexes else {} ) body_metadata: dict[str, Any] = dictionaryIndexes[2] if dictionaryIndexes else {} key_text_data_fields = key_text_metadata.get("IDXIndexDataFields", {}) key_text_variable_fields = [ field_data["IDXDataFieldName"] for field_data in key_text_data_fields.get("IDXVariableDataFields", []) ] key_text_fixed_field = [ fixed_field["IDXDataFieldName"] for fixed_field in key_text_data_fields.get("IDXFixedDataFields", []) ] external_data_fields = key_text_data_fields.get("IDXExternalDataFields") body_compression_type = body_metadata.get("HeapDataCompressionType", 0) body_has_sections = ( body_compression_type == 2 and external_data_fields[0].get("IDXDataSize") == 8 ) if ( "TrieAuxiliaryDataOptions" in key_text_metadata and "HeapDataCompressionType" in key_text_metadata["TrieAuxiliaryDataOptions"] ): key_text_compression_type = key_text_metadata["TrieAuxiliaryDataOptions"][ "HeapDataCompressionType" ] else: key_text_compression_type = 0 css_name = metadata.get("DCSDictionaryCSS") return AppleDictProperties( format_version=format_version, body_compression_type=body_compression_type, body_has_sections=body_has_sections, key_text_compression_type=key_text_compression_type, key_text_fixed_fields=key_text_fixed_field, key_text_variable_fields=key_text_variable_fields, css_name=css_name, ) pyglossary-5.0.9/pyglossary/plugins/appledict_bin/article_address.py000066400000000000000000000016471476751035500261620ustar00rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2023 soshial (soshial) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
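# from_metadata() above only inspects a handful of Info.plist keys; a minimal
# hand-built metadata dict (all values illustrative) shows the mapping:
from pyglossary.plugins.appledict_bin.appledict_properties import from_metadata

metadata = {
	"IDXDictionaryVersion": 2,
	"DCSDictionaryCSS": "DefaultStyle.css",
	"IDXDictionaryIndexes": [
		{  # index 0: key text metadata
			"IDXIndexDataFields": {
				"IDXFixedDataFields": [{"IDXDataFieldName": "DCSPrivateFlag"}],
				"IDXVariableDataFields": [
					{"IDXDataFieldName": "DCSKeyword"},
					{"IDXDataFieldName": "DCSHeadword"},
				],
				"IDXExternalDataFields": [
					{"IDXDataFieldName": "DCSExternalBodyID", "IDXDataSize": 4},
				],
			},
			"TrieAuxiliaryDataOptions": {"HeapDataCompressionType": 1},
		},
		{},  # index 1: not used by from_metadata
		{"HeapDataCompressionType": 2},  # index 2: body metadata
	],
}
props = from_metadata(metadata)
assert props.key_text_variable_fields == ["DCSKeyword", "DCSHeadword"]
assert props.key_text_compression_type == 1
assert props.body_has_sections is False  # IDXDataSize == 4, not 8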
from __future__ import annotations from typing import NamedTuple __all__ = ["ArticleAddress"] class ArticleAddress(NamedTuple): sectionOffset: int chunkOffset: int def __str__(self) -> str: return f"Addr[{self.sectionOffset:#x}, {self.chunkOffset:#x}]" pyglossary-5.0.9/pyglossary/plugins/appledict_bin/key_data.py000066400000000000000000000104471476751035500246110ustar00rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2023 soshial (soshial) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Sequence from typing import Any, TypeAlias __all__ = ["KeyData", "RawKeyDataType"] if TYPE_CHECKING: RawKeyDataType: TypeAlias = tuple[int, int, Sequence[str]] """tuple(priority, parentalControl, keyTextFields)""" """ KeyText.data contains: 1. morphological data (opens article "make" when user enters "making") and data that shows 2. data that encodes that searching "2 per cent", "2 percent", or "2%" returns the same article EXAMPLE: If the entry for "make" contains these definitions, the entry can be searched not only by "make" but also by "makes" or "made". On the search result list, title value texts like "made" are displayed. EXAMPLE: EXAMPLE: EXAMPLE: user entered "'s finest", search list we show "—'s finest", show article with title "fine" and point to element id = 'm_en_gbus0362750.070' """ # TODO: switch to dataclass class KeyData: """ Dictionary entries are opened by entering different search texts. This class contains texts by which entry is searchable and other properties. """ # keyword_data_id_xml = { # "DCSKeyword": "d:value", # # Search key -- if entered in search, this key will provide this definition. # "DCSHeadword": "d:title", # # Headword text that is displayed on the search result list. # # When the value is the same as d:value, it can be omitted. # # In that case, the value of the d:value is used also for the d:title. # "DCSAnchor": "d:anchor", # # Used to highlight a specific part in an entry. # # For example, it is used to highlight an idiomatic phrase explanation # # in an entry for a word. # "DCSYomiWord": "d:yomi", # # Used only in making Japanese dictionaries. 
# "DCSSortKey": "d:DCSSortKey", # # This value shows sorting (probably for non-english languages) # "DCSEntryTitle": "d:DCSEntryTitle", # # Headword displayed as article title # } __slots__ = [ "anchor", "entryTitle", "headword", "keyword", "parentalControl", "priority", ] def __init__( # noqa: PLR0913 self, priority: int, parentalControl: int, keyword: str, headword: str, entryTitle: str, anchor: str, ) -> None: self.priority = priority self.parentalControl = parentalControl self.keyword = keyword self.headword = headword self.entryTitle = entryTitle self.anchor = anchor def toDict(self) -> dict[str, Any]: return { "priority": self.priority, "parentalControl": self.parentalControl, "keyword": self.keyword, "headword": self.headword, "entryTitle": self.entryTitle, "anchor": self.anchor, } @staticmethod def fromRaw(rawKeyData: RawKeyDataType, keyTextFieldOrder: list[str]) -> KeyData: priority, parentalControl, keyTextFields = rawKeyData keyword = "" headword = "" entryTitle = "" anchor = "" for i, key_value in enumerate(keyTextFields): key_type = keyTextFieldOrder[i] if key_type == "DCSKeyword": keyword = key_value elif key_type == "DCSHeadword": headword = key_value elif key_type == "DCSEntryTitle": entryTitle = key_value elif key_type == "DCSAnchor": anchor = key_value return KeyData( priority, parentalControl, keyword, headword, entryTitle, anchor, ) pyglossary-5.0.9/pyglossary/plugins/appledict_bin/reader.py000066400000000000000000000475421476751035500243000ustar00rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2019 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
from __future__ import annotations import os import re from datetime import datetime from io import BytesIO from operator import attrgetter from os.path import isdir, isfile, join, split, splitext from struct import unpack from typing import ( TYPE_CHECKING, Any, cast, ) from lxml import etree from .appledict_file_tools import ( APPLEDICT_FILE_OFFSET, guessFileOffsetLimit, read_2_bytes_here, read_x_bytes_as_word, readInt, ) from .appledict_properties import from_metadata from .article_address import ArticleAddress from .key_data import KeyData if TYPE_CHECKING: import io from collections.abc import Iterator from lxml.html import ( # type: ignore HtmlComment, HtmlElement, HtmlEntity, HtmlProcessingInstruction, ) from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.lxml_types import Element from .appledict_properties import AppleDictProperties from .key_data import RawKeyDataType from zlib import decompress from pyglossary import core from pyglossary.apple_utils import substituteAppleCSS from pyglossary.core import exc_note, log, pip from pyglossary.io_utils import nullBinaryIO __all__ = ["Reader"] class Reader: useByteProgress = True depends = { "lxml": "lxml", "biplist": "biplist", } _html: bool = True _html_full: bool = True resNoExt = { ".data", ".index", ".plist", ".xsl", ".html", ".strings", } def __init__(self, glos: ReaderGlossaryType) -> None: self._glos: ReaderGlossaryType = glos self._dictDirPath = "" self._contentsPath = "" self._file: io.BufferedIOBase = nullBinaryIO self._encoding = "utf-8" self._defiFormat = "m" self._re_xmlns = re.compile(' xmlns:d="[^"<>]+"') self._titleById: dict[str, str] = {} self._wordCount = 0 self._keyTextData: dict[ArticleAddress, list[RawKeyDataType]] = {} self._cssName = "" @staticmethod def tostring( elem: ( Element | HtmlComment | HtmlElement | HtmlEntity | HtmlProcessingInstruction ), ) -> str: from lxml.html import tostring return tostring( cast("HtmlElement", elem), encoding="utf-8", method="html", ).decode("utf-8") def fixLink(self, a: Element) -> Element: href = a.attrib.get("href", "") if href.startswith("x-dictionary:d:"): word = href[len("x-dictionary:d:") :] a.attrib["href"] = href = f"bword://{word}" elif href.startswith("x-dictionary:r:"): # https://github.com/ilius/pyglossary/issues/343 id_i = len("x-dictionary:r:") id_j = href.find(":", id_i) id_ = href[id_i:id_j] title = self._titleById.get(id_) if title: a.attrib["href"] = href = f"bword://{title}" else: title = a.attrib.get("title") if title: a.attrib["href"] = href = f"bword://{title}" elif href.startswith(("http://", "https://")): pass else: a.attrib["href"] = f"bword://{href}" return a # TODO: PLR0912 Too many branches (17 > 12) # TODO: PLR0915 Too many statements (60 > 50) def open(self, filename: str) -> Iterator[tuple[int, int]]: # noqa: PLR0912, PLR0915 from os.path import dirname try: from lxml import etree # noqa: F401 except ModuleNotFoundError as e: exc_note(e, f"Run `{pip} install lxml` to install") raise try: import biplist # type: ignore # noqa: F401 except ModuleNotFoundError as e: exc_note(e, f"Run `{pip} install biplist` to install") raise self._defiFormat = "h" if self._html else "m" dictDirPath: str contentsPath: str infoPlistPath: str bodyDataPath: str keyTextDataPath: str if isdir(filename): if split(filename)[-1] == "Contents": contentsPath = filename dictDirPath = dirname(filename) elif isdir(join(filename, "Contents")): contentsPath = join(filename, "Contents") dictDirPath = filename else: raise OSError(f"invalid directory 
{filename}") elif split(filename)[-1] == "Body.data": # Maybe we should remove this support in a future release parentPath = dirname(filename) parentName = split(parentPath)[-1] if parentName == "Contents": contentsPath = parentPath elif parentName == "Resources": contentsPath = dirname(parentPath) else: raise OSError(f"invalid file path {filename}") dictDirPath = dirname(contentsPath) else: raise OSError(f"invalid file path {filename}") if not isdir(contentsPath): raise OSError( f"{contentsPath} is not a folder, " "Please provide 'Contents/' folder of the dictionary", ) infoPlistPath = join(contentsPath, "Info.plist") if isfile(join(contentsPath, "Body.data")): bodyDataPath = join(contentsPath, "Body.data") keyTextDataPath = join(contentsPath, "KeyText.data") elif isfile(join(contentsPath, "Resources/Body.data")): bodyDataPath = join(contentsPath, "Resources/Body.data") keyTextDataPath = join(contentsPath, "Resources/KeyText.data") else: raise OSError( "could not find Body.data file, " "Please provide 'Contents/' folder of the dictionary", ) metadata = self.parseMetadata(infoPlistPath) self.setMetadata(metadata) yield from self.setKeyTextData( keyTextDataPath, self._properties, ) self._dictDirPath = dictDirPath self._contentsPath = contentsPath self._file = open(bodyDataPath, "rb") _, self._limit = guessFileOffsetLimit(self._file) t0 = datetime.now() self.readEntryIds() dt = datetime.now() - t0 log.info( f"Reading entry IDs took {int(dt.total_seconds() * 1000)} ms, " f"number of entries: {self._wordCount}", ) @staticmethod def parseMetadata(infoPlistPath: str) -> dict[str, Any]: import biplist if not isfile(infoPlistPath): raise OSError( "Could not find 'Info.plist' file, " "Please provide 'Contents/' folder of the dictionary", ) metadata: dict[str, Any] try: metadata = biplist.readPlist(infoPlistPath) except (biplist.InvalidPlistException, biplist.NotBinaryPlistException): try: import plistlib with open(infoPlistPath, "rb") as plist_file: metadata = plistlib.loads(plist_file.read()) except Exception as e: raise OSError( "'Info.plist' file is malformed, " f"Please provide 'Contents/' with a correct 'Info.plist'. 
{e}", ) from e return metadata def setMetadata(self, metadata: dict[str, Any]) -> None: name = metadata.get("CFBundleDisplayName") if not name: name = metadata.get("CFBundleIdentifier") if name: self._glos.setInfo("name", name) identifier = metadata.get("CFBundleIdentifier") if identifier and identifier != name: self._glos.setInfo("CFBundleIdentifier", identifier) copyright_ = metadata.get("DCSDictionaryCopyright") if copyright_: self._glos.setInfo("copyright", copyright_) author = metadata.get("DCSDictionaryManufacturerName") if author: self._glos.setInfo("author", author) edition = metadata.get("CFBundleInfoDictionaryVersion") if edition: self._glos.setInfo("edition", edition) if "DCSDictionaryLanguages" in metadata: self.setLangs(metadata) self._properties = from_metadata(metadata) self._cssName = self._properties.css_name or "DefaultStyle.css" def setLangs(self, metadata: dict[str, Any]) -> None: import locale langsList = metadata.get("DCSDictionaryLanguages") if not langsList: return langs = langsList[0] sourceLocale = langs["DCSDictionaryDescriptionLanguage"] self._glos.sourceLangName = locale.normalize(sourceLocale).split("_")[0] targetLocale = langs["DCSDictionaryIndexLanguage"] self._glos.targetLangName = locale.normalize(targetLocale).split("_")[0] def __len__(self) -> int: return self._wordCount def close(self) -> None: self._file.close() self._file = nullBinaryIO def _getDefi( self, entryElem: Element, ) -> str: if not self._html: # FIXME: this produces duplicate text for Idioms.dictionary, see #301 return "".join( self.tostring(child) for child in entryElem.iterdescendants() ) entryElem.tag = "div" for attr in list(entryElem.attrib): # if attr == "id" or attr.endswith("title"): del entryElem.attrib[attr] for a_link in entryElem.xpath("//a"): self.fixLink(a_link) defi = self.tostring(entryElem) defi = self._re_xmlns.sub("", defi) if self._html_full: defi = ( "" '' f"{defi}" ) return defi @staticmethod def getChunkLenOffset( pos: int, buffer: bytes, ) -> tuple[int, int]: """ @return chunk byte length and offset. offset is usually 4 bytes integer, that contains chunk/entry byte length """ offset = buffer[pos : pos + 12].find(b"") if offset == 0: # when no such info (offset equals 0) provided, # we take all bytes till the closing tag or till section end endI = buffer[pos:].find(b"\n") chunkLen = len(buffer) - pos if endI == -1 else endI + 11 else: bs = buffer[pos : pos + offset] if offset < 4: bs = b"\x00" * (4 - offset) + bs try: (chunkLen,) = unpack("i", bs) except Exception as e: log.error(f"{buffer[pos : pos + 100]!r}") raise e from None return chunkLen, offset def createEntry( self, entryBytes: bytes, articleAddress: ArticleAddress, ) -> EntryType | None: # 1. create and validate XML of the entry's body entryRoot = self.convertEntryBytesToXml(entryBytes) if entryRoot is None: return None namespaces: dict[str, str] = { key: value for key, value in entryRoot.nsmap.items() if key and value } entryElems = entryRoot.xpath("/d:entry", namespaces=namespaces) if not entryElems: return None word = entryElems[0].xpath("./@d:title", namespaces=namespaces)[0] # 2. 
add alts keyTextFieldOrder = self._properties.key_text_variable_fields words = [word] keyDataList: list[KeyData] = [ KeyData.fromRaw(rawKeyData, keyTextFieldOrder) for rawKeyData in self._keyTextData.get(articleAddress, []) ] if keyDataList: keyDataList.sort( key=attrgetter("priority"), reverse=True, ) words += [keyData.keyword for keyData in keyDataList] defi = self._getDefi(entryElems[0]) return self._glos.newEntry( word=words, defi=defi, defiFormat=self._defiFormat, byteProgress=(self._absPos, self._limit), ) def convertEntryBytesToXml( self, entryBytes: bytes, ) -> Element | None: if not entryBytes.strip(): return None try: entryRoot = etree.fromstring(entryBytes) except etree.XMLSyntaxError as e: log.error( f"{entryBytes=}", ) raise e from None if self._limit <= 0: raise ValueError(f"self._limit = {self._limit}") return entryRoot def readEntryIds(self) -> None: titleById: dict[str, str] = {} for entryBytesTmp, _ in self.yieldEntryBytes( self._file, self._properties, ): entryBytes = entryBytesTmp.strip() if not entryBytes: continue id_i = entryBytes.find(b'id="') if id_i < 0: log.error(f"id not found: {entryBytes!r}") continue id_j = entryBytes.find(b'"', id_i + 4) if id_j < 0: log.error(f"id closing not found: {entryBytes.decode(self._encoding)}") continue id_ = entryBytes[id_i + 4 : id_j].decode(self._encoding) title_i = entryBytes.find(b'd:title="') if title_i < 0: log.error(f"title not found: {entryBytes.decode(self._encoding)}") continue title_j = entryBytes.find(b'"', title_i + 9) if title_j < 0: log.error( f"title closing not found: {entryBytes.decode(self._encoding)}", ) continue titleById[id_] = entryBytes[title_i + 9 : title_j].decode(self._encoding) self._titleById = titleById self._wordCount = len(titleById) def setKeyTextData( self, morphoFilePath: str, properties: AppleDictProperties, ) -> Iterator[tuple[int, int]]: """ Prepare `KeyText.data` file for extracting morphological data. Returns an iterator/generator for the progress Sets self._keyTextData when done """ with open(morphoFilePath, "rb") as keyTextFile: fileDataOffset, fileLimit = guessFileOffsetLimit(keyTextFile) buff = BytesIO() if properties.key_text_compression_type > 0: keyTextFile.seek(fileDataOffset + APPLEDICT_FILE_OFFSET) sectionLength = readInt(keyTextFile) sectionOffset = keyTextFile.tell() fileLimitDecompressed = 0 while keyTextFile.tell() < fileLimit + APPLEDICT_FILE_OFFSET: compressedSectionByteLen = readInt(keyTextFile) decompressedSectionByteLen = readInt(keyTextFile) if compressedSectionByteLen == decompressedSectionByteLen == 0: break chunk_section_compressed = keyTextFile.read( compressedSectionByteLen - 4, ) chunksection_bytes = decompress(chunk_section_compressed) buff.write(chunksection_bytes) fileLimitDecompressed += decompressedSectionByteLen sectionOffset += max(sectionLength, compressedSectionByteLen + 4) keyTextFile.seek(sectionOffset) bufferOffset = 0 bufferLimit = fileLimitDecompressed else: keyTextFile.seek(APPLEDICT_FILE_OFFSET) buff.write(keyTextFile.read()) bufferOffset = fileDataOffset bufferLimit = fileLimit yield from self.readKeyTextData( buff=buff, bufferOffset=bufferOffset, bufferLimit=bufferLimit, properties=properties, ) # TODO: PLR0912 Too many branches (16 > 12) # TODO: PLR0915 Too many statements (56 > 50) def readKeyTextData( # noqa: PLR0912, PLR0915 self, buff: io.BufferedIOBase, bufferOffset: int, bufferLimit: int, properties: AppleDictProperties, ) -> Iterator[tuple[int, int]]: """ Returns an iterator/generator for the progress Sets self._keyTextData when done. 
""" buff.seek(bufferOffset) keyTextData: dict[ArticleAddress, list[RawKeyDataType]] = {} while bufferOffset < bufferLimit: yield (bufferOffset, bufferLimit) buff.seek(bufferOffset) next_section_jump = readInt(buff) if properties.key_text_compression_type == 0: big_len = readInt(buff) # noqa: F841 # number of lexemes wordFormCount = read_2_bytes_here(buff) # 0x01 next_lexeme_offset: int = 0 for _ in range(wordFormCount): _ = read_2_bytes_here(buff) # 0x00 # TODO might be 1 or 2 or more zeros if next_lexeme_offset != 0: buff.seek(next_lexeme_offset) small_len = 0 while small_len == 0: small_len = read_2_bytes_here(buff) # 0x2c curr_offset = buff.tell() next_lexeme_offset = curr_offset + small_len # the resulting number must match with Contents/Body.data # address of the entry articleAddress: ArticleAddress if properties.body_has_sections: chunkOffset = readInt(buff) sectionOffset = readInt(buff) articleAddress = ArticleAddress( sectionOffset=sectionOffset, chunkOffset=chunkOffset, ) else: chunkOffset = 0x0 sectionOffset = readInt(buff) articleAddress = ArticleAddress( sectionOffset=sectionOffset, chunkOffset=chunkOffset, ) if not properties.key_text_fixed_fields: priority = 0 parentalControl = 0 elif len(properties.key_text_fixed_fields) == 1: priorityAndParentalControl = read_2_bytes_here(buff) # 0x13 # "DCSDictionaryLanguages" array inside plist file has a list of # dictionaries inside this file # This DCSPrivateFlag per each article provides not only priority # and parental control, but also a flag of translation direction: # 0x0-0x1f values are reserved for the first language from the # DCSDictionaryLanguages array 0x20-0x3f values are reserved for # the second language etc. if priorityAndParentalControl >= 0x40: log.error( "WRONG priority or parental control:" f"{priorityAndParentalControl} (section: {bufferOffset:#x})" ", skipping KeyText.data file", ) return if priorityAndParentalControl >= 0x20: priorityAndParentalControl -= 0x20 # d:parental-control="1" parentalControl = priorityAndParentalControl % 2 # d:priority=".." 
between 0x00..0x12, priority = [0..9] priority = (priorityAndParentalControl - parentalControl) // 2 else: log.error( f"Unknown private field: {properties.key_text_fixed_fields}", ) return keyTextFields: list[str] = [] while buff.tell() < next_lexeme_offset: word_form_len = read_2_bytes_here(buff) if word_form_len == 0: keyTextFields.append("") continue word_form = read_x_bytes_as_word(buff, word_form_len) keyTextFields.append(word_form) entryKeyTextData: RawKeyDataType = ( priority, parentalControl, tuple(keyTextFields), ) if articleAddress in keyTextData: keyTextData[articleAddress].append(entryKeyTextData) else: keyTextData[articleAddress] = [entryKeyTextData] bufferOffset += next_section_jump + 4 self._keyTextData = keyTextData def readResFile(self, fname: str, fpath: str, ext: str) -> EntryType: with open(fpath, "rb") as _file: data = _file.read() if ext == ".css": log.debug(f"substituting apple css: {fname}: {fpath}") data = substituteAppleCSS(data) return self._glos.newDataEntry(fname, data) def fixResFilename(self, fname: str, relPath: str) -> str: if fname == self._cssName: fname = "style.css" if relPath: fname = relPath + "/" + fname if os.path == "\\": fname = fname.replace("\\", "/") return fname def readResDir( self, dirPath: str, recurse: bool = False, relPath: str = "", ) -> Iterator[EntryType]: if not isdir(dirPath): return resNoExt = self.resNoExt for fname in os.listdir(dirPath): if fname == "Resources": continue _, ext = splitext(fname) if ext in resNoExt: continue fpath = join(dirPath, fname) if isdir(fpath): if recurse: yield from self.readResDir( fpath, recurse=True, relPath=join(relPath, fname), ) continue if not isfile(fpath): continue fname2 = self.fixResFilename(fname, relPath) core.trace(log, f"Using resource {fpath!r} as {fname2!r}") yield self.readResFile(fname2, fpath, ext) def __iter__(self) -> Iterator[EntryType]: yield from self.readResDir( self._contentsPath, recurse=True, ) yield from self.readResDir( join(self._contentsPath, "Resources"), recurse=True, ) for entryBytes, articleAddress in self.yieldEntryBytes( self._file, self._properties, ): entry = self.createEntry(entryBytes, articleAddress) if entry is not None: yield entry def yieldEntryBytes( self, body_file: io.BufferedIOBase, properties: AppleDictProperties, ) -> Iterator[tuple[bytes, ArticleAddress]]: fileDataOffset, fileLimit = guessFileOffsetLimit(body_file) sectionOffset = fileDataOffset while sectionOffset < fileLimit: body_file.seek(sectionOffset + APPLEDICT_FILE_OFFSET) self._absPos = body_file.tell() # at the start of each section byte lengths of the section are encoded next_section_jump = readInt(body_file) data_byte_len = readInt(body_file) if properties.body_compression_type > 0: decompressed_byte_len = readInt(body_file) # noqa: F841 decompressed_bytes = body_file.read(data_byte_len - 4) buffer = decompress(decompressed_bytes) else: buffer = body_file.read(data_byte_len) pos = 0 while pos < len(buffer): chunkLen, offset = self.getChunkLenOffset(pos, buffer) articleAddress = ArticleAddress(sectionOffset, pos) pos += offset entryBytes = buffer[pos : pos + chunkLen] pos += chunkLen yield entryBytes, articleAddress sectionOffset += next_section_jump + 4 pyglossary-5.0.9/pyglossary/plugins/appledict_bin/tools.toml000066400000000000000000000002101476751035500244760ustar00rootroot00000000000000["Apple Dictionary"] web = "https://support.apple.com/en-gu/guide/dictionary/welcome/mac" platforms = [ "Mac",] license = "Proprietary" 
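# The DCSPrivateFlag decoding above packs three things into one small
# integer: values >= 0x20 belong to the second language direction, the
# lowest bit is the parental-control flag, and the rest is a 0..9 priority.
# A standalone sketch mirroring readKeyTextData()'s arithmetic (the
# function name is illustrative):
def decode_private_flag(value: int) -> tuple[int, int, int]:
	"""Return (language_index, priority, parental_control)."""
	if value >= 0x40:
		raise ValueError(f"bad DCSPrivateFlag: {value:#x}")
	lang = value // 0x20  # 0 = first language, 1 = second
	value %= 0x20
	parental_control = value % 2  # d:parental-control="1"
	priority = (value - parental_control) // 2  # priority in [0..9]
	return lang, priority, parental_control

assert decode_private_flag(0x13) == (0, 9, 1)
assert decode_private_flag(0x26) == (1, 3, 0)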
pyglossary-5.0.9/pyglossary/plugins/ayandict_sqlite/000077500000000000000000000000001476751035500230305ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/ayandict_sqlite/__init__.py000066400000000000000000000012701476751035500251410ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import BoolOption, Option from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "ayandict_sqlite" name = "AyanDictSQLite" description = "AyanDict SQLite" extensions = () extensionCreate = ".db" singleFile = True kind = "binary" wiki = "" website = ( "https://github.com/ilius/ayandict", "ilius/ayandict", ) optionsProp: dict[str, Option] = { "fuzzy": BoolOption( comment="Create fuzzy search data", ), } pyglossary-5.0.9/pyglossary/plugins/ayandict_sqlite/reader.py000066400000000000000000000032041476751035500246430ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import ( TYPE_CHECKING, ) if TYPE_CHECKING: import sqlite3 from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] class Reader: useByteProgress = False def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._clear() def _clear(self) -> None: self._filename = "" self._con: sqlite3.Connection | None = None self._cur: sqlite3.Cursor | None = None def open(self, filename: str) -> None: from sqlite3 import connect self._filename = filename self._con = connect(filename) self._cur = self._con.cursor() self._glos.setDefaultDefiFormat("h") self._cur.execute("SELECT key, value FROM meta;") for row in self._cur.fetchall(): if row[0] == "hash": continue self._glos.setInfo(row[0], row[1]) def __len__(self) -> int: if self._cur is None: raise ValueError("cur is None") self._cur.execute("select count(id) from entry") return self._cur.fetchone()[0] def __iter__(self) -> Iterator[EntryType]: from json import loads if self._cur is None: raise ValueError("cur is None") self._cur.execute( "SELECT entry.term, entry.article, " "json_group_array(alt.term)" "FROM entry LEFT JOIN alt ON entry.id=alt.id " "GROUP BY entry.id;", ) for row in self._cur.fetchall(): terms = [row[0]] + [alt for alt in loads(row[2]) if alt] article = row[1] yield self._glos.newEntry(terms, article, defiFormat="h") def close(self) -> None: if self._cur: self._cur.close() if self._con: self._con.close() self._clear() pyglossary-5.0.9/pyglossary/plugins/ayandict_sqlite/tools.toml000066400000000000000000000002511476751035500250630ustar00rootroot00000000000000[AyanDict] web = "https://github.com/ilius/ayandict" source = "https://github.com/ilius/ayandict" platforms = [ "Linux", "Windows", "Mac",] license = "GPL" plang = "Go" pyglossary-5.0.9/pyglossary/plugins/ayandict_sqlite/writer.py000066400000000000000000000073471476751035500247310ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING from pyglossary.core import log if TYPE_CHECKING: import sqlite3 from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType from pyglossary.xdxf.transform import XdxfTransformer __all__ = ["Writer"] class Writer: _fuzzy: int = True def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._clear() def 
_clear(self) -> None: self._filename = "" self._con: sqlite3.Connection | None = None self._cur: sqlite3.Cursor | None = None self._xdxfTr: XdxfTransformer | None = None def open(self, filename: str) -> None: from sqlite3 import connect self._filename = filename con = self._con = connect(filename) self._cur = self._con.cursor() for query in ( "CREATE TABLE meta ('key' TEXT PRIMARY KEY NOT NULL, 'value' TEXT);", ( "CREATE TABLE entry ('id' INTEGER PRIMARY KEY NOT NULL, " "'term' TEXT, 'article' TEXT);" ), "CREATE TABLE alt ('id' INTEGER NOT NULL, 'term' TEXT);", "CREATE INDEX idx_meta ON meta(key);", "CREATE INDEX idx_entry_term ON entry(term COLLATE NOCASE);", "CREATE INDEX idx_alt_id ON alt(id);", "CREATE INDEX idx_alt_term ON alt(term COLLATE NOCASE);", ): try: con.execute(query) except Exception as e: # noqa: PERF203 log.error(f"query: {query}") raise e for key, value in self._glos.iterInfo(): con.execute( "INSERT INTO meta (key, value) VALUES (?, ?);", (key, value), ) if self._fuzzy: con.execute( "CREATE TABLE fuzzy3 ('sub' TEXT NOT NULL, " "'term' TEXT NOT NULL, " "id INTEGER NOT NULL);", ) con.execute( "CREATE INDEX idx_fuzzy3_sub ON fuzzy3(sub COLLATE NOCASE);", ) con.commit() def finish(self) -> None: if self._con is None or self._cur is None: return self._con.commit() self._con.close() self._con = None self._cur = None def xdxf_setup(self) -> None: from pyglossary.xdxf.transform import XdxfTransformer # if self._xsl: # self._xdxfTr = XslXdxfTransformer(encoding="utf-8") # return self._xdxfTr = XdxfTransformer(encoding="utf-8") def xdxf_transform(self, text: str) -> str: if self._xdxfTr is None: self.xdxf_setup() return self._xdxfTr.transformByInnerString(text) # type: ignore def write(self) -> Generator[None, EntryType, None]: import hashlib cur = self._cur if cur is None: raise ValueError("cur is None") hash_ = hashlib.md5() while True: entry = yield if entry is None: break if entry.isData(): # can save it with entry.save(directory) continue defi = entry.defi entry.detectDefiFormat() if entry.defiFormat == "m": if "\n" in defi: defi = f"
<pre>{defi}</pre>
    " elif entry.defiFormat == "x": defi = self.xdxf_transform(defi) cur.execute( "INSERT INTO entry(term, article) VALUES (?, ?);", (entry.l_word[0], defi), ) id_ = cur.lastrowid if id_ is None: raise ValueError("lastrowid is None") for alt in entry.l_word[1:]: cur.execute( "INSERT INTO alt(id, term) VALUES (?, ?);", (id_, alt), ) hash_.update(entry.s_word.encode("utf-8")) if self._fuzzy: self.addFuzzy(id_, entry.l_word) cur.execute( "INSERT INTO meta (key, value) VALUES (?, ?);", ("hash", hash_.hexdigest()), ) def addFuzzy(self, id_: int, terms: list[str]) -> None: cur = self._cur if cur is None: raise ValueError("cur is None") for term in terms: subs: set[str] = set() for word in term.split(" "): eword = "\n" + word subs.update(eword[i : i + 3] for i in range(len(eword) - 2)) for sub in subs: cur.execute( "INSERT INTO fuzzy3(sub, term, id) VALUES (?, ?, ?);", (sub, term, id_), ) pyglossary-5.0.9/pyglossary/plugins/babylon_bdc/000077500000000000000000000000001476751035500221115ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/babylon_bdc/__init__.py000066400000000000000000000010271476751035500242220ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from pyglossary.option import Option __all__ = [ "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = False lname = "babylon_bdc" name = "BabylonBdc" description = "Babylon (bdc)" extensions = (".bdc",) extensionCreate = "" singleFile = True kind = "binary" wiki = "" website = None optionsProp: dict[str, Option] = {} pyglossary-5.0.9/pyglossary/plugins/babylon_bgl/000077500000000000000000000000001476751035500221255ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/babylon_bgl/__init__.py000066400000000000000000000024531476751035500242420ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2008-2021 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . 
from .reader import Reader, optionsProp __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "babylon_bgl" name = "BabylonBgl" description = "Babylon (.BGL)" extensions = (".bgl",) extensionCreate = "" singleFile = True kind = "binary" wiki = "" website = None # progressbar = DEFAULT_YES # FIXME: document type of read/write options # (that would be specified in command line) pyglossary-5.0.9/pyglossary/plugins/babylon_bgl/bgl_charset.py000066400000000000000000000024571476751035500247640ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2008-2020 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . __all__ = ["charsetByCode"] charsetByCode = { 0x41: "cp1252", # Default, 0x41 0x42: "cp1252", # Latin, 0x42 0x43: "cp1250", # Eastern European, 0x43 0x44: "cp1251", # Cyrillic, 0x44 0x45: "cp932", # Japanese, 0x45 0x46: "cp950", # Traditional Chinese, 0x46 0x47: "cp936", # Simplified Chinese, 0x47 0x48: "cp1257", # Baltic, 0x48 0x49: "cp1253", # Greek, 0x49 0x4A: "cp949", # Korean, 0x4A 0x4B: "cp1254", # Turkish, 0x4B 0x4C: "cp1255", # Hebrew, 0x4C 0x4D: "cp1256", # Arabic, 0x4D 0x4E: "cp874", # Thai, 0x4E } pyglossary-5.0.9/pyglossary/plugins/babylon_bgl/bgl_gzip.py000066400000000000000000000423331476751035500243010ustar00rootroot00000000000000""" Functions that read and write gzipped files. The user of the file doesn't have to worry about the compression, but random access is not allowed. """ # based on Andrew Kuchling's minigzip.py distributed with the zlib module import _compression import builtins import io import logging import os import struct import time import zlib __all__ = ["BadGzipFile", "GzipFile"] log = logging.getLogger("root") _FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 READ, WRITE = 1, 2 _COMPRESS_LEVEL_FAST = 1 _COMPRESS_LEVEL_TRADEOFF = 6 _COMPRESS_LEVEL_BEST = 9 def write32u(output, value): # The L format writes the bit pattern correctly whether signed # or unsigned. output.write(struct.pack("" def _init_write(self, filename): self.name = filename self.crc = zlib.crc32(b"") self.size = 0 self.offset = 0 # Current file offset for seek(), tell(), etc def _write_gzip_header(self, compresslevel): self.fileobj.write(b"\037\213") # magic header self.fileobj.write(b"\010") # compression method try: # RFC 1952 requires the FNAME field to be Latin-1. Do not # include filenames that cannot be represented that way. 
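# The member header assembled by _write_gzip_header() above (and consumed by
# _read_gzip_header() further down) is the fixed RFC 1952 layout: two magic
# bytes \x1f\x8b, method 8 (deflate), one flag byte, a 4-byte little-endian
# mtime, then XFL and OS. A hand-rolled sketch that reads just these fixed
# fields (not part of GzipFile itself):
import struct

def read_member_header(fp):
	magic = fp.read(2)
	if magic != b"\x1f\x8b":
		raise ValueError(f"not a gzip member: {magic!r}")
	method, flag, mtime = struct.unpack("<BBI", fp.read(6))
	xfl, os_byte = fp.read(1)[0], fp.read(1)[0]
	return method, flag, mtime, xfl, os_byte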
fname = os.path.basename(self.name) if not isinstance(fname, bytes): fname = fname.encode("latin-1") if fname.endswith(b".gz"): fname = fname[:-3] except UnicodeEncodeError: fname = b"" flags = 0 if fname: flags = FNAME self.fileobj.write(chr(flags).encode("latin-1")) mtime = self._write_mtime if mtime is None: mtime = time.time() write32u(self.fileobj, int(mtime)) if compresslevel == _COMPRESS_LEVEL_BEST: xfl = b"\002" elif compresslevel == _COMPRESS_LEVEL_FAST: xfl = b"\004" else: xfl = b"\000" self.fileobj.write(xfl) self.fileobj.write(b"\377") if fname: self.fileobj.write(fname + b"\000") def write(self, data): self._check_not_closed() if self.mode != WRITE: import errno raise OSError(errno.EBADF, "write() on read-only GzipFile object") if self.fileobj is None: raise ValueError("write() on closed GzipFile object") if isinstance(data, (bytes, bytearray)): length = len(data) else: # accept any data that supports the buffer protocol data = memoryview(data) length = data.nbytes if length > 0: self.fileobj.write(self.compress.compress(data)) self.size += length self.crc = zlib.crc32(data, self.crc) self.offset += length return length def read(self, size=-1): self._check_not_closed() if self.mode != READ: import errno raise OSError(errno.EBADF, "read() on write-only GzipFile object") return self._buffer.read(size) def read1(self, size=-1): """ Implements BufferedIOBase.read1(). Reads up to a buffer's worth of data if size is negative. """ self._check_not_closed() if self.mode != READ: import errno raise OSError(errno.EBADF, "read1() on write-only GzipFile object") if size < 0: size = io.DEFAULT_BUFFER_SIZE return self._buffer.read1(size) def peek(self, n): self._check_not_closed() if self.mode != READ: import errno raise OSError(errno.EBADF, "peek() on write-only GzipFile object") return self._buffer.peek(n) @property def closed(self): return self.fileobj is None def close(self): fileobj = self.fileobj if fileobj is None: return self.fileobj = None try: if self.mode == WRITE: fileobj.write(self.compress.flush()) write32u(fileobj, self.crc) # self.size may exceed 2 GiB, or even 4 GiB write32u(fileobj, self.size & 0xffffffff) elif self.mode == READ: self._buffer.close() finally: myfileobj = self.myfileobj if myfileobj: self.myfileobj = None myfileobj.close() def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH): self._check_not_closed() if self.mode == WRITE: # Ensure the compressor's buffer is flushed self.fileobj.write(self.compress.flush(zlib_mode)) self.fileobj.flush() def fileno(self): """ Invoke the underlying file object's fileno() method. This will raise AttributeError if the underlying file object doesn't support fileno(). """ return self.fileobj.fileno() def rewind(self): """ Return the uncompressed stream file position indicator to the beginning of the file. 
""" if self.mode != READ: raise OSError("Can't rewind in write mode") self._buffer.seek(0) def readable(self): return self.mode == READ def writable(self): return self.mode == WRITE def seekable(self): return True def seek(self, offset, whence=io.SEEK_SET): if self.mode == WRITE: if whence != io.SEEK_SET: if whence == io.SEEK_CUR: offset = self.offset + offset else: raise ValueError("Seek from end not supported") if offset < self.offset: raise OSError("Negative seek in write mode") count = offset - self.offset chunk = b"\0" * 1024 for _ in range(count // 1024): self.write(chunk) self.write(b"\0" * (count % 1024)) elif self.mode == READ: self._check_not_closed() return self._buffer.seek(offset, whence) return self.offset def readline(self, size=-1): self._check_not_closed() return self._buffer.readline(size) def _read_exact(fp, n): """ Read exactly *n* bytes from `fp`. This method is required because fp may be unbuffered, i.e. return short reads. """ data = fp.read(n) while len(data) < n: b = fp.read(n - len(data)) if not b: raise EOFError("Compressed file ended before the " "end-of-stream marker was reached") data += b return data def _read_gzip_header(fp): """ Read a gzip header from `fp` and progress to the end of the header. Returns last mtime if header was present or None otherwise. """ magic = fp.read(2) if magic == b"": return None if magic != b"\037\213": raise BadGzipFile(f"Not a gzipped file ({magic!r})") (method, flag, last_mtime) = struct.unpack(" (ilius) # Copyright © 2011-2012 kubtek # This file is part of PyGlossary project, http://github.com/ilius/pyglossary # Thanks to Raul Fernandes and Karl Grill for reverse # engineering as part of https://sourceforge.net/projects/ktranslator/ # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . 
from __future__ import annotations from dataclasses import dataclass from typing import TYPE_CHECKING, Any from pyglossary import gregorian from pyglossary.core import log from pyglossary.text_utils import ( uintFromBytes, ) from .bgl_charset import charsetByCode from .bgl_language import BabylonLanguage, languageByCode if TYPE_CHECKING: from collections.abc import Callable __all__ = ["charsetInfoDecode", "infoType3ByCode"] @dataclass(slots=True) class InfoItem: name: str decode: Callable[[bytes], Any] | None = None attr: bool = False def decodeBglBinTime(b_value: bytes) -> str: jd1970 = gregorian.to_jd(1970, 1, 1) djd, hm = divmod(uintFromBytes(b_value), 24 * 60) year, month, day = gregorian.jd_to(djd + jd1970) hour, minute = divmod(hm, 60) return f"{year:04d}/{month:02d}/{day:02d}, {hour:02d}:{minute:02d}" def languageInfoDecode(b_value: bytes) -> BabylonLanguage | None: """Returns BabylonLanguage instance.""" intValue = uintFromBytes(b_value) try: return languageByCode[intValue] except IndexError: log.warning(f"read_type_3: unknown language code = {intValue}") return None def charsetInfoDecode(b_value: bytes) -> str | None: value = b_value[0] try: return charsetByCode[value] except KeyError: log.warning(f"read_type_3: unknown charset {value!r}") return None def aboutInfoDecode(b_value: bytes) -> dict[str, any]: if not b_value: return None b_aboutExt, _, aboutContents = b_value.partition(b"\x00") if not b_aboutExt: log.warning("read_type_3: about: no file extension") return None try: aboutExt = b_aboutExt.decode("ascii") except UnicodeDecodeError as e: log.error(f"{b_aboutExt=}: {e}") aboutExt = "" return { "about_extension": aboutExt, "about": aboutContents, } def utf16InfoDecode(b_value: bytes) -> str | None: r""" Decode info values from UTF-16. Return str, or None (on errors). block type = 3 block format: <2 byte code1><2 byte code2> if code2 == 0: then the block ends if code2 == 1: then the block continues as follows: <4 byte len1> \x00 \x00 len1 - length of message in 2-byte chars """ if b_value[0] != 0: log.warning( f"utf16InfoDecode: b_value={b_value}, null expected at 0", ) return None if b_value[1] == 0: if len(b_value) > 2: log.warning( f"utf16InfoDecode: unexpected b_value size: {len(b_value)}", ) return None if b_value[1] > 1: log.warning( f"utf16InfoDecode: b_value={b_value!r}, unexpected byte at 1", ) return None # now b_value[1] == 1 size = 2 * uintFromBytes(b_value[2:6]) if tuple(b_value[6:8]) != (0, 0): log.warning( f"utf16InfoDecode: b_value={b_value!r}, null expected at 6:8", ) if size != len(b_value) - 8: log.warning( f"utf16InfoDecode: b_value={b_value!r}, size does not match", ) return b_value[8:].decode("utf16") # str def flagsInfoDecode(b_value: bytes) -> dict[str, bool]: """ Returns a dict with these keys: utf8Encoding when this flag is set utf8 encoding is used for all articles when false, the encoding is set according to the source and target alphabet bgl_spellingAlternatives determines whether the glossary offers spelling alternatives for searched terms bgl_caseSensitive defines if the search for terms in this glossary is case sensitive see code 0x20 as well. 
""" flags = uintFromBytes(b_value) return { "utf8Encoding": (flags & 0x8000 != 0), "bgl_spellingAlternatives": (flags & 0x10000 == 0), "bgl_caseSensitive": (flags & 0x1000 != 0), } infoType3ByCode = { # glossary name 0x01: InfoItem("title"), # glossary author name, a list of "|"-separated values 0x02: InfoItem("author"), # glossary author e-mail 0x03: InfoItem("email"), 0x04: InfoItem("copyright"), 0x07: InfoItem( "sourceLang", decode=languageInfoDecode, attr=True, ), 0x08: InfoItem( "targetLang", decode=languageInfoDecode, attr=True, ), 0x09: InfoItem("description"), # 0: browsing disabled, 1: browsing enabled 0x0A: InfoItem( "bgl_browsingEnabled", decode=lambda b_value: (b_value[0] != 0), ), 0x0B: InfoItem("icon1.ico"), 0x0C: InfoItem( "bgl_numEntries", decode=uintFromBytes, attr=True, ), # the value is a dict 0x11: InfoItem("flags", decode=flagsInfoDecode), 0x14: InfoItem("creationTime", decode=decodeBglBinTime), 0x1A: InfoItem( "sourceCharset", decode=charsetInfoDecode, attr=True, ), 0x1B: InfoItem( "targetCharset", decode=charsetInfoDecode, attr=True, ), 0x1C: InfoItem( "bgl_firstUpdated", decode=decodeBglBinTime, ), # bgl_firstUpdated was previously called middleUpdated # in rare cases, bgl_firstUpdated is before creationTime # but usually it looks like to be the first update (after creation) # in some cases, it's the same as lastUpdated # in some cases, it's minutes after creationTime # bgl_firstUpdated exists in more glossaries than lastUpdated # so if lastUpdated is not there, we use bgl_firstUpdated as lastUpdated 0x20: InfoItem( "bgl_caseSensitive2", decode=lambda b_value: (b_value[0] == 0x31), # 0x30 - case sensitive search is disabled # 0x31 - case sensitive search is enabled ), 0x24: InfoItem("icon2.ico"), 0x2C: InfoItem( "bgl_purchaseLicenseMsg", decode=utf16InfoDecode, ), 0x2D: InfoItem( "bgl_licenseExpiredMsg", decode=utf16InfoDecode, ), 0x2E: InfoItem("bgl_purchaseAddress"), 0x30: InfoItem( "bgl_titleWide", decode=utf16InfoDecode, ), # a list of "|"-separated values 0x31: InfoItem( "bgl_authorWide", decode=utf16InfoDecode, ), 0x33: InfoItem( "lastUpdated", decode=decodeBglBinTime, ), 0x3B: InfoItem("bgl_contractions"), # contains a value like "Arial Unicode MS" or "Tahoma" 0x3D: InfoItem("bgl_fontName"), # value would be dict 0x41: InfoItem( "bgl_about", decode=aboutInfoDecode, ), # the length of the substring match in a term 0x43: InfoItem( "bgl_length", decode=uintFromBytes, ), } pyglossary-5.0.9/pyglossary/plugins/babylon_bgl/bgl_internal_test.py000066400000000000000000000010251476751035500261740ustar00rootroot00000000000000import unittest from pyglossary.plugins.babylon_bgl.reader_debug import isASCII class BglInternalTest(unittest.TestCase): def test_isASCII(self): f = isASCII self.assertEqual(f(""), True) self.assertEqual(f("abc"), True) self.assertEqual(f("xyz"), True) self.assertEqual(f("ABC"), True) self.assertEqual(f("XYZ"), True) self.assertEqual(f("1234567890"), True) self.assertEqual(f("\n\r\t"), True) self.assertEqual(f("\x80"), False) self.assertEqual(f("abc\x80"), False) self.assertEqual(f("abc\xff"), False) pyglossary-5.0.9/pyglossary/plugins/babylon_bgl/bgl_language.py000066400000000000000000000266341476751035500251210ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2008-2020 Saeed Rasooli (ilius) # Copyright © 2011-2012 kubtek # This file is part of PyGlossary project, http://github.com/ilius/pyglossary # Thanks to Raul Fernandes and Karl Grill for reverse # engineering as part of https://sourceforge.net/projects/ktranslator/ # # 
This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . """ language properties. In this short note we describe how Babylon select encoding for key words, alternates and definitions. There are source and target encodings. The source encoding is used to encode keys and alternates, the target encoding is used to encode definitions. The source encoding is selected based on the source language of the dictionary, the target encoding is tied to the target language. Babylon Glossary Builder allows you to specify source and target languages. If you open a Builder project (a file with .gpr extension) in a text editor, you should find the following elements: Latin English Latin English Here bab:SourceLanguage is the source language that you select in the builder wizard, bab:SourceCharset - is the corresponding charset. bab:TargetLanguage - target language, bab:TargetCharset - corresponding charset. Unfortunately, builder does not tell us what encoding corresponds to charset, but we can detect it. A few words about how definitions are encoded. If all chars of the definition fall into the target charset, Babylon use that charset to encode the definition. If at least one char does not fall into the target charset, Babylon use utf-8 encoding, wrapping the definition into and tags. You can make Babylon use utf-8 encoding for the whole dictionary, in that case all definitions, keys and alternates are encoded with utf-8. See Babylon Glossary Builder wizard, Glossary Properties tab, Advanced button, Use UTF-8 encoding check box. Definitions are not augmented with extra mackup in this case, that is you'll not find charset tags in definitions. How you can tell what encoding was used for the particular definition in .bgl file? You need to check the following conditions. Block type 3, code 0x11. If 0x8000 bit is set, the whole dictionary use utf-8 encoding. If the definition starts with , that definition uses utf-8 encoding. Otherwise you need to consult the target encoding. Block type 3, code 0x1b. That field normally contains 1 byte code of the target encoding. Codes fill the range of 0x41 to 0x4e. Babylon Builder generate codes 0x42 - 0x4e. How to generate code 0x41? Occasionally you may encounter the field value is four zero bytes. In this case, I guess, the default encoding for the target language is used. Block type 3, code 0x08. That field contains 4-bytes code of the target language. The first three bytes are always zero, the last byte is the code. Playing with Babylon Glossary builder we can find language codes corresponding to target language. The language codes fill the range of 0 to 0x3d. How to detect the target encoding? Here is the technique I've used. - Create a babylon glossary source file ( a file with .gls extension) with the following contents. Start the file with utf-8 BOM for the builder to recognize the utf-8 encoding. 
Use unicode code point code as key, and a single unicode chars encoding in utf-8 as definition. Create keys for all code points in the range 32 - 0x10000, or you may use wider range. We do not use code points in the range 0-31, since they are control chars. You should skip the following three chars: & < >. Since the definition is supposed to contain html, these chars are be replaced by & < > respectively. You should skip the char $ as well, it has special meaning in definitions (?). Skip all code point that cannot encoded in utf-8 (not all code points in the range 32-0x10000 represent valid chars). - Now that you have a glossary source file, process it with builder selecting the desired target language. Make sure the "use utf-8" option is no set. You'll get a .bgl file. - Process the generated .bgl file with pyglossary. Skip all definitions that start with tag. Try to decode definitions using different encodings and match the result with the real value (key - code point char code). Thus you'll find the encoding having the best match. For example, you may do the following. Loop over all available encodings, loop over all definitions in the dictionary. Count the number of definitions that does not start with charset tag - total. Among them count the number of definitions that were correctly decoded - success. The encoding where total == success, is the target encoding. There are a few problems I encountered. It looks like python does not correctly implement cp932 and cp950 encodings. For Japanese charset I got 99.12% match, and for Traditional Chinese charset I got even less - 66.97%. To conform my guess that Japanese is cp932 and Traditional Chinese is cp950 I built a C++ utility that worked on the data extracted from .bgl dictionary. I used WideCharToMultiByte function for conversion. The C++ utility confirmed the cp932 and cp950 encodings, I got 100% match. """ from dataclasses import dataclass __all__ = ["BabylonLanguage", "languageByCode"] @dataclass(slots=True, frozen=True) class BabylonLanguage: """ Babylon language properties. 
name: bab:SourceLanguage, bab:TargetLanguage .gpr tags (English, French, Japanese) charset: bab:SourceCharset, bab:TargetCharset .gpr tags (Latin, Arabic, Cyrillic) encoding: Windows code page (cp1250, cp1251, cp1252) code: value of the type 3, code in .bgl file """ name: str encoding: str code: int name2: str = "" languages = ( BabylonLanguage( name="English", encoding="cp1252", code=0x00, ), BabylonLanguage( name="French", encoding="cp1252", code=0x01, ), BabylonLanguage( name="Italian", encoding="cp1252", code=0x02, ), BabylonLanguage( name="Spanish", encoding="cp1252", code=0x03, ), BabylonLanguage( name="Dutch", encoding="cp1252", code=0x04, ), BabylonLanguage( name="Portuguese", encoding="cp1252", code=0x05, ), BabylonLanguage( name="German", encoding="cp1252", code=0x06, ), BabylonLanguage( name="Russian", encoding="cp1251", code=0x07, ), BabylonLanguage( name="Japanese", encoding="cp932", code=0x08, ), BabylonLanguage( name="Chinese", name2="Traditional Chinese", encoding="cp950", code=0x09, ), BabylonLanguage( name="Chinese", name2="Simplified Chinese", encoding="cp936", code=0x0A, ), BabylonLanguage( name="Greek", encoding="cp1253", code=0x0B, ), BabylonLanguage( name="Korean", encoding="cp949", code=0x0C, ), BabylonLanguage( name="Turkish", encoding="cp1254", code=0x0D, ), BabylonLanguage( name="Hebrew", encoding="cp1255", code=0x0E, ), BabylonLanguage( name="Arabic", encoding="cp1256", code=0x0F, ), BabylonLanguage( name="Thai", encoding="cp874", code=0x10, ), BabylonLanguage( name="Other", encoding="cp1252", code=0x11, ), BabylonLanguage( name="Chinese", name2="Other Simplified Chinese dialects", encoding="cp936", code=0x12, ), BabylonLanguage( name="Chinese", name2="Other Traditional Chinese dialects", encoding="cp950", code=0x13, ), BabylonLanguage( name="Other Eastern-European languages", encoding="cp1250", code=0x14, ), BabylonLanguage( name="Other Western-European languages", encoding="cp1252", code=0x15, ), BabylonLanguage( name="Other Russian languages", encoding="cp1251", code=0x16, ), BabylonLanguage( name="Other Japanese languages", encoding="cp932", code=0x17, ), BabylonLanguage( name="Other Baltic languages", encoding="cp1257", code=0x18, ), BabylonLanguage( name="Other Greek languages", encoding="cp1253", code=0x19, ), BabylonLanguage( name="Other Korean dialects", encoding="cp949", code=0x1A, ), BabylonLanguage( name="Other Turkish dialects", encoding="cp1254", code=0x1B, ), BabylonLanguage( name="Other Thai dialects", encoding="cp874", code=0x1C, ), BabylonLanguage( name="Polish", encoding="cp1250", code=0x1D, ), BabylonLanguage( name="Hungarian", encoding="cp1250", code=0x1E, ), BabylonLanguage( name="Czech", encoding="cp1250", code=0x1F, ), BabylonLanguage( name="Lithuanian", encoding="cp1257", code=0x20, ), BabylonLanguage( name="Latvian", encoding="cp1257", code=0x21, ), BabylonLanguage( name="Catalan", encoding="cp1252", code=0x22, ), BabylonLanguage( name="Croatian", encoding="cp1250", code=0x23, ), BabylonLanguage( name="Serbian", encoding="cp1250", code=0x24, ), BabylonLanguage( name="Slovak", encoding="cp1250", code=0x25, ), BabylonLanguage( name="Albanian", encoding="cp1252", code=0x26, ), BabylonLanguage( name="Urdu", encoding="cp1256", code=0x27, ), BabylonLanguage( name="Slovenian", encoding="cp1250", code=0x28, ), BabylonLanguage( name="Estonian", encoding="cp1252", code=0x29, ), BabylonLanguage( name="Bulgarian", encoding="cp1250", code=0x2A, ), BabylonLanguage( name="Danish", encoding="cp1252", code=0x2B, ), BabylonLanguage( name="Finnish", 
encoding="cp1252", code=0x2C, ), BabylonLanguage( name="Icelandic", encoding="cp1252", code=0x2D, ), BabylonLanguage( name="Norwegian", encoding="cp1252", code=0x2E, ), BabylonLanguage( name="Romanian", encoding="cp1252", code=0x2F, ), BabylonLanguage( name="Swedish", encoding="cp1252", code=0x30, ), BabylonLanguage( name="Ukrainian", encoding="cp1251", code=0x31, ), BabylonLanguage( name="Belarusian", encoding="cp1251", code=0x32, ), BabylonLanguage( name="Persian", # aka "Farsi" encoding="cp1256", code=0x33, ), BabylonLanguage( name="Basque", encoding="cp1252", code=0x34, ), BabylonLanguage( name="Macedonian", encoding="cp1250", code=0x35, ), BabylonLanguage( name="Afrikaans", encoding="cp1252", code=0x36, ), BabylonLanguage( # Babylon Glossary Builder spells this language "Faeroese" name="Faroese", encoding="cp1252", code=0x37, ), BabylonLanguage( name="Latin", encoding="cp1252", code=0x38, ), BabylonLanguage( name="Esperanto", encoding="cp1254", code=0x39, ), BabylonLanguage( name="Tamazight", # aka "Standard Moroccan Tamazight", "Standard Moroccan Berber" # or "Standard Moroccan Amazigh" encoding="cp1252", code=0x3A, ), BabylonLanguage( name="Armenian", encoding="cp1252", code=0x3B, ), BabylonLanguage( name="Hindi", encoding="cp1252", code=0x3C, ), BabylonLanguage( name="Somali", encoding="cp1252", code=0x3D, ), ) languageByCode = {lang.code: lang for lang in languages} pyglossary-5.0.9/pyglossary/plugins/babylon_bgl/bgl_pos.py000066400000000000000000000044541476751035500241330ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2008-2020 Saeed Rasooli (ilius) # Copyright © 2011-2012 kubtek # This file is part of PyGlossary project, http://github.com/ilius/pyglossary # Thanks to Raul Fernandes and Karl Grill for reverse # engineering as part of https://sourceforge.net/projects/ktranslator/ # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . 
from __future__ import annotations __all__ = ["partOfSpeechByCode"] partOfSpeechByCode = { # Use None for codes we have not seen yet # Use "" for codes we've seen but part of speech is unknown 0x30: "noun", 0x31: "adjective", 0x32: "verb", 0x33: "adverb", 0x34: "interjection", 0x35: "pronoun", 0x36: "preposition", 0x37: "conjunction", 0x38: "suffix", 0x39: "prefix", 0x3A: "article", 0x3B: "", # in Babylon Italian-English.BGL, # Babylon Spanish-English.BGL, # Babylon_Chinese_S_English.BGL # no indication of the part of speech 0x3C: "abbreviation", # (short form: 'ר"ת') # (full form: "ר"ת: ראשי תיבות") # "ת'" # adjective # (full form: "ת': תואר") # "ש"ע" # noun # (full form: "ש"ע: שם עצם") 0x3D: "masculine noun and adjective", 0x3E: "feminine noun and adjective", 0x3F: "masculine and feminine noun and adjective", 0x40: "feminine noun", # (short form: "נ\'") # (full form: "נ': נקבה") 0x41: "masculine and feminine noun", # 0x41: noun that may be used as masculine and feminine # (short form: "זו"נ") # (full form: "זו"נ: זכר ונקבה") 0x42: "masculine noun", # (short form: 'ז\'') # (full form: "ז': זכר") 0x43: "numeral", 0x44: "participle", 0x45: None, 0x46: None, 0x47: None, } pyglossary-5.0.9/pyglossary/plugins/babylon_bgl/bgl_text.py000066400000000000000000000207121476751035500243110ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2008-2021 Saeed Rasooli (ilius) # Copyright © 2011-2012 kubtek # This file is part of PyGlossary project, http://github.com/ilius/pyglossary # Thanks to Raul Fernandes and Karl Grill for reverse # engineering as part of https://sourceforge.net/projects/ktranslator/ # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . from __future__ import annotations import re from pyglossary import core from pyglossary.core import log from pyglossary.xml_utils import xml_escape __all__ = [ "fixImgLinks", "normalizeNewlines", "removeControlChars", "removeNewlines", "replaceAsciiCharRefs", "replaceHtmlEntries", "replaceHtmlEntriesInKeys", "stripDollarIndexes", "stripHtmlTags", "unknownHtmlEntries", ] u_pat_html_entry = re.compile("(?:&#x|&#|&)(\\w+);?", re.IGNORECASE) u_pat_html_entry_key = re.compile("(?:&#x|&#|&)(\\w+);", re.IGNORECASE) b_pat_ascii_char_ref = re.compile(b"(&#\\w+;)", re.IGNORECASE) u_pat_newline_escape = re.compile("[\\r\\n\\\\]") u_pat_strip_tags = re.compile("(?:<[/a-zA-Z].*?(?:>|$))+") u_pat_control_chars = re.compile("[\x00-\x08\x0c\x0e-\x1f]") u_pat_newline = re.compile("[\r\n]+") unknownHtmlEntries = set() def replaceHtmlEntryNoEscapeCB(u_match: re.Match) -> str: """ u_match: instance of _sre.SRE_Match Replace character entity with the corresponding character. Return the original string if conversion fails. Use this as a replace function of re.sub. 
""" from pyglossary.html_utils import name2codepoint u_text = u_match.group(0) u_name = u_match.group(1) if core.isDebug(): assert isinstance(u_text, str) assert isinstance(u_name, str) if u_text[:2] == "&#": # character reference try: code = int(u_name, 16) if u_text[:3].lower() == "&#x" else int(u_name) if code <= 0: raise ValueError(f"{code = }") return chr(code) except (ValueError, OverflowError): return chr(0xFFFD) # replacement character elif u_text[0] == "&": """ Babylon dictionaries contain a lot of non-standard entity, references for example, csdot, fllig, nsm, cancer, thlig, tsdot, upslur... This not just a typo. These entries repeat over and over again. Perhaps they had meaning in the source dictionary that was converted to Babylon, but now the meaning is lost. Babylon does render them as is, that is, for example, &csdot; despite other references like & are replaced with corresponding characters. """ # named entity try: return chr(name2codepoint[u_name.lower()]) except KeyError: unknownHtmlEntries.add(u_text) return u_text raise ValueError(f"{u_text[0] =}") def replaceHtmlEntryCB(u_match: re.Match) -> str: """ u_match: instance of _sre.SRE_Match Same as replaceHtmlEntryNoEscapeCB, but escapes result string. Only <, >, & characters are escaped. """ u_res = replaceHtmlEntryNoEscapeCB(u_match) if u_match.group(0) == u_res: # conversion failed return u_res # FIXME: should " and ' be escaped? return xml_escape(u_res, quotation=False) # def replaceDingbat(u_match: "re.Match") -> str: # r"""Replace chars \\u008c-\\u0095 with \\u2776-\\u277f.""" # ch = u_match.group(0) # code = ch + 0x2776 - 0x8C # return chr(code) def escapeNewlinesCallback(u_match: re.Match) -> str: """u_match: instance of _sre.SRE_Match.""" ch = u_match.group(0) if ch == "\n": return "\\n" if ch == "\r": return "\\r" if ch == "\\": return "\\\\" return ch def replaceHtmlEntries(u_text: str) -> str: # &ldash; # “ # ċ if core.isDebug(): assert isinstance(u_text, str) return u_pat_html_entry.sub( replaceHtmlEntryCB, u_text, ) def replaceHtmlEntriesInKeys(u_text: str) -> str: # &ldash; # “ # ċ if core.isDebug(): assert isinstance(u_text, str) return u_pat_html_entry_key.sub( replaceHtmlEntryNoEscapeCB, u_text, ) def escapeNewlines(u_text: str) -> str: r""" Convert text to c-escaped string: \ -> \\ new line -> \n or \r. 
""" if core.isDebug(): assert isinstance(u_text, str) return u_pat_newline_escape.sub( escapeNewlinesCallback, u_text, ) def stripHtmlTags(u_text: str) -> str: if core.isDebug(): assert isinstance(u_text, str) return u_pat_strip_tags.sub( " ", u_text, ) def removeControlChars(u_text: str) -> str: # \x09 - tab # \x0a - line feed # \x0b - vertical tab # \x0d - carriage return if core.isDebug(): assert isinstance(u_text, str) return u_pat_control_chars.sub( "", u_text, ) def removeNewlines(u_text: str) -> str: if core.isDebug(): assert isinstance(u_text, str) return u_pat_newline.sub( " ", u_text, ) def normalizeNewlines(u_text: str) -> str: """Convert new lines to unix style and remove consecutive new lines.""" if core.isDebug(): assert isinstance(u_text, str) return u_pat_newline.sub( "\n", u_text, ) def replaceAsciiCharRefs(b_text: bytes) -> bytes: # “ # ċ if core.isDebug(): assert isinstance(b_text, bytes) b_parts = b_pat_ascii_char_ref.split(b_text) for i_part, b_part in enumerate(b_parts): if i_part % 2 != 1: continue # reference try: code = ( int(b_part[3:-1], 16) if b_part[:3].lower() == "&#x" else int(b_part[2:-1]) ) if code <= 0: raise ValueError(f"{code = }") except (ValueError, OverflowError): code = -1 if code < 128 or code > 255: continue # no need to escape "<", ">", "&" b_parts[i_part] = bytes([code]) return b"".join(b_parts) def fixImgLinks(u_text: str) -> str: r""" Fix img tag links. src attribute value of image tag is often enclosed in \x1e - \x1f characters. For example: . Naturally the control characters are not part of the image source name. They may be used to quickly find all names of resources. This function strips all such characters. Control characters \x1e and \x1f are useless in html text, so we may safely remove all of them, irrespective of context. """ if core.isDebug(): assert isinstance(u_text, str) return u_text.replace("\x1e", "").replace("\x1f", "") def stripDollarIndexes(b_word: bytes) -> tuple[bytes, int]: if core.isDebug(): assert isinstance(b_word, bytes) i = 0 b_word_main = b"" strip_count = 0 # number of sequences found # strip $$ sequences while True: d0 = b_word.find(b"$", i) if d0 == -1: b_word_main += b_word[i:] break d1 = b_word.find(b"$", d0 + 1) if d1 == -1: # log.debug( # f"stripDollarIndexes({b_word}):\npaired $ is not found", # ) b_word_main += b_word[i:] break # You may find keys (or alternative keys) like these: # sur l'arbre$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ # obscurantiste$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ # They all end on a sequence of b'$', key length including dollars # is always 60 chars. # You may find keys like these: # extremidade-$$$-$$$-linha # .FIRM$$$$$$$$$$$$$ # etc # summary: we must remove any sequence of dollar signs longer # than 1 chars if d1 == d0 + 1: # log.debug(f"stripDollarIndexes({b_word}):\nfound $$") b_word_main += b_word[i:d0] i = d1 + 1 while i < len(b_word) and b_word[i] == ord(b"$"): i += 1 if i >= len(b_word): break continue if b_word[d0 + 1 : d1].strip(b"0123456789"): # if has at least one non-digit char # log.debug(f"stripDollarIndexes({b_word}):\nnon-digit between $$") b_word_main += b_word[i:d1] i = d1 continue # Examples: # make do$4$/make /do # potere$1$
	# See also notes...
	# volere$1$
    # See also notes... # Ihre$1$Ihres if d1 + 1 < len(b_word) and b_word[d1 + 1] != 0x20: log.debug( f"stripDollarIndexes({b_word!r}):\nsecond $ is followed by non-space", ) b_word_main += b_word[i:d0] i = d1 + 1 strip_count += 1 return b_word_main, strip_count pyglossary-5.0.9/pyglossary/plugins/babylon_bgl/gzip_no_crc.patch000066400000000000000000000003311476751035500254370ustar00rootroot000000000000007a8,10 > import logging > log = logging.getLogger('root') > 524c527 < raise BadGzipFile("CRC check failed %s != %s" % (hex(crc32), --- > log.warning("CRC check failed %s != %s" % (hex(crc32), pyglossary-5.0.9/pyglossary/plugins/babylon_bgl/reader.py000066400000000000000000001361071476751035500237510ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2008-2021 Saeed Rasooli (ilius) # Copyright © 2011-2012 kubtek # This file is part of PyGlossary project, http://github.com/ilius/pyglossary # Thanks to Raul Fernandes and Karl Grill for reverse # engineering as part of https://sourceforge.net/projects/ktranslator/ # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . from __future__ import annotations import io import os import re from typing import TYPE_CHECKING, NamedTuple from pyglossary.core import log from pyglossary.option import ( BoolOption, EncodingOption, HtmlColorOption, Option, StrOption, ) from pyglossary.text_utils import ( excMessage, uintFromBytes, ) from pyglossary.xml_utils import xml_escape from .bgl_gzip import GzipFile from .bgl_info import ( charsetInfoDecode, infoType3ByCode, ) from .bgl_pos import partOfSpeechByCode from .bgl_text import ( fixImgLinks, normalizeNewlines, removeControlChars, removeNewlines, replaceAsciiCharRefs, replaceHtmlEntries, replaceHtmlEntriesInKeys, stripDollarIndexes, stripHtmlTags, unknownHtmlEntries, ) if TYPE_CHECKING: from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["BGLGzipFile", "Block", "FileOffS", "Reader", "optionsProp", "tmpDir"] file = io.BufferedReader debugReadOptions = { "search_char_samples", # bool "collect_metadata2", # bool "write_gz", # bool "char_samples_path", # str, file path "msg_log_path", # str, file path "raw_dump_path", # str, file path "unpacked_gzip_path", # str, file path } optionsProp: dict[str, Option] = { "default_encoding_overwrite": EncodingOption( comment="Default encoding (overwrite)", ), "source_encoding_overwrite": EncodingOption( comment="Source encoding (overwrite)", ), "target_encoding_overwrite": EncodingOption( comment="Target encoding (overwrite)", ), "part_of_speech_color": HtmlColorOption( comment="Color for Part of Speech", ), "no_control_sequence_in_defi": BoolOption( comment="No control sequence in definitions", ), "strict_string_conversion": BoolOption( comment="Strict string conversion", ), "process_html_in_key": BoolOption( comment="Process HTML in (entry or info) key", ), "key_rstrip_chars": StrOption( 
multiline=True, comment="Characters to strip from right-side of keys", ), # debug read options: "search_char_samples": BoolOption( comment="(debug) Search character samples", ), "collect_metadata2": BoolOption( comment="(debug) Collect second pass metadata from definitions", ), "write_gz": BoolOption( comment="(debug) Create a file named *-data.gz", ), "char_samples_path": StrOption( # file path comment="(debug) File path for character samples", ), "msg_log_path": StrOption( # file path comment="(debug) File path for message log", ), "raw_dump_path": StrOption( # file path comment="(debug) File path for writing raw blocks", ), "unpacked_gzip_path": StrOption( # file path comment="(debug) Path to create unzipped file", ), } if os.sep == "/": # Operating system is Unix-like tmpDir = "/tmp" # noqa: S108 elif os.sep == "\\": # Operating system is ms-windows tmpDir = os.getenv("TEMP") else: raise RuntimeError( f"Unknown path separator(os.sep=={os.sep!r}). What is your operating system?", ) re_charset_decode = re.compile( b'(|
    )', re.IGNORECASE, ) re_b_reference = re.compile(b"^[0-9a-fA-F]{4}$") class EntryWordData(NamedTuple): pos: int b_word: bytes u_word: str u_word_html: str class BGLGzipFile(GzipFile): """ gzip_no_crc.py contains GzipFile class without CRC check. It prints a warning when CRC code does not match. The original method raises an exception in this case. Some dictionaries do not use CRC code, it is set to 0. """ def __init__( self, fileobj: io.IOBase | None = None, closeFileobj: bool = False, **kwargs, # noqa: ANN003 ) -> None: GzipFile.__init__(self, fileobj=fileobj, **kwargs) self.closeFileobj = closeFileobj def close(self) -> None: if self.closeFileobj: self.fileobj.close() class Block: def __init__(self) -> None: self.data = b"" self.type = "" # block offset in the gzip stream, for debugging self.offset = -1 def __str__(self) -> str: return ( f"Block type={self.type}, length={self.length}, len(data)={len(self.data)}" ) class FileOffS(file): """ A file class with an offset. This class provides an interface to a part of a file starting at specified offset and ending at the end of the file, making it appear an independent file. offset parameter of the constructor specifies the offset of the first byte of the modeled file. """ def __init__(self, filename: str, offset: int = 0) -> None: fileObj = open(filename, "rb") # noqa: SIM115 file.__init__(self, fileObj) self._fileObj = fileObj self.offset = offset file.seek(self, offset) # OR self.seek(0) def close(self) -> None: self._fileObj.close() def seek(self, pos: int, whence: int = 0) -> None: if whence == 0: # relative to start of file file.seek( self, max(0, pos) + self.offset, 0, ) elif whence == 1: # relative to current position file.seek( self, max( self.offset, self.tell() + pos, ), 0, ) elif whence == 2: # relative to end of file file.seek(self, pos, 2) else: raise ValueError(f"FileOffS.seek: bad whence={whence}") def tell(self) -> int: return file.tell(self) - self.offset class DefinitionFields: """ Fields of entry definition. Entry definition consists of a number of fields. The most important of them are: defi - the main definition, mandatory, comes first. 
part of speech title """ # nameByCode = { # } def __init__(self) -> None: # self.bytesByCode = {} # self.strByCode = {} self.encoding = None # encoding of the definition self.singleEncoding = True # singleEncoding=True if the definition was encoded with # a single encoding self.b_defi = None # bytes, main definition part of defi self.u_defi = None # str, main part of definition self.partOfSpeech = None # string representation of the part of speech, utf-8 self.b_title = None # bytes self.u_title = None # str self.b_title_trans = None # bytes self.u_title_trans = None # str self.b_transcription_50 = None # bytes self.u_transcription_50 = None # str self.code_transcription_50 = None self.b_transcription_60 = None # bytes self.u_transcription_60 = None # str self.code_transcription_60 = None self.b_field_1a = None # bytes self.u_field_1a = None # str self.b_field_07 = None # bytes self.b_field_06 = None # bytes self.b_field_13 = None # bytes class Reader: useByteProgress = False _default_encoding_overwrite: str = "" _source_encoding_overwrite: str = "" _target_encoding_overwrite: str = "" _part_of_speech_color: str = "007000" _no_control_sequence_in_defi: bool = False _strict_string_conversion: bool = False # process keys and alternates as HTML # Babylon does not interpret keys and alternates as HTML text, # however you may encounter many keys containing character references # and html tags. That is clearly a bug of the dictionary. # We must be very careful processing HTML tags in keys, not damage # normal keys. This option should be disabled by default, enabled # explicitly by user. Namely this option does the following: # - resolve character references # - strip HTML tags _process_html_in_key: bool = True # a string of characters that will be stripped from the end of the # key (and alternate), see str.rstrip function _key_rstrip_chars: str = "" ########################################################################## """ Dictionary properties --------------------- Dictionary (or glossary) properties are textual data like glossary name, glossary author name, glossary author e-mail, copyright message and glossary description. Most of the dictionaries have these properties set. Since they contain textual data we need to know the encoding. There may be other properties not listed here. I've enumerated only those that are available in Babylon Glossary builder. Playing with Babylon builder allows us detect how encoding is selected. If global utf-8 flag is set, utf-8 encoding is used for all properties. Otherwise the target encoding is used, that is the encoding corresponding to the target language. The chars that cannot be represented in the target encoding are replaced with question marks. Using this algorithm to decode dictionary properties you may encounter that some of them are decoded incorrectly. For example, it is clear that the property is in cp1251 encoding while the algorithm says we must use cp1252, and we get garbage after decoding. That is OK, the algorithm is correct. You may install that dictionary in Babylon and check dictionary properties. It shows the same garbage. Unfortunately, we cannot detect correct encoding in this case automatically. We may add a parameter the will overwrite the selected encoding, so the user may fix the encoding if needed. 
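	For example (an illustrative command line; these option names come from
	optionsProp above, though the exact syntax for passing read options may
	vary between versions):

		pyglossary --read-options='source_encoding_overwrite=cp1251' \
			dictionary.bgl dictionary.txt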
""" def __init__(self, glos: ReaderGlossaryType) -> None: # no more arguments self._glos = glos self._filename = "" self.info = {} self.numEntries = None #### self.sourceLang = "" self.targetLang = "" ## self.defaultCharset = "" self.sourceCharset = "" self.targetCharset = "" ## self.sourceEncoding = None self.targetEncoding = None #### self.bgl_numEntries = None self.wordLenMax = 0 self.defiMaxBytes = 0 ## self.metadata2 = None self.rawDumpFile = None self.msgLogFile = None self.samplesDumpFile = None ## self.stripSlashAltKeyPattern = re.compile(r"(^|\s)/(\w)", re.UNICODE) self.specialCharPattern = re.compile(r"[^\s\w.]", re.UNICODE) ### self.file = None # offset of gzip header, set in self.open() self.gzipOffset = None # must be a in RRGGBB format self.iconDataList = [] self.aboutBytes: bytes | None = None self.aboutExt = "" def __len__(self) -> int: if self.numEntries is None: log.warning("len(reader) called while numEntries=None") return 0 return self.numEntries + self.numResources # open .bgl file, read signature, find and open gzipped content # self.file - ungzipped content def open( self, filename: str, ) -> None: self._filename = filename if not self.openGzip(): raise OSError("BGL: failed to read gzip header") self.readInfo() self.setGlossaryInfo() def openGzip(self) -> None: with open(self._filename, "rb") as bglFile: if not bglFile: log.error(f"file pointer empty: {bglFile}") return False b_head = bglFile.read(6) if len(b_head) < 6 or b_head[:4] not in { b"\x12\x34\x00\x01", b"\x12\x34\x00\x02", }: log.error(f"invalid header: {b_head[:6]!r}") return False self.gzipOffset = gzipOffset = uintFromBytes(b_head[4:6]) log.debug(f"Position of gz header: {gzipOffset}") if gzipOffset < 6: log.error(f"invalid gzip header position: {gzipOffset}") return False self.file = BGLGzipFile( fileobj=FileOffS(self._filename, gzipOffset), closeFileobj=True, ) return True # TODO: PLR0912 Too many branches (14 > 12) def readInfo(self) -> None: # noqa: PLR0912 """ Read meta information about the dictionary: author, description, source and target languages, etc (articles are not read). """ self.numEntries = 0 self.numBlocks = 0 self.numResources = 0 block = Block() while not self.isEndOfDictData(): if not self.readBlock(block): break self.numBlocks += 1 if not block.data: continue if block.type == 0: self.readType0(block) elif block.type in {1, 7, 10, 11, 13}: self.numEntries += 1 elif block.type == 2: self.numResources += 1 elif block.type == 3: self.readType3(block) else: # Unknown block.type log.debug( f"Unknown Block type {block.type!r}" f", data_length = {len(block.data)}" f", number = {self.numBlocks}", ) self.file.seek(0) self.detectEncoding() log.debug(f"numEntries = {self.numEntries}") if self.bgl_numEntries and self.bgl_numEntries != self.numEntries: # There are a number of cases when these numbers do not match. # The dictionary is OK, and these is no doubt that we might missed # an entry. # self.bgl_numEntries may be less than the number of entries # we've read. 
log.warning( f"bgl_numEntries={self.bgl_numEntries}, numEntries={self.numEntries}", ) self.numBlocks = 0 encoding = self.targetEncoding # FIXME: confirm this is correct for key, value in self.info.items(): if isinstance(value, bytes): try: value = value.decode(encoding) # noqa: PLW2901 except Exception: log.warning(f"failed to decode info value: {key} = {value}") else: self.info[key] = value def setGlossaryInfo(self) -> None: glos = self._glos ### if self.sourceLang: glos.sourceLangName = self.sourceLang.name if self.sourceLang.name2: glos.setInfo("sourceLang2", self.sourceLang.name2) if self.targetLang: glos.targetLangName = self.targetLang.name if self.targetLang.name2: glos.setInfo("targetLang2", self.targetLang.name2) ### for attr in ( "defaultCharset", "sourceCharset", "targetCharset", "defaultEncoding", "sourceEncoding", "targetEncoding", ): value = getattr(self, attr, None) if value: glos.setInfo("bgl_" + attr, value) ### glos.setInfo("sourceCharset", "UTF-8") glos.setInfo("targetCharset", "UTF-8") ### if "lastUpdated" not in self.info and "bgl_firstUpdated" in self.info: log.debug("replacing bgl_firstUpdated with lastUpdated") self.info["lastUpdated"] = self.info.pop("bgl_firstUpdated") ### for key, value in self.info.items(): s_value = str(value).strip("\x00") if not s_value: continue # TODO: a bool flag to add empty value infos? # leave "creationTime" and "lastUpdated" as is if key == "utf8Encoding": key = "bgl_" + key # noqa: PLW2901 try: glos.setInfo(key, s_value) except Exception: log.exception(f"key = {key}") def isEndOfDictData(self) -> bool: # noqa: PLR6301 """ Test for end of dictionary data. A bgl file stores dictionary data as a gzip compressed block. In other words, a bgl file stores a gzip data file inside. A gzip file consists of a series of "members". gzip data block in bgl consists of one member (I guess). Testing for block type returned by self.readBlock is not a reliable way to detect the end of gzip member. For example, consider "Airport Code Dictionary.BGL" dictionary. To reliably test for end of gzip member block we must use a number of undocumented variables of gzip.GzipFile class. 
self.file._new_member - true if the current member has been completely read from the input file self.file.extrasize - size of buffered data self.file.offset - offset in the input file after reading one gzip member current position in the input file is set to the first byte after gzip data We may get this offset: self.file_bgl.tell() The last 4 bytes of gzip block contains the size of the original (uncompressed) input data modulo 2^32 """ return False def close(self) -> None: if self.file: self.file.close() self.file = None def __del__(self) -> None: self.close() while unknownHtmlEntries: entity = unknownHtmlEntries.pop() log.debug(f"BGL: unknown html entity: {entity}") # returns False if error def readBlock(self, block: Block) -> bool: block.offset = self.file.tell() length = self.readBytes(1) if length == -1: log.debug("readBlock: length = -1") return False block.type = length & 0xF length >>= 4 if length < 4: length = self.readBytes(length + 1) if length == -1: log.error("readBlock: length = -1") return False else: length -= 4 self.file.flush() if length > 0: try: block.data = self.file.read(length) except Exception: # struct.error: unpack requires a string argument of length 4 # FIXME log.exception( "failed to read block data" f": numBlocks={self.numBlocks}" f", length={length}" f", filePos={self.file.tell()}", ) block.data = b"" return False else: block.data = b"" return True def readBytes(self, num: int) -> int: """Return -1 if error.""" if num < 1 or num > 4: log.error(f"invalid argument num={num}") return -1 self.file.flush() buf = self.file.read(num) if len(buf) == 0: log.debug("readBytes: end of file: len(buf)==0") return -1 if len(buf) != num: log.error( f"readBytes: expected to read {num} bytes, but found {len(buf)} bytes", ) return -1 return uintFromBytes(buf) def readType0(self, block: Block) -> bool: code = block.data[0] if code == 2: # this number is vary close to self.bgl_numEntries, # but does not always equal to the number of entries # see self.readType3, code == 12 as well # num = uintFromBytes(block.data[1:]) pass elif code == 8: self.defaultCharset = charsetInfoDecode(block.data[1:]) if not self.defaultCharset: log.warning("defaultCharset is not valid") else: self.logUnknownBlock(block) return False return True def readType2(self, block: Block) -> EntryType | None: """ Process type 2 block. Type 2 block is an embedded file (mostly Image or HTML). pass_num - pass number, may be 1 or 2 On the first pass self.sourceEncoding is not defined and we cannot decode file names. That is why the second pass is needed. The second pass is costly, it apparently increases total processing time. We should avoid the second pass if possible. Most of the dictionaries do not have valuable resources, and those that do, use file names consisting only of ASCII characters. We may process these resources on the second pass. If all files have been processed on the first pass, the second pass is not needed. All dictionaries I've processed so far use only ASCII chars in file names. Babylon glossary builder replaces names of files, like links to images, with what looks like a hash code of the file name, for example "8FFC5C68.png". 
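	For example (an illustrative layout, matching the parsing below):
	a type 2 block whose data is

		b"\x0c8FFC5C68.png" + file_bytes

	carries a one-byte name length (0x0c == 12), the 12-byte file name
	b"8FFC5C68.png", and then the embedded file's raw bytes.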
returns: DataEntry instance if the resource was successfully processed and None if failed """ # Embedded File (mostly Image or HTML) pos = 0 # name: Len = block.data[pos] pos += 1 if pos + Len > len(block.data): log.warning("reading block type 2: name too long") return None b_name = block.data[pos : pos + Len] pos += Len b_data = block.data[pos:] # if b_name in (b"C2EEF3F6.html", b"8EAF66FD.bmp"): # log.debug(f"Skipping useless file {b_name!r}") # return u_name = b_name.decode(self.sourceEncoding) return self._glos.newDataEntry( u_name, b_data, ) def readType3(self, block: Block) -> None: """ Reads block with type 3, and updates self.info returns None. """ code, b_value = uintFromBytes(block.data[:2]), block.data[2:] if not b_value: return # if not b_value.strip(b"\x00"): return # FIXME try: item = infoType3ByCode[code] except KeyError: if b_value.strip(b"\x00"): log.debug( f"Unknown info type code={code:#02x}, b_value={b_value!r}", ) return key = item.name decode = item.decode if key.endswith(".ico"): self.iconDataList.append((key, b_value)) return value = b_value if decode is None else decode(b_value) # `value` can be None, str, bytes or dict if not value: return if key == "bgl_about": self.aboutBytes = value["about"] self.aboutExt = value["about_extension"] return if isinstance(value, dict): self.info.update(value) return if item.attr: setattr(self, key, value) return self.info[key] = value def detectEncoding(self) -> None: # noqa: PLR0912 """Assign self.sourceEncoding and self.targetEncoding.""" utf8Encoding = self.info.get("utf8Encoding", False) if self._default_encoding_overwrite: self.defaultEncoding = self._default_encoding_overwrite elif self.defaultCharset: self.defaultEncoding = self.defaultCharset else: self.defaultEncoding = "cp1252" if self._source_encoding_overwrite: self.sourceEncoding = self._source_encoding_overwrite elif utf8Encoding: self.sourceEncoding = "utf-8" elif self.sourceCharset: self.sourceEncoding = self.sourceCharset elif self.sourceLang: self.sourceEncoding = self.sourceLang.encoding else: self.sourceEncoding = self.defaultEncoding if self._target_encoding_overwrite: self.targetEncoding = self._target_encoding_overwrite elif utf8Encoding: self.targetEncoding = "utf-8" elif self.targetCharset: self.targetEncoding = self.targetCharset elif self.targetLang: self.targetEncoding = self.targetLang.encoding else: self.targetEncoding = self.defaultEncoding def logUnknownBlock(self, block: Block) -> None: log.debug( f"Unknown block: type={block.type}" f", number={self.numBlocks}" f", data={block.data!r}", ) def __iter__(self) -> Iterator[EntryType]: # noqa: PLR0912 if not self.file: raise RuntimeError("iterating over a reader while it's not open") for fname, iconData in self.iconDataList: yield self._glos.newDataEntry(fname, iconData) if self.aboutBytes: yield self._glos.newDataEntry( "about" + self.aboutExt, self.aboutBytes, ) block = Block() while not self.isEndOfDictData(): if not self.readBlock(block): break if not block.data: continue if block.type == 2: yield self.readType2(block) elif block.type == 11: succeed, u_word, u_alts, u_defi = self.readEntry_Type11(block) if not succeed: continue yield self._glos.newEntry( [u_word] + u_alts, u_defi, ) elif block.type in {1, 7, 10, 11, 13}: pos = 0 # word: wordData = self.readEntryWord(block, pos) if not wordData: continue pos = wordData.pos # defi: succeed, pos, u_defi, _b_defi = self.readEntryDefi( block, pos, wordData, ) if not succeed: continue # now pos points to the first char after definition succeed, pos, 
u_alts = self.readEntryAlts( block, pos, wordData, ) if not succeed: continue yield self._glos.newEntry( [wordData.u_word] + u_alts, u_defi, ) def readEntryWord( self, block: Block, pos: int, ) -> EntryWordData | None: """ Read word part of entry. Return None on error """ if pos + 1 > len(block.data): log.error( f"reading block offset={block.offset:#02x}" ", reading word size: pos + 1 > len(block.data)", ) return None Len = block.data[pos] pos += 1 if pos + Len > len(block.data): log.error( f"reading block offset={block.offset:#02x}" f", block.type={block.type}" ", reading word: pos + Len > len(block.data)", ) return None b_word = block.data[pos : pos + Len] u_word, u_word_html = self.processKey(b_word) """ Entry keys may contain html text, for example: ante< meridiem arm und reich c=t>2003;
    und etc. Babylon does not process keys as html, it display them as is. Html in keys is the problem of that particular dictionary. We should not process keys as html, since Babylon do not process them as such. """ pos += Len self.wordLenMax = max(self.wordLenMax, len(u_word)) return EntryWordData( pos=pos, u_word=u_word.strip(), b_word=b_word.strip(), u_word_html=u_word_html, ) def readEntryDefi( self, block: Block, pos: int, word: EntryWordData, ) -> tuple[bool, int | None, bytes | None, bytes | None]: """ Read defi part of entry. Return value is a list. (False, None, None, None) if error (True, pos, u_defi, b_defi) if OK u_defi is a str instance (utf-8) b_defi is a bytes instance """ Err = (False, None, None, None) if pos + 2 > len(block.data): log.error( f"reading block offset={block.offset:#02x}" ", reading defi size: pos + 2 > len(block.data)", ) return Err Len = uintFromBytes(block.data[pos : pos + 2]) pos += 2 if pos + Len > len(block.data): log.error( f"reading block offset={block.offset:#02x}" f", block.type={block.type}" ", reading defi: pos + Len > len(block.data)", ) return Err b_defi = block.data[pos : pos + Len] u_defi = self.processDefi(b_defi, word.b_word) # I was going to add this u_word_html or "formatted headword" to defi, # so to lose this information, but after looking at the diff # for 8 such glossaries, I decided it's not useful enough! # if word.u_word_html: # u_defi = f"
    {word.u_word_html}
    " + u_defi self.defiMaxBytes = max(self.defiMaxBytes, len(b_defi)) pos += Len return True, pos, u_defi, b_defi def readEntryAlts( self, block: Block, pos: int, word: EntryWordData, ) -> tuple[bool, int | None, list[str] | None]: """ Returns ------- (False, None, None) if error (True, pos, u_alts) if succeed u_alts is a sorted list, items are str (utf-8). """ Err = (False, None, None) # use set instead of list to prevent duplicates u_alts = set() while pos < len(block.data): if pos + 1 > len(block.data): log.error( f"reading block offset={block.offset:#02x}" ", reading alt size: pos + 1 > len(block.data)", ) return Err Len = block.data[pos] pos += 1 if pos + Len > len(block.data): log.error( f"reading block offset={block.offset:#02x}" f", block.type={block.type}" ", reading alt: pos + Len > len(block.data)", ) return Err b_alt = block.data[pos : pos + Len] u_alt = self.processAlternativeKey(b_alt, word.b_word) # Like entry key, alt is not processed as html by babylon, # so do we. u_alts.add(u_alt) pos += Len u_alts.discard(word.u_word) return True, pos, sorted(u_alts) def readEntry_Type11( self, block: Block, ) -> tuple[bool, str | None, list[str] | None, str | None]: """Return (succeed, u_word, u_alts, u_defi).""" Err = (False, None, None, None) pos = 0 # reading headword if pos + 5 > len(block.data): log.error( f"reading block offset={block.offset:#02x}" ", reading word size: pos + 5 > len(block.data)", ) return Err wordLen = uintFromBytes(block.data[pos : pos + 5]) pos += 5 if pos + wordLen > len(block.data): log.error( f"reading block offset={block.offset:#02x}" f", block.type={block.type}" ", reading word: pos + wordLen > len(block.data)", ) return Err b_word = block.data[pos : pos + wordLen] u_word, _u_word_html = self.processKey(b_word) pos += wordLen self.wordLenMax = max(self.wordLenMax, len(u_word)) # reading alts and defi if pos + 4 > len(block.data): log.error( f"reading block offset={block.offset:#02x}" ", reading defi size: pos + 4 > len(block.data)", ) return Err altsCount = uintFromBytes(block.data[pos : pos + 4]) pos += 4 # reading alts # use set instead of list to prevent duplicates u_alts = set() for _ in range(altsCount): if pos + 4 > len(block.data): log.error( f"reading block offset={block.offset:#02x}" ", reading alt size: pos + 4 > len(block.data)", ) return Err altLen = uintFromBytes(block.data[pos : pos + 4]) pos += 4 if altLen == 0: if pos + altLen != len(block.data): # no evidence log.warning( f"reading block offset={block.offset:#02x}" ", reading alt size: pos + altLen != len(block.data)", ) break if pos + altLen > len(block.data): log.error( f"reading block offset={block.offset:#02x}" f", block.type={block.type}" ", reading alt: pos + altLen > len(block.data)", ) return Err b_alt = block.data[pos : pos + altLen] u_alt = self.processAlternativeKey(b_alt, b_word) # Like entry key, alt is not processed as html by babylon, # so do we. 
u_alts.add(u_alt) pos += altLen u_alts.discard(u_word) # reading defi defiLen = uintFromBytes(block.data[pos : pos + 4]) pos += 4 if pos + defiLen > len(block.data): log.error( f"reading block offset={block.offset:#02x}" f", block.type={block.type}" ", reading defi: pos + defiLen > len(block.data)", ) return Err b_defi = block.data[pos : pos + defiLen] u_defi = self.processDefi(b_defi, b_word) self.defiMaxBytes = max(self.defiMaxBytes, len(b_defi)) pos += defiLen return True, u_word, sorted(u_alts), u_defi def charReferencesStat(self, b_text: bytes, encoding: str) -> None: pass @staticmethod def decodeCharsetTagsBabylonReference(b_text: bytes, b_text2: bytes) -> str: b_refs = b_text2.split(b";") add_text = "" for i_ref, b_ref in enumerate(b_refs): if not b_ref: if i_ref != len(b_refs) - 1: log.debug( f"decoding charset tags, b_text={b_text!r}" "\nblank character" f" reference ({b_text2!r})\n", ) continue if not re_b_reference.match(b_ref): log.debug( f"decoding charset tags, b_text={b_text!r}" "\ninvalid character" f" reference ({b_text2!r})\n", ) continue add_text += chr(int(b_ref, 16)) return add_text def decodeCharsetTagsTextBlock( self, encoding: str, b_text: bytes, b_part: bytes, ) -> str: b_text2 = b_part if encoding == "babylon-reference": return self.decodeCharsetTagsBabylonReference(b_text, b_text2) self.charReferencesStat(b_text2, encoding) if encoding == "cp1252": b_text2 = replaceAsciiCharRefs(b_text2) if self._strict_string_conversion: try: u_text2 = b_text2.decode(encoding) except UnicodeError: log.debug( f"decoding charset tags, b_text={b_text!r}" f"\nfragment: {b_text2!r}" "\nconversion error:\n" + excMessage(), ) u_text2 = b_text2.decode(encoding, "replace") else: u_text2 = b_text2.decode(encoding, "replace") return u_text2 def decodeCharsetTags( # noqa: PLR0912 self, b_text: bytes, defaultEncoding: str, ) -> tuple[str, str]: """ b_text is a bytes Decode html text taking into account charset tags and default encoding. Return value: (u_text, defaultEncodingOnly) u_text is str defaultEncodingOnly parameter is false if the text contains parts encoded with non-default encoding (babylon character references '00E6;' do not count). 
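	An illustrative example (assuming the charset tag form that the
	re_charset_decode pattern above was built to match): with
	defaultEncoding "cp1252", an input such as

		b"caf<charset c=U>\xc3\xa9</charset>"

	decodes to ("café", False): the tagged fragment is decoded as UTF-8,
	the surrounding text with the default encoding, and defaultEncodingOnly
	is False because a non-default encoding was used.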
""" b_parts = re_charset_decode.split(b_text) u_text = "" encodings: list[str] = [] # stack of encodings defaultEncodingOnly = True for i, b_part in enumerate(b_parts): if i % 3 == 0: # text block encoding = encodings[-1] if encodings else defaultEncoding u_text += self.decodeCharsetTagsTextBlock(encoding, b_text, b_part) if encoding != defaultEncoding: defaultEncodingOnly = False continue if i % 3 == 1: # or if b_part.startswith(b" if encodings: encodings.pop() else: log.debug( f"decoding charset tags, b_text={b_text!r}" "\nunbalanced tag\n", ) continue # b_type = b_parts[i + 1].lower() # b_type is a bytes instance, with length 1 if b_type == b"t": encodings.append("babylon-reference") elif b_type == b"u": encodings.append("utf-8") elif b_type == b"k": # noqa: SIM114 encodings.append(self.sourceEncoding) elif b_type == b"e": encodings.append(self.sourceEncoding) elif b_type == b"g": # gbk or gb18030 encoding # (not enough data to make distinction) encodings.append("gbk") else: log.debug( f"decoding charset tags, text = {b_text!r}" f"\nunknown charset code = {ord(b_type):#02x}\n", ) # add any encoding to prevent # "unbalanced tag" error encodings.append(defaultEncoding) continue # c attribute of charset tag if the previous tag was charset if encodings: log.debug( f"decoding charset tags, text={b_text}\nunclosed tag\n", ) return u_text, defaultEncodingOnly def processKey(self, b_word: bytes) -> tuple[str, str]: """ b_word is a bytes instance returns (u_word: str, u_word_html: str) u_word_html is empty unless it's different from u_word. """ b_word, strip_count = stripDollarIndexes(b_word) if strip_count > 1: log.debug( f"processKey({b_word}):\nnumber of dollar indexes = {strip_count}", ) # convert to unicode if self._strict_string_conversion: try: u_word = b_word.decode(self.sourceEncoding) except UnicodeError: log.debug( f"processKey({b_word}):\nconversion error:\n" + excMessage(), ) u_word = b_word.decode( self.sourceEncoding, "ignore", ) else: u_word = b_word.decode(self.sourceEncoding, "ignore") u_word_html = "" if self._process_html_in_key: u_word = replaceHtmlEntriesInKeys(u_word) # u_word = u_word.replace("
    ", "").replace("
    ", "")\ # .replace("
    ", "").replace("
    ", "") u_word_copy = u_word u_word = stripHtmlTags(u_word) if u_word != u_word_copy: u_word_html = u_word_copy # if(re.match(".*[&<>].*", _u_word_copy)): # log.debug("original text: " + _u_word_copy + "\n" \ # + "new text: " + u_word + "\n") u_word = removeControlChars(u_word) u_word = removeNewlines(u_word) u_word = u_word.lstrip() if self._key_rstrip_chars: u_word = u_word.rstrip(self._key_rstrip_chars) return u_word, u_word_html def processAlternativeKey(self, b_word: bytes, b_key: bytes) -> str: """ b_word is a bytes instance returns u_word_main, as str instance (utf-8 encoding). """ b_word_main, _strip_count = stripDollarIndexes(b_word) # convert to unicode if self._strict_string_conversion: try: u_word_main = b_word_main.decode(self.sourceEncoding) except UnicodeError: log.debug( f"processAlternativeKey({b_word})\nkey = {b_key}" ":\nconversion error:\n" + excMessage(), ) u_word_main = b_word_main.decode(self.sourceEncoding, "ignore") else: u_word_main = b_word_main.decode(self.sourceEncoding, "ignore") # strip "/" before words u_word_main = self.stripSlashAltKeyPattern.sub( r"\1\2", u_word_main, ) if self._process_html_in_key: # u_word_main_orig = u_word_main u_word_main = stripHtmlTags(u_word_main) u_word_main = replaceHtmlEntriesInKeys(u_word_main) # if(re.match(".*[&<>].*", u_word_main_orig)): # log.debug("original text: " + u_word_main_orig + "\n" \ # + "new text: " + u_word_main + "\n") u_word_main = removeControlChars(u_word_main) u_word_main = removeNewlines(u_word_main) u_word_main = u_word_main.lstrip() return u_word_main.rstrip(self._key_rstrip_chars) # TODO: break it down # PLR0912 Too many branches (20 > 12) # PLR0915 Too many statements (60 > 50) def processDefi(self, b_defi: bytes, b_key: bytes) -> str: # noqa: PLR0912, PLR0915 """ b_defi: bytes b_key: bytes. return: u_defi_format """ fields = DefinitionFields() self.collectDefiFields(b_defi, b_key, fields) fields.u_defi, fields.singleEncoding = self.decodeCharsetTags( fields.b_defi, self.targetEncoding, ) if fields.singleEncoding: fields.encoding = self.targetEncoding fields.u_defi = fixImgLinks(fields.u_defi) fields.u_defi = replaceHtmlEntries(fields.u_defi) fields.u_defi = removeControlChars(fields.u_defi) fields.u_defi = normalizeNewlines(fields.u_defi) fields.u_defi = fields.u_defi.strip() if fields.b_title: fields.u_title, _singleEncoding = self.decodeCharsetTags( fields.b_title, self.sourceEncoding, ) fields.u_title = replaceHtmlEntries(fields.u_title) fields.u_title = removeControlChars(fields.u_title) if fields.b_title_trans: # sourceEncoding or targetEncoding ? fields.u_title_trans, _singleEncoding = self.decodeCharsetTags( fields.b_title_trans, self.sourceEncoding, ) fields.u_title_trans = replaceHtmlEntries(fields.u_title_trans) fields.u_title_trans = removeControlChars(fields.u_title_trans) if fields.b_transcription_50: if fields.code_transcription_50 == 0x10: # contains values like this (char codes): # 00 18 00 19 00 1A 00 1B 00 1C 00 1D 00 1E 00 40 00 07 # this is not utf-16 # what is this? pass elif fields.code_transcription_50 == 0x1B: fields.u_transcription_50, _singleEncoding = self.decodeCharsetTags( fields.b_transcription_50, self.sourceEncoding, ) fields.u_transcription_50 = replaceHtmlEntries( fields.u_transcription_50, ) fields.u_transcription_50 = removeControlChars( fields.u_transcription_50, ) elif fields.code_transcription_50 == 0x18: # incomplete text like: # t c=T>02D0;g0259;- # This defi normally contains fields.b_transcription_60 # in this case. 
pass else: log.debug( f"processDefi({b_defi})\nb_key = {b_key}" ":\ndefi field 50" f", unknown code: {fields.code_transcription_50:#02x}", ) if fields.b_transcription_60: if fields.code_transcription_60 == 0x1B: fields.u_transcription_60, _singleEncoding = self.decodeCharsetTags( fields.b_transcription_60, self.sourceEncoding, ) fields.u_transcription_60 = replaceHtmlEntries( fields.u_transcription_60, ) fields.u_transcription_60 = removeControlChars( fields.u_transcription_60, ) else: log.debug( f"processDefi({b_defi})\nb_key = {b_key}" ":\ndefi field 60" f", unknown code: {fields.code_transcription_60:#02x}", ) if fields.b_field_1a: fields.u_field_1a, _singleEncoding = self.decodeCharsetTags( fields.b_field_1a, self.sourceEncoding, ) log.info(f"------- u_field_1a = {fields.u_field_1a}") self.processDefiStat(fields, b_defi, b_key) u_defi_format = "" if fields.partOfSpeech or fields.u_title: if fields.partOfSpeech: pos = xml_escape(fields.partOfSpeech) posColor = self._part_of_speech_color u_defi_format += f'{pos}' if fields.u_title: if u_defi_format: u_defi_format += " " u_defi_format += fields.u_title u_defi_format += "
    \n" if fields.u_title_trans: u_defi_format += fields.u_title_trans + "
    \n" if fields.u_transcription_50: u_defi_format += f"[{fields.u_transcription_50}]
    \n" if fields.u_transcription_60: u_defi_format += f"[{fields.u_transcription_60}]
    \n" if fields.u_defi: u_defi_format += fields.u_defi return u_defi_format.removesuffix("
    ").removesuffix("
    ") def processDefiStat( self, fields: DefinitionFields, b_defi: bytes, b_key: bytes, ) -> None: pass def findDefiFieldsStart(self, b_defi: bytes) -> int: r""" Find the beginning of the definition trailing fields. Return value is the index of the first chars of the field set, or -1 if the field set is not found. Normally "\x14" should signal the beginning of the definition fields, but some articles may contain this characters inside, so we get false match. As a workaround we may check the following chars. If "\x14" is followed by space, we assume this is part of the article and continue search. Unfortunately this does no help in many cases... """ if self._no_control_sequence_in_defi: return -1 index = -1 while True: index = b_defi.find( 0x14, index + 1, # starting from next character -1, # not the last character ) if index == -1: break if b_defi[index + 1] != 0x20: # b" "[0] == 0x20 break return index # TODO: break it down # PLR0912 Too many branches (41 > 12) # PLR0915 Too many statements (121 > 50) def collectDefiFields( # noqa: PLR0912, PLR0915 self, b_defi: bytes, b_key: bytes, fields: DefinitionFields, ) -> None: r""" Entry definition structure:
    ['\x14'[{field_code}{field_data}]*] {field_code} is one character {field_data} has arbitrary length. """ # d0 is index of the '\x14 char in b_defi # d0 may be the last char of the string d0 = self.findDefiFieldsStart(b_defi) if d0 == -1: fields.b_defi = b_defi return fields.b_defi = b_defi[:d0] i = d0 + 1 while i < len(b_defi): if self.metadata2: self.metadata2.defiTrailingFields[b_defi[i]] += 1 if b_defi[i] == 0x02: # part of speech # "\x02" if fields.partOfSpeech: log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}" ":\nduplicate part of speech item", ) if i + 1 >= len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\nb_defi ends after \\x02", ) return posCode = b_defi[i + 1] try: fields.partOfSpeech = partOfSpeechByCode[posCode] except KeyError: log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}" f":\nunknown part of speech code = {posCode:#02x}", ) return i += 2 elif b_defi[i] == 0x06: # \x06 if fields.b_field_06: log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\nduplicate type 6", ) if i + 1 >= len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\nb_defi ends after \\x06", ) return fields.b_field_06 = b_defi[i + 1] i += 2 elif b_defi[i] == 0x07: # \x07 # Found in 4 Hebrew dictionaries. I do not understand. if i + 3 > len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\ntoo few data after \\x07", ) return fields.b_field_07 = b_defi[i + 1 : i + 3] i += 3 elif b_defi[i] == 0x13: # "\x13" # known values: # 03 06 0D C7 # 04 00 00 00 44 # ... # 04 00 00 00 5F if i + 1 >= len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\ntoo few data after \\x13", ) return Len = b_defi[i + 1] i += 2 if Len == 0: log.debug( f"collecting definition fields, b_defi = {b_defi!r}\n" f"b_key = {b_key!r}:\nblank data after \\x13", ) continue if i + Len > len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}\n" f"b_key = {b_key!r}:\ntoo few data after \\x13", ) return fields.b_field_13 = b_defi[i : i + Len] i += Len elif b_defi[i] == 0x18: # \x18 if fields.b_title: log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"b_key = {b_key!r}:\nduplicate entry title item", ) if i + 1 >= len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}\n" f"b_key = {b_key!r}:\nb_defi ends after \\x18", ) return i += 1 Len = b_defi[i] i += 1 if Len == 0: # log.debug( # f"collecting definition fields, b_defi = {b_defi!r}\n" # f"b_key = {b_key!r}:\nblank entry title" # ) continue if i + Len > len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}\n" f"b_key = {b_key!r}:\ntitle is too long", ) return fields.b_title = b_defi[i : i + Len] i += Len elif b_defi[i] == 0x1A: # "\x1a" # found only in Hebrew dictionaries, I do not understand. 
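# Observed layout, matching the parsing below: a one-byte length follows
# the 0x1a marker, then that many payload bytes (kept as b_field_1a).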
if i + 1 >= len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key}:\ntoo few data after \\x1a", ) return Len = b_defi[i + 1] i += 2 if Len == 0: log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\nblank data after \\x1a", ) continue if i + Len > len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\ntoo few data after \\x1a", ) return fields.b_field_1a = b_defi[i : i + Len] i += Len elif b_defi[i] == 0x28: # "\x28" # title with transcription? if i + 2 >= len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\ntoo few data after \\x28", ) return i += 1 Len = uintFromBytes(b_defi[i : i + 2]) i += 2 if Len == 0: log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\nblank data after \\x28", ) continue if i + Len > len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\ntoo few data after \\x28", ) return fields.b_title_trans = b_defi[i : i + Len] i += Len elif 0x40 <= b_defi[i] <= 0x4F: # [\x41-\x4f] # often contains digits as text: # 56 # ælps - key Alps # 48@i # has no apparent influence on the article code = b_defi[i] Len = b_defi[i] - 0x3F if i + 2 + Len > len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\ntoo few data after \\x40+", ) return i += 2 b_text = b_defi[i : i + Len] i += Len log.debug( f"unknown definition field {code:#02x}, b_text={b_text!r}", ) elif b_defi[i] == 0x50: # \x50 if i + 2 >= len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\ntoo few data after \\x50", ) return fields.code_transcription_50 = b_defi[i + 1] Len = b_defi[i + 2] i += 3 if Len == 0: log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\nblank data after \\x50", ) continue if i + Len > len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\ntoo few data after \\x50", ) return fields.b_transcription_50 = b_defi[i : i + Len] i += Len elif b_defi[i] == 0x60: # "\x60" if i + 4 > len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\ntoo few data after \\x60", ) return fields.code_transcription_60 = b_defi[i + 1] i += 2 Len = uintFromBytes(b_defi[i : i + 2]) i += 2 if Len == 0: log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\nblank data after \\x60", ) continue if i + Len > len(b_defi): log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}:\ntoo few data after \\x60", ) return fields.b_transcription_60 = b_defi[i : i + Len] i += Len else: log.debug( f"collecting definition fields, b_defi = {b_defi!r}" f"\nb_key = {b_key!r}" f":\nunknown control char. Char code = {b_defi[i]:#02x}", ) return pyglossary-5.0.9/pyglossary/plugins/babylon_bgl/reader_debug.py000066400000000000000000000342621476751035500251160ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2008-2021 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . from __future__ import annotations import gzip import os import re from dataclasses import dataclass from os.path import join from pyglossary.core import log from pyglossary.text_utils import ( toStr, uintFromBytes, ) from .reader import BGLGzipFile, Block, FileOffS, Reader, tmpDir def isASCII(data: str) -> bool: for c in data: # noqa: SIM110 if ord(c) >= 128: return False return True class MetaData: def __init__(self) -> None: self.blocks = [] self.numEntries = None self.numBlocks = None self.numFiles = None self.gzipStartOffset = None self.gzipEndOffset = None self.fileSize = None self.bglHeader = None # data before gzip header @dataclass class MetaDataBlock: data: bytes type: str @dataclass class MetaDataRange: type: str count: int class MetaData2: """ Second pass metadata. We need to scan all definitions in order to collect these statistical data. """ def __init__(self) -> None: # defiTrailingFields[i] - number of fields with code i found self.defiTrailingFields = [0] * 256 self.isDefiASCII = True # isDefiASCII = true if all definitions contain only ASCII chars """ We apply a number of tests to each definition, excluding those with overwritten encoding (they start with ). defiProcessedCount - total number of definitions processed defiUtf8Count - number of definitions in utf8 encoding defiAsciiCount - number of definitions containing only ASCII chars """ self.defiProcessedCount = 0 self.defiUtf8Count = 0 self.defiAsciiCount = 0 self.charRefs = {} # encoding -> [ 0 ] * 257 class GzipWithCheck: """ gzip.GzipFile with check. It checks that unpacked data match what was packed. """ def __init__( self, fileobj, unpackedPath, reader, closeFileobj=False, ) -> None: """ Constructor. fileobj - gzip file - archive unpackedPath - path of a file containing original data, for testing. reader - reference to BGL Reader class instance, used for logging. 
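closeFileobj - close fileobj when this wrapper is closed.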
""" self.file = BGLGzipFile( fileobj=fileobj, closeFileobj=closeFileobj, ) self.unpackedFile = open(unpackedPath, "rb") # noqa: SIM115 self.reader = reader def __del__(self) -> None: self.close() def close(self) -> None: if self.file: self.file.close() self.file = None if self.unpackedFile: self.unpackedFile.close() self.unpackedFile = None def read(self, size=-1): buf1 = self.file.read(size) buf2 = self.unpackedFile.read(size) if buf1 != buf2: self.reader.msgLogFileWrite( f"GzipWithCheck.read: !=: size = {buf1}, ({buf2}) ({size})", ) # else: # self.reader.msgLogFileWrite( # f"GzipWithCheck.read: ==: size = {buf1}, ({buf2}) ({size})", # ) return buf1 def seek(self, offset, whence=os.SEEK_SET): self.file.seek(offset, whence) self.unpackedFile.seek(offset, whence) # self.reader.msgLogFileWrite( # f"GzipWithCheck.seek: offset = {offset}, whence = {whence}", # ) def tell(self): pos1 = self.file.tell() pos2 = self.unpackedFile.tell() if pos1 != pos2: self.reader.msgLogFileWrite( f"GzipWithCheck.tell: !=: {pos1} {pos2}", ) # else: # self.reader.msgLogFileWrite( # f"GzipWithCheck.tell: ==: {pos1} {pos2}", # ) return pos1 def flush(self): if os.sep == "\\": pass # a bug in Windows # after file.flush, file.read returns garbage else: self.file.flush() self.unpackedFile.flush() class DebugReader(Reader): _collect_metadata2: bool = False _search_char_samples: bool = False _write_gz: bool = False _raw_dump_path: str = "" _unpacked_gzip_path: str = "" _char_samples_path: str = "" _msg_log_path: str = "" def open( self, filename, ): if not Reader.open(self, filename): return self.metadata2 = MetaData2() if self._collect_metadata2 else None if self._search_char_samples: self.targetCharsArray = [False] * 256 else: self.targetCharsArray = None if self._raw_dump_path: self.rawDumpFile = open(self._raw_dump_path, "w", encoding="utf-8") if self._char_samples_path: self.samplesDumpFile = open(self._char_samples_path, "w", encoding="utf-8") if self._msg_log_path: self.msgLogFile = open(self._msg_log_path, "w", encoding="utf-8") self.charRefStatPattern = re.compile(b"(&#\\w+;)", re.IGNORECASE) def openGzip(self): with open(self._filename, "rb") as bglFile: if not bglFile: log.error(f"file pointer empty: {bglFile}") return False buf = bglFile.read(6) if len(buf) < 6 or buf[:4] not in { b"\x12\x34\x00\x01", b"\x12\x34\x00\x02", }: log.error(f"invalid header: {buf[:6]!r}") return False self.gzipOffset = gzipOffset = uintFromBytes(buf[4:6]) log.debug(f"Position of gz header: {gzipOffset}") if gzipOffset < 6: log.error(f"invalid gzip header position: {gzipOffset}") return False if self._write_gz: self.dataFile = self._filename + "-data.gz" try: f2 = open(self.dataFile, "wb") except OSError: log.exception("error while opening gzip data file") self.dataFile = join( tmpDir, os.path.split(self.m_filename)[-1] + "-data.gz", ) f2 = open(self.dataFile, "wb") bglFile.seek(gzipOffset) f2.write(bglFile.read()) f2.close() self.file = gzip.open(self.dataFile, "rb") return None f2 = FileOffS(self._filename, gzipOffset) if self._unpacked_gzip_path: self.file = GzipWithCheck( f2, self._unpacked_gzip_path, self, closeFileobj=True, ) return None self.file = BGLGzipFile( fileobj=f2, closeFileobj=True, ) return None def close(self) -> None: Reader.close(self) if self.rawDumpFile: self.rawDumpFile.close() self.rawDumpFile = None if self.msgLogFile: self.msgLogFile.close() self.msgLogFile = None if self.samplesDumpFile: self.samplesDumpFile.close() self.samplesDumpFile = None def __del__(self) -> None: Reader.__del__(self) def 
readEntryWord(self, block, pos): succeed, pos, _u_word, b_word = Reader.readEntryWord(self, block, pos) if not succeed: return self.rawDumpFileWriteText(f"\n\nblock type = {block.type}\nkey = ") self.rawDumpFileWriteData(b_word) def readEntryDefi(self, block, pos, b_key): succeed, pos, _u_defi, b_defi = Reader.readEntryDefi(self, block, pos, b_key) if not succeed: return self.rawDumpFileWriteText("\ndefi = ") self.rawDumpFileWriteData(b_defi) """ def readEntryAlts(self, block, pos, b_key, key): succeed, pos, alts, b_alts = \ Reader.readEntryAlts(self, block, pos, b_key, key) if not succeed: return for b_alt in b_alts: self.rawDumpFileWriteText("\nalt = ") self.rawDumpFileWriteData(b_alt) """ def charReferencesStat(self, b_text, encoding): """b_text is bytes instance.""" # “ # ċ if not self.metadata2: return if encoding not in self.metadata2.charRefs: self.metadata2.charRefs[encoding] = [0] * 257 charRefs = self.metadata2.charRefs[encoding] for index, b_part in enumerate(self.charRefStatPattern.split(b_text)): if index % 2 != 1: continue try: code = ( int(b_part[3:-1], 16) if b_part[:3].lower() == "&#x" else int(b_part[2:-1]) ) except (ValueError, OverflowError): continue if code <= 0: continue code = min(code, 256) charRefs[code] += 1 # write text to dump file as is def rawDumpFileWriteText(self, text): # FIXME text = toStr(text) if self.rawDumpFile: self.rawDumpFile.write(text) # write data to dump file unambiguously representing control chars # escape "\" with "\\" # print control chars as "\xhh" def rawDumpFileWriteData(self, text): text = toStr(text) # the next function escapes too many chars, for example, it escapes äöü # self.rawDumpFile.write(text.encode("unicode_escape")) if self.rawDumpFile: self.rawDumpFile.write(text) def msgLogFileWrite(self, text): text = toStr(text) if self.msgLogFile: offset = self.msgLogFile.tell() # print offset in the log file to facilitate navigating this # log in hex editor # intended usage: # the log file is opened in a text editor and hex editor # use text editor to read error messages, use hex editor to # inspect char codes offsets allows to quickly jump to the right # place of the file hex editor self.msgLogFile.write(f"\noffset = {offset:#02x}\n") self.msgLogFile.write(text + "\n") else: log.debug(text) def samplesDumpFileWrite(self, text): text = toStr(text) if self.samplesDumpFile: offset = self.samplesDumpFile.tell() self.samplesDumpFile.write(f"\noffset = {offset:#02x}\n") self.samplesDumpFile.write(text + "\n") else: log.debug(text) def dumpBlocks(self, dumpPath): import pickle self.file.seek(0) metaData = MetaData() metaData.numFiles = 0 metaData.gzipStartOffset = self.gzipOffset self.numEntries = 0 self.numBlocks = 0 range_type = None range_count = 0 block = Block() while not self.isEndOfDictData(): log.debug( f"readBlock: offset {self.file.tell():#02x}, " f"unpacked offset {self.file.unpackedFile.tell():#02x}", ) if not self.readBlock(block): break self.numBlocks += 1 if block.type in {1, 7, 10, 11, 13}: self.numEntries += 1 elif block.type == 2: # Embedded File (mostly Image or HTML) metaData.numFiles += 1 if block.type in {1, 2, 7, 10, 11, 13}: if range_type == block.type: range_count += 1 else: if range_count > 0: mblock = MetaDataRange(range_type, range_count) metaData.blocks.append(mblock) range_count = 0 range_type = block.type range_count = 1 else: if range_count > 0: mblock = MetaDataRange(range_type, range_count) metaData.blocks.append(mblock) range_count = 0 mblock = MetaDataBlock(block.data, block.type) 
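# Runs of entry/resource blocks (types 1, 2, 7, 10, 11, 13) were collapsed
# into MetaDataRange above; any other block type is stored verbatim here.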
metaData.blocks.append(mblock) if range_count > 0: mblock = MetaDataRange(range_type, range_count) metaData.blocks.append(mblock) range_count = 0 metaData.numEntries = self.numEntries metaData.numBlocks = self.numBlocks metaData.gzipEndOffset = self.file_bgl.tell() metaData.fileSize = os.path.getsize(self._filename) with open(self._filename, "rb") as f: metaData.bglHeader = f.read(self.gzipOffset) with open(dumpPath, "wb") as f: pickle.dump(metaData, f) self.file.seek(0) def dumpMetadata2(self, dumpPath): import pickle if not self.metadata2: return with open(dumpPath, "wb") as f: pickle.dump(self.metadata2, f) def processDefiStat(self, fields, defi, b_key): # noqa: PLR0912 Reader.processDefiStat(self, fields, defi, b_key) if fields.b_title: self.rawDumpFileWriteText("\ndefi title: ") self.rawDumpFileWriteData(fields.b_title) if fields.b_title_trans: self.rawDumpFileWriteText("\ndefi title trans: ") self.rawDumpFileWriteData(fields.b_title_trans) if fields.b_transcription_50: self.rawDumpFileWriteText( f"\ndefi transcription_50 ({fields.code_transcription_50:#x}): ", ) self.rawDumpFileWriteData(fields.b_transcription_50) if fields.b_transcription_60: self.rawDumpFileWriteText( f"\ndefi transcription_60 ({fields.code_transcription_60:#x}): ", ) self.rawDumpFileWriteData(fields.b_transcription_60) if fields.b_field_1a: self.rawDumpFileWriteText("\ndefi field_1a: ") self.rawDumpFileWriteData(fields.b_field_1a) if fields.b_field_13: self.rawDumpFileWriteText( f"\ndefi field_13 bytes: {fields.b_field_13!r}", ) if fields.b_field_07: self.rawDumpFileWriteText("\ndefi field_07: ") self.rawDumpFileWriteData(fields.b_field_07) if fields.b_field_06: self.rawDumpFileWriteText( f"\ndefi field_06: {fields.b_field_06}", ) if fields.singleEncoding: self.findAndPrintCharSamples( fields.b_defi, f"defi, key = {b_key}", fields.encoding, ) if self.metadata2: self.metadata2.defiProcessedCount += 1 if isASCII(toStr(fields.b_defi)): self.metadata2.defiAsciiCount += 1 try: fields.b_defi.decode("utf-8") except UnicodeError: pass else: self.metadata2.defiUtf8Count += 1 if self.metadata2 and self.metadata2.isDefiASCII and not isASCII(fields.u_defi): self.metadata2.isDefiASCII = False # search for new chars in data # if new chars are found, mark them with a special sequence in the text # and print result into msg log def findAndPrintCharSamples(self, b_data: bytes, hint, encoding): if not self.targetCharsArray: return offsets = self.findCharSamples(b_data) if len(offsets) == 0: return res = "" utf8 = encoding.lower() == "utf-8" i = 0 for o in offsets: j = o if utf8: while b_data[j] & 0xC0 == 0x80: j -= 1 res += b_data[i:j] res += "!!!--+!!!" i = j res += b_data[j:] offsets_str = " ".join([str(el) for el in offsets]) self.samplesDumpFileWrite( f"charSample({hint})\noffsets = {offsets_str}" f"\nmarked = {res}\norig = {b_data}\n", ) def findCharSamples(self, b_data: bytes) -> list[int]: """ Find samples of chars in b_data. Search for chars in data that have not been marked so far in the targetCharsArray array, mark new chars. Returns a list of offsets in b_data May return an empty list. 
""" res: list[int] = [] if not isinstance(b_data, bytes): log.error("findCharSamples: b_data is not a bytes instance") return res if not self.targetCharsArray: log.error( f"findCharSamples: self.targetCharsArray={self.targetCharsArray}", ) return res for i, char in enumerate(b_data): if char < 128: continue if not self.targetCharsArray[char]: self.targetCharsArray[char] = True res.append(i) return res pyglossary-5.0.9/pyglossary/plugins/babylon_bgl/tools.toml000066400000000000000000000012101476751035500241540ustar00rootroot00000000000000["Babylon Translator"] web = "https://www.babylon-software.com/" wiki = "https://en.wikipedia.org/wiki/Babylon_Software" platforms = [ "Windows",] license = "Freemium" [GoldenDict] web = "http://goldendict.org/" platforms = [ "Linux", "Windows",] license = "GPL" ["GoldenDict Mobile (Free)"] web = "http://goldendict.mobi/" web2 = "https://play.google.com/store/apps/details?id=mobi.goldendict.android.free" platforms = [ "Android",] license = "Freeware" ["GoldenDict Mobile (Full)"] web = "http://goldendict.mobi/" web2 = "https://play.google.com/store/apps/details?id=mobi.goldendict.android" platforms = [ "Android",] license = "Proprietary" pyglossary-5.0.9/pyglossary/plugins/cc_kedict/000077500000000000000000000000001476751035500215635ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/cc_kedict/__init__.py000066400000000000000000000012051476751035500236720ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from pyglossary.option import Option from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "cc_kedict" name = "cc-kedict" description = "cc-kedict" extensions = () extensionCreate = "" singleFile = True kind = "text" wiki = "" website = ( "https://github.com/mhagiwara/cc-kedict", "@mhagiwara/cc-kedict", ) optionsProp: dict[str, Option] = {} pyglossary-5.0.9/pyglossary/plugins/cc_kedict/reader.py000066400000000000000000000151241476751035500234020ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors from __future__ import annotations from io import BytesIO from os.path import isdir, join from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from collections.abc import Callable, Iterator import lxml from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.core import exc_note, log, pip from pyglossary.text_reader import TextGlossaryReader __all__ = ["Reader"] class YamlReader(TextGlossaryReader): useByteProgress = True tagStyle = ( "color:white;" "background:green;" "padding-left:3px;" "padding-right:3px;" "border-radius:0.5ex;" # 0.5ex ~= 0.3em, but "ex" is recommended ) def __init__( # noqa: PLR0913 self, glos: ReaderGlossaryType, spellKey: str = "", posKey: str = "", synsKey: str = "", tagsKey: str = "", ) -> None: TextGlossaryReader.__init__(self, glos) self._spellKey = spellKey self._posKey = posKey self._synsKey = synsKey self._tagsKey = tagsKey self._posMapping = { "n": "noun", "v": "verb", "a": "adjective", "pron": "pronoun", "propn": "proper noun", "intj": "interjection", "det": "determiner", "part": "particle", "adv": "adverb", "num": "number", "abbrev": "abbreviation", "suf": "suffix", "pref": "prefix", } @classmethod def isInfoWord(cls, _word: str) -> bool: return False @classmethod def fixInfoWord(cls, _word: str) -> str: return "" 
@staticmethod def _makeList( hf: lxml.etree.htmlfile, input_objects: list[Any], processor: Callable, single_prefix: str | None = None, skip_single: bool = True, ) -> None: """Wrap elements into <ol>
      if more than one element.""" if not input_objects: return if skip_single and len(input_objects) == 1: # if single_prefix is None: # single_prefix = ET.Element("br") if single_prefix: hf.write(single_prefix) processor(hf, input_objects[0], 1) return with hf.element("ol"): for el in input_objects: with hf.element("li"): processor(hf, el, len(input_objects)) def _processExample( # noqa: PLR6301 self, hf: lxml.etree.htmlfile, exampleDict: dict, _count: int, ) -> None: from lxml import etree as ET if not exampleDict.get("example"): log.error(f"invalid example: {exampleDict}") return hf.write(exampleDict["example"]) transliteration = exampleDict.get("transliteration") if transliteration: hf.write(ET.Element("br")) with hf.element("font", color="green"): hf.write(f"{transliteration}") translation = exampleDict.get("translation") if translation: hf.write(ET.Element("br")) with hf.element("i"): hf.write(f"{translation}") def _processDef( self, hf: lxml.etree.htmlfile, defDict: dict, count: int, ) -> None: from lxml import etree as ET text = defDict.get("def", "") if text: hf.write(text) examples = defDict.get("examples") if examples: if text: if count == 1: hf.write(ET.Element("br")) hf.write(ET.Element("br")) with hf.element("i"): hf.write("Examples:") self._makeList( hf, examples, self._processExample, skip_single=False, ) def _processNote( # noqa: PLR6301 self, hf: lxml.etree.htmlfile, note: str, _count: int, ) -> None: hf.write(note) def _processEntry( self, hf: lxml.etree.htmlfile, edict: dict, ) -> None: from lxml import etree as ET if self._spellKey and self._spellKey in edict: spelling = edict[self._spellKey] if not isinstance(spelling, str): log.error(f"{spelling=}, {type(spelling)=}, {edict=}") # https://github.com/mhagiwara/cc-kedict/pull/1 spelling = "on" if spelling is True else "" if spelling: with hf.element("font", color="green"): hf.write(spelling) hf.write(ET.Element("br")) if self._posKey and self._posKey in edict: pos = edict[self._posKey] pos = self._posMapping.get(pos, pos) with hf.element("i"): hf.write(pos.capitalize()) hf.write(ET.Element("br")) if self._tagsKey and self._tagsKey in edict: tags = edict[self._tagsKey] for i, tag in enumerate(tags): if i > 0: hf.write(" ") with hf.element("span", style=self.tagStyle): hf.write(tag) hf.write(ET.Element("br")) defs = edict.get("defs") if defs: self._makeList( hf, defs, self._processDef, ) if self._synsKey and self._synsKey in edict: hf.write("Synonyms: ") for i, word in enumerate(edict[self._synsKey]): if i > 0: with hf.element("big"): hf.write(" | ") # NESTED: 5 with hf.element("a", href=f"bword://{word}"): hf.write(word) hf.write(ET.Element("br")) notes = edict.get("notes") if notes: hf.write(ET.Element("br")) hf.write("Notes:") self._makeList( hf, notes, self._processNote, skip_single=False, ) def _createEntry( self, yamlBlock: str, ) -> tuple[str, str, None] | None: from lxml import etree as ET from yaml import load try: from yaml import CLoader as Loader except ImportError: from yaml import Loader edict = load(yamlBlock, Loader=Loader) word = edict.get("word") if not word: log.error(f"no word in {edict}") return None f = BytesIO() with ET.htmlfile(f, encoding="utf-8") as hf: with hf.element("div"): self._processEntry(hf, edict) defi = f.getvalue().decode("utf-8") return word, defi, None def nextBlock(self) -> EntryType: if not self._file: raise StopIteration lines: list[str] = [] while True: line = self.readline() if not line: break line = line.rstrip("\n\r") if not line: continue if line.startswith("- "): line = " " 
+ line[1:] if lines: self._bufferLine = line return self._createEntry("\n".join(lines)) lines.append(line) if lines: return self._createEntry("\n".join(lines)) raise StopIteration class Reader: depends = { "yaml": "PyYAML", "lxml": "lxml", } def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._yaml = YamlReader( glos, spellKey="romaja", posKey="pos", synsKey="syns", tagsKey="tags", ) def __len__(self) -> int: return 0 def open(self, filename: str) -> None: try: from lxml import etree as ET # noqa: F401 except ModuleNotFoundError as e: exc_note(e, f"Run `{pip} install lxml` to install") raise if isdir(filename): filename = join(filename, "kedict.yml") self._filename = filename self._glos.sourceLangName = "Korean" self._glos.targetLangName = "English" self._glos.setDefaultDefiFormat("h") self._yaml.open(filename) def close(self) -> None: self._yaml.close() def __iter__(self) -> Iterator[EntryType]: yield from self._yaml pyglossary-5.0.9/pyglossary/plugins/cc_kedict/tools.toml000066400000000000000000000000001476751035500236060ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/check-style000077700000000000000000000000001476751035500245702../../check-styleustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/crawler_dir/000077500000000000000000000000001476751035500221505ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/crawler_dir/__init__.py000066400000000000000000000012701476751035500242610ustar00rootroot00000000000000# mypy: ignore-errors from __future__ import annotations from pyglossary.option import ( Option, StrOption, ) from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "crawler_dir" name = "CrawlerDir" description = "Crawler Directory" extensions = (".crawler",) extensionCreate = ".crawler/" singleFile = True kind = "directory" wiki = "" website = None optionsProp: dict[str, Option] = { "compression": StrOption( values=["", "gz", "bz2", "lzma"], comment="Compression Algorithm", ), } pyglossary-5.0.9/pyglossary/plugins/crawler_dir/reader.py000066400000000000000000000042451476751035500237710ustar00rootroot00000000000000# mypy: ignore-errors from __future__ import annotations from os import listdir from os.path import isdir, isfile, join, splitext from typing import TYPE_CHECKING from pyglossary.compression import ( compressionOpenFunc, ) from pyglossary.core import log from pyglossary.text_utils import ( splitByBarUnescapeNTB, ) if TYPE_CHECKING: from collections.abc import Generator, Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] class Reader: useByteProgress = False def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._filename = None self._wordCount = 0 def open(self, filename: str) -> None: from pyglossary.json_utils import jsonToData self._filename = filename with open(join(filename, "info.json"), encoding="utf-8") as infoFp: info = jsonToData(infoFp.read()) self._wordCount = info.pop("wordCount") for key, value in info.items(): self._glos.setInfo(key, value) def close(self) -> None: pass def __len__(self) -> int: return self._wordCount def _fromFile(self, fpath: str) -> EntryType: _, ext = splitext(fpath) c_open = compressionOpenFunc(ext.lstrip(".")) if not c_open: log.error(f"invalid extension {ext}") c_open = open with c_open(fpath, "rt", encoding="utf-8") 
as _file: words = splitByBarUnescapeNTB(_file.readline().rstrip("\n")) defi = _file.read() return self._glos.newEntry(words, defi) @staticmethod def _listdirSortKey(name: str) -> str: name_nox, ext = splitext(name) if ext == ".d": return name return name_nox def _readDir( self, dpath: str, exclude: set[str] | None, ) -> Generator[EntryType, None, None]: children = listdir(dpath) if exclude: children = [name for name in children if name not in exclude] children.sort(key=self._listdirSortKey) for name in children: cpath = join(dpath, name) if isfile(cpath): yield self._fromFile(cpath) continue if isdir(cpath): yield from self._readDir(cpath, None) continue log.error(f"Not a file nor a directory: {cpath}") def __iter__(self) -> Iterator[EntryType]: yield from self._readDir( self._filename, { "info.json", }, ) pyglossary-5.0.9/pyglossary/plugins/crawler_dir/tools.toml000066400000000000000000000000001476751035500241730ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/crawler_dir/writer.py000066400000000000000000000042421476751035500240400ustar00rootroot00000000000000# mypy: ignore-errors from __future__ import annotations from hashlib import sha1 from os import makedirs from os.path import dirname, isdir, isfile, join from typing import TYPE_CHECKING from pyglossary.compression import ( compressionOpenFunc, ) from pyglossary.core import log from pyglossary.text_utils import ( escapeNTB, ) if TYPE_CHECKING: from pyglossary.glossary_types import WriterGlossaryType __all__ = ["Writer"] class Writer: _compression: str = "" def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = None def finish(self) -> None: pass def open(self, filename: str) -> None: self._filename = filename if not isdir(filename): makedirs(filename) @staticmethod def filePathFromWord(b_word: bytes) -> str: bw = b_word.lower() if len(bw) <= 2: return bw.hex() if len(bw) <= 4: return join( bw[:2].hex() + ".d", bw[2:].hex(), ) return join( bw[:2].hex() + ".d", bw[2:4].hex() + ".d", bw[4:8].hex() + "-" + sha1(b_word).hexdigest()[:8], # noqa: S324 ) def write(self) -> None: from pyglossary.json_utils import dataToPrettyJson filename = self._filename wordCount = 0 compression = self._compression c_open = compressionOpenFunc(compression) if not c_open: raise ValueError(f"invalid compression {compression!r}") while True: entry = yield if entry is None: break if entry.isData(): continue fpath = join(filename, self.filePathFromWord(entry.b_word)) if compression: fpath = f"{fpath}.{compression}" parentDir = dirname(fpath) if not isdir(parentDir): makedirs(parentDir) if isfile(fpath): log.warning(f"file exists: {fpath}") fpath += f"-{sha1(entry.b_defi).hexdigest()[:4]}" # noqa: S324 with c_open(fpath, "wt", encoding="utf-8") as _file: _file.write( f"{escapeNTB(entry.s_word)}\n{entry.defi}", ) wordCount += 1 with open( join(filename, "info.json"), mode="w", encoding="utf-8", ) as infoFile: info = {} info["name"] = self._glos.getInfo("name") info["wordCount"] = wordCount info |= self._glos.getExtraInfos(["name", "wordCount"]) infoFile.write(dataToPrettyJson(info)) pyglossary-5.0.9/pyglossary/plugins/csv_plugin/000077500000000000000000000000001476751035500220245ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/csv_plugin/__init__.py000066400000000000000000000037561476751035500241500ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2013-2019 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a 
free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . from __future__ import annotations import csv from pyglossary.option import ( BoolOption, EncodingOption, NewlineOption, Option, ) from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "csv" name = "Csv" description = "CSV (.csv)" extensions = (".csv",) extensionCreate = ".csv" singleFile = True kind = "text" wiki = "https://en.wikipedia.org/wiki/Comma-separated_values" website = None optionsProp: dict[str, Option] = { "encoding": EncodingOption(), "newline": NewlineOption(), "resources": BoolOption( comment="Enable resources / data files", ), "delimiter": Option( typ="str", customValue=True, values=[",", ";", "@"], comment="Column delimiter", ), "add_defi_format": BoolOption( comment="enable adding defiFormat (m/h/x)", ), "enable_info": BoolOption( comment="Enable glossary info / metedata", ), "word_title": BoolOption( comment="add headwords title to beginning of definition", ), } csv.field_size_limit(0x7FFFFFFF) pyglossary-5.0.9/pyglossary/plugins/csv_plugin/reader.py000066400000000000000000000110611476751035500236370ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2013-2019 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . 
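# Row layout handled below (a sketch of the convention, not a formal spec):
#   "#key","value"                  -> glossary info pair (leading "#" stripped)
#   "word","definition"             -> plain entry
#   "word","definition","alt1,alt2" -> entry plus comma-separated alternate words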
from __future__ import annotations import csv import os from os.path import isdir, join from typing import TYPE_CHECKING, cast from pyglossary.compression import ( compressionOpen, stdCompressions, ) from pyglossary.core import log from pyglossary.io_utils import nullTextIO if TYPE_CHECKING: import io from collections.abc import Iterable, Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] class Reader: useByteProgress = True compressions = stdCompressions _encoding: str = "utf-8" _newline: str = "\n" _delimiter: str = "," def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self.clear() def clear(self) -> None: self._filename = "" self._file: io.TextIOBase = nullTextIO self._fileSize = 0 self._leadingLinesCount = 0 self._wordCount: int | None = None self._pos = -1 self._csvReader: Iterable[list[str]] | None = None self._resDir = "" self._resFileNames: list[str] = [] self._bufferRow: list[str] | None = None def open( self, filename: str, ) -> None: from pyglossary.text_reader import TextFilePosWrapper self._filename = filename cfile = cast( "io.TextIOBase", compressionOpen( filename, mode="rt", encoding=self._encoding, newline=self._newline, ), ) if self._glos.progressbar: if cfile.seekable(): cfile.seek(0, 2) self._fileSize = cfile.tell() cfile.seek(0) # self._glos.setInfo("input_file_size", f"{self._fileSize}") else: log.warning("CSV Reader: file is not seekable") self._file = TextFilePosWrapper(cfile, self._encoding) self._csvReader = csv.reader( self._file, dialect="excel", delimiter=self._delimiter, ) self._resDir = filename + "_res" if isdir(self._resDir): self._resFileNames = os.listdir(self._resDir) else: self._resDir = "" self._resFileNames = [] for row in self._csvReader: if not row: continue if not row[0].startswith("#"): self._bufferRow = row break if len(row) < 2: log.error(f"invalid row: {row}") continue self._glos.setInfo(row[0].lstrip("#"), row[1]) def close(self) -> None: if self._file: try: self._file.close() except Exception: log.exception("error while closing csv file") self.clear() def __len__(self) -> int: from pyglossary.file_utils import fileCountLines if self._wordCount is None: if hasattr(self._file, "compression"): return 0 log.debug("Try not to use len(reader) as it takes extra time") self._wordCount = fileCountLines(self._filename) - self._leadingLinesCount return self._wordCount + len(self._resFileNames) def _iterRows(self) -> Iterator[list[str]]: if self._csvReader is None: raise RuntimeError("self._csvReader is None") if self._bufferRow: yield self._bufferRow yield from self._csvReader def _processRow(self, row: list[str]) -> EntryType | None: if not row: return None word: str | list[str] try: word = row[0] defi = row[1] except IndexError: log.error(f"invalid row: {row!r}") return None try: alts = row[2].split(",") except IndexError: pass else: word = [word] + alts return self._glos.newEntry( word, defi, byteProgress=( (self._file.tell(), self._fileSize) if self._fileSize else None ), ) def __iter__(self) -> Iterator[EntryType | None]: if not self._csvReader: raise RuntimeError("iterating over a reader while it's not open") wordCount = 0 for row in self._iterRows(): wordCount += 1 yield self._processRow(row) self._wordCount = wordCount resDir = self._resDir for fname in self._resFileNames: with open(join(resDir, fname), "rb") as _file: yield self._glos.newDataEntry( fname, _file.read(), ) 
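# Hedged stdlib-only sketch of the parsing done above (sample rows are made
# up; the real reader goes through compressionOpen and TextFilePosWrapper):
#
#   import csv, io
#   text = '"#name","Demo"\n"apple","a fruit","apples,pomme"\n'
#   for row in csv.reader(io.StringIO(text), dialect="excel", delimiter=","):
#       if row[0].startswith("#"):
#           pass  # info row: key "name", value "Demo"
#       else:
#           word, defi, alts = row[0], row[1], row[2].split(",")
#           # -> "apple", "a fruit", ["apples", "pomme"]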
pyglossary-5.0.9/pyglossary/plugins/csv_plugin/tools.toml000066400000000000000000000004111476751035500240550ustar00rootroot00000000000000["LibreOffice Calc"] web = "https://www.libreoffice.org/discover/calc/" platforms = [ "Linux", "Windows", "Mac",] license = "MPL/GPL" ["Microsoft Excel"] web = "https://www.microsoft.com/en-us/microsoft-365/excel" platforms = [ "Windows",] license = "Proprietary" pyglossary-5.0.9/pyglossary/plugins/csv_plugin/writer.py000066400000000000000000000060011476751035500237070ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2013-2019 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . from __future__ import annotations import csv import os from os.path import isdir from typing import TYPE_CHECKING, cast from pyglossary.compression import ( compressionOpen, stdCompressions, ) from pyglossary.io_utils import nullTextIO if TYPE_CHECKING: import io from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] class Writer: compressions = stdCompressions _encoding: str = "utf-8" _newline: str = "\n" _resources: bool = True _delimiter: str = "," _add_defi_format: bool = False _enable_info: bool = True _word_title: bool = False def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._file: io.TextIOBase = nullTextIO def open(self, filename: str) -> None: self._filename = filename self._file = cast( "io.TextIOBase", compressionOpen( filename, mode="wt", encoding=self._encoding, newline=self._newline, ), ) self._resDir = resDir = filename + "_res" self._csvWriter = csv.writer( self._file, dialect="excel", quoting=csv.QUOTE_ALL, # FIXME delimiter=self._delimiter, ) if not isdir(resDir): os.mkdir(resDir) if self._enable_info: for key, value in self._glos.iterInfo(): self._csvWriter.writerow([f"#{key}", value]) def finish(self) -> None: self._filename = "" self._file.close() self._file = nullTextIO if not os.listdir(self._resDir): os.rmdir(self._resDir) def write(self) -> Generator[None, EntryType, None]: resources = self._resources add_defi_format = self._add_defi_format glos = self._glos resDir = self._resDir writer = self._csvWriter word_title = self._word_title while True: entry = yield if entry is None: break if entry.isData(): if resources: entry.save(resDir) continue words = entry.l_word if not words: continue word, alts = words[0], words[1:] defi = entry.defi if word_title: defi = glos.wordTitleStr(words[0]) + defi row = [ word, defi, ] if add_defi_format: entry.detectDefiFormat() row.append(entry.defiFormat) if alts: row.append(",".join(alts)) writer.writerow(row) 
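# Hedged stdlib-only sketch of what write() above emits for one entry with
# add_defi_format enabled (the values are made up):
#
#   import csv, io
#   buf = io.StringIO()
#   w = csv.writer(buf, dialect="excel", quoting=csv.QUOTE_ALL, delimiter=",")
#   w.writerow(["apple", "a fruit", "h", "apples,pomme"])
#   # buf.getvalue() == '"apple","a fruit","h","apples,pomme"\r\n'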
pyglossary-5.0.9/pyglossary/plugins/dicformids/000077500000000000000000000000001476751035500217765ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/dicformids/__init__.py000066400000000000000000000015001476751035500241030ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from pyglossary.option import Option from pyglossary.flags import ALWAYS from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] lname = "dicformids" enable = True name = "Dicformids" description = "DictionaryForMIDs" extensions = (".mids",) extensionCreate = ".mids/" singleFile = False sortOnWrite = ALWAYS sortKeyName = "dicformids" sortEncoding = "utf-8" kind = "directory" wiki = "" website = ( "http://dictionarymid.sourceforge.net/", "DictionaryForMIDs - SourceForge", ) optionsProp: dict[str, Option] = {} pyglossary-5.0.9/pyglossary/plugins/dicformids/reader.py000066400000000000000000000035121476751035500236130ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors from __future__ import annotations import operator import os import re from os.path import join from typing import TYPE_CHECKING from pyglossary.plugins.tabfile import Reader as TabfileReader if TYPE_CHECKING: from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] class Reader: useByteProgress = False re_number = re.compile(r"\d+") def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._tabFileNames: list[str] = [] self._tabFileReader = None def open(self, dirname: str) -> None: self._dirname = dirname orderFileNames: list[tuple[int, str]] = [] for fname in os.listdir(dirname): if not fname.startswith("directory"): continue try: num = self.re_number.findall(fname)[-1] except IndexError: pass else: orderFileNames.append((num, fname)) orderFileNames.sort( key=operator.itemgetter(0), reverse=True, ) self._tabFileNames = [x[1] for x in orderFileNames] self.nextTabFile() def __len__(self) -> int: return 0 def __iter__(self) -> Iterator[EntryType]: return self def __next__(self) -> EntryType: for _ in range(10): try: return next(self._tabFileReader) except StopIteration: # noqa: PERF203 self._tabFileReader.close() self.nextTabFile() return None def nextTabFile(self) -> None: try: tabFileName = self._tabFileNames.pop() except IndexError: raise StopIteration from None self._tabFileReader = TabfileReader(self._glos, hasInfo=False) self._tabFileReader.open(join(self._dirname, tabFileName), newline="\n") def close(self) -> None: if self._tabFileReader: try: self._tabFileReader.close() except Exception: pass # noqa: S110 self._tabFileReader = None self._tabFileNames = [] pyglossary-5.0.9/pyglossary/plugins/dicformids/tools.toml000066400000000000000000000005041476751035500240320ustar00rootroot00000000000000[DictionaryForMIDs] web = "http://dictionarymid.sourceforge.net/" # https://sourceforge.net/projects/dictionarymid/ platforms = [ "Android", "Web", "Windows", "Linux", "Mac",] plang = "Java" # PC version is also Java-based license = "GPL" # android last commit: 2015/02/09 # android last release: 2015/02/09 - version 1.0.1 pyglossary-5.0.9/pyglossary/plugins/dicformids/writer.py000066400000000000000000000123431476751035500236670ustar00rootroot00000000000000# -*- coding: utf-8 
-*- # mypy: ignore-errors from __future__ import annotations import os import re from os.path import join from typing import TYPE_CHECKING from pyglossary.core import log if TYPE_CHECKING: from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] PROP_TEMPLATE = """#DictionaryForMIDs property file infoText={name}, author: {author} indexFileMaxSize={indexFileMaxSize}\n language1IndexNumberOfSourceEntries={wordCount} language1DictionaryUpdateClassName=de.kugihan.dictionaryformids.dictgen.DictionaryUpdate indexCharEncoding=ISO-8859-1 dictionaryFileSeparationCharacter='\\t' language2NormationClassName=de.kugihan.dictionaryformids.translation.Normation language2DictionaryUpdateClassName=de.kugihan.dictionaryformids.dictgen.DictionaryUpdate logLevel=0 language1FilePostfix={directoryPostfix} dictionaryCharEncoding=UTF-8 numberOfAvailableLanguages=2 language1IsSearchable=true language2GenerateIndex=false dictionaryFileMaxSize={dicMaxSize} language2FilePostfix={language2FilePostfix} searchListFileMaxSize=20000 language2IsSearchable=false fileEncodingFormat=plain_format1 language1HasSeparateDictionaryFile=true searchListCharEncoding=ISO-8859-1 searchListFileSeparationCharacter='\t' indexFileSeparationCharacter='\t' language1DisplayText={sourceLang} language2HasSeparateDictionaryFile=false dictionaryGenerationInputCharEncoding=UTF-8 language1GenerateIndex=true language2DisplayText={targetLang} language1NormationClassName=de.kugihan.dictionaryformids.translation.NormationEng """ class Writer: def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self.linesPerDirectoryFile = 500 # 200 self.indexFileMaxSize = 32722 # 30000 self.directoryPostfix = "" self.indexPostfix = "" self._dirname = "" # looks like we need to remove tabs, because app gives error # but based on the java code, all punctuations should be removed # as well, including '|' self.re_punc = re.compile( r"""[!"$§%&/()=?´`\\{}\[\]^°+*~#'\-_.:,;<>@|]*""", # noqa: RUF001 ) self.re_spaces = re.compile(" +") self.re_tabs = re.compile("\t+") def normateWord(self, word: str) -> str: word = word.strip() word = self.re_punc.sub("", word) word = self.re_spaces.sub(" ", word) word = self.re_tabs.sub(" ", word) word = word.lower() return word # noqa: RET504 def writeProbs(self) -> None: glos = self._glos probsPath = join( self._dirname, "DictionaryForMIDs.properties", ) with open(probsPath, mode="w", newline="\n", encoding="utf-8") as fileObj: fileObj.write( PROP_TEMPLATE.format( name=glos.getInfo("name"), author=glos.author, indexFileMaxSize=self.indexFileMaxSize, wordCount=self.wordCount, directoryPostfix=self.directoryPostfix, dicMaxSize=self.dicMaxSize + 1, language2FilePostfix="fa", # FIXME sourceLang=glos.sourceLangName, targetLang=glos.targetLangName, ), ) def nextIndex(self) -> None: try: self.indexFp.close() except AttributeError: self.indexIndex = 0 self.indexIndex += 1 fname = f"index{self.indexPostfix}{self.indexIndex}.csv" fpath = join(self._dirname, fname) self.indexFp = open(fpath, mode="w", encoding="utf-8", newline="\n") def finish(self) -> None: pass def open(self, dirname: str) -> None: self._dirname = dirname if not os.path.isdir(dirname): os.mkdir(dirname) def write(self) -> Generator[None, EntryType, None]: self.nextIndex() dicMaxSize = 0 indexData: list[tuple[str, int, int]] = [] def writeBucket(dicIndex: int, entryList: list[EntryType]) -> None: nonlocal dicMaxSize log.debug( f"{dicIndex=}, {len(entryList)=}, {dicMaxSize=}", ) dicFp = open( 
join( self._dirname, f"directory{self.directoryPostfix}{dicIndex + 1}.csv", ), mode="w", encoding="utf-8", newline="\n", ) for entry in entryList: word = entry.s_word n_word = self.normateWord(word) defi = entry.defi dicLine = word + "\t" + defi + "\n" dicPos = dicFp.tell() dicFp.write(dicLine) indexData.append((n_word, dicIndex + 1, dicPos)) dicMaxSize = max(dicMaxSize, dicFp.tell()) dicFp.close() bucketSize = self.linesPerDirectoryFile wordCount = 0 dicIndex = 0 entryList: list[EntryType] = [] # aka bucket while True: entry = yield if entry is None: break if entry.isData(): # FIXME continue wordCount += 1 entryList.append(entry) if len(entryList) >= bucketSize: writeBucket(dicIndex, entryList) dicIndex += 1 entryList = [] if entryList: writeBucket(dicIndex, entryList) entryList = [] self.dicMaxSize = dicMaxSize self.wordCount = wordCount langSearchListFp = open( join( self._dirname, f"searchlist{self.directoryPostfix}.csv", ), mode="w", newline="\n", encoding="utf-8", ) langSearchListFp.write(f"{indexData[0][0]}\t{self.indexIndex}\n") for word, dicIndex, dicPos in indexData: indexLine = f"{word}\t{dicIndex}-{dicPos}-B\n" if (self.indexFp.tell() + len(indexLine)) > self.indexFileMaxSize - 10: self.nextIndex() langSearchListFp.write(f"{word}\t{self.indexIndex}\n") self.indexFp.write(indexLine) self.indexFp.close() langSearchListFp.close() self.writeProbs() pyglossary-5.0.9/pyglossary/plugins/dict_cc/000077500000000000000000000000001476751035500212435ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/dict_cc/__init__.py000066400000000000000000000012771476751035500233630ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from pyglossary.option import Option from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "dict_cc" name = "Dictcc" description = "Dict.cc (SQLite3)" extensions = () extensionCreate = ".db" singleFile = True kind = "binary" wiki = "https://en.wikipedia.org/wiki/Dict.cc" website = ( "https://play.google.com/store/apps/details?id=cc.dict.dictcc", "dict.cc dictionary - Google Play", ) optionsProp: dict[str, Option] = {} pyglossary-5.0.9/pyglossary/plugins/dict_cc/reader.py000066400000000000000000000116531476751035500230650ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import html from operator import itemgetter from typing import TYPE_CHECKING, cast if TYPE_CHECKING: import sqlite3 from collections.abc import Callable, Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.lxml_types import Element, T_htmlfile from pyglossary.core import log __all__ = ["Reader"] class Reader: useByteProgress = False def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._clear() def _clear(self) -> None: self._filename = "" self._con: sqlite3.Connection | None = None self._cur: sqlite3.Cursor | None = None def open(self, filename: str) -> None: from sqlite3 import connect self._filename = filename self._con = connect(filename) self._cur = self._con.cursor() self._glos.setDefaultDefiFormat("h") def __len__(self) -> int: if self._cur is None: raise ValueError("cur is None") self._cur.execute( "select count(distinct term1)+count(distinct term2) from main_ft", ) return self._cur.fetchone()[0] @staticmethod def makeList( hf: T_htmlfile, input_elements: 
list[Element], processor: Callable, single_prefix: str = "", skip_single: bool = True, ) -> None: """Wrap elements into <ol>
        if more than one element.""" if not input_elements: return if skip_single and len(input_elements) == 1: hf.write(single_prefix) processor(hf, input_elements[0]) return with hf.element("ol"): for el in input_elements: with hf.element("li"): processor(hf, el) @staticmethod def makeGroupsList( hf: T_htmlfile, groups: list[tuple[str, str]], processor: Callable[[T_htmlfile, tuple[str, str]], None], single_prefix: str = "", skip_single: bool = True, ) -> None: """Wrap elements into
          if more than one element.""" if not groups: return if skip_single and len(groups) == 1: hf.write(single_prefix) processor(hf, groups[0]) return with hf.element("ol"): for el in groups: with hf.element("li"): processor(hf, el) def writeSense( # noqa: PLR6301 self, hf: T_htmlfile, row: tuple[str, str], ) -> None: from lxml import etree as ET trans, entry_type = row if entry_type: with hf.element("i"): hf.write(f"{entry_type}") hf.write(ET.Element("br")) try: hf.write(trans + " ") except Exception as e: log.error(f"error in writing {trans!r}, {e}") hf.write(repr(trans) + " ") else: with hf.element("big"): with hf.element("a", href=f"bword://{trans}"): hf.write("⏎") def iterRows( self, column1: str, column2: str, ) -> Iterator[tuple[str, str, str]]: if self._cur is None: raise ValueError("cur is None") self._cur.execute( f"select {column1}, {column2}, entry_type from main_ft order by {column1}", ) for row in self._cur.fetchall(): term1 = row[0] term2 = row[1] try: term1 = html.unescape(term1) except Exception as e: log.error(f"html.unescape({term1!r}) -> {e}") try: term2 = html.unescape(term2) except Exception as e: log.error(f"html.unescape({term2!r}) -> {e}") yield term1, term2, row[2] def parseGender(self, headword: str) -> tuple[str | None, str]: # noqa: PLR6301 # {m} masc masculine German: maskulin # {f} fem feminine German: feminin # {n} neut neutral German: neutral # { } ???? i = headword.find(" {") if i <= 0: return None, headword if len(headword) < i + 4: return None, headword if headword[i + 3] != "}": return None, headword g = headword[i + 2] gender = None if g == "m": gender = "masculine" elif g == "f": gender = "feminine" elif g == "n": gender = "neutral" else: log.warning(f"invalid gender {g!r}") return None, headword headword = headword[:i] + headword[i + 4 :] return gender, headword def _iterOneDirection( self, column1: str, column2: str, ) -> Iterator[EntryType]: from io import BytesIO from itertools import groupby from lxml import etree as ET glos = self._glos for headwordEscaped, groupsOrig in groupby( self.iterRows(column1, column2), key=itemgetter(0), ): headword = html.unescape(headwordEscaped) groups: list[tuple[str, str]] = [ (term2, entry_type) for _, term2, entry_type in groupsOrig ] f = BytesIO() gender, headword = self.parseGender(headword) with ET.htmlfile(f, encoding="utf-8") as hf: with hf.element("div"): if gender: with hf.element("i"): hf.write(gender) hf.write(ET.Element("br")) self.makeGroupsList( cast("T_htmlfile", hf), groups, self.writeSense, ) defi = f.getvalue().decode("utf-8") yield glos.newEntry(headword, defi, defiFormat="h") def __iter__(self) -> Iterator[EntryType]: yield from self._iterOneDirection("term1", "term2") yield from self._iterOneDirection("term2", "term1") def close(self) -> None: if self._cur: self._cur.close() if self._con: self._con.close() self._clear() pyglossary-5.0.9/pyglossary/plugins/dict_cc/tools.toml000066400000000000000000000002161476751035500232770ustar00rootroot00000000000000["dict.cc dictionary"] web = "https://play.google.com/store/apps/details?id=cc.dict.dictcc" platforms = [ "Android",] license = "Proprietary" pyglossary-5.0.9/pyglossary/plugins/dict_cc_split/000077500000000000000000000000001476751035500224565ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/dict_cc_split/__init__.py000066400000000000000000000013221476751035500245650ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from pyglossary.option 
import Option from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "dict_cc_split" name = "Dictcc_split" description = "Dict.cc (SQLite3) - Split" extensions = () extensionCreate = ".db" singleFile = True kind = "binary" wiki = "https://en.wikipedia.org/wiki/Dict.cc" website = ( "https://play.google.com/store/apps/details?id=cc.dict.dictcc", "dict.cc dictionary - Google Play", ) optionsProp: dict[str, Option] = {} pyglossary-5.0.9/pyglossary/plugins/dict_cc_split/reader.py000066400000000000000000000040671476751035500243010ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import html from typing import TYPE_CHECKING if TYPE_CHECKING: import sqlite3 from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.core import log __all__ = ["Reader"] class Reader: useByteProgress = False def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._clear() def _clear(self) -> None: self._filename = "" self._con: sqlite3.Connection | None = None self._cur: sqlite3.Cursor | None = None def open(self, filename: str) -> None: from sqlite3 import connect self._filename = filename self._con = connect(filename) self._cur = self._con.cursor() self._glos.setDefaultDefiFormat("m") def __len__(self) -> int: if self._cur is None: raise ValueError("cur is None") self._cur.execute("select count(*) * 2 from main_ft") return self._cur.fetchone()[0] def iterRows( self, column1: str, column2: str, ) -> Iterator[tuple[str, str, str]]: if self._cur is None: raise ValueError("cur is None") self._cur.execute( f"select {column1}, {column2}, entry_type from main_ft order by {column1}", ) for row in self._cur.fetchall(): term1 = row[0] term2 = row[1] try: term1 = html.unescape(term1) except Exception as e: log.error(f"html.unescape({term1!r}) -> {e}") try: term2 = html.unescape(term2) except Exception as e: log.error(f"html.unescape({term2!r}) -> {e}") yield term1, term2, row[2] def _iterOneDirection( self, column1: str, column2: str, ) -> Iterator[EntryType]: for word, defi, entry_type in self.iterRows(column1, column2): if entry_type: word = f"{word} {{{entry_type}}}" # noqa: PLW2901 yield self._glos.newEntry(word, defi, defiFormat="m") def __iter__(self) -> Iterator[EntryType]: yield from self._iterOneDirection("term1", "term2") yield from self._iterOneDirection("term2", "term1") def close(self) -> None: if self._cur: self._cur.close() if self._con: self._con.close() self._clear() pyglossary-5.0.9/pyglossary/plugins/dict_cc_split/tools.toml000066400000000000000000000002161476751035500245120ustar00rootroot00000000000000["dict.cc dictionary"] web = "https://play.google.com/store/apps/details?id=cc.dict.dictcc" platforms = [ "Android",] license = "Proprietary" pyglossary-5.0.9/pyglossary/plugins/dict_org/000077500000000000000000000000001476751035500214455ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/dict_org/__init__.py000066400000000000000000000016101476751035500235540ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.flags import DEFAULT_NO from pyglossary.option import BoolOption, Option from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", 
"website", "wiki", ] enable = True lname = "dict_org" name = "DictOrg" description = "DICT.org file format (.index)" extensions = (".index",) extensionCreate = "" singleFile = False optionsProp: dict[str, Option] = { "dictzip": BoolOption(comment="Compress .dict file to .dict.dz"), "install": BoolOption(comment="Install dictionary to /usr/share/dictd/"), } sortOnWrite = DEFAULT_NO kind = "directory" wiki = "https://en.wikipedia.org/wiki/DICT#DICT_file_format" website = ( "http://dict.org/bin/Dict", "The DICT Development Group", ) pyglossary-5.0.9/pyglossary/plugins/dict_org/reader.py000066400000000000000000000040401476751035500232570ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import re from typing import TYPE_CHECKING from pyglossary.core import log from pyglossary.plugin_lib.dictdlib import DictDB if TYPE_CHECKING: from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] class Reader: useByteProgress = False def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._filename = "" self._dictdb: DictDB | None = None # regular expression patterns used to prettify definition text self._re_newline_in_braces = re.compile( r"\{(?P.*?)\n(?P.*?)?\}", ) self._re_words_in_braces = re.compile( r"\{(?P.+?)\}", ) def open(self, filename: str) -> None: filename = filename.removesuffix(".index") self._filename = filename self._dictdb = DictDB(filename, "read", 1) def close(self) -> None: if self._dictdb is not None: self._dictdb.close() # self._dictdb.finish() self._dictdb = None def prettifyDefinitionText(self, defi: str) -> str: # Handle words in {} # First, we remove any \n in {} pairs defi = self._re_newline_in_braces.sub(r"{\g\g}", defi) # Then, replace any {words} into words, # so it can be rendered as link correctly defi = self._re_words_in_braces.sub( r'\g', defi, ) # Use
<br/> so it can be rendered as newline correctly return defi.replace("\n", "<br/>") def __len__(self) -> int: if self._dictdb is None: return 0 return len(self._dictdb) def __iter__(self) -> Iterator[EntryType]: if self._dictdb is None: raise RuntimeError("iterating over a reader while it's not open") dictdb = self._dictdb for word in dictdb.getDefList(): b_defi = b"\n\n<hr>
          \n\n".join(dictdb.getDef(word)) try: defi = b_defi.decode("utf_8", "ignore") defi = self.prettifyDefinitionText(defi) except Exception as e: log.error(f"{b_defi = }") raise e yield self._glos.newEntry(word, defi) pyglossary-5.0.9/pyglossary/plugins/dict_org/tools.toml000066400000000000000000000010201476751035500234730ustar00rootroot00000000000000[Dictd] web = "https://directory.fsf.org/wiki/Dictd" platforms = [ "Linux",] license = "GPL" ["GNOME Dictionary"] web = "https://wiki.gnome.org/Apps/Dictionary" platforms = [ "Linux",] license = "GPL" ["Xfce4 Dictionary"] web = "https://docs.xfce.org/apps/xfce4-dict/start" platforms = [ "linux",] license = "GPL" [Ding] desc = "Graphical dictionary lookup program for Unix (Tk)" web = "https://www-user.tu-chemnitz.de/~fri/ding/" platforms = [ "linux",] license = "GPL" copyright = "Copyright (c) 1999 - 2016 Frank Richter" pyglossary-5.0.9/pyglossary/plugins/dict_org/writer.py000066400000000000000000000047621476751035500233440ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from os.path import splitext from typing import TYPE_CHECKING from pyglossary.core import log from pyglossary.plugin_lib.dictdlib import DictDB if TYPE_CHECKING: from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] def _installToDictd(filename: str, dictzip: bool) -> None: """Filename is without extension (neither .index or .dict or .dict.dz).""" import shutil import subprocess from os.path import isdir, isfile targetDir = "/usr/share/dictd/" if filename.startswith(targetDir): return if not isdir(targetDir): log.warning(f"Directory {targetDir!r} does not exist, skipping install") return log.info(f"Installing {filename!r} to DICTD server directory: {targetDir}") if dictzip and isfile(filename + ".dict.dz"): dictExt = ".dict.dz" elif isfile(filename + ".dict"): dictExt = ".dict" else: log.error(f"No .dict file, could not install dictd file {filename!r}") return if not filename.startswith(targetDir): shutil.copy(filename + ".index", targetDir) shutil.copy(filename + dictExt, targetDir) # update /var/lib/dictd/db.list if subprocess.call(["/usr/sbin/dictdconfig", "-w"]) != 0: log.error( "failed to update /var/lib/dictd/db.list file" ", try manually running: sudo /usr/sbin/dictdconfig -w", ) log.info("don't forget to restart dictd server") class Writer: _dictzip: bool = False _install: bool = True def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" self._dictdb: DictDB | None = None def finish(self) -> None: from pyglossary.os_utils import runDictzip if self._dictdb is None: raise RuntimeError("self._dictdb is None") self._dictdb.finish(dosort=True) if self._dictzip: runDictzip(f"{self._filename}.dict") if self._install: _installToDictd( self._filename, self._dictzip, ) self._filename = "" def open(self, filename: str) -> None: filename_nox, ext = splitext(filename) if ext.lower() == ".index": filename = filename_nox self._dictdb = DictDB(filename, "write", 1) self._filename = filename def write(self) -> Generator[None, EntryType, None]: dictdb = self._dictdb if dictdb is None: raise RuntimeError("self._dictdb is None") while True: entry = yield if entry is None: break if entry.isData(): # does dictd support resources? and how? 
FIXME continue dictdb.addEntry(entry.defi, entry.l_word) pyglossary-5.0.9/pyglossary/plugins/dict_org_source/000077500000000000000000000000001476751035500230255ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/dict_org_source/__init__.py000066400000000000000000000013071476751035500251370ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import BoolOption, Option from .writer import Writer __all__ = [ "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "dict_org_source" name = "DictOrgSource" description = "DICT.org dictfmt source file" extensions = (".dtxt",) extensionCreate = ".dtxt" singleFile = True kind = "text" wiki = "https://en.wikipedia.org/wiki/DICT" website = ( "https://github.com/cheusov/dictd", "@cheusov/dictd", ) optionsProp: dict[str, Option] = { "remove_html_all": BoolOption(comment="Remove all HTML tags"), } pyglossary-5.0.9/pyglossary/plugins/dict_org_source/tools.toml000066400000000000000000000001361476751035500250620ustar00rootroot00000000000000[dictfmt] web = "https://linux.die.net/man/1/dictfmt" platforms = [ "Linux",] license = "GPL" pyglossary-5.0.9/pyglossary/plugins/dict_org_source/writer.py000066400000000000000000000017701476751035500247200ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] class Writer: _remove_html_all: bool = True def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" def finish(self) -> None: self._filename = "" def open(self, filename: str) -> None: self._filename = filename if self._remove_html_all: self._glos.removeHtmlTagsAll() # TODO: add another bool flag to only remove html tags that are not # supported by GtkTextView @staticmethod def _defiEscapeFunc(defi: str) -> str: return defi.replace("\r", "") def write(self) -> Generator[None, EntryType, None]: from pyglossary.text_writer import writeTxt yield from writeTxt( self._glos, entryFmt=":{word}:{defi}\n", filename=self._filename, defiEscapeFunc=self._defiEscapeFunc, ext=".dtxt", ) pyglossary-5.0.9/pyglossary/plugins/dictunformat/000077500000000000000000000000001476751035500223525ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/dictunformat/__init__.py000066400000000000000000000014701476751035500244650ustar00rootroot00000000000000from __future__ import annotations from pyglossary.option import EncodingOption, Option, StrOption from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "dictunformat" name = "Dictunformat" description = "dictunformat output file" extensions = (".dictunformat",) extensionCreate = ".dictunformat" singleFile = True kind = "text" wiki = "https://directory.fsf.org/wiki/Dictd" website = ( "https://github.com/cheusov/dictd/blob/master/dictunformat.1.in", "dictd/dictunformat.1.in - @cheusov/dictd", ) optionsProp: dict[str, Option] = { "encoding": EncodingOption(), "headword_separator": StrOption( comment="separator for headword and alternates", ), } 
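# ----------------------------------------------------------------------
# Usage sketch (not part of the original sources): the DictOrg,
# DictOrgSource and Dictunformat plugins above are not imported directly;
# they are discovered by name/extension through the top-level Glossary
# API. A minimal conversion, assuming pyglossary is installed -- the file
# names and the separator value here are illustrative:
#
#     from pyglossary.glossary_v2 import ConvertArgs, Glossary
#
#     Glossary.init()  # registers all plugins; call once per process
#     glos = Glossary()
#     glos.convert(ConvertArgs(
#         inputFilename="mydict.dictunformat",   # Dictunformat reader
#         outputFilename="mydict.dtxt",          # DictOrgSource writer
#         readOptions={"headword_separator": "; "},
#     ))
# ----------------------------------------------------------------------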
pyglossary-5.0.9/pyglossary/plugins/dictunformat/reader.py000066400000000000000000000040071476751035500241670ustar00rootroot00000000000000from __future__ import annotations from pyglossary.core import log from pyglossary.text_reader import TextGlossaryReader def unescapeDefi(defi: str) -> str: return defi __all__ = ["Reader"] class Reader(TextGlossaryReader): useByteProgress = True _headword_separator = "; " # https://github.com/cheusov/dictd/blob/master/dictfmt/dictunformat.in#L14 @classmethod def isInfoWord(cls, word: str) -> bool: return word.startswith("00-database-") @classmethod def fixInfoWord(cls, word: str) -> str: return word def setInfo(self, word: str, defi: str) -> None: if word == "00-database-short": self._glos.setInfo("name", defi) return if word != "00-database-info": return glos = self._glos lastKey = "" for line in defi.split("\n"): if not line.startswith("##:"): if lastKey: glos.setInfo(word, f"{glos.getInfo(lastKey)}\n{line}") continue parts = line[3:].split(":") if len(parts) < 2: log.error(f"unexpected line: {line}") key = lastKey = parts[0] value = ":".join(parts[1:]) glos.setInfo(key, value) def nextBlock(self) -> tuple[str | list[str], str, None] | None: if not self._file: raise StopIteration word = "" defiLines: list[str] = [] while True: line = self.readline() if not line: break line = line.rstrip("\n\r") if not line: continue if not line.strip("_"): if not word: continue if not defiLines: log.warning(f"no definition/value for {word!r}") defi = unescapeDefi("\n".join(defiLines)) words = word.split(self._headword_separator) return words, defi, None if not word: word = line continue if line == word: continue if line.lower() == word: word = line continue defiLines.append(line) if word: defi = unescapeDefi("\n".join(defiLines)) if word.startswith("00-database-") and defi == "unknown": log.info(f"ignoring {word} -> {defi}") return None words = word.split(self._headword_separator) return words, defi, None raise StopIteration pyglossary-5.0.9/pyglossary/plugins/dictunformat/tools.toml000066400000000000000000000001501476751035500244030ustar00rootroot00000000000000[dictunformat] web = "https://linux.die.net/man/1/dictunformat" platforms = [ "Linux",] license = "GPL" pyglossary-5.0.9/pyglossary/plugins/digitalnk/000077500000000000000000000000001476751035500216215ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/digitalnk/__init__.py000066400000000000000000000012021476751035500237250ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from pyglossary.option import Option from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "digitalnk" name = "DigitalNK" description = "DigitalNK (SQLite3, N-Korean)" extensions = () extensionCreate = ".db" singleFile = True kind = "binary" wiki = "" website = ( "https://github.com/digitalprk/dicrs", "@digitalprk/dicrs", ) optionsProp: dict[str, Option] = {} pyglossary-5.0.9/pyglossary/plugins/digitalnk/reader.py000066400000000000000000000030361476751035500234370ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import html from typing import TYPE_CHECKING if TYPE_CHECKING: import sqlite3 from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] class Reader: useByteProgress = False def 
__init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._clear() def _clear(self) -> None: self._filename = "" self._con: sqlite3.Connection | None = None self._cur: sqlite3.Cursor | None = None def open(self, filename: str) -> None: from sqlite3 import connect self._filename = filename self._con = connect(filename) self._cur = self._con.cursor() self._glos.setDefaultDefiFormat("m") def __len__(self) -> int: if self._cur is None: raise ValueError("cur is None") self._cur.execute("select count(*) from dictionary") return self._cur.fetchone()[0] def __iter__(self) -> Iterator[EntryType]: if self._cur is None: raise ValueError("cur is None") self._cur.execute( "select word, definition from dictionary order by word", ) # iteration over self._cur stops after one entry # and self._cur.fetchone() returns None # no idea why! # https://github.com/ilius/pyglossary/issues/282 # for row in self._cur: for row in self._cur.fetchall(): word = html.unescape(row[0]) definition = row[1] yield self._glos.newEntry(word, definition, defiFormat="m") def close(self) -> None: if self._cur: self._cur.close() if self._con: self._con.close() self._clear() pyglossary-5.0.9/pyglossary/plugins/digitalnk/tools.toml000066400000000000000000000001501476751035500236520ustar00rootroot00000000000000["Dic.rs"] web = "https://github.com/digitalprk/dicrs" platforms = [ "Linux",] license = "BSD-2-Clause" pyglossary-5.0.9/pyglossary/plugins/dikt_json/000077500000000000000000000000001476751035500216375ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/dikt_json/__init__.py000066400000000000000000000016271476751035500237560ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # from https://github.com/maxim-saplin/pyglossary from __future__ import annotations from pyglossary.option import ( BoolOption, EncodingOption, Option, ) from .writer import Writer __all__ = [ "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "dikt_json" name = "DiktJson" description = "DIKT JSON (.json)" extensions = () extensionCreate = ".json" singleFile = True kind = "text" wiki = "" website = "https://github.com/maxim-saplin/dikt" optionsProp: dict[str, Option] = { "encoding": EncodingOption(), "enable_info": BoolOption(comment="Enable glossary info / metedata"), "resources": BoolOption(comment="Enable resources / data files"), "word_title": BoolOption( comment="add headwords title to beginning of definition", ), } pyglossary-5.0.9/pyglossary/plugins/dikt_json/writer.py000066400000000000000000000036601476751035500235320ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # from https://github.com/maxim-saplin/pyglossary from __future__ import annotations import re from typing import TYPE_CHECKING from pyglossary.compression import ( # compressionOpen, stdCompressions, ) if TYPE_CHECKING: from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] class Writer: _encoding: str = "utf-8" _enable_info: bool = True _resources: bool = True _word_title: bool = False compressions = stdCompressions def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = None glos.preventDuplicateWords() def open(self, filename: str) -> None: self._filename = filename def finish(self) -> None: self._filename = None def write(self) -> Generator[None, EntryType, None]: from json import dumps from 
pyglossary.text_writer import writeTxt glos = self._glos encoding = self._encoding enable_info = self._enable_info resources = self._resources ensure_ascii = encoding == "ascii" def escape(st: str) -> str: # remove styling from HTML tags st2 = re.sub(r' style="[^"]*"', "", st) st2 = re.sub(r' class="[^"]*"', "", st2) st2 = re.sub(r"<font[^>]*>", "", st2) st2 = st2.replace("</font>", "") st2 = re.sub(r"\n", "", st2) st2 = st2.replace("<br>
          ", "") st2 = st2.replace("", "") # fix russian dictionary issues, # such as hyphenation in word (e.g. абб{[']}а{[/']}т) st2 = re.sub(r"\{\['\]\}", "", st2) st2 = re.sub(r"\{\[/'\]\}", "", st2) return dumps(st2, ensure_ascii=ensure_ascii) yield from writeTxt( glos, entryFmt="\t{word}: {defi},\n", filename=self._filename, encoding=encoding, writeInfo=enable_info, wordEscapeFunc=escape, defiEscapeFunc=escape, ext=".json", head="{\n", tail='\t"": ""\n}', resources=resources, word_title=self._word_title, ) pyglossary-5.0.9/pyglossary/plugins/dsl/000077500000000000000000000000001476751035500204355ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/dsl/__init__.py000066400000000000000000000027771476751035500225630ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, EncodingOption, Option, StrOption, ) from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "dsl" name = "ABBYYLingvoDSL" description = "ABBYY Lingvo DSL (.dsl)" extensions = (".dsl",) extensionCreate = ".dsl" singleFile = True kind = "text" wiki = "https://ru.wikipedia.org/wiki/ABBYY_Lingvo" website = ( "https://www.lingvo.ru/", "www.lingvo.ru", ) optionsProp: dict[str, Option] = { "encoding": EncodingOption(), "audio": BoolOption( comment="Enable audio objects", ), "example_color": StrOption( comment="Examples color", ), "abbrev": StrOption( customValue=False, values=["", "hover"], comment="Load and apply abbreviation file (`_abrv.dsl`)", ), } # ABBYY is a Russian company # https://ru.wikipedia.org/wiki/ABBYY_Lingvo # http://lingvo.helpmax.net/en/troubleshooting/dsl-compiler/compiling-a-dictionary/ # https://www.abbyy.com/news/abbyy-lingvo-80-dictionaries-to-suit-every-taste/ # {{{ # modified to work around codepoints that are not supported by `unichr`. # http://effbot.org/zone/re-sub.htm#unescape-html # January 15, 2003 | Fredrik Lundh # Removes HTML or XML character references and entities from a text string. # # @param text The HTML (or XML) source text. # @return The plain text, as a Unicode string, if necessary. pyglossary-5.0.9/pyglossary/plugins/dsl/_types.py000066400000000000000000000024671476751035500223230ustar00rootroot00000000000000from __future__ import annotations import typing from collections.abc import Callable from typing import TYPE_CHECKING __all__ = [ "ErrorType", "LexType", "TitleLexType", "TitleTransformerType", "TransformerType", ] class TransformerType(typing.Protocol): start: int pos: int input: str output: str currentKey: str attrs: dict[str, str | None] attrName: str audio: bool resFileSet: set[str] exampleColor: str def __init__(self) -> None: pass def end(self) -> bool: ... def move(self, chars: int) -> None: ... def next(self) -> str: ... def follows(self, st: str) -> bool: ... def skipAny(self, chars: str) -> None: ... def addText(self, st: str) -> None: ... def resetBuf(self) -> None: ... def addHtml(self, st: str) -> None: ... def closeTag(self, tag: str) -> None: ... @property def labelOpen(self) -> bool: ... @labelOpen.setter def labelOpen(self) -> bool: ... class TitleTransformerType(TransformerType, typing.Protocol): title: str outputAlt: str def addText2(self, st: str) -> None: ... 
if TYPE_CHECKING: ErrorType = str | None # it is an State Function (state as in state machine) LexType = Callable[[TransformerType], tuple["LexType", ErrorType]] | None TitleLexType = ( Callable[ [TitleTransformerType], tuple["TitleLexType", ErrorType], ] | None ) pyglossary-5.0.9/pyglossary/plugins/dsl/lex.py000066400000000000000000000163021476751035500216010ustar00rootroot00000000000000from __future__ import annotations from os.path import splitext from typing import TYPE_CHECKING from xml.sax.saxutils import escape, quoteattr from pyglossary.core import log if TYPE_CHECKING: from ._types import ErrorType, LexType, TransformerType __all__ = ["lexRoot"] # rename to lexText? def lexRoot(tr: TransformerType) -> tuple[LexType, ErrorType]: if tr.start < tr.pos: log.warning(f"incomplete buffer near pos {tr.pos}") if tr.end(): # if tr.openParenth > 0: # return None, "unexpected: unclosed '('" return None, None c = tr.next() if tr.end(): tr.addText(c) return None, None if c == "\\": return lexBackslash, None if c == "[": tr.resetBuf() return lexTag, None if c == "]": tr.next() if tr.follows("["): tr.next() tr.output += c tr.resetBuf() return lexRoot, None if c == "~": tr.addText(tr.currentKey) tr.resetBuf() return lexRoot, None if c == "\n": return lexRootNewline, None if c == "<" and tr.follows("<"): tr.next() return lexRefText, None tr.addText(c) tr.resetBuf() return lexRoot, None def lexRootNewline(tr: TransformerType) -> tuple[LexType, ErrorType]: tr.skipAny(" \t") if not tr.follows("[m"): tr.addHtml("
          ") tr.resetBuf() return lexRoot, None def lexBackslash(tr: TransformerType) -> tuple[LexType, ErrorType]: c = tr.next() if c == " ": tr.addHtml(" ") elif c in "<>" and tr.follows(c): tr.next() tr.addText(2 * c) else: tr.addText(c) tr.resetBuf() return lexRoot, None def lexTag(tr: TransformerType) -> tuple[LexType, ErrorType]: if tr.end(): return None, f"'[' not closed near pos {tr.pos} in lexTag" c = tr.next() if c == "[": tr.output += c tr.resetBuf() return lexRoot, None if c in " \t": tr.skipAny(" \t") return lexTagAttr, None if c == "]": tag = tr.input[tr.start : tr.pos - 1] if not tag: return None, f"empty tag near pos {tr.pos}" return processTag(tr, tag) # if c == '\\': # return lexTagBackslash, None # do not advance tr.start return lexTag, None def lexTagAttr(tr: TransformerType) -> tuple[LexType, ErrorType]: if tr.end(): tr.attrs[tr.attrName] = None tr.resetBuf() return lexRoot, None c = tr.next() if c == "]": tr.attrs[tr.attrName] = None tr.move(-1) return lexTag, None if c == "=": tr.skipAny(" \t") return lexTagAttrValue, None tr.attrName += c return lexTagAttr, None def lexTagAttrValue(tr: TransformerType) -> tuple[LexType, ErrorType]: if tr.end(): return None, f"'[' not closed near pos {tr.pos} in lexTagAttrValue(1)" c = tr.next() quote = "" value = "" if c in "'\"": if tr.end(): return None, f"'[' not closed near pos {tr.pos} in lexTagAttrValue(2)" quote = c else: value += c while True: if tr.end(): return None, f"'[' not closed near pos {tr.pos} in lexTagAttrValue(3)" c = tr.next() if c == "\\": if tr.end(): return None, f"'[' not closed near pos {tr.pos} in lexTagAttrValue(3)" c = tr.next() value += c continue if c == "]": tr.move(-1) break if c == quote: break if not quote and c in " \t": break value += c tr.attrs[tr.attrName] = value return lexTag, None r""" [m{}] =>
<p style="padding-left:{}em;margin:0">
          [*] => [ex] => [c] => [p] => ['] => [b] => [i] => [u] => [sup] => [sub] => [ref] \ [url] } => {} <<...>> / [s] => [s] => {} [t] => {{...}} \ [trn] | [!trn] | [trs] } => remove [!trs] | [lang ...] | [com] / """ def lexRefText(tr: TransformerType) -> tuple[LexType, ErrorType]: if tr.end(): return None, None text = "" while not tr.end(): c = tr.next() if c == "\\": if tr.end(): break text += tr.next() continue if c == "[": tr.move(-1) break if c == ">" and tr.follows(">"): tr.next() break text += c target = tr.attrs.get("target") if not target: target = text tr.addHtml(f"{escape(text)}") tr.resetBuf() return lexRoot, None def lexUrlText(tr: TransformerType) -> tuple[LexType, ErrorType]: if tr.end(): return None, None text = "" while not tr.end(): c = tr.next() if c == "\\": if tr.end(): break text += tr.next() continue if c == "[": tr.move(-1) break text += c target = tr.attrs.get("target") if not target: target = text if "://" not in target: target = "http://" + target tr.addHtml(f"{escape(text)}") tr.resetBuf() return lexRoot, None def lexTagS(tr: TransformerType) -> tuple[LexType, ErrorType]: if tr.end(): return None, None fname = "" while not tr.end(): c = tr.next() if c == "[": tr.move(-1) break fname += c _, ext = splitext(fname) ext = ext.lstrip(".") if ext in {"wav", "mp3"}: if tr.audio: tr.addHtml( rf'' '' "" ) elif ext in {"jpg", "jpeg", "gif", "tif", "tiff", "png", "bmp"}: tr.addHtml(rf'{fname}') else: log.warning(f"unknown file extension in {fname!r}") tr.resFileSet.add(fname) tr.resetBuf() return lexRoot, None def processTagM(tr: TransformerType, tag: str) -> tuple[LexType, ErrorType]: padding = "0.3" if len(tag) > 1: padding = tag[1:] if padding == "0": padding = "0.3" tr.addHtml(f'
<p style="padding-left:{padding}em;margin:0">
          ') tr.resetBuf() return lexRoot, None def lexTagC(tr: TransformerType) -> tuple[LexType, ErrorType]: color = "green" for key, value in tr.attrs.items(): if value is None: color = key break tr.addHtml(f'') tr.resetBuf() return lexRoot, None # PLR0912 Too many branches (19 > 12) def processTag(tr: TransformerType, tag: str) -> tuple[LexType, ErrorType]: # noqa: PLR0912 tr.attrName = "" if not tag: tr.resetBuf() return lexRoot, None if tag[0] == "/": tr.closeTag(tag[1:]) return lexRoot, None tag = tag.split(" ")[0] if tag == "ref": return lexRefText(tr) if tag == "url": return lexUrlText(tr) if tag == "s": return lexTagS(tr) if tag == "c": return lexTagC(tr) if tag[0] == "m": return processTagM(tr, tag) if tag == "p": tr.labelOpen = True tr.resetBuf() return lexRoot, None if tag == "*": tr.addHtml('') elif tag == "ex": tr.addHtml(f'') elif tag == "t": tr.addHtml('') elif tag == "i": tr.addHtml("") elif tag == "b": tr.addHtml("") elif tag == "u": tr.addHtml("") elif tag == "'": tr.addHtml('') elif tag == "sup": tr.addHtml("") elif tag == "sub": tr.addHtml("") elif tag in { "trn", "!trn", "trs", "!trs", "lang", "com", }: pass else: log.warning(f"unknown tag {tag!r}") tr.resetBuf() return lexRoot, None # def lexTagBackslash(tr: TransformerType) -> tuple[LexType, ErrorType]: pyglossary-5.0.9/pyglossary/plugins/dsl/reader.py000066400000000000000000000223211476751035500222510ustar00rootroot00000000000000# -*- coding: utf-8 -*- # dsl/__init__.py # Read ABBYY Lingvo DSL dictionary format # # Copyright © 2013-2020 Saeed Rasooli # Copyright © 2016 ivan tkachenko me@ratijas.tk # Copyright © 2013 Xiaoqiang Wang # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations import html import html.entities import re from os.path import abspath, dirname, isfile, join, splitext from typing import TYPE_CHECKING, cast from pyglossary.compression import ( compressionOpen, stdCompressions, ) from pyglossary.core import log from pyglossary.io_utils import nullTextIO from pyglossary.os_utils import indir from pyglossary.text_reader import TextFilePosWrapper from .title import TitleTransformer from .transform import Transformer if TYPE_CHECKING: import io from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] htmlEntityPattern = re.compile(r"&#?\w+;") def _unescape(text: str) -> str: def fixup(m: re.Match) -> str: text = m.group(0) if text[:2] == "&#": # character reference try: i = int(text[3:-1], 16) if text[:3] == "&#x" else int(text[2:-1]) except ValueError: pass else: try: return chr(i) except ValueError: # f"\\U{i:08x}", but no fb"..." 
return (b"\\U%08x" % i).decode("unicode-escape") else: # named entity try: text = chr(html.entities.name2codepoint[text[1:-1]]) except KeyError: pass return text # leave as is return htmlEntityPattern.sub(fixup, text) # precompiled regexs _re_wrapped_in_quotes = re.compile("^(\\'|\")(.*)(\\1)$") def _unwrap_quotes(s: str) -> str: return _re_wrapped_in_quotes.sub("\\2", s) class Reader: useByteProgress = True compressions = stdCompressions + ("dz",) _encoding: str = "" _audio: bool = True _example_color: str = "steelblue" _abbrev: str = "hover" def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._filename = "" self._dirPath = "" self._file: io.TextIOBase = nullTextIO self._fileSize = 0 self._bufferLine = "" self._resFileSet: set[str] = set() self._includes: list[Reader] = [] self._abbrevDict: dict[str, str] = {} def transform( self, text: str, header: str, ) -> str: tr = Transformer( text, currentKey=header, audio=self._audio, exampleColor=self._example_color, abbrev=self._abbrev, abbrevDict=self._abbrevDict if self._abbrev else None, ) try: result, err = tr.transform() except Exception: log.exception(f"{text = }") return "" if err: log.error(f"error in transforming {text!r}: {err}") return "" if result is None: log.error(f"error in transforming {text!r}: result is None") return "" resText = result.output.strip() self._resFileSet.update(tr.resFileSet) return resText def close(self) -> None: self._file.close() self._file = nullTextIO def __len__(self) -> int: # FIXME return 0 def open( self, filename: str, ) -> None: self._filename = filename self._dirPath = abspath(dirname(self._filename)) encoding = self._encoding if not encoding: encoding = self.detectEncoding() cfile = cast( "io.TextIOBase", compressionOpen( filename, dz=True, mode="rt", encoding=encoding, ), ) if cfile.seekable(): cfile.seek(0, 2) self._fileSize = cfile.tell() cfile.seek(0) # self._glos.setInfo("input_file_size", f"{self._fileSize}") else: log.warning("DSL Reader: file is not seekable") self._file = TextFilePosWrapper(cfile, encoding) # read header for line in self._file: line = line.rstrip().lstrip("\ufeff") # noqa: B005, PLW2901 # \ufeff -> https://github.com/ilius/pyglossary/issues/306 if not line: continue if not line.startswith("#"): self._bufferLine = line break self.processHeaderLine(line) if self._abbrev: self.loadAbbrevFile() def loadAbbrevFile(self) -> None: baseName, _ = splitext(self._filename) abbrevName = baseName + "_abrv.dsl" if not isfile(abbrevName): return log.info(f"Reading abbrevation file {abbrevName!r}") reader = Reader(self._glos) reader.open(abbrevName) for entry in reader: for word in entry.l_word: self._abbrevDict[word] = entry.defi reader.close() def detectEncoding(self) -> str: for testEncoding in ( "utf-8", "utf-16", "utf-16-le", "utf-16-be", ): with compressionOpen( self._filename, dz=True, mode="rt", encoding=testEncoding, ) as fileObj: try: for _ in range(10): fileObj.readline() except (UnicodeDecodeError, UnicodeError): log.info(f"Encoding of DSL file is not {testEncoding}") continue else: log.info(f"Encoding of DSL file detected: {testEncoding}") return testEncoding raise ValueError( "Could not detect encoding of DSL file" ", specify it by: --read-options encoding=ENCODING", ) def setInfo(self, key: str, value: str) -> None: self._glos.setInfo(key, _unwrap_quotes(value)) def processHeaderLine(self, line: str) -> None: if line.startswith("#NAME"): self.setInfo("name", _unwrap_quotes(line[6:].strip())) elif line.startswith("#INDEX_LANGUAGE"): 
self._glos.sourceLangName = _unwrap_quotes(line[16:].strip()) elif line.startswith("#CONTENTS_LANGUAGE"): self._glos.targetLangName = _unwrap_quotes(line[19:].strip()) elif line.startswith("#INCLUDE"): self.processInclude(_unwrap_quotes(line[9:].strip())) def processInclude(self, filename: str) -> None: reader = Reader(self._glos) reader._audio = self._audio reader._example_color = self._example_color with indir(self._dirPath): reader.open(filename) self._includes.append(reader) def _iterLines(self) -> Iterator[str]: if self._bufferLine: line = self._bufferLine self._bufferLine = "" yield line for line in self._file: yield line @staticmethod def sub_title_line(m: re.Match) -> str: line = m.group(0)[1:-1] line = line.replace("[']", "") # FIXME line = line.replace("[/']", "") return line # noqa: RET504 def __iter__(self) -> Iterator[EntryType]: for reader in self._includes: yield from reader reader.close() term_lines: list[str] = [] text_lines: list[str] = [] for line in self._iterLines(): if not line.strip(): continue if line.startswith((" ", "\t")): # text text_lines.append(line) continue # header or alt if text_lines: yield from self.parseEntryBlock(term_lines, text_lines) term_lines = [] text_lines = [] term_lines.append(line) if text_lines: yield from self.parseEntryBlock(term_lines, text_lines) resDir = dirname(self._filename) for fname in sorted(self._resFileSet): fpath = join(resDir, fname) if not isfile(fpath): log.warning(f"resource file not found: {fname}") continue with open(fpath, mode="rb") as _file: data = _file.read() yield self._glos.newDataEntry(fname, data) def parseEntryBlock( # noqa: PLR0912 Too many branches (14 > 12) self, term_lines: list[str], text_lines: list[str], ) -> Iterator[EntryType]: terms: list[str] = [] defiTitles: list[str] = [] for line in term_lines: tr = TitleTransformer(line) res, err = tr.transform() if err: log.error(err) continue if res is None: log.error(f"res is None for line={line!r}") continue term = res.output.strip() terms.append(term) term2 = res.outputAlt.strip() if term2 != term: terms.append(term2) title = tr.title.strip() if title != term: defiTitles.append("" + title + "") main_text: str = "" subglos_list: list[tuple[str, str]] = [] subglos_key, subglos_text = "", "" def add_subglos() -> None: nonlocal main_text, subglos_key, subglos_text subglos_list.append((subglos_key, subglos_text)) main_text += f"\t[m2][ref]{subglos_key}[/ref]\n" subglos_key, subglos_text = "", "" for line in text_lines: s_line = line.strip() if s_line == "@": if subglos_key: add_subglos() continue if s_line.startswith("@ "): if subglos_key: add_subglos() subglos_key = s_line[2:].strip() continue if subglos_key: subglos_text += line continue main_text += line if subglos_key: add_subglos() if not terms: log.warning(f"No terms: {main_text=}") return defi = self.transform( text=main_text, header=terms[0], ) if defiTitles: defi = "
          ".join(defiTitles + [defi]) byteProgress = (self._file.tell(), self._fileSize) if self._fileSize else None yield self._glos.newEntry( terms, defi, byteProgress=byteProgress, ) for term, text in subglos_list: yield self._glos.newEntry( [term], self.transform( text=text, header=term, ), byteProgress=byteProgress, ) pyglossary-5.0.9/pyglossary/plugins/dsl/test.py000066400000000000000000000007051476751035500217700ustar00rootroot00000000000000import sys from os.path import dirname sys.path.insert(0, dirname(dirname(dirname(dirname(__file__))))) # noqa: E402 from pyglossary.plugins.dsl.transform import Transformer if __name__ == "__main__": inputStr = sys.argv[1] tr = Transformer( inputStr, currentKey="HEADWORD", ) result, err = tr.transform() if err: print(f"Error: {err} in {inputStr!r}") elif result is None: print("ERROR: result is None") else: print(result.output) pyglossary-5.0.9/pyglossary/plugins/dsl/title.py000066400000000000000000000055461476751035500221420ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING, NamedTuple, cast from xml.sax.saxutils import escape from pyglossary.core import log from .transform import Transformer if TYPE_CHECKING: from ._types import ErrorType from ._types import TitleLexType as LexType from ._types import TitleTransformerType as TransformerType __all__ = ["TitleTransformer"] def lexRoot(tr: TransformerType) -> tuple[LexType, ErrorType]: # if tr.start < tr.pos: # log.warning(f"incomplete buffer near pos {tr.pos}") if tr.end(): return None, None c = tr.next() if tr.end(): tr.addText2(c) return None, None if c == "\\": return lexBackslash, None if c == "(": # tr.resetBuf() return lexParan, None if c == "{": return lexCurly, None tr.addText2(c) # tr.resetBuf() return lexRoot, None def lexBackslash(tr: TransformerType) -> tuple[LexType, ErrorType]: c = tr.next() tr.addText2(c) # tr.resetBuf() return lexRoot, None def lexParan(tr: TransformerType) -> tuple[LexType, ErrorType]: while True: if tr.end(): log.warning(f"unclosed '(' near pos {tr.pos}") return None, None c = tr.next() if c == "\\": if tr.end(): log.warning("unclosed '(' near pos {tr.pos}") return None, None tr.addText("\\" + tr.next()) continue if c == ")": break tr.addText(c) return lexRoot, None def lexCurly(tr: TransformerType) -> tuple[LexType, ErrorType]: start = tr.pos while True: if tr.end(): log.warning("unclosed '{{' near pos {tr.pos}") return None, None c = tr.next() if c == "\\": if tr.end(): log.warning("unclosed '{{' near pos {tr.pos}") return None, None tr.next() continue if c == "}": break tr2 = Transformer(tr.input[start : tr.pos - 1]) res, err = tr2.transform() if err or res is None: return None, err tr.title += res.output return lexRoot, None class TitleResult(NamedTuple): output: str outputAlt: str class TitleTransformer: def __init__( self, inputTitle: str, ) -> None: self.input = inputTitle # self.start = 0 self.pos = 0 self.output = "" self.outputAlt = "" self.title = "" def end(self) -> bool: return self.pos >= len(self.input) def move(self, chars: int) -> None: self.pos += chars def next(self) -> str: c = self.input[self.pos] self.pos += 1 return c # noqa: RET504 # def resetBuf(self) -> str: # self.start = self.pos def addText(self, st: str) -> None: self.output += escape(st) self.title += escape(st) def addText2(self, st: str) -> None: esc = escape(st) self.output += esc self.outputAlt += esc self.title += esc def transform(self) -> tuple[TitleResult | None, ErrorType]: lex: LexType = lexRoot tr = cast("TransformerType", 
self) while lex is not None: lex, err = lex(tr) if err: return None, err return TitleResult( output=self.output, outputAlt=self.outputAlt, ), None pyglossary-5.0.9/pyglossary/plugins/dsl/tools.toml000066400000000000000000000003171476751035500224730ustar00rootroot00000000000000["ABBYY Lingvo"] web = "https://www.lingvo.ru/" wiki_ru = "https://ru.wikipedia.org/wiki/ABBYY_Lingvo" platforms = [ "Windows", "Mac", "Android", "iOS", "Windows Mobile", "Symbian",] license = "Proprietary" pyglossary-5.0.9/pyglossary/plugins/dsl/transform.py000066400000000000000000000071331476751035500230260ustar00rootroot00000000000000from __future__ import annotations import re from typing import TYPE_CHECKING, NamedTuple, cast from xml.sax.saxutils import escape from pyglossary.core import log from .lex import lexRoot if TYPE_CHECKING: from ._types import ErrorType, LexType, TransformerType __all__ = ["Transformer"] _re_comment_block = re.compile(r"\{\{([^}]*)\}\}") class Result(NamedTuple): output: str resFileSet: set[str] # called Lexer by Rob Pike in "Lexical Scanning" video) class Transformer: def __init__( # noqa: PLR0913 self, inputText: str, currentKey: str = "", exampleColor: str = "steelblue", audio: bool = True, abbrev: str = "", # "" or "css" abbrevDict: dict[str, str] | None = None, ) -> None: self.input = inputText self.start = 0 self.pos = 0 self.labelOpen = False self.label = "" self.output = "" self.resFileSet: set[str] = set() self.abbrev = abbrev self.abbrevDict = abbrevDict self.attrs: dict[str, str] = {} self.attrName = "" self.currentKey = currentKey self.exampleColor = exampleColor self.audio = audio def end(self) -> bool: return self.pos >= len(self.input) def move(self, chars: int) -> None: self.pos += chars # self.absPos += chars def next(self) -> str: c = self.input[self.pos] self.pos += 1 # self.absPos += 1 return c # noqa: RET504 def resetBuf(self) -> None: self.start = self.pos self.attrName = "" self.attrs = {} def follows(self, st: str) -> bool: """Check if current position follows the string `st`.""" pos = self.pos for c in st: if pos >= len(self.input): return False if self.input[pos] not in c: return False pos += 1 return True def skipAny(self, chars: str) -> None: """Skip any of the characters that are in `chars`.""" pos = self.pos while True: if pos >= len(self.input): break if self.input[pos] not in chars: break pos += 1 self.pos = pos def addHtml(self, st: str) -> None: if self.labelOpen: self.label += st return self.output += st def addText(self, st: str) -> None: st = escape(st) if self.labelOpen: self.label += st return self.output += st def closeLabel(self) -> None: # print(f"Label: {self.label!r}") desc = None if self.abbrev: desc = self.abbrevDict.get(self.label) if desc: self.output += ( '{self.label}' ) else: self.output += ( '' + self.label + "" ) self.label = "" self.labelOpen = False def closeTag(self, tag: str) -> None: assert tag if tag == "m": self.addHtml("
</p>
          ") elif tag == "b": self.addHtml("") elif tag in {"u", "'"}: self.addHtml("") elif tag == "i": self.addHtml("") elif tag == "sup": self.addHtml("") elif tag == "sub": self.addHtml("") elif tag in {"c", "t"}: self.addHtml("") elif tag == "p": self.closeLabel() elif tag == "*": self.addHtml("") elif tag == "ex": self.addHtml("") elif tag in { "ref", "url", "s", "trn", "!trn", "trs", "!trs", "lang", "com", }: pass else: log.warning(f"unknown close tag {tag!r}") self.resetBuf() def transform(self) -> tuple[Result | None, ErrorType]: # TODO: implement these 2 with lex functions self.input = _re_comment_block.sub("", self.input) lex: LexType = lexRoot tr = cast("TransformerType", self) while lex is not None: lex, err = lex(tr) if err: return None, err if self.labelOpen: self.closeLabel() return Result(self.output, self.resFileSet), None pyglossary-5.0.9/pyglossary/plugins/ebook_epub2/000077500000000000000000000000001476751035500220475ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/ebook_epub2/__init__.py000066400000000000000000000022711476751035500241620ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.flags import ALWAYS from pyglossary.option import ( BoolOption, IntOption, Option, StrOption, ) from .writer import Writer __all__ = [ "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "epub2" name = "Epub2" description = "EPUB-2 E-Book" extensions = (".epub",) extensionCreate = ".epub" singleFile = True sortOnWrite = ALWAYS sortKeyName = "ebook" kind = "package" wiki = "https://en.wikipedia.org/wiki/EPUB" website = None # EPUB-3: https://www.w3.org/community/epub3/ optionsProp: dict[str, Option] = { "group_by_prefix_length": IntOption( comment="Prefix length for grouping", ), # "group_by_prefix_merge_min_size": IntOption(), # "group_by_prefix_merge_across_first": BoolOption(), "compress": BoolOption( comment="Enable compression", ), "keep": BoolOption( comment="Keep temp files", ), "include_index_page": BoolOption( comment="Include index page", ), "css": StrOption( comment="Path to css file", ), "cover_path": StrOption( comment="Path to cover file", ), } pyglossary-5.0.9/pyglossary/plugins/ebook_epub2/tools.toml000066400000000000000000000016231476751035500241060ustar00rootroot00000000000000[calibre] web = "https://calibre-ebook.com/" wiki = "https://en.wikipedia.org/wiki/Calibre_(software)" repo = "https://github.com/kovidgoyal/calibre" platforms = [ "Linux", "Windows", "Mac",] license = "GPL" [Okular] web = "https://okular.kde.org/" wiki = "https://en.wikipedia.org/wiki/Okular" repo = "https://invent.kde.org/graphics/okular" platforms = [ "Linux", "Windows", "Mac",] license = "GPL" ["Book Reader"] web = "https://f-droid.org/en/packages/com.github.axet.bookreader/" repo = "https://gitlab.com/axet/android-book-reader" platforms = [ "Android",] license = "GPL" ["Kobo eReader"] web = "https://www.kobo.com" platforms = [ "Kobo eReader",] license = "Proprietary" ["Icecream Ebook Reader"] web = "https://icecreamapps.com/Ebook-Reader/" platforms = [ "Windows",] license = "Proprietary" [Aldiko] web = "https://www.demarque.com/aldiko" platforms = [ "Android", "iOS",] license = "Proprietary" pyglossary-5.0.9/pyglossary/plugins/ebook_epub2/writer.py000066400000000000000000000146031476751035500237410ustar00rootroot00000000000000# -*- coding: utf-8 -*- # The MIT License (MIT) # Copyright © 2012-2016 Alberto Pettarin 
(alberto@albertopettarin.it) # Copyright © 2016-2019 Saeed Rasooli # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from __future__ import annotations import os from typing import TYPE_CHECKING, Any from pyglossary.ebook_base import EbookWriter if TYPE_CHECKING: from pyglossary.glossary_types import WriterGlossaryType __all__ = ["Writer"] def _newUUID() -> str: import uuid return str(uuid.uuid4()).replace("-", "") class Writer(EbookWriter): # these class attrs are only in Epub # MIMETYPE_CONTENTS, CONTAINER_XML_CONTENTS # NCX_TEMPLATE, NCX_NAVPOINT_TEMPLATE MIMETYPE_CONTENTS = "application/epub+zip" CONTAINER_XML_CONTENTS = """ """ NCX_TEMPLATE = """ {title} {ncx_items} """ NCX_NAVPOINT_TEMPLATE = """\t {text} """ CSS_CONTENTS = b"""@charset "UTF-8"; body { margin: 10px 25px 10px 25px; } h1 { font-size: 200%; } h2 { font-size: 150%; } p { margin-left: 0em; margin-right: 0em; margin-top: 0em; margin-bottom: 0em; line-height: 2em; text-align: justify; } a, a:focus, a:active, a:visited { color: black; text-decoration: none; } body.indexPage {} h1.indexTitle {} p.indexGroups { font-size: 150%; } span.indexGroup {} body.groupPage {} h1.groupTitle {} div.groupNavigation {} span.groupHeadword {} div.groupEntry { margin-top: 0; margin-bottom: 1em; } h2.groupHeadword { margin-left: 5%; } p.groupDefinition { margin-left: 10%; margin-right: 10%; } """ GROUP_XHTML_TEMPLATE = """ {title}
</title> <link rel="stylesheet" href="style.css" type="text/css" /> </head> <body id="groupPage" class="groupPage"> <h1 class="groupTitle">{group_title}</h1> <div class="groupNavigation"> <a href="{previous_link}">[ Previous ]</a> {index_link} <a href="{next_link}">[ Next ]</a> </div> {group_contents} </body> </html>""" GROUP_XHTML_INDEX_LINK = '\t\t<a href="index.xhtml">[ Index ]</a>' GROUP_XHTML_WORD_DEFINITION_TEMPLATE = """\t<div class="groupEntry"> <h2 class="groupHeadword">{headword}</h2> <p class="groupDefinition">{definition}</p> </div>
          """ OPF_TEMPLATE = """ {identifier} {sourceLang} {title} {creator} {copyright} {creationDate} {cover} {manifest} {spine} """ COVER_TEMPLATE = '' def __init__(self, glos: WriterGlossaryType) -> None: glos.setInfo( "uuid", os.getenv("EPUB_UUID") or glos.getInfo("uuid") or _newUUID(), ) EbookWriter.__init__( self, glos, ) @classmethod def cls_get_prefix( cls: type[EbookWriter], options: dict[str, Any], word: str, ) -> str: if not word: return "" length = options.get("group_by_prefix_length", cls._group_by_prefix_length) prefix = word[:length].lower() if prefix[0] < "a": return "SPECIAL" return prefix def get_prefix(self, word: str) -> str: if not word: return "" length = self._group_by_prefix_length prefix = word[:length].lower() if prefix[0] < "a": return "SPECIAL" return prefix def write_ncx(self, group_labels: list[str]) -> None: """ write_ncx only for epub. """ ncx_items: list[str] = [] index = 1 if self._include_index_page: ncx_items.append( self.NCX_NAVPOINT_TEMPLATE.format( index=index, text="Index", src="index.xhtml", ), ) index += 1 for group_label in group_labels: ncx_items.append( self.NCX_NAVPOINT_TEMPLATE.format( index=index, text=group_label, src=self.get_group_xhtml_file_name_from_index(index), ), ) index += 1 ncx_items_unicode = "\n".join(ncx_items) ncx_contents = self.NCX_TEMPLATE.format( identifier=self._glos.getInfo("uuid"), title=self._glos.getInfo("name"), ncx_items=ncx_items_unicode, ).encode("utf-8") self.add_file_manifest( "OEBPS/toc.ncx", "toc.ncx", ncx_contents, "application/x-dtbncx+xml", ) # inherits write from EbookWriter pyglossary-5.0.9/pyglossary/plugins/ebook_kobo/000077500000000000000000000000001476751035500217645ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/ebook_kobo/__init__.py000066400000000000000000000015451476751035500241020ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING from pyglossary.flags import NEVER from .writer import Writer if TYPE_CHECKING: from pyglossary.option import Option __all__ = [ "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "kobo" name = "Kobo" description = "Kobo E-Reader Dictionary" extensions = (".kobo",) extensionCreate = ".kobo.zip" singleFile = False kind = "package" sortOnWrite = NEVER wiki = "https://en.wikipedia.org/wiki/Kobo_eReader" website = ( "https://www.kobo.com", "www.kobo.com", ) # https://help.kobo.com/hc/en-us/articles/360017640093-Add-new-dictionaries-to-your-Kobo-eReader optionsProp: dict[str, Option] = {} # Penelope option: marisa_index_size=1000000 pyglossary-5.0.9/pyglossary/plugins/ebook_kobo/tools.toml000066400000000000000000000001451476751035500240210ustar00rootroot00000000000000["Kobo eReader"] web = "https://www.kobo.com" platforms = [ "Kobo eReader",] license = "Proprietary" pyglossary-5.0.9/pyglossary/plugins/ebook_kobo/writer.py000066400000000000000000000152101476751035500236510ustar00rootroot00000000000000# -*- coding: utf-8 -*- # The MIT License (MIT) # Copyright © 2012-2016 Alberto Pettarin (alberto@albertopettarin.it) # Copyright © 2022 Saeed Rasooli # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the 
Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from __future__ import annotations import re import unicodedata from gzip import compress, decompress from operator import itemgetter from pathlib import Path from pickle import dumps, loads from typing import TYPE_CHECKING from pyglossary import core from pyglossary.core import exc_note, log, pip from pyglossary.os_utils import indir if TYPE_CHECKING: from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] def _is_cyrillic_char(c: str) -> bool: # U+0400 - U+04FF: Cyrillic # U+0500 - U+052F: Cyrillic Supplement if "\u0400" <= c <= "\u052f": return True # U+2DE0 - U+2DFF: Cyrillic Extended-A if "\u2de0" <= c <= "\u2dff": return True # U+A640 - U+A69F: Cyrillic Extended-B if "\ua640" <= c <= "\ua69f": return True # U+1C80 - U+1C8F: Cyrillic Extended-C if "\u1c80" <= c <= "\u1c8f": return True # U+FE2E, U+FE2F: Combining Half Marks # U+1D2B, U+1D78: Phonetic Extensions return c in {"\ufe2e", "\ufe2f", "\u1d2b", "\u1d78"} def _fixFilename(fname: str) -> str: return Path(fname.replace("/", "2F").replace("\\", "5C")).name class Writer: WORDS_FILE_NAME = "words" depends = { "marisa_trie": "marisa-trie", } @staticmethod def stripFullHtmlError(entry: EntryType, error: str) -> None: log.error(f"error in stripFullHtml: {error}, words={entry.l_word!r}") def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" self._words: list[str] = [] self._img_pattern = re.compile( ']*?)?>', re.DOTALL, ) # img tag has no closing glos.stripFullHtml(errorHandler=self.stripFullHtmlError) def get_prefix(self, word: str) -> str: # noqa: PLR6301 if not word: return "11" wo = word[:2].strip().lower() if not wo: return "11" if wo[0] == "\x00": return "11" if len(wo) > 1 and wo[1] == "\x00": wo = wo[:1] if _is_cyrillic_char(wo[0]): return wo # if either of the first 2 chars are not unicode letters, return "11" for c in wo: if not unicodedata.category(c).startswith("L"): return "11" return wo.ljust(2, "a") def fix_defi(self, defi: str) -> str: # @pgaskin on #219: Kobo supports images in dictionaries, # but these have a lot of gotchas # (see https://pgaskin.net/dictutil/dicthtml/format.html). 
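# A possible sketch of the advice cited above (shrink the image, make it
# grayscale, embed it as a base64 data URL) -- not part of this plugin,
# which currently rewrites '<img' tags via _img_pattern instead. Assumes
# Pillow is installed; the helper name and size cap are illustrative:
#
#     import base64
#     import io
#
#     from PIL import Image
#
#     def imageToDataURL(data: bytes, maxSize: int = 200) -> str:
#         img = Image.open(io.BytesIO(data)).convert("L")  # force grayscale
#         img.thumbnail((maxSize, maxSize))  # shrink in place, keeps ratio
#         buf = io.BytesIO()
#         img.save(buf, format="JPEG", quality=60)
#         b64 = base64.b64encode(buf.getvalue()).decode("ascii")
#         return f"data:image/jpeg;base64,{b64}"
#
# The returned string could then be substituted for the src attribute of
# an <img> tag inside fix_defi() rather than dropping the tag.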
# Basically, The best way to do it is to encode the images as a # base64 data URL after shrinking it and making it grayscale # (if it's JPG, this is as simple as only keeping the Y channel) # for now we just skip data entries and remove ' Generator[None, EntryType, None]: import gzip dataEntryCount = 0 htmlHeader = '\n' groupCounter = 0 htmlContents = htmlHeader def writeGroup(lastPrefix: str) -> None: nonlocal htmlContents group_fname = _fixFilename(lastPrefix) htmlContents += "" core.trace( log, f"writeGroup: {lastPrefix!r}, {group_fname!r}, count={groupCounter}", ) with gzip.open(group_fname + ".html", mode="wb") as gzipFile: gzipFile.write(htmlContents.encode("utf-8")) htmlContents = htmlHeader allWords: list[str] = [] # TODO: switch to SQLite, like StarDict writer data: list[tuple[str, bytes]] = [] while True: entry = yield if entry is None: break if entry.isData(): dataEntryCount += 1 continue l_word = entry.l_word allWords += l_word wordsByPrefix: dict[str, list[str]] = {} for word in l_word: prefix = self.get_prefix(word) if prefix in wordsByPrefix: wordsByPrefix[prefix].append(word) else: wordsByPrefix[prefix] = [word] defi = self.fix_defi(entry.defi) mainHeadword = l_word[0] for prefix, p_words in wordsByPrefix.items(): headword, *variants = p_words if headword != mainHeadword: headword = f"{mainHeadword}, {headword}" data.append( ( prefix, compress( dumps( ( headword, variants, defi, ), ), ), ), ) del entry log.info("Kobo: sorting entries...") data.sort(key=itemgetter(0)) log.info("Kobo: writing entries...") lastPrefix = "" for prefix, row in data: headword, variants, defi = loads(decompress(row)) if lastPrefix and prefix != lastPrefix: writeGroup(lastPrefix) groupCounter = 0 lastPrefix = prefix htmlVariants = "".join( f'' for v in variants ) body = f"
<div><b>{headword}</b><var>{htmlVariants}</var><br/>{defi}</div>
          " htmlContents += f'{body}\n' groupCounter += 1 del data if groupCounter > 0: writeGroup(lastPrefix) if dataEntryCount > 0: log.warning( f"ignored {dataEntryCount} files (data entries)" " and replaced ' None: try: import marisa_trie # type: ignore # noqa: F401 except ModuleNotFoundError as e: exc_note(e, f"Run `{pip} install marisa-trie` to install") raise self._filename = filename def write(self) -> Generator[None, EntryType, None]: with indir(self._filename, create=True): yield from self.write_groups() def finish(self) -> None: import marisa_trie with indir(self._filename, create=False): trie = marisa_trie.Trie(self._words) trie.save(self.WORDS_FILE_NAME) self._filename = "" pyglossary-5.0.9/pyglossary/plugins/ebook_kobo_dictfile/000077500000000000000000000000001476751035500236275ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/ebook_kobo_dictfile/__init__.py000066400000000000000000000014771476751035500257510ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, EncodingOption, Option, ) from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "kobo_dictfile" name = "Dictfile" description = "Kobo E-Reader Dictfile (.df)" extensions = (".df",) extensionCreate = ".df" singleFile = True kind = "text" wiki = "" website = ( "https://pgaskin.net/dictutil/dictgen/#dictfile-format", "dictgen - dictutil", ) # https://github.com/pgaskin/dictutil optionsProp: dict[str, Option] = { "encoding": EncodingOption(), "extract_inline_images": BoolOption(comment="Extract inline images"), } pyglossary-5.0.9/pyglossary/plugins/ebook_kobo_dictfile/reader.py000066400000000000000000000071441476751035500254510ustar00rootroot00000000000000# -*- coding: utf-8 -*- # The MIT License (MIT) # Copyright © 2020-2021 Saeed Rasooli # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
from __future__ import annotations from typing import TYPE_CHECKING from pyglossary.core import exc_note, pip from pyglossary.image_utils import extractInlineHtmlImages from pyglossary.text_reader import TextGlossaryReader if TYPE_CHECKING: from pyglossary.glossary_types import ReaderGlossaryType __all__ = ["Reader"] class Reader(TextGlossaryReader): useByteProgress = True depends = { "mistune": "mistune==3.0.1", } _extract_inline_images: bool = True def __init__(self, glos: ReaderGlossaryType) -> None: TextGlossaryReader.__init__(self, glos, hasInfo=False) def open(self, filename: str) -> None: try: import mistune # type: ignore # noqa: F401 except ModuleNotFoundError as e: exc_note(e, f"Run `{pip} install mistune` to install") raise TextGlossaryReader.open(self, filename) self._glos.setDefaultDefiFormat("h") @classmethod def isInfoWord(cls, _word: str) -> bool: return False @classmethod def fixInfoWord(cls, _word: str) -> str: raise NotImplementedError def fixDefi( self, defi: str, html: bool, ) -> tuple[str, list[tuple[str, str]] | None]: import mistune defi = ( defi.replace("\n @", "\n@") .replace("\n :", "\n:") .replace("\n &", "\n&") .replace("
<p><br /></p>", "<p></p>") .replace("<p><br/></p>", "<p></p>") .replace("<p><br></p>", "<p></p>
          ") ) defi = defi.strip() if html: pass else: defi = mistune.html(defi) images: list[tuple[str, str]] | None = None if self._extract_inline_images: defi, images = extractInlineHtmlImages( defi, self._glos.tmpDataDir, fnamePrefix="", # maybe f"{self._pos:06d}-" ) return defi, images def nextBlock( self, ) -> tuple[list[str], str, list[tuple[str, str]] | None]: words: list[str] = [] defiLines: list[str] = [] html = False while True: line = self.readline() if not line: break line = line.rstrip("\n\r") if line.startswith("@"): if words: self._bufferLine = line defi, images = self.fixDefi("\n".join(defiLines), html=html) return words, defi, images words = [line[1:].strip()] continue if line.startswith(": "): defiLines.append(line[2:]) continue if line.startswith("::"): continue if line.startswith("&"): words.append(line[1:].strip()) continue if line.startswith(""): line = line[6:] html = True defiLines.append(line) if words: defi, images = self.fixDefi("\n".join(defiLines), html=html) return words, defi, images raise StopIteration pyglossary-5.0.9/pyglossary/plugins/ebook_kobo_dictfile/tools.toml000066400000000000000000000001621476751035500256630ustar00rootroot00000000000000[dictgen] web = "https://pgaskin.net/dictutil/dictgen/" platforms = [ "Linux", "Windows", "Mac",] license = "MIT" pyglossary-5.0.9/pyglossary/plugins/ebook_kobo_dictfile/writer.py000066400000000000000000000056141476751035500255230ustar00rootroot00000000000000# -*- coding: utf-8 -*- # The MIT License (MIT) # Copyright © 2020-2021 Saeed Rasooli # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
from __future__ import annotations import os from os.path import isdir from typing import TYPE_CHECKING from pyglossary.core import log from pyglossary.io_utils import nullTextIO if TYPE_CHECKING: import io from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] def fixWord(word: str) -> str: return word.replace("\n", " ") def escapeDefi(defi: str) -> str: return defi.replace("\n@", "\n @").replace("\n:", "\n :").replace("\n&", "\n &") class Writer: _encoding: str = "utf-8" @staticmethod def stripFullHtmlError(entry: EntryType, error: str) -> None: log.error(f"error in stripFullHtml: {error}, words={entry.l_word!r}") def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._file: io.TextIOBase = nullTextIO glos.stripFullHtml(errorHandler=self.stripFullHtmlError) def finish(self) -> None: self._file.close() if not os.listdir(self._resDir): os.rmdir(self._resDir) def open(self, filename: str) -> None: self._file = open(filename, "w", encoding=self._encoding) # dictgen's ParseDictFile does not seem to support glossary info / metedata self._resDir = filename + "_res" if not isdir(self._resDir): os.mkdir(self._resDir) def write( self, ) -> Generator[None, EntryType, None]: fileObj = self._file resDir = self._resDir while True: entry = yield if entry is None: break if entry.isData(): entry.save(resDir) continue words = entry.l_word defi = entry.defi entry.detectDefiFormat() if entry.defiFormat == "h": defi = f"{entry.defi}" fileObj.write(f"@ {fixWord(words[0])}\n") for alt in words[1:]: fileObj.write(f"& {fixWord(alt)}\n") fileObj.write(f"{escapeDefi(defi)}\n\n") pyglossary-5.0.9/pyglossary/plugins/ebook_mobi/000077500000000000000000000000001476751035500217605ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/ebook_mobi/__init__.py000066400000000000000000000037111476751035500240730ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.flags import DEFAULT_YES from pyglossary.option import ( BoolOption, FileSizeOption, IntOption, Option, StrOption, ) from .writer import Writer __all__ = [ "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "mobi" name = "Mobi" description = "Mobipocket (.mobi) E-Book" extensions = (".mobi",) extensionCreate = ".mobi" singleFile = False sortOnWrite = DEFAULT_YES sortKeyName = "ebook" kind = "package" wiki = "https://en.wikipedia.org/wiki/Mobipocket" website = None optionsProp: dict[str, Option] = { "group_by_prefix_length": IntOption( comment="Prefix length for grouping", ), # "group_by_prefix_merge_min_size": IntOption(), # "group_by_prefix_merge_across_first": BoolOption(), # specific to mobi "kindlegen_path": StrOption( comment="Path to kindlegen executable", ), "compress": BoolOption( disabled=True, comment="Enable compression", ), "keep": BoolOption( comment="Keep temp files", ), "include_index_page": BoolOption( disabled=True, comment="Include index page", ), "css": StrOption( # disabled=True, comment="Path to css file", ), "cover_path": StrOption( # disabled=True, comment="Path to cover file", ), "file_size_approx": FileSizeOption( comment="Approximate size of each xhtml file (example: 200kb)", ), "hide_word_index": BoolOption( comment="Hide headword in tap-to-check interface", ), "spellcheck": BoolOption( comment="Enable wildcard search and spell correction during word lookup", # "Maybe it just 
enables the kindlegen's spellcheck." ), "exact": BoolOption( comment="Exact-match Parameter", # "I guess it only works for inflections" ), } extraDocs = [ ( "Other Requirements", "Install [KindleGen](https://wiki.mobileread.com/wiki/KindleGen)" " for creating Mobipocket e-books.", ), ] pyglossary-5.0.9/pyglossary/plugins/ebook_mobi/tools.toml000066400000000000000000000012671476751035500240230ustar00rootroot00000000000000["Amazon Kindle"] web = "https://www.amazon.com/kindle" platforms = [ "Amazon Kindle",] license = "Proprietary" [calibre] web = "https://calibre-ebook.com/" wiki = "https://en.wikipedia.org/wiki/Calibre_(software)" repo = "https://github.com/kovidgoyal/calibre" platforms = [ "Linux", "Windows", "Mac",] license = "GPL" [Okular] web = "https://okular.kde.org/" wiki = "https://en.wikipedia.org/wiki/Okular" repo = "https://invent.kde.org/graphics/okular" platforms = [ "Linux", "Windows", "Mac",] license = "GPL" ["Book Reader"] web = "https://f-droid.org/en/packages/com.github.axet.bookreader/" repo = "https://gitlab.com/axet/android-book-reader" platforms = [ "Android",] license = "GPL" pyglossary-5.0.9/pyglossary/plugins/ebook_mobi/writer.py000066400000000000000000000222221476751035500236460ustar00rootroot00000000000000# -*- coding: utf-8 -*- # The MIT License (MIT) # Copyright © 2012-2016 Alberto Pettarin (alberto@albertopettarin.it) # Copyright © 2016-2022 Saeed Rasooli # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
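# --- Illustrative sketch (editor's addition, not part of the original source). ---
# Converting a glossary to Mobi with an explicit kindlegen binary could look
# like this; the input file and the kindlegen path are hypothetical:
#
#     from pyglossary.glossary_v2 import ConvertArgs, Glossary
#     Glossary.init()
#     Glossary().convert(ConvertArgs(
#         "dict.ifo",
#         outputFilename="dict.mobi",
#         writeOptions={"kindlegen_path": "/usr/local/bin/kindlegen"},
#     ))
#
# Without kindlegen on PATH (and no kindlegen_path option), the writer below
# leaves the raw OEBPS files in the output directory and logs a warning.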
from __future__ import annotations import os from datetime import datetime from os.path import join, split from typing import TYPE_CHECKING from pyglossary.core import log from pyglossary.ebook_base import EbookWriter from pyglossary.langs import Lang if TYPE_CHECKING: from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] class GroupStateBySize: def __init__(self, writer: Writer) -> None: self.writer = writer self.group_index = -1 self.reset() def reset(self) -> None: self.group_contents: list[str] = [] self.group_size = 0 def add(self, entry: EntryType) -> None: defi = entry.defi content = self.writer.format_group_content( entry.l_word[0], defi, variants=entry.l_word[1:], ) self.group_contents.append(content) self.group_size += len(content.encode("utf-8")) class Writer(EbookWriter): _compress: bool = False _keep: bool = False _kindlegen_path: str = "" _file_size_approx: int = 271360 _hide_word_index: bool = False _spellcheck: bool = True _exact: bool = False CSS_CONTENTS = b""""@charset "UTF-8";""" GROUP_XHTML_TEMPLATE = """ {group_contents} """ GROUP_XHTML_WORD_DEFINITION_TEMPLATE = """ {headword_visible}{infl}
</idx:orth>
<p>{definition}</p>
</idx:entry>
          """ GROUP_XHTML_WORD_INFL_TEMPLATE = """ {iforms_str} """ GROUP_XHTML_WORD_IFORM_TEMPLATE = """""" OPF_TEMPLATE = """ {title} {sourceLang} {identifier} {creator} {copyright} {description} Dictionaries {sourceLang} {targetLang} {cover} {manifest} {spine} """ def __init__(self, glos: WriterGlossaryType) -> None: import uuid EbookWriter.__init__( self, glos, ) glos.setInfo("uuid", str(uuid.uuid4()).replace("-", "")) # FIXME: check if full html pages/documents as entry do work # glos.stripFullHtml(errorHandler=None) def get_prefix(self, word: str) -> str: if not word: return "" length = self._group_by_prefix_length prefix = word[:length].lower() if prefix[0] < "a": return "SPECIAL" return prefix def format_group_content( self, word: str, defi: str, variants: list[str] | None = None, ) -> str: hide_word_index = self._hide_word_index infl = "" if variants: iforms_list = [ self.GROUP_XHTML_WORD_IFORM_TEMPLATE.format( inflword=variant, exact_str=' exact="yes"' if self._exact else "", ) for variant in variants ] infl = "\n" + self.GROUP_XHTML_WORD_INFL_TEMPLATE.format( iforms_str="\n".join(iforms_list), ) headword = self.escape_if_needed(word) defi = self.escape_if_needed(defi) if hide_word_index: headword_visible = "" value_headword = f' value="{headword}"' else: headword_visible = "\n" + self._glos.wordTitleStr(headword) value_headword = "" return self.GROUP_XHTML_WORD_DEFINITION_TEMPLATE.format( spellcheck_str=' spell="yes"' if self._spellcheck else "", headword_visible=headword_visible, value_headword=value_headword, definition=defi, infl=infl, ) @staticmethod def getLangCode(lang: Lang | None) -> str: return lang.code if isinstance(lang, Lang) else "" def get_opf_contents( self, manifest_contents: str, spine_contents: str, ) -> bytes: cover = "" if self.cover: cover = self.COVER_TEMPLATE.format(cover=self.cover) creationDate = datetime.now().strftime("%Y-%m-%d") return self.OPF_TEMPLATE.format( identifier=self._glos.getInfo("uuid"), # use Language code instead name for kindlegen sourceLang=self.getLangCode(self._glos.sourceLang), targetLang=self.getLangCode(self._glos.targetLang), title=self._glos.getInfo("name"), creator=self._glos.author, copyright=self._glos.getInfo("copyright"), description=self._glos.getInfo("description"), creationDate=creationDate, cover=cover, manifest=manifest_contents, spine=spine_contents, ).encode("utf-8") def write_groups(self) -> Generator[None, EntryType, None]: def add_group(state: GroupStateBySize) -> None: if state.group_size <= 0: return state.group_index += 1 index = state.group_index + self.GROUP_START_INDEX group_xhtml_path = self.get_group_xhtml_file_name_from_index(index) self.add_file_manifest( "OEBPS/" + group_xhtml_path, group_xhtml_path, self.GROUP_XHTML_TEMPLATE.format( group_contents=self.GROUP_XHTML_WORD_DEFINITION_JOINER.join( state.group_contents, ), ).encode("utf-8"), "application/xhtml+xml", ) state = GroupStateBySize(self) while True: entry = yield if entry is None: break if entry.isData(): continue if state.group_size >= self._file_size_approx: add_group(state) state.reset() state.add(entry) add_group(state) def write(self) -> Generator[None, EntryType, None]: import shutil import subprocess filename = self._filename kindlegen_path = self._kindlegen_path yield from EbookWriter.write(self) # download kindlegen from this page: # https://www.amazon.com/gp/feature.html?ie=UTF8&docId=1000765211 # run kindlegen if not kindlegen_path: kindlegen_path = shutil.which("kindlegen") or "" if not kindlegen_path: log.warning( f"Not running 
kindlegen, the raw files are located in {filename}", ) log.warning( "Provide KindleGen path with: --write-options 'kindlegen_path=...'", ) return if not self._glos.sourceLang: log.error("Source language is empty, pass --source-lang=LANG flag.") if not self._glos.targetLang: log.error("Target language is empty, pass --target-lang=LANG flag.") # name = self._glos.getInfo("name") log.info(f"Creating .mobi file with kindlegen, using {kindlegen_path!r}") direc, filename = split(filename) cmd = [ kindlegen_path, join(filename, "OEBPS", "content.opf"), "-gen_ff_mobi7", "-dont_append_source", "-verbose", "-o", "content.mobi", ] proc = subprocess.Popen( cmd, cwd=direc, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, ) output = proc.communicate() log.info(output[0].decode("utf-8")) mobi_path_abs = os.path.join(filename, "OEBPS", "content.mobi") log.info(f"Created .mobi file with kindlegen: {mobi_path_abs}") pyglossary-5.0.9/pyglossary/plugins/edict2/000077500000000000000000000000001476751035500210255ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/edict2/.gitignore000066400000000000000000000000371476751035500230150ustar00rootroot00000000000000.*.swp __pycache__ *.pyc venv pyglossary-5.0.9/pyglossary/plugins/edict2/__init__.py000066400000000000000000000026071476751035500231430ustar00rootroot00000000000000from __future__ import annotations from pyglossary.option import ( BoolOption, EncodingOption, Option, ) from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "edict2" name = "EDICT2" description = "EDICT2 (CEDICT) (.u8)" extensions = (".u8",) extensionCreate = "" singleFile = True kind = "text" wiki = "https://en.wikipedia.org/wiki/CEDICT" website = None # Websites / info for different uses of format: # CC-CEDICT: Chinese-English (122k entries) # "https://cc-cedict.org/editor/editor.php", "CC-CEDICT Editor" # HanDeDict: Chinese-German (144k entries) # "https://handedict.zydeo.net/de/download", # "Herunterladen - HanDeDict @ Zydeo Wörterbuch Chinesisch-Deutsch" # CFDICT: Chinese-French (56k entries) # "https://chine.in/mandarin/dictionnaire/CFDICT/", # "Dictionnaire chinois français _ 汉法词典 — Chine Informations" # CC-Canto is Pleco Software's addition of Cantonese language readings # in Jyutping transcription to CC-CEDICT # "https://cantonese.org/download.html", optionsProp: dict[str, Option] = { "encoding": EncodingOption(), "traditional_title": BoolOption( comment="Use traditional Chinese for entry titles/keys", ), "colorize_tones": BoolOption( comment="Set to false to disable tones coloring", ), } pyglossary-5.0.9/pyglossary/plugins/edict2/conv.py000066400000000000000000000062571476751035500223560ustar00rootroot00000000000000from __future__ import annotations import re from io import BytesIO from typing import TYPE_CHECKING, NamedTuple, cast from lxml import etree as ET from pyglossary.core import log from .pinyin import convert from .summarize import summarize if TYPE_CHECKING: from collections.abc import Callable, Sequence from pyglossary.lxml_types import T_htmlfile __all__ = [ "Article", "parse_line_simp", "parse_line_trad", "render_article", "render_syllables_color", "render_syllables_no_color", ] _re_line = re.compile(r"^([^ ]+) ([^ ]+) \[([^\]]+)\] /(.+)/$") _COLORS = { "": "black", "1": "red", "2": "orange", "3": "green", "4": "blue", "5": "black", } def parse_line_trad(line: str) -> tuple[str, str, str, 
list[str]] | None: line = line.strip() match = _re_line.match(line) if match is None: return None trad, simp, pinyin, eng = match.groups() pinyin = pinyin.replace("u:", "v") return trad, simp, pinyin, eng.split("/") def parse_line_simp(line: str) -> tuple[str, str, str, list[str]] | None: line = line.strip() match = _re_line.match(line) if match is None: return None trad, simp, pinyin, eng = match.groups() pinyin = pinyin.replace("u:", "v") return simp, trad, pinyin, eng.split("/") class Article(NamedTuple): first: str second: str pinyin: str eng: list[str] def names(self) -> list[str]: return [self.first, self.second, self.pinyin] + list(map(summarize, self.eng)) def render_syllables_no_color( hf: T_htmlfile, syllables: Sequence[str], _tones: Sequence[str], ) -> None: with hf.element("div", style="display: inline-block"): for syllable in syllables: with hf.element("font", color=""): hf.write(syllable) def render_syllables_color( hf: T_htmlfile, syllables: Sequence[str], tones: Sequence[str], ) -> None: if len(syllables) != len(tones): log.warning(f"unmatched tones: {syllables=}, {tones=}") render_syllables_no_color(hf, syllables, tones) return with hf.element("div", style="display: inline-block"): for index, syllable in enumerate(syllables): with hf.element("font", color=_COLORS[tones[index]]): hf.write(syllable) # @lru_cache(maxsize=128) def _convert_pinyin(pinyin: str) -> tuple[Sequence[str], Sequence[str]]: return tuple(zip(*map(convert, pinyin.split()), strict=False)) # type: ignore def render_article( render_syllables: Callable, article: Article, ) -> tuple[list[str], str]: names = article.names() # pinyin_tones = [convert(syl) for syl in pinyin.split()] pinyin_list, tones = _convert_pinyin(article.pinyin) f = BytesIO() with ET.htmlfile(f, encoding="utf-8") as _hf: # noqa: PLR1702 hf = cast("T_htmlfile", _hf) with hf.element("div", style="border: 1px solid; padding: 5px"): with hf.element("div"): with hf.element("big"): render_syllables(hf, names[0], tones) if names[1] != names[0]: hf.write("\xa0/\xa0") # "\xa0" --> " " == " " render_syllables(hf, names[1], tones) hf.write(ET.Element("br")) with hf.element("big"): render_syllables(hf, pinyin_list, tones) with hf.element("div"): with hf.element("ul"): for defn in article.eng: with hf.element("li"): hf.write(defn) return names, f.getvalue().decode("utf-8") pyglossary-5.0.9/pyglossary/plugins/edict2/pinyin.py000066400000000000000000000015471476751035500227140ustar00rootroot00000000000000# coding=utf-8 # based on https://github.com/zkoch/CEDICT_Parser from __future__ import annotations __all__ = ["convert"] _TONES = { "a1": "ā", "a2": "á", "a3": "ǎ", "a4": "à", "e1": "ē", "e2": "é", "e3": "ě", "e4": "è", "i1": "ī", "i2": "í", "i3": "ǐ", "i4": "ì", "o1": "ō", "o2": "ó", "o3": "ǒ", "o4": "ò", "u1": "ū", "u2": "ú", "u3": "ǔ", "u4": "ù", "v1": "ǖ", "v2": "ǘ", "v3": "ǚ", "v4": "ǜ", } # using v for the umlauted u _VOWELS = ("a", "e", "o", "iu", "ui", "i", "u", "v") def convert(word: str) -> tuple[str, str]: tone = word[-1] pinyin = word[:-1].lower() if tone == "5": return pinyin, tone if tone not in {"1", "2", "3", "4"}: return word, "" for vowel in _VOWELS: if vowel in pinyin: vowel1 = vowel[-1] return pinyin.replace(vowel1, _TONES[vowel1 + tone]), tone return pinyin, tone pyglossary-5.0.9/pyglossary/plugins/edict2/reader.py000066400000000000000000000040451476751035500226440ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING from pyglossary.core import log from pyglossary.io_utils import nullTextIO 
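# --- Illustrative example (editor's addition, not part of the original source). ---
# A CC-CEDICT/EDICT2 line and what parse_line_trad() from .conv yields:
#
#     line = "中國 中国 [Zhong1 guo2] /China/Middle Kingdom/"
#     parse_line_trad(line)
#     # -> ("中國", "中国", "Zhong1 guo2", ["China", "Middle Kingdom"])
#
# parse_line_simp() returns the same fields with the simplified form first;
# convert() from .pinyin then turns "Zhong1" into ("zhōng", "1") for display.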
from .conv import ( Article, parse_line_simp, parse_line_trad, render_article, render_syllables_color, render_syllables_no_color, ) if TYPE_CHECKING: import io from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] class Reader: useByteProgress = True depends = { "lxml": "lxml", } _encoding: str = "utf-8" _traditional_title: bool = False _colorize_tones: bool = True def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self.file: io.TextIOBase = nullTextIO self._fileSize = 0 def open(self, filename: str) -> None: # self._glos.sourceLangName = "Chinese" # self._glos.targetLangName = "English" cfile = self.file = open(filename, encoding=self._encoding) if cfile.seekable(): cfile.seek(0, 2) self._fileSize = cfile.tell() cfile.seek(0) # self._glos.setInfo("input_file_size", f"{self._fileSize}") else: log.warning("EDICT2 Reader: file is not seekable") def close(self) -> None: self.file.close() self.file = nullTextIO def __len__(self) -> int: return 0 def __iter__(self) -> Iterator[EntryType]: file = self.file fileSize = self._fileSize glos = self._glos render_syllables = ( render_syllables_color if self._colorize_tones else render_syllables_no_color ) parse_line = parse_line_trad if self._traditional_title else parse_line_simp while True: line = file.readline() if not line: break line = line.rstrip("\n") if not line: continue if line.startswith("#"): continue parts = parse_line(line) if parts is None: log.warning(f"bad line: {line!r}") continue names, article_text = render_article( render_syllables, Article(*parts), ) entry = glos.newEntry( names, article_text, defiFormat="h", byteProgress=(file.tell(), fileSize) if fileSize else None, ) yield entry pyglossary-5.0.9/pyglossary/plugins/edict2/summarize.py000066400000000000000000000035121476751035500234140ustar00rootroot00000000000000from __future__ import annotations import re import string __all__ = ["summarize"] _parenthetical = re.compile(r"\([^)]+?\)") _punct_table = {ord(p): " " for p in string.punctuation if p not in "-'"} _stops = { "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now", "d", "ll", "m", "o", "re", "ve", "y", "ain", "aren", "couldn", "didn", "doesn", "hadn", "hasn", "haven", "isn", "ma", "mightn", "mustn", "needn", "shan", "shouldn", "wasn", "weren", "won", "wouldn", } def summarize(phrase: str) -> str: phrase = _parenthetical.sub("", phrase) phrase = phrase.translate(_punct_table) words = phrase.split() relevant_words = [word for word in words if word not in _stops] if not relevant_words: relevant_words = 
words return " ".join(relevant_words[:10]) pyglossary-5.0.9/pyglossary/plugins/edlin/000077500000000000000000000000001476751035500207465ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/edlin/__init__.py000066400000000000000000000013211476751035500230540ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, EncodingOption, Option, ) from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "edlin" name = "Edlin" # Editable Linked List of Entries description = "EDLIN" extensions = (".edlin",) extensionCreate = ".edlin/" singleFile = False kind = "directory" wiki = "" website = None optionsProp: dict[str, Option] = { "encoding": EncodingOption(), "prev_link": BoolOption(comment="Enable link to previous entry"), } pyglossary-5.0.9/pyglossary/plugins/edlin/reader.py000066400000000000000000000062671476751035500225750ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import os from os.path import dirname, isdir, isfile, join from typing import TYPE_CHECKING from pyglossary.core import log from pyglossary.text_utils import ( splitByBarUnescapeNTB, unescapeNTB, ) if TYPE_CHECKING: from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] class Reader: useByteProgress = False _encoding: str = "utf-8" def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._clear() def close(self) -> None: self._clear() def _clear(self) -> None: self._filename = "" self._prev_link = True self._wordCount = None self._rootPath = None self._resDir = "" self._resFileNames: list[str] = [] def open(self, filename: str) -> None: from pyglossary.json_utils import jsonToData if isdir(filename): infoFname = join(filename, "info.json") elif isfile(filename): infoFname = filename filename = dirname(filename) else: raise ValueError( f"error while opening {filename!r}: no such file or directory", ) self._filename = filename with open(infoFname, encoding=self._encoding) as infoFp: info = jsonToData(infoFp.read()) self._wordCount = info.pop("wordCount") self._prev_link = info.pop("prev_link") self._rootPath = info.pop("root") for key, value in info.items(): self._glos.setInfo(key, value) self._resDir = join(filename, "res") if isdir(self._resDir): self._resFileNames = os.listdir(self._resDir) else: self._resDir = "" self._resFileNames = [] def __len__(self) -> int: if self._wordCount is None: log.error("called len() on a reader which is not open") return 0 return self._wordCount + len(self._resFileNames) def __iter__(self) -> Iterator[EntryType]: if not self._rootPath: raise RuntimeError("iterating over a reader while it's not open") wordCount = 0 nextPath = self._rootPath while nextPath != "END": wordCount += 1 # before or after reading word and defi # (and skipping empty entry)? 
FIXME with open( join(self._filename, nextPath), encoding=self._encoding, ) as _file: header = _file.readline().rstrip() if self._prev_link: _prevPath, nextPath = header.split(" ") else: nextPath = header word = _file.readline() if not word: yield None # update progressbar continue defi = _file.read() if not defi: log.warning( f"Edlin Reader: no definition for word {word!r}, skipping", ) yield None # update progressbar continue word = word.rstrip() defi = defi.rstrip() if self._glos.alts: word = splitByBarUnescapeNTB(word) if len(word) == 1: word = word[0] else: word = unescapeNTB(word, bar=False) # defi = unescapeNTB(defi) yield self._glos.newEntry(word, defi) if wordCount != self._wordCount: log.warning( f"{wordCount} words found, " f"wordCount in info.json was {self._wordCount}", ) self._wordCount = wordCount resDir = self._resDir for fname in self._resFileNames: with open(join(resDir, fname), "rb") as _file: yield self._glos.newDataEntry( fname, _file.read(), ) pyglossary-5.0.9/pyglossary/plugins/edlin/tools.toml000066400000000000000000000000001476751035500227710ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/edlin/writer.py000066400000000000000000000063021476751035500226350ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import os from os.path import isdir, join from typing import TYPE_CHECKING from pyglossary.text_utils import ( escapeNTB, ) if TYPE_CHECKING: from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] def _makeDir(direc: str) -> None: if not isdir(direc): os.makedirs(direc) class Writer: _encoding: str = "utf-8" _prev_link: bool = True def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._clear() def finish(self) -> None: self._clear() def open(self, filename: str) -> None: self._filename = filename self._resDir = join(filename, "res") os.makedirs(filename) os.mkdir(self._resDir) def _clear(self) -> None: self._filename = "" self._resDir = "" self._encoding = "utf-8" self._hashSet: set[str] = set() # self._wordCount = None @staticmethod def hashToPath(h: str) -> str: return h[:2] + "/" + h[2:] def getEntryHash(self, entry: EntryType) -> str: """ Return hash string for given entry don't call it twice for one entry, if you do you will get a different hash string. 
""" from hashlib import sha1 hash_ = sha1(entry.s_word.encode("utf-8")).hexdigest()[:8] # noqa: S324 if hash_ not in self._hashSet: self._hashSet.add(hash_) return hash_ index = 0 while True: tmp_hash = hash_ + f"{index:x}" if tmp_hash not in self._hashSet: self._hashSet.add(tmp_hash) return tmp_hash index += 1 def saveEntry( self, thisEntry: EntryType, thisHash: str, prevHash: str | None, nextHash: str | None, ) -> None: dpath = join(self._filename, thisHash[:2]) _makeDir(dpath) with open( join(dpath, thisHash[2:]), "w", encoding=self._encoding, ) as toFile: nextPath = self.hashToPath(nextHash) if nextHash else "END" if self._prev_link: prevPath = self.hashToPath(prevHash) if prevHash else "START" header = prevPath + " " + nextPath else: header = nextPath toFile.write( "\n".join( [ header, escapeNTB(thisEntry.s_word, bar=False), thisEntry.defi, ], ), ) def write(self) -> Generator[None, EntryType, None]: from pyglossary.json_utils import dataToPrettyJson thisEntry = yield if thisEntry is None: raise ValueError("glossary is empty") count = 1 rootHash = thisHash = self.getEntryHash(thisEntry) prevHash = None while True: nextEntry = yield if nextEntry is None: break if nextEntry.isData(): nextEntry.save(self._resDir) continue nextHash = self.getEntryHash(nextEntry) self.saveEntry(thisEntry, thisHash, prevHash, nextHash) thisEntry = nextEntry prevHash, thisHash = thisHash, nextHash count += 1 self.saveEntry(thisEntry, thisHash, prevHash, None) with open( join(self._filename, "info.json"), "w", encoding=self._encoding, ) as toFile: info = {} info["name"] = self._glos.getInfo("name") info["root"] = self.hashToPath(rootHash) info["prev_link"] = self._prev_link info["wordCount"] = count # info["modified"] = info |= self._glos.getExtraInfos(["name", "root", "prev_link", "wordCount"]) toFile.write(dataToPrettyJson(info)) pyglossary-5.0.9/pyglossary/plugins/formats_common/000077500000000000000000000000001476751035500226765ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/formats_common/__init__.py000066400000000000000000000035011476751035500250060ustar00rootroot00000000000000import logging import os import sys from os.path import ( exists, isdir, isfile, join, split, splitext, ) from pprint import pformat from pyglossary.core import rootDir sys.path.insert(0, rootDir) # noqa: E402 from pyglossary import core from pyglossary.compression import ( compressionOpen, stdCompressions, ) from pyglossary.core import ( cacheDir, pip, ) from pyglossary.flags import ( ALWAYS, DEFAULT_NO, DEFAULT_YES, NEVER, YesNoAlwaysNever, ) from pyglossary.glossary_types import EntryType, ReaderGlossaryType, WriterGlossaryType from pyglossary.option import ( BoolOption, DictOption, EncodingOption, FileSizeOption, FloatOption, HtmlColorOption, IntOption, ListOption, NewlineOption, Option, StrOption, ) from pyglossary.os_utils import indir from pyglossary.text_utils import toStr log = logging.getLogger("pyglossary") enable = False lname = "" name = "Unknown" description = "Unknown" extensions: "tuple[str, ...]" = () extensionCreate = "" singleFile = False kind = "" wiki = "" website = None # key is option/argument name, value is instance of Option optionsProp: "dict[str, Option]" = {} sortOnWrite: YesNoAlwaysNever = DEFAULT_NO __all__ = [ "ALWAYS", "DEFAULT_NO", "DEFAULT_YES", "NEVER", "BoolOption", "DictOption", "EncodingOption", "EntryType", "FileSizeOption", "FloatOption", "HtmlColorOption", "IntOption", "ListOption", "NewlineOption", "ReaderGlossaryType", "StrOption", "WriterGlossaryType", 
"YesNoAlwaysNever", "cacheDir", "compressionOpen", "core", "description", "enable", "exists", "extensionCreate", "extensions", "indir", "isdir", "isfile", "join", "kind", "lname", "log", "logging", "name", "optionsProp", "os", "pformat", "pip", "rootDir", "singleFile", "sortOnWrite", "split", "splitext", "stdCompressions", "toStr", "website", "wiki", ] pyglossary-5.0.9/pyglossary/plugins/freedict/000077500000000000000000000000001476751035500214405ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/freedict/__init__.py000066400000000000000000000010241476751035500235460ustar00rootroot00000000000000# -*- coding: utf-8 -*- from .options import optionsProp from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "freedict" name = "FreeDict" description = "FreeDict (.tei)" extensions = (".tei",) extensionCreate = ".tei" singleFile = True kind = "text" wiki = "https://github.com/freedict/fd-dictionaries/wiki" website = ( "https://freedict.org/", "FreeDict.org", ) pyglossary-5.0.9/pyglossary/plugins/freedict/options.py000066400000000000000000000016271476751035500235130ustar00rootroot00000000000000# -*- coding: utf-8 -*- from pyglossary.option import ( BoolOption, IntOption, Option, StrOption, ) __all__ = ["optionsProp"] optionsProp: "dict[str, Option]" = { "resources": BoolOption( comment="Enable resources / data files", ), "discover": BoolOption( comment="Find and show unsupported tags", ), "auto_rtl": BoolOption( allowNone=True, comment="Auto-detect and mark Right-to-Left text", ), "auto_comma": BoolOption( comment="Auto-detect comma sign based on text", ), "comma": StrOption( customValue=True, values=[", ", "، "], comment="Comma sign (following space) to use as separator", ), "word_title": BoolOption( comment="Add headwords title to beginning of definition", ), "pron_color": StrOption( comment="Pronunciation color", ), "gram_color": StrOption( comment="Grammar color", ), "example_padding": IntOption( comment="Padding for examples (in px)", ), } pyglossary-5.0.9/pyglossary/plugins/freedict/reader.py000066400000000000000000000522061476751035500232610ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import re from dataclasses import dataclass from io import BytesIO, IOBase from os.path import dirname, isfile, join from typing import TYPE_CHECKING, cast if TYPE_CHECKING: from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.lxml_types import Element, T_htmlfile from pyglossary.compression import compressionOpen, stdCompressions from pyglossary.core import exc_note, log, pip from pyglossary.html_utils import unescape_unicode from pyglossary.io_utils import nullBinaryIO from pyglossary.langs import langDict from pyglossary.langs.writing_system import getWritingSystemFromText from .options import optionsProp from .utils import XMLLANG, ReaderUtils __all__ = ["Reader"] _TEI = "{http://www.tei-c.org/ns/1.0}" _ENTRY = f"{_TEI}entry" _INCLUDE = "{http://www.w3.org/2001/XInclude}include" _NAMESPACE = {None: "http://www.tei-c.org/ns/1.0"} @dataclass(slots=True) class _ParsedSense: transCits: list[Element] defs: list[Element] grams: list[Element] notes: list[Element] refs: list[Element] usages: list[Element] xrList: list[Element] exampleCits: list[Element] langs: list[Element] class Reader(ReaderUtils): useByteProgress = True compressions = 
stdCompressions depends = { "lxml": "lxml", } _discover: bool = False _auto_rtl: bool | None = None _auto_comma: bool = True _comma: str = ", " _word_title: bool = False _pron_color: str = "gray" _gram_color: str = "green" _example_padding: int = 10 gramClass = "grammar" supportedTags: set[str] = { f"{_TEI}{tag}" for tag in ( "entry", "form", # entry.form "orth", # entry.form.orth "pron", # entry.form.pron "sense", # entry.sense "cit", # entry.sense.cit "quote", # entry.sense.cit.quote "gramGrp", # entry.sense.cit.gramGrp "pos", # entry.sense.cit.gramGrp.pos "gen", # entry.sense.cit.gramGrp.gen "number", # entry.sense.cit.gramGrp.number "num", # entry.sense.cit.gramGrp.num ) } posMapping: dict[str, str] = { "n": "noun", "v": "verb", "pn": "pronoun", "pron": "pronoun", "prep": "preposition", "conj": "conjunction", "adj": "adjective", "adv": "adverb", # "numeral", "interjection", "suffix", "particle" # "indefinitePronoun" } genderMapping: dict[str, str] = { "m": "male", "masc": "male", "f": "female", "fem": "female", "n": "neutral", "neut": "neutral", # "m;f" "adj": "adjective", } numberMapping: dict[str, str] = { "pl": "plural", "sing": "singular", } subcMapping: dict[str, str] = { "t": "transitive", "i": "intransitive", } noteTypes: set[str] = { "sense", "stagr", "stagk", "def", "usage", "hint", "status", "editor", "dom", "infl", "obj", "lbl", } def writeRef( # noqa: PLR6301 self, hf: T_htmlfile, ref: Element, ) -> None: target = ref.get("target") attrib: dict[str, str] = {} if target: if "://" in target: attrib["class"] = "external" else: target = f"bword://{ref.text}" with hf.element("a", href=target, attrib=attrib): hf.write(ref.text or "") def writeQuote( self, hf: T_htmlfile, elem: Element, ) -> None: self.writeWithDirection(hf, elem, "div") def writeTransCit( self, hf: T_htmlfile, elem: Element, ) -> None: from lxml import etree as ET children = elem.xpath("child::node()") if not children: return assert isinstance(children, list) quotes: list[Element] = [] sense = ET.Element(f"{_TEI}sense") for child in children: if isinstance(child, str): child = child.strip() # noqa: PLW2901 if child: hf.write(child) log.warning(f"text directly inside : {child}") continue if child.__class__.__name__ == "_Comment": continue if child.tag == f"{_TEI}quote": quotes.append(child) continue if child.tag in {f"{_TEI}gramGrp", f"{_TEI}usg", f"{_TEI}note"}: sense.append(child) continue if child.tag == f"{_TEI}cit": # TODO continue log.warning( f"unknown tag {child.tag!r} inside translation " f": {self.tostring(child)}", ) self.makeList( hf, quotes, self.writeQuote, single_prefix="", ) if next(sense.iterchildren(), False) is not None: self.writeSense(hf, sense) def writeDef( self, hf: T_htmlfile, elem: Element, ) -> None: # sep = None # if self._cif_newline: # sep = ET.Element("br") count = 0 def writeChild(item: str | Element, depth: int) -> None: nonlocal count if isinstance(item, str): item = item.strip() if not item: return if count > 0: hf.write(self.getCommaSep(item)) # with hf.element(self.getTitleTag(item)): hf.write(item) return if item.tag == f"{_TEI}ref": if item.text: if count > 0: hf.write(self.getCommaSep(item.text)) self.writeRef(hf, item) else: log.warning(f"ref without text: {self.tostring(item)}") return for child in item.xpath("child::node()"): writeChild(child, depth + 1) if depth < 1: count += 1 for child in elem.xpath("child::node()"): writeChild(child, 0) def setAttribLangDir( self, attrib: dict[str, str], ) -> None: try: lang = attrib.pop(XMLLANG) except KeyError: return attrib["lang"] 
= lang if self._auto_rtl: langObj = langDict[lang] if langObj: attrib["dir"] = "rtl" if langObj.rtl else "ltr" def writeWithDirection( self, hf: T_htmlfile, child: Element, tag: str, ) -> None: attrib: dict[str, str] = dict(child.attrib) self.setAttribLangDir(attrib) try: type_ = attrib.pop("type") except KeyError: pass else: if type_ != "trans": attrib["class"] = type_ with hf.element(tag, attrib=attrib): self.writeRichText(hf, child) def writeRichText( self, hf: T_htmlfile, el: Element, ) -> None: from lxml import etree as ET for child in el.xpath("child::node()"): if isinstance(child, str): hf.write(child) continue if child.tag == f"{_TEI}ref": self.writeRef(hf, child) continue if child.tag == f"{_TEI}br": hf.write(ET.Element("br")) continue if child.tag == f"{_TEI}p": with hf.element("p", **child.attrib): self.writeRichText(hf, child) continue if child.tag == f"{_TEI}div": self.writeWithDirection(hf, child, "div") continue if child.tag == f"{_TEI}span": self.writeWithDirection(hf, child, "span") continue self.writeRichText(hf, child) def writeNote( self, hf: T_htmlfile, note: Element, ) -> None: self.writeRichText(hf, note) def parseSenseSense( # noqa: PLR0912 self, sense: Element, ) -> _ParsedSense: # this element can be 1st-level (directly under ) # or 2nd-level transCits: list[Element] = [] defs: list[Element] = [] grams: list[Element] = [] notes: list[Element] = [] refs: list[Element] = [] usages: list[Element] = [] xrList: list[Element] = [] exampleCits: list[Element] = [] langs: list[Element] = [] for child in sense.iterchildren(): if child.tag == f"{_TEI}cit": if child.attrib.get("type", "trans") == "trans": transCits.append(child) elif child.attrib.get("type") == "example": exampleCits.append(child) else: log.warning(f"unknown cit type: {self.tostring(child)}") continue if child.tag == f"{_TEI}def": defs.append(child) continue if child.tag == f"{_TEI}note": type_ = child.attrib.get("type") if not type_: notes.append(child) elif type_ in {"pos", "gram"}: grams.append(child) elif type_ in self.noteTypes: notes.append(child) else: log.warning(f"unknown note type {type_}") notes.append(child) continue if child.tag == f"{_TEI}ref": refs.append(child) continue if child.tag == f"{_TEI}usg": if not child.text: log.warning(f"empty usg: {self.tostring(child)}") continue usages.append(child) continue if child.tag == f"{_TEI}lang": langs.append(child) continue if child.tag in {f"{_TEI}sense", f"{_TEI}gramGrp"}: continue if child.tag == f"{_TEI}xr": xrList.append(child) continue log.warning(f"unknown tag {child.tag} in ") return _ParsedSense( transCits=transCits, defs=defs, grams=grams, notes=notes, refs=refs, usages=usages, xrList=xrList, exampleCits=exampleCits, langs=langs, ) # TODO: break it down # PLR0912 Too many branches (16 > 12) def writeSenseSense( # noqa: PLR0912 self, hf: T_htmlfile, sense: Element, ) -> int: # this element can be 1st-level (directly under ) # or 2nd-level ps = self.parseSenseSense(sense) for child in ps.langs: self.writeLangTag(hf, child) self.makeList( hf, ps.defs, self.writeDef, single_prefix="", ) if ps.grams: color = self._gram_color attrib = { "class": self.gramClass, } if color: attrib["color"] = color with hf.element("div"): for i, gram in enumerate(ps.grams): text = gram.text or "" if i > 0: hf.write(self.getCommaSep(text)) with hf.element("font", attrib=attrib): hf.write(text) self.makeList( hf, ps.notes, self.writeNote, single_prefix="", ) self.makeList( hf, ps.transCits, self.writeTransCit, single_prefix="", ) if ps.refs: with hf.element("div"): 
hf.write("Related: ") for i, ref in enumerate(ps.refs): if i > 0: hf.write(" | ") self.writeRef(hf, ref) for xr in ps.xrList: with hf.element("div"): self.writeRichText(hf, xr) if ps.usages: with hf.element("div"): hf.write("Usage: ") for i, usg in enumerate(ps.usages): text = usg.text or "" if i > 0: hf.write(self.getCommaSep(text)) hf.write(text) for cit in ps.exampleCits: with hf.element( "div", attrib={ "class": "example", "style": f"padding: {self._example_padding}px 0px;", }, ): for quote in cit.findall("quote", _NAMESPACE): self.writeWithDirection(hf, quote, "div") for cit2 in cit.findall("cit", _NAMESPACE): for quote in cit2.findall("quote", _NAMESPACE): quote.attrib.update(cit2.attrib) self.writeWithDirection(hf, quote, "div") return len(ps.transCits) + len(ps.exampleCits) def getCommaSep(self, sample: str) -> str: if sample and self._auto_comma: ws = getWritingSystemFromText(sample) if ws: return ws.comma return self._comma def writeGramGroups( self, hf: T_htmlfile, gramGrpList: list[Element], ) -> None: from lxml import etree as ET color = self._gram_color attrib = { "class": self.gramClass, } if color: attrib["color"] = color for gramGrp in gramGrpList: parts: list[str] = [] for child in gramGrp.iterchildren(): part = self.normalizeGramGrpChild(child) if part: parts.append(part) if not parts: continue sep = self.getCommaSep(parts[0]) text = sep.join(parts) with hf.element("font", attrib=attrib): hf.write(text) hf.write(ET.Element("br")) def writeGramGroupChildren( self, hf: T_htmlfile, elem: Element, ) -> None: self.writeGramGroups(hf, elem.findall("gramGrp", _NAMESPACE)) def writeSense( self, hf: T_htmlfile, sense: Element, ) -> None: # this element is 1st-level (directly under ) self.writeGramGroupChildren(hf, sense) self.makeList( hf, sense.findall("sense", _NAMESPACE), self.writeSenseSense, single_prefix="", ) self.writeSenseSense(hf, sense) def writeSenseList( self, hf: T_htmlfile, senseList: list[Element], ) -> None: # these elements are 1st-level (directly under ) if not senseList: return if self._auto_rtl and self.isRTL(senseList[0]): with hf.element("div", dir="rtl"): self.makeList( hf, senseList, self.writeSense, ordered=(len(senseList) > 3), ) return self.makeList( hf, senseList, self.writeSense, # list_type="A", ) def normalizeGramGrpChild(self, elem: Element) -> str: # noqa: PLR0912 # child can be "pos" or "gen" tag = elem.tag text = elem.text if not text: return "" text = text.strip() if tag == f"{_TEI}pos": return self.posMapping.get(text.lower(), text) if tag == f"{_TEI}gen": return self.genderMapping.get(text.lower(), text) if tag in {f"{_TEI}num", f"{_TEI}number"}: return self.numberMapping.get(text.lower(), text) if tag == f"{_TEI}subc": return self.subcMapping.get(text.lower(), text) if tag == f"{_TEI}gram": type_ = elem.get("type") if type_: if type_ == "pos": return self.posMapping.get(text.lower(), text) if type_ == "gen": return self.genderMapping.get(text.lower(), text) if type_ in {"num", "number"}: return self.numberMapping.get(text.lower(), text) if type_ == "subc": return self.subcMapping.get(text.lower(), text) log.warning(f"unrecognize type={type_!r}: {self.tostring(elem)}") return text log.warning(f" with no type: {self.tostring(elem)}") return text if tag == f"{_TEI}note": return text if tag == f"{_TEI}colloc": return "" log.warning( f"unrecognize GramGrp child tag: {elem.tag!r}: {self.tostring(elem)}", ) return "" def getEntryByElem( # noqa: PLR0912 self, entry: Element, ) -> EntryType: from lxml import etree as ET glos = self._glos keywords: 
list[str] = []
		buff = BytesIO()
		pron_color = self._pron_color
		if self._discover:
			for elem in entry.iter():
				if elem.tag not in self.supportedTags:
					self._discoveredTags[elem.tag] = elem

		def br() -> Element:
			return ET.Element("br")

		inflectedKeywords: list[str] = []
		for form in entry.findall(".//form", _NAMESPACE):
			inflected = form.get("type") == "infl"
			for orth in form.findall("orth", _NAMESPACE):
				if not orth.text:
					continue
				if inflected:
					inflectedKeywords.append(orth.text)
				else:
					keywords.append(orth.text)
		keywords += inflectedKeywords
		pronList = [
			pron.text.strip("/")
			for pron in entry.findall("form/pron", _NAMESPACE)
			if pron.text
		]
		senseList = entry.findall("sense", _NAMESPACE)
		with ET.htmlfile(buff, encoding="utf-8") as hf:
			with hf.element("div"):
				if self._word_title:
					for keyword in keywords:
						with hf.element(glos.titleTag(keyword)):
							hf.write(keyword)
						hf.write(br())
				# TODO: "form/usg"
				# Brit
				# US
				# ...
				if pronList:
					for i, pron in enumerate(pronList):
						if i > 0:
							hf.write(self.getCommaSep(pron))
						hf.write("/")
						with hf.element("font", color=pron_color):
							hf.write(pron)
						hf.write("/")
					hf.write(br())
					hf.write("\n")
				hf_ = cast("T_htmlfile", hf)
				self.writeGramGroupChildren(hf_, entry)
				self.writeSenseList(hf_, senseList)
		defi = buff.getvalue().decode("utf-8")
		# defi = defi.replace("\xa0", " ")  # do we need to do this?
		file = self._file
		return self._glos.newEntry(
			keywords,
			defi,
			defiFormat="h",
			byteProgress=(file.tell(), self._fileSize) if self._progress else None,
		)

	def setWordCount(self, header: Element) -> None:
		extent_elem = header.find(".//extent", _NAMESPACE)
		if extent_elem is None:
			log.warning(
				"did not find 'extent' tag in metadata, progress bar will not work",
			)
			return
		extent = extent_elem.text or ""
		if not extent.endswith(" headwords"):
			log.warning(f"unexpected {extent=}")
			return
		try:
			self._wordCount = int(extent.split(" ")[0].replace(",", ""))
		except Exception:
			log.exception(f"unexpected {extent=}")

	def stripParag(self, elem: Element) -> str:
		text = self.tostring(elem)
		text = self._p_pattern.sub("\\2", text)
		return text  # noqa: RET504

	def stripParagList(
		self,
		elems: list[Element],
	) -> str:
		lines: list[str] = []
		for elem in elems:
			for line in self.stripParag(elem).split("\n"):
				line = line.strip()  # noqa: PLW2901
				if not line:
					continue
				lines.append(line)
		return "\n".join(lines)

	def setGlosInfo(self, key: str, value: str) -> None:
		self._glos.setInfo(key, unescape_unicode(value))

	def setCopyright(self, header: Element) -> None:
		elems = header.findall(".//availability//p", _NAMESPACE)
		if not elems:
			log.warning("did not find copyright")
			return
		copyright_ = self.stripParagList(elems)
		copyright_ = self.replaceRefLink(copyright_)
		self.setGlosInfo("copyright", copyright_)
		log.debug(f"Copyright: {copyright_!r}")

	def setPublisher(self, header: Element) -> None:
		elem = header.find(".//publisher", _NAMESPACE)
		if elem is None or not elem.text:
			log.warning("did not find publisher")
			return
		self.setGlosInfo("publisher", elem.text)

	def setCreationTime(self, header: Element) -> None:
		elem = header.find(".//publicationStmt/date", _NAMESPACE)
		if elem is None or not elem.text:
			return
		self.setGlosInfo("creationTime", elem.text)

	def replaceRefLink(self, text: str) -> str:
		return self._ref_pattern.sub('<a href="bword://\\1">\\2</a>', text)

	def setDescription(self, header: Element) -> None:
		elems: list[Element] = []
		for tag in ("sourceDesc", "projectDesc"):
			elems += header.findall(f".//{tag}//p", _NAMESPACE)
		desc = self.stripParagList(elems)
		if not desc:
			return
		website_list: list[str] = []
		for match in self._website_pattern.findall(desc):
			if not match[1]:
				continue
			website_list.append(match[1])
		if website_list:
			website = " | ".join(website_list)
			self.setGlosInfo("website", website)
			desc = self._website_pattern.sub("", desc).strip()
			log.debug(f"Website: {website}")
		desc = self.replaceRefLink(desc)
		self.setGlosInfo("description", desc)
		log.debug(
			"------------ Description: ------------\n"
			f"{desc}\n"
			"--------------------------------------",
		)

	def setMetadata(self, header: Element) -> None:
		self.setWordCount(header)
		title = header.find(".//title", _NAMESPACE)
		if title is not None and title.text:
			self.setGlosInfo("name", title.text)
		edition = header.find(".//edition", _NAMESPACE)
		if edition is not None and edition.text:
			self.setGlosInfo("edition", edition.text)
		self.setCopyright(header)
		self.setPublisher(header)
		self.setCreationTime(header)
		self.setDescription(header)

	def __init__(self, glos: ReaderGlossaryType) -> None:
		self._glos = glos
		self._filename = ""
		self._dirname = ""
		self._file: IOBase = nullBinaryIO
		self._fileSize = 0
		self._progress = True
		self._wordCount = 0
		self._discoveredTags: dict[str, Element] = {}
		self._p_pattern = re.compile(
			"<p( [^<>]*?)?>(.*?)</p>
          ", re.DOTALL, ) self._ref_pattern = re.compile( '(.*?)', ) self._website_pattern = re.compile( 'Home: <(ref|ptr) target="(.*)">(.*)', ) def __len__(self) -> int: return self._wordCount def close(self) -> None: self._file.close() self._file = nullBinaryIO self._filename = "" self._fileSize = 0 def open( self, filename: str, ) -> None: try: from lxml import etree as ET except ModuleNotFoundError as e: exc_note(e, f"Run `{pip} install lxml` to install") raise self._filename = filename self._dirname = dirname(filename) cfile = compressionOpen(filename, mode="rb") if cfile.seekable(): cfile.seek(0, 2) self._fileSize = cfile.tell() cfile.seek(0) self._glos.setInfo("input_file_size", str(self._fileSize)) else: log.warning("FreeDict Reader: file is not seekable") self._progress = self._glos.progressbar and self._fileSize self._glos.setDefaultDefiFormat("h") if self._word_title: self._glos.setInfo("definition_has_headwords", "True") context = ET.iterparse( # type: ignore # noqa: PGH003 cfile, events=("end",), tag=f"{_TEI}teiHeader", ) for _, elem in context: self.setMetadata(elem) # type: ignore break cfile.close() def loadInclude(self, elem: Element) -> Reader | None: href = elem.attrib.get("href") if not href: log.error(f"empty href in {elem}") return None filename = join(self._dirname, href) if not isfile(filename): log.error(f"no such file {filename!r} from {elem}") return None reader = Reader(self._glos) for optName in optionsProp: attr = "_" + optName if hasattr(self, attr): setattr(reader, attr, getattr(self, attr)) reader.open(filename) return reader def __iter__(self) -> Iterator[EntryType]: from lxml import etree as ET if self._auto_rtl is None: glos = self._glos if (glos.sourceLang and glos.sourceLang.rtl) or ( glos.targetLang and glos.targetLang.rtl ): log.info("setting auto_rtl=True") self._auto_rtl = True self._file = compressionOpen(self._filename, mode="rb") context = ET.iterparse( # type: ignore # noqa: PGH003 self._file, events=("end",), tag=(_ENTRY, _INCLUDE), ) for _, _elem in context: elem = cast("Element", _elem) if elem.tag == _INCLUDE: reader = self.loadInclude(elem) if reader is not None: yield from reader reader.close() continue yield self.getEntryByElem(elem) # clean up preceding siblings to save memory # this can reduce memory usage from 1 GB to ~25 MB parent = elem.getparent() if parent is None: continue while elem.getprevious() is not None: del parent[0] if self._discoveredTags: log.info("Found unsupported tags") for elem in self._discoveredTags.values(): log.info(f"{self.tostring(elem)}\n") pyglossary-5.0.9/pyglossary/plugins/freedict/tools.toml000066400000000000000000000000001476751035500234630ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/freedict/utils.py000066400000000000000000000046171476751035500231620ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Callable from typing import Any from pyglossary.lxml_types import Element, T_htmlfile from pyglossary.core import log from pyglossary.langs import langDict from pyglossary.langs.writing_system import getWritingSystemFromText __all__ = ["XMLLANG", "ReaderUtils"] XMLLANG = "{http://www.w3.org/XML/1998/namespace}lang" class ReaderUtils: @staticmethod def tostring(elem: Element) -> str: from lxml import etree as ET return ( ET.tostring( elem, method="html", pretty_print=True, ) .decode("utf-8") .strip() ) @staticmethod def makeList( # noqa: PLR0913 hf: T_htmlfile, 
		input_objects: list[Any],
		processor: Callable,
		single_prefix: str = "",
		skip_single: bool = True,
		ordered: bool = True,
		list_type: str = "",
	) -> None:
		"""Wrap elements into <ol>
            if more than one element.""" if not input_objects: return if skip_single and len(input_objects) == 1: if single_prefix: hf.write(single_prefix) processor(hf, input_objects[0]) return attrib: dict[str, str] = {} if list_type: attrib["type"] = list_type with hf.element("ol" if ordered else "ul", attrib=attrib): for el in input_objects: with hf.element("li"): processor(hf, el) @staticmethod def getTitleTag(sample: str) -> str: ws = getWritingSystemFromText(sample) if ws: return ws.titleTag return "b" @staticmethod def isRTL(elem: Element) -> bool: lang = elem.get(XMLLANG) if lang is None: return False langObj = langDict[lang] if langObj is None: log.warning(f"unknown language {lang}") return False return bool(langObj.rtl) @classmethod def getLangDesc(cls, elem: Element) -> str | None: lang = elem.attrib.get(XMLLANG) if lang: langObj = langDict[lang] if not langObj: log.warning(f"unknown lang {lang!r} in {cls.tostring(elem)}") return None return langObj.name orig = elem.attrib.get("orig") if orig: return orig log.warning(f"unknown lang name in {cls.tostring(elem)}") return None @classmethod def writeLangTag( cls, hf: T_htmlfile, elem: Element, ) -> None: langDesc = cls.getLangDesc(elem) if not langDesc: return # TODO: make it Italic or change font color? if elem.text: hf.write(f"{langDesc}: {elem.text}") else: hf.write(f"{langDesc}") pyglossary-5.0.9/pyglossary/plugins/gettext_po/000077500000000000000000000000001476751035500220355ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/gettext_po/__init__.py000066400000000000000000000013611476751035500241470ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, Option, ) from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "gettext_po" name = "GettextPo" description = "Gettext Source (.po)" extensions = (".po",) extensionCreate = ".po" singleFile = True kind = "text" wiki = "https://en.wikipedia.org/wiki/Gettext" website = ( "https://www.gnu.org/software/gettext", "gettext - GNU Project", ) optionsProp: dict[str, Option] = { "resources": BoolOption(comment="Enable resources / data files"), } pyglossary-5.0.9/pyglossary/plugins/gettext_po/reader.py000066400000000000000000000060511476751035500236530ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import os from os.path import isdir from typing import TYPE_CHECKING from pyglossary.core import exc_note, log, pip from pyglossary.io_utils import nullTextIO from pyglossary.text_utils import splitByBar if TYPE_CHECKING: import io from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] class Reader: useByteProgress = False depends = { "polib": "polib", } def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._alts = glos.alts self.clear() def clear(self) -> None: self._filename = "" self._file: io.TextIOBase = nullTextIO self._wordCount: int | None = None self._resDir = "" self._resFileNames: list[str] = [] def open(self, filename: str) -> None: self._filename = filename self._file = open(filename, encoding="utf-8") self._resDir = filename + "_res" if isdir(self._resDir): self._resFileNames = os.listdir(self._resDir) else: self._resDir = "" self._resFileNames = [] def close(self) -> None: 
		self._file.close()
		self._file = nullTextIO
		self.clear()

	def __len__(self) -> int:
		from pyglossary.file_utils import fileCountLines

		if self._wordCount is None:
			log.debug("Try not to use len(reader) as it takes extra time")
			self._wordCount = fileCountLines(
				self._filename,
				newline=b"\nmsgid",
			)
		return self._wordCount

	def makeEntry(self, word: str, defi: str) -> EntryType:
		if self._alts:
			return self._glos.newEntry(splitByBar(word), defi)
		return self._glos.newEntry(word, defi)

	def __iter__(self) -> Iterator[EntryType]:  # noqa: PLR0912
		try:
			from polib import unescape as po_unescape
		except ModuleNotFoundError as e:
			exc_note(e, f"Run `{pip} install polib` to install")
			raise

		file = self._file
		word = ""
		defi = ""
		msgstr = False
		wordCount = 0
		for line_ in file:
			line = line_.strip()  # noqa: PLW2901
			if not line:
				continue
			if line.startswith("#"):
				continue
			if line.startswith("msgid "):
				if word:
					yield self.makeEntry(word, defi)
					wordCount += 1
					word = ""
					defi = ""
				else:
					pass  # TODO: parse defi and set glos info?
					# but this should be done in self.open
				word = po_unescape(line[6:])
				if word.startswith('"'):
					if len(word) < 2 or word[-1] != '"':
						raise ValueError(f"invalid po line: {line}")
					word = word[1:-1]
				msgstr = False
				continue
			if line.startswith("msgstr "):
				if msgstr:
					log.error("msgid omitted!")
				defi = po_unescape(line[7:])
				if defi.startswith('"'):
					if len(defi) < 2 or defi[-1] != '"':
						raise ValueError(f"invalid po line: {line}")
					defi = defi[1:-1]
				msgstr = True
				continue
			line = po_unescape(line)
			if line.startswith('"'):
				if len(line) < 2 or line[-1] != '"':
					raise ValueError(f"invalid po line: {line}")
				line = line[1:-1]
			if msgstr:
				defi += line
			else:
				word += line
		if word:
			yield self.makeEntry(word, defi)
			wordCount += 1
		self._wordCount = wordCount
pyglossary-5.0.9/pyglossary/plugins/gettext_po/tools.toml000066400000000000000000000003451476751035500240740ustar00rootroot00000000000000[gettext]
web = "https://www.gnu.org/software/gettext/"
platforms = [ "Linux", "Windows",]
license = "GPL"

[poEdit]
web = "https://github.com/vslavik/poedit"
platforms = [ "Linux", "Windows", "Mac",]
license = "MIT / Shareware"
pyglossary-5.0.9/pyglossary/plugins/gettext_po/writer.py000066400000000000000000000030271476751035500237250ustar00rootroot00000000000000# -*- coding: utf-8 -*-
from __future__ import annotations

from typing import TYPE_CHECKING

from pyglossary.core import exc_note, pip
from pyglossary.io_utils import nullTextIO

if TYPE_CHECKING:
	import io
	from collections.abc import Generator

	from pyglossary.glossary_types import EntryType, WriterGlossaryType

__all__ = ["Writer"]


class Writer:
	depends = {
		"polib": "polib",
	}

	_resources: bool = True

	def __init__(self, glos: WriterGlossaryType) -> None:
		self._glos = glos
		self._filename = ""
		self._file: io.TextIOBase = nullTextIO
		glos.preventDuplicateWords()

	def open(self, filename: str) -> None:
		try:
			from polib import escape as po_escape
		except ModuleNotFoundError as e:
			exc_note(e, f"Run `{pip} install polib` to install")
			raise

		self._filename = filename
		self._file = file = open(filename, mode="w", encoding="utf-8")
		file.write('#\nmsgid ""\nmsgstr ""\n')
		file.writelines(
			f'"{po_escape(key)}: {po_escape(value)}\\n"\n'
			for key, value in self._glos.iterInfo()
		)

	def finish(self) -> None:
		self._filename = ""
		self._file.close()
		self._file = nullTextIO

	def write(self) -> Generator[None, EntryType, None]:
		from polib import escape as po_escape

		file = self._file
		resources = self._resources
		filename = self._filename
		while True:
			entry = yield
			if entry is None:
				break
			if entry.isData():
				if resources:
entry.save(filename + "_res") continue file.write( f'msgid "{po_escape(entry.s_word)}"\n' f'msgstr "{po_escape(entry.defi)}"\n\n', ) pyglossary-5.0.9/pyglossary/plugins/html_dir/000077500000000000000000000000001476751035500214555ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/html_dir/__init__.py000066400000000000000000000021271476751035500235700ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, EncodingOption, IntOption, Option, StrOption, ) from .writer import Writer __all__ = [ "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "html_dir" name = "HtmlDir" description = "HTML Directory" extensions = (".hdir",) extensionCreate = ".hdir/" singleFile = False kind = "directory" wiki = "" website = None optionsProp: dict[str, Option] = { "encoding": EncodingOption(), "resources": BoolOption( comment="Enable resources / data files", ), "max_file_size": IntOption( comment="Maximum file size in bytes", ), "filename_format": StrOption( comment="Filename format, default: {n:05d}.html", ), "escape_defi": BoolOption( comment="Escape definitions", ), "dark": BoolOption( comment="Use dark style", ), "css": StrOption( comment="Path to css file", ), "word_title": BoolOption( comment="Add headwords title to beginning of definition", ), } pyglossary-5.0.9/pyglossary/plugins/html_dir/tools.toml000066400000000000000000000000001476751035500235000ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/html_dir/writer.py000066400000000000000000000273001476751035500233450ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import html import os import re import time from functools import lru_cache from os.path import isdir, isfile, join from typing import TYPE_CHECKING if TYPE_CHECKING: import io from collections.abc import Generator from pyglossary.glossary_types import ( EntryType, WriterGlossaryType, ) from pyglossary.core import log from pyglossary.text_utils import ( escapeNTB, unescapeNTB, ) __all__ = ["Writer"] _nbsp = "\xa0" # _nbsp = " " _darkStyle = """ body {{ background-color: #373737; color: #eee; }} a {{ color: #aaaaff; }} a.broken {{ color: #e0c0c0; }} a.no_ul {{ text-decoration: none; }} b.headword {{ font-size: 1.5em; color: #c7ffb9; }} h1 {{ font-size: 1.5em; color: #c7ffb9;}} h2 {{ font-size: 1.3em;}} h3 {{ font-size: 1.0em;}} h4 {{ font-size: 1.0em;}} h5 {{ font-size: 1.0em;}} h6 {{ font-size: 1.0em;}} """ class Writer: _encoding: str = "utf-8" _resources: bool = True _max_file_size: int = 102400 _filename_format: str = "{n:05d}.html" _escape_defi: bool = False _dark: bool = True _css: str = "" _word_title: bool = True @staticmethod def stripFullHtmlError(entry: EntryType, error: str) -> None: log.error(f"error in stripFullHtml: {error}, words={entry.l_word!r}") def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" self._fileObj: io.IOBase | None = None self._encoding = "utf-8" self._filename_format = "{n:05d}.html" self._tail = "" self._filenameList: list[str] = [] glos.stripFullHtml(errorHandler=self.stripFullHtmlError) self._resSrcPattern = re.compile(' src="([^"]*)"') def open(self, filename: str) -> None: self._filename = filename self._resDir = resDir = join(filename, "res") if not isdir(filename): os.mkdir(filename) if not isdir(resDir): os.mkdir(resDir) if self._css: self.copyCSS(self._css) def 
copyCSS(self, cssPath: str) -> None: import shutil shutil.copy(cssPath, join(self._filename, "style.css")) def finish(self) -> None: pass def getNextFilename(self) -> str: return self._filename_format.format( n=len(self._filenameList), ) def nextFile(self) -> io.TextIOBase: if self._fileObj: self._fileObj.write(self._tail) self._fileObj.close() filename = self.getNextFilename() self._filenameList.append(filename) self._fileObj = open( join( self._filename, filename, ), mode="w", encoding=self._encoding, ) return self._fileObj def fixLinks(self, linkTargetSet: set[str]) -> None: # noqa: PLR0912 import gc gc.collect() dirn = self._filename filenameList = self._filenameList fileByWord: dict[str, list[tuple[str, int]]] = {} for line in open(join(dirn, "index.txt"), encoding="utf-8"): line = line.rstrip("\n") # noqa: PLW2901 if not line: continue entryIndexStr, wordEsc, filename, _ = line.split("\t") entryIndex = int(entryIndexStr) # entryId = f"entry{entryIndex}" word = unescapeNTB(wordEsc) if word not in linkTargetSet: continue if word in fileByWord: fileByWord[word].append((filename, entryIndex)) else: fileByWord[word] = [(filename, entryIndex)] # with open(join(dirn, "fileByWord.json"), "w") as fileByWordFile: # json.dump(fileByWord, fileByWordFile, ensure_ascii=False, indent="\t") @lru_cache(maxsize=10) def getLinksByFile(fileIndex: int) -> io.TextIOBase: return open( join(dirn, f"links{fileIndex}"), mode="a", encoding="utf-8", ) log.info("") for line in open(join(dirn, "links.txt"), encoding="utf-8"): line = line.rstrip("\n") # noqa: PLW2901 if not line: continue target, fileIndexStr, x_start, x_size = line.split("\t") target = unescapeNTB(target) if target not in fileByWord: targetNew = "" else: targetFilename, targetEntryIndex = fileByWord[target][0] if targetFilename == filename: continue targetNew = f"{targetFilename}#entry{targetEntryIndex}" file = getLinksByFile(int(fileIndexStr)) file.write( f"{x_start}\t{x_size}\t{targetNew}\n", ) file.flush() linkTargetSet.clear() del fileByWord, linkTargetSet gc.collect() if os.sep == "\\": time.sleep(0.1) entry_url_fmt = self._glos.getInfo("entry_url") re_href = re.compile( b' href="[^<>"]*?"', re.IGNORECASE, ) for fileIndex, filename in enumerate(filenameList): if not isfile(join(dirn, f"links{fileIndex}")): continue with open(join(dirn, filename), mode="rb") as inFile: with open(join(dirn, f"{filename}.new"), mode="wb") as outFile: for linkLine in open(join(dirn, f"links{fileIndex}"), "rb"): outFile.flush() ( b_x_start, b_x_size, b_target, ) = linkLine.rstrip(b"\n").split(b"\t") outFile.write( inFile.read( int(b_x_start, 16) - inFile.tell(), ), ) curLink = inFile.read(int(b_x_size, 16)) if b_target: outFile.write( re_href.sub( b' href="./' + b_target + b'"', curLink, ), ) continue if not entry_url_fmt: outFile.write( curLink.replace( b' href="#', b' class="broken" href="#', ), ) continue st = curLink.decode("utf-8") i = st.find('href="#') j = st.find('"', i + 7) word = st[i + 7 : j] url = entry_url_fmt.format(word=word) outFile.write( ( st[:i] + f'class="broken" href="{url}"' + st[j + 1 :] ).encode("utf-8"), ) outFile.write(inFile.read()) os.remove(join(dirn, filename)) os.rename(join(dirn, f"{filename}.new"), join(dirn, filename)) os.remove(join(dirn, f"links{fileIndex}")) def writeInfo(self, filename: str, header: str) -> None: glos = self._glos title = glos.getInfo("name") customStyle = ( "table, th, td {border: 1px solid black; " "border-collapse: collapse; padding: 5px;}" ) infoHeader = header.format( pageTitle=f"Info: {title}", 
			customStyle=customStyle,
		)
		with open(
			join(filename, "info.html"),
			mode="w",
			encoding=self._encoding,
		) as _file:
			_file.write(
				infoHeader + "<body>"
				"<table>"
				'<tr><th>Key</th>'
				'<th>Value</th></tr>'
				"\n",
			)
			_file.writelines(
				f"<tr><td>{key}</td><td>{value}</td></tr>\n"
				for key, value in glos.iterInfo()
			)
			_file.write("</table></body></html>
            ") @staticmethod def _subResSrc(m: re.Match) -> str: url = m.group(1) if "://" in url: return m.group(0) url = "res/" + url return f' src="{url}"' def write(self) -> Generator[None, EntryType, None]: # noqa: PLR0912 encoding = self._encoding resources = self._resources max_file_size = self._max_file_size filename_format = self._filename_format escape_defi = self._escape_defi wordSep = ' | ' initFileSizeMax = 100 glos = self._glos filename = self._filename self._encoding = encoding self._filename_format = filename_format entry_url_fmt = glos.getInfo("entry_url") def getEntryWebLink(entry: EntryType) -> str: if not entry_url_fmt: return "" url = entry_url_fmt.format(word=html.escape(entry.l_word[0])) return f'{_nbsp}🌏' # from math import log2, ceil # maxPosHexLen = int(ceil(log2(max_file_size) / 4)) indexTxtFileObj = open( join(filename, "index.txt"), mode="w", encoding="utf-8", ) linksTxtFileObj = open( join(filename, "links.txt"), mode="w", encoding="utf-8", ) title = glos.getInfo("name") style = "" if self._dark: style = _darkStyle cssLink = '' if self._css else "" header = ( "\n" "" "{pageTitle}" f'' f'{cssLink}' "\n" ) def pageHeader(n: int) -> str: return header.format( pageTitle=f"Page {n} of {title}", customStyle="", ) def navBar() -> str: links: list[str] = [] if len(self._filenameList) > 1: links.append(f'') links.extend( [ f'', 'ℹ️
    ', # noqa: RUF001 ], ) return ( '" ) tailSize = len(self._tail.encode(encoding)) if max_file_size < len(header) + tailSize: raise ValueError(f"{max_file_size=} is too small") max_file_size -= tailSize if not isdir(self._filename): os.mkdir(self._filename) fileObj = self.nextFile() fileObj.write(pageHeader(0)) fileObj.write(navBar()) re_fixed_link = re.compile( r']*? )?href="#([^<>"]+?)">[^<>]+?', re.IGNORECASE, ) linkTargetSet = set() def replaceBword(text: str) -> str: return text.replace( ' href="bword://', ' href="#', ) def addLinks(text: str, pos: int) -> None: for m in re_fixed_link.finditer(text): if ' class="entry_link"' in m.group(0): continue if m.group(0).count("href=") != 1: log.error(f"unexpected match: {m.group(0)}") target = html.unescape(m.group(1)) linkTargetSet.add(target) start = m.start() b_start = len(text[:start].encode(encoding)) b_size = len(text[start : m.end()].encode(encoding)) linksTxtFileObj.write( f"{escapeNTB(target)}\t" f"{len(self._filenameList) - 1}\t" f"{pos + b_start:x}\t" f"{b_size:x}\n", ) linksTxtFileObj.flush() self.writeInfo(filename, header) word_title = self._word_title resDir = self._resDir entryIndex = -1 while True: entryIndex += 1 entry = yield if entry is None: break if entry.isData(): if resources: entry.save(resDir) continue entry.detectDefiFormat() defi = entry.defi defiFormat = entry.defiFormat if defi.startswith("") and defiFormat != "h": log.error(f"bad {defiFormat=}") defiFormat = "h" if defiFormat == "m": defi = html.escape(defi) if "\n" in defi: # could be markdown or unformatted plaintext # FIXME: this changes the font to a monospace defi = f"
<pre>{defi}</pre>
    " elif defiFormat == "h": defi = self._resSrcPattern.sub(self._subResSrc, defi) if escape_defi: defi = html.escape(defi) entryId = f"entry{entryIndex}" if word_title: words = [html.escape(word) for word in entry.l_word] title = glos.wordTitleStr( wordSep.join(words), sample=entry.l_word[0], class_="headword", ) if not title: title = f"Entry {entryIndex}" # entry_link_sym = "¶" entry_link_sym = "🔗" text = ( f'
<div id="{entryId}">{title}{_nbsp}{_nbsp}' f'<a class="entry_link" href="#{entryId}">' f"{entry_link_sym}</a>" f"{getEntryWebLink(entry)}" f"<br/>
\n{defi}" "</div>
    \n" "
    \n" ) pos = fileObj.tell() if pos > initFileSizeMax and pos > max_file_size - len( text.encode(encoding), ): fileObj = self.nextFile() fileObj.write( pageHeader( len(self._filenameList) - 1, ), ) fileObj.write(navBar()) pos = fileObj.tell() tmpFilename = escapeNTB(self._filenameList[-1]) indexTxtFileObj.writelines( f"{entryIndex}\t{escapeNTB(word)}\t{tmpFilename}\t{pos}\n" for word in entry.l_word ) del tmpFilename text = replaceBword(text) addLinks(text, pos) fileObj.write(text) fileObj.close() self._fileObj = None indexTxtFileObj.close() linksTxtFileObj.close() if linkTargetSet: log.info(f"{len(linkTargetSet)} link targets found") log.info("Fixing links, please wait...") self.fixLinks(linkTargetSet) os.remove(join(filename, "links.txt")) pyglossary-5.0.9/pyglossary/plugins/info_plugin/000077500000000000000000000000001476751035500221645ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/info_plugin/__init__.py000066400000000000000000000012741476751035500243010ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING from pyglossary.info_writer import InfoWriter as Writer from .reader import Reader if TYPE_CHECKING: from pyglossary.option import Option __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "info" name = "Info" description = "Glossary Info (.info)" extensions = (".info",) extensionCreate = ".info" singleFile = True kind = "text" wiki = "" website = None # key is option/argument name, value is instance of Option optionsProp: dict[str, Option] = {} pyglossary-5.0.9/pyglossary/plugins/info_plugin/reader.py000066400000000000000000000014201476751035500237750ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Iterator from pyglossary.glossary_types import ( EntryType, ReaderGlossaryType, ) __all__ = ["Reader"] class Reader: useByteProgress = False def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos def close(self) -> None: pass def open(self, filename: str) -> None: from pyglossary.json_utils import jsonToData with open(filename, encoding="utf-8") as infoFp: info = jsonToData(infoFp.read()) assert isinstance(info, dict) for key, value in info.items(): self._glos.setInfo(key, value) def __len__(self) -> int: return 0 def __iter__(self) -> Iterator[EntryType | None]: yield None pyglossary-5.0.9/pyglossary/plugins/info_plugin/tools.toml000066400000000000000000000000001476751035500242070ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/jmdict/000077500000000000000000000000001476751035500211255ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/jmdict/__init__.py000066400000000000000000000015511476751035500232400ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, IntOption, Option, StrOption, ) from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "jmdict" name = "JMDict" description = "JMDict (xml)" extensions = () extensionCreate = "" singleFile = True kind = "text" wiki = "https://en.wikipedia.org/wiki/JMdict" website = ( "https://www.edrdg.org/jmdict/j_jmdict.html", "The JMDict Project", 
)

optionsProp: dict[str, Option] = {
	"example_color": StrOption(
		comment="Examples color",
	),
	"example_padding": IntOption(
		comment="Padding for examples (in px)",
	),
	"translitation": BoolOption(
		comment="Add transliteration (romaji) of keywords",
	),
}
pyglossary-5.0.9/pyglossary/plugins/jmdict/reader.py000066400000000000000000000243751476751035500227510ustar00rootroot00000000000000# -*- coding: utf-8 -*-
from __future__ import annotations

import os
import re
import unicodedata
from io import BytesIO
from typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
	import io
	from collections.abc import Callable, Iterator

	from pyglossary.glossary_types import (
		EntryType,
		ReaderGlossaryType,
	)
	from pyglossary.lxml_types import Element, T_htmlfile

from pyglossary.compression import (
	compressionOpen,
	stdCompressions,
)
from pyglossary.core import exc_note, pip
from pyglossary.io_utils import nullBinaryIO

__all__ = ["Reader"]


class Reader:
	useByteProgress = True
	compressions = stdCompressions
	depends = {
		"lxml": "lxml",
	}

	_example_padding: int = 10
	_example_color: str = ""
	# _example_color: str = "#008FE1"
	_translitation: bool = False

	tagStyle = (
		"color:white;"
		"background:green;"
		"padding-left:3px;"
		"padding-right:3px;"
		"border-radius:0.5ex;"  # 0.5ex ~= 0.3em, but "ex" is recommended
	)

	gikun_key = "gikun (meaning as reading) or jukujikun (special kanji reading)"
	re_inf_mapping = {
		gikun_key: "gikun/jukujikun",
		"out-dated or obsolete kana usage": "obsolete",  # outdated/obsolete
		"word containing irregular kana usage": "irregular",
	}

	@staticmethod
	def makeList(
		hf: T_htmlfile,
		input_objects: list[Element],
		processor: Callable,
		single_prefix: str = "",
		skip_single: bool = True,
	) -> None:
		"""Wrap elements into <ol>
      if more than one element.""" if not input_objects: return if skip_single and len(input_objects) == 1: hf.write(single_prefix) processor(hf, input_objects[0]) return with hf.element("ol"): for el in input_objects: with hf.element("li"): processor(hf, el) # TODO: break it down # PLR0912 Too many branches (23 > 12) def writeSense( # noqa: PLR0912 self, hf: T_htmlfile, sense: Element, ) -> None: from lxml import etree as ET def br() -> Element: return ET.Element("br") for elem in sense.findall("pos"): if not elem.text: continue desc = elem.text if desc == "unclassified": continue with hf.element("i"): hf.write(desc.capitalize()) hf.write(br()) glossList = [elem.text.strip() for elem in sense.findall("gloss") if elem.text] if glossList: for i, gloss in enumerate(glossList): if i > 0: hf.write(", ") hf.write(gloss) hf.write(br()) relatedWords: list[str] = [] for elem in sense.findall("xref"): if not elem.text: continue word = elem.text.strip() word = self._link_number_postfix.sub("", word) relatedWords.append(word) if relatedWords: hf.write("Related: ") for i, word in enumerate(relatedWords): if i > 0: with hf.element("big"): hf.write(" | ") with hf.element("a", href=f"bword://{word}"): hf.write(word) hf.write(br()) antonymWords: list[str] = [] for elem in sense.findall("ant"): if not elem.text: continue word = elem.text.strip() word = self._link_number_postfix.sub("", word) antonymWords.append(word) if antonymWords: hf.write("Antonym: ") for i, word in enumerate(antonymWords): if i > 0: with hf.element("big"): hf.write(" | ") with hf.element( "a", href=f"bword://{word}", attrib={"class": "antonym"}, ): hf.write(word) hf.write(br()) for i, elem in enumerate(sense.findall("field")): if not elem.text: continue if i > 0: hf.write(" ") desc = elem.text with hf.element("span", style=self.tagStyle): hf.write(desc) hf.write(br()) for i, elem in enumerate(sense.findall("misc")): if not elem.text: continue if i > 0: hf.write(" ") desc = elem.text with hf.element("small"): with hf.element("span", style=self.tagStyle): hf.write(desc) hf.write(br()) examples = sense.findall("example") # TODO: move to a method if examples: # noqa: PLR1702 with hf.element( "div", attrib={ "class": "example", "style": f"padding: {self._example_padding}px 0px;", }, ): hf.write("Examples:") with hf.element("ul"): for i, elem in enumerate(examples): if not elem.text: continue if i > 0: hf.write(" ") # one ex_srce (id?), one ex_text, and two ex_sent tags textElem = elem.find("ex_text") if textElem is None: continue if not textElem.text: continue text = textElem.text sentList: list[str] = [] for sentElem in elem.findall("ex_sent"): if not sentElem.text: continue sentList.append(sentElem.text) with hf.element("li"): style: dict[str, str] = {} if self._example_color: style["color"] = self._example_color with hf.element("font", attrib=style): hf.write(text) for sent in sentList: hf.write(br()) hf.write(sent) # TODO: break it down def getEntryByElem( # noqa: PLR0912 self, entry: Element, ) -> EntryType: from lxml import etree as ET glos = self._glos keywords: list[str] = [] f = BytesIO() translit = self._translitation def br() -> Element: return ET.Element("br") with ET.htmlfile(f, encoding="utf-8") as hf: # noqa: PLR1702 kebList: list[str] = [] rebList: list[str] = [] kebDisplayList: list[str] = [] rebDisplayList: list[tuple[str, list[str]]] = [] with hf.element("div"): for k_ele in entry.findall("k_ele"): keb = k_ele.find("keb") if keb is None: continue if not keb.text: continue keb_text = keb.text keb_text_norm = 
unicodedata.normalize("NFKC", keb_text) keywords.append(keb_text_norm) if keb_text != keb_text_norm: keywords.append(keb_text) kebList.append(keb_text) keb_display = keb_text if translit: import romkan # type: ignore t_keb = romkan.to_roma(keb_text) if t_keb and t_keb.isascii(): keywords.append(t_keb) keb_display += f" ({t_keb})" kebDisplayList.append(keb_display) # for elem in k_ele.findall("ke_pri"): # log.info(elem.text) for r_ele in entry.findall("r_ele"): reb = r_ele.find("reb") if reb is None: continue if not reb.text: continue props: list[str] = [] if r_ele.find("re_nokanji") is not None: props.append("no kanji") inf = r_ele.find("re_inf") if inf is not None and inf.text: props.append( self.re_inf_mapping.get(inf.text, inf.text), ) keywords.append(reb.text) reb_text = reb.text rebList.append(reb_text) reb_display = reb_text if translit: import romkan t_reb = romkan.to_roma(reb.text) if t_reb and t_reb.isascii(): keywords.append(t_reb) reb_display += f" ({t_reb})" rebDisplayList.append((reb_display, props)) # for elem in r_ele.findall("re_pri"): # log.info(elem.text) # this is for making internal links valid # this makes too many alternates! # but we don't seem to have a choice # except for scanning and indexing all words once # and then starting over and fixing/optimizing links for s_keb in kebList: for s_reb in rebList: keywords.append(f"{s_keb}・{s_reb}") # noqa: PERF401 if kebDisplayList: with hf.element(glos.titleTag(kebDisplayList[0])): for i, s_keb in enumerate(kebDisplayList): if i > 0: with hf.element("font", color="red"): hf.write(" | ") hf.write(s_keb) hf.write(br()) if rebDisplayList: for i, (s_reb, props) in enumerate(rebDisplayList): if i > 0: with hf.element("font", color="red"): hf.write(" | ") with hf.element("font", color="green"): hf.write(s_reb) for prop in props: hf.write(" ") with hf.element("small"): with hf.element("span", style=self.tagStyle): hf.write(prop) hf.write(br()) hf_ = cast("T_htmlfile", hf) self.makeList( hf_, entry.findall("sense"), self.writeSense, ) defi = f.getvalue().decode("utf-8") file = self._file byteProgress = (file.tell(), self._fileSize) return self._glos.newEntry( keywords, defi, defiFormat="h", byteProgress=byteProgress, ) @staticmethod def tostring(elem: Element) -> str: from lxml import etree as ET return ( ET.tostring( elem, method="html", pretty_print=True, ) .decode("utf-8") .strip() ) def setCreationTime(self, header: str) -> None: m = re.search("JMdict created: ([0-9]{4}-[0-9]{2}-[0-9]{2})", header) if m is None: return self._glos.setInfo("creationTime", m.group(1)) def setMetadata(self, header: str) -> None: # TODO: self.set_info("edition", ...) 
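		# setCreationTime() above scans the raw XML header text for a comment
		# such as the following (illustrative only -- the exact wording of the
		# real JMdict header is an assumption here):
		#
		#     <!-- JMdict created: 2024-01-15 -->
		#
		# so the regex in setCreationTime() would yield:
		#
		#     >>> re.search(
		#     ...     "JMdict created: ([0-9]{4}-[0-9]{2}-[0-9]{2})",
		#     ...     "<!-- JMdict created: 2024-01-15 -->",
		#     ... ).group(1)
		#     '2024-01-15'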
self.setCreationTime(header) def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._wordCount = 0 self._filename = "" self._file: io.IOBase = nullBinaryIO self._fileSize = 0 self._link_number_postfix = re.compile("・[0-9]+$") def __len__(self) -> int: return self._wordCount def close(self) -> None: if self._file: self._file.close() self._file = nullBinaryIO def open( self, filename: str, ) -> None: try: from lxml import etree as ET # noqa: F401 except ModuleNotFoundError as e: exc_note(e, f"Run `{pip} install lxml` to install") raise self._filename = filename self._fileSize = os.path.getsize(filename) self._glos.sourceLangName = "Japanese" self._glos.setDefaultDefiFormat("h") self._glos.setInfo("definition_has_headwords", "True") self._glos.setInfo("entry_url", "https://jisho.org/search/{word}") # also good: f"https://sakuradict.com/search?q={{word}}" header = "" with compressionOpen(filename, mode="rt", encoding="utf-8") as text_file: text_file = cast("io.TextIOBase", text_file) for line in text_file: if "" in line: break header += line self.setMetadata(header) self._file = compressionOpen(filename, mode="rb") def __iter__(self) -> Iterator[EntryType]: from lxml import etree as ET context = ET.iterparse( # type: ignore # noqa: PGH003 self._file, events=("end",), tag="entry", ) for _, _elem in context: elem = cast("Element", _elem) yield self.getEntryByElem(elem) # clean up preceding siblings to save memory # this reduces memory usage from ~64 MB to ~30 MB parent = elem.getparent() if parent is None: continue while elem.getprevious() is not None: del parent[0] pyglossary-5.0.9/pyglossary/plugins/jmdict/tools.toml000066400000000000000000000000001476751035500231500ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/jmnedict/000077500000000000000000000000001476751035500214505ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/jmnedict/__init__.py000066400000000000000000000012171476751035500235620ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING from .reader import Reader if TYPE_CHECKING: from pyglossary.option import Option __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "jmnedict" name = "JMnedict" description = "JMnedict" extensions = () extensionCreate = "" singleFile = True kind = "text" wiki = "https://en.wikipedia.org/wiki/JMdict" website = ( "https://www.edrdg.org/wiki/index.php/Main_Page", "EDRDG Wiki", ) optionsProp: dict[str, Option] = {} pyglossary-5.0.9/pyglossary/plugins/jmnedict/reader.py000066400000000000000000000161131476751035500232660ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import os import re from io import BytesIO from typing import TYPE_CHECKING, cast if TYPE_CHECKING: import io from collections.abc import Callable, Iterator from pyglossary.glossary_types import ( EntryType, ReaderGlossaryType, ) from pyglossary.lxml_types import Element, T_htmlfile from pyglossary.compression import ( compressionOpen, stdCompressions, ) from pyglossary.core import exc_note, pip from pyglossary.io_utils import nullBinaryIO __all__ = ["Reader"] class Reader: useByteProgress = True compressions = stdCompressions depends = { "lxml": "lxml", } tagStyle = ( "color:white;" "background:green;" "padding-left:3px;" "padding-right:3px;" "border-radius:0.5ex;" # 0.5ex ~= 0.3em, but "ex" is recommended ) 
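	# tagStyle above is the inline CSS for the small property badges written
	# next to readings (see the hf.element("span", style=self.tagStyle) calls
	# below). A hypothetical rendered fragment, for illustration only:
	#
	#     <small><span style="color:white;background:green;...">no kanji</span></small>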
gikun_key = "gikun (meaning as reading) or jukujikun (special kanji reading)" re_inf_mapping = { gikun_key: "gikun/jukujikun", "out-dated or obsolete kana usage": "obsolete", # outdated/obsolete "word containing irregular kana usage": "irregular", } @staticmethod def makeList( hf: T_htmlfile, input_objects: list[Element], processor: Callable, single_prefix: str = "", skip_single: bool = True, ) -> None: """Wrap elements into
        if more than one element.""" if not input_objects: return if skip_single and len(input_objects) == 1: hf.write(single_prefix) processor(hf, input_objects[0]) return with hf.element("ol"): for el in input_objects: with hf.element("li"): processor(hf, el) def writeTrans( self, hf: T_htmlfile, trans: Element, ) -> None: from lxml import etree as ET def br() -> Element: return ET.Element("br") for elem in trans.findall("name_type"): if not elem.text: continue desc = elem.text with hf.element("i"): hf.write(desc.capitalize()) hf.write(br()) for elem in trans.findall("trans_det"): if not elem.text: continue desc = elem.text hf.write(desc) hf.write(br()) relatedWords: list[str] = [] for elem in trans.findall("xref"): if not elem.text: continue word = elem.text.strip() word = self._link_number_postfix.sub("", word) relatedWords.append(word) if relatedWords: hf.write("Related: ") for i, word in enumerate(relatedWords): if i > 0: with hf.element("big"): hf.write(" | ") with hf.element("a", href=f"bword://{word}"): hf.write(word) hf.write(br()) def getEntryByElem( # noqa: PLR0912 self, entry: Element, ) -> EntryType: from lxml import etree as ET glos = self._glos keywords: list[str] = [] f = BytesIO() def br() -> Element: return ET.Element("br") with ET.htmlfile(f, encoding="utf-8") as hf: # noqa: PLR1702 kebList: list[str] = [] rebList: list[tuple[str, list[str]]] = [] with hf.element("div"): for k_ele in entry.findall("k_ele"): keb = k_ele.find("keb") if keb is None: continue if not keb.text: continue kebList.append(keb.text) keywords.append(keb.text) # for elem in k_ele.findall("ke_pri"): # log.info(elem.text) for r_ele in entry.findall("r_ele"): reb = r_ele.find("reb") if reb is None: continue if not reb.text: continue props: list[str] = [] if r_ele.find("re_nokanji") is not None: props.append("no kanji") inf = r_ele.find("re_inf") if inf is not None and inf.text: props.append( self.re_inf_mapping.get(inf.text, inf.text), ) rebList.append((reb.text, props)) keywords.append(reb.text) # for elem in r_ele.findall("re_pri"): # log.info(elem.text) # this is for making internal links valid # this makes too many alternates! 
# but we don't seem to have a choice # except for scanning and indexing all words once # and then starting over and fixing/optimizing links for s_keb in kebList: for s_reb, _ in rebList: keywords.append(f"{s_keb}・{s_reb}") if kebList: with hf.element(glos.titleTag(kebList[0])): for i, s_keb in enumerate(kebList): if i > 0: with hf.element("font", color="red"): hf.write(" | ") hf.write(s_keb) hf.write(br()) if rebList: for i, (s_reb, props) in enumerate(rebList): if i > 0: with hf.element("font", color="red"): hf.write(" | ") with hf.element("font", color="green"): hf.write(s_reb) for prop in props: hf.write(" ") with hf.element("small"): with hf.element("span", style=self.tagStyle): hf.write(prop) hf.write(br()) hf_ = cast("T_htmlfile", hf) self.makeList( hf_, entry.findall("trans"), self.writeTrans, ) defi = f.getvalue().decode("utf-8") file = self._file byteProgress = (file.tell(), self._fileSize) return self._glos.newEntry( keywords, defi, defiFormat="h", byteProgress=byteProgress, ) @staticmethod def tostring(elem: Element) -> str: from lxml import etree as ET return ( ET.tostring( elem, method="html", pretty_print=True, ) .decode("utf-8") .strip() ) def setCreationTime(self, header: str) -> None: m = re.search("JMdict created: ([0-9]{4}-[0-9]{2}-[0-9]{2})", header) if m is None: return self._glos.setInfo("creationTime", m.group(1)) def setMetadata(self, header: str) -> None: # TODO: self.set_info("edition", ...) self.setCreationTime(header) def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._wordCount = 0 self._filename = "" self._file: io.IOBase = nullBinaryIO self._fileSize = 0 self._link_number_postfix = re.compile("・[0-9]+$") def __len__(self) -> int: return self._wordCount def close(self) -> None: if self._file: self._file.close() self._file = nullBinaryIO def open( self, filename: str, ) -> None: try: from lxml import etree as ET # noqa: F401 except ModuleNotFoundError as e: exc_note(e, f"Run `{pip} install lxml` to install") raise self._filename = filename self._fileSize = os.path.getsize(filename) self._glos.sourceLangName = "Japanese" self._glos.setDefaultDefiFormat("h") self._glos.setInfo("definition_has_headwords", "True") self._glos.setInfo("entry_url", "https://jisho.org/search/{word}") # also good: f"https://sakuradict.com/search?q={{word}}" header = "" with compressionOpen(filename, mode="rt", encoding="utf-8") as text_file: text_file = cast("io.TextIOBase", text_file) for line in text_file: if "" in line: break header += line self.setMetadata(header) self._file = compressionOpen(filename, mode="rb") def __iter__(self) -> Iterator[EntryType]: from lxml import etree as ET context = ET.iterparse( # type: ignore # noqa: PGH003 self._file, events=("end",), tag="entry", ) for _, _elem in context: elem = cast("Element", _elem) yield self.getEntryByElem(elem) # clean up preceding siblings to save memory # this reduces memory usage from ~64 MB to ~30 MB parent = elem.getparent() if parent is None: continue while elem.getprevious() is not None: del parent[0] pyglossary-5.0.9/pyglossary/plugins/json_plugin/000077500000000000000000000000001476751035500222025ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/json_plugin/__init__.py000066400000000000000000000015771476751035500243250ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, EncodingOption, Option, ) from .writer import Writer __all__ = [ "Writer", "description", "enable", "extensionCreate", "extensions", 
"kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "json" name = "Json" description = "JSON (.json)" extensions = (".json",) extensionCreate = ".json" singleFile = True kind = "text" wiki = "https://en.wikipedia.org/wiki/JSON" website = ( "https://www.json.org/json-en.html", "www.json.org", ) optionsProp: dict[str, Option] = { "encoding": EncodingOption(), "enable_info": BoolOption(comment="Enable glossary info / metedata"), "resources": BoolOption(comment="Enable resources / data files"), "word_title": BoolOption( comment="add headwords title to beginning of definition", ), } pyglossary-5.0.9/pyglossary/plugins/json_plugin/tools.toml000066400000000000000000000000001476751035500242250ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/json_plugin/writer.py000066400000000000000000000025701476751035500240740ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING from pyglossary.compression import ( # compressionOpen, stdCompressions, ) if TYPE_CHECKING: from collections.abc import Generator from pyglossary.glossary_types import ( EntryType, WriterGlossaryType, ) __all__ = ["Writer"] class Writer: _encoding: str = "utf-8" _enable_info: bool = True _resources: bool = True _word_title: bool = False compressions = stdCompressions def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" glos.preventDuplicateWords() def open(self, filename: str) -> None: self._filename = filename def finish(self) -> None: self._filename = "" def write(self) -> Generator[None, EntryType, None]: from json import dumps from pyglossary.text_writer import writeTxt glos = self._glos encoding = self._encoding enable_info = self._enable_info resources = self._resources ensure_ascii = encoding == "ascii" def escape(st: str) -> str: return dumps(st, ensure_ascii=ensure_ascii) yield from writeTxt( glos, entryFmt="\t{word}: {defi},\n", filename=self._filename, encoding=encoding, writeInfo=enable_info, wordEscapeFunc=escape, defiEscapeFunc=escape, ext=".json", head="{\n", tail='\t"": ""\n}', resources=resources, word_title=self._word_title, ) pyglossary-5.0.9/pyglossary/plugins/lingoes_ldf/000077500000000000000000000000001476751035500221405ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/lingoes_ldf/__init__.py000066400000000000000000000015301476751035500242500ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, EncodingOption, NewlineOption, Option, ) from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "lingoes_ldf" name = "LingoesLDF" description = "Lingoes Source (.ldf)" extensions = (".ldf",) extensionCreate = ".ldf" singleFile = True kind = "text" wiki = "https://en.wikipedia.org/wiki/Lingoes" website = ( "http://www.lingoes.net/en/dictionary/dict_format.php", "Lingoes.net", ) optionsProp: dict[str, Option] = { "newline": NewlineOption(), "resources": BoolOption(comment="Enable resources / data files"), "encoding": EncodingOption(), } pyglossary-5.0.9/pyglossary/plugins/lingoes_ldf/reader.py000066400000000000000000000035721476751035500237630ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.compression import ( # compressionOpen, 
stdCompressions, ) from pyglossary.core import log from pyglossary.file_utils import fileCountLines from pyglossary.text_reader import TextGlossaryReader, nextBlockResultType from pyglossary.text_utils import splitByBar __all__ = ["Reader"] class Reader(TextGlossaryReader): useByteProgress = True compressions = stdCompressions def __len__(self) -> int: if self._wordCount is None: log.debug("Try not to use len(reader) as it takes extra time") self._wordCount = ( fileCountLines( self._filename, newline=b"\n\n", ) - self._leadingLinesCount ) return self._wordCount @classmethod def isInfoWord(cls, word: str) -> bool: if isinstance(word, str): return word.startswith("#") return False @classmethod def fixInfoWord(cls, word: str) -> str: if isinstance(word, str): return word.lstrip("#").lower() return word def nextBlock(self) -> nextBlockResultType: if not self._file: raise StopIteration entryLines: list[str] = [] while True: line = self.readline() if not line: raise StopIteration line = line.rstrip("\n\r") if line.startswith("###"): parts = line.split(":") key = parts[0].strip() value = ":".join(parts[1:]).strip() return key, value, None if line: entryLines.append(line) continue # now `line` is empty, process `entryLines` if not entryLines: return None if len(entryLines) < 2: log.error( f"invalid block near pos {self._file.tell()}" f" in file {self._filename}", ) return None word = entryLines[0] defi = "\n".join(entryLines[1:]) defi = ( defi.replace("
        ", "\n") .replace("
        ", "\n") .replace("
        ", "\n") .replace("
        ", "\n") ) words = splitByBar(word) return words, defi, None pyglossary-5.0.9/pyglossary/plugins/lingoes_ldf/tools.toml000066400000000000000000000005441476751035500242000ustar00rootroot00000000000000["Lingoes Dictionary Creator"] web = "http://www.lingoes.net/en/dictionary/dict_format.php" platforms = [] license = "Unknown" comment = "Lingoes Dictionary Creator is developing now.\nPlease send your finished dictionary source file to kevin-yau@msn.com\nLingoes will compile it into .ld2 for you.\nYou will can do it yourself after the creator release." pyglossary-5.0.9/pyglossary/plugins/lingoes_ldf/writer.py000066400000000000000000000030551476751035500240310ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING from pyglossary.compression import ( # compressionOpen, stdCompressions, ) if TYPE_CHECKING: from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] class Writer: compressions = stdCompressions _newline: str = "\n" _resources: bool = True def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" def getInfo(self, key: str) -> str: return self._glos.getInfo(key).replace("\n", "
        ") def getAuthor(self) -> str: return self._glos.author.replace("\n", "
        ") def finish(self) -> None: self._filename = "" def open(self, filename: str) -> None: self._filename = filename @staticmethod def _defiEscapeFunc(defi: str) -> str: return defi.replace("\n", "
        ") def write(self) -> Generator[None, EntryType, None]: from pyglossary.text_writer import writeTxt newline = self._newline resources = self._resources head = ( f"###Title: {self.getInfo('title')}\n" f"###Description: {self.getInfo('description')}\n" f"###Author: {self.getAuthor()}\n" f"###Email: {self.getInfo('email')}\n" f"###Website: {self.getInfo('website')}\n" f"###Copyright: {self.getInfo('copyright')}\n" ) yield from writeTxt( self._glos, entryFmt="{word}\n{defi}\n\n", filename=self._filename, writeInfo=False, defiEscapeFunc=self._defiEscapeFunc, ext=".ldf", head=head, newline=newline, resources=resources, ) pyglossary-5.0.9/pyglossary/plugins/makindo_medical/000077500000000000000000000000001476751035500227535ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/makindo_medical/__init__.py000066400000000000000000000012721476751035500250660ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING from .reader import Reader if TYPE_CHECKING: from pyglossary.option import Option __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "makindo_medical" name = "MakindoMedical" description = "Makindo Medical Reference (SQLite3)" extensions = () extensionCreate = ".db" singleFile = True kind = "binary" wiki = "" website = ( "https://www.makindo.co.uk/topics/_index.php", "Makindo.co.uk Comprehensive Medical Encyclopedia", ) optionsProp: dict[str, Option] = {} pyglossary-5.0.9/pyglossary/plugins/makindo_medical/reader.py000066400000000000000000000030551476751035500245720ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import html from typing import TYPE_CHECKING if TYPE_CHECKING: import sqlite3 from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] class Reader: useByteProgress = False def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._clear() def _clear(self) -> None: self._filename = "" self._con: sqlite3.Connection | None = None self._cur: sqlite3.Cursor | None = None def open(self, filename: str) -> None: from sqlite3 import connect self._filename = filename self._con = connect(filename) self._cur = self._con.cursor() self._glos.setDefaultDefiFormat("h") def __len__(self) -> int: if self._cur is None: raise ValueError("cur is None") self._cur.execute("select count(*) from NEW_TABLE") return self._cur.fetchone()[0] def __iter__(self) -> Iterator[EntryType]: if self._cur is None: raise ValueError("cur is None") self._cur.execute( "select _id, contents from NEW_TABLE where _id is not null", ) # FIXME: iteration over self._cur stops after one entry # and self._cur.fetchone() returns None # for row in self._cur: for row in self._cur.fetchall(): word = html.unescape(row[0]) definition = row[1].decode("utf-8", errors="ignore") # print(f"{word!r}, {definition!r}") yield self._glos.newEntry(word, definition, defiFormat="h") def close(self) -> None: if self._cur: self._cur.close() if self._con: self._con.close() self._clear() pyglossary-5.0.9/pyglossary/plugins/makindo_medical/tools.toml000066400000000000000000000002431476751035500250070ustar00rootroot00000000000000["Makindo Medical Reference"] web = "https://play.google.com/store/apps/details?id=com.pocketmednotes2014.secondapp" platforms = [ "Android",] license = "Unknown" 
pyglossary-5.0.9/pyglossary/plugins/octopus_mdict_new/000077500000000000000000000000001476751035500234005ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/octopus_mdict_new/__init__.py000066400000000000000000000021321476751035500255070ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, EncodingOption, Option, ) from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "octopus_mdict" name = "OctopusMdict" description = "Octopus MDict (.mdx)" extensions = (".mdx",) extensionCreate = "" singleFile = False kind = "binary" wiki = "" website = ( "https://www.mdict.cn/wp/?page_id=5325&lang=en", "Download | MDict.cn", ) optionsProp: dict[str, Option] = { "encoding": EncodingOption(), "substyle": BoolOption( comment="Enable substyle", ), "same_dir_data_files": BoolOption( comment="Read data files from same directory", ), "audio": BoolOption( comment="Enable audio objects", ), } extraDocs = [ ( "`python-lzo` is required for **some** MDX glossaries.", """First try converting your MDX file, if failed (`AssertionError` probably), then try to install [LZO library and Python binding](./doc/lzo.md).""", ), ] pyglossary-5.0.9/pyglossary/plugins/octopus_mdict_new/reader.py000066400000000000000000000142151476751035500252170ustar00rootroot00000000000000# -*- coding: utf-8 -*- # Read Octopus MDict dictionary format, mdx(dictionary)/mdd(data) # # Copyright © 2013 Xiaoqiang Wang # Copyright © 2013-2021 Saeed Rasooli # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations import gc import os import re import sys from os.path import dirname, extsep, isfile, join, splitext from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.plugin_lib.readmdict import MDD, MDX from pyglossary.core import log from pyglossary.text_utils import toStr __all__ = ["Reader"] class Reader: useByteProgress = False _encoding: str = "" _substyle: bool = True _same_dir_data_files: bool = False _audio: bool = False def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self.clear() self._re_internal_link = re.compile("href=([\"'])(entry://|[dx]:)") self._re_audio_link = re.compile( ']*? 
)?href="sound://([^<>"]+)"( .*?)?>(.*?)', ) def clear(self) -> None: self._filename = "" self._mdx: MDX | None = None self._mdd: list[MDD] = [] self._wordCount = 0 self._dataEntryCount = 0 # dict of mainWord -> newline-separated alternatives self._linksDict: dict[str, str] = {} def open(self, filename: str) -> None: from pyglossary.plugin_lib.readmdict import MDD, MDX self._filename = filename self._mdx = MDX(filename, self._encoding, self._substyle) """ multiple MDD files are supported with this naming schema: FILE.mdx FILE.mdd FILE.1.mdd FILE.2.mdd FILE.3.mdd """ filenameNoExt, _ext = splitext(self._filename) mddBase = filenameNoExt + extsep for fname in (f"{mddBase}mdd", f"{mddBase}1.mdd"): if isfile(fname): self._mdd.append(MDD(fname)) mddN = 2 while isfile(f"{mddBase}{mddN}.mdd"): self._mdd.append(MDD(f"{mddBase}{mddN}.mdd")) mddN += 1 dataEntryCount = 0 for mdd in self._mdd: dataEntryCount += len(mdd) self._dataEntryCount = dataEntryCount log.info(f"Found {len(self._mdd)} mdd files with {dataEntryCount} entries") # from pprint import pformat # log.debug("mdx.header = " + pformat(self._mdx.header)) # for key, value in self._mdx.header.items(): # key = key.lower() # self._glos.setInfo(key, value) try: title = toStr(self._mdx.header[b"Title"]) except KeyError: pass else: title = title.strip() if title == "Title (No HTML code allowed)": # TODO: how to avoid this? title = "" if title: self._glos.setInfo("name", title) desc = toStr(self._mdx.header.get(b"Description", "")) if desc: self._glos.setInfo("description", desc) self.loadLinks() def loadLinks(self) -> None: from pyglossary.plugin_lib.readmdict import MDX mdx = self._mdx if mdx is None: raise ValueError("mdx is None") log.info("extracting links...") linksDict: dict[str, str] = {} word = "" wordCount = 0 for b_word, b_defi in mdx.items(): word = b_word.decode("utf-8") defi = b_defi.decode("utf-8").strip() if defi.startswith("@@@LINK="): if not word: log.warning(f"unexpected defi: {defi}") continue mainWord = defi[8:] if mainWord in linksDict: linksDict[mainWord] += "\n" + word else: linksDict[mainWord] = word continue wordCount += 1 log.info( f"extracting links done, sizeof(linksDict)={sys.getsizeof(linksDict)}", ) log.info(f"{wordCount = }") self._linksDict = linksDict self._wordCount = wordCount self._mdx = MDX(self._filename, self._encoding, self._substyle) def fixDefi(self, defi: str) -> str: defi = self._re_internal_link.sub(r"href=\1bword://", defi) defi = defi.replace(' src="file://', ' src=".') if self._audio: # \5 is the possible elements between and # but anything between and is completely # ignored by Aaard2 Web and browser # and there is no point adding it after # which makes it shown after audio controls # GoldenDict acts completely different, so must use # audio_goldendict=True option in StarDict writer instead. 
defi = self._re_audio_link.sub( r'', defi, ) return defi def __iter__(self) -> Iterator[EntryType]: if self._mdx is None: log.error("trying to iterate on a closed MDX file") return glos = self._glos linksDict = self._linksDict for b_word, b_defi in self._mdx.items(): word = b_word.decode("utf-8") defi = b_defi.decode("utf-8").strip() if defi.startswith("@@@LINK="): continue defi = self.fixDefi(defi) words = word altsStr = linksDict.get(word, "") if altsStr: words = [word] + altsStr.split("\n") yield glos.newEntry(words, defi) self._mdx = None del linksDict self._linksDict = {} gc.collect() if self._same_dir_data_files: dirPath = dirname(self._filename) for fname in os.listdir(dirPath): ext = splitext(fname)[1].lower() if ext in {".mdx", ".mdd"}: continue fpath = join(dirPath, fname) if not isfile(fpath): continue with open(fpath, mode="rb") as _file: b_data = _file.read() yield glos.newDataEntry(fname, b_data) for mdd in self._mdd: try: for b_fname, b_data in mdd.items(): fname = toStr(b_fname) fname = fname.replace("\\", os.sep).lstrip(os.sep) yield glos.newDataEntry(fname, b_data) except Exception: # noqa: PERF203 log.exception(f"Error reading {mdd.filename}") self._mdd = [] def __len__(self) -> int: return self._wordCount + self._dataEntryCount def close(self) -> None: self.clear() pyglossary-5.0.9/pyglossary/plugins/octopus_mdict_new/tools.toml000066400000000000000000000001611476751035500254330ustar00rootroot00000000000000[MDict] web = "https://www.mdict.cn/" platforms = [ "Android", "iOS", "Windows", "Mac",] license = "Proprietary" pyglossary-5.0.9/pyglossary/plugins/quickdic6/000077500000000000000000000000001476751035500215355ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/quickdic6/__init__.py000066400000000000000000000016541476751035500236540ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.flags import NEVER from pyglossary.option import ( Option, StrOption, ) from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "quickdic6" name = "QuickDic6" description = "QuickDic version 6 (.quickdic)" extensions = (".quickdic", ".quickdic.v006.zip") extensionCreate = ".quickdic" singleFile = True sortOnWrite = NEVER kind = "binary" wiki = "" website = ( "https://github.com/rdoeffinger/Dictionary", "github.com/rdoeffinger/Dictionary", ) # https://github.com/rdoeffinger/Dictionary/blob/master/dictionary-format-v6.txt optionsProp: dict[str, Option] = { "normalizer_rules": StrOption( comment="ICU normalizer rules to use for index sorting", ), } pyglossary-5.0.9/pyglossary/plugins/quickdic6/commons.py000066400000000000000000000035771476751035500235760ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations __all__ = [ "HASH_SET_CAPACITY_FACTOR", "HASH_SET_INIT", "HASH_SET_INIT2", "LINKED_HASH_SET_INIT", "EntryIndexTuple", "IndexEntryType", ] HASH_SET_INIT = ( b"\xac\xed" # magic b"\x00\x05" # version b"\x73" # object b"\x72" # class # Java String "java.util.HashSet": b"\x00\x11\x6a\x61\x76\x61\x2e\x75\x74\x69" b"\x6c\x2e\x48\x61\x73\x68\x53\x65\x74" ) """First part of Java serialization of java.util.HashSet""" HASH_SET_INIT2 = ( # serialization ID: b"\xba\x44\x85\x95\x96\xb8\xb7\x34" b"\x03" # flags: serialized, custom serialization function b"\x00\x00" # fields count b"\x78" # blockdata end b"\x70" # null 
(superclass) b"\x77\x0c" # blockdata short, 0xc bytes ) """Second part of Java serialization of java.util.HashSet""" LINKED_HASH_SET_INIT = ( b"\xac\xed" # magic b"\x00\x05" # version b"\x73" # object b"\x72" # class # Java String "java.util.LinkedHashSet": b"\x00\x17\x6a\x61\x76\x61\x2e\x75\x74\x69" b"\x6c\x2e\x4c\x69\x6e\x6b\x65\x64" b"\x48\x61\x73\x68\x53\x65\x74" # serialization ID: b"\xd8\x6c\xd7\x5a\x95\xdd\x2a\x1e" b"\x02" # flags b"\x00\x00" # fields count b"\x78" # blockdata end b"\x72" # superclass (java.util.HashSet) b"\x00\x11\x6a\x61\x76\x61\x2e\x75\x74\x69" b"\x6c\x2e\x48\x61\x73\x68\x53\x65\x74" ) + HASH_SET_INIT2 """Header of Java serialization of java.util.LinkedHashSet""" HASH_SET_CAPACITY_FACTOR = 0.75 """Capacity factor used to determine the hash set's capacity from its length""" IndexEntryType = tuple[ str, # token int, # start_index int, # count str, # token_norm list[int], # html_indices ] EntryIndexTuple = tuple[ str, # short_name str, # long_name str, # iso str, # normalizer_rules bool, # swap_flag int, # main_token_count list[IndexEntryType], # index_entries list[str], # stop_list, list[tuple[int, int]], # rows ] pyglossary-5.0.9/pyglossary/plugins/quickdic6/comparator.py000066400000000000000000000026731476751035500242660ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from typing import Literal __all__ = ["Comparator"] class Comparator: def __init__(self, locale_str: str, normalizer_rules: str, version: int) -> None: import icu self.version = version self.locale = icu.Locale(locale_str) self._comparator = ( icu.RuleBasedCollator("&z<ȝ") if self.locale.getLanguage() == "en" else icu.Collator.createInstance(self.locale) ) self._comparator.setStrength(icu.Collator.IDENTICAL) self.normalizer_rules = normalizer_rules self.normalize = icu.Transliterator.createFromRules( "", self.normalizer_rules, icu.UTransDirection.FORWARD, ).transliterate def compare( self, tup1: tuple[str, str], tup2: tuple[str, str], ) -> Literal[0, 1, -1]: # assert isinstance(tup1, tuple) # assert isinstance(tup2, tuple) s1, n1 = tup1 s2, n2 = tup2 cn = self._compare_without_dash(n1, n2) if cn != 0: return cn cn = self._comparator.compare(n1, n2) if cn != 0 or self.version < 7: return cn return self._comparator.compare(s1, s2) def _compare_without_dash( self, a: str, b: str, ) -> Literal[0, 1, -1]: if self.version < 7: return 0 s1 = self._without_dash(a) s2 = self._without_dash(b) return self._comparator.compare(s1, s2) @staticmethod def _without_dash(a: str) -> str: return a.replace("-", "").replace("þ", "th").replace("Þ", "Th") pyglossary-5.0.9/pyglossary/plugins/quickdic6/quickdic.py000066400000000000000000000067731476751035500237200ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import datetime as dt import functools from typing import TYPE_CHECKING from pyglossary.core import log from .comparator import Comparator if TYPE_CHECKING: from .commons import EntryIndexTuple, IndexEntryType __all__ = ["QuickDic"] class QuickDic: def __init__( # noqa: PLR0913 self, name: str, sources: list[tuple[str, int]], pairs: list[tuple[int, list[tuple[str, str]]]], texts: list[tuple[int, str]], htmls: list[tuple[int, str, str]], version: int = 6, indices: list[EntryIndexTuple] | None = None, created: dt.datetime | None = None, ) -> None: self.name = name self.sources = sources self.pairs = pairs self.texts = texts self.htmls = htmls self.version = version self.indices = [] if indices 
is None else indices self.created = dt.datetime.now() if created is None else created def add_index( # noqa: PLR0913 self, short_name: str, long_name: str, iso: str, normalizer_rules: str, synonyms: dict | None = None, ) -> None: swap_flag = False comparator = Comparator(iso, normalizer_rules, self.version) synonyms = synonyms or {} n_synonyms = sum(len(v) for v in synonyms.values()) log.info(f"Adding an index for {iso} with {n_synonyms} synonyms ...") # since we don't tokenize, the stop list is always empty stop_list: list[str] = [] if self.indices is None: self.indices = [] log.info("Initialize token list ...") tokens1 = [ (pair[1 if swap_flag else 0], 0, idx) for idx, (_, pairs) in enumerate(self.pairs) for pair in pairs ] if not swap_flag: tokens1.extend( [(title, 4, idx) for idx, (_, title, _) in enumerate(self.htmls)], ) tokens1 = [(t.strip(), ttype, tidx) for t, ttype, tidx in tokens1] log.info("Normalize tokens ...") tokens = [ (t, comparator.normalize(t), ttype, tidx) for t, ttype, tidx in tokens1 if t ] if synonyms: log.info( f"Insert synonyms into token list ({len(tokens)} entries) ...", ) tokens.extend( [ (s, comparator.normalize(s)) + t[2:] for t in tokens if t[0] in synonyms for s in synonyms[t[0]] if s ], ) log.info(f"Sort tokens with synonyms ({len(tokens)} entries) ...") key_fun = functools.cmp_to_key(comparator.compare) tokens.sort(key=lambda t: key_fun((t[0], t[1]))) log.info("Build mid-layer index ...") rows: list[tuple[int, int]] = [] index_entries: list[IndexEntryType] = [] for token, token_norm, ttype, tidx in tokens: prev_token = index_entries[-1][0] if index_entries else "" html_indices: list[int] if prev_token == token: ( token, # noqa: PLW2901 index_start, count, token_norm, # noqa: PLW2901 html_indices, ) = index_entries.pop() else: i_entry = len(index_entries) index_start = len(rows) count = 0 token_norm = "" if token == token_norm else token_norm # noqa: PLW2901 html_indices = [] rows.append((1, i_entry)) if ttype == 4: if tidx not in html_indices: html_indices.append(tidx) elif (ttype, tidx) not in rows[index_start + 1 :]: rows.append((ttype, tidx)) count += 1 index_entries.append( (token, index_start, count, token_norm, html_indices), ) # the exact meaning of this parameter is unknown, # and it seems to be ignored by readers main_token_count = len(index_entries) self.indices.append( ( short_name, long_name, iso, normalizer_rules, swap_flag, main_token_count, index_entries, stop_list, rows, ), ) pyglossary-5.0.9/pyglossary/plugins/quickdic6/read_funcs.py000066400000000000000000000101211476751035500242130ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import gzip import io import struct from typing import IO, TYPE_CHECKING, TypeVar if TYPE_CHECKING: from collections.abc import Callable from .commons import EntryIndexTuple, IndexEntryType from pyglossary.plugin_lib import mutf8 from .commons import ( HASH_SET_CAPACITY_FACTOR, HASH_SET_INIT, HASH_SET_INIT2, LINKED_HASH_SET_INIT, ) __all__ = [ "read_entry_html", "read_entry_index", "read_entry_pairs", "read_entry_source", "read_entry_text", "read_int", "read_list", "read_long", "read_string", ] def read_byte(fp: IO[bytes]) -> int: return struct.unpack(">b", fp.read(1))[0] def read_bool(fp: IO[bytes]) -> bool: return bool(read_byte(fp)) def read_short(fp: IO[bytes]) -> int: return struct.unpack(">h", fp.read(2))[0] def read_int(fp: IO[bytes]) -> int: return struct.unpack(">i", fp.read(4))[0] def read_long(fp: IO[bytes]) -> int: return struct.unpack(">q", fp.read(8))[0] def 
read_float(fp: IO[bytes]) -> float: return struct.unpack(">f", fp.read(4))[0] def read_string(fp: IO[bytes]) -> str: length = read_short(fp) return mutf8.decode_modified_utf8(fp.read(length)) def read_hashset(fp: IO[bytes]) -> list[str]: hash_set_init = fp.read(len(HASH_SET_INIT)) if hash_set_init == HASH_SET_INIT: hash_set_init2 = fp.read(len(HASH_SET_INIT2)) assert hash_set_init2 == HASH_SET_INIT2 else: n_extra = len(LINKED_HASH_SET_INIT) - len(HASH_SET_INIT) hash_set_init += fp.read(n_extra) assert hash_set_init == LINKED_HASH_SET_INIT read_int(fp) # capacity capacity_factor = read_float(fp) assert capacity_factor == HASH_SET_CAPACITY_FACTOR num_entries = read_int(fp) data: list[str] = [] while len(data) < num_entries: assert read_byte(fp) == 0x74 data.append(read_string(fp)) assert read_byte(fp) == 0x78 return data T = TypeVar("T") def read_list( fp: IO[bytes], fun: Callable[[IO[bytes]], T], ) -> list[T]: size = read_int(fp) toc = struct.unpack(f">{size + 1}q", fp.read(8 * (size + 1))) entries: list[T] = [] for offset in toc[:-1]: fp.seek(offset) entries.append(fun(fp)) fp.seek(toc[-1]) return entries def read_entry_int(fp: IO[bytes]) -> int: return read_int(fp) def read_entry_source(fp: IO[bytes]) -> tuple[str, int]: name = read_string(fp) count = read_int(fp) return name, count def read_entry_pairs(fp: IO[bytes]) -> tuple[int, list[tuple[str, str]]]: src_idx = read_short(fp) count = read_int(fp) pairs = [(read_string(fp), read_string(fp)) for i in range(count)] return src_idx, pairs def read_entry_text(fp: IO[bytes]) -> tuple[int, str]: src_idx = read_short(fp) txt = read_string(fp) return src_idx, txt def read_entry_html(fp: IO[bytes]) -> tuple[int, str, str]: src_idx = read_short(fp) title = read_string(fp) read_int(fp) # len_raw len_compr = read_int(fp) b_compr = fp.read(len_compr) with gzip.open(io.BytesIO(b_compr), "r") as zf: # this is not modified UTF-8 (read_string), but actual UTF-8 html = zf.read().decode() return src_idx, title, html def read_entry_index(fp: IO[bytes]) -> EntryIndexTuple: short_name = read_string(fp) long_name = read_string(fp) iso = read_string(fp) normalizer_rules = read_string(fp) swap_flag = read_bool(fp) main_token_count = read_int(fp) index_entries = read_list(fp, read_entry_indexentry) stop_list_size = read_int(fp) stop_list_offset = fp.tell() stop_list = read_hashset(fp) assert fp.tell() == stop_list_offset + stop_list_size num_rows = read_int(fp) row_size = read_int(fp) row_data = fp.read(num_rows * row_size) rows = [ # , struct.unpack(">bi", row_data[j : j + row_size]) for j in range(0, len(row_data), row_size) ] return ( short_name, long_name, iso, normalizer_rules, swap_flag, main_token_count, index_entries, stop_list, rows, ) def read_entry_indexentry(fp: IO[bytes]) -> IndexEntryType: token = read_string(fp) start_index = read_int(fp) count = read_int(fp) has_normalized = read_bool(fp) token_norm = read_string(fp) if has_normalized else "" html_indices = read_list(fp, read_entry_int) return token, start_index, count, token_norm, html_indices pyglossary-5.0.9/pyglossary/plugins/quickdic6/reader.py000066400000000000000000000122401476751035500233500ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import datetime as dt import pathlib import zipfile from typing import IO, TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType from .commons import EntryIndexTuple from pyglossary.html_utils import unescape_unicode from .quickdic 
import QuickDic from .read_funcs import ( read_entry_html, read_entry_index, read_entry_pairs, read_entry_source, read_entry_text, read_int, read_list, read_long, read_string, ) __all__ = ["Reader"] class Reader: useByteProgress = False depends = { "icu": "PyICU", } def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._dic: QuickDic | None = None def open(self, filename: str) -> None: self._filename = filename self._dic = self.quickdic_from_path(self._filename) self._glos.setDefaultDefiFormat("h") self._extract_synonyms_from_indices() # TODO: read glossary name and langs? @classmethod def quickdic_from_path(cls: type[Reader], path_str: str) -> QuickDic: path = pathlib.Path(path_str) if path.suffix != ".zip": with open(path, "rb") as fp: return cls.quickdic_from_fp(fp) with zipfile.ZipFile(path, mode="r") as zf: fname = next(n for n in zf.namelist() if n.endswith(".quickdic")) with zf.open(fname) as fp: return cls.quickdic_from_fp(fp) @staticmethod def quickdic_from_fp(fp: IO[bytes]) -> QuickDic: version = read_int(fp) created = dt.datetime.fromtimestamp(float(read_long(fp)) / 1000.0) # noqa: DTZ006 name = read_string(fp) sources = read_list(fp, read_entry_source) pairs = read_list(fp, read_entry_pairs) texts = read_list(fp, read_entry_text) htmls = read_list(fp, read_entry_html) indices = read_list(fp, read_entry_index) assert read_string(fp) == "END OF DICTIONARY" return QuickDic( name=name, sources=sources, pairs=pairs, texts=texts, htmls=htmls, version=version, indices=indices, created=created, ) def _extract_synonyms_from_indices(self) -> None: self._text_tokens: dict[int, str] = {} self._synonyms: dict[tuple[int, int], set[str]] = {} assert self._dic is not None for index in self._dic.indices: _, _, _, _, swap_flag, _, index_entries, _, _ = index # Note that we ignore swapped indices because pyglossary assumes # uni-directional dictionaries. # It might make sense to add an option in the future to read only the # swapped indices (create a dictionary with reversed direction). 
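			# For illustration (hypothetical data): by the end of
			# _extract_synonyms_from_indices, self._synonyms maps
			# (entry_type, entry_index) keys such as (4, 12) to a set like
			# {"color", "colour"} -- every index token that references
			# html entry number 12.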
if swap_flag: continue for i_entry, index_entry in enumerate(index_entries): e_rows = self._extract_rows_from_indexentry(index, i_entry) token, _, _, token_norm, _ = index_entry for entry_id in e_rows: if entry_id not in self._synonyms: self._synonyms[entry_id] = set() self._synonyms[entry_id].add(token) if token_norm: self._synonyms[entry_id].add(token_norm) def _extract_rows_from_indexentry( self, index: EntryIndexTuple, i_entry: int, recurse: list[int] | None = None, ) -> list[tuple[int, int]]: recurse = recurse or [] recurse.append(i_entry) _, _, _, _, _, _, index_entries, _, rows = index token, start_index, count, _, html_indices = index_entries[i_entry] block_rows = rows[start_index : start_index + count + 1] assert block_rows[0][0] in {1, 3} assert block_rows[0][1] == i_entry e_rows: list[tuple[int, int]] = [] for entry_type, entry_idx in block_rows[1:]: if entry_type in {1, 3}: # avoid an endless recursion if entry_idx not in recurse: e_rows.extend( self._extract_rows_from_indexentry( index, entry_idx, recurse=recurse, ), ) else: e_rows.append((entry_type, entry_idx)) if entry_type == 2 and entry_idx not in self._text_tokens: self._text_tokens[entry_idx] = token for idx in html_indices: if (4, idx) not in e_rows: e_rows.append((4, idx)) return e_rows def close(self) -> None: self.clear() def clear(self) -> None: self._filename = "" self._dic = None def __len__(self) -> int: if self._dic is None: return 0 return sum(len(p) for _, p in self._dic.pairs) + len(self._dic.htmls) def __iter__(self) -> Iterator[EntryType]: if self._dic is None: raise RuntimeError("dictionary not open") for idx, (_, pairs) in enumerate(self._dic.pairs): syns = self._synonyms.get((0, idx), set()) for word, defi in pairs: l_word = [word] + sorted(syns.difference({word})) yield self._glos.newEntry(l_word, defi, defiFormat="m") for idx, (_, defi) in enumerate(self._dic.texts): if idx not in self._text_tokens: # Ignore this text entry since it is not mentioned in the index at all # so that we don't even have a token or title for it. 
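				# (self._text_tokens was filled from the index rows of
				# type 2 collected in _extract_rows_from_indexentry above)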
continue word = self._text_tokens[idx] syns = self._synonyms.get((2, idx), set()) l_word = [word] + sorted(syns.difference({word})) yield self._glos.newEntry(l_word, defi, defiFormat="m") for idx, (_, word, defi) in enumerate(self._dic.htmls): syns = self._synonyms.get((4, idx), set()) l_word = [word] + sorted(syns.difference({word})) defi_new = unescape_unicode(defi) yield self._glos.newEntry(l_word, defi_new, defiFormat="h") pyglossary-5.0.9/pyglossary/plugins/quickdic6/tools.toml000066400000000000000000000006371476751035500236000ustar00rootroot00000000000000[Dictionary] web = "https://play.google.com/store/apps/details?id=de.reimardoeffinger.quickdic" source = "https://github.com/rdoeffinger/Dictionary" platforms = [ "Android",] license = "Apache License 2.0" plang = "Java" [DictionaryPC] web = "https://github.com/rdoeffinger/DictionaryPC" source = "https://github.com/rdoeffinger/DictionaryPC" platforms = [ "Windows",] license = "Apache License 2.0" plang = "Java"pyglossary-5.0.9/pyglossary/plugins/quickdic6/write_funcs.py000066400000000000000000000116401476751035500244410ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import gzip import io import math import struct from typing import IO, TYPE_CHECKING, TypeVar if TYPE_CHECKING: from collections.abc import Callable from typing import Any from .commons import EntryIndexTuple, IndexEntryType from pyglossary.plugin_lib import mutf8 from .commons import ( HASH_SET_CAPACITY_FACTOR, HASH_SET_INIT, HASH_SET_INIT2, LINKED_HASH_SET_INIT, ) __all__ = [ "write_entry_html", "write_entry_index", "write_entry_pairs", "write_entry_source", "write_entry_text", "write_int", "write_list", "write_long", "write_string", ] def write_int(fp: IO[bytes], val: int) -> int: return fp.write(struct.pack(">i", val)) def write_byte(fp: IO[bytes], val: int) -> int: return fp.write(struct.pack(">b", val)) def write_bool(fp: IO[bytes], val: int) -> int: return write_byte(fp, val) def write_short(fp: IO[bytes], val: int) -> int: return fp.write(struct.pack(">h", val)) def write_long(fp: IO[bytes], val: int) -> int: return fp.write(struct.pack(">q", val)) def write_float(fp: IO[bytes], val: float) -> int: return fp.write(struct.pack(">f", val)) def write_string(fp: IO[bytes], val: str) -> int: b_string = mutf8.encode_modified_utf8(val) return write_short(fp, len(b_string)) + fp.write(b_string) def write_hashset( fp: IO[bytes], data: list[str], linked_hash_set: bool = False, ) -> int: write_start_offset = fp.tell() if linked_hash_set: fp.write(LINKED_HASH_SET_INIT) else: fp.write(HASH_SET_INIT + HASH_SET_INIT2) num_entries = len(data) capacity = ( 2 ** math.ceil(math.log2(num_entries / HASH_SET_CAPACITY_FACTOR)) if num_entries > 0 else 128 ) write_int(fp, capacity) write_float(fp, HASH_SET_CAPACITY_FACTOR) write_int(fp, num_entries) for string in data: write_byte(fp, 0x74) write_string(fp, string) write_byte(fp, 0x78) return fp.tell() - write_start_offset T = TypeVar("T") def write_list( fp: IO[bytes], fun: Callable[[IO[bytes], T], Any], entries: list[T], ) -> int: write_start_offset = fp.tell() size = len(entries) write_int(fp, size) toc_offset = fp.tell() fp.seek(toc_offset + 8 * (size + 1)) toc = [fp.tell()] for e in entries: fun(fp, e) toc.append(fp.tell()) fp.seek(toc_offset) fp.write(struct.pack(f">{size + 1}q", *toc)) fp.seek(toc[-1]) return fp.tell() - write_start_offset def write_entry_int(fp: IO[bytes], entry: int) -> int: return write_int(fp, entry) def write_entry_source(fp: IO[bytes], entry: tuple[str, int]) -> int: 
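	# Layout (mirrors read_entry_source in read_funcs.py): a modified-UTF-8
	# string with a 2-byte big-endian length prefix, followed by a 4-byte
	# big-endian pair count.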
name, count = entry return write_string(fp, name) + write_int(fp, count) def write_entry_pairs( fp: IO[bytes], entry: tuple[int, list[tuple[str, str]]], ) -> int: write_start_offset = fp.tell() src_idx, pairs = entry write_short(fp, src_idx) write_int(fp, len(pairs)) for p in pairs: write_string(fp, p[0]) write_string(fp, p[1]) return fp.tell() - write_start_offset def write_entry_text(fp: IO[bytes], entry: tuple[int, str]) -> int: src_idx, txt = entry return write_short(fp, src_idx) + write_string(fp, txt) def write_entry_html(fp: IO[bytes], entry: tuple[int, str, str]) -> int: write_start_offset = fp.tell() src_idx, title, html = entry b_html = "".join(c if ord(c) < 128 else f"&#{ord(c)};" for c in html).encode() ib_compr = io.BytesIO() with gzip.GzipFile(fileobj=ib_compr, mode="wb", mtime=0) as zf: # note that the compressed bytes might differ from the original Java # implementation that uses GZIPOutputStream zf.write(b_html) ib_compr.seek(0) b_compr = ib_compr.read() write_short(fp, src_idx) write_string(fp, title) write_int(fp, len(b_html)) write_int(fp, len(b_compr)) fp.write(b_compr) return fp.tell() - write_start_offset def write_entry_index( fp: IO[bytes], entry: EntryIndexTuple, ) -> int: write_start_offset = fp.tell() ( short_name, long_name, iso, normalizer_rules, swap_flag, main_token_count, index_entries, stop_list, rows, ) = entry write_string(fp, short_name) write_string(fp, long_name) write_string(fp, iso) write_string(fp, normalizer_rules) write_bool(fp, swap_flag) write_int(fp, main_token_count) write_list(fp, write_entry_indexentry, index_entries) stop_list_size_offset = fp.tell() stop_list_offset = stop_list_size_offset + write_int(fp, 0) stop_list_size = write_hashset(fp, stop_list, linked_hash_set=True) fp.seek(stop_list_size_offset) write_int(fp, stop_list_size) fp.seek(stop_list_offset + stop_list_size) write_int(fp, len(rows)) write_int(fp, 5) row_data = b"".join([struct.pack(">bi", t, i) for t, i in rows]) fp.write(row_data) return fp.tell() - write_start_offset def write_entry_indexentry( fp: IO[bytes], entry: IndexEntryType, ) -> None: token, start_index, count, token_norm, html_indices = entry has_normalized = bool(token_norm) write_string(fp, token) write_int(fp, start_index) write_int(fp, count) write_bool(fp, has_normalized) if has_normalized: write_string(fp, token_norm) write_list(fp, write_entry_int, html_indices) pyglossary-5.0.9/pyglossary/plugins/quickdic6/writer.py000066400000000000000000000071701476751035500234300ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import datetime as dt import os from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType from pyglossary.core import log from .quickdic import QuickDic from .write_funcs import ( write_entry_html, write_entry_index, write_entry_pairs, write_entry_source, write_entry_text, write_int, write_list, write_long, write_string, ) __all__ = ["Writer"] _defaultNormalizerRulesDE = ":: Lower; 'ae' > 'ä'; 'oe' > 'ö'; 'ue' > 'ü'; 'ß' > 'ss'; " _defaultNormalizerRules = ( ":: Any-Latin; ' ' > ; :: Lower; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC ;" ) class Writer: _normalizer_rules = "" def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" self._dic = None def finish(self) -> None: self._filename = "" self._dic = None def open(self, filename: str) -> None: self._filename = filename @staticmethod def write_quickdic(dic: QuickDic, 
path: str) -> None: with open(path, "wb") as fp: log.info(f"Writing to {path} ...") write_int(fp, dic.version) write_long(fp, int(dic.created.timestamp() * 1000)) write_string(fp, dic.name) write_list(fp, write_entry_source, dic.sources) write_list(fp, write_entry_pairs, dic.pairs) write_list(fp, write_entry_text, dic.texts) write_list(fp, write_entry_html, dic.htmls) write_list(fp, write_entry_index, dic.indices) write_string(fp, "END OF DICTIONARY") def write(self) -> Generator[None, EntryType, None]: synonyms: dict[str, list[str]] = {} htmls: list[tuple[int, str, str]] = [] log.info("Converting individual entries ...") while True: entry = yield if entry is None: break if entry.isData(): log.warn(f"Ignoring binary data entry {entry.l_word[0]}") continue entry.detectDefiFormat() if entry.defiFormat not in {"h", "m"}: log.error(f"Unsupported defiFormat={entry.defiFormat}, assuming 'h'") words = entry.l_word if words[0] in synonyms: synonyms[words[0]].extend(words[1:]) else: synonyms[words[0]] = words[1:] # Note that we currently write out all entries as "html" type entries. # In the future, it might make sense to add an option that somehow # specifies the entry type to use. htmls.append((0, words[0], entry.defi)) glos = self._glos log.info("Collecting meta data ...") name = glos.getInfo("bookname") if not name: name = glos.getInfo("description") sourceLangCode, targetLangCode = "EN", "EN" if glos.sourceLang: sourceLangCode = glos.sourceLang.code if glos.targetLang: targetLangCode = glos.targetLang.code langs = f"{sourceLangCode}->{targetLangCode}" if langs not in name.lower(): name = f"{self._glos.getInfo('name')} ({langs})" log.info(f"QuickDic: {langs = }, {name = }") sources = [("", len(htmls))] created = None createdStr = os.getenv("QUICKDIC_CREATION_TIME") if createdStr: created = dt.datetime.fromtimestamp(int(createdStr), tz=dt.timezone.utc) log.info(f"QuickDic: using created={created.isoformat()!r}") self._dic = QuickDic( name=name, sources=sources, pairs=[], texts=[], htmls=htmls, created=created, # version: int = 6, # indices: list[EntryIndexTuple] | None = None, ) short_name = long_name = iso = sourceLangCode normalizer_rules = self._normalizer_rules or ( _defaultNormalizerRulesDE if iso == "DE" else _defaultNormalizerRules ) self._dic.add_index( short_name, long_name, iso, normalizer_rules, synonyms=synonyms, ) self.write_quickdic(self._dic, self._filename) pyglossary-5.0.9/pyglossary/plugins/sql/000077500000000000000000000000001476751035500204525ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/sql/__init__.py000066400000000000000000000015231476751035500225640ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, EncodingOption, ListOption, NewlineOption, Option, ) from .writer import Writer __all__ = [ "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "sql" name = "Sql" description = "SQL (.sql)" extensions = (".sql",) extensionCreate = ".sql" singleFile = True kind = "text" wiki = "https://en.wikipedia.org/wiki/SQL" website = None optionsProp: dict[str, Option] = { "encoding": EncodingOption(), "info_keys": ListOption(comment="List of dbinfo table columns"), "add_extra_info": BoolOption(comment="Create dbinfo_extra table"), "newline": NewlineOption(), "transaction": BoolOption(comment="Use TRANSACTION"), } 
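# A minimal usage sketch for this plugin (filenames are hypothetical, not
# from the docs): convert a glossary to .sql through the high-level
# Glossary API, then feed the result to sqlite3.
#
#     from pyglossary.glossary_v2 import ConvertArgs, Glossary
#
#     Glossary.init()
#     glos = Glossary()
#     glos.convert(ConvertArgs("MyDict.ifo", outputFilename="MyDict.sql"))
#
# and then, in a shell:  sqlite3 MyDict.db < MyDict.sql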
pyglossary-5.0.9/pyglossary/plugins/sql/tools.toml000066400000000000000000000000001476751035500224750ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/sql/writer.py000066400000000000000000000065541476751035500223470ustar00rootroot00000000000000# -*- coding: utf-8 -*-
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
	import io
	from collections.abc import Generator

	from pyglossary.glossary_types import EntryType, WriterGlossaryType

__all__ = ["Writer"]


class Writer:
	_encoding: str = "utf-8"
	_info_keys: list | None = None
	_add_extra_info: bool = True
	_newline: str = "<br>
        " _transaction: bool = False def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" self._file: io.IOBase | None = None def finish(self) -> None: self._filename = "" if self._file: self._file.close() self._file = None def open(self, filename: str) -> None: self._filename = filename self._file = open(filename, "w", encoding=self._encoding) self._writeInfo() def _writeInfo(self) -> None: fileObj = self._file if fileObj is None: raise ValueError("fileObj is None") newline = self._newline info_keys = self._getInfoKeys() infoDefLine = "CREATE TABLE dbinfo (" infoValues: list[str] = [] glos = self._glos for key in info_keys: value = glos.getInfo(key) value = ( value.replace("'", "''") .replace("\x00", "") .replace("\r", "") .replace("\n", newline) ) infoValues.append(f"'{value}'") infoDefLine += f"{key} char({len(value)}), " infoDefLine = infoDefLine[:-2] + ");" fileObj.write(infoDefLine + "\n") if self._add_extra_info: fileObj.write( "CREATE TABLE dbinfo_extra (" "'id' INTEGER PRIMARY KEY NOT NULL, " "'name' TEXT UNIQUE, 'value' TEXT);\n", ) fileObj.write( "CREATE TABLE word ('id' INTEGER PRIMARY KEY NOT NULL, " "'w' TEXT, 'm' TEXT);\n", ) fileObj.write( "CREATE TABLE alt ('id' INTEGER NOT NULL, 'w' TEXT);\n", ) if self._transaction: fileObj.write("BEGIN TRANSACTION;\n") fileObj.write(f"INSERT INTO dbinfo VALUES({','.join(infoValues)});\n") if self._add_extra_info: extraInfo = glos.getExtraInfos(info_keys) for index, (key, value) in enumerate(extraInfo.items()): key2 = key.replace("'", "''") value2 = value.replace("'", "''") fileObj.write( f"INSERT INTO dbinfo_extra VALUES({index + 1}, " f"'{key2}', '{value2}');\n", ) def _getInfoKeys(self) -> list[str]: info_keys = self._info_keys if info_keys: return info_keys return [ "dbname", "author", "version", "direction", "origLang", "destLang", "license", "category", "description", ] def write(self) -> Generator[None, EntryType, None]: newline = self._newline fileObj = self._file if fileObj is None: raise ValueError("fileObj is None") def fixStr(word: str) -> str: return word.replace("'", "''").replace("\r", "").replace("\n", newline) id_ = 1 while True: entry = yield if entry is None: break if entry.isData(): # FIXME continue words = entry.l_word word = fixStr(words[0]) defi = fixStr(entry.defi) fileObj.write( f"INSERT INTO word VALUES({id_}, '{word}', '{defi}');\n", ) for alt in words[1:]: fileObj.write( f"INSERT INTO alt VALUES({id_}, '{fixStr(alt)}');\n", ) id_ += 1 if self._transaction: fileObj.write("END TRANSACTION;\n") fileObj.write("CREATE INDEX ix_word_w ON word(w COLLATE NOCASE);\n") fileObj.write("CREATE INDEX ix_alt_id ON alt(id COLLATE NOCASE);\n") fileObj.write("CREATE INDEX ix_alt_w ON alt(w COLLATE NOCASE);\n") pyglossary-5.0.9/pyglossary/plugins/stardict/000077500000000000000000000000001476751035500214705ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/stardict/__init__.py000066400000000000000000000045351476751035500236100ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import os from pyglossary.flags import ALWAYS, DEFAULT_YES from pyglossary.option import ( BoolOption, Option, StrOption, ) from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "stardict" name = "Stardict" description = "StarDict (.ifo)" extensions = (".ifo",) extensionCreate = 
"-stardict/" singleFile = False sortOnWrite = ALWAYS sortKeyName = "stardict" sortEncoding = "utf-8" kind = "directory" wiki = "https://en.wikipedia.org/wiki/StarDict" website = ( "http://huzheng.org/stardict/", "huzheng.org/stardict", ) extraDocs = [ ( "For sdcv and KOReader users", "Use [StarDict (Merge Syns)](./stardict_merge_syns.md) plugin" " (instead of this one) to create glossaries for using in" " [sdcv](https://dushistov.github.io/sdcv/)" " or [KOReader](http://koreader.rocks/)", ), ] # https://github.com/huzheng001/stardict-3/blob/master/dict/doc/StarDictFileFormat optionsProp: dict[str, Option] = { "large_file": BoolOption( comment="Use idxoffsetbits=64 bits, for large files only", ), "stardict_client": BoolOption( comment="Modify html entries for StarDict 3.0", ), "dictzip": BoolOption( comment="Compress .dict file to .dict.dz", ), "sametypesequence": StrOption( values=["", "h", "m", "x", None], comment="Definition format: h=html, m=plaintext, x=xdxf", ), "xdxf_to_html": BoolOption( comment="Convert XDXF entries to HTML", ), "xsl": BoolOption( comment="Use XSL transformation", ), "unicode_errors": StrOption( values=[ "strict", # raise a UnicodeDecodeError exception "ignore", # just leave the character out "replace", # use U+FFFD, REPLACEMENT CHARACTER "backslashreplace", # insert a \xNN escape sequence ], comment="What to do with Unicode decoding errors", ), "audio_goldendict": BoolOption( comment="Convert audio links for GoldenDict (desktop)", ), "audio_icon": BoolOption( comment="Add glossary's audio icon", ), "sqlite": BoolOption( comment="Use SQLite to limit memory usage." " Default depends on global SQLite mode.", allowNone=True, ), } if os.getenv("PYGLOSSARY_STARDICT_NO_FORCE_SORT") == "1": sortOnWrite = DEFAULT_YES pyglossary-5.0.9/pyglossary/plugins/stardict/memlist.py000066400000000000000000000011551476751035500235160ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import ( TYPE_CHECKING, Any, ) if TYPE_CHECKING: from collections.abc import Iterator __all__ = ["MemSdList"] class MemSdList: def __init__(self) -> None: self._l: list[Any] = [] def append(self, x: Any) -> None: self._l.append(x) def __len__(self) -> int: return len(self._l) def __iter__(self) -> Iterator[Any]: return iter(self._l) def sortKey(self, item: tuple[bytes, Any]) -> tuple[bytes, bytes]: # noqa: PLR6301 return ( item[0].lower(), item[0], ) def sort(self) -> None: self._l.sort(key=self.sortKey) pyglossary-5.0.9/pyglossary/plugins/stardict/reader.py000066400000000000000000000336341476751035500233150ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import gzip import os from os.path import ( dirname, isdir, isfile, join, realpath, splitext, ) from typing import ( TYPE_CHECKING, Protocol, ) if TYPE_CHECKING: import io from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.core import log from pyglossary.text_utils import ( uint32FromBytes, uint64FromBytes, ) __all__ = ["Reader"] def _verifySameTypeSequence(s: str) -> bool: if not s: return True # maybe should just check it's in ("h", "m", "x") if not s.isalpha(): return False return len(s) == 1 if TYPE_CHECKING: class XdxfTransformerType(Protocol): def transformByInnerString(self, text: str) -> str: ... 
class Reader: useByteProgress = False _xdxf_to_html: bool = True _xsl: bool = False _unicode_errors: str = "strict" def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self.clear() self._xdxfTr: XdxfTransformerType | None = None self._large_file = False """ indexData format indexData[i] - i-th record in index file, a tuple (previously a list) of length 3 indexData[i][0] - b_word (bytes) indexData[i][1] - definition block offset in dict file (int) indexData[i][2] - definition block size in dict file (int) REMOVED: indexData[i][3] - list of definitions indexData[i][3][j][0] - definition data indexData[i][3][j][1] - definition type - "h", "m" or "x" indexData[i][4] - list of synonyms (strings) synDict: a dict { entryIndex -> altList } """ def xdxf_setup(self) -> XdxfTransformerType: if self._xsl: from pyglossary.xdxf.xsl_transform import XslXdxfTransformer return XslXdxfTransformer(encoding="utf-8") from pyglossary.xdxf.transform import XdxfTransformer return XdxfTransformer(encoding="utf-8") def xdxf_transform(self, text: str) -> str: if self._xdxfTr is None: self._xdxfTr = self.xdxf_setup() return self._xdxfTr.transformByInnerString(text) def close(self) -> None: if self._dictFile: self._dictFile.close() self.clear() def clear(self) -> None: self._dictFile: io.IOBase | None = None self._filename = "" # base file path, no extension self._indexData: list[tuple[bytes, int, int]] = [] self._synDict: dict[int, list[str]] = {} self._sametypesequence = "" self._resDir = "" self._resFileNames: list[str] = [] self._wordCount: int | None = None def open(self, filename: str) -> None: if splitext(filename)[1].lower() == ".ifo": filename = splitext(filename)[0] elif isdir(filename): filename = join(filename, filename) self._filename = filename self._filename = realpath(self._filename) self.readIfoFile() sametypesequence = self._glos.getInfo("sametypesequence") if not _verifySameTypeSequence(sametypesequence): raise LookupError(f"Invalid {sametypesequence = }") self._indexData = self.readIdxFile() self._wordCount = len(self._indexData) self._synDict = self.readSynFile() self._sametypesequence = sametypesequence if isfile(self._filename + ".dict.dz"): self._dictFile = gzip.open(self._filename + ".dict.dz", mode="rb") else: self._dictFile = open(self._filename + ".dict", mode="rb") self._resDir = join(dirname(self._filename), "res") if isdir(self._resDir): self._resFileNames = os.listdir(self._resDir) else: self._resDir = "" self._resFileNames = [] # self.readResources() def __len__(self) -> int: if self._wordCount is None: raise RuntimeError( "StarDict: len(reader) called while reader is not open", ) return self._wordCount + len(self._resFileNames) def readIfoFile(self) -> None: """.ifo file is a text file in utf-8 encoding.""" with open( self._filename + ".ifo", mode="rb", ) as ifoFile: for line in ifoFile: line = line.strip() # noqa: PLW2901 if not line: continue if line == b"StarDict's dict ifo file": continue b_key, _, b_value = line.partition(b"=") if not (b_key and b_value): continue try: key = b_key.decode("utf-8") value = b_value.decode("utf-8", errors=self._unicode_errors) except UnicodeDecodeError: log.error(f"ifo line is not UTF-8: {line!r}") continue self._glos.setInfo(key, value) idxoffsetbits = self._glos.getInfo("idxoffsetbits") if idxoffsetbits: if idxoffsetbits == "32": self._large_file = False elif idxoffsetbits == "64": self._large_file = True else: raise ValueError(f"invalid {idxoffsetbits = }") def readIdxFile(self) -> list[tuple[bytes, int, int]]: if 
isfile(self._filename + ".idx.gz"): with gzip.open(self._filename + ".idx.gz") as g_file: idxBytes = g_file.read() else: with open(self._filename + ".idx", "rb") as _file: idxBytes = _file.read() indexData: list[tuple[bytes, int, int]] = [] pos = 0 if self._large_file: def getOffset() -> tuple[int, int]: return uint64FromBytes(idxBytes[pos : pos + 8]), pos + 8 else: def getOffset() -> tuple[int, int]: return uint32FromBytes(idxBytes[pos : pos + 4]), pos + 4 while pos < len(idxBytes): beg = pos pos = idxBytes.find(b"\x00", beg) if pos < 0: log.error("Index file is corrupted") break b_word = idxBytes[beg:pos] pos += 1 if pos + 8 > len(idxBytes): log.error("Index file is corrupted") break offset, pos = getOffset() size = uint32FromBytes(idxBytes[pos : pos + 4]) pos += 4 indexData.append((b_word, offset, size)) return indexData """ Type: 'r' https://github.com/huzheng001/stardict-3/blob/master/dict/doc/StarDictFileFormat#L431 Resource file list. The content can be: img:pic/example.jpg // Image file snd:apple.wav // Sound file vdo:film.avi // Video file att:file.bin // Attachment file More than one line is supported as a list of available files. StarDict will find the files in the Resource Storage. The image will be shown, the sound file will have a play button. You can "save as" the attachment file and so on. The file list must be a utf-8 string ending with '\0'. Use '\n' for separating new lines. Use '/' character as directory separator. """ def decodeDefiTypeR( # noqa: PLR6301 self, b_defiPart: bytes, ) -> tuple[str, str]: result = '
<div>'
		for b_item in b_defiPart.split(b"\n"):
			item = b_item.decode("utf-8")
			type_, _, fname = item.partition(":")
			if type_ == "img":
				result += f'<img src="{fname}">'
			elif type_ == "snd":
				result += f'<audio controls src="{fname}"></audio>'
			elif type_ == "vdo":
				result += f'<video controls src="{fname}"></video>'
			elif type_ == "att":
				result += f'<a href="{fname}">{fname}</a>'
			else:
				log.warning(f"Unsupported resource type {type_}")
		result += "</div>
        " return "h", result def decodeRawDefiPart( self, b_defiPart: bytes, i_type: int, unicode_errors: str, ) -> tuple[str, str]: type_ = chr(i_type) if type_ == "r": return self.decodeDefiTypeR(b_defiPart) format_ = { "m": "m", "t": "m", "y": "m", "g": "h", "h": "h", "x": "x", }.get(type_, "") if not format_: log.warning(f"Definition type {type_!r} is not supported") defi = b_defiPart.decode("utf-8", errors=unicode_errors) # log.info(f"{_type}->{_format}: {_defi}".replace("\n", "")[:120]) if format_ == "x" and self._xdxf_to_html: defi = self.xdxf_transform(defi) format_ = "h" return format_, defi def renderRawDefiList( self, rawDefiList: list[tuple[bytes, int]], unicode_errors: str, ) -> tuple[str, str]: if len(rawDefiList) == 1: b_defiPart, i_type = rawDefiList[0] format_, defi = self.decodeRawDefiPart( b_defiPart=b_defiPart, i_type=i_type, unicode_errors=unicode_errors, ) return defi, format_ defiFormatSet: set[str] = set() defisWithFormat: list[tuple[str, str]] = [] for b_defiPart, i_type in rawDefiList: format_, defi = self.decodeRawDefiPart( b_defiPart=b_defiPart, i_type=i_type, unicode_errors=unicode_errors, ) defisWithFormat.append((defi, format_)) defiFormatSet.add(format_) if len(defiFormatSet) == 1: format_ = defiFormatSet.pop() if format_ == "h": return "\n
        ".join([defi for defi, _ in defisWithFormat]), format_ return "\n".join([defi for defi, _ in defisWithFormat]), format_ if not defiFormatSet: log.error(f"empty defiFormatSet, {rawDefiList=}") return "", "" # convert plaintext or xdxf to html defis: list[str] = [] for defi_, format_ in defisWithFormat: defi = defi_ if format_ == "m": defi = defi.replace("\n", "
        ") defi = f"
        {defi}
        " elif format_ == "x": defi = self.xdxf_transform(defi) defis.append(defi) return "\n
        \n".join(defis), "h" def __iter__(self) -> Iterator[EntryType]: # noqa: PLR0912 indexData = self._indexData synDict = self._synDict sametypesequence = self._sametypesequence dictFile = self._dictFile unicode_errors = self._unicode_errors if not dictFile: raise RuntimeError("iterating over a reader while it's not open") if not indexData: log.warning("indexData is empty") return for entryIndex, (b_word, defiOffset, defiSize) in enumerate(indexData): if not b_word: continue dictFile.seek(defiOffset) if dictFile.tell() != defiOffset: log.error(f"Unable to read definition for word {b_word!r}") continue b_defiBlock = dictFile.read(defiSize) if len(b_defiBlock) != defiSize: log.error(f"Unable to read definition for word {b_word!r}") continue if sametypesequence: rawDefiList = self.parseDefiBlockCompact( b_defiBlock, sametypesequence, ) else: rawDefiList = self.parseDefiBlockGeneral(b_defiBlock) if rawDefiList is None: log.error(f"Data file is corrupted. Word {b_word!r}") continue word: str | list[str] word = b_word.decode("utf-8", errors=unicode_errors) try: alts = synDict[entryIndex] except KeyError: # synDict is dict pass else: word = [word] + alts defi, defiFormat = self.renderRawDefiList( rawDefiList, unicode_errors, ) # FIXME: # defi = defi.replace(' src="./res/', ' src="./') yield self._glos.newEntry(word, defi, defiFormat=defiFormat) if isdir(self._resDir): for fname in os.listdir(self._resDir): fpath = join(self._resDir, fname) with open(fpath, "rb") as _file: yield self._glos.newDataEntry( fname, _file.read(), ) def readSynFile(self) -> dict[int, list[str]]: """Return synDict, a dict { entryIndex -> altList }.""" if self._wordCount is None: raise RuntimeError("self._wordCount is None") unicode_errors = self._unicode_errors synBytes = b"" if isfile(self._filename + ".syn"): with open(self._filename + ".syn", mode="rb") as _file: synBytes = _file.read() elif isfile(self._filename + ".syn.dz"): with gzip.open(self._filename + ".syn.dz", mode="rb") as _zfile: synBytes = _zfile.read() else: return {} synBytesLen = len(synBytes) synDict: dict[int, list[str]] = {} pos = 0 while pos < synBytesLen: beg = pos pos = synBytes.find(b"\x00", beg) if pos < 0: log.error("Synonym file is corrupted") break b_alt = synBytes[beg:pos] # b_alt is bytes pos += 1 if pos + 4 > len(synBytes): log.error("Synonym file is corrupted") break entryIndex = uint32FromBytes(synBytes[pos : pos + 4]) pos += 4 if entryIndex >= self._wordCount: log.error( f"Corrupted synonym file. Word {b_alt!r} references invalid item", ) continue s_alt = b_alt.decode("utf-8", errors=unicode_errors) # s_alt is str try: synDict[entryIndex].append(s_alt) except KeyError: synDict[entryIndex] = [s_alt] return synDict @staticmethod def parseDefiBlockCompact( b_block: bytes, sametypesequence: str, ) -> list[tuple[bytes, int]] | None: """ Parse definition block when sametypesequence option is specified. 
Return a list of (b_defi, defiFormatCode) tuples where b_defi is a bytes instance and defiFormatCode is int, so: defiFormat = chr(defiFormatCode) """ b_sametypesequence = sametypesequence.encode("utf-8") if not b_sametypesequence: raise ValueError(f"{b_sametypesequence = }") res: list[tuple[bytes, int]] = [] i = 0 for t in b_sametypesequence[:-1]: if i >= len(b_block): return None if bytes([t]).islower(): beg = i i = b_block.find(b"\x00", beg) if i < 0: return None res.append((b_block[beg:i], t)) i += 1 else: # assert bytes([t]).isupper() if i + 4 > len(b_block): return None size = uint32FromBytes(b_block[i : i + 4]) i += 4 if i + size > len(b_block): return None res.append((b_block[i : i + size], t)) i += size if i >= len(b_block): return None t = b_sametypesequence[-1] if bytes([t]).islower(): if 0 in b_block[i:]: return None res.append((b_block[i:], t)) else: # assert bytes([t]).isupper() res.append((b_block[i:], t)) return res @staticmethod def parseDefiBlockGeneral( b_block: bytes, ) -> list[tuple[bytes, int]] | None: """ Parse definition block when sametypesequence option is not specified. Return a list of (b_defi, defiFormatCode) tuples where b_defi is a bytes instance and defiFormatCode is int, so: defiFormat = chr(defiFormatCode) """ res: list[tuple[bytes, int]] = [] i = 0 while i < len(b_block): t = b_block[i] if not bytes([t]).isalpha(): return None i += 1 if bytes([t]).islower(): beg = i i = b_block.find(b"\x00", beg) if i < 0: return None res.append((b_block[beg:i], t)) i += 1 else: # assert bytes([t]).isupper() if i + 4 > len(b_block): return None size = uint32FromBytes(b_block[i : i + 4]) i += 4 if i + size > len(b_block): return None res.append((b_block[i : i + size], t)) i += size return res # def readResources(self): # if not isdir(self._resDir): # resInfoPath = join(baseDirPath, "res.rifo") # if isfile(resInfoPath): # log.warning( # "StarDict resource database is not supported. Skipping" # ) pyglossary-5.0.9/pyglossary/plugins/stardict/sd_types.py000066400000000000000000000007231476751035500236760ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import ( TYPE_CHECKING, Any, Protocol, TypeVar, ) if TYPE_CHECKING: from collections.abc import Iterator T_SDListItem_contra = TypeVar("T_SDListItem_contra", contravariant=True) class T_SdList(Protocol[T_SDListItem_contra]): def append(self, x: T_SDListItem_contra) -> None: ... def __len__(self) -> int: ... def __iter__(self) -> Iterator[Any]: ... def sort(self) -> None: ... 
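# Both MemSdList (memlist.py, above) and the SQLite-backed
# IdxSqList/SynSqList (sqlist.py, below) structurally satisfy T_SdList;
# the writer only ever appends items, asks for len(), sorts once, and
# iterates. A hypothetical type-checking sketch:
#
#     lst: T_SdList[tuple[bytes, int]] = MemSdList()
#     lst.append((b"alpha", 3))
#     lst.sort()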
pyglossary-5.0.9/pyglossary/plugins/stardict/sqlist.py000066400000000000000000000050251476751035500233630ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import os from os.path import isfile from typing import TYPE_CHECKING if TYPE_CHECKING: import sqlite3 from collections.abc import Iterator, Sequence from pyglossary.glossary_types import EntryType from pyglossary.core import log __all__ = [ "IdxSqList", "SynSqList", ] class _BaseSqList: def __init__( self, database: str, ) -> None: from sqlite3 import connect if isfile(database): log.warning(f"Renaming {database} to {database}.bak") os.rename(database, database + "bak") self._con: sqlite3.Connection | None = connect(database) self._cur: sqlite3.Cursor | None = self._con.cursor() if not database: raise ValueError(f"invalid {database=}") self._orderBy = "word_lower, word" self._sorted = False self._len = 0 columns = self._columns = [ ("word_lower", "TEXT"), ("word", "TEXT"), ] + self.getExtraColumns() self._columnNames = ",".join(col[0] for col in columns) colDefs = ",".join(f"{col[0]} {col[1]}" for col in columns) self._con.execute( f"CREATE TABLE data ({colDefs})", ) self._con.execute( f"CREATE INDEX sortkey ON data({self._orderBy});", ) self._con.commit() @classmethod def getExtraColumns(cls) -> list[tuple[str, str]]: # list[(columnName, dataType)] return [] def __len__(self) -> int: return self._len def append(self, item: Sequence) -> None: if self._cur is None or self._con is None: raise RuntimeError("db is closed") self._len += 1 extraN = len(self._columns) - 1 self._cur.execute( f"insert into data({self._columnNames}) values (?{', ?' * extraN})", [item[0].lower()] + list(item), ) def sort(self) -> None: pass def close(self) -> None: if self._cur is None or self._con is None: return self._con.commit() self._cur.close() self._con.close() self._con = None self._cur = None def __del__(self) -> None: try: self.close() except AttributeError as e: log.error(str(e)) def __iter__(self) -> Iterator[EntryType]: if self._cur is None: raise RuntimeError("db is closed") query = f"SELECT * FROM data ORDER BY {self._orderBy}" self._cur.execute(query) for row in self._cur: yield row[1:] class IdxSqList(_BaseSqList): @classmethod def getExtraColumns(cls) -> list[tuple[str, str]]: # list[(columnName, dataType)] return [ ("idx_block", "BLOB"), ] class SynSqList(_BaseSqList): @classmethod def getExtraColumns(cls) -> list[tuple[str, str]]: # list[(columnName, dataType)] return [ ("entry_index", "INTEGER"), ] pyglossary-5.0.9/pyglossary/plugins/stardict/tools.toml000066400000000000000000000057741476751035500235420ustar00rootroot00000000000000[AyanDict] web = "https://github.com/ilius/ayandict" source = "https://github.com/ilius/ayandict" platforms = [ "Linux", "Windows", "Mac",] license = "GPL" plang = "Go" ["GoldenDict-NG by @xiaoyifang"] web = "https://xiaoyifang.github.io/goldendict-ng/" source = "https://github.com/xiaoyifang/goldendict-ng" platforms = [ "Linux", "Windows", "Mac",] license = "GPL" plang = "C++" [GoldenDict] web = "http://goldendict.org/" source = "https://github.com/goldendict/goldendict" wiki = "https://github.com/goldendict/goldendict/wiki" platforms = [ "Linux", "Windows", "Mac",] license = "GPL" plang = "C++" [StarDict] web = "http://huzheng.org/stardict/" source = "https://github.com/huzheng001/stardict-3" platforms = [ "Linux", "Windows", "Mac",] license = "GPL" plang = "C++" [QStarDict] web = "https://github.com/a-rodin/qstardict" source = "https://github.com/a-rodin/qstardict" platforms = 
[ "Linux", "Windows", "Mac",] license = "GPLv2" plang = "C++" ["GoldenDict Mobile (Free)"] web = "http://goldendict.mobi/" web2 = "https://play.google.com/store/apps/details?id=mobi.goldendict.android.free" platforms = [ "Android",] license = "Freemium" ["GoldenDict Mobile (Full)"] web = "http://goldendict.mobi/" web2 = "https://play.google.com/store/apps/details?id=mobi.goldendict.android" platforms = [ "Android",] license = "Proprietary" ["Twinkle Star Dictionary"] web = "https://play.google.com/store/apps/details?id=com.qtier.dict" platforms = [ "Android",] license = "Unknown" # last release: 2015/10/19 # could not find the source code, license or website [WordMateX] web = "https://apkcombo.com/wordmatex/org.d1scw0rld.wordmatex/" platforms = [ "Android",] license = "Proprietary" # last release: 2020/01/01, version 2.1.1 # Google Play says "not compatible with your devices", not letting me # download and install, so I downloaded apk from apkcombo.com # This is the only Android app (not just for StarDict format) I found # that supports auto-RTL [QDict] web = "https://play.google.com/store/apps/details?id=com.annie.dictionary" source = "https://github.com/namndev/QDict" platforms = [ "Android",] license = "Apache 2.0" plang = "Java" # last release: 2017/04/16 (keeps crashing on my device, unusable) # last commit: 2020/06/24 ["Fora Dictionary"] web = "https://play.google.com/store/apps/details?id=com.ngc.fora" platforms = [ "Android",] license = "Freemium" # no dark mode # some options show "Premium Feature" # has prefix-search-on-type but it's a little slow # supports RTL (haven't tested auto-RTL) ["Fora Dictionary Pro"] web = "https://play.google.com/store/apps/details?id=com.ngc.fora.android" platforms = [ "Android",] license = "Proprietary" [KOReader] web = "http://koreader.rocks/" source = "https://github.com/koreader/koreader" platforms = [ "Android", "Amazon Kindle", "Kobo eReader", "PocketBook", "Cervantes",] license = "AGPLv3" plang = "Lua" [sdcv] web = "https://dushistov.github.io/sdcv/" source = "https://github.com/Dushistov/sdcv" platforms = [ "Linux", "Windows", "Mac", "Android",] license = "GPLv2" plang = "C++" pyglossary-5.0.9/pyglossary/plugins/stardict/writer.py000066400000000000000000000252651476751035500233700ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import os import re from os.path import ( dirname, getsize, isdir, join, realpath, split, splitext, ) from time import perf_counter as now from typing import ( TYPE_CHECKING, Literal, ) if TYPE_CHECKING: from collections.abc import Callable, Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType from pyglossary.langs import Lang from pyglossary.plugins.stardict.sd_types import T_SdList from pyglossary.core import log from pyglossary.glossary_utils import Error from pyglossary.plugins.stardict.memlist import MemSdList from pyglossary.plugins.stardict.sqlist import IdxSqList, SynSqList from pyglossary.text_utils import uint32ToBytes, uint64ToBytes __all__ = ["Writer"] infoKeys = ( "bookname", "author", "email", "website", "description", "date", ) # _re_newline = re.compile("[\n\r]+") _re_newline = re.compile("\n\r?|\r\n?") def _newlinesToSpace(text: str) -> str: return _re_newline.sub(" ", text) def _newlinesToBr(text: str) -> str: return _re_newline.sub("
        ", text) class Writer: _large_file: bool = False _dictzip: bool = True _sametypesequence: Literal["", "h", "m", "x"] | None = "" _stardict_client: bool = False _audio_goldendict: bool = False _audio_icon: bool = True _sqlite: bool | None = None dictzipSynFile = True def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" self._resDir = "" self._sourceLang: Lang | None = None self._targetLang: Lang | None = None self._p_pattern = re.compile( "]*?)?>(.*?)

        ", re.DOTALL, ) self._br_pattern = re.compile( "", re.IGNORECASE, ) self._re_audio_link = re.compile( ']*? )?href="sound://([^<>"]+)"( .*?)?>(.*?)', ) def finish(self) -> None: self._filename = "" self._resDir = "" self._sourceLang = None self._targetLang = None def open(self, filename: str) -> None: if self._sqlite is None: self._sqlite = self._glos.sqlite log.debug(f"open: {filename = }, {self._sqlite = }") fileBasePath = filename ## if splitext(filename)[1].lower() == ".ifo": fileBasePath = splitext(filename)[0] elif filename.endswith(os.sep): if not isdir(filename): os.makedirs(filename) fileBasePath = join(filename, split(filename[:-1])[-1]) elif isdir(filename): fileBasePath = join(filename, split(filename)[-1]) parentDir = split(fileBasePath)[0] if not isdir(parentDir): log.info(f"Creating directory {parentDir}") os.mkdir(parentDir) ## if fileBasePath: fileBasePath = realpath(fileBasePath) self._filename = fileBasePath self._resDir = join(dirname(fileBasePath), "res") self._sourceLang = self._glos.sourceLang self._targetLang = self._glos.targetLang if self._sametypesequence: log.debug(f"Using write option sametypesequence={self._sametypesequence}") elif self._sametypesequence is not None: stat = self._glos.collectDefiFormat(100) log.debug(f"defiFormat stat: {stat}") if stat: if stat["m"] > 0.97: log.info("Auto-selecting sametypesequence=m") self._sametypesequence = "m" elif stat["h"] > 0.5: log.info("Auto-selecting sametypesequence=h") self._sametypesequence = "h" def write(self) -> Generator[None, EntryType, None]: from pyglossary.os_utils import runDictzip if not isdir(self._resDir): os.mkdir(self._resDir) if self._sametypesequence: yield from self.writeCompact(self._sametypesequence) else: yield from self.writeGeneral() try: os.rmdir(self._resDir) except OSError: pass # "Directory not empty" or "Permission denied" if self._dictzip: runDictzip(f"{self._filename}.dict") syn_file = f"{self._filename}.syn" if os.path.exists(syn_file) and self.dictzipSynFile: runDictzip(syn_file) def fixDefi(self, defi: str, defiFormat: str) -> bytes: # for StarDict 3.0: if self._stardict_client and defiFormat == "h": defi = self._p_pattern.sub("\\2
        ", defi) # if there is

        left without opening, replace with
        defi = defi.replace("

        ", "
        ") defi = self._br_pattern.sub("
        ", defi) if self._audio_goldendict: if self._audio_icon: defi = self._re_audio_link.sub( r'', defi, ) else: defi = self._re_audio_link.sub( r'', defi, ) # FIXME: # defi = defi.replace(' src="./', ' src="./res/') return defi.encode("utf-8") def newIdxList(self) -> T_SdList[tuple[bytes, bytes]]: if not self._sqlite: return MemSdList() return IdxSqList(join(self._glos.tmpDataDir, "stardict-idx.db")) def newSynList(self) -> T_SdList[tuple[bytes, int]]: if not self._sqlite: return MemSdList() return SynSqList(join(self._glos.tmpDataDir, "stardict-syn.db")) def dictMarkToBytesFunc(self) -> tuple[Callable[[int], bytes], int]: if self._large_file: return uint64ToBytes, 0xFFFFFFFFFFFFFFFF return uint32ToBytes, 0xFFFFFFFF def writeCompact(self, defiFormat: str) -> Generator[None, EntryType, None]: """ Build StarDict dictionary with sametypesequence option specified. Every item definition consists of a single article. All articles have the same format, specified in defiFormat parameter. defiFormat: format of article definition: h - html, m - plain text """ log.debug(f"writeCompact: {defiFormat=}") altIndexList = self.newSynList() dictFile = open(self._filename + ".dict", "wb") idxFile = open(self._filename + ".idx", "wb") dictMarkToBytes, dictMarkMax = self.dictMarkToBytesFunc() t0 = now() dictMark, entryIndex = 0, -1 while True: entry = yield if entry is None: break if entry.isData(): entry.save(self._resDir) continue entryIndex += 1 b_words = entry.lb_word for b_alt in b_words[1:]: altIndexList.append((b_alt, entryIndex)) b_dictBlock = self.fixDefi(entry.defi, defiFormat) dictFile.write(b_dictBlock) idxFile.write( b_words[0] + b"\x00" + dictMarkToBytes(dictMark) + uint32ToBytes(len(b_dictBlock)) ) dictMark += len(b_dictBlock) if dictMark > dictMarkMax: raise Error( f"StarDict: {dictMark = } is too big, set option large_file=true", ) dictFile.close() idxFile.close() log.info(f"Writing dict + idx file took {now() - t0:.2f} seconds") self.writeSynFile(altIndexList) self.writeIfoFile( entryIndex + 1, len(altIndexList), ) def writeGeneral(self) -> Generator[None, EntryType, None]: """ Build StarDict dictionary in general case. Every item definition may consist of an arbitrary number of articles. sametypesequence option is not used. 
""" log.debug("writeGeneral") altIndexList = self.newSynList() dictFile = open(self._filename + ".dict", "wb") idxFile = open(self._filename + ".idx", "wb") t0 = now() dictMarkToBytes, dictMarkMax = self.dictMarkToBytesFunc() dictMark, entryIndex = 0, -1 while True: entry = yield if entry is None: break if entry.isData(): entry.save(self._resDir) continue entryIndex += 1 defiFormat = entry.detectDefiFormat("m") # call no more than once b_words = entry.lb_word for b_alt in b_words[1:]: altIndexList.append((b_alt, entryIndex)) b_defi = self.fixDefi(entry.defi, defiFormat) b_dictBlock = defiFormat.encode("ascii") + b_defi + b"\x00" dictFile.write(b_dictBlock) idxFile.write( b_words[0] + b"\x00" + dictMarkToBytes(dictMark) + uint32ToBytes(len(b_dictBlock)) ) dictMark += len(b_dictBlock) if dictMark > dictMarkMax: raise Error( f"StarDict: {dictMark = } is too big, set option large_file=true", ) dictFile.close() idxFile.close() log.info(f"Writing dict + idx file took {now() - t0:.2f} seconds") self.writeSynFile(altIndexList) self.writeIfoFile( entryIndex + 1, len(altIndexList), ) def writeSynFile(self, altIndexList: T_SdList[tuple[bytes, int]]) -> None: """Build .syn file.""" if not altIndexList: return log.info(f"Sorting {len(altIndexList)} synonyms...") t0 = now() altIndexList.sort() log.info( f"Sorting {len(altIndexList)} synonyms took {now() - t0:.2f} seconds", ) log.info(f"Writing {len(altIndexList)} synonyms...") t0 = now() with open(self._filename + ".syn", "wb") as synFile: synFile.writelines( b_alt + b"\x00" + uint32ToBytes(entryIndex) for b_alt, entryIndex in altIndexList ) log.info( f"Writing {len(altIndexList)} synonyms took {now() - t0:.2f} seconds", ) def writeIdxFile(self, indexList: T_SdList[tuple[bytes, bytes]]) -> None: if not indexList: return log.info(f"Sorting idx with {len(indexList)} entries...") t0 = now() indexList.sort() log.info( f"Sorting idx with {len(indexList)} entries took {now() - t0:.2f} seconds", ) log.info(f"Writing idx with {len(indexList)} entries...") t0 = now() with open(self._filename + ".idx", mode="wb") as indexFile: indexFile.writelines(key + b"\x00" + value for key, value in indexList) log.info( f"Writing idx with {len(indexList)} entries took {now() - t0:.2f} seconds", ) def getBookname(self) -> str: bookname = _newlinesToSpace(self._glos.getInfo("name")) sourceLang = self._sourceLang targetLang = self._targetLang if sourceLang and targetLang: langs = f"{sourceLang.code}-{targetLang.code}" if langs not in bookname.lower(): bookname = f"{bookname} ({langs})" log.info(f"bookname: {bookname}") return bookname def getDescription(self) -> str: glos = self._glos desc = glos.getInfo("description") copyright_ = glos.getInfo("copyright") if copyright_: desc = f"{copyright_}\n{desc}" publisher = glos.getInfo("publisher") if publisher: desc = f"Publisher: {publisher}\n{desc}" return _newlinesToBr(desc) def writeIfoFile( self, wordCount: int, synWordCount: int, ) -> None: """Build .ifo file.""" glos = self._glos defiFormat = self._sametypesequence indexFileSize = getsize(self._filename + ".idx") ifoDict: dict[str, str] = { "version": "3.0.0", "bookname": self.getBookname(), "wordcount": str(wordCount), "idxfilesize": str(indexFileSize), } if self._large_file: ifoDict["idxoffsetbits"] = "64" if defiFormat: ifoDict["sametypesequence"] = defiFormat if synWordCount > 0: ifoDict["synwordcount"] = str(synWordCount) for key in infoKeys: if key in { "bookname", "description", }: continue value = glos.getInfo(key) if not value: continue value = _newlinesToSpace(value) 
ifoDict[key] = value ifoDict["description"] = self.getDescription() with open( self._filename + ".ifo", mode="w", encoding="utf-8", newline="\n", ) as ifoFile: ifoFile.write("StarDict's dict ifo file\n") ifoFile.writelines(f"{key}={value}\n" for key, value in ifoDict.items()) pyglossary-5.0.9/pyglossary/plugins/stardict_merge_syns/000077500000000000000000000000001476751035500237235ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/stardict_merge_syns/__init__.py000066400000000000000000000036021476751035500260350ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import os from pyglossary.flags import ALWAYS, DEFAULT_YES from pyglossary.option import ( BoolOption, Option, StrOption, ) from .writer import Writer __all__ = [ "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "stardict_merge_syns" name = "StardictMergeSyns" description = "StarDict (Merge Syns)" extensions = () extensionCreate = "-stardict/" singleFile = False sortOnWrite = ALWAYS sortKeyName = "stardict" sortEncoding = "utf-8" kind = "directory" wiki = "https://en.wikipedia.org/wiki/StarDict" website = ( "http://huzheng.org/stardict/", "huzheng.org/stardict", ) # https://github.com/huzheng001/stardict-3/blob/master/dict/doc/StarDictFileFormat optionsProp: dict[str, Option] = { "large_file": BoolOption( comment="Use idxoffsetbits=64 bits, for large files only", ), "dictzip": BoolOption( comment="Compress .dict file to .dict.dz", ), "sametypesequence": StrOption( values=["", "h", "m", "x", None], comment="Definition format: h=html, m=plaintext, x=xdxf", ), "xdxf_to_html": BoolOption( comment="Convert XDXF entries to HTML", ), "xsl": BoolOption( comment="Use XSL transformation", ), "unicode_errors": StrOption( values=[ "strict", # raise a UnicodeDecodeError exception "ignore", # just leave the character out "replace", # use U+FFFD, REPLACEMENT CHARACTER "backslashreplace", # insert a \xNN escape sequence ], comment="What to do with Unicode decoding errors", ), "audio_icon": BoolOption( comment="Add glossary's audio icon", ), "sqlite": BoolOption( comment="Use SQLite to limit memory usage." 
" Default depends on global SQLite mode.", allowNone=True, ), } if os.getenv("PYGLOSSARY_STARDICT_NO_FORCE_SORT") == "1": sortOnWrite = DEFAULT_YES pyglossary-5.0.9/pyglossary/plugins/stardict_merge_syns/tools.toml000066400000000000000000000006051476751035500257610ustar00rootroot00000000000000[KOReader] web = "http://koreader.rocks/" source = "https://github.com/koreader/koreader" platforms = [ "Android", "Amazon Kindle", "Kobo eReader", "PocketBook", "Cervantes",] license = "AGPLv3" plang = "Lua" [sdcv] web = "https://dushistov.github.io/sdcv/" source = "https://github.com/Dushistov/sdcv" platforms = [ "Linux", "Windows", "Mac", "Android",] license = "GPLv2" plang = "C++" pyglossary-5.0.9/pyglossary/plugins/stardict_merge_syns/writer.py000066400000000000000000000065001476751035500256120ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from time import perf_counter as now from typing import ( TYPE_CHECKING, ) from pyglossary.plugins.stardict import Writer as StdWriter if TYPE_CHECKING: from collections.abc import Generator from pyglossary.glossary_types import EntryType from pyglossary.core import log from pyglossary.glossary_utils import Error from pyglossary.text_utils import uint32ToBytes __all__ = ["Writer"] class Writer(StdWriter): dictzipSynFile = False def fixDefi(self, defi: str, defiFormat: str) -> bytes: # noqa: ARG002, PLR6301 return defi.encode("utf-8") def writeCompact( self, defiFormat: str, ) -> Generator[None, EntryType, None]: """ Build StarDict dictionary with sametypesequence option specified. Every item definition consists of a single article. All articles have the same format, specified in defiFormat parameter. defiFormat - format of article definition: h - html, m - plain text """ log.debug(f"writeCompact: {defiFormat=}") idxBlockList = self.newIdxList() altIndexList = self.newSynList() dictFile = open(self._filename + ".dict", "wb") t0 = now() dictMarkToBytes, dictMarkMax = self.dictMarkToBytesFunc() dictMark, entryIndex = 0, -1 while True: entry = yield if entry is None: break if entry.isData(): entry.save(self._resDir) continue entryIndex += 1 b_dictBlock = self.fixDefi(entry.defi, defiFormat) dictFile.write(b_dictBlock) b_idxBlock = dictMarkToBytes(dictMark) + uint32ToBytes(len(b_dictBlock)) for b_word in entry.lb_word: idxBlockList.append((b_word, b_idxBlock)) dictMark += len(b_dictBlock) if dictMark > dictMarkMax: raise Error( f"StarDict: {dictMark = } is too big, set option large_file=true", ) dictFile.close() log.info(f"Writing dict file took {now() - t0:.2f} seconds") self.writeIdxFile(idxBlockList) self.writeIfoFile( len(idxBlockList), len(altIndexList), ) def writeGeneral(self) -> Generator[None, EntryType, None]: """ Build StarDict dictionary in general case. Every item definition may consist of an arbitrary number of articles. sametypesequence option is not used. 
""" log.debug("writeGeneral") idxBlockList = self.newIdxList() altIndexList = self.newSynList() dictFile = open(self._filename + ".dict", "wb") t0 = now() dictMarkToBytes, dictMarkMax = self.dictMarkToBytesFunc() dictMark, entryIndex = 0, -1 while True: entry = yield if entry is None: break if entry.isData(): entry.save(self._resDir) continue entryIndex += 1 defiFormat = entry.detectDefiFormat("m") # call no more than once b_defi = self.fixDefi(entry.defi, defiFormat) b_dictBlock = defiFormat.encode("ascii") + b_defi + b"\x00" dictFile.write(b_dictBlock) b_idxBlock = dictMarkToBytes(dictMark) + uint32ToBytes(len(b_dictBlock)) for b_word in entry.lb_word: idxBlockList.append((b_word, b_idxBlock)) dictMark += len(b_dictBlock) if dictMark > dictMarkMax: raise Error( f"StarDict: {dictMark = } is too big, set option large_file=true", ) dictFile.close() log.info(f"Writing dict file took {now() - t0:.2f} seconds") self.writeIdxFile(idxBlockList) self.writeIfoFile( len(idxBlockList), len(altIndexList), ) # TODO: override getDescription to indicate merge_syns pyglossary-5.0.9/pyglossary/plugins/stardict_textual/000077500000000000000000000000001476751035500232365ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/stardict_textual/__init__.py000066400000000000000000000015521476751035500253520ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, EncodingOption, Option, ) from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "stardict_textual" name = "StardictTextual" description = "StarDict Textual File (.xml)" extensions = () extensionCreate = ".xml" sortKeyName = "stardict" singleFile = True kind = "text" wiki = "" website = ( "https://github.com/huzheng001/stardict-3" "/blob/master/dict/doc/TextualDictionaryFileFormat", "TextualDictionaryFileFormat", ) optionsProp: dict[str, Option] = { "encoding": EncodingOption(), "xdxf_to_html": BoolOption( comment="Convert XDXF entries to HTML", ), } pyglossary-5.0.9/pyglossary/plugins/stardict_textual/reader.py000066400000000000000000000132331476751035500250540ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING, cast if TYPE_CHECKING: import io from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.lxml_types import Element from pyglossary.xdxf.transform import XdxfTransformer from pyglossary.compression import ( compressionOpen, stdCompressions, ) from pyglossary.core import exc_note, log, pip from pyglossary.html_utils import unescape_unicode from pyglossary.io_utils import nullBinaryIO __all__ = ["Reader"] class Reader: useByteProgress = True _encoding: str = "utf-8" _xdxf_to_html: bool = True compressions = stdCompressions depends = { "lxml": "lxml", } def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._filename = "" self._file: io.IOBase = nullBinaryIO self._fileSize = 0 self._xdxfTr: XdxfTransformer | None = None def xdxf_setup(self) -> XdxfTransformer: from pyglossary.xdxf.transform import XdxfTransformer self._xdxfTr = tr = XdxfTransformer(encoding="utf-8") return tr def xdxf_transform(self, text: str) -> str: tr = self._xdxfTr if tr is None: tr = self.xdxf_setup() return tr.transformByInnerString(text) def __len__(self) 
-> int: return 0 def close(self) -> None: self._file.close() self._file = nullBinaryIO self._filename = "" self._fileSize = 0 def open(self, filename: str) -> None: try: from lxml import etree as ET except ModuleNotFoundError as e: exc_note(e, f"Run `{pip} install lxml` to install") raise self._filename = filename cfile = compressionOpen(filename, mode="rb") if cfile.seekable(): cfile.seek(0, 2) self._fileSize = cfile.tell() cfile.seek(0) # self._glos.setInfo("input_file_size", f"{self._fileSize}") else: log.warning("StarDict Textual File Reader: file is not seekable") context = ET.iterparse( # type: ignore # noqa: PGH003 cfile, events=("end",), tag="info", ) for _, elem in context: self.setMetadata(elem) # type: ignore break cfile.close() def setGlosInfo(self, key: str, value: str) -> None: if value is None: return self._glos.setInfo(key, unescape_unicode(value)) def setMetadata(self, header: Element) -> None: if (elem := header.find("./bookname")) is not None and elem.text: self.setGlosInfo("name", elem.text) if (elem := header.find("./author")) is not None and elem.text: self.setGlosInfo("author", elem.text) if (elem := header.find("./email")) is not None and elem.text: self.setGlosInfo("email", elem.text) if (elem := header.find("./website")) is not None and elem.text: self.setGlosInfo("website", elem.text) if (elem := header.find("./description")) is not None and elem.text: self.setGlosInfo("description", elem.text) if (elem := header.find("./bookname")) is not None and elem.text: self.setGlosInfo("name", elem.text) if (elem := header.find("./bookname")) is not None and elem.text: self.setGlosInfo("name", elem.text) if (elem := header.find("./date")) is not None and elem.text: self.setGlosInfo("creationTime", elem.text) # if (elem := header.find("./dicttype")) is not None and elem.text: # self.setGlosInfo("dicttype", elem.text) def renderDefiList( self, defisWithFormat: list[tuple[str, str]], ) -> tuple[str, str]: if not defisWithFormat: return "", "" if len(defisWithFormat) == 1: return defisWithFormat[0] defiFormatSet: set[str] = set() defiFormatSet.update(_type for _, _type in defisWithFormat) if len(defiFormatSet) == 1: format_ = defiFormatSet.pop() if format_ == "h": return "\n
        ".join([defi for defi, _ in defisWithFormat]), format_ return "\n".join([defi for defi, _ in defisWithFormat]), format_ # convert plaintext or xdxf to html defis: list[str] = [] for defi_, format_ in defisWithFormat: if format_ == "m": defis.append("
        " + defi_.replace("\n", "
        ") + "
        ") elif format_ == "x": defis.append(self.xdxf_transform(defi_)) else: defis.append(defi_) return "\n
        \n".join(defis), "h" def __iter__(self) -> Iterator[EntryType]: from lxml import etree as ET glos = self._glos fileSize = self._fileSize self._file = file = compressionOpen(self._filename, mode="rb") context = ET.iterparse( # type: ignore # noqa: PGH003 self._file, events=("end",), tag="article", ) for _, _elem in context: elem = cast("Element", _elem) words: list[str] = [] defisWithFormat: list[tuple[str, str]] = [] for child in elem.iterchildren(): if not child.text: continue if child.tag in {"key", "synonym"}: words.append(child.text) elif child.tag == "definition": type_ = child.attrib.get("type", "") if type_: new_type = { "m": "m", "t": "m", "y": "m", "g": "h", "h": "h", "x": "x", }.get(type_, "") if not new_type: log.warning(f"unsupported definition type {type_}") type_ = new_type if not type_: type_ = "m" defi_ = child.text.strip() if type_ == "x" and self._xdxf_to_html: defi_ = self.xdxf_transform(defi_) type_ = "h" defisWithFormat.append((defi_, type_)) # TODO: child.tag == "definition-r" else: log.warning(f"unknown tag {child.tag}") defi, defiFormat = self.renderDefiList(defisWithFormat) yield glos.newEntry( words, defi, defiFormat=defiFormat, byteProgress=(file.tell(), fileSize), ) # clean up preceding siblings to save memory # this can reduce memory usage from >300 MB to ~25 MB while elem.getprevious() is not None: parent = elem.getparent() if parent is None: break del parent[0] pyglossary-5.0.9/pyglossary/plugins/stardict_textual/tools.toml000066400000000000000000000003411476751035500252710ustar00rootroot00000000000000["StarDict-Editor (Tools)"] web = "https://github.com/huzheng001/stardict-3/blob/master/tools/README" source = "https://github.com/huzheng001/stardict-3" platforms = [ "Linux", "Windows", "Mac",] license = "GPL" plang = "C" pyglossary-5.0.9/pyglossary/plugins/stardict_textual/writer.py000066400000000000000000000066371476751035500251400ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import os from os.path import dirname, isdir, join from typing import TYPE_CHECKING, cast if TYPE_CHECKING: from collections.abc import Generator from lxml import builder from pyglossary.glossary_types import EntryType, WriterGlossaryType from pyglossary.compression import ( compressionOpen, stdCompressions, ) __all__ = ["Writer"] class Writer: _encoding: str = "utf-8" compressions = stdCompressions depends = { "lxml": "lxml", } def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" self._resDir = "" def open( self, filename: str, ) -> None: self._filename = filename self._resDir = join(dirname(self._filename), "res") self._file = compressionOpen( self._filename, mode="w", encoding=self._encoding, ) def finish(self) -> None: self._file.close() def writeInfo( self, maker: builder.ElementMaker, pretty: bool, ) -> None: from lxml import etree as ET glos = self._glos desc = glos.getInfo("description") copyright_ = glos.getInfo("copyright") if copyright_: desc = f"{copyright_}\n{desc}" publisher = glos.getInfo("publisher") if publisher: desc = f"Publisher: {publisher}\n{desc}" info = maker.info( maker.version("3.0.0"), maker.bookname(glos.getInfo("name")), maker.author(glos.getInfo("author")), maker.email(glos.getInfo("email")), maker.website(glos.getInfo("website")), maker.description(desc), maker.date(glos.getInfo("creationTime")), maker.dicttype(""), ) file = self._file file.write( cast( "bytes", ET.tostring( info, encoding=self._encoding, pretty_print=pretty, ), ).decode(self._encoding) + "\n", ) def 
writeDataEntry( self, maker: builder.ElementMaker, # noqa: ARG002 entry: EntryType, ) -> None: entry.save(self._resDir) # TODO: create article tag with "definition-r" in it? # or just save the file to res/ directory? or both? # article = maker.article( # maker.key(entry.s_word), # maker.definition_r( # ET.CDATA(entry.defi), # **{"type": ext}) # ) # ) def write(self) -> Generator[None, EntryType, None]: from lxml import builder from lxml import etree as ET file = self._file encoding = self._encoding maker = builder.ElementMaker() file.write( """ """, ) self.writeInfo(maker, pretty=True) if not isdir(self._resDir): os.mkdir(self._resDir) pretty = True while True: entry = yield if entry is None: break if entry.isData(): self.writeDataEntry(maker, entry) continue entry.detectDefiFormat() article = maker.article( maker.key(entry.l_word[0]), ) for alt in entry.l_word[1:]: article.append(maker.synonym(alt)) article.append( maker.definition( ET.CDATA(entry.defi), type=entry.defiFormat, ), ) ET.indent(article, space="") articleStr = cast( "bytes", ET.tostring( article, pretty_print=pretty, encoding=encoding, ), ).decode(encoding) # for some reason, "´k" becomes " ́k" (for example) # noqa: RUF003 # stardict-text2bin tool also does this. # https://en.wiktionary.org/wiki/%CB%88#Translingual self._file.write(articleStr + "\n") file.write("") if not os.listdir(self._resDir): os.rmdir(self._resDir) pyglossary-5.0.9/pyglossary/plugins/tabfile/000077500000000000000000000000001476751035500212615ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/tabfile/__init__.py000066400000000000000000000020511476751035500233700ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, EncodingOption, FileSizeOption, Option, ) from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "tabfile" name = "Tabfile" description = "Tabfile (.txt, .dic)" extensions = (".txt", ".tab", ".tsv") extensionCreate = ".txt" singleFile = True kind = "text" wiki = "https://en.wikipedia.org/wiki/Tab-separated_values" website = None optionsProp: dict[str, Option] = { "encoding": EncodingOption(), "enable_info": BoolOption( comment="Enable glossary info / metedata", ), "resources": BoolOption( comment="Enable resources / data files", ), "file_size_approx": FileSizeOption( comment="Split up by given approximate file size\nexamples: 100m, 1g", ), "word_title": BoolOption( comment="Add headwords title to beginning of definition", ), } pyglossary-5.0.9/pyglossary/plugins/tabfile/reader.py000066400000000000000000000021221476751035500230720ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.core import log from pyglossary.text_reader import TextGlossaryReader from pyglossary.text_utils import ( splitByBarUnescapeNTB, unescapeNTB, ) __all__ = ["Reader"] class Reader(TextGlossaryReader): useByteProgress = True @classmethod def isInfoWord(cls, word: str) -> bool: return word.startswith("#") @classmethod def fixInfoWord(cls, word: str) -> str: return word.lstrip("#") def nextBlock(self) -> tuple[str | list[str], str, None] | None: if not self._file: raise StopIteration line = self.readline() if not line: raise StopIteration line = line.rstrip("\n") if not line: return None ### word: str | list[str] word, tab, defi = line.partition("\t") 
if not tab: log.warning( f"Warning: line starting with {line[:10]!r} has no tab!", ) return None ### if self._glos.alts: word = splitByBarUnescapeNTB(word) if len(word) == 1: word = word[0] else: word = unescapeNTB(word, bar=False) ### defi = unescapeNTB(defi) ### return word, defi, None pyglossary-5.0.9/pyglossary/plugins/tabfile/tools.toml000066400000000000000000000003401476751035500233130ustar00rootroot00000000000000["StarDict-Editor (Tools)"] web = "https://github.com/huzheng001/stardict-3/blob/master/tools/README" source = "https://github.com/huzheng001/stardict-3" platforms = [ "Linux", "Windows", "Mac",] license = "GPL" plang = "C" pyglossary-5.0.9/pyglossary/plugins/tabfile/writer.py000066400000000000000000000024741476751035500231560ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING from pyglossary.compression import stdCompressions if TYPE_CHECKING: from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] class Writer: _encoding: str = "utf-8" _enable_info: bool = True _resources: bool = True _file_size_approx: int = 0 _word_title: bool = False compressions = stdCompressions def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" def open( self, filename: str, ) -> None: self._filename = filename def finish(self) -> None: pass def write(self) -> Generator[None, EntryType, None]: from pyglossary.text_utils import escapeNTB, joinByBar from pyglossary.text_writer import TextGlossaryWriter writer = TextGlossaryWriter( self._glos, entryFmt="{word}\t{defi}\n", writeInfo=self._enable_info, outInfoKeysAliasDict=None, ) writer.setAttrs( encoding=self._encoding, wordListEncodeFunc=joinByBar, wordEscapeFunc=escapeNTB, defiEscapeFunc=escapeNTB, ext=".txt", resources=self._resources, word_title=self._word_title, file_size_approx=self._file_size_approx, ) writer.open(self._filename) yield from writer.write() writer.finish() pyglossary-5.0.9/pyglossary/plugins/testformat/000077500000000000000000000000001476751035500220435ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/testformat/__init__.py000066400000000000000000000011701476751035500241530ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import Option from .reader import Reader from .writer import Writer __all__ = [ "Reader", "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = False lname = "testformat" name = "Test" description = "Test Format File(.test)" extensions = (".test", ".tst") extensionCreate = ".test" singleFile = True kind = "text" wiki = "" website = None # key is option/argument name, value is instance of Option optionsProp: dict[str, Option] = {} pyglossary-5.0.9/pyglossary/plugins/testformat/reader.py000066400000000000000000000040471476751035500236640ustar00rootroot00000000000000 from __future__ import annotations # -*- coding: utf-8 -*- from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] class Reader: useByteProgress = False def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._filename = "" self._wordCount = 0 def __len__(self) -> int: # return the number of entries if you have it # if you don't, return 0 and progressbar will be disabled # self._wordCount can be set in self.open 
function # but if you want to set it, you should set it before # iteration begins and __iter__ method is called return self._wordCount def open(self, filename: str) -> None: # open the file, read headers / info and set info to self._glos # and set self._wordCount if you can # read-options should be keyword arguments in this method self._wordCount = 100 # log.info(f"some useful message") # here read info from file and set to Glossary object self._glos.setInfo("name", "Test") desc = "Test glossary created by a PyGlossary plugin" self._glos.setInfo("description", desc) self._glos.setInfo("author", "Me") self._glos.setInfo("copyright", "GPL") def close(self) -> None: # this is called after reading/conversion is finished # if you have an open file object, close it here # if you need to clean up temp files, do it here pass def __iter__(self) -> Iterator[EntryType]: # the easiest and simplest way to implement an Iterator is # by writing a generator, by calling: yield glos.newEntry(word, defi) # inside a loop (typically iterating over a file object for text file) # another way (which is harder) is by implementing __next__ method # and returning self in __iter__ # that forces you to keep the state manually because __next__ is called # repeatedly, but __iter__ is only called once glos = self._glos for i in range(self._wordCount): # here get word and definition from file(depending on your format) word = f"word_{i}" defi = f"definition {i}" yield glos.newEntry(word, defi) pyglossary-5.0.9/pyglossary/plugins/testformat/writer.py000066400000000000000000000022461476751035500237350ustar00rootroot00000000000000 from __future__ import annotations # -*- coding: utf-8 -*- from collections.abc import Generator from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] class Writer: def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" def open(self, filename: str) -> None: self._filename = filename def write(self) -> Generator[None, EntryType, None]: glos = self._glos filename = self._filename # noqa # log.info(f"some useful message") while True: entry = yield if entry is None: break if entry.isData(): # can save it with entry.save(directory) continue word = entry.s_word # noqa defi = entry.defi # noqa # here write word and defi to the output file (depending on # your format) # here read info from Glossaey object name = glos.getInfo("name") # noqa desc = glos.getInfo("description") # noqa author = glos.author # noqa copyright = glos.getInfo("copyright") # noqa # if an info key doesn't exist, getInfo returns empty string # now write info to the output file (depending on your output format) def finish(self) -> None: self._filename = "" pyglossary-5.0.9/pyglossary/plugins/wiktextract/000077500000000000000000000000001476751035500222245ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/wiktextract/__init__.py000066400000000000000000000022661476751035500243430ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, ListOption, Option, StrOption, ) from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "wiktextract" name = "Wiktextract" description = "Wiktextract (.jsonl)" extensions = (".jsonl",) extensionCreate = ".jsonl" singleFile = True kind = "text" wiki = "" website = ( 
"https://github.com/tatuylonen/wiktextract", "@tatuylonen/wiktextract", ) optionsProp: dict[str, Option] = { "resources": BoolOption( comment="Enable resources / data files", ), "word_title": BoolOption( comment="Add headwords title to beginning of definition", ), "pron_color": StrOption( comment="Pronunciation color", ), "gram_color": StrOption( comment="Grammar color", ), "example_padding": StrOption( comment="Padding for examples (css value)", ), "audio": BoolOption( comment="Enable audio", ), "audio_formats": ListOption( comment="List of audio formats to use", ), "categories": BoolOption( comment="Enable categories", ), } pyglossary-5.0.9/pyglossary/plugins/wiktextract/reader.py000066400000000000000000000354011476751035500240430ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import collections from collections import Counter from io import BytesIO, IOBase from json import loads as json_loads from typing import TYPE_CHECKING, cast if TYPE_CHECKING: from collections.abc import Callable, Iterator from typing import Any from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.lxml_types import Element, T_htmlfile from pyglossary.compression import ( compressionOpen, stdCompressions, ) from pyglossary.core import exc_note, log, pip from pyglossary.io_utils import nullBinaryIO __all__ = ["Reader"] class Reader: useByteProgress = True compressions = stdCompressions depends = { "lxml": "lxml", } _word_title: bool = False _pron_color: str = "gray" _gram_color: str = "green" # 'top right' or 'top right bottom left' _example_padding: str = "10px 20px" _audio: bool = True _audio_formats: list[str] = ["ogg", "mp3"] _categories: bool = False topicStyle = ( "color:white;" "background:green;" "padding-left:3px;" "padding-right:3px;" "border-radius:0.5ex;" # 0.5ex ~= 0.3em, but "ex" is recommended ) def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._filename = "" self._file: IOBase = nullBinaryIO self._fileSize = 0 self._wordCount = 0 def open( self, filename: str, ) -> None: try: pass except ModuleNotFoundError as e: exc_note(e, f"Run `{pip} install lxml` to install") raise self._filename = filename cfile = compressionOpen(filename, mode="rt", encoding="utf-8") if cfile.seekable(): cfile.seek(0, 2) self._fileSize = cfile.tell() cfile.seek(0) self._glos.setInfo("input_file_size", str(self._fileSize)) else: self.warning("Wiktextract Reader: file is not seekable") self._glos.setDefaultDefiFormat("h") if self._word_title: self._glos.setInfo("definition_has_headwords", "True") self._file = cfile self._warnings: Counter[str] = collections.Counter() def close(self) -> None: self._file.close() self._file = nullBinaryIO self._filename = "" self._fileSize = 0 def __len__(self) -> int: return 0 def __iter__(self) -> Iterator[EntryType]: while line := self._file.readline(): line = line.strip() if not line: continue yield self.makeEntry(json_loads(line)) for _msg, count in self._warnings.most_common(): msg = _msg if count > 1: msg = f"[{count} times] {msg}" log.warning(msg) def warning(self, msg: str) -> None: self._warnings[msg] += 1 def makeEntry(self, data: dict[str, Any]) -> EntryType: # noqa: PLR0912 from lxml import etree as ET glos = self._glos f = BytesIO() def br() -> Element: return ET.Element("br") keywords: list[str] = [] inflectedKeywords: list[str] = [] word = data.get("word") if word: keywords.append(word) for formDict in data.get("forms", []): form: str = formDict.get("form", "") if not form: continue if 
len(form) > 80: self.warning(f"'form' too long: {form}") continue source: str = formDict.get("source", "") # tags = formDict.get("tags", []) if source == "Inflection": inflectedKeywords.append(form) else: keywords.append(form) keywords += inflectedKeywords with ET.htmlfile(f, encoding="utf-8") as hf: with hf.element("div"): if self._word_title: for keyword in keywords: with hf.element(glos.titleTag(keyword)): hf.write(keyword) hf.write(br()) hf_ = cast("T_htmlfile", hf) self.writeSoundList(hf_, data.get("sounds")) pos: str | None = data.get("pos") if pos: with hf.element("div", attrib={"class": "pos"}): with hf.element("font", color=self._gram_color): hf.write(pos) senses = data.get("senses") or [] self.writeSenseList(hf_, senses) # type: ignore self.writeSynonyms(hf_, data.get("synonyms")) # type: ignore self.writeAntonyms(hf_, data.get("antonyms")) # type: ignore # TODO: data.get("translations") # list[dict[str, str]] # dict keys: code, "lang", "sense", "word" etymology: str = data.get("etymology_text", "") if etymology: hf.write(br()) with hf.element("div"): hf.write(f"Etymology: {etymology}") if self._categories: categories = [] for sense in senses: senseCats = sense.get("categories") if senseCats: categories += senseCats self.writeSenseCategories(hf_, categories) defi = f.getvalue().decode("utf-8") # defi = defi.replace("\xa0", " ") # do we need to do this? file = self._file return self._glos.newEntry( keywords, defi, defiFormat="h", byteProgress=(file.tell(), self._fileSize), ) def writeSoundPron( self, hf: T_htmlfile, sound: dict[str, Any], ) -> None: # "homophone" key found in Dutch and Arabic dictionaries # (similar-sounding words for Arabic) for key in ("ipa", "other", "rhymes", "homophone"): value = sound.get(key) if not value: continue with hf.element("font", color=self._pron_color): hf.write(str(value)) hf.write(f" ({key})") def writeSoundAudio( self, hf: T_htmlfile, sound: dict[str, Any], ) -> None: # TODO: add a read-option for audio # keys for audio: # "audio" (file name), "text" (link text), "ogg_url", "mp3_url" # possible "tags" (list[str]) text = sound.get("text") if text: hf.write(f"{text}: ") with hf.element("audio", attrib={"controls": ""}): for _format in self._audio_formats: url = sound.get(f"{_format}_url") if not url: continue with hf.element( "source", attrib={ "src": url, "type": f"audio/{_format}", }, ): pass def writeSoundList( self, hf: T_htmlfile, soundList: list[dict[str, Any]] | None, ) -> None: if not soundList: return pronList: list[dict[str, Any]] = [] audioList: list[dict[str, Any]] = [] for sound in soundList: if "audio" in sound: if self._audio: audioList.append(sound) continue pronList.append(sound) # can it contain both audio and pronunciation? 
if pronList: with hf.element("div", attrib={"class": "pronunciations"}): for i, sound in enumerate(pronList): if i > 0: hf.write(", ") self.writeSoundPron(hf, sound) for sound in audioList: with hf.element("div", attrib={"class": "audio"}): self.writeSoundAudio(hf, sound) def writeSenseList( self, hf: T_htmlfile, senseList: list[dict[str, Any]], ) -> None: if not senseList: return self.makeList( hf, senseList, self.writeSense, ) def writeSenseGloss( # noqa: PLR6301 self, hf: T_htmlfile, text: str | None, ) -> None: hf.write(text or "") def writeSenseCategory( # noqa: PLR6301 self, hf: T_htmlfile, category: dict[str, Any], ) -> None: # keys: name: str, kind: str, parents: list, source: str # values for "source" (that I found): "w", "w+disamb" name = category.get("name") if not name: self.warning(f"{category = }") return desc = name source = category.get("source") if source: desc = f"{desc} (source: {source})" hf.write(desc) def writeSenseCategories( self, hf: T_htmlfile, categories: list[dict[str, Any]] | None, ) -> None: if not categories: return # long names, mostly about grammar? with hf.element("div", attrib={"class": "categories"}): hf.write("Categories: ") self.makeList(hf, categories, self.writeSenseCategory) def writeSenseExample( # noqa: PLR6301, PLR0912 self, hf: T_htmlfile, example: dict[str, str | list], ) -> None: # example keys: text, "english", "ref", "type" textList: list[tuple[str | None, str]] = [] text_: str | list = example.pop("example", "") if text_: assert isinstance(text_, str) textList.append((None, text_)) example.pop("ref", "") example.pop("type", "") for key, value in example.items(): if not value: continue prefix: str | None = key if prefix in ("text",): # noqa: PLR6201, FURB171 prefix = None if isinstance(value, str): textList.append((prefix, value)) elif isinstance(value, list): for item in value: if isinstance(item, str): textList.append((prefix, item)) elif isinstance(item, list): textList += [(prefix, item2) for item2 in item] else: log.error(f"writeSenseExample: invalid type for {value=}") if not textList: return def writePair(prefix: str | None, text: str) -> None: if prefix: with hf.element("b"): hf.write(prefix) hf.write(": ") hf.write(text) if len(textList) == 1: prefix, text = textList[0] writePair(prefix, text) return with hf.element("ul"): for prefix, text in textList: with hf.element("li"): writePair(prefix, text) def writeSenseExamples( self, hf: T_htmlfile, examples: list[dict[str, str | list]] | None, ) -> None: from lxml import etree as ET if not examples: return hf.write(ET.Element("br")) with hf.element("div", attrib={"class": "examples"}): hf.write("Examples:") hf.write(ET.Element("br")) for example in examples: with hf.element( "div", attrib={ "class": "example", "style": f"padding: {self._example_padding};", }, ): self.writeSenseExample(hf, example) def writeSenseFormOf( # noqa: PLR6301 self, hf: T_htmlfile, form_of: dict[str, str], ) -> None: from lxml import etree as ET # {"word": ..., "extra": ...} word = form_of.get("word") if not word: return hf.write(word) extra = form_of.get("extra") if extra: hf.write(ET.Element("br")) hf.write(extra) def writeSenseFormOfList( self, hf: T_htmlfile, form_of_list: list[dict[str, str]] | None, ) -> None: if not form_of_list: return with hf.element("div", attrib={"class": "form_of"}): hf.write("Form of: ") self.makeList(hf, form_of_list, self.writeSenseFormOf) def writeTags( self, hf: T_htmlfile, tags: list[str] | None, toRemove: list[str] | None, ) -> None: if not tags: return if toRemove: for tag in 
toRemove: if tag in tags: tags.remove(tag) if not tags: return with hf.element("div", attrib={"class": "tags"}): for i, tag in enumerate(tags): if i > 0: hf.write(", ") with hf.element("font", color=self._gram_color): hf.write(tag) def writeTopics( self, hf: T_htmlfile, topics: list[str] | None, ) -> None: if not topics: return with hf.element("div", attrib={"class": "tags"}): for i, topic in enumerate(topics): if i > 0: hf.write(" ") with hf.element("span", style=self.topicStyle): hf.write(topic) def addWordLink( # noqa: PLR6301 self, hf: T_htmlfile, word: str, wordClass: str = "", ) -> None: i = word.find(" [") if i >= 0: word = word[:i] if not word: return attrib = {"href": f"bword://{word}"} if wordClass: attrib["class"] = wordClass with hf.element( "a", attrib=attrib, ): hf.write(word) def writeSynonyms( self, hf: T_htmlfile, synonyms: list[dict[str, Any]] | None, ) -> None: if not synonyms: return # "word": "str", # "sense": "str", # "_dis1": "str", # "tags": list[str] # "extra": "str", # "english": "str" with hf.element("div"): hf.write("Synonyms: ") for i, item in enumerate(synonyms): if i > 0: hf.write(", ") word = item.get("word") if not word: continue self.addWordLink(hf, word) def writeAntonyms( self, hf: T_htmlfile, antonyms: list[dict[str, str]] | None, ) -> None: if not antonyms: return # dict keys: word with hf.element("div"): hf.write("Antonyms: ") for i, item in enumerate(antonyms): if i > 0: hf.write(", ") word = item.get("word") if not word: continue self.addWordLink(hf, word, wordClass="antonym") def writeRelated( self, hf: T_htmlfile, relatedList: list[dict[str, str]] | None, ) -> None: if not relatedList: return # dict keys: sense, "word", "english" with hf.element("div"): hf.write("Related: ") for i, item in enumerate(relatedList): if i > 0: hf.write(", ") word = item.get("word") if not word: continue self.addWordLink(hf, word) def writeSenseLinks( self, hf: T_htmlfile, linkList: list[list[str]] | None, ) -> None: if not linkList: return with hf.element("div"): hf.write("Links: ") for i, link in enumerate(linkList): if len(link) != 2: self.warning(f"unexpected {link =}") continue text, ref = link sq = ref.find("#") if sq == 0: ref = text elif sq > 0: ref = ref[:sq] if i > 0: hf.write(", ") self.addWordLink(hf, ref) def writeSense( self, hf: T_htmlfile, sense: dict[str, Any], ) -> None: from lxml import etree as ET # tags seem to be mostly about grammar, so with format it like grammar self.writeTags( hf, sense.get("tags"), toRemove=["form-of"], ) # for key in ("english",): # text: "str | None" = sense.get("english") # if not text: # continue # keyCap = key.capitalize() # with hf.element("div"): # with hf.element("b"): # hf.write(keyCap) # hf.write(f": {text}") # sense["glosses"] and sense["english"] seems to be unreliable # for example: # "raw_glosses": ["(short) story, fable, play"], # "english": "short", # "glosses": ["story, fable, play"], glosses: list[str] | None = sense.get("raw_glosses") if not glosses: glosses = sense.get("glosses") if glosses: self.makeList(hf, glosses, self.writeSenseGloss) self.writeTopics(hf, sense.get("topics")) self.writeSenseFormOfList(hf, sense.get("form_of")) self.writeSynonyms(hf, sense.get("synonyms")) self.writeAntonyms(hf, sense.get("antonyms")) self.writeRelated(hf, sense.get("related")) self.writeSenseLinks(hf, sense.get("links")) self.writeSenseExamples(hf, sense.get("examples")) # alt_of[i]["word"] seem to point to a word that is # mentioned in sense["raw_glosses"] # so we could try to find that word and turn it into a link # 
sense.get("alt_of"): list[dict[str, str]] | None # sense.get("wikipedia", []): list[str] # sense.get("wikidata", []): list[str] # sense.get("id", ""): str # not useful # sense.get("senseid", []): list[str] # not useful hf.write(ET.Element("br")) @staticmethod def makeList( # noqa: PLR0913 hf: T_htmlfile, input_objects: list[Any], processor: Callable, ordered: bool = True, skip_single: bool = True, # single_prefix: str = "", # list_type: str = "", ) -> None: """Wrap elements into
          if more than one element.""" if not input_objects: return if skip_single and len(input_objects) == 1: # if single_prefix: # hf.write(single_prefix) processor(hf, input_objects[0]) return attrib: dict[str, str] = {} # if list_type: # attrib["type"] = list_type with hf.element("ol" if ordered else "ul", attrib=attrib): for el in input_objects: with hf.element("li"): processor(hf, el) pyglossary-5.0.9/pyglossary/plugins/wordnet/000077500000000000000000000000001476751035500213355ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/wordnet/__init__.py000066400000000000000000000013341476751035500234470ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING from .reader import Reader if TYPE_CHECKING: from pyglossary.option import Option __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "wordnet" name = "Wordnet" description = "WordNet" extensions = () extensionCreate = "" singleFile = False kind = "directory" wiki = "https://en.wikipedia.org/wiki/WordNet" website = ( "https://wordnet.princeton.edu/", "WordNet - A Lexical Database for English", ) # key is option/argument name, value is instance of Option optionsProp: dict[str, Option] = {} pyglossary-5.0.9/pyglossary/plugins/wordnet/reader.py000066400000000000000000000213071476751035500231540ustar00rootroot00000000000000# -*- coding: utf-8 -*- # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License version 3 # as published by the Free Software Foundation. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License # for more details. 
# # Copyright (C) 2023 Saeed Rasooli # Copyright (C) 2015 Igor Tkach # # This plugin is based on https://github.com/itkach/wordnet2slob from __future__ import annotations import os import re import sys from collections import defaultdict from typing import TYPE_CHECKING from pyglossary.core import log if TYPE_CHECKING: import io from collections.abc import Iterator from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] # original expression from # http://stackoverflow.com/questions/694344/regular-expression-that-matches-between-quotes-containing-escaped-quotes # "(?:[^\\"]+|\\.)*" # some examples don't have closing quote which # make the subn with this expression hang # _re_quotedText = re.compile(r'"(?:[^"]+|\.)*["|\n]') # make it a capturing group so that we can get rid of quotes _re_quotedText = re.compile(r'"([^"]+)"') _re_ref = re.compile(r"`(\w+)'") class SynSet: def __init__(self, line: str | bytes) -> None: self.line = line if isinstance(line, bytes): line = line.decode("utf-8") meta, self.gloss = line.split("|") self.meta_parts = meta.split() @property def offset(self) -> int: return int(self.meta_parts[0]) @property def lex_filenum(self) -> str: return self.meta_parts[1] @property def ss_type(self) -> str: return self.meta_parts[2] @property def w_cnt(self) -> int: return int(self.meta_parts[3], 16) @property def words(self) -> list[str]: return [self.meta_parts[4 + 2 * i].replace("_", " ") for i in range(self.w_cnt)] @property def pointers(self) -> list[Pointer]: p_cnt_index = 4 + 2 * self.w_cnt p_cnt = self.meta_parts[p_cnt_index] pointer_count = int(p_cnt) start = p_cnt_index + 1 return [ Pointer(*self.meta_parts[start + i * 4 : start + (i + 1) * 4]) # type: ignore for i in range(pointer_count) ] def __repr__(self) -> str: return f"SynSet({self.line!r})" class PointerSymbols: n = { "!": "Antonyms", "@": "Hypernyms", "@i": "Instance hypernyms", "~": "Hyponyms", "~i": "Instance hyponyms", "#m": "Member holonyms", "#s": "Substance holonyms", "#p": "Part holonyms", "%m": "Member meronyms", "%s": "Substance meronyms", "%p": "Part meronyms", "=": "Attributes", "+": "Derivationally related forms", ";c": "Domain of synset - TOPIC", "-c": "Member of this domain - TOPIC", ";r": "Domain of synset - REGION", "-r": "Member of this domain - REGION", ";u": "Domain of synset - USAGE", "-u": "Member of this domain - USAGE", "^": "Also see", } v = { "!": "Antonyms", "@": "Hypernyms", "~": "Hyponyms", "*": "Entailments", ">": "Cause", "^": "Also see", "$": "Verb group", "+": "Derivationally related forms", ";c": "Domain of synset - TOPIC", ";r": "Domain of synset - REGION", ";u": "Domain of synset - USAGE", } a = s = { "!": "Antonyms", "+": "Derivationally related forms", "&": "Similar to", "<": "Participle of verb", "\\": "Pertainyms", "=": "Attributes", "^": "Also see", ";c": "Domain of synset - TOPIC", ";r": "Domain of synset - REGION", ";u": "Domain of synset - USAGE", } r = { "!": "Antonyms", "\\": "Derived from adjective", "+": "Derivationally related forms", ";c": "Domain of synset - TOPIC", ";r": "Domain of synset - REGION", ";u": "Domain of synset - USAGE", "^": "Also see", } class Pointer: def __init__(self, symbol: str, offset: str, pos: str, source_target: str) -> None: self.symbol = symbol self.offset = int(offset) self.pos = pos self.source_target = source_target self.source = int(source_target[:2], 16) self.target = int(source_target[2:], 16) def __repr__(self) -> str: return ( f"Pointer({self.symbol!r}, {self.offset!r}, " f"{self.pos!r}, 
{self.source_target!r})" ) class WordNet: article_template = "
<h1>%s</h1><span>
          %s" synSetTypes = { "n": "n.", "v": "v.", "a": "adj.", "s": "adj. satellite", "r": "adv.", } file2pos = { "data.adj": ["a", "s"], "data.adv": ["r"], "data.noun": ["n"], "data.verb": ["v"], } def __init__(self, wordnetdir: str) -> None: self.wordnetdir = wordnetdir self.collector: dict[str, list[str]] = defaultdict(list) @staticmethod def iterlines(dict_dir: str) -> Iterator[str]: for name in os.listdir(dict_dir): if not name.startswith("data."): continue with open(os.path.join(dict_dir, name), encoding="utf-8") as f: for line in f: if not line.startswith(" "): yield line # PLR0912 Too many branches (16 > 12) def prepare(self) -> None: # noqa: PLR0912 synSetTypes = self.synSetTypes file2pos = self.file2pos dict_dir = self.wordnetdir files: dict[str, io.TextIOWrapper] = {} for name in os.listdir(dict_dir): if name.startswith("data.") and name in file2pos: f = open(os.path.join(dict_dir, name), encoding="utf-8") # noqa: SIM115 for key in file2pos[name]: files[key] = f def a(word: str) -> str: return f'{word}' for index, line in enumerate(self.iterlines(dict_dir)): if index % 100 == 0 and index > 0: sys.stdout.write(".") sys.stdout.flush() if index % 5000 == 0 and index > 0: sys.stdout.write("\n") sys.stdout.flush() if not line or not line.strip(): continue synset = SynSet(line) gloss_with_examples, _ = _re_quotedText.subn( lambda x: f'{x.group(1)}', synset.gloss, ) gloss_with_examples, _ = _re_ref.subn( lambda x: a(x.group(1)), gloss_with_examples, ) words = synset.words for index2, word in enumerate(words): # TODO: move this block to a func synonyms = ", ".join(a(w) for w in words if w != word) synonyms_str = ( f'
          Synonyms: {synonyms}' if synonyms else "" ) pointers = defaultdict(list) for pointer in synset.pointers: if ( pointer.source and pointer.target and pointer.source - 1 != index2 ): continue symbol = pointer.symbol if symbol and symbol[:1] in {";", "-"}: continue try: symbol_desc = getattr(PointerSymbols, synset.ss_type)[symbol] except KeyError: log.warning( f"unknown pointer symbol {symbol} for {synset.ss_type} ", ) symbol_desc = symbol data_file = files[pointer.pos] data_file.seek(pointer.offset) referenced_synset = SynSet(data_file.readline()) if pointer.source == pointer.target == 0: pointers[symbol_desc] = [ w for w in referenced_synset.words if w not in words ] else: referenced_word = referenced_synset.words[pointer.target - 1] if referenced_word not in pointers[symbol_desc]: pointers[symbol_desc].append(referenced_word) pointers_str = "".join( [ f'
          {symbol_desc}: ' + ", ".join(a(w) for w in referenced_words) for symbol_desc, referenced_words in pointers.items() if referenced_words ], ) self.collector[word].append( f'{synSetTypes[synset.ss_type]}' f" {gloss_with_examples}{synonyms_str}{pointers_str}", ) sys.stdout.write("\n") sys.stdout.flush() def process(self) -> Iterator[tuple[str, str]]: article_template = self.article_template for title in self.collector: article_pieces = self.collector[title] article_pieces_count = len(article_pieces) text = None if article_pieces_count > 1: ol = ["
            "] + [f"
          1. {ap}
          2. " for ap in article_pieces] + ["
          "] text = article_template % (title, "".join(ol)) elif article_pieces_count == 1: text = article_template % (title, article_pieces[0]) if text: yield title, text class Reader: useByteProgress = False def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._filename = "" self._wordCount = 0 self.wordnet: WordNet | None = None def __len__(self) -> int: return self._wordCount def open(self, filename: str) -> None: self.wordnet = WordNet(filename) log.info("Running wordnet.prepare()") self.wordnet.prepare() # TODO: metadata def close(self) -> None: self.wordnet = None def __iter__(self) -> Iterator[EntryType]: if self.wordnet is None: raise ValueError("self.wordnet is None") glos = self._glos for word, defi in self.wordnet.process(): yield glos.newEntry(word, defi) pyglossary-5.0.9/pyglossary/plugins/wordset/000077500000000000000000000000001476751035500213425ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/wordset/__init__.py000066400000000000000000000012331476751035500234520ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( EncodingOption, Option, ) from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "wordset" name = "Wordset" description = "Wordset.org JSON directory" extensions = () extensionCreate = "-wordset/" singleFile = False kind = "directory" wiki = "" website = ( "https://github.com/wordset/wordset-dictionary", "@wordset/wordset-dictionary", ) optionsProp: dict[str, Option] = { "encoding": EncodingOption(), } pyglossary-5.0.9/pyglossary/plugins/wordset/reader.py000066400000000000000000000045401476751035500231610ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from json import load from os import listdir from os.path import isfile, join, splitext from typing import TYPE_CHECKING from pyglossary.core import log if TYPE_CHECKING: from collections.abc import Iterator from typing import Any from pyglossary.glossary_types import EntryType, ReaderGlossaryType __all__ = ["Reader"] class Reader: useByteProgress = False _encoding: str = "utf-8" def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._clear() self.defiTemplate = ( "
<p>" '<font color="green">{speech_part}</font>' "<br>" "{def}" "<br>" "<i>{example}</i>" "</p>
          " ) """ { "id": "492099d426", "def": "without musical accompaniment", "example": "they performed a cappella", "speech_part": "adverb" }, """ def close(self) -> None: self._clear() def _clear(self) -> None: self._filename = "" def open(self, filename: str) -> None: self._filename = filename name = self._glos.getInfo("name") if not name or name == "data": self._glos.setInfo("name", "Wordset.org") self._glos.setDefaultDefiFormat("h") def __len__(self) -> int: return 0 @staticmethod def fileNameSortKey(fname: str) -> str: fname = splitext(fname)[0] if fname == "misc": return "\x80" return fname @staticmethod def sortKey(word: str) -> Any: return word.lower().encode("utf-8", errors="replace") def __iter__(self) -> Iterator[EntryType]: if not self._filename: raise RuntimeError("iterating over a reader while it's not open") direc = self._filename encoding = self._encoding glos = self._glos for fname in sorted(listdir(direc), key=self.fileNameSortKey): fpath = join(direc, fname) if not (fname.endswith(".json") and isfile(fpath)): continue with open(fpath, encoding=encoding) as fileObj: data: dict[str, dict[str, Any]] = load(fileObj) for word in sorted(data, key=self.sortKey): entryDict = data[word] defi = "".join( self.defiTemplate.format( **{ "word": word, "def": meaning.get("def", ""), "example": meaning.get("example", ""), "speech_part": meaning.get("speech_part", ""), }, ) for meaning in entryDict.get("meanings", []) ) yield glos.newEntry(word, defi, defiFormat="h") log.info(f"finished reading {fname}") pyglossary-5.0.9/pyglossary/plugins/wordset/tools.toml000066400000000000000000000000001476751035500233650ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/xdxf/000077500000000000000000000000001476751035500206245ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/xdxf/__init__.py000066400000000000000000000024351476751035500227410ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, Option, ) from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "xdxf" name = "Xdxf" description = "XDXF (.xdxf)" extensions = (".xdxf",) extensionCreate = ".xdxf" singleFile = True kind = "text" wiki = "https://en.wikipedia.org/wiki/XDXF" website = ( "https://github.com/soshial/xdxf_makedict/tree/master/format_standard", "XDXF standard - @soshial/xdxf_makedict", ) optionsProp: dict[str, Option] = { "html": BoolOption(comment="Entries are HTML"), "xsl": BoolOption( comment="Use XSL transformation", ), } """ new format ... ... ... article 1 article 2 article 3 article 4 ... old format ... ... article 1 article 2 article 3 article 4 ... """ pyglossary-5.0.9/pyglossary/plugins/xdxf/reader.py000066400000000000000000000146641476751035500224530ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2023 Saeed Rasooli # Copyright © 2016 ivan tkachenko me@ratijas.tk # # some parts of this file include code from: # Aard Dictionary Tools . # Copyright © 2008-2009 Igor Tkach # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. 
# # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations import re import typing from typing import TYPE_CHECKING, cast if TYPE_CHECKING: import io from collections.abc import Iterator, Sequence from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.lxml_types import Element from lxml import etree as ET from pyglossary.compression import ( compressionOpen, stdCompressions, ) from pyglossary.core import log from pyglossary.io_utils import nullBinaryIO from pyglossary.text_utils import toStr __all__ = ["Reader"] if TYPE_CHECKING: class TransformerType(typing.Protocol): def transform(self, article: Element) -> str: ... class Reader: useByteProgress = True compressions = stdCompressions depends = { "lxml": "lxml", } _html: bool = True _xsl: bool = False infoKeyMap = { "full_name": "name", "full_title": "name", } def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._filename = "" self._file: io.IOBase = nullBinaryIO self._encoding = "utf-8" self._htmlTr: TransformerType | None = None self._re_span_k = re.compile( '[^<>]*(
          )?', ) def makeTransformer(self) -> None: if self._xsl: from pyglossary.xdxf.xsl_transform import XslXdxfTransformer self._htmlTr = XslXdxfTransformer(encoding=self._encoding) return from pyglossary.xdxf.transform import XdxfTransformer self._htmlTr = XdxfTransformer(encoding=self._encoding) def open(self, filename: str) -> None: # noqa: PLR0912 # self._filename = filename if self._html: self.makeTransformer() self._glos.setDefaultDefiFormat("h") else: self._glos.setDefaultDefiFormat("x") cfile = self._file = cast( "io.IOBase", compressionOpen( self._filename, mode="rb", ), ) context = ET.iterparse( # type: ignore cfile, events=("end",), ) for _, _elem in context: elem = cast("Element", _elem) if elem.tag in {"meta_info", "ar", "k", "abr", "dtrn"}: break # every other tag before or is considered info if elem.tag == "abbr_def": continue # in case of multiple or multiple tags, the last one # will be stored. # Very few formats support more than one language pair in their # metadata, so it's not very useful to have multiple if elem.tag == "from": for key, value in elem.attrib.items(): if key.endswith("}lang"): self._glos.sourceLangName = value.split("-")[0] break continue if elem.tag == "to": for key, value in elem.attrib.items(): if key.endswith("}lang"): self._glos.targetLangName = value.split("-")[0] break continue if not elem.text: if elem.tag != "br": log.warning(f"empty tag <{elem.tag}>") continue key = self.infoKeyMap.get(elem.tag, elem.tag) self._glos.setInfo(key, elem.text) del context if cfile.seekable(): cfile.seek(0, 2) self._fileSize = cfile.tell() cfile.seek(0) self._glos.setInfo("input_file_size", str(self._fileSize)) else: log.warning("XDXF Reader: file is not seekable") self._file.close() self._file = compressionOpen(self._filename, mode="rb") def __len__(self) -> int: return 0 def __iter__(self) -> Iterator[EntryType]: context = ET.iterparse( # type: ignore self._file, events=("end",), tag="ar", ) for _, _article in context: article = cast("Element", _article) article.tail = None words = [toStr(w) for w in self.titles(article)] if self._htmlTr: defi = self._htmlTr.transform(article) defiFormat = "h" if len(words) == 1: defi = self._re_span_k.sub("", defi) else: b_defi = cast("bytes", ET.tostring(article, encoding=self._encoding)) defi = b_defi[4:-5].decode(self._encoding).strip() defiFormat = "x" # log.info(f"{defi=}, {words=}") yield self._glos.newEntry( words, defi, defiFormat=defiFormat, byteProgress=(self._file.tell(), self._fileSize), ) # clean up preceding siblings to save memory # this can reduce memory usage from 1 GB to ~25 MB parent = article.getparent() if parent is None: continue while article.getprevious() is not None: del parent[0] def close(self) -> None: self._file.close() self._file = nullBinaryIO @staticmethod def tostring( elem: Element, ) -> str: return ( ET.tostring( elem, method="html", pretty_print=True, ) .decode("utf-8") .strip() ) def titles(self, article: Element) -> list[str]: """ :param article: tag :return: (title (str) | None, alternative titles (set)) """ from itertools import combinations titles: list[str] = [] for title_element in article.findall("k"): if title_element.text is None: # TODO: look for tag? 
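# a <k> whose .text is None may still carry text in child element tails
# (which _mktitle below can assemble); such titles are currently skipped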
log.warning(f"empty title element: {self.tostring(title_element)}") continue n_opts = len([c for c in title_element if c.tag == "opt"]) if n_opts: titles += [ self._mktitle(title_element, comb) for j in range(n_opts + 1) for comb in combinations(list(range(n_opts)), j) ] else: titles.append(self._mktitle(title_element)) return titles def _mktitle( # noqa: PLR6301 self, title_element: Element, include_opts: Sequence | None = None, ) -> str: if include_opts is None: include_opts = () title = title_element.text or "" opt_i = -1 for c in title_element: if c.tag == "nu" and c.tail: if title: title += c.tail else: title = c.tail if c.tag == "opt" and c.text is not None: opt_i += 1 if opt_i in include_opts: title += c.text if c.tail: title += c.tail return title.strip() pyglossary-5.0.9/pyglossary/plugins/xdxf/tools.toml000066400000000000000000000012561476751035500226650ustar00rootroot00000000000000["GoldenDict-NG by @xiaoyifang"] web = "https://xiaoyifang.github.io/goldendict-ng/" source = "https://github.com/xiaoyifang/goldendict-ng" platforms = [ "Linux", "Windows", "Mac",] license = "GPL" plang = "C++" [GoldenDict] web = "http://goldendict.org/" source = "https://github.com/goldendict/goldendict" wiki = "https://github.com/goldendict/goldendict/wiki" platforms = [ "Linux", "Windows", "Mac",] license = "GPL" plang = "C++" [QTranslate] web = "https://qtranslate.en.lo4d.com/windows" platforms = [ "Windows",] license = "Freeware" plang = "C++" [Alpus] web = "https://alpusapp.com/" platforms = [ "Windows", "Mac", "Linux", "Android",] license = "Freeware" plang = "Java" pyglossary-5.0.9/pyglossary/plugins/xdxf_css/000077500000000000000000000000001476751035500214745ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/xdxf_css/__init__.py000066400000000000000000000024651476751035500236140ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING from pyglossary.option import BoolOption from .reader import Reader if TYPE_CHECKING: from pyglossary.option import Option __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "xdxf_css" name = "XdxfCss" description = "XDXF with CSS and JS" extensions = () extensionCreate = ".xdxf" singleFile = True kind = "text" wiki = "https://en.wikipedia.org/wiki/XDXF" website = ( "https://github.com/soshial/xdxf_makedict/tree/master/format_standard", "XDXF standard - @soshial/xdxf_makedict", ) optionsProp: dict[str, Option] = { "html": BoolOption(comment="Entries are HTML"), } """ new format ... ... ... article 1 article 2 article 3 article 4 ... old format ... ... article 1 article 2 article 3 article 4 ... """ pyglossary-5.0.9/pyglossary/plugins/xdxf_css/reader.py000066400000000000000000000171571476751035500233230ustar00rootroot00000000000000# -*- coding: utf-8 -*- # xdxf file format reader and utils to convert xdxf to html. # # Copyright © 2023 Saeed Rasooli # Copyright © 2016 ivan tkachenko me@ratijas.tk # # some parts of this file include code from: # Aard Dictionary Tools . # Copyright © 2008-2009 Igor Tkach # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. 
# # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations import re import typing from os.path import join from typing import TYPE_CHECKING, cast if TYPE_CHECKING: import io from collections.abc import Iterator, Sequence from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.lxml_types import Element from lxml import etree as ET from pyglossary.compression import ( compressionOpen, stdCompressions, ) from pyglossary.core import log, rootDir from pyglossary.io_utils import nullBinaryIO from pyglossary.text_utils import toStr __all__ = ["Reader"] if TYPE_CHECKING: class TransformerType(typing.Protocol): def transform(self, article: Element) -> str: ... class Reader: useByteProgress = True compressions = stdCompressions depends = { "lxml": "lxml", } _html: bool = True infoKeyMap = { "full_name": "name", "full_title": "name", } def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._filename = "" self._file: io.IOBase = nullBinaryIO self._encoding = "utf-8" self._htmlTr: TransformerType | None = None self._re_span_k = re.compile( '[^<>]*(
          )?', ) self._has_added_css: bool = False self._has_added_js: bool = False self._abbr_defs_js: bytes def makeTransformer(self) -> None: from pyglossary.xdxf.css_js_transform import XdxfTransformer self._htmlTr = XdxfTransformer(encoding=self._encoding) def open(self, filename: str) -> None: # noqa: PLR0912 # self._filename = filename self.makeTransformer() self._glos.setDefaultDefiFormat("h") cfile = self._file = cast( "io.IOBase", compressionOpen( self._filename, mode="rb", ), ) context = ET.iterparse( # type: ignore cfile, events=("end",), ) abbr_defs: list[Element] = [] for _, _elem in context: elem = cast("Element", _elem) if elem.tag in {"meta_info", "ar", "k", "abr", "dtrn"}: break # every other tag before or
          is considered info if elem.tag == "abbr_def": abbr_defs.append(elem) continue # in case of multiple or multiple tags, the last one # will be stored. # Very few formats support more than one language pair in their # metadata, so it's not very useful to have multiple if elem.tag == "from": for key, value in elem.attrib.items(): if key.endswith("}lang"): self._glos.sourceLangName = value.split("-")[0] break continue if elem.tag == "to": for key, value in elem.attrib.items(): if key.endswith("}lang"): self._glos.targetLangName = value.split("-")[0] break continue if not elem.text: if elem.tag != "br": log.warning(f"empty tag <{elem.tag}>") continue key = self.infoKeyMap.get(elem.tag, elem.tag) self._glos.setInfo(key, elem.text) self._abbr_defs_js = self.generate_abbr_js(abbr_defs) del context if cfile.seekable(): cfile.seek(0, 2) self._fileSize = cfile.tell() cfile.seek(0) self._glos.setInfo("input_file_size", str(self._fileSize)) else: log.warning("XDXF Reader: file is not seekable") self._file.close() self._file = compressionOpen(self._filename, mode="rb") def __len__(self) -> int: return 0 def __iter__(self) -> Iterator[EntryType]: context = ET.iterparse( # type: ignore self._file, events=("end",), tag="ar", ) if not self._has_added_css: self._has_added_css = True cssPath = join(rootDir, "pyglossary", "xdxf", "xdxf.css") with open(cssPath, "rb") as css_file: yield self._glos.newDataEntry("css/xdxf.css", css_file.read()) if self._abbr_defs_js is not None and not self._has_added_js: self._has_added_js = True yield self._glos.newDataEntry("js/xdxf.js", self._abbr_defs_js) for _, _article in context: article = cast("Element", _article) article.tail = None words = [toStr(w) for w in self.titles(article)] defi = self._htmlTr.transform(article) defiFormat = "h" if len(words) == 1: defi = self._re_span_k.sub("", defi) defi = f""" {defi} """ # log.info(f"{defi=}, {words=}") yield self._glos.newEntry( words, defi, defiFormat=defiFormat, byteProgress=(self._file.tell(), self._fileSize), ) # clean up preceding siblings to save memory # this can reduce memory usage from 1 GB to ~25 MB parent = article.getparent() if parent is None: continue while article.getprevious() is not None: del parent[0] def close(self) -> None: self._file.close() self._file = nullBinaryIO def generate_abbr_js(self, abbr_defs: list[Element]) -> bytes: abbr_map_js = """const abbr_map = new Map();\n""" for abbr_def in abbr_defs: abbr_k_list: list[str] = [] abbr_v_text = "" for child in abbr_def.xpath("child::node()"): if child.tag == "abbr_k": abbr_k_list.append(self._htmlTr.stringify_children(child)) if child.tag == "abbr_v": abbr_v_text = self._htmlTr.stringify_children(child) # TODO escape apostrophes for abbr_k in abbr_k_list: if abbr_k and abbr_v_text: abbr_map_js += f"abbr_map.set('{abbr_k}', '{abbr_v_text}');\n" with open(join(rootDir, "pyglossary", "xdxf", "xdxf.js"), "rb") as js_file: return abbr_map_js.encode(encoding="utf-8") + js_file.read() @staticmethod def tostring( elem: Element, ) -> str: return ( ET.tostring( elem, method="html", pretty_print=True, ) .decode("utf-8") .strip() ) def titles(self, article: Element) -> list[str]: """ :param article: tag :return: (title (str) | None, alternative titles (set)) """ from itertools import combinations titles: list[str] = [] for title_element in article.findall("k"): if title_element.text is None: # TODO: look for tag? 
log.warning(f"empty title element: {self.tostring(title_element)}") continue n_opts = len([c for c in title_element if c.tag == "opt"]) if n_opts: titles += [ self._mktitle(title_element, comb) for j in range(n_opts + 1) for comb in combinations(list(range(n_opts)), j) ] else: titles.append(self._mktitle(title_element)) return titles def _mktitle( # noqa: PLR6301 self, title_element: Element, include_opts: Sequence | None = None, ) -> str: if include_opts is None: include_opts = () title = title_element.text or "" opt_i = -1 for c in title_element: if c.tag == "nu" and c.tail: if title: title += c.tail else: title = c.tail if c.tag == "opt" and c.text is not None: opt_i += 1 if opt_i in include_opts: title += c.text if c.tail: title += c.tail return title.strip() pyglossary-5.0.9/pyglossary/plugins/xdxf_lax/000077500000000000000000000000001476751035500214705ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/xdxf_lax/__init__.py000066400000000000000000000014261476751035500236040ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import ( BoolOption, Option, ) from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "xdxf_lax" name = "XdxfLax" description = "XDXF Lax (.xdxf)" extensions = () extensionCreate = ".xdxf" singleFile = True kind = "text" wiki = "https://en.wikipedia.org/wiki/XDXF" website = ( "https://github.com/soshial/xdxf_makedict/tree/master/format_standard", "XDXF standard - @soshial/xdxf_makedict", ) optionsProp: dict[str, Option] = { "html": BoolOption(comment="Entries are HTML"), "xsl": BoolOption( comment="Use XSL transformation", ), } pyglossary-5.0.9/pyglossary/plugins/xdxf_lax/reader.py000066400000000000000000000141421476751035500233060ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Lax implementation of xdxf reader. # # Copyright © 2023 Saeed Rasooli # Copyright © 2016 ivan tkachenko me@ratijas.tk # # some parts of this file include code from: # Aard Dictionary Tools . # Copyright © 2008-2009 Igor Tkach # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3 of the License. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations import re import typing from typing import TYPE_CHECKING, cast if TYPE_CHECKING: import io from collections.abc import Iterator, Sequence from lxml.html import HtmlElement as Element from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.compression import ( compressionOpen, stdCompressions, ) from pyglossary.core import log from pyglossary.io_utils import nullBinaryIO from pyglossary.text_utils import toStr from pyglossary.xdxf.transform import XdxfTransformer from pyglossary.xdxf.xsl_transform import XslXdxfTransformer __all__ = ["Reader"] if TYPE_CHECKING: class TransformerType(typing.Protocol): def transform(self, article: Element) -> str: ... 
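# Unlike the strict XDXF reader, this "lax" implementation does not
# stream-parse the whole document with lxml.etree. readUntil() scans the
# raw (possibly compressed) byte stream for article spans, and each
# article is then parsed on its own with lxml.html.fromstring, which
# tolerates markup that a strict XML parser would reject.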
class Reader: useByteProgress = True compressions = stdCompressions depends = { "lxml": "lxml", } _html: bool = True _xsl: bool = False infoKeyMap = { "full_name": "name", "full_title": "name", } def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._filename = "" self._file: io.IOBase = nullBinaryIO self._encoding = "utf-8" self._htmlTr: TransformerType | None = None self._re_span_k = re.compile( '[^<>]*(
          )?', ) def readUntil(self, untilByte: bytes) -> tuple[int, bytes]: file = self._file buf = b"" while True: tmp = file.read(100) if not tmp: break buf += tmp index = buf.find(untilByte) if index < 0: continue file.seek(file.tell() - len(buf) + index) return index, buf[:index] return -1, buf def _readOneMetadata(self, tag: str, infoKey: str) -> None: from lxml.etree import XML endTag = f"".encode("ascii") descStart, _ = self.readUntil(f"<{tag}>".encode("ascii")) if descStart < 0: log.warning(f"did not find {tag} open") return descEnd, desc = self.readUntil(endTag) if descEnd < 0: log.warning(f"did not find {tag} close") return desc += endTag elem = XML(desc) if elem.text: self._glos.setInfo(infoKey, elem.text) def readMetadata(self) -> None: file = self._file pos = file.tell() self._readOneMetadata("full_name", "title") file.seek(pos) self._readOneMetadata("description", "description") def open(self, filename: str) -> None: # self._filename = filename if self._html: if self._xsl: self._htmlTr = XslXdxfTransformer(encoding=self._encoding) else: self._htmlTr = XdxfTransformer(encoding=self._encoding) self._glos.setDefaultDefiFormat("h") else: self._glos.setDefaultDefiFormat("x") cfile = self._file = compressionOpen(self._filename, mode="rb") self.readMetadata() cfile.seek(0, 2) self._fileSize = cfile.tell() cfile.seek(0) self._glos.setInfo("input_file_size", str(self._fileSize)) def __len__(self) -> int: return 0 def __iter__(self) -> Iterator[EntryType]: from lxml.html import fromstring, tostring while True: start, _ = self.readUntil(b"") if end < 0: break b_article += b"
          " s_article = b_article.decode("utf-8") try: article = cast("Element", fromstring(s_article)) except Exception as e: log.exception(s_article) raise e from None words = [toStr(w) for w in self.titles(article)] if self._htmlTr: defi = self._htmlTr.transform(article) defiFormat = "h" if len(words) == 1: defi = self._re_span_k.sub("", defi) else: b_defi = cast("bytes", tostring(article, encoding=self._encoding)) defi = b_defi[4:-5].decode(self._encoding).strip() defiFormat = "x" # log.info(f"{defi=}, {words=}") yield self._glos.newEntry( words, defi, defiFormat=defiFormat, byteProgress=(self._file.tell(), self._fileSize), ) def close(self) -> None: if self._file: self._file.close() self._file = nullBinaryIO @staticmethod def tostring( elem: Element, ) -> str: from lxml.html import tostring return ( tostring( elem, method="html", pretty_print=True, ) .decode("utf-8") .strip() ) def titles(self, article: Element) -> list[str]: """ :param article: tag :return: (title (str) | None, alternative titles (set)) """ from itertools import combinations titles: list[str] = [] for title_element in article.findall("k"): if title_element.text is None: # TODO: look for tag? log.warning(f"empty title element: {self.tostring(title_element)}") continue n_opts = len([c for c in title_element if c.tag == "opt"]) if n_opts: titles += [ self._mktitle(title_element, comb) for j in range(n_opts + 1) for comb in combinations(list(range(n_opts)), j) ] else: titles.append(self._mktitle(title_element)) return titles def _mktitle( # noqa: PLR6301 self, title_element: Element, include_opts: Sequence | None = None, ) -> str: if include_opts is None: include_opts = () title = title_element.text or "" opt_i = -1 for c in title_element: if c.tag == "nu" and c.tail: if title: title += c.tail else: title = c.tail if c.tag == "opt" and c.text is not None: opt_i += 1 if opt_i in include_opts: title += c.text if c.tail: title += c.tail return title.strip() pyglossary-5.0.9/pyglossary/plugins/yomichan/000077500000000000000000000000001476751035500214625ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/yomichan/__init__.py000066400000000000000000000144221476751035500235760ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.flags import ALWAYS from pyglossary.option import ( BoolOption, IntOption, Option, StrOption, ) from .writer import Writer __all__ = [ "Writer", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "yomichan" name = "Yomichan" description = "Yomichan (.zip)" extensions = (".zip",) extensionCreate = ".zip" singleFile = True sortOnWrite = ALWAYS sortKeyName = "headword" kind = "package" wiki = "" website = ( "https://foosoft.net/projects/yomichan/", "foosoft.net", ) optionsProp: dict[str, Option] = { "term_bank_size": IntOption( comment="The number of terms in each term bank json file.", ), "term_from_headword_only": BoolOption( comment=( "If set to true, only create a term for the headword for each entry, " "as opposed to create one term for each alternate word. " "If the headword is ignored by the `ignore_word_with_pattern` option, " "the next word in the alternate list that is not ignored is used as " "headword." 
), ), "no_term_from_reading": BoolOption( comment=( "When there are multiple alternate words, don't create term for the " "one that is the same as the the reading form, which is chosen to be " "the first alternate forms that consists solely of Hiragana and " "Katakana. " "For example, an entry could contain both 'だいがく' and '大学' as " "alternate words. Setting this option to true would prevent a term " "to be created for the former." ), ), "delete_word_pattern": StrOption( comment=( "When given, all non-overlapping matches of this regular expression " "are removed from word strings. " "For example, if an entry has word 'あま·い', setting the " "pattern to `·` removes all center dots, or more precisely use " "`·(?=[\\u3040-\\u309F])` to only remove center dots that precede " "Hiragana characters. Either way, the original word is replaced " "with 'あまい'." ), ), "ignore_word_with_pattern": StrOption( comment=( "When given, don't create terms for a word if any of its substrings " "matches this regular expression. " "For example, an entry could contain both 'だいがく【大学】' and '大学' " "as alternate words. Setting this option with value `r'【.+】'` would " "prevent a term to be created for the former." ), ), "alternates_from_word_pattern": StrOption( comment=( "When given, the regular expression is used to find additional " "alternate words for the same entry from matching substrings in " "the original words. " "If there are no capturing groups in the regular expression, " "then all matched substrings are added to the list of alternate " "words. " "If there are capturing groups, then substrings matching the groups " "are added to the alternate words list instead. " "For example, if an entry has 'だいがく【大学】' as a word, then " "`\\w+(?=【)` adds 'だいがく' as an additional word, while " "`(\\w+)【(\\w+)】` adds both 'だいがく' and '大学'." ), ), "alternates_from_defi_pattern": StrOption( comment=( "When given, the regular expression is used to find additional " "alternate words for the same entry from matching substrings in " "the definition. `^` and `$` can be used to match start and end of " "lines, respectively. " "If there are no capturing groups in the regular expression, " "then all matched substrings are added to the list of alternate " "words. " "If there are capturing groups, then substrings matching the groups " "are added to the alternate words list instead. " "For example, if an entry has 'だいがく【大学】' in its definition, then " "`\\w+【(\\w+)】` adds '大学' as an additional word." ), ), "rule_v1_defi_pattern": StrOption( comment=( "When given, if any substring of an entry's definition matches this " "regular expression, then the term(s) created from entry are labeled " "as ichidan verb. Yomichan uses this information to match conjugated " "forms of words. `^` and `$` can be used to match start and end of " "lines, respectively. " "For example, setting this option to `^\\(動[上下]一\\)$` identifies " "entries where there's a line of '(動上一)' or '(動下一)'." ), ), "rule_v5_defi_pattern": StrOption( comment=( "When given, if any substring of an entry's definition matches this " "regular expression, then the term(s) created from entry are labeled " "as godan verb. Yomichan uses this information to match conjugated " "forms of words. `^` and `$` can be used to match start and end of " "lines, respectively. " "For example, setting this option to `^\\(動五\\)$` identifies " "entries where there's a line of '(動五)'." 
), ), "rule_vs_defi_pattern": StrOption( comment=( "When given, if any substring of an entry's definition matches this " "regular expression, then the term(s) created from entry are labeled " "as suru verb. Yomichan uses this information to match conjugated " "forms of words. `^` and `$` can be used to match start and end of " "lines, respectively. " "For example, setting this option to `^スル$` identifies entries where " "there's a line of 'スル'." ), ), "rule_vk_defi_pattern": StrOption( comment=( "When given, if any substring of an entry's definition matches this " "regular expression, then the term(s) created from entry are labeled " "as kuru verb. Yomichan uses this information to match conjugated " "forms of words. `^` and `$` can be used to match start and end of " "lines, respectively. " "For example, setting this option to `^\\(動カ変\\)$` identifies " "entries where there's a line of '(動カ変)'." ), ), "rule_adji_defi_pattern": StrOption( comment=( "When given, if any substring of an entry's definition matches this " "regular expression, then the term(s) created from entry are labeled " "as i-adjective. Yomichan uses this information to match conjugated " "forms of words. `^` and `$` can be used to match start and end of " "lines, respectively. " "For example, setting this option to `r'^\\(形\\)$'` identify " "entries where there's a line of '(形)'." ), ), } pyglossary-5.0.9/pyglossary/plugins/yomichan/tools.toml000066400000000000000000000003701476751035500235170ustar00rootroot00000000000000["Yomitan"] desc = "Pop-up dictionary browser extension. Successor to Yomichan." web = "https://yomitan.wiki/" source = "https://github.com/yomidevs/yomitan" platforms = [ "Chrome", "Firefox", "Edge", "Brave",] license = "GPL" plang = "JavaScript" pyglossary-5.0.9/pyglossary/plugins/yomichan/writer.py000066400000000000000000000150411476751035500233510ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import json import os import re from os.path import join from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from collections.abc import Generator, Sequence from pyglossary.glossary_types import EntryType, WriterGlossaryType __all__ = ["Writer"] def _isKana(char: str) -> bool: assert len(char) == 1 val = ord(char) return ( 0x3040 <= val <= 0x309F # Hiragana or 0x30A0 <= val <= 0x30FF # Katakana (incl. center dot) or 0xFF65 <= val <= 0xFF9F # Half-width Katakana (incl. 
center dot) ) def _isKanji(char: str) -> bool: assert len(char) == 1 val = ord(char) return ( 0x3400 <= val <= 0x4DBF # CJK Unified Ideographs Extension A or 0x4E00 <= val <= 0x9FFF # CJK Unified Ideographs or 0xF900 <= val <= 0xFAFF # CJK Compatibility Ideographs or 0x20000 <= val <= 0x2A6DF # CJK Unified Ideographs Extension B or 0x2A700 <= val <= 0x2B73F # CJK Unified Ideographs Extension C or 0x2B740 <= val <= 0x2B81F # CJK Unified Ideographs Extension D or 0x2F800 <= val <= 0x2FA1F # CJK Compatibility Ideographs Supplement ) def _uniqueList(lst: Sequence[str]) -> list[str]: seen: set[str] = set() result: list[str] = [] for elem in lst: if elem not in seen: seen.add(elem) result.append(elem) return result def _compilePat(pattern: str) -> re.Pattern | None: if not pattern: return None return re.compile(pattern) class Writer: depends = { "bs4": "beautifulsoup4", } _term_bank_size = 10_000 _term_from_headword_only = True _no_term_from_reading = True _delete_word_pattern = "" _ignore_word_with_pattern = "" _alternates_from_word_pattern = "" _alternates_from_defi_pattern = "" _rule_v1_defi_pattern = "" _rule_v5_defi_pattern = "" _rule_vs_defi_pattern = "" _rule_vk_defi_pattern = "" _rule_adji_defi_pattern = "" def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._filename = "" # Yomichan technically supports "structured content" that renders to # HTML, but it doesn't seem widely used. So here we also strip HTML # formatting for simplicity. glos.removeHtmlTagsAll() self.delete_word_pattern = _compilePat(self._delete_word_pattern) self.ignore_word_with_pattern = _compilePat(self._ignore_word_with_pattern) self.alternates_from_word_pattern = _compilePat( self._alternates_from_word_pattern ) self.alternates_from_defi_pattern = _compilePat( self._alternates_from_defi_pattern ) self.rules = [ (_compilePat(self._rule_v1_defi_pattern), "v1"), (_compilePat(self._rule_v5_defi_pattern), "v5"), (_compilePat(self._rule_vs_defi_pattern), "vs"), (_compilePat(self._rule_vk_defi_pattern), "vk"), (_compilePat(self._rule_adji_defi_pattern), "adj-i"), ] def _getInfo(self, key: str) -> str: info = self._glos.getInfo(key) return info.replace("\n", "
          ") def _getAuthor(self) -> str: return self._glos.author.replace("\n", "
          ") def _getDictionaryIndex(self) -> dict[str, Any]: # Schema: https://github.com/FooSoft/yomichan/ # blob/master/ext/data/schemas/dictionary-index-schema.json return { "title": self._getInfo("title"), "revision": "PyGlossary export", "sequenced": True, "format": 3, "author": self._getAuthor(), "url": self._getInfo("website"), "description": self._getInfo("description"), } def _getExpressionsAndReadingFromEntry( self, entry: EntryType, ) -> tuple[list[str], str]: term_expressions = entry.l_word alternates_from_word_pattern = self.alternates_from_word_pattern if alternates_from_word_pattern: for word in entry.l_word: term_expressions += alternates_from_word_pattern.findall(word) if self.alternates_from_defi_pattern: term_expressions += self.alternates_from_defi_pattern.findall( entry.defi, re.MULTILINE, ) delete_word_pattern = self.delete_word_pattern if delete_word_pattern: term_expressions = [ delete_word_pattern.sub("", expression) for expression in term_expressions ] ignore_word_with_pattern = self.ignore_word_with_pattern if ignore_word_with_pattern: term_expressions = [ expression for expression in term_expressions if not ignore_word_with_pattern.search(expression) ] term_expressions = _uniqueList(term_expressions) try: reading = next( expression for expression in entry.l_word + term_expressions if all(map(_isKana, expression)) ) except StopIteration: reading = "" if self._no_term_from_reading and len(term_expressions) > 1: term_expressions = [ expression for expression in term_expressions if expression != reading ] if self._term_from_headword_only: term_expressions = term_expressions[:1] return term_expressions, reading def _getRuleIdentifiersFromEntry(self, entry: EntryType) -> list[str]: return [ rule for pattern, rule in self.rules if pattern and pattern.search(entry.defi, re.MULTILINE) ] def _getTermsFromEntry( self, entry: EntryType, sequenceNumber: int, ) -> list[list[Any]]: termExpressions, reading = self._getExpressionsAndReadingFromEntry(entry) ruleIdentifiers = self._getRuleIdentifiersFromEntry(entry) # Schema: https://github.com/FooSoft/yomichan/ # blob/master/ext/data/schemas/dictionary-term-bank-v3-schema.json return [ [ expression, # reading only added if expression contains kanji reading if any(map(_isKanji, expression)) else "", "", # definition tags " ".join(ruleIdentifiers), 0, # score [entry.defi], sequenceNumber, "", # term tags ] for expression in termExpressions ] def open(self, filename: str) -> None: self._filename = filename self._glos.mergeEntriesWithSameHeadwordPlaintext() def finish(self) -> None: self._filename = "" def write(self) -> Generator[None, EntryType, None]: direc = self._filename os.makedirs(direc, exist_ok=True) with open(join(direc, "index.json"), "w", encoding="utf-8") as f: json.dump(self._getDictionaryIndex(), f, ensure_ascii=False) entryCount = 0 termBankIndex = 0 terms: list[list[Any]] = [] def flushTerms() -> None: nonlocal termBankIndex if not terms: return with open( join(direc, f"term_bank_{termBankIndex + 1}.json"), mode="w", encoding="utf-8", ) as _file: json.dump(terms, _file, ensure_ascii=False) terms.clear() termBankIndex += 1 while True: entry: EntryType entry = yield if entry is None: break if entry.isData(): continue terms.extend(self._getTermsFromEntry(entry, entryCount)) entryCount += 1 if len(terms) >= self._term_bank_size: flushTerms() flushTerms() 
pyglossary-5.0.9/pyglossary/plugins/zimfile/000077500000000000000000000000001476751035500213125ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/plugins/zimfile/__init__.py000066400000000000000000000022411476751035500234220ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from pyglossary.option import Option, UnicodeErrorsOption from .reader import Reader __all__ = [ "Reader", "description", "enable", "extensionCreate", "extensions", "kind", "lname", "name", "optionsProp", "singleFile", "website", "wiki", ] enable = True lname = "zim" name = "Zim" description = "Zim (.zim, for Kiwix)" extensions = (".zim",) extensionCreate = ".zim" singleFile = True kind = "binary" wiki = "https://en.wikipedia.org/wiki/ZIM_(file_format)" website = ( "https://wiki.openzim.org/wiki/OpenZIM", "OpenZIM", ) optionsProp: dict[str, Option] = { "text_unicode_errors": UnicodeErrorsOption( comment="Unicode Errors for plaintext, values: `strict`, `ignore`, `replace`", ), "html_unicode_errors": UnicodeErrorsOption( comment="Unicode Errors for HTML, values: `strict`, `ignore`, `replace`", ), } # https://wiki.kiwix.org/wiki/Software # to download zim files: # https://archive.org/details/zimarchive # https://dumps.wikimedia.org/other/kiwix/zim/ # I can't find any way to download zim files from https://library.kiwix.org/ # which wiki.openzim.org points at for downloaing zim files pyglossary-5.0.9/pyglossary/plugins/zimfile/reader.py000066400000000000000000000111131476751035500231230ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations import os from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Iterator from libzim.reader import Archive # type: ignore from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.core import cacheDir, exc_note, log, pip __all__ = ["Reader"] class Reader: _text_unicode_errors = "replace" _html_unicode_errors = "replace" useByteProgress = False depends = { "libzim": "libzim>=1.0", } resourceMimeTypes = { "image/png", "image/jpeg", "image/gif", "image/svg+xml", "image/webp", "image/x-icon", "text/css", "text/javascript", "application/javascript", "application/json", "application/octet-stream", "application/octet-stream+xapian", "application/x-chrome-extension", "application/warc-headers", "application/font-woff", } def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._filename = "" self._zimfile: Archive | None = None def open(self, filename: str) -> None: try: from libzim.reader import Archive except ModuleNotFoundError as e: exc_note(e, f"Run `{pip} install libzim` to install") raise self._filename = filename self._zimfile = Archive(filename) def close(self) -> None: self._filename = "" self._zimfile = None def __len__(self) -> int: if self._zimfile is None: log.error("len(reader) called before reader.open()") return 0 return self._zimfile.entry_count def __iter__(self) -> Iterator[EntryType | None]: # noqa: PLR0912 glos = self._glos zimfile = self._zimfile if zimfile is None: return emptyContentCount = 0 invalidMimeTypeCount = 0 undefinedMimeTypeCount = 0 entryCount = zimfile.entry_count redirectCount = 0 windows = os.sep == "\\" try: f_namemax = os.statvfs(cacheDir).f_namemax # type: ignore except AttributeError: log.warning("Unsupported operating system (no os.statvfs)") # Windows: CreateFileA has a limit of 260 characters. # CreateFileW supports names up to about 32760 characters (64kB). 
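# no reliable limit is available here, so fall back to a conservative
# value that stays below the 260-character limit mentioned above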
f_namemax = 200 fileNameTooLong: list[str] = [] text_unicode_errors = self._text_unicode_errors html_unicode_errors = self._html_unicode_errors for entryIndex in range(entryCount): zEntry = zimfile._get_entry_by_id(entryIndex) word = zEntry.title if zEntry.is_redirect: redirectCount += 1 targetWord = zEntry.get_redirect_entry().title yield glos.newEntry( word, f'Redirect: {targetWord}', defiFormat="h", ) continue zItem = zEntry.get_item() b_content = zItem.content.tobytes() if not b_content: emptyContentCount += 1 yield None # TODO: test with more zim files # Looks like: zItem.path == zEntry.path == "-" + word # print(f"b_content empty, {word=}, {zEntry.path=}, {zItem.path=}") # if zEntry.path == "-" + word: # yield None # else: # defi = f"Path: {zEntry.path}" # yield glos.newEntry(word, defi, defiFormat="m") continue try: mimetype = zItem.mimetype except RuntimeError: invalidMimeTypeCount += 1 mimetype = "" yield glos.newDataEntry(word, b_content) if mimetype == "undefined": undefinedMimeTypeCount += 1 continue mimetype = mimetype.split(";")[0] if mimetype.startswith("text/html"): # can be "text/html;raw=true" defi = b_content.decode("utf-8", errors=html_unicode_errors) defi = defi.replace(' src="../I/', ' src="./') yield glos.newEntry(word, defi, defiFormat="h") continue if mimetype == "text/plain": yield glos.newEntry( word, b_content.decode("utf-8", errors=text_unicode_errors), defiFormat="m", ) continue if mimetype not in self.resourceMimeTypes: log.warning(f"Unrecognized {mimetype=}") if len(word) > f_namemax: fileNameTooLong.append(word) continue if "|" in word: log.warning(f"resource title: {word}") if windows: continue try: entry = glos.newDataEntry(word, b_content) except Exception as e: log.error(f"error creating file: {e}") continue yield entry log.info(f"ZIM Entry Count: {entryCount}") if fileNameTooLong: log.warning(f"Files with name too long: {len(fileNameTooLong)}") if emptyContentCount > 0: log.info(f"Empty Content Count: {emptyContentCount}") if invalidMimeTypeCount > 0: log.info(f"Invalid MIME-Type Count: {invalidMimeTypeCount}") if undefinedMimeTypeCount > 0: log.info(f"MIME-Type 'undefined' Count: {invalidMimeTypeCount}") if redirectCount > 0: log.info(f"Redirect Count: {redirectCount}") pyglossary-5.0.9/pyglossary/plugins/zimfile/tools.toml000066400000000000000000000010201476751035500233400ustar00rootroot00000000000000["Kiwix Desktop"] web = "https://github.com/kiwix/kiwix-desktop" platforms = [ "Linux", "Windows",] license = "GPL" ["Kiwix JS"] web = "https://github.com/kiwix/kiwix-js" platforms = [ "Windows",] license = "GPL" ["Kiwix Serve"] web = "https://github.com/kiwix/kiwix-tools" platforms = [ "Linux", "Windows",] license = "GPL" ["Kiwix for Apple Mac OS X"] web = "macos.kiwix.org" platforms = [ "Mac",] license = "" ["Kiwix for Android"] web = "https://github.com/kiwix/kiwix-android" platforms = [ "Android",] license = "GPL" pyglossary-5.0.9/pyglossary/queued_iter.py000066400000000000000000000012711476751035500210600ustar00rootroot00000000000000from __future__ import annotations import queue import threading from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from collections.abc import Iterator class QueuedIterator: def __init__( self, iterator: Iterator, max_size: int, ) -> None: self.iterator = iterator self.queue = queue.Queue(max_size) self.thread = threading.Thread(target=self._background_job) self.thread.start() def _background_job(self) -> None: for item in self.iterator: self.queue.put(item) self.queue.put(StopIteration) def __iter__(self) -> 
Iterator: return self def __next__(self) -> Any: item = self.queue.get() if item is StopIteration: raise StopIteration return item pyglossary-5.0.9/pyglossary/repro_zipfile/000077500000000000000000000000001476751035500210435ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/repro_zipfile/LICENSE000066400000000000000000000101411476751035500220450ustar00rootroot00000000000000Unless otherwise indicated, this software is copyright of DrivenData and licensed under the MIT License. Some portions of this software are copied and modified from Python 3.11, which is copyright of the Python Software Foundation and licensed under the Python Software Foundation License Version 2. ============================================================================== MIT License Copyright (c) 2023 DrivenData Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ============================================================================== PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation; All Rights Reserved 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("Python") in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation; All Rights Reserved" are retained in Python alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python. 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between PSF and Licensee. This License Agreement does not grant permission to use PSF trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. pyglossary-5.0.9/pyglossary/repro_zipfile/__init__.py000066400000000000000000000212271476751035500231600ustar00rootroot00000000000000from copy import copy import os import shutil import sys import time from typing import Tuple, Union from zipfile import ZIP_LZMA, ZipFile, ZipInfo try: from zipfile import _MASK_COMPRESS_OPTION_1 # type: ignore[attr-defined] except ImportError: _MASK_COMPRESS_OPTION_1 = 0x02 __version__ = "0.3.1" def date_time() -> Union[time.struct_time, Tuple[int, int, int, int, int, int]]: """Returns date_time value used to force overwrite on all ZipInfo objects. Defaults to 1980-01-01 00:00:00. You can set this with the environment variable SOURCE_DATE_EPOCH as an integer value representing seconds since Epoch. """ source_date_epoch = os.environ.get("SOURCE_DATE_EPOCH", None) if source_date_epoch is not None: return time.gmtime(int(source_date_epoch)) return (1980, 1, 1, 0, 0, 0) def file_mode() -> int: """Returns the file permissions mode value used to force overwrite on all ZipInfo objects. Defaults to 0o644 (rw-r--r--). You can set this with the environment variable REPRO_ZIPFILE_FILE_MODE. It should be in the Unix standard three-digit octal representation (e.g., '644'). """ file_mode_env = os.environ.get("REPRO_ZIPFILE_FILE_MODE", None) if file_mode_env is not None: return int(file_mode_env, 8) return 0o644 def dir_mode() -> int: """Returns the directory permissions mode value used to force overwrite on all ZipInfo objects. Defaults to 0o755 (rwxr-xr-x). You can set this with the environment variable REPRO_ZIPFILE_DIR_MODE. It should be in the Unix standard three-digit octal representation (e.g., '755'). """ dir_mode_env = os.environ.get("REPRO_ZIPFILE_DIR_MODE", None) if dir_mode_env is not None: return int(dir_mode_env, 8) return 0o755 class ReproducibleZipFile(ZipFile): """Open a ZIP file, where file can be a path to a file (a string), a file-like object or a path-like object. This is a replacement for the Python standard library zipfile.ZipFile that overwrites file-modified timestamps and file/directory permissions modes in write mode in order to create a reproducible ZIP archive. Other than overwriting these values, it works the same way as zipfile.ZipFile. 
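By default, timestamps are overwritten with 1980-01-01 00:00:00 and
permissions with 0o644 (files) / 0o755 (directories); the module-level
date_time, file_mode and dir_mode helpers honor the SOURCE_DATE_EPOCH,
REPRO_ZIPFILE_FILE_MODE and REPRO_ZIPFILE_DIR_MODE environment variables.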
For documentation on use, see the Python documentation for zipfile: https://docs.python.org/3/library/zipfile.html """ # Following method modified from Python 3.11 # https://github.com/python/cpython/blob/202efe1a3bcd499f3bf17bd953c6d36d47747e78/Lib/zipfile.py#L1763-L1794 # Copyright Python Software Foundation, licensed under PSF License Version 2 # See LICENSE file for full license agreement and notice of copyright def write(self, filename, arcname=None, compress_type=None, compresslevel=None): """Put the bytes from filename into the archive under the name arcname.""" if not self.fp: raise ValueError("Attempt to write to ZIP archive that was already closed") if self._writing: raise ValueError("Can't write to ZIP archive while an open writing handle exists") zinfo = ZipInfo.from_file(filename, arcname, strict_timestamps=self._strict_timestamps) ## repro-zipfile ADDED ## # Overwrite date_time and extrnal_attr (permissions mode) zinfo = copy(zinfo) zinfo.date_time = date_time() if zinfo.is_dir(): zinfo.external_attr = (0o40000 | dir_mode()) << 16 zinfo.external_attr |= 0x10 # MS-DOS directory flag else: zinfo.external_attr = file_mode() << 16 ######################### if zinfo.is_dir(): zinfo.compress_size = 0 zinfo.CRC = 0 self.mkdir(zinfo) else: if compress_type is not None: zinfo.compress_type = compress_type else: zinfo.compress_type = self.compression if compresslevel is not None: zinfo._compresslevel = compresslevel else: zinfo._compresslevel = self.compresslevel with open(filename, "rb") as src, self.open(zinfo, "w") as dest: shutil.copyfileobj(src, dest, 1024 * 8) # Following method modified from Python 3.11 # https://github.com/python/cpython/blob/202efe1a3bcd499f3bf17bd953c6d36d47747e78/Lib/zipfile.py#L1796-L1835 # Copyright Python Software Foundation, licensed under PSF License Version 2 # See LICENSE file for full license agreement and notice of copyright def writestr(self, zinfo_or_arcname, data, compress_type=None, compresslevel=None): """Write a file into the archive. The contents is 'data', which may be either a 'str' or a 'bytes' instance; if it is a 'str', it is encoded as UTF-8 first. 
'zinfo_or_arcname' is either a ZipInfo instance or the name of the file in the archive.""" if isinstance(data, str): data = data.encode("utf-8") if not isinstance(zinfo_or_arcname, ZipInfo): zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=time.localtime(time.time())[:6]) zinfo.compress_type = self.compression zinfo._compresslevel = self.compresslevel if zinfo.filename.endswith("/"): zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x zinfo.external_attr |= 0x10 # MS-DOS directory flag else: zinfo.external_attr = 0o600 << 16 # ?rw------- else: zinfo = zinfo_or_arcname ## repro-zipfile ADDED ## # Overwrite date_time and extrnal_attr (permissions mode) zinfo = copy(zinfo) zinfo.date_time = date_time() if zinfo.is_dir(): zinfo.external_attr = (0o40000 | dir_mode()) << 16 zinfo.external_attr |= 0x10 # MS-DOS directory flag else: zinfo.external_attr = file_mode() << 16 ######################### if not self.fp: raise ValueError("Attempt to write to ZIP archive that was already closed") if self._writing: raise ValueError("Can't write to ZIP archive while an open writing handle exists.") if compress_type is not None: zinfo.compress_type = compress_type if compresslevel is not None: zinfo._compresslevel = compresslevel zinfo.file_size = len(data) # Uncompressed size with self._lock: with self.open(zinfo, mode="w") as dest: dest.write(data) if sys.version_info < (3, 11): # Following method modified from Python 3.11 # https://github.com/python/cpython/blob/202efe1a3bcd499f3bf17bd953c6d36d47747e78/Lib/zipfile.py#L1837-L1870 # Copyright Python Software Foundation, licensed under PSF License Version 2 # See LICENSE file for full license agreement and notice of copyright def mkdir(self, zinfo_or_directory_name, mode=511): """Creates a directory inside the zip archive.""" if isinstance(zinfo_or_directory_name, ZipInfo): zinfo = zinfo_or_directory_name if not zinfo.is_dir(): raise ValueError("The given ZipInfo does not describe a directory") elif isinstance(zinfo_or_directory_name, str): directory_name = zinfo_or_directory_name if not directory_name.endswith("/"): directory_name += "/" zinfo = ZipInfo(directory_name) zinfo.compress_size = 0 zinfo.CRC = 0 zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16 zinfo.file_size = 0 zinfo.external_attr |= 0x10 else: raise TypeError("Expected type str or ZipInfo") ## repro-zipfile ADDED ## # Overwrite date_time and extrnal_attr (permissions mode) zinfo = copy(zinfo) zinfo.date_time = date_time() zinfo.external_attr = (0o40000 | dir_mode()) << 16 zinfo.external_attr |= 0x10 # MS-DOS directory flag ######################### with self._lock: if self._seekable: self.fp.seek(self.start_dir) zinfo.header_offset = self.fp.tell() # Start of header bytes if zinfo.compress_type == ZIP_LZMA: # Compressed data includes an end-of-stream (EOS) marker zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 self._writecheck(zinfo) self._didModify = True self.filelist.append(zinfo) self.NameToInfo[zinfo.filename] = zinfo self.fp.write(zinfo.FileHeader(False)) self.start_dir = self.fp.tell() pyglossary-5.0.9/pyglossary/reverse.py000066400000000000000000000122421476751035500202200ustar00rootroot00000000000000 from __future__ import annotations import logging import re import typing from collections.abc import Iterable, Iterator from operator import itemgetter from typing import TYPE_CHECKING if TYPE_CHECKING: from .glossary_types import EntryType __all__ = ["reverseGlossary"] log = logging.getLogger("pyglossary") if TYPE_CHECKING: class _GlossaryType(typing.Protocol): def __iter__(self) -> 
Iterator[EntryType]: ... def getInfo(self, key: str) -> str: ... def progressInit( self, *args, # noqa: ANN002 ) -> None: ... def progress(self, pos: int, total: int, unit: str = "entries") -> None: ... def progressEnd(self) -> None: ... @property def progressbar(self) -> bool: ... @progressbar.setter def progressbar(self, enabled: bool) -> None: ... def reverseGlossary( glos: _GlossaryType, savePath: str = "", words: list[str] | None = None, includeDefs: bool = False, reportStep: int = 300, saveStep: int = 1000, # set this to zero to disable auto saving **kwargs, ) -> Iterator[int]: """ This is a generator Usage: for wordIndex in reverseGlossary(glos, ...): pass Inside the `for` loop, you can pause by waiting (for input or a flag) or stop by breaking Potential keyword arguments: words = None ## None, or list reportStep = 300 saveStep = 1000 savePath = "" matchWord = True sepChars = ".,،" maxNum = 100 minRel = 0.0 minWordLen = 3 includeDefs = False showRel = "None" allowed values: None, "Percent", "Percent At First" """ if not savePath: savePath = glos.getInfo("name") + ".txt" if saveStep < 2: raise ValueError("saveStep must be more than 1") entries: list[EntryType] = list(glos) log.info(f"loaded {len(entries)} entries into memory") if words: words = list(words) else: words = takeOutputWords(glos, entries) wordCount = len(words) log.info( f"Reversing to file {savePath!r}" f", number of words: {wordCount}", ) glos.progressInit("Reversing") wcThreshold = wordCount // 200 + 1 with open(savePath, "w", encoding="utf-8") as saveFile: for wordI in range(wordCount): word = words[wordI] if wordI % wcThreshold == 0: glos.progress(wordI, wordCount) if wordI % saveStep == 0 and wordI > 0: saveFile.flush() result = searchWordInDef( entries, word, includeDefs=includeDefs, **kwargs, ) if result: try: if includeDefs: defi = "\\n\\n".join(result) else: defi = ", ".join(result) + "." 
except Exception: log.exception("") log.debug(f"{result = }") return saveFile.write(f"{word}\t{defi}\n") yield wordI glos.progressEnd() yield wordCount def takeOutputWords( glos: _GlossaryType, entryIter: Iterable[EntryType], minWordLen: int = 3, ) -> list[str]: # fr"[\w]{{{minWordLen},}}" wordPattern = re.compile(r"[\w]{%d,}" % minWordLen, re.UNICODE) words = set() progressbar, glos.progressbar = glos.progressbar, False for entry in entryIter: words.update(wordPattern.findall( entry.defi, )) glos.progressbar = progressbar return sorted(words) def searchWordInDef( entryIter: Iterable[EntryType], st: str, matchWord: bool = True, sepChars: str = ".,،", maxNum: int = 100, minRel: float = 0.0, minWordLen: int = 3, includeDefs: bool = False, showRel: str = "Percent", # "Percent" | "Percent At First" | "" ) -> list[str]: # searches word "st" in definitions of the glossary splitPattern = re.compile( "|".join(re.escape(x) for x in sepChars), re.UNICODE, ) wordPattern = re.compile(r"[\w]{%d,}" % minWordLen, re.UNICODE) outRel: list[tuple[str, float] | tuple[str, float, str]] = [] for entry in entryIter: words = entry.l_word defi = entry.defi if st not in defi: continue for word in words: rel = 0.0 # relation value of word (0 <= rel <= 1) for part in splitPattern.split(defi): if not part: continue if matchWord: partWords = wordPattern.findall( part, ) if not partWords: continue rel = max( rel, partWords.count(st) / len(partWords), ) else: rel = max( rel, part.count(st) * len(st) / len(part), ) if rel <= minRel: continue if includeDefs: outRel.append((word, rel, defi)) else: outRel.append((word, rel)) outRel.sort( key=itemgetter(1), reverse=True, ) n = len(outRel) if n > maxNum > 0: outRel = outRel[:maxNum] n = maxNum num = 0 out = [] if includeDefs: for j in range(n): numP = num w, num, m = outRel[j] # type: ignore m = m.replace("\n", "\\n").replace("\t", "\\t") onePer = int(1.0 / num) if onePer == 1.0: out.append(f"{w}\\n{m}") elif showRel == "Percent": out.append(f"{w}(%{100*num})\\n{m}") elif showRel == "Percent At First": if num == numP: out.append(f"{w}\\n{m}") else: out.append(f"{w}(%{100*num})\\n{m}") else: out.append(f"{w}\\n{m}") return out for j in range(n): numP = num w, num = outRel[j] # type: ignore onePer = int(1.0 / num) if onePer == 1.0: out.append(w) elif showRel == "Percent": out.append(f"{w}(%{100*num})") elif showRel == "Percent At First": if num == numP: out.append(w) else: out.append(f"{w}(%{100*num})") else: out.append(w) return out pyglossary-5.0.9/pyglossary/sdsqlite.py000066400000000000000000000057421476751035500204040ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import annotations from os.path import isfile from typing import TYPE_CHECKING from .core import log from .glossary_utils import Error if TYPE_CHECKING: import sqlite3 from collections.abc import Generator, Iterator from .glossary_types import EntryType, ReaderGlossaryType, WriterGlossaryType from .text_utils import ( joinByBar, splitByBar, ) class Writer: def __init__(self, glos: WriterGlossaryType) -> None: self._glos = glos self._clear() def _clear(self) -> None: self._filename = "" self._con: sqlite3.Connection | None self._cur: sqlite3.Cursor | None def open(self, filename: str) -> None: import sqlite3 if isfile(filename): raise OSError(f"file {filename!r} already exists") self._filename = filename self._con = sqlite3.connect(filename) self._cur = self._con.cursor() self._con.execute( "CREATE TABLE dict (" "word TEXT," "wordlower TEXT," "alts TEXT," "defi TEXT," "defiFormat CHAR(1)," 
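# bindata holds the raw content of data/resource entries,
# written by Writer.write() below when entry.isData() is true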
"bindata BLOB)", ) self._con.execute( "CREATE INDEX dict_sortkey ON dict(wordlower, word);", ) def write(self) -> Generator[None, EntryType, None]: con = self._con cur = self._cur if not (con and cur): log.error(f"write: {con=}, {cur=}") return count = 0 while True: entry = yield if entry is None: break word = entry.l_word[0] alts = joinByBar(entry.l_word[1:]) defi = entry.defi defiFormat = entry.defiFormat bindata = None if entry.isData(): bindata = entry.data cur.execute( "insert into dict(" "word, wordlower, alts, " "defi, defiFormat, bindata)" " values (?, ?, ?, ?, ?, ?)", ( word, word.lower(), alts, defi, defiFormat, bindata, ), ) count += 1 if count % 1000 == 0: con.commit() con.commit() def finish(self) -> None: if self._cur: self._cur.close() if self._con: self._con.close() self._clear() class Reader: def __init__(self, glos: ReaderGlossaryType) -> None: self._glos = glos self._clear() def _clear(self) -> None: self._filename = "" self._con: sqlite3.Connection | None self._cur: sqlite3.Cursor | None def open(self, filename: str) -> None: from sqlite3 import connect self._filename = filename self._con = connect(filename) self._cur = self._con.cursor() # self._glos.setDefaultDefiFormat("m") def __len__(self) -> int: if self._cur is None: return 0 self._cur.execute("select count(*) from dict") return self._cur.fetchone()[0] def __iter__(self) -> Iterator[EntryType]: if self._cur is None: raise Error("SQLite cursor is closed") self._cur.execute( "select word, alts, defi, defiFormat from dict order by wordlower, word", ) for row in self._cur: words = [row[0]] + splitByBar(row[1]) defi = row[2] defiFormat = row[3] yield self._glos.newEntry(words, defi, defiFormat=defiFormat) def close(self) -> None: if self._cur: self._cur.close() if self._con: self._con.close() self._clear() pyglossary-5.0.9/pyglossary/slob.py000066400000000000000000001063321476751035500175100ustar00rootroot00000000000000# slob.py # Copyright (C) 2020-2023 Saeed Rasooli # Copyright (C) 2019 Igor Tkach # as part of https://github.com/itkach/slob # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . 
from __future__ import annotations import encodings import io import operator import os import pickle import sys import tempfile import typing import warnings from abc import abstractmethod from bisect import bisect_left from builtins import open as fopen from datetime import datetime, timezone from functools import cache, lru_cache from io import BufferedIOBase, IOBase from os.path import isdir from struct import calcsize, pack, unpack from threading import RLock from types import MappingProxyType, TracebackType from typing import ( TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar, cast, ) from uuid import UUID, uuid4 import icu # type: ignore from icu import Collator, Locale, UCollAttribute, UCollAttributeValue if TYPE_CHECKING: from collections.abc import Callable, Iterator, Mapping, Sequence from .icu_types import T_Collator __all__ = [ "MAX_TEXT_LEN", "MAX_TINY_TEXT_LEN", "MIME_HTML", "MIME_TEXT", "MultiFileReader", "UnknownEncoding", "Writer", "encodings", "fopen", "open", "read_byte_string", "read_header", "sortkey", ] DEFAULT_COMPRESSION = "lzma2" UTF8 = "utf-8" MAGIC = b"!-1SLOB\x1f" class Compression(NamedTuple): compress: Callable[..., bytes] # first arg: bytes decompress: Callable[[bytes], bytes] class Ref(NamedTuple): key: str bin_index: int item_index: int fragment: str class Header(NamedTuple): magic: bytes uuid: UUID encoding: str compression: str tags: MappingProxyType[str, str] content_types: Sequence[str] blob_count: int store_offset: int refs_offset: int size: int U_CHAR = ">B" U_CHAR_SIZE = calcsize(U_CHAR) U_SHORT = ">H" U_SHORT_SIZE = calcsize(U_SHORT) U_INT = ">I" U_INT_SIZE = calcsize(U_INT) U_LONG_LONG = ">Q" U_LONG_LONG_SIZE = calcsize(U_LONG_LONG) def calcmax(len_size_spec: str) -> int: return 2 ** (calcsize(len_size_spec) * 8) - 1 MAX_TEXT_LEN = calcmax(U_SHORT) MAX_TINY_TEXT_LEN = calcmax(U_CHAR) MAX_LARGE_BYTE_STRING_LEN = calcmax(U_INT) MAX_BIN_ITEM_COUNT = calcmax(U_SHORT) PRIMARY: int = Collator.PRIMARY SECONDARY: int = Collator.SECONDARY TERTIARY: int = Collator.TERTIARY QUATERNARY: int = Collator.QUATERNARY IDENTICAL: int = Collator.IDENTICAL class CompressionModule(typing.Protocol): # gzip.compress(data, compresslevel=9, *, mtime=None) # bz2.compress(data, compresslevel=9) # zlib.compress(data, /, level=-1, wbits=15) # lzma.compress(data, format=1, check=-1, preset=None, filters=None) @staticmethod def compress(data: bytes, compresslevel: int = 9) -> bytes: raise NotImplementedError # gzip.decompress(data) # bz2.decompress(data) # zlib.decompress(data, /, wbits=15, bufsize=16384) # lzma.decompress(data, format=0, memlimit=None, filters=None) @staticmethod def decompress( data: bytes, **kwargs: Mapping[str, Any], ) -> bytes: raise NotImplementedError def init_compressions() -> dict[str, Compression]: def ident(x: bytes) -> bytes: return x compressions: dict[str, Compression] = { "": Compression(ident, ident), } for name in ("bz2", "zlib"): m: CompressionModule try: m = cast("CompressionModule", __import__(name)) except ImportError: warnings.showwarning( message=f"{name} is not available", category=ImportWarning, filename=__file__, lineno=0, ) continue def compress_new(x: bytes, m: CompressionModule = m) -> bytes: return m.compress(x, 9) compressions[name] = Compression(compress_new, m.decompress) try: import lzma except ImportError: warnings.warn("lzma is not available", stacklevel=1) else: filters = [{"id": lzma.FILTER_LZMA2}] compressions["lzma2"] = Compression( lambda s: lzma.compress( s, format=lzma.FORMAT_RAW, filters=filters, ), lambda s: 
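			# FORMAT_RAW omits the usual .xz/.lzma container headers, so the
			# exact same filter chain must be passed to decompress as was
			# used to compress: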
lzma.decompress( s, format=lzma.FORMAT_RAW, filters=filters, ), ) return compressions COMPRESSIONS = init_compressions() del init_compressions MIME_TEXT = "text/plain" MIME_HTML = "text/html" class FileFormatException(Exception): pass class UnknownFileFormat(FileFormatException): pass class UnknownCompression(FileFormatException): pass class UnknownEncoding(FileFormatException): pass class IncorrectFileSize(FileFormatException): pass @cache def sortkey( strength: int, maxlength: int | None = None, ) -> Callable: # pass empty locale to use root locale # if you pass no arg, it will use system locale c: T_Collator = Collator.createInstance(Locale("")) c.setStrength(strength) c.setAttribute( UCollAttribute.ALTERNATE_HANDLING, UCollAttributeValue.SHIFTED, ) if maxlength is None: return c.getSortKey return lambda x: c.getSortKey(x)[:maxlength] class MultiFileReader(BufferedIOBase): def __init__( self, *args: str, ) -> None: filenames: list[str] = list(args) files = [] ranges = [] offset = 0 for name in filenames: size = os.stat(name).st_size ranges.append(range(offset, offset + size)) files.append(fopen(name, "rb")) offset += size self.size = offset self._ranges = ranges self._files = files self._fcount = len(self._files) self._offset = -1 self.seek(0) def __enter__(self) -> MultiFileReader: return self def __exit__( self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None, ) -> None: self.close() def close(self) -> None: for f in self._files: f.close() self._files.clear() self._ranges.clear() @property def closed(self) -> bool: return len(self._ranges) == 0 def isatty(self) -> bool: # noqa: PLR6301 return False def readable(self) -> bool: # noqa: PLR6301 return True def seek( self, offset: int, whence: int = io.SEEK_SET, ) -> int: if whence == io.SEEK_SET: self._offset = offset elif whence == io.SEEK_CUR: self._offset += offset elif whence == io.SEEK_END: self._offset = self.size + offset else: raise ValueError(f"Invalid value for parameter whence: {whence!r}") return self._offset def seekable(self) -> bool: # noqa: PLR6301 return True def tell(self) -> int: return self._offset def writable(self) -> bool: # noqa: PLR6301 return False def read(self, n: int | None = -1) -> bytes: file_index = -1 actual_offset = 0 for i, r in enumerate(self._ranges): if self._offset in r: file_index = i actual_offset = self._offset - r.start break result = b"" to_read = self.size if n == -1 or n is None else n while -1 < file_index < self._fcount: f = self._files[file_index] f.seek(actual_offset) read = f.read(to_read) read_count = len(read) self._offset += read_count result += read to_read -= read_count if to_read > 0: file_index += 1 actual_offset = 0 else: break return result class KeydItemDict: def __init__( self, blobs: Sequence[Blob | Ref], strength: int, maxlength: int | None = None, ) -> None: self.blobs = blobs self.sortkey = sortkey(strength, maxlength=maxlength) def __len__(self) -> int: return len(self.blobs) # https://docs.python.org/3/library/bisect.html # key= parameter to bisect_left is added in Python 3.10 def __getitem__(self, key: str) -> Iterator[Blob | Ref]: blobs = self.blobs key_as_sk = self.sortkey(key) i = bisect_left( blobs, key_as_sk, key=lambda blob: self.sortkey(blob.key), ) if i == len(blobs): return while i < len(blobs): if self.sortkey(blobs[i].key) == key_as_sk: yield blobs[i] else: break i += 1 def __contains__(self, key: str) -> bool: try: next(self[key]) except StopIteration: return False return True class Blob: def __init__( # 
noqa: PLR0913 self, content_id: int, key: str, fragment: str, read_content_type_func: Callable[[], str], read_func: Callable, ) -> None: # print(f"read_func is {type(read_func)}") # read_func is self._content_id = content_id self._key = key self._fragment = fragment self._read_content_type = read_content_type_func self._read = read_func @property def identity(self) -> int: return self._content_id @property def key(self) -> str: return self._key @property def fragment(self) -> str: return self._fragment @property def content_type(self) -> str: return self._read_content_type() @property def content(self) -> bytes: return self._read() def __str__(self) -> str: return self.key def __repr__(self) -> str: return f"<{self.__class__.__module__}.{self.__class__.__name__} {self.key}>" def read_byte_string(f: IOBase, len_spec: str) -> bytes: length = unpack(len_spec, f.read(calcsize(len_spec)))[0] return f.read(length) class StructReader: def __init__( self, file: IOBase, encoding: str | None = None, ) -> None: self._file = file self.encoding = encoding def read_int(self) -> int: s = self.read(U_INT_SIZE) return unpack(U_INT, s)[0] def read_long(self) -> int: b = self.read(U_LONG_LONG_SIZE) return unpack(U_LONG_LONG, b)[0] def read_byte(self) -> int: s = self.read(U_CHAR_SIZE) return unpack(U_CHAR, s)[0] def read_short(self) -> int: return unpack(U_SHORT, self._file.read(U_SHORT_SIZE))[0] def _read_text(self, len_spec: str) -> str: if self.encoding is None: raise ValueError("self.encoding is None") max_len = 2 ** (8 * calcsize(len_spec)) - 1 byte_string = read_byte_string(self._file, len_spec) if len(byte_string) == max_len: terminator = byte_string.find(0) if terminator > -1: byte_string = byte_string[:terminator] return byte_string.decode(self.encoding) def read_tiny_text(self) -> str: return self._read_text(U_CHAR) def read_text(self) -> str: return self._read_text(U_SHORT) def read(self, n: int) -> bytes: return self._file.read(n) def write(self, data: bytes) -> int: return self._file.write(data) def seek(self, pos: int) -> None: self._file.seek(pos) def tell(self) -> int: return self._file.tell() def close(self) -> None: self._file.close() def flush(self) -> None: self._file.flush() class StructWriter: def __init__( self, file: io.BufferedWriter, encoding: str | None = None, ) -> None: self._file = file self.encoding = encoding def write_int(self, value: int) -> None: self._file.write(pack(U_INT, value)) def write_long(self, value: int) -> None: self._file.write(pack(U_LONG_LONG, value)) def write_byte(self, value: int) -> None: self._file.write(pack(U_CHAR, value)) def write_short(self, value: int) -> None: self._file.write(pack(U_SHORT, value)) def _write_text( self, text: str, len_size_spec: str, encoding: str | None = None, pad_to_length: int | None = None, ) -> None: if encoding is None: encoding = self.encoding if encoding is None: raise ValueError("encoding is None") text_bytes = text.encode(encoding) length = len(text_bytes) max_length = calcmax(len_size_spec) if length > max_length: raise ValueError(f"Text is too long for size spec {len_size_spec}") self._file.write( pack( len_size_spec, pad_to_length or length, ), ) self._file.write(text_bytes) if pad_to_length: for _ in range(pad_to_length - length): self._file.write(pack(U_CHAR, 0)) def write_tiny_text( self, text: str, encoding: str | None = None, editable: bool = False, ) -> None: pad_to_length = 255 if editable else None self._write_text( text, U_CHAR, encoding=encoding, pad_to_length=pad_to_length, ) def write_text( self, text: 
str, encoding: str | None = None, ) -> None: self._write_text(text, U_SHORT, encoding=encoding) def close(self) -> None: self._file.close() def flush(self) -> None: self._file.flush() @property def name(self) -> str: return self._file.name def tell(self) -> int: return self._file.tell() def write(self, data: bytes) -> int: return self._file.write(data) def read_header(file: MultiFileReader) -> Header: file.seek(0) magic = file.read(len(MAGIC)) if magic != MAGIC: raise UnknownFileFormat(f"magic {magic!r} != {MAGIC!r}") uuid = UUID(bytes=file.read(16)) encoding = read_byte_string(file, U_CHAR).decode(UTF8) if encodings.search_function(encoding) is None: raise UnknownEncoding(encoding) reader = StructReader(file, encoding) compression = reader.read_tiny_text() if compression not in COMPRESSIONS: raise UnknownCompression(compression) def read_tags() -> dict[str, str]: count = reader.read_byte() return {reader.read_tiny_text(): reader.read_tiny_text() for _ in range(count)} tags = read_tags() def read_content_types() -> Sequence[str]: content_types: list[str] = [] count = reader.read_byte() for _ in range(count): content_type = reader.read_text() content_types.append(content_type) return tuple(content_types) content_types = read_content_types() blob_count = reader.read_int() store_offset = reader.read_long() size = reader.read_long() refs_offset = reader.tell() return Header( magic=magic, uuid=uuid, encoding=encoding, compression=compression, tags=MappingProxyType(tags), content_types=content_types, blob_count=blob_count, store_offset=store_offset, refs_offset=refs_offset, size=size, ) def meld_ints(a: int, b: int) -> int: return (a << 16) | b def unmeld_ints(c: int) -> tuple[int, int]: bstr = bin(c).lstrip("0b").zfill(48) a, b = bstr[-48:-16], bstr[-16:] return int(a, 2), int(b, 2) class Slob: def __init__( self, *filenames: str, ) -> None: self._f = MultiFileReader(*filenames) try: self._header = read_header(self._f) if self._f.size != self._header.size: raise IncorrectFileSize( f"File size should be {self._header.size}, " f"{self._f.size} bytes found", ) except FileFormatException: self._f.close() raise self._refs = RefList( self._f, self._header.encoding, offset=self._header.refs_offset, ) self._g = MultiFileReader(*filenames) self._store = Store( self._g, self._header.store_offset, COMPRESSIONS[self._header.compression].decompress, self._header.content_types, ) def __enter__(self) -> Slob: return self def __exit__( self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None, ) -> None: self.close() @property def identity(self) -> str: return self._header.uuid.hex @property def content_types(self) -> Sequence[str]: return self._header.content_types @property def tags(self) -> MappingProxyType[str, str]: return self._header.tags @property def blob_count(self) -> int: return self._header.blob_count @property def compression(self) -> str: return self._header.compression @property def encoding(self) -> str: return self._header.encoding def __len__(self) -> int: return len(self._refs) def __getitem__(self, i: int) -> Any: # this is called by bisect_left return self.getBlobByIndex(i) def __iter__(self) -> Iterator[Blob]: for i in range(len(self._refs)): yield self.getBlobByIndex(i) def count(self) -> int: # just to comply with Sequence and make type checker happy raise NotImplementedError def index(self, x: Blob) -> int: # just to comply with Sequence and make type checker happy raise NotImplementedError def getBlobByIndex(self, i: int) -> Blob: ref = 
self._refs[i] def read_func() -> bytes: return self._store.get(ref.bin_index, ref.item_index)[1] read_func = lru_cache(maxsize=None)(read_func) def read_content_type_func() -> str: return self._store.content_type(ref.bin_index, ref.item_index) content_id = meld_ints(ref.bin_index, ref.item_index) return Blob( content_id=content_id, key=ref.key, fragment=ref.fragment, read_content_type_func=read_content_type_func, read_func=read_func, ) def get(self, blob_id: int) -> tuple[str, bytes]: """Returns (content_type: str, content: bytes).""" bin_index, bin_item_index = unmeld_ints(blob_id) return self._store.get(bin_index, bin_item_index) @cache def as_dict( self: Slob, strength: int = TERTIARY, maxlength: int | None = None, ) -> KeydItemDict: return KeydItemDict( cast("Sequence", self), strength=strength, maxlength=maxlength, ) def close(self) -> None: self._f.close() self._g.close() def open(*filenames: str) -> Slob: # noqa: A001 return Slob(*filenames) class BinMemWriter: def __init__(self) -> None: self.content_type_ids: list[int] = [] self.item_dir: list[bytes] = [] self.items: list[bytes] = [] self.current_offset = 0 def add(self, content_type_id: int, blob_bytes: bytes) -> None: self.content_type_ids.append(content_type_id) self.item_dir.append(pack(U_INT, self.current_offset)) length_and_bytes = pack(U_INT, len(blob_bytes)) + blob_bytes self.items.append(length_and_bytes) self.current_offset += len(length_and_bytes) def __len__(self) -> int: return len(self.item_dir) def finalize( self, fout: BufferedIOBase, compress: Callable[[bytes], bytes], ) -> None: count = len(self) fout.write(pack(U_INT, count)) fout.writelines( pack(U_CHAR, content_type_id) for content_type_id in self.content_type_ids ) content = b"".join(self.item_dir + self.items) compressed = compress(content) fout.write(pack(U_INT, len(compressed))) fout.write(compressed) self.content_type_ids.clear() self.item_dir.clear() self.items.clear() ItemT = TypeVar("ItemT") class ItemList(Generic[ItemT]): def __init__( self, reader: StructReader, offset: int, count_or_spec: str | int, pos_spec: str, ) -> None: self.lock = RLock() self.reader = reader reader.seek(offset) count: int if isinstance(count_or_spec, str): count_spec = count_or_spec count = unpack(count_spec, reader.read(calcsize(count_spec)))[0] elif isinstance(count_or_spec, int): count = count_or_spec else: raise TypeError(f"invalid {count_or_spec = }") self._count: int = count self.pos_offset = reader.tell() self.pos_spec = pos_spec self.pos_size = calcsize(pos_spec) self.data_offset = self.pos_offset + self.pos_size * count def __len__(self) -> int: return self._count def pos(self, i: int) -> int: with self.lock: self.reader.seek(self.pos_offset + self.pos_size * i) return unpack(self.pos_spec, self.reader.read(self.pos_size))[0] def read(self, pos: int) -> ItemT: with self.lock: self.reader.seek(self.data_offset + pos) return self._read_item() @abstractmethod def _read_item(self) -> ItemT: pass def __getitem__(self, i: int) -> ItemT: if i >= len(self) or i < 0: raise IndexError("index out of range") return self.read(self.pos(i)) class RefList(ItemList[Ref]): def __init__( self, f: IOBase, encoding: str, offset: int = 0, count: int | None = None, ) -> None: super().__init__( reader=StructReader(f, encoding), offset=offset, count_or_spec=U_INT if count is None else count, pos_spec=U_LONG_LONG, ) @lru_cache(maxsize=512) def __getitem__( self, i: int, ) -> Ref: if i >= len(self) or i < 0: raise IndexError("index out of range") return cast("Ref", self.read(self.pos(i))) def 
_read_item(self) -> Ref: key = self.reader.read_text() bin_index = self.reader.read_int() item_index = self.reader.read_short() fragment = self.reader.read_tiny_text() return Ref( key=key, bin_index=bin_index, item_index=item_index, fragment=fragment, ) @cache def as_dict( self: RefList, strength: int = TERTIARY, maxlength: int | None = None, ) -> KeydItemDict: return KeydItemDict( cast("Sequence", self), strength=strength, maxlength=maxlength, ) class Bin(ItemList[bytes]): def __init__( self, count: int, bin_bytes: bytes, ) -> None: super().__init__( reader=StructReader(io.BytesIO(bin_bytes)), offset=0, count_or_spec=count, pos_spec=U_INT, ) def _read_item(self) -> bytes: content_len = self.reader.read_int() return self.reader.read(content_len) class StoreItem(NamedTuple): content_type_ids: list[int] compressed_content: bytes class Store(ItemList[StoreItem]): def __init__( self, file: IOBase, offset: int, decompress: Callable[[bytes], bytes], content_types: Sequence[str], ) -> None: super().__init__( reader=StructReader(file), offset=offset, count_or_spec=U_INT, pos_spec=U_LONG_LONG, ) self.decompress = decompress self.content_types = content_types @lru_cache(maxsize=32) def __getitem__( self, i: int, ) -> StoreItem: if i >= len(self) or i < 0: raise IndexError("index out of range") return cast("StoreItem", self.read(self.pos(i))) def _read_item(self) -> StoreItem: bin_item_count = self.reader.read_int() packed_content_type_ids = self.reader.read(bin_item_count * U_CHAR_SIZE) content_type_ids = [] for i in range(bin_item_count): content_type_id = unpack(U_CHAR, packed_content_type_ids[i : i + 1])[0] content_type_ids.append(content_type_id) content_length = self.reader.read_int() content = self.reader.read(content_length) return StoreItem( content_type_ids=content_type_ids, compressed_content=content, ) def _content_type( self, bin_index: int, item_index: int, ) -> tuple[str, StoreItem]: store_item = self[bin_index] content_type_id = store_item.content_type_ids[item_index] content_type = self.content_types[content_type_id] return content_type, store_item def content_type( self, bin_index: int, item_index: int, ) -> str: return self._content_type(bin_index, item_index)[0] @lru_cache(maxsize=16) def _decompress(self, bin_index: int) -> bytes: store_item = self[bin_index] return self.decompress(store_item.compressed_content) def get( self, bin_index: int, item_index: int, ) -> tuple[str, bytes]: content_type, store_item = self._content_type(bin_index, item_index) content = self._decompress(bin_index) count = len(store_item.content_type_ids) store_bin = Bin(count, content) content = store_bin[item_index] return (content_type, content) class WriterEvent(NamedTuple): name: str data: Any class Writer: def __init__( # noqa: PLR0913 self, filename: str, workdir: str | None = None, encoding: str = UTF8, compression: str | None = DEFAULT_COMPRESSION, min_bin_size: int = 512 * 1024, max_redirects: int = 5, observer: Callable[[WriterEvent], None] | None = None, version_info: bool = True, ) -> None: self.filename = filename self.observer = observer if os.path.exists(self.filename): raise SystemExit(f"File {self.filename!r} already exists") # make sure we can write with fopen(self.filename, "wb"): pass self.encoding = encoding if encodings.search_function(self.encoding) is None: raise UnknownEncoding(self.encoding) self.workdir = workdir self.tmpdir = tmpdir = tempfile.TemporaryDirectory( prefix=f"{os.path.basename(filename)}-", dir=workdir, ) self.f_ref_positions = self._wbfopen("ref-positions") 
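		# The writer accumulates four scratch files in the temp dir and
		# concatenates them into the final container in finalize():
		#   ref-positions    fixed-width (U_LONG_LONG) offsets into "refs"
		#   refs             (key, bin_index, item_index, fragment) records
		#   store-positions  fixed-width offsets into "store"
		#   store            compressed bins of blob content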
self.f_store_positions = self._wbfopen("store-positions") self.f_refs = self._wbfopen("refs") self.f_store = self._wbfopen("store") self.max_redirects = max_redirects if max_redirects: self.aliases_path = os.path.join(tmpdir.name, "aliases") self.f_aliases = Writer( self.aliases_path, workdir=tmpdir.name, max_redirects=0, compression=None, version_info=False, ) if compression is None: compression = "" if compression not in COMPRESSIONS: raise UnknownCompression(compression) self.compress = COMPRESSIONS[compression].compress self.compression = compression self.content_types: dict[str, int] = {} self.min_bin_size = min_bin_size self.current_bin: BinMemWriter | None = None created_at = ( os.getenv("SLOB_TIMESTAMP") or datetime.now(timezone.utc).isoformat() ) self.blob_count = 0 self.ref_count = 0 self.bin_count = 0 self._tags = { "created.at": created_at, } if version_info: self._tags.update( { "version.python": sys.version.replace("\n", " "), "version.pyicu": icu.VERSION, "version.icu": icu.ICU_VERSION, }, ) self.tags = MappingProxyType(self._tags) def _wbfopen(self, name: str) -> StructWriter: return StructWriter( fopen(os.path.join(self.tmpdir.name, name), "wb"), encoding=self.encoding, ) def tag(self, name: str, value: str = "") -> None: if len(name.encode(self.encoding)) > MAX_TINY_TEXT_LEN: self._fire_event("tag_name_too_long", (name, value)) return if len(value.encode(self.encoding)) > MAX_TINY_TEXT_LEN: self._fire_event("tag_value_too_long", (name, value)) value = "" self._tags[name] = value @staticmethod def key_is_too_long(actual_key, fragment) -> bool: return len(actual_key) > MAX_TEXT_LEN or len(fragment) > MAX_TINY_TEXT_LEN @staticmethod def _split_key( key: str | tuple[str, str], ) -> tuple[str, str]: if isinstance(key, str): actual_key = key fragment = "" else: actual_key, fragment = key return actual_key, fragment def add( self, blob: bytes, *keys: str, content_type: str = "", ) -> None: if len(blob) > MAX_LARGE_BYTE_STRING_LEN: self._fire_event("content_too_long", blob) return if len(content_type) > MAX_TEXT_LEN: self._fire_event("content_type_too_long", content_type) return actual_keys = [] for key in keys: actual_key, fragment = self._split_key(key) if self.key_is_too_long(actual_key, fragment): self._fire_event("key_too_long", key) else: actual_keys.append((actual_key, fragment)) if not actual_keys: return current_bin = self.current_bin if current_bin is None: current_bin = self.current_bin = BinMemWriter() self.bin_count += 1 if content_type not in self.content_types: self.content_types[content_type] = len(self.content_types) current_bin.add(self.content_types[content_type], blob) self.blob_count += 1 bin_item_index = len(current_bin) - 1 bin_index = self.bin_count - 1 for actual_key, fragment in actual_keys: self._write_ref(actual_key, bin_index, bin_item_index, fragment) if ( current_bin.current_offset > self.min_bin_size or len(current_bin) == MAX_BIN_ITEM_COUNT ): self._write_current_bin() def add_alias(self, key: str, target_key: str) -> None: if not self.max_redirects: raise NotImplementedError if self.key_is_too_long(*self._split_key(key)): self._fire_event("alias_too_long", key) return if self.key_is_too_long(*self._split_key(target_key)): self._fire_event("alias_target_too_long", target_key) return self.f_aliases.add(pickle.dumps(target_key), key) def _fire_event( self, name: str, data: Any = None, ) -> None: if self.observer: self.observer(WriterEvent(name, data)) def _write_current_bin(self) -> None: current_bin = self.current_bin if current_bin is None: return 
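		# Record where this bin starts inside "store", then compress and
		# append it; add() triggers this flush whenever the open bin grows
		# past min_bin_size or reaches MAX_BIN_ITEM_COUNT items.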
self.f_store_positions.write_long(self.f_store.tell()) current_bin.finalize( self.f_store._file, self.compress, ) self.current_bin = None def _write_ref( self, key: str, bin_index: int, item_index: int, fragment: str = "", ) -> None: self.f_ref_positions.write_long(self.f_refs.tell()) self.f_refs.write_text(key) self.f_refs.write_int(bin_index) self.f_refs.write_short(item_index) self.f_refs.write_tiny_text(fragment) self.ref_count += 1 def _sort(self) -> None: self._fire_event("begin_sort") f_ref_positions_sorted = self._wbfopen("ref-positions-sorted") self.f_refs.flush() self.f_ref_positions.close() with MultiFileReader(self.f_ref_positions.name, self.f_refs.name) as f: ref_list = RefList(f, self.encoding, count=self.ref_count) sortkey_func = sortkey(IDENTICAL) for i in sorted( range(len(ref_list)), key=lambda j: sortkey_func(ref_list[j].key), ): ref_pos = ref_list.pos(i) f_ref_positions_sorted.write_long(ref_pos) f_ref_positions_sorted.close() os.remove(self.f_ref_positions.name) os.rename(f_ref_positions_sorted.name, self.f_ref_positions.name) self.f_ref_positions = StructWriter( fopen(self.f_ref_positions.name, "ab"), encoding=self.encoding, ) self._fire_event("end_sort") def _resolve_aliases(self) -> None: # noqa: PLR0912 self._fire_event("begin_resolve_aliases") self.f_aliases.finalize() def read_key_frag(item: Blob, default_fragment: str) -> tuple[str, str]: key_frag = pickle.loads(item.content) if isinstance(key_frag, str): return key_frag, default_fragment to_key, fragment = key_frag return to_key, fragment with MultiFileReader( self.f_ref_positions.name, self.f_refs.name, ) as f_ref_list: ref_list = RefList(f_ref_list, self.encoding, count=self.ref_count) ref_dict = ref_list.as_dict() with Slob(self.aliases_path) as aliasesSlob: aliases = aliasesSlob.as_dict() path = os.path.join(self.tmpdir.name, "resolved-aliases") alias_writer = Writer( path, workdir=self.tmpdir.name, max_redirects=0, compression=None, version_info=False, ) for item in aliasesSlob: from_key = item.key keys = set() keys.add(from_key) to_key, fragment = read_key_frag(item, item.fragment) count = 0 while count <= self.max_redirects: # is target key itself a redirect? 
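					# Follow the alias chain for at most max_redirects hops,
					# collecting every intermediate key so that all of them end
					# up pointing at the final resolved target ref.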
try: alias_item: Blob = next(aliases[to_key]) except StopIteration: break orig_to_key = to_key to_key, fragment = read_key_frag( alias_item, fragment, ) count += 1 keys.add(orig_to_key) if count > self.max_redirects: self._fire_event("too_many_redirects", from_key) target_ref: Ref try: target_ref = cast("Ref", next(ref_dict[to_key])) except StopIteration: self._fire_event("alias_target_not_found", to_key) else: for key in keys: ref = Ref( key=key, bin_index=target_ref.bin_index, item_index=target_ref.item_index, # last fragment in the chain wins fragment=target_ref.fragment or fragment, ) alias_writer.add(pickle.dumps(ref), key) alias_writer.finalize() with Slob(path) as resolved_aliases_reader: previous = None targets = set() for item in resolved_aliases_reader: ref = pickle.loads(item.content) if previous is not None and ref.key != previous.key: for bin_index, item_index, fragment in sorted(targets): self._write_ref(previous.key, bin_index, item_index, fragment) targets.clear() targets.add((ref.bin_index, ref.item_index, ref.fragment)) previous = ref for bin_index, item_index, fragment in sorted(targets): self._write_ref(previous.key, bin_index, item_index, fragment) self._sort() self._fire_event("end_resolve_aliases") def finalize(self) -> None: self._fire_event("begin_finalize") if self.current_bin is not None: self._write_current_bin() self._sort() if self.max_redirects: self._resolve_aliases() files = ( self.f_ref_positions, self.f_refs, self.f_store_positions, self.f_store, ) for f in files: f.close() buf_size = 10 * 1024 * 1024 def write_tags(tags: MappingProxyType[str, Any], f: StructWriter) -> None: f.write(pack(U_CHAR, len(tags))) for key, value in tags.items(): f.write_tiny_text(key) f.write_tiny_text(value, editable=True) with fopen(self.filename, mode="wb") as output_file: out = StructWriter(output_file, self.encoding) out.write(MAGIC) out.write(uuid4().bytes) out.write_tiny_text(self.encoding, encoding=UTF8) out.write_tiny_text(self.compression) write_tags(self.tags, out) def write_content_types( content_types: dict[str, int], f: StructWriter, ) -> None: count = len(content_types) f.write(pack(U_CHAR, count)) types = sorted(content_types.items(), key=operator.itemgetter(1)) for content_type, _ in types: f.write_text(content_type) write_content_types(self.content_types, out) out.write_int(self.blob_count) store_offset = ( out.tell() + U_LONG_LONG_SIZE # this value + U_LONG_LONG_SIZE # file size value + U_INT_SIZE # ref count value + os.stat(self.f_ref_positions.name).st_size + os.stat(self.f_refs.name).st_size ) out.write_long(store_offset) out.flush() file_size = ( out.tell() # bytes written so far + U_LONG_LONG_SIZE # file size value + 2 * U_INT_SIZE # ref count and bin count ) file_size += sum(os.stat(f.name).st_size for f in files) out.write_long(file_size) def mv(src: StructWriter, out: StructWriter) -> None: fname = src.name self._fire_event("begin_move", fname) with fopen(fname, mode="rb") as f: while True: data = f.read(buf_size) if len(data) == 0: break out.write(data) out.flush() os.remove(fname) self._fire_event("end_move", fname) out.write_int(self.ref_count) mv(self.f_ref_positions, out) mv(self.f_refs, out) out.write_int(self.bin_count) mv(self.f_store_positions, out) mv(self.f_store, out) self.f_ref_positions = None # type: ignore # noqa: PGH003 self.f_refs = None # type: ignore # noqa: PGH003 self.f_store_positions = None # type: ignore # noqa: PGH003 self.f_store = None # type: ignore # noqa: PGH003 self.tmpdir.cleanup() self._fire_event("end_finalize") def 
size_header(self) -> int: size = 0 size += len(MAGIC) size += 16 # uuid bytes size += U_CHAR_SIZE + len(self.encoding.encode(UTF8)) size += U_CHAR_SIZE + len(self.compression.encode(self.encoding)) size += U_CHAR_SIZE # tag length size += U_CHAR_SIZE # content types count # tags and content types themselves counted elsewhere size += U_INT_SIZE # blob count size += U_LONG_LONG_SIZE # store offset size += U_LONG_LONG_SIZE # file size size += U_INT_SIZE # ref count size += U_INT_SIZE # bin count return size def size_tags(self) -> int: size = 0 for key in self.tags: size += U_CHAR_SIZE + len(key.encode(self.encoding)) size += 255 return size def size_content_types(self) -> int: size = 0 for content_type in self.content_types: size += U_CHAR_SIZE + len(content_type.encode(self.encoding)) return size def size_data(self) -> int: files = ( self.f_ref_positions, self.f_refs, self.f_store_positions, self.f_store, ) return sum(os.stat(f.name).st_size for f in files) def __enter__(self) -> Slob: return cast("Slob", self) def close(self) -> None: for _file in ( self.f_ref_positions, self.f_refs, self.f_store_positions, self.f_store, ): if _file is None: continue self._fire_event("WARNING: closing without finalize()") try: _file.close() except Exception: pass if self.tmpdir and isdir(self.tmpdir.name): self.tmpdir.cleanup() self.tmpdir = None # type: ignore # noqa: PGH003 def __exit__( self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None, ) -> None: """ It used to call self.finalize() here that was bad! __exit__ is not meant for doing so much as finalize() is doing! so make sure to call writer.finalize() after you are done!. """ self.close() pyglossary-5.0.9/pyglossary/sort_keys.py000066400000000000000000000101721476751035500205670ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2022 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . 
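# Sort keys are looked up by an identifier of the form "name" or
# "name:locale" (see lookupSortKey below). For example (illustrative):
#
#     lookupSortKey("headword_lower")        # default, no collator
#     lookupSortKey("stardict")              # StarDict order
#     lookupSortKey("headword_lower:fa_IR")  # ICU collator for fa_IR
#
# An empty name part falls back to defaultSortKeyName.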
from __future__ import annotations from dataclasses import dataclass from typing import TYPE_CHECKING, Any, NamedTuple if TYPE_CHECKING: from .icu_types import T_Collator, T_Locale from .sort_keys_types import ( LocaleSortKeyMakerType, LocaleSQLiteSortKeyMakerType, SortKeyMakerType, SQLiteSortKeyMakerType, ) __all__ = [ "LocaleNamedSortKey", "NamedSortKey", "defaultSortKeyName", "lookupSortKey", "namedSortKeyList", ] defaultSortKeyName = "headword_lower" class NamedSortKey(NamedTuple): name: str desc: str normal: SortKeyMakerType | None sqlite: SQLiteSortKeyMakerType | None @dataclass(slots=True) # not frozen because of mod class LocaleNamedSortKey: name: str desc: str mod: Any = None @property def module(self): # noqa: ANN201 if self.mod is not None: return self.mod mod = __import__( f"pyglossary.sort_modules.{self.name}", fromlist=self.name, ) self.mod = mod return mod @property def normal(self) -> SortKeyMakerType: return self.module.normal @property def sqlite(self) -> SQLiteSortKeyMakerType: return self.module.sqlite @property def locale(self) -> LocaleSortKeyMakerType | None: return getattr(self.module, "locale", None) @property def sqlite_locale(self) -> LocaleSQLiteSortKeyMakerType | None: return getattr(self.module, "sqlite_locale", None) namedSortKeyList = [ LocaleNamedSortKey( name="headword", desc="Headword", ), LocaleNamedSortKey( name="headword_lower", desc="Lowercase Headword", ), LocaleNamedSortKey( name="headword_bytes_lower", desc="ASCII-Lowercase Headword", ), LocaleNamedSortKey( name="stardict", desc="StarDict", ), LocaleNamedSortKey( name="ebook", desc="E-Book (prefix length: 2)", ), LocaleNamedSortKey( name="ebook_length3", desc="E-Book (prefix length: 3)", ), LocaleNamedSortKey( name="dicformids", desc="DictionaryForMIDs", ), LocaleNamedSortKey( name="random", desc="Random", ), ] _sortKeyByName = {item.name: item for item in namedSortKeyList} def lookupSortKey(sortKeyId: str) -> NamedSortKey | None: localeName: str | None = None parts = sortKeyId.split(":") if len(parts) == 1: (sortKeyName,) = parts elif len(parts) == 2: sortKeyName, localeName = parts else: raise ValueError(f"invalid {sortKeyId = }") if not sortKeyName: sortKeyName = defaultSortKeyName localeSK = _sortKeyByName.get(sortKeyName) if localeSK is None: return None if not localeName: return NamedSortKey( name=localeSK.name, desc=localeSK.desc, normal=localeSK.normal, sqlite=localeSK.sqlite, ) from icu import Collator, Locale # type: ignore localeObj: T_Locale = Locale(localeName) localeNameFull = localeObj.getName() collator: T_Collator = Collator.createInstance(localeObj) return NamedSortKey( name=f"{localeSK.name}:{localeNameFull}", desc=f"{localeSK.desc}:{localeNameFull}", normal=localeSK.locale(collator) if localeSK.locale else None, # pyright: ignore[reportArgumentType] sqlite=localeSK.sqlite_locale(collator) if localeSK.sqlite_locale else None, # pyright: ignore[reportArgumentType] ) # https://en.wikipedia.org/wiki/UTF-8#Comparison_with_other_encodings # Sorting order: The chosen values of the leading bytes means that a list # of UTF-8 strings can be sorted in code point order by sorting the # corresponding byte sequences. 
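# For example, "é" (U+00E9) encodes to b"\xc3\xa9", which compares
# greater than b"z" (0x7a) -- the same relative order as the code
# points themselves. This is why the sort modules can compare
# headwords encoded with sortEncoding="utf-8" as plain bytes instead
# of comparing the decoded strings.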
pyglossary-5.0.9/pyglossary/sort_keys_types.py000066400000000000000000000020331476751035500220100ustar00rootroot00000000000000from __future__ import annotations from collections.abc import Callable from typing import TYPE_CHECKING, Any, Protocol, TypeAlias if TYPE_CHECKING: from pyglossary.icu_types import T_Collator SortKeyType: TypeAlias = Callable[ [list[str]], Any, ] SQLiteSortKeyType: TypeAlias = list[tuple[str, str, SortKeyType]] class SortKeyMakerType(Protocol): def __call__( self, sortEncoding: str = "utf-8", **kwargs, # noqa: ANN003 ) -> SortKeyType: ... class SQLiteSortKeyMakerType(Protocol): def __call__( self, sortEncoding: str = "utf-8", **kwargs, # noqa: ANN003 ) -> SQLiteSortKeyType: ... class LocaleSortKeyMakerType(Protocol): def __call__( self, collator: T_Collator, # noqa: F821 ) -> SortKeyMakerType: ... class LocaleSQLiteSortKeyMakerType(Protocol): def __call__( self, collator: T_Collator, # noqa: F821 ) -> SQLiteSortKeyMakerType: ... __all__ = [ "LocaleSQLiteSortKeyMakerType", "LocaleSortKeyMakerType", "SQLiteSortKeyMakerType", "SQLiteSortKeyType", "SortKeyMakerType", "SortKeyType", ] pyglossary-5.0.9/pyglossary/sort_modules/000077500000000000000000000000001476751035500207115ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/sort_modules/__init__.py000066400000000000000000000000001476751035500230100ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/sort_modules/dicformids.py000066400000000000000000000013731476751035500234120ustar00rootroot00000000000000from __future__ import annotations import re from typing import TYPE_CHECKING if TYPE_CHECKING: from pyglossary.sort_keys_types import SortKeyType, SQLiteSortKeyType desc = "DictionaryForMIDs" def normal(**_options) -> SortKeyType: re_punc = re.compile( r"""[!"$§%&/()=?´`\\{}\[\]^°+*~#'\-_.:,;<>@|]*""", # noqa: RUF001 ) re_spaces = re.compile(" +") re_tabs = re.compile("\t+") def sortKey(words: list[str]) -> str: word = words[0] word = word.strip() word = re_punc.sub("", word) word = re_spaces.sub(" ", word) word = re_tabs.sub(" ", word) word = word.lower() return word # noqa: RET504 return sortKey def sqlite(**options) -> SQLiteSortKeyType: return [ ( "headword_norm", "TEXT", normal(**options), ), ] pyglossary-5.0.9/pyglossary/sort_modules/ebook.py000066400000000000000000000021751476751035500223670ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from pyglossary.sort_keys_types import SortKeyType, SQLiteSortKeyType __all__ = ["normal", "sqlite"] desc = "E-Book (prefix length: 2)" def normal( sortEncoding: str = "utf-8", # noqa: ARG001 **options, ) -> SortKeyType: length = options.get("group_by_prefix_length", 2) def sortKey(words: list[str]) -> Any: word = words[0] if not word: return "", "" prefix = word[:length].lower() if prefix[0] < "a": return "SPECIAL", word return prefix, word return sortKey def sqlite(sortEncoding: str = "utf-8", **options) -> SQLiteSortKeyType: length = options.get("group_by_prefix_length", 2) def getPrefix(words: list[str]) -> str: word = words[0] if not word: return "" prefix = word[:length].lower() if prefix[0] < "a": return "SPECIAL" return prefix def headword(words: list[str]) -> Any: return words[0].encode(sortEncoding, errors="replace") type_ = "TEXT" if sortEncoding == "utf-8" else "BLOB" return [ ( "prefix", type_, getPrefix, ), ( "headword", type_, headword, ), ] pyglossary-5.0.9/pyglossary/sort_modules/ebook_length3.py000066400000000000000000000010521476751035500240040ustar00rootroot00000000000000from 
__future__ import annotations from typing import TYPE_CHECKING from pyglossary.sort_modules import ebook if TYPE_CHECKING: from pyglossary.sort_keys_types import SortKeyType, SQLiteSortKeyType desc = "E-Book (prefix length: 3)" def normal(sortEncoding: str = "utf-8", **_options) -> SortKeyType: return ebook.normal( sortEncoding=sortEncoding, group_by_prefix_length=3, ) def sqlite( sortEncoding: str = "utf-8", **_options, ) -> SQLiteSortKeyType: return ebook.sqlite( sortEncoding=sortEncoding, group_by_prefix_length=3, ) pyglossary-5.0.9/pyglossary/sort_modules/headword.py000066400000000000000000000025271476751035500230660ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from pyglossary.icu_types import T_Collator from pyglossary.sort_keys_types import ( SortKeyMakerType, SortKeyType, SQLiteSortKeyMakerType, SQLiteSortKeyType, ) desc = "Headword" def normal(sortEncoding: str = "utf-8", **_options) -> SortKeyType: def sortKey(words: list[str]) -> Any: return words[0].encode(sortEncoding, errors="replace") return sortKey def locale( collator: T_Collator, # noqa: F821 ) -> SortKeyMakerType: cSortKey = collator.getSortKey def sortKey(words: list[str]) -> Any: return cSortKey(words[0]) def warpper(sortEncoding: str = "utf-8", **_options) -> SortKeyType: # noqa: ARG001 return sortKey return warpper def sqlite(sortEncoding: str = "utf-8", **_options) -> SQLiteSortKeyType: def sortKey(words: list[str]) -> Any: return words[0].encode(sortEncoding, errors="replace") return [ ( "headword", "TEXT" if sortEncoding == "utf-8" else "BLOB", sortKey, ), ] def sqlite_locale( collator: T_Collator, # noqa: F821 ) -> SQLiteSortKeyMakerType: cSortKey = collator.getSortKey def sortKey(words: list[str]) -> Any: return cSortKey(words[0]) def wrapper(sortEncoding: str = "", **_options) -> SQLiteSortKeyType: # noqa: ARG001 return [("sortkey", "BLOB", sortKey)] return wrapper pyglossary-5.0.9/pyglossary/sort_modules/headword_bytes_lower.py000066400000000000000000000014311476751035500254750ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from pyglossary.sort_keys_types import SortKeyType, SQLiteSortKeyType desc = "ASCII-Lowercase Headword" def normal( sortEncoding: str = "utf-8", **_options, ) -> SortKeyType: def sortKey(words: list[str]) -> Any: return words[0].encode(sortEncoding, errors="replace").lower() return sortKey # def locale( # collator: "T_Collator", # noqa: F821 # ) -> SortKeyType: # raise NotImplementedError("") def sqlite(sortEncoding: str = "utf-8", **_options) -> SQLiteSortKeyType: def sortKey(words: list[str]) -> Any: return words[0].encode(sortEncoding, errors="replace").lower() return [ ( "headword_blower", "TEXT" if sortEncoding == "utf-8" else "BLOB", sortKey, ), ] pyglossary-5.0.9/pyglossary/sort_modules/headword_lower.py000066400000000000000000000027361476751035500243000ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from pyglossary.icu_types import T_Collator from pyglossary.sort_keys_types import ( SortKeyMakerType, SortKeyType, SQLiteSortKeyMakerType, SQLiteSortKeyType, ) desc = "Lowercase Headword" def normal(sortEncoding: str = "utf-8", **_options) -> SortKeyType: def sortKey(words: list[str]) -> Any: # assert isinstance(words, list) # OK return words[0].lower().encode(sortEncoding, errors="replace") return sortKey def locale( collator: T_Collator, # noqa: F821 ) -> 
SortKeyMakerType: cSortKey = collator.getSortKey def sortKey(words: list[str]) -> Any: # assert isinstance(words, list) # OK return cSortKey(words[0].lower()) def warpper(sortEncoding: str = "utf-8", **_options) -> SortKeyType: # noqa: ARG001 return sortKey return warpper def sqlite( sortEncoding: str = "utf-8", **_options, ) -> SQLiteSortKeyType: def sortKey(words: list[str]) -> Any: return words[0].lower().encode(sortEncoding, errors="replace") return [ ( "headword_lower", "TEXT" if sortEncoding == "utf-8" else "BLOB", sortKey, ), ] def sqlite_locale( collator: T_Collator, # noqa: F821 ) -> SQLiteSortKeyMakerType: cSortKey = collator.getSortKey def sortKey(words: list[str]) -> Any: return cSortKey(words[0].lower()) def wrapper(sortEncoding: str = "", **_options) -> SQLiteSortKeyType: # noqa: ARG001 return [("sortkey", "BLOB", sortKey)] return wrapper pyglossary-5.0.9/pyglossary/sort_modules/random.py000066400000000000000000000020761476751035500225500ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from collections.abc import Callable from pyglossary.icu_types import T_Collator from pyglossary.sort_keys_types import ( SortKeyMakerType, SortKeyType, SQLiteSortKeyType, ) desc = "Random" def normal(**_options) -> SortKeyType: from random import random return lambda _words: random() def locale( collator: T_Collator, # noqa: ARG001 # noqa: F821 ) -> SortKeyMakerType: from random import random def sortKey(words: list[str]) -> Any: # noqa: ARG001 return random() def warpper(sortEncoding: str = "utf-8", **_options) -> SortKeyType: # noqa: ARG001 return sortKey return warpper def sqlite(**_options) -> SQLiteSortKeyType: from random import random return [ ( "random", "REAL", lambda _words: random(), ), ] def sqlite_locale( _collator: T_Collator, # noqa: F821 **_options, ) -> Callable[..., SQLiteSortKeyType]: from random import random return lambda **_opt: [ ( "random", "REAL", lambda _words: random(), ), ] pyglossary-5.0.9/pyglossary/sort_modules/stardict.py000066400000000000000000000015371476751035500231060ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from pyglossary.sort_keys_types import SortKeyType, SQLiteSortKeyType desc = "StarDict" def normal(sortEncoding: str = "utf-8", **_options) -> SortKeyType: def sortKey(words: list[str]) -> Any: b_word = words[0].encode(sortEncoding, errors="replace") return (b_word.lower(), b_word) return sortKey def sqlite(sortEncoding: str = "utf-8", **_options) -> SQLiteSortKeyType: def headword_lower(words: list[str]) -> Any: return words[0].encode(sortEncoding, errors="replace").lower() def headword(words: list[str]) -> Any: return words[0].encode(sortEncoding, errors="replace") type_ = "TEXT" if sortEncoding == "utf-8" else "BLOB" return [ ( "headword_lower", type_, headword_lower, ), ( "headword", type_, headword, ), ] pyglossary-5.0.9/pyglossary/sq_entry_list.py000066400000000000000000000122771476751035500214540ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Copyright © 2008-2022 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . from __future__ import annotations import logging from typing import TYPE_CHECKING from .glossary_utils import Error if TYPE_CHECKING: from collections.abc import Callable, Iterable, Iterator from typing import Any from .glossary_types import EntryType, RawEntryType from .sort_keys import NamedSortKey from .sort_keys_types import SQLiteSortKeyType __all__ = ["SqEntryList"] log = logging.getLogger("pyglossary") class SqEntryList: def __init__( # noqa: PLR0913 self, entryToRaw: Callable[[EntryType], RawEntryType], entryFromRaw: Callable[[RawEntryType], EntryType], database: str, create: bool = True, ) -> None: """sqliteSortKey[i] == (name, type, valueFunc).""" import sqlite3 self._entryToRaw = entryToRaw self._entryFromRaw = entryFromRaw self._database = database self._con: sqlite3.Connection | None = sqlite3.connect(database) self._cur: sqlite3.Cursor | None = self._con.cursor() if not database: raise ValueError(f"invalid {database=}") self._orderBy = "rowid" self._sorted = False self._reverse = False self._len = 0 self._create = create self._sqliteSortKey: SQLiteSortKeyType = [] self._columnNames = "" def hasSortKey(self) -> bool: return bool(self._sqliteSortKey) def setSortKey( self, namedSortKey: NamedSortKey, sortEncoding: str | None, writeOptions: dict[str, Any], ) -> None: """sqliteSortKey[i] == (name, type, valueFunc).""" if self._con is None: raise RuntimeError("self._con is None") if self._sqliteSortKey: raise RuntimeError("Called setSortKey twice") if namedSortKey.sqlite is None: raise NotImplementedError( f"sort key {namedSortKey.name!r} is not supported", ) kwargs = writeOptions.copy() if sortEncoding: kwargs["sortEncoding"] = sortEncoding sqliteSortKey = namedSortKey.sqlite(**kwargs) self._sqliteSortKey = sqliteSortKey self._columnNames = ",".join(col[0] for col in sqliteSortKey) if not self._create: self._parseExistingIndex() return colDefs = ",".join( [f"{col[0]} {col[1]}" for col in sqliteSortKey] + ["data BLOB"], ) self._con.execute( f"CREATE TABLE data ({colDefs})", ) def __len__(self) -> int: return self._len def _encode(self, entry: EntryType) -> bytes: return b"\x00".join(self._entryToRaw(entry)) def _decode(self, data: bytes) -> EntryType: return self._entryFromRaw(data.split(b"\x00")) def append(self, entry: EntryType) -> None: self._cur.execute( # type: ignore f"insert into data({self._columnNames}, data)" f" values (?{', ?' 
* len(self._sqliteSortKey)})", [col[2](entry.l_word) for col in self._sqliteSortKey] + [self._encode(entry)], ) self._len += 1 def __iter__(self) -> Iterator[EntryType]: if self._cur is None: raise Error("SQLite cursor is closed") self._cur.execute(f"SELECT data FROM data ORDER BY {self._orderBy}") for row in self._cur: yield self._decode(row[0]) def __iadd__(self, other: Iterable) -> SqEntryList: for item in other: self.append(item) return self def sort(self, reverse: bool = False) -> None: if self._sorted: raise NotImplementedError("can not sort more than once") if not self._sqliteSortKey: raise RuntimeError("self._sqliteSortKey is empty") self._reverse = reverse self._sorted = True sortColumnNames = self._columnNames self._orderBy = sortColumnNames if reverse: self._orderBy = ",".join(f"{col[0]} DESC" for col in self._sqliteSortKey) assert self._con self._con.commit() self._con.execute( f"CREATE INDEX sortkey ON data({sortColumnNames});", ) self._con.commit() def _parseExistingIndex(self) -> bool: if self._cur is None: return False self._cur.execute("select sql FROM sqlite_master WHERE name='sortkey'") row = self._cur.fetchone() if row is None: return False sql = row[0] # sql == "CREATE INDEX sortkey ON data(wordlower,word)" i = sql.find("(") if i < 0: log.error(f"error parsing index {sql=}") return False j = sql.find(")", i) if j < 0: log.error(f"error parsing index {sql=}") return False columnNames = sql[i + 1 : j] self._sorted = True self._orderBy = columnNames return True def clear(self) -> None: self.close() def close(self) -> None: if self._con is None or self._cur is None: return self._con.commit() self._cur.close() self._con.close() self._con = None self._cur = None def __del__(self) -> None: self.close() pyglossary-5.0.9/pyglossary/text_reader.py000066400000000000000000000167751476751035500210720ustar00rootroot00000000000000from __future__ import annotations import io import logging import os import typing from os.path import isdir, isfile, join, splitext from typing import TYPE_CHECKING, cast if TYPE_CHECKING: from collections.abc import Generator, Iterator from pyglossary.entry_base import MultiStr from pyglossary.glossary_types import EntryType, ReaderGlossaryType from pyglossary.compression import ( compressionOpen, stdCompressions, ) from pyglossary.entry import DataEntry from pyglossary.io_utils import nullTextIO __all__ = ["TextFilePosWrapper", "TextGlossaryReader", "nextBlockResultType"] log = logging.getLogger("pyglossary") nextBlockResultType: typing.TypeAlias = ( "tuple[str | list[str], str, list[tuple[str, str]] | None] | None" ) # ( # word: str | list[str], # defi: str, # images: list[tuple[str, str]] | None # ) class TextFilePosWrapper(io.TextIOBase): def __init__(self, fileobj: io.TextIOBase, encoding: str) -> None: self.fileobj = fileobj self._encoding = encoding self.pos = 0 def __iter__(self) -> Iterator[str]: # type: ignore return self def close(self) -> None: self.fileobj.close() def __next__(self) -> str: # type: ignore line = self.fileobj.__next__() self.pos += len(line.encode(self._encoding)) return line def tell(self) -> int: return self.pos class TextGlossaryReader: _encoding: str = "utf-8" compressions = stdCompressions def __init__(self, glos: ReaderGlossaryType, hasInfo: bool = True) -> None: self._glos = glos self._filename = "" self._file: io.TextIOBase = nullTextIO self._hasInfo = hasInfo self._pendingEntries: list[EntryType] = [] self._wordCount = 0 self._fileSize = 0 self._progress = True self._pos = -1 self._fileCount = 1 self._fileIndex = -1 
self._bufferLine = "" self._entryIndex = 0 self._resDir = "" self._resFileNames: list[str] = [] def _setResDir(self, resDir: str) -> bool: if self._glos.getConfig("skip_resources", False): return False if isdir(resDir): log.info(f"Listing res dir {self._resDir}") self._resDir = resDir self._resFileNames = os.listdir(self._resDir) return True return False def detectResDir(self, filename: str) -> bool: if self._setResDir(f"{filename}_res"): return True filenameNoExt, ext = splitext(filename) ext = ext.lstrip(".") if ext not in self.compressions: return False return self._setResDir(f"{filenameNoExt}_res") def readline(self) -> str: if self._bufferLine: line = self._bufferLine self._bufferLine = "" return line try: return next(self._file) except StopIteration: return "" def _calcFilzeSize(self, cfile: io.TextIOBase, filename: str) -> None: if cfile.seekable(): log.info("Calculating file size") cfile.seek(0, 2) self._fileSize = cfile.tell() cfile.seek(0) log.debug(f"File size of {filename}: {self._fileSize}") self._glos.setInfo("input_file_size", str(self._fileSize)) else: log.warning("TextGlossaryReader: file is not seekable") def _openGen(self, filename: str) -> Iterator[tuple[int, int]]: self._fileIndex += 1 log.info(f"Reading file: {filename}") cfile = cast( "io.TextIOBase", compressionOpen( filename, mode="rt", encoding=self._encoding, ), ) if self._glos.progressbar: self._calcFilzeSize(cfile, filename) self._progress = self._fileSize > 0 else: if os.getenv("CALC_FILE_SIZE"): self._calcFilzeSize(cfile, filename) self._progress = False self._file = TextFilePosWrapper(cfile, self._encoding) if self._hasInfo: yield from self.loadInfo() self.detectResDir(filename) def _open(self, filename: str) -> None: for _ in self._openGen(filename): pass def open(self, filename: str) -> None: self._filename = filename self._open(filename) def openGen(self, filename: str) -> Iterator[tuple[int, int]]: """ Like open() but return a generator / iterator to track the progress example for reader.open: yield from TextGlossaryReader.openGen(self, filename). 
""" self._filename = filename yield from self._openGen(filename) def openNextFile(self) -> bool: self.close() nextFilename = f"{self._filename}.{self._fileIndex + 1}" if isfile(nextFilename): self._open(nextFilename) return True for ext in self.compressions: if isfile(f"{nextFilename}.{ext}"): self._open(f"{nextFilename}.{ext}") return True if self._fileCount != -1: log.warning(f"next file not found: {nextFilename}") return False def close(self) -> None: try: self._file.close() except Exception: log.exception(f"error while closing file {self._filename!r}") self._file = nullTextIO def newEntry(self, word: MultiStr, defi: str) -> EntryType: byteProgress: tuple[int, int] | None = None if self._progress: self._entryIndex += 1 if self._entryIndex % 1000 == 0: byteProgress = (self._file.tell(), self._fileSize) return self._glos.newEntry( word, defi, byteProgress=byteProgress, ) def setInfo(self, key: str, value: str) -> None: self._glos.setInfo(key, value) def _loadNextInfo(self) -> bool: """Returns True when reached the end.""" block = self.nextBlock() if not block: return False key, value, _ = block origKey = key if isinstance(key, list): key = key[0] if not self.isInfoWords(key): self._pendingEntries.append(self.newEntry(origKey, value)) return True if not value: return False key = self.fixInfoWord(key) if not key: return False self.setInfo(key, value) return False def loadInfo(self) -> Generator[tuple[int, int], None, None]: self._pendingEntries = [] try: while True: if self._loadNextInfo(): break yield (self._file.tell(), self._fileSize) except StopIteration: pass if self._fileIndex == 0 and not os.getenv("NO_READ_MULTI_PART"): fileCountStr = self._glos.getInfo("file_count") if fileCountStr: self._fileCount = int(fileCountStr) self._glos.setInfo("file_count", "") @staticmethod def _genDataEntries( resList: list[tuple[str, str]], resPathSet: set[str], ) -> Iterator[DataEntry]: for relPath, fullPath in resList: if relPath in resPathSet: continue resPathSet.add(relPath) yield DataEntry( fname=relPath, tmpPath=fullPath, ) def __iter__(self) -> Iterator[EntryType | None]: resPathSet: set[str] = set() while True: self._pos += 1 if self._pendingEntries: yield self._pendingEntries.pop(0) continue ### try: block = self.nextBlock() except StopIteration: if ( self._fileCount == -1 or self._fileIndex < self._fileCount - 1 ) and self.openNextFile(): continue self._wordCount = self._pos break if not block: yield None continue word, defi, resList = block if resList: yield from self._genDataEntries(resList, resPathSet) yield self.newEntry(word, defi) resDir = self._resDir for fname in self._resFileNames: fpath = join(resDir, fname) if not isfile(fpath): log.error(f"No such file: {fpath}") continue with open(fpath, "rb") as _file: yield self._glos.newDataEntry( fname, _file.read(), ) def __len__(self) -> int: return self._wordCount @classmethod def isInfoWord(cls, word: str) -> bool: raise NotImplementedError def isInfoWords(self, arg: str | list[str]) -> bool: if isinstance(arg, str): return self.isInfoWord(arg) if isinstance(arg, list): return self.isInfoWord(arg[0]) raise TypeError(f"bad argument {arg}") @classmethod def fixInfoWord(cls, word: str) -> str: raise NotImplementedError def nextBlock(self) -> nextBlockResultType: raise NotImplementedError pyglossary-5.0.9/pyglossary/text_utils.py000066400000000000000000000105631476751035500207550ustar00rootroot00000000000000# -*- coding: utf-8 -*- # text_utils.py # # Copyright © 2008-2022 Saeed Rasooli (ilius) # # This program is a free software; you can 
redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# You can get a copy of GNU General Public License along this program
# But you can always get it from http://www.gnu.org/licenses/gpl.txt
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

from __future__ import annotations

import binascii
import logging
import re
import struct
import sys
from typing import AnyStr

__all__ = [
	"crc32hex",
	"escapeNTB",
	"excMessage",
	"fixUtf8",
	"joinByBar",
	"replacePostSpaceChar",
	"splitByBar",
	"splitByBarUnescapeNTB",
	"toStr",
	"uint32FromBytes",
	"uint32ToBytes",
	"uint64FromBytes",
	"uint64ToBytes",
	"uintFromBytes",
	"unescapeBar",
	"unescapeNTB",
	"urlToPath",
]

log = logging.getLogger("pyglossary")

endFormat = "\x1b[0;0;0m"  # len=8


def toStr(s: AnyStr) -> str:
	if isinstance(s, bytes):
		return str(s, "utf-8")
	return str(s)


def fixUtf8(st: AnyStr) -> str:
	if isinstance(st, str):
		return st.encode("utf-8").replace(b"\x00", b"").decode("utf-8", "replace")
	return st.replace(b"\x00", b"").decode("utf-8", "replace")


# each pattern matches an escape sequence preceded by an even number of
# backslashes (captured as group 1), i.e. a sequence that is not itself
# escaped (the pattern bodies were unreadable in this copy; they are
# reconstructed from the escape/unescape functions below)
pattern_n_us = re.compile(r"((?<!\\)(?:\\\\)*)\\n")
pattern_t_us = re.compile(r"((?<!\\)(?:\\\\)*)\\t")
pattern_bar_us = re.compile(r"((?<!\\)(?:\\\\)*)\\\|")
pattern_bar_sp = re.compile(r"(?:(?<!\\)(?:\\\\)*)\|")


def escapeNTB(st: str, bar: bool = False) -> str:
	"""Escapes Newline, Tab, Backslash, and vertical Bar (if bar=True)."""
	st = st.replace("\\", "\\\\")
	st = st.replace("\t", r"\t")
	st = st.replace("\r", "")
	st = st.replace("\n", r"\n")
	if bar:
		st = st.replace("|", r"\|")
	return st  # noqa: RET504


def unescapeNTB(st: str, bar: bool = False) -> str:
	"""Unescapes Newline, Tab, Backslash, and vertical Bar (if bar=True)."""
	st = pattern_n_us.sub("\\1\n", st)
	st = pattern_t_us.sub("\\1\t", st)
	if bar:
		st = pattern_bar_us.sub(r"\1|", st)
	st = st.replace("\\\\", "\\")  # probably faster than re.sub
	return st  # noqa: RET504


def splitByBarUnescapeNTB(st: str) -> list[str]:
	r"""
	Split by "|" (and not "\\|") then unescapes Newline (\\n),
	Tab (\\t), Backslash (\\) and Bar (\\|) in each part,
	returns a list.
	"""
	return [unescapeNTB(part, bar=True) for part in pattern_bar_sp.split(st)]


def escapeBar(st: str) -> str:
	r"""Escapes vertical bar (\|)."""
	return st.replace("\\", "\\\\").replace("|", r"\|")


def unescapeBar(st: str) -> str:
	r"""Unescapes vertical bar (\|)."""
	# str.replace is probably faster than re.sub
	return pattern_bar_us.sub(r"\1|", st).replace("\\\\", "\\")


def splitByBar(st: str) -> list[str]:
	r"""
	Split by "|" (and not "\\|") then unescapes Backslash (\\)
	and Bar (\\|) in each part.
""" return [unescapeBar(part) for part in pattern_bar_sp.split(st)] def joinByBar(parts: list[str]) -> str: return "|".join(escapeBar(part) for part in parts) # return a message string describing the current exception def excMessage() -> str: i = sys.exc_info() if not i[0]: return "" return f"{i[0].__name__}: {i[1]}" # ___________________________________________ # def uint32ToBytes(n: int) -> bytes: return struct.pack(">I", n) def uint64ToBytes(n: int) -> bytes: return struct.pack(">Q", n) def uint32FromBytes(bs: bytes) -> int: return struct.unpack(">I", bs)[0] def uint64FromBytes(bs: bytes) -> int: return struct.unpack(">Q", bs)[0] def uintFromBytes(bs: bytes) -> int: n = 0 for c in bs: n = (n << 8) + c return n def crc32hex(bs: bytes) -> str: return struct.pack(">I", binascii.crc32(bs) & 0xFFFFFFFF).hex() # ___________________________________________ # def urlToPath(url: str) -> str: from urllib.parse import unquote if not url.startswith("file://"): return unquote(url) path = url[7:] if path[-2:] == "\r\n": path = path[:-2] elif path[-1] == "\r": path = path[:-1] # here convert html unicode symbols to utf-8 string: return unquote(path) def replacePostSpaceChar(st: str, ch: str) -> str: return ( st.replace(f" {ch}", ch) .replace(ch, f"{ch} ") .replace(f"{ch} ", f"{ch} ") .removesuffix(" ") ) pyglossary-5.0.9/pyglossary/text_writer.py000066400000000000000000000144601476751035500211310ustar00rootroot00000000000000from __future__ import annotations import logging import os from os.path import ( isdir, ) from typing import TYPE_CHECKING, cast if TYPE_CHECKING: import io from collections.abc import Callable, Generator from .glossary_types import EntryType, WriterGlossaryType from .compression import compressionOpen as c_open from .io_utils import nullTextIO __all__ = ["TextGlossaryWriter", "writeTxt"] log = logging.getLogger("pyglossary") file_size_check_every = 100 class TextGlossaryWriter: _encoding: str = "utf-8" _newline: str = "\n" _wordListEncodeFunc: Callable[[list[str]], str] | None = None _wordEscapeFunc: Callable[[str], str] | None = None _defiEscapeFunc: Callable[[str], str] | None = None _ext: str = ".txt" _head: str = "" _tail: str = "" _resources: bool = True _file_size_approx: int = 0 _word_title: bool = False def __init__( self, glos: WriterGlossaryType, entryFmt: str = "", # contain {word} and {defi} writeInfo: bool = True, outInfoKeysAliasDict: dict[str, str] | None = None, ) -> None: self._glos = glos self._filename = "" self._file: io.TextIOBase = nullTextIO self._resDir = "" if not entryFmt: raise ValueError("entryFmt argument is missing") self._entryFmt = entryFmt self._writeInfo = writeInfo self._outInfoKeysAliasDict = outInfoKeysAliasDict or {} # TODO: replace outInfoKeysAliasDict arg with a func? 
# TODO: use @property setters def setAttrs( # noqa: PLR0913 self, encoding: str | None = None, newline: str | None = None, wordListEncodeFunc: Callable | None = None, wordEscapeFunc: Callable | None = None, defiEscapeFunc: Callable | None = None, ext: str | None = None, head: str | None = None, tail: str | None = None, resources: bool | None = None, word_title: bool | None = None, file_size_approx: int | None = None, ) -> None: if encoding is not None: self._encoding = encoding if newline is not None: self._newline = newline if wordListEncodeFunc is not None: self._wordListEncodeFunc = wordListEncodeFunc if wordEscapeFunc is not None: self._wordEscapeFunc = wordEscapeFunc if defiEscapeFunc is not None: self._defiEscapeFunc = defiEscapeFunc if ext is not None: self._ext = ext if head is not None: self._head = head if tail is not None: self._tail = tail if resources is not None: self._resources = resources if word_title is not None: self._word_title = word_title if file_size_approx is not None: self._file_size_approx = file_size_approx def open(self, filename: str) -> None: if self._file_size_approx > 0: self._glos.setInfo("file_count", "-1") self._open(filename) self._filename = filename if not self._glos.getConfig("skip_resources", False): self._resDir = f"{filename}_res" if not isdir(self._resDir): os.mkdir(self._resDir) def _doWriteInfo(self, file: io.TextIOBase) -> None: entryFmt = self._entryFmt outInfoKeysAliasDict = self._outInfoKeysAliasDict wordEscapeFunc = self._wordEscapeFunc defiEscapeFunc = self._defiEscapeFunc for key, value in self._glos.iterInfo(): # both key and value are supposed to be non-empty string if not (key and value): log.warning(f"skipping info {key=}, {value=}") continue key = outInfoKeysAliasDict.get(key, key) # noqa: PLW2901 if not key: continue word = f"##{key}" if wordEscapeFunc is not None: word = wordEscapeFunc(word) if not word: continue if defiEscapeFunc is not None: value = defiEscapeFunc(value) # noqa: PLW2901 if not value: continue file.write( entryFmt.format( word=word, defi=value, ), ) def _open(self, filename: str) -> io.TextIOBase: if not filename: filename = self._glos.filename + self._ext file = self._file = cast( "io.TextIOBase", c_open( filename, mode="wt", encoding=self._encoding, newline=self._newline, ), ) file.write(self._head) if self._writeInfo: self._doWriteInfo(file) file.flush() return file def write(self) -> Generator[None, EntryType, None]: glos = self._glos file = self._file entryFmt = self._entryFmt wordListEncodeFunc = self._wordListEncodeFunc wordEscapeFunc = self._wordEscapeFunc defiEscapeFunc = self._defiEscapeFunc resources = self._resources word_title = self._word_title file_size_approx = self._file_size_approx entryCount = 0 fileIndex = 0 glosName = self._glos.getInfo("name") or self._filename while True: entry = yield if entry is None: break if entry.isData(): if resources: entry.save(self._resDir) continue word = entry.s_word defi = entry.defi # if glos.alts: # FIXME if word_title: defi = glos.wordTitleStr(entry.l_word[0]) + defi if wordListEncodeFunc is not None: word = wordListEncodeFunc(entry.l_word) elif wordEscapeFunc is not None: word = wordEscapeFunc(word) if defiEscapeFunc is not None: defi = defiEscapeFunc(defi) file.write(entryFmt.format(word=word, defi=defi)) if file_size_approx > 0: entryCount += 1 if ( entryCount % file_size_check_every == 0 and file.tell() >= file_size_approx ): fileIndex += 1 log.info(f"Creating {self._filename}.{fileIndex}") self._glos.setInfo("name", f"{glosName} part {fileIndex + 1}") 
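
# The size-based rollover in write() above, reduced to its bare pattern:
# check tell() only every N entries to keep it cheap, and once the current
# file passes the approximate size, close it and open "name.1", "name.2",
# and so on. The constants and file names here are illustrative.
CHECK_EVERY = 100
MAX_SIZE = 50_000_000  # ~50 MB per part

def write_parts(lines, basename="out.txt"):
	part = 0
	f = open(basename, "w", encoding="utf-8")
	for i, line in enumerate(lines):
		f.write(line)
		if (i + 1) % CHECK_EVERY == 0 and f.tell() >= MAX_SIZE:
			f.close()
			part += 1
			f = open(f"{basename}.{part}", "w", encoding="utf-8")
	f.close()
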
file = self._open(f"{self._filename}.{fileIndex}") def finish(self) -> None: if self._tail: self._file.write(self._tail) self._file.close() if not os.listdir(self._resDir): os.rmdir(self._resDir) def writeTxt( # noqa: PLR0913 glos: WriterGlossaryType, entryFmt: str = "", # contain {word} and {defi} filename: str = "", writeInfo: bool = True, wordEscapeFunc: Callable | None = None, defiEscapeFunc: Callable | None = None, ext: str = ".txt", head: str = "", tail: str = "", outInfoKeysAliasDict: dict[str, str] | None = None, encoding: str = "utf-8", newline: str = "\n", resources: bool = True, word_title: bool = False, ) -> Generator[None, EntryType, None]: writer = TextGlossaryWriter( glos, entryFmt=entryFmt, writeInfo=writeInfo, outInfoKeysAliasDict=outInfoKeysAliasDict, ) writer.setAttrs( encoding=encoding, newline=newline, wordEscapeFunc=wordEscapeFunc, defiEscapeFunc=defiEscapeFunc, ext=ext, head=head, tail=tail, resources=resources, word_title=word_title, ) writer.open(filename) yield from writer.write() writer.finish() pyglossary-5.0.9/pyglossary/ui/000077500000000000000000000000001476751035500166075ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/ui/__init__.py000066400000000000000000000000001476751035500207060ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/ui/argparse_main.py000066400000000000000000000143471476751035500220020ustar00rootroot00000000000000from __future__ import annotations import os from typing import TYPE_CHECKING, Any from pyglossary.ui.base import UIBase from pyglossary.ui.option_ui import registerConfigOption if TYPE_CHECKING: import argparse import logging def defineFlags(parser: argparse.ArgumentParser, config: dict[str, Any]) -> None: osType = "windows" if os.sep == "\\" else "unix" colorConf = f"color.enable.cmd.{osType}" defaultHasColor = config.get(colorConf, True) and not os.getenv("NO_COLOR") parser.add_argument( "-v", "--verbosity", action="store", dest="verbosity", type=int, choices=(0, 1, 2, 3, 4, 5), required=False, default=int(os.getenv("VERBOSITY", "3")), ) parser.add_argument( "--version", action="store_true", ) parser.add_argument( "-h", "--help", dest="help", action="store_true", ) parser.add_argument( "-u", "--ui", dest="ui_type", default="auto", choices=( "cmd", "gtk", "gtk3", "gtk4", "tk", "web", # "qt", "auto", "none", ), ) parser.add_argument( "--cmd", dest="ui_type", action="store_const", const="cmd", default=None, help="use command-line user interface", ) parser.add_argument( "--gtk", dest="ui_type", action="store_const", const="gtk", default=None, help="use Gtk-based user interface", ) parser.add_argument( "--gtk3", dest="ui_type", action="store_const", const="gtk3", default=None, help="use Gtk4-based user interface", ) parser.add_argument( "--gtk4", dest="ui_type", action="store_const", const="gtk4", default=None, help="use Gtk4-based user interface", ) parser.add_argument( "--tk", dest="ui_type", action="store_const", const="tk", default=None, help="use Tkinter-based user interface", ) parser.add_argument( "--web", dest="ui_type", action="store_const", const="web", default=None, help="use web browser interface", ) parser.add_argument( "--interactive", "--inter", dest="interactive", action="store_true", default=None, help="switch to interactive command line interface", ) parser.add_argument( "--no-interactive", "--no-inter", dest="no_interactive", action="store_true", default=None, help=( "do not automatically switch to interactive command line" " interface, for scripts" ), ) parser.add_argument( "-r", "--read-options", 
dest="readOptions", default="", ) parser.add_argument( "-w", "--write-options", dest="writeOptions", default="", ) parser.add_argument( "--json-read-options", dest="jsonReadOptions", default=None, ) parser.add_argument( "--json-write-options", dest="jsonWriteOptions", default=None, ) parser.add_argument( "--read-format", dest="inputFormat", ) parser.add_argument( "--write-format", dest="outputFormat", action="store", ) parser.add_argument( "--direct", dest="direct", action="store_true", default=None, help="if possible, convert directly without loading into memory", ) parser.add_argument( "--indirect", dest="direct", action="store_false", default=None, help=( "disable `direct` mode, load full data into memory before writing" ", this is default" ), ) parser.add_argument( "--sqlite", dest="sqlite", action="store_true", default=None, help=( "use SQLite as middle storage instead of RAM in direct mode," "for very large glossaries" ), ) parser.add_argument( "--no-sqlite", dest="sqlite", action="store_false", default=None, help="do not use SQLite mode", ) parser.add_argument( "--no-progress-bar", dest="progressbar", action="store_false", default=None, ) parser.add_argument( "--no-color", dest="noColor", action="store_true", default=not defaultHasColor, ) parser.add_argument( "--sort", dest="sort", action="store_true", default=None, ) parser.add_argument( "--no-sort", dest="sort", action="store_false", default=None, ) parser.add_argument( "--sort-key", action="store", dest="sortKeyName", default=None, help="name of sort key", ) parser.add_argument( "--sort-encoding", action="store", dest="sortEncoding", default=None, help="encoding of sort (default utf-8)", ) # _______________________________ parser.add_argument( "--source-lang", action="store", dest="sourceLang", default=None, help="source/query language", ) parser.add_argument( "--target-lang", action="store", dest="targetLang", default=None, help="target/definition language", ) parser.add_argument( "--name", action="store", dest="name", default=None, help="glossary name/title", ) # _______________________________ parser.add_argument( "--reverse", dest="reverse", action="store_true", ) parser.add_argument( "inputFilename", action="store", default="", nargs="?", ) parser.add_argument( "outputFilename", action="store", default="", nargs="?", ) # _______________________________ for key, option in UIBase.configDefDict.items(): registerConfigOption(parser, key, option) def validateFlags(args: argparse.Namespace, log: logging.Logger) -> bool: from pyglossary.sort_keys import lookupSortKey, namedSortKeyList for param1, param2 in UIBase.conflictingParams: if getattr(args, param1) and getattr(args, param2): log.critical( "Conflicting flags: " f"--{param1.replace('_', '-')} and " f"--{param2.replace('_', '-')}", ) return False if not args.sort: if args.sortKeyName: log.critical("Passed --sort-key without --sort") return False if args.sortEncoding: log.critical("Passed --sort-encoding without --sort") return False if args.sortKeyName and not lookupSortKey(args.sortKeyName): valuesStr = ", ".join(_sk.name for _sk in namedSortKeyList) log.critical( f"Invalid sortKeyName={args.sortKeyName!r}. 
Supported values:\n{valuesStr}", ) return False return True def configFromArgs( args: argparse.Namespace, log: logging.Logger, ) -> dict[str, Any]: config: dict[str, Any] = {} for key, option in UIBase.configDefDict.items(): if not option.hasFlag: continue value = getattr(args, key, None) if value is None: continue log.debug(f"config: {key} = {value}") if not option.validate(value): log.error(f"invalid config value: {key} = {value!r}") continue config[key] = value return config pyglossary-5.0.9/pyglossary/ui/argparse_utils.py000066400000000000000000000022411476751035500222040ustar00rootroot00000000000000from __future__ import annotations import argparse class StoreConstAction(argparse.Action): def __init__( self, option_strings: list[str], same_dest: str = "", const_value: bool | None = None, nargs: int = 0, **kwargs, ) -> None: if isinstance(option_strings, str): option_strings = [option_strings] argparse.Action.__init__( self, option_strings=option_strings, nargs=nargs, **kwargs, ) self.same_dest = same_dest self.const_value = const_value def __call__( # noqa: PLR0913 self, parser: argparse.ArgumentParser | None = None, namespace: argparse.Namespace | None = None, values: list | None = None, # noqa: ARG002 option_strings: list[str] | None = None, # noqa: ARG002 required: bool = False, # noqa: ARG002 dest: str | None = None, ) -> StoreConstAction: if not parser: return self dest = self.dest if getattr(namespace, dest) is not None: flag = self.option_strings[0] if getattr(namespace, dest) == self.const_value: parser.error(f"multiple {flag} options") else: parser.error(f"conflicting options: {self.same_dest} and {flag}") setattr(namespace, dest, self.const_value) return self pyglossary-5.0.9/pyglossary/ui/base.py000066400000000000000000000161711476751035500201010ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2012-2022 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . 
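
# The effect StoreConstAction above builds on: a tri-state --x/--no-x flag
# pair where the dest stays None ("auto") unless one of the flags is given.
# Stock argparse can express the same three states, just without the
# duplicate- and conflicting-flag diagnostics that StoreConstAction adds:
import argparse

p = argparse.ArgumentParser()
p.add_argument("--alts", dest="alts", action="store_const", const=True, default=None)
p.add_argument("--no-alts", dest="alts", action="store_const", const=False)

assert p.parse_args([]).alts is None  # not given: decided automatically
assert p.parse_args(["--alts"]).alts is True
assert p.parse_args(["--no-alts"]).alts is False
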
from __future__ import annotations import logging from os.path import isfile, join from pyglossary.core import ( appResDir, confJsonFile, dataDir, rootConfJsonFile, ) from pyglossary.entry_filters import entryFiltersRules from pyglossary.option import ( BoolOption, FloatOption, IntOption, Option, StrOption, ) __all__ = ["UIBase", "aboutText", "authors", "fread", "licenseText", "logo"] def fread(path: str) -> str: with open(path, encoding="utf-8") as fp: return fp.read() log = logging.getLogger("pyglossary") logo = join(appResDir, "pyglossary.png") aboutText = fread(join(dataDir, "about")) licenseText = fread(join(dataDir, "_license-dialog")) authors = fread(join(dataDir, "AUTHORS")).split("\n") summary = ( "A tool for converting dictionary files aka glossaries with" " various formats for different dictionary applications" ) _entryFilterConfigDict = { configParam: (filterClass, default) for configParam, default, filterClass in entryFiltersRules if configParam } def getEntryFilterOption(name: str) -> Option: filterClass, default = _entryFilterConfigDict[name] if isinstance(default, bool): optClass = BoolOption elif isinstance(default, str): optClass = StrOption else: raise TypeError(f"{default = }") return optClass( hasFlag=True, comment=filterClass.desc, falseComment=filterClass.falseComment, ) class UIBase: configDefDict: dict[str, Option] = { "log_time": BoolOption( hasFlag=True, comment="Show date and time in logs", falseComment="Do not show date and time in logs", ), "cleanup": BoolOption( hasFlag=True, comment="Cleanup cache or temporary files after conversion", falseComment=("Do not cleanup cache or temporary files after conversion",), ), "auto_sqlite": BoolOption( hasFlag=False, comment=( "Auto-enable --sqlite to limit RAM usage when direct\n" "mode is not possible. 
Can override with --no-sqlite" ), ), "enable_alts": BoolOption( hasFlag=True, customFlag="alts", comment="Enable alternates", falseComment="Disable alternates", ), # TODO: version 6.0: replace with "resources" # comment="Use resources (images, audio, etc)" "skip_resources": BoolOption( hasFlag=True, comment="Skip resources (images, audio, css, etc)", ), "save_info_json": BoolOption( hasFlag=True, customFlag="info", comment="Save .info file alongside output file(s)", ), "lower": getEntryFilterOption("lower"), "utf8_check": getEntryFilterOption("utf8_check"), "rtl": getEntryFilterOption("rtl"), "remove_html": getEntryFilterOption("remove_html"), "remove_html_all": getEntryFilterOption("remove_html_all"), "normalize_html": getEntryFilterOption("normalize_html"), "skip_duplicate_headword": getEntryFilterOption("skip_duplicate_headword"), "trim_arabic_diacritics": getEntryFilterOption("trim_arabic_diacritics"), "unescape_word_links": getEntryFilterOption("unescape_word_links"), "color.enable.cmd.unix": BoolOption( hasFlag=False, comment="Enable colors in Linux/Unix command line", ), "color.enable.cmd.windows": BoolOption( hasFlag=False, comment="Enable colors in Windows command line", ), "color.cmd.critical": IntOption( hasFlag=False, comment="Color code for critical errors in command line", ), "color.cmd.error": IntOption( hasFlag=False, comment="Color code for errors in command line", ), "color.cmd.warning": IntOption( hasFlag=False, comment="Color code for warnings in command line", ), # interactive command line interface: "cmdi.prompt.indent.str": StrOption(hasFlag=False), "cmdi.prompt.indent.color": IntOption(hasFlag=False), "cmdi.prompt.msg.color": IntOption(hasFlag=False), "cmdi.msg.color": IntOption(hasFlag=False), # general GUI options "ui_autoSetFormat": BoolOption(hasFlag=False), # Tkinter "tk.progressbar.color.fill": StrOption( hasFlag=False, comment="Tkinter: progressbar fill color", ), "tk.progressbar.color.background": StrOption( hasFlag=False, comment="Tkinter: progressbar background color", ), "tk.progressbar.color.text": StrOption( hasFlag=False, comment="Tkinter: progressbar text color", ), "tk.progressbar.font": StrOption( hasFlag=False, comment='Tkinter: progressbar text font. 
Example: "Sans", "Sans 15"', ), # Reverse "reverse_matchWord": BoolOption(hasFlag=False), "reverse_showRel": StrOption(hasFlag=False), "reverse_saveStep": IntOption(hasFlag=False), "reverse_minRel": FloatOption(hasFlag=False), "reverse_maxNum": IntOption(hasFlag=False), "reverse_includeDefs": BoolOption(hasFlag=False), } conflictingParams = [ ("sqlite", "direct"), ("remove_html", "remove_html_all"), ] def __init__(self, **_kwargs) -> None: self.config = {} def progressInit(self, title: str) -> None: pass def progress(self, ratio: float, text: str = "") -> None: pass def progressEnd(self) -> None: self.progress(1.0) def loadConfig( self, user: bool = True, **options, ) -> None: from pyglossary.json_utils import jsonToData data = jsonToData(fread(rootConfJsonFile)) assert isinstance(data, dict) if user and isfile(confJsonFile): try: userData = jsonToData(fread(confJsonFile)) except Exception: log.exception( f"error while loading user config file {confJsonFile!r}", ) else: data.update(userData) for key in self.configDefDict: try: self.config[key] = data.pop(key) except KeyError: # noqa: PERF203 pass for key in data: log.warning( f"unknown config key {key!r}, you may edit {confJsonFile}" " file and remove this key", ) for key, value in options.items(): if key in self.configDefDict: self.config[key] = value log.setTimeEnable(self.config["log_time"]) log.debug(f"loaded config: {self.config}") def saveConfig(self) -> None: from pyglossary.json_utils import dataToPrettyJson config = {} for key, option in self.configDefDict.items(): if key not in self.config: log.warning(f"saveConfig: missing key {key!r}") continue value = self.config[key] if not option.validate(value): log.error(f"saveConfig: invalid {key}={value!r}") continue config[key] = value jsonStr = dataToPrettyJson(config) with open(confJsonFile, mode="w", encoding="utf-8") as _file: _file.write(jsonStr) log.info(f"saved {confJsonFile!r}") pyglossary-5.0.9/pyglossary/ui/dependency.py000066400000000000000000000026711476751035500213050ustar00rootroot00000000000000# -*- coding: utf-8 -*- # dependency.py # # Copyright © 2019-2019 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . 
# reqs = subprocess.check_output([sys.executable, '-m', 'pip', 'freeze']) # ^ this takes about 3 seconds # installed_packages = set(r.decode().split('==')[0] for r in reqs.split()) from __future__ import annotations __all__ = ["checkDepends"] def checkDepends(depends: dict[str, str]) -> list[str]: """Return the list of non-installed dependencies.""" if not depends: return [] not_installed = [] for moduleName, pkgName in depends.items(): try: __import__(moduleName) except ModuleNotFoundError: # noqa: PERF203 not_installed.append(pkgName) return not_installed pyglossary-5.0.9/pyglossary/ui/gtk3_utils/000077500000000000000000000000001476751035500206775ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/ui/gtk3_utils/__init__.py000066400000000000000000000002631476751035500230110ustar00rootroot00000000000000# mypy: ignore-errors # do not sort these imports! from gi.repository import Gtk as gtk # noqa: I001 from gi.repository import Gdk as gdk # noqa: I001 __all__ = ["gdk", "gtk"] pyglossary-5.0.9/pyglossary/ui/gtk3_utils/about.py000066400000000000000000000075371476751035500223770ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2020 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations from . 
import gtk from .utils import ( VBox, imageFromFile, pack, ) __all__ = ["AboutWidget"] class AboutWidget(gtk.Box): def __init__( # noqa: PLR0913 self, logo: str = "", header: str = "", about: str = "", authors: str = "", license_text: str = "", **_kwargs, ) -> None: gtk.Box.__init__(self, orientation=gtk.Orientation.VERTICAL) ## headerBox = gtk.Box(orientation=gtk.Orientation.HORIZONTAL) if logo: headerBox.pack_start(imageFromFile(logo), False, False, 0) headerLabel = gtk.Label(label=header) headerLabel.set_selectable(True) headerLabel.set_can_focus(False) headerBox.pack_start(headerLabel, False, False, 15) headerBox.show_all() self.pack_start(headerBox, False, False, 0) ## notebook = gtk.Notebook() self.notebook = notebook self.pack_start(notebook, True, True, 5) notebook.set_tab_pos(gtk.PositionType.LEFT) notebook.set_can_focus(True) ## tab1_about = self.newTabLabelWidget(about) tab2_authors = self.newTabWidgetTextView(authors) tab3_license = self.newTabWidgetTextView(license_text) ## tabs = [ (tab1_about, self.newTabTitle("About", "dialog-information-22.png")), (tab2_authors, self.newTabTitle("Authors", "author-22.png")), (tab3_license, self.newTabTitle("License", "license-22.png")), ] ## for widget, titleW in tabs: notebook.append_page(widget, titleW) ## self.show_all() # Something does not work with TextView @staticmethod def newTabWidgetTextView( text: str, wrap: bool = False, justification: gtk.Justification | None = None, ) -> gtk.ScrolledWindow: tv = gtk.TextView() tv.set_editable(False) tv.set_can_focus(False) if wrap: tv.set_wrap_mode(gtk.WrapMode.WORD) if justification is not None: tv.set_justification(justification) tv.set_cursor_visible(False) tv.set_border_width(10) buf = tv.get_buffer() # buf.insert_markup(buf.get_end_iter(), markup=text, # len=len(text.encode("utf-8"))) buf.set_text(text) tv.show_all() swin = gtk.ScrolledWindow() swin.set_policy(gtk.PolicyType.AUTOMATIC, gtk.PolicyType.AUTOMATIC) swin.set_border_width(0) swin.add(tv) return swin @staticmethod def newTabLabelWidget( text: str, # wrap: bool = False, # justification: "gtk.Justification | None" = None, ) -> gtk.ScrolledWindow: box = VBox() box.set_border_width(10) label = gtk.Label() label.set_selectable(True) label.set_xalign(0) label.set_yalign(0) pack(box, label, 0, 0) # if wrap: # tv.set_wrap_mode(gtk.WrapMode.WORD) # if justification is not None: # tv.set_justification(justification) label.set_can_focus(False) # label.set_border_width(10) label.set_markup(text) label.show_all() swin = gtk.ScrolledWindow() swin.set_policy(gtk.PolicyType.AUTOMATIC, gtk.PolicyType.AUTOMATIC) swin.set_border_width(0) swin.add(box) return swin @staticmethod def newTabTitle(title: str, icon: str) -> gtk.Box: box = gtk.Box(orientation=gtk.Orientation.VERTICAL) if icon: box.pack_start(imageFromFile(icon), False, False, 5) if title: box.pack_start(gtk.Label(label=title), False, False, 5) box.show_all() return box pyglossary-5.0.9/pyglossary/ui/gtk3_utils/dialog.py000066400000000000000000000026511476751035500225140ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2016-2017 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. 
# # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from collections.abc import Callable from gi.repository import Gdk as gdk from gi.repository import Gtk as gtk __all__ = ["MyDialog"] class MyDialog: def startWaiting(self) -> None: self.queue_draw() self.vbox.set_sensitive(False) self.get_window().set_cursor(gdk.Cursor.new(gdk.CursorType.WATCH)) while gtk.events_pending(): gtk.main_iteration_do(False) def endWaiting(self) -> None: self.get_window().set_cursor(gdk.Cursor.new(gdk.CursorType.LEFT_PTR)) self.vbox.set_sensitive(True) def waitingDo(self, func: Callable, *args, **kwargs) -> None: # noqa: ANN002 self.startWaiting() try: func(*args, **kwargs) except Exception as e: raise e finally: self.endWaiting() pyglossary-5.0.9/pyglossary/ui/gtk3_utils/resize_button.py000066400000000000000000000026101476751035500241440ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2016-2017 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations from typing import Any from . import gdk, gtk from .utils import imageFromFile __all__ = ["ResizeButton"] class ResizeButton(gtk.EventBox): def __init__( self, win: gtk.Window, edge: gdk.WindowEdge = gdk.WindowEdge.SOUTH_EAST, ) -> None: gtk.EventBox.__init__(self) self.win = win self.edge = edge ### self.image = imageFromFile("resize.png") self.add(self.image) self.connect("button-press-event", self.buttonPress) def buttonPress(self, _obj: Any, gevent: gdk.ButtonEvent) -> None: self.win.begin_resize_drag( self.edge, gevent.button, int(gevent.x_root), int(gevent.y_root), gevent.time, ) pyglossary-5.0.9/pyglossary/ui/gtk3_utils/utils.py000066400000000000000000000113521476751035500224130ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2016-2019 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
from __future__ import annotations import logging from os.path import isabs, join from typing import TYPE_CHECKING from gi.repository import Pango as pango from pyglossary.core import appResDir from . import gdk, gtk if TYPE_CHECKING: from collections.abc import Callable __all__ = [ "HBox", "VBox", "dialog_add_button", "imageFromFile", "pack", "rgba_parse", "showInfo", ] log = logging.getLogger("pyglossary") def VBox(**kwargs) -> gtk.Box: return gtk.Box(orientation=gtk.Orientation.VERTICAL, **kwargs) def HBox(**kwargs) -> gtk.Box: return gtk.Box(orientation=gtk.Orientation.HORIZONTAL, **kwargs) def imageFromFile(path: str) -> gtk.Image: # the file must exist if not isabs(path): path = join(appResDir, path) im = gtk.Image() try: im.set_from_file(path) except Exception: log.exception("") return im def imageFromIconName(iconName: str, size: int, nonStock: bool = False) -> gtk.Image: # So gtk.Image.new_from_stock is deprecated # And the doc says we should use gtk.Image.new_from_icon_name # which does NOT have the same functionality! # because not all stock items are existing in all themes (even popular themes) # and new_from_icon_name does not seem to look in other (non-default) themes! # So for now we use new_from_stock, unless it's not a stock item # But we do not use either of these two outside this function # So that it's easy to switch if nonStock: return gtk.Image.new_from_icon_name(iconName, size) try: return gtk.Image.new_from_stock(iconName, size) except Exception: return gtk.Image.new_from_icon_name(iconName, size) def rgba_parse(colorStr: str) -> gdk.RGBA: rgba = gdk.RGBA() if not rgba.parse(colorStr): raise ValueError(f"bad color string {colorStr!r}") return rgba def pack( box: gtk.Box | gtk.CellLayout, child: gtk.Widget | gtk.CellRenderer, expand: bool = False, fill: bool = False, padding: int = 0, ) -> None: if isinstance(box, gtk.Box): box.pack_start(child, expand, fill, padding) elif isinstance(box, gtk.CellLayout): box.pack_start(child, expand) else: raise TypeError(f"pack: unknown type {type(box)}") def dialog_add_button( dialog: gtk.Dialog, _iconName: str, # TODO: remove label: str, resId: int, onClicked: Callable | None = None, tooltip: str = "", ) -> None: b = dialog.add_button(label, resId) if onClicked: b.connect("clicked", onClicked) if tooltip: b.set_tooltip_text(tooltip) def showMsg( # noqa: PLR0913 msg: str, iconName: str = "", parent: gtk.Widget | None = None, transient_for: gtk.Widget | None = None, title: str = "", borderWidth: int = 10, iconSize: gtk.IconSize = gtk.IconSize.DIALOG, selectable: bool = False, ) -> None: win = gtk.Dialog( parent=parent, transient_for=transient_for, ) # flags=0 makes it skip task bar if title: win.set_title(title) hbox = HBox(spacing=10) hbox.set_border_width(borderWidth) if iconName: # win.set_icon(...) pack(hbox, imageFromIconName(iconName, iconSize)) label = gtk.Label(label=msg) # set_line_wrap(True) makes the window go crazy tall (taller than screen) # and that's the reason for label.set_size_request and win.resize label.set_line_wrap(True) label.set_line_wrap_mode(pango.WrapMode.WORD) label.set_size_request(500, 1) if selectable: label.set_selectable(True) pack(hbox, label) hbox.show_all() pack(win.vbox, hbox) dialog_add_button( win, "gtk-close", "_Close", gtk.ResponseType.OK, ) win.resize(600, 1) win.run() win.destroy() def showError(msg, **kwargs) -> None: # noqa: ANN001 # gtk-dialog-error is deprecated since version 3.10: # Use named icon “dialog-error”. 
showMsg(msg, iconName="gtk-dialog-error", **kwargs) def showWarning(msg, **kwargs) -> None: # noqa: ANN001 # gtk-dialog-warning is deprecated since version 3.10: # Use named icon “dialog-warning”. showMsg(msg, iconName="gtk-dialog-warning", **kwargs) def showInfo(msg, **kwargs) -> None: # noqa: ANN001 # gtk-dialog-info is deprecated since version 3.10: # Use named icon “dialog-information”. showMsg(msg, iconName="gtk-dialog-info", **kwargs) pyglossary-5.0.9/pyglossary/ui/main.py000066400000000000000000000167721476751035500201220ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # ui/main.py # # Copyright © 2008-2022 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see . from __future__ import annotations import argparse import logging import sys from dataclasses import dataclass from typing import Any, cast from pyglossary import core, logger # essential from pyglossary.langs import langDict from pyglossary.ui.argparse_main import configFromArgs, defineFlags, validateFlags from pyglossary.ui.base import UIBase __all__ = ["main", "mainNoExit"] # TODO: move to docs: # examples for read and write options: # --read-options testOption=stringValue # --read-options enableFoo=True # --read-options fooList=[1,2,3] # --read-options 'fooList=[1, 2, 3]' # --read-options 'testOption=stringValue; enableFoo=True; fooList=[1, 2, 3]' # --read-options 'testOption=stringValue;enableFoo=True;fooList=[1,2,3]' # if a desired value contains ";", you can use --json-read-options # or --json-write-options flags instead, with json object as value, # quoted for command line. for example: # '--json-write-options={"delimiter": ";"}' # the first thing to do is to set up logger. # other modules also using logger "root", so it is essential to set it up prior # to importing anything else; with exception to pyglossary.core which sets up # logger class, and so should be done before actually initializing logger. # verbosity level may be given on command line, so we have to parse arguments # before setting up logger. # once more: # - import system modules like os, sys, argparse etc and pyglossary.core # - parse args # - set up logger # - import submodules # - other code # no-progress-bar only for command line UI # TODO: load ui-dependent available options from ui modules # (for example ui_cmd.available_options) # the only problem is that it has to "import gtk" before it get the # "ui_gtk.available_options" # TODO # -v (verbose or version?) 
# -r (reverse or read-options) log: logger.Logger | None = None def validateLangStr(st: str) -> str | None: lang = langDict[st] if lang: return lang.name lang = langDict[st.lower()] if lang: return lang.name assert log log.error(f"unknown language {st!r}") return None convertOptionsKeys = ( "direct", "sort", "sortKeyName", "sortEncoding", "sqlite", ) infoOverrideSpec = ( ("sourceLang", validateLangStr), ("targetLang", validateLangStr), ("name", str), ) @dataclass(slots=True, frozen=True) class MainPrepareResult: args: argparse.Namespace uiType: str inputFilename: str outputFilename: str inputFormat: str | None outputFormat: str | None reverse: bool config: dict readOptions: dict[str, Any] writeOptions: dict[str, Any] convertOptions: dict[str, Any] def getConvertOptions(args: argparse.Namespace) -> dict[str, Any]: convertOptions: dict[str, Any] = {} for key in convertOptionsKeys: value = getattr(args, key, None) if value is not None: convertOptions[key] = value infoOverride: dict[str, str] = {} for key, validate in infoOverrideSpec: value = getattr(args, key, None) if value is None: continue value = validate(value) if value is None: continue infoOverride[key] = value if infoOverride: convertOptions["infoOverride"] = infoOverride return convertOptions # PLR0911 Too many return statements (7 > 6) def mainPrepare(argv: list[str]) -> tuple[bool, MainPrepareResult | None]: # noqa: PLR0911 global log uiBase = UIBase() uiBase.loadConfig() config = uiBase.config parser = argparse.ArgumentParser( prog=argv[0], add_help=False, # allow_abbrev=False, ) defineFlags(parser, config) # _______________________________ args = parser.parse_args(argv[1:]) # parser.conflict_handler == "error" if args.version: from pyglossary.ui.version import getVersion print(f"PyGlossary {getVersion()}") return True, None log = cast("logger.Logger", logging.getLogger("pyglossary")) if args.ui_type == "none": args.noColor = True core.noColor = args.noColor logHandler = logger.StdLogHandler( noColor=args.noColor, ) log.setVerbosity(args.verbosity) log.addHandler(logHandler) # with the logger set up, we can import other pyglossary modules, so they # can do some logging in right way. 
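
# The shape of getConvertOptions() above: copy only the flags the user
# actually set (argparse defaults are None, meaning "not given"), then run
# each info override through its validator and keep it only if it survives.
# A generic standalone version of that pattern, with validators given as
# (key, validate) pairs like infoOverrideSpec:
def collect_options(args, keys, validators):
	opts = {k: getattr(args, k) for k in keys if getattr(args, k, None) is not None}
	info = {}
	for key, validate in validators:
		value = getattr(args, key, None)
		if value is None:
			continue
		value = validate(value)  # validators return None to reject a value
		if value is not None:
			info[key] = value
	if info:
		opts["infoOverride"] = info
	return opts
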
if not validateFlags(args, log): return False, None if args.sqlite: # args.direct is None by default which means automatic args.direct = False core.checkCreateConfDir() if sys.getdefaultencoding() != "utf-8": log.warning(f"System encoding is not utf-8, it's {sys.getdefaultencoding()!r}") ############################## from pyglossary.glossary_v2 import Glossary from pyglossary.ui.ui_cmd import printHelp Glossary.init() if core.isDebug(): log.debug(f"en -> {langDict['en']!r}") ############################## # log.info(f"PyGlossary {core.VERSION}") if args.help: printHelp() return True, None from pyglossary.ui.option_ui import ( evaluateReadOptions, evaluateWriteOptions, parseReadWriteOptions, ) # only used in ui_cmd for now rwOpts, err = parseReadWriteOptions(args) if err: log.error(err) if rwOpts is None: return False, None readOptions, writeOptions = rwOpts config.update(configFromArgs(args, log)) logHandler.config = config convertOptions = getConvertOptions(args) if args.inputFilename and readOptions: readOptions, err = evaluateReadOptions( readOptions, args.inputFilename, args.inputFormat, ) if err: log.error(err) if readOptions is None: return False, None if args.outputFilename and writeOptions: writeOptions, err = evaluateWriteOptions( writeOptions, args.inputFilename, args.outputFilename, args.outputFormat, ) if err: log.error(err) if writeOptions is None: return False, None if convertOptions: log.debug(f"{convertOptions = }") return True, MainPrepareResult( args=args, uiType=args.ui_type, inputFilename=args.inputFilename, outputFilename=args.outputFilename, inputFormat=args.inputFormat, outputFormat=args.outputFormat, reverse=args.reverse, config=config, readOptions=readOptions, writeOptions=writeOptions, convertOptions=convertOptions, ) def mainNoExit(argv: list[str]) -> bool: # noqa: PLR0912 ok, res = mainPrepare(argv) if not ok: return False if res is None: # --version or --help return True from pyglossary.ui.runner import getRunner assert log run = getRunner(res.args, res.uiType, log) if run is None: return False try: return run( inputFilename=res.inputFilename, outputFilename=res.outputFilename, inputFormat=res.inputFormat, outputFormat=res.outputFormat, reverse=res.reverse, config=res.config, readOptions=res.readOptions, writeOptions=res.writeOptions, convertOptions=res.convertOptions, glossarySetAttrs=None, ) except KeyboardInterrupt: log.error("Cancelled") return False def main() -> None: sys.exit(int(not mainNoExit(sys.argv))) pyglossary-5.0.9/pyglossary/ui/option_ui.py000066400000000000000000000100301476751035500211600ustar00rootroot00000000000000from __future__ import annotations import json from typing import TYPE_CHECKING, Any from pyglossary.ui.argparse_utils import StoreConstAction if TYPE_CHECKING: import argparse from pyglossary.option import Option def registerConfigOption( parser: argparse.ArgumentParser, key: str, option: Option, ) -> None: if not option.hasFlag: return flag = option.customFlag if not flag: flag = key.replace("_", "-") if option.typ != "bool": parser.add_argument( f"--{flag}", dest=key, default=None, help=option.comment, ) return if not option.comment: print(f"registerConfigOption: option has no comment: {option}") return if not option.falseComment: parser.add_argument( f"--{flag}", dest=key, action="store_true", default=None, help=option.comment, ) return parser.add_argument( dest=key, action=StoreConstAction( f"--{flag}", same_dest=f"--no-{flag}", const_value=True, dest=key, default=None, help=option.comment, ), ) parser.add_argument( dest=key, 
action=StoreConstAction( f"--no-{flag}", same_dest=f"--{flag}", const_value=False, dest=key, default=None, help=option.falseComment, ), ) def evaluateReadOptions( options: dict[str, Any], inputFilename: str, inputFormat: str | None, ) -> tuple[dict[str, Any] | None, str | None]: from pyglossary.glossary_v2 import Glossary inputArgs = Glossary.detectInputFormat( inputFilename, formatName=inputFormat, ) if not inputArgs: return None, f"Could not detect format for input file {inputFilename}" inputFormat = inputArgs.formatName optionsProp = Glossary.plugins[inputFormat].optionsProp for optName, optValue in options.items(): if optName not in Glossary.formatsReadOptions[inputFormat]: return None, f"Invalid option name {optName} for format {inputFormat}" prop = optionsProp[optName] optValueNew, ok = prop.evaluate(optValue) if not ok or not prop.validate(optValueNew): return ( None, f"Invalid option value {optName}={optValue!r} for format {inputFormat}", ) options[optName] = optValueNew return options, None def evaluateWriteOptions( options: dict[str, Any], inputFilename: str, outputFilename: str, outputFormat: str | None, ) -> tuple[dict[str, Any] | None, str | None]: from pyglossary.glossary_v2 import Glossary outputArgs = Glossary.detectOutputFormat( filename=outputFilename, formatName=outputFormat, inputFilename=inputFilename, ) if outputArgs is None: return None, "failed to detect output format" outputFormat = outputArgs.formatName optionsProp = Glossary.plugins[outputFormat].optionsProp for optName, optValue in options.items(): if optName not in Glossary.formatsWriteOptions[outputFormat]: return None, f"Invalid option name {optName} for format {outputFormat}" prop = optionsProp[optName] optValueNew, ok = prop.evaluate(optValue) if not ok or not prop.validate(optValueNew): return ( None, f"Invalid option value {optName}={optValue!r} " f"for format {outputFormat}", ) options[optName] = optValueNew return options, None def parseReadWriteOptions( args: argparse.Namespace, ) -> tuple[tuple[dict[str, Any], dict[str, Any]] | None, str | None]: from pyglossary.ui.ui_cmd import parseFormatOptionsStr readOptions = parseFormatOptionsStr(args.readOptions) if readOptions is None: return None, "" if args.jsonReadOptions: newReadOptions = json.loads(args.jsonReadOptions) if not isinstance(newReadOptions, dict): return None, ( "invalid value for --json-read-options, " f"must be an object/dict, not {type(newReadOptions)}" ) readOptions.update(newReadOptions) writeOptions = parseFormatOptionsStr(args.writeOptions) if writeOptions is None: return None, "" if args.jsonWriteOptions: newWriteOptions = json.loads(args.jsonWriteOptions) if not isinstance(newWriteOptions, dict): return None, ( "invalid value for --json-write-options, " f"must be an object/dict, not {type(newWriteOptions)}" ) writeOptions.update(newWriteOptions) return (readOptions, writeOptions), None pyglossary-5.0.9/pyglossary/ui/pbar_legacy.py000066400000000000000000000006561476751035500214400ustar00rootroot00000000000000# mypy: ignore-errors from . 
import progressbar as pb __all__ = ["createProgressBar"] def createProgressBar(title: str) -> pb.ProgressBar: rot = pb.RotatingMarker() pbar = pb.ProgressBar( maxval=1.0, # update_step=0.5, removed ) pbar.widgets = [ title + " ", pb.AnimatedMarker(), " ", pb.Bar(marker="█"), pb.Percentage(), " ", pb.ETA(), ] pbar.start(num_intervals=1000) rot.pbar = pbar return pbar pyglossary-5.0.9/pyglossary/ui/pbar_tqdm.py000066400000000000000000000026571476751035500211440ustar00rootroot00000000000000# mypy: ignore-errors from __future__ import annotations from typing import TYPE_CHECKING, Any from tqdm import tqdm if TYPE_CHECKING: from collections.abc import MutableMapping __all__ = ["createProgressBar"] def createProgressBar(title: str) -> MyTqdm: return MyTqdm( total=1.0, desc=title, ) class MyTqdm(tqdm): @property def format_dict(self) -> MutableMapping[str, Any]: d = super().format_dict # return dict( # n=self.n, total=self.total, # elapsed=self._time() - self.start_t # if hasattr(self, 'start_t') else 0, # ncols=ncols, nrows=nrows, # prefix=self.desc, ascii=self.ascii, unit=self.unit, # unit_scale=self.unit_scale, # rate=1 / self.avg_time if self.avg_time else None, # bar_format=self.bar_format, postfix=self.postfix, # unit_divisor=self.unit_divisor, initial=self.initial, # colour=self.colour, # ) d["bar_format"] = ( "{desc}: %{percentage:04.1f} |" "{bar}|[{elapsed}<{remaining}" ", {rate_fmt}{postfix}]" ) # Possible vars: # l_bar, bar, r_bar, n, n_fmt, total, total_fmt, # percentage, elapsed, elapsed_s, ncols, nrows, desc, unit, # rate, rate_fmt, rate_noinv, rate_noinv_fmt, # rate_inv, rate_inv_fmt, postfix, unit_divisor, # remaining, remaining_s. return d def update(self, ratio: float) -> None: tqdm.update(self, ratio - self.n) def finish(self) -> None: self.close() @property def term_width(self) -> int: return self.ncols pyglossary-5.0.9/pyglossary/ui/progressbar/000077500000000000000000000000001476751035500211405ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/ui/progressbar/__init__.py000066400000000000000000000035511476751035500232550ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # progressbar - Text progress bar library for Python. # Copyright (c) 2005 Nilton Volpato # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA """ Text progress bar library for Python. A text progress bar is typically used to display the progress of a long running operation, providing a visual cue that processing is underway. The ProgressBar class manages the current progress, and the format of the line is given by a number of widgets. A widget is an object that may display differently depending on the state of the progress bar. 
There are three types of widgets: - a string, which always shows itself - a ProgressBarWidget, which may return a different value every time its update method is called - a ProgressBarWidgetHFill, which is like ProgressBarWidget, except it expands to fill the remaining width of the line. The progressbar module is very easy to use, yet very powerful. It will also automatically enable features like auto-resizing when the system supports it. """ __author__ = 'Nilton Volpato' __author_email__ = 'nilton.volpato@gmail.com' __date__ = '2011-05-14' __version__ = '2.5' from .progressbar import * # noqa: F403 from .widgets import * # noqa: F403 pyglossary-5.0.9/pyglossary/ui/progressbar/progressbar.py000066400000000000000000000231721476751035500240500ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # progressbar - Text progress bar library for Python. # Copyright (c) 2023 Saeed Rasooli # Copyright (c) 2005 Nilton Volpato # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import annotations """Main ProgressBar class.""" import math import os import signal import sys import time try: import termios from array import array from fcntl import ioctl except ImportError: pass from . import widgets class ProgressBar: """ The ProgressBar class which updates and prints the bar. A common way of using it is like: >>> pbar = ProgressBar().start() >>> for i in range(100): ... # do something ... pbar.update(i+1) ... >>> pbar.finish() You can also use a ProgressBar as an iterator: >>> progress = ProgressBar() >>> for i in progress(some_iterable): ... # do something ... Since the progress bar is incredibly customizable you can specify different widgets of any type in any order. You can even write your own widgets! However, since there are already a good number of widgets you should probably play around with them before moving on to create your own widgets. The term_width parameter represents the current terminal width. If the parameter is set to an integer then the progress bar will use that, otherwise it will attempt to determine the terminal width falling back to 80 columns if the width cannot be determined. When implementing a widget's update method you are passed a reference to the current progress bar. As a result, you have access to the ProgressBar's methods and attributes. Although there is nothing preventing you from changing the ProgressBar you should treat it as read only. 
Useful methods and attributes include (Public API): - currval: current progress (0 <= currval <= maxval) - maxval: maximum (and final) value - finished: True if the bar has finished (reached 100%) - start_time: the time when start() method of ProgressBar was called - seconds_elapsed: seconds elapsed since start_time and last call to update - percentage(): progress in percent [0..100] """ __slots__ = ( "__iterable", "_time_sensitive", "currval", "fd", "finished", "last_update_time", "left_justify", "maxval", "next_update", "num_intervals", "poll", "seconds_elapsed", "signal_set", "start_time", "term_width", "update_interval", "widgets", ) _DEFAULT_MAXVAL = 100 _DEFAULT_TERMSIZE = 80 _DEFAULT_WIDGETS = [widgets.Percentage(), " ", widgets.Bar()] def __init__( self, maxval=None, widgets=None, term_width: int | None = None, poll=1, left_justify=True, fd=None, ) -> None: """Initializes a progress bar with sane defaults.""" # Don't share a reference with any other progress bars if widgets is None: widgets = self._DEFAULT_WIDGETS.copy() self.maxval = maxval self.widgets = widgets self.fd = fd if fd is not None else sys.stderr self.left_justify = left_justify self.signal_set = False if term_width is not None: self.term_width = term_width else: try: self._handle_resize() signal.signal(signal.SIGWINCH, self._handle_resize) self.signal_set = True except (SystemExit, KeyboardInterrupt): raise except: # noqa: E722 self.term_width = self._env_size() self.__iterable = None self._update_widgets() self.currval = 0 self.finished = False self.last_update_time = None self.poll = poll self.seconds_elapsed = 0 self.start_time = None self.update_interval = 1 self.next_update = 0 def __call__(self, iterable): """Use a ProgressBar to iterate through an iterable.""" try: self.maxval = len(iterable) except TypeError: if self.maxval is None: self.maxval = widgets.UnknownLength self.__iterable = iter(iterable) return self def __iter__(self): return self def __next__(self): try: value = next(self.__iterable) if self.start_time is None: self.start() else: self.update(self.currval + 1) return value except StopIteration: if self.start_time is None: self.start() self.finish() raise def _env_size(self): """Tries to find the term_width from the environment.""" return int(os.environ.get("COLUMNS", self._DEFAULT_TERMSIZE)) - 1 def _handle_resize(self, signum=None, frame=None): """Tries to catch resize signals sent from the terminal.""" h, w = array("h", ioctl(self.fd, termios.TIOCGWINSZ, "\0" * 8))[:2] self.term_width = w def percentage(self): """Returns the progress as a percentage.""" if self.maxval is widgets.UnknownLength: return float("NaN") if self.currval >= self.maxval: return 100.0 return (self.currval * 100.0 / self.maxval) if self.maxval else 100.00 percent = property(percentage) def _format_widgets(self): result = [] expanding = [] width = self.term_width for index, widget in enumerate(self.widgets): if isinstance(widget, widgets.WidgetHFill): result.append(widget) expanding.insert(0, index) else: widget = widgets.format_updatable(widget, self) result.append(widget) width -= len(widget) count = len(expanding) while count: portion = max(int(math.ceil(width * 1. 
/ count)), 0) index = expanding.pop() count -= 1 widget = result[index].update(self, portion) width -= len(widget) result[index] = widget return result def _format_line(self): """Joins the widgets and justifies the line.""" widgets = "".join(self._format_widgets()) if self.left_justify: return widgets.ljust(self.term_width) return widgets.rjust(self.term_width) def _need_update(self): """Returns whether the ProgressBar should redraw the line.""" if self.currval >= self.next_update or self.finished: return True return self._time_sensitive and time.perf_counter() - self.last_update_time > self.poll def _update_widgets(self): """Checks all widgets for the time sensitive bit.""" self._time_sensitive = any( getattr(w, "TIME_SENSITIVE", False) for w in self.widgets ) def update(self, value=None): """Updates the ProgressBar to a new value.""" if value is not None and value is not widgets.UnknownLength: if ( self.maxval is not widgets.UnknownLength and not 0 <= value <= self.maxval ): raise ValueError("Value out of range") self.currval = value if not self._need_update(): return if self.start_time is None: raise RuntimeError('You must call "start" before calling "update"') now = time.perf_counter() self.seconds_elapsed = now - self.start_time self.next_update = self.currval + self.update_interval self.fd.write(self._format_line() + "\r") self.fd.flush() self.last_update_time = now def start(self, num_intervals=0): """ Starts measuring time, and prints the bar at 0%. It returns self so you can use it like this: >>> pbar = ProgressBar().start() >>> for i in range(100): ... # do something ... pbar.update(i+1) ... >>> pbar.finish() """ if self.maxval is None: self.maxval = self._DEFAULT_MAXVAL if num_intervals > 0: self.num_intervals = num_intervals else: self.num_intervals = max(100, self.term_width) self.next_update = 0 if self.maxval is not widgets.UnknownLength: if self.maxval < 0: raise ValueError("Value out of range") self.update_interval = self.maxval / self.num_intervals self.start_time = self.last_update_time = time.perf_counter() self.update(0) return self def finish(self): """Puts the ProgressBar bar in the finished state.""" if self.finished: return self.finished = True self.update(self.maxval) self.fd.write("\n") if self.signal_set: signal.signal(signal.SIGWINCH, signal.SIG_DFL) pyglossary-5.0.9/pyglossary/ui/progressbar/widgets.py000066400000000000000000000266731476751035500231760ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # progressbar - Text progress bar library for Python. # Copyright (c) 2005 Nilton Volpato # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA """Default ProgressBar widgets.""" from __future__ import annotations from __future__ import division import datetime import math try: from abc import ABCMeta, abstractmethod except ImportError: AbstractWidget = object def abstractmethod(fn): return fn else: AbstractWidget = ABCMeta('AbstractWidget', (object,), {}) class UnknownLength: pass def format_updatable(updatable, pbar): if hasattr(updatable, 'update'): return updatable.update(pbar) return updatable class Widget(AbstractWidget): """ The base class for all widgets. The ProgressBar will call the widget's update value when the widget should be updated. The widget's size may change between calls, but the widget may display incorrectly if the size changes drastically and repeatedly. The boolean TIME_SENSITIVE informs the ProgressBar that it should be updated more often because it is time sensitive. """ TIME_SENSITIVE = False __slots__ = () @abstractmethod def update(self, pbar): """ Updates the widget. pbar - a reference to the calling ProgressBar """ class WidgetHFill(Widget): """ The base class for all variable width widgets. This widget is much like the \\hfill command in TeX, it will expand to fill the line. You can use more than one in the same line, and they will all have the same width, and together will fill the line. """ @abstractmethod def update(self, pbar, width): """ Updates the widget providing the total width the widget must fill. pbar - a reference to the calling ProgressBar width - The total width the widget must fill """ class Timer(Widget): """Widget which displays the elapsed seconds.""" __slots__ = ('format_string',) TIME_SENSITIVE = True def __init__(self, format='Elapsed Time: %s') -> None: self.format_string = format @staticmethod def format_time(seconds): """Formats time as the string "HH:MM:SS".""" return str(datetime.timedelta(seconds=int(seconds))) def update(self, pbar): """Updates the widget to show the elapsed time.""" return self.format_string % self.format_time(pbar.seconds_elapsed) class ETA(Timer): """Widget which attempts to estimate the time of arrival.""" TIME_SENSITIVE = True def update(self, pbar): """Updates the widget to show the ETA or total time when finished.""" if pbar.maxval is UnknownLength or pbar.currval == 0: return 'ETA: --:--:--' if pbar.finished: return f'Time: {self.format_time(pbar.seconds_elapsed)}' elapsed = pbar.seconds_elapsed eta = elapsed * pbar.maxval / pbar.currval - elapsed return f'ETA: {self.format_time(eta)}' class AdaptiveETA(Timer): """ Widget which attempts to estimate the time of arrival. Uses a weighted average of two estimates: 1) ETA based on the total progress and time elapsed so far 2) ETA based on the progress as per the last 10 update reports The weight depends on the current progress so that to begin with the total progress is used and at the end only the most recent progress is used. 
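In symbols, restating the update() method below (which keeps the last NUM_SAMPLES (currval, elapsed) pairs):

    weight = (currval / maxval) ** 0.5
    eta = (1 - weight) * eta_total + weight * eta_recent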
""" TIME_SENSITIVE = True NUM_SAMPLES = 10 def _update_samples(self, currval, elapsed): sample = (currval, elapsed) if not hasattr(self, 'samples'): self.samples = [sample] * (self.NUM_SAMPLES + 1) else: self.samples.append(sample) return self.samples.pop(0) def _eta(self, maxval, currval, elapsed): return elapsed * maxval / float(currval) - elapsed def update(self, pbar): """Updates the widget to show the ETA or total time when finished.""" if pbar.maxval is UnknownLength or pbar.currval == 0: return 'ETA: --:--:--' if pbar.finished: return f'Time: {self.format_time(pbar.seconds_elapsed)}' elapsed = pbar.seconds_elapsed currval1, elapsed1 = self._update_samples(pbar.currval, elapsed) eta = self._eta(pbar.maxval, pbar.currval, elapsed) if pbar.currval > currval1: etasamp = self._eta(pbar.maxval - currval1, pbar.currval - currval1, elapsed - elapsed1) weight = (pbar.currval / float(pbar.maxval)) ** 0.5 eta = (1 - weight) * eta + weight * etasamp return f'ETA: {self.format_time(eta)}' class FileTransferSpeed(Widget): """Widget for showing the transfer speed (useful for file transfers).""" FMT = '%6.2f %s%s/s' PREFIXES = ' kMGTPEZY' __slots__ = ('unit',) def __init__(self, unit='B') -> None: self.unit = unit def update(self, pbar): """Updates the widget with the current SI prefixed speed.""" if pbar.seconds_elapsed < 2e-6 or pbar.currval < 2e-6: # =~ 0 scaled = power = 0 else: speed = pbar.currval / pbar.seconds_elapsed power = int(math.log(speed, 1000)) scaled = speed / 1000.**power return self.FMT % (scaled, self.PREFIXES[power], self.unit) class AnimatedMarker(Widget): """ An animated marker for the progress bar which defaults to appear as if it were rotating. """ __slots__ = ('markers', 'curmark') def __init__(self, markers='|/-\\') -> None: self.markers = markers self.curmark = -1 def update(self, pbar): """ Updates the widget to show the next marker or the first marker when finished. 
""" if pbar.finished: return self.markers[0] self.curmark = (self.curmark + 1) % len(self.markers) return self.markers[self.curmark] # Alias for backwards compatibility RotatingMarker = AnimatedMarker class Counter(Widget): """Displays the current count.""" __slots__ = ('format_string',) def __init__(self, format='%d') -> None: self.format_string = format def update(self, pbar): return self.format_string % pbar.currval class Percentage(Widget): """Displays the current percentage as a number with a percent sign.""" def __init__(self, prefix="%") -> None: Widget.__init__(self) self.prefix = prefix def update(self, pbar): return f"{self.prefix}{pbar.percentage():.1f}"\ .rjust(5 + len(self.prefix)) class FormatLabel(Timer): """Displays a formatted label.""" mapping = { 'elapsed': ('seconds_elapsed', Timer.format_time), 'finished': ('finished', None), 'last_update': ('last_update_time', None), 'max': ('maxval', None), 'seconds': ('seconds_elapsed', None), 'start': ('start_time', None), 'value': ('currval', None), } __slots__ = ('format_string',) def __init__(self, format) -> None: self.format_string = format def update(self, pbar): context = {} for name, (key, transform) in self.mapping.items(): try: value = getattr(pbar, key) if transform is None: context[name] = value else: context[name] = transform(value) except: # noqa: E722 pass # noqa: S110 return self.format_string % context class SimpleProgress(Widget): """Returns progress as a count of the total (e.g.: "5 of 47").""" __slots__ = ('sep',) def __init__(self, sep=' of ') -> None: self.sep = sep def update(self, pbar): if pbar.maxval is UnknownLength: return '%d%s?' % (pbar.currval, self.sep) return '%d%s%s' % (pbar.currval, self.sep, pbar.maxval) class Bar(WidgetHFill): """A progress bar which stretches to fill the line.""" __slots__ = ('marker', 'left', 'right', 'fill', 'fill_left') def __init__(self, marker='#', left='|', right='|', fill=' ', fill_left=True) -> None: """ Creates a customizable progress bar. marker - string or updatable object to use as a marker left - string or updatable object to use as a left border right - string or updatable object to use as a right border fill - character to use for the empty part of the progress bar fill_left - whether to fill from the left or the right """ self.marker = marker self.left = left self.right = right self.fill = fill self.fill_left = fill_left def update(self, pbar, width): """Updates the progress bar and its subcomponents.""" left, marked, right = ( format_updatable(i, pbar) for i in (self.left, self.marker, self.right) ) width -= len(left) + len(right) # Marked must *always* have length of 1 if pbar.maxval is not UnknownLength and pbar.maxval: marked *= int(pbar.currval / pbar.maxval * width) else: marked = '' if self.fill_left: return f'{left}{marked.ljust(width, self.fill)}{right}' return f'{left}{marked.rjust(width, self.fill)}{right}' class ReverseBar(Bar): """A bar which has a marker which bounces from side to side.""" def __init__(self, marker='#', left='|', right='|', fill=' ', fill_left=False) -> None: """ Creates a customizable progress bar. 
marker - string or updatable object to use as a marker left - string or updatable object to use as a left border right - string or updatable object to use as a right border fill - character to use for the empty part of the progress bar fill_left - whether to fill from the left or the right """ self.marker = marker self.left = left self.right = right self.fill = fill self.fill_left = fill_left class BouncingBar(Bar): def update(self, pbar, width): """Updates the progress bar and its subcomponents.""" left, marker, right = (format_updatable(i, pbar) for i in (self.left, self.marker, self.right)) width -= len(left) + len(right) if pbar.finished: return f'{left}{width * marker}{right}' position = int(pbar.currval % (width * 2 - 1)) if position > width: position = width * 2 - position lpad = self.fill * (position - 1) rpad = self.fill * (width - len(marker) - len(lpad)) # Swap if we want to bounce the other way if not self.fill_left: rpad, lpad = lpad, rpad return f'{left}{lpad}{marker}{rpad}{right}' pyglossary-5.0.9/pyglossary/ui/runner.py000066400000000000000000000064761476751035500205070ustar00rootroot00000000000000from __future__ import annotations import os import sys from typing import TYPE_CHECKING from pyglossary import core from pyglossary.glossary_v2 import Error from pyglossary.ui.base import UIBase if TYPE_CHECKING: import argparse import logging from collections.abc import Callable from typing import Any ui_list = ["gtk3", "gtk4", "tk", "web"] if os.sep == "\\" or core.sysName == "darwin": # windows or mac ui_list = ["tk", "gtk3", "gtk4", "web"] log: logging.Logger | None = None def canRunGUI() -> bool: if core.sysName == "linux": return bool(os.getenv("DISPLAY")) if core.sysName == "darwin": try: import tkinter # noqa: F401 except ModuleNotFoundError: return False return True def shouldUseCMD(args: argparse.Namespace) -> bool: if not canRunGUI(): return True if args.interactive: return True return bool(args.inputFilename and args.outputFilename) def base_ui_run( # noqa: PLR0913 inputFilename: str = "", outputFilename: str = "", inputFormat: str = "", outputFormat: str = "", reverse: bool = False, config: dict[str, Any] | None = None, readOptions: dict[str, Any] | None = None, writeOptions: dict[str, Any] | None = None, convertOptions: dict[str, Any] | None = None, glossarySetAttrs: dict[str, Any] | None = None, ) -> bool: from pyglossary.glossary_v2 import ConvertArgs, Glossary assert log if reverse: log.error("--reverse does not work with --ui=none") return False ui = UIBase(progressbar=False) ui.loadConfig(**config) glos = Glossary(ui=ui) glos.config = ui.config glos.progressbar = False if glossarySetAttrs: for attr, value in glossarySetAttrs.items(): setattr(glos, attr, value) try: glos.convert( ConvertArgs( inputFilename=inputFilename, outputFilename=outputFilename, inputFormat=inputFormat, outputFormat=outputFormat, readOptions=readOptions, writeOptions=writeOptions, **convertOptions, ), ) except Error as e: log.critical(str(e)) glos.cleanup() return False return True def getRunner( args: argparse.Namespace, ui_type: str, logArg: logging.Logger, ) -> Callable | None: global log log = logArg if ui_type == "none": return base_ui_run if ui_type == "auto" and shouldUseCMD(args): ui_type = "cmd" uiArgs = { "progressbar": args.progressbar is not False, } if ui_type == "cmd": if args.interactive: from pyglossary.ui.ui_cmd_interactive import UI elif args.inputFilename and args.outputFilename: from pyglossary.ui.ui_cmd import UI elif not args.no_interactive: from 
pyglossary.ui.ui_cmd_interactive import UI else: log.error("no input file given, try --help") return None return UI(**uiArgs).run if ui_type == "gtk": ui_type = "gtk3" if ui_type == "auto": if not args.no_interactive and sys.stdin.isatty(): ui_list.insert(3, "cmd_interactive") log.debug(f"{ui_list = }") for ui_type2 in ui_list: try: ui_module = __import__( f"pyglossary.ui.ui_{ui_type2}", fromlist=f"ui_{ui_type2}", ) except ImportError as e: # noqa: PERF203 log.error(str(e)) else: return ui_module.UI(**uiArgs).run log.error( "no user interface module found! " f'try "{sys.argv[0]} -h" to see command line usage', ) return None ui_module = __import__( f"pyglossary.ui.ui_{ui_type}", fromlist=f"ui_{ui_type}", ) return ui_module.UI(**uiArgs).run pyglossary-5.0.9/pyglossary/ui/termcolors.py000066400000000000000000001012211476751035500213470ustar00rootroot00000000000000from collections import namedtuple ColorProp = namedtuple( "ColorProp", ( "code", # int "rgb", # Tuple[float, float, float] "hsl", # Tuple[Number, Number, Number] "hex", # str "names", # List[str] ), ) colors = [ ColorProp( code=0, rgb=(0, 0, 0), hsl=(0, 0, 0), hex="#000000", names=["black", "css:black"], ), ColorProp( code=1, rgb=(170, 0, 0), hsl=(0, 1, 0.3333333333333333), hex="#aa0000", names=["dark red 1", "Heartbeat"], ), ColorProp( code=2, rgb=(0, 170, 0), hsl=(120, 1, 0.3333333333333333), hex="#00aa00", names=["mixed green 1", "Phosphor Green"], ), ColorProp( code=3, rgb=(170, 85, 0), hsl=(30, 1, 0.3333333333333333), hex="#aa5500", names=["mixed orange 1", "Tijolo"], ), ColorProp( code=4, rgb=(0, 0, 170), hsl=(240, 1, 0.3333333333333333), hex="#0000aa", names=["mixed blue 1", "Bohemian Blue"], ), ColorProp( code=5, rgb=(170, 0, 170), hsl=(300, 1, 0.3333333333333333), hex="#aa00aa", names=["purple 1", "Purple Potion"], ), ColorProp( code=6, rgb=(0, 170, 170), hsl=(180, 1, 0.3333333333333333), hex="#00aaaa", names=["cyan 1", "Jade Orchid"], ), ColorProp( code=7, rgb=(185, 185, 185), hsl=(0, 0, 0.7254901960784313), hex="#b9b9b9", names=["light gray", "Covered in Platinum"], ), ColorProp( code=8, rgb=(85, 85, 85), hsl=(0, 0, 0.3333333333333333), hex="#555555", names=["dark gray", "Stone Cold Gray"], ), ColorProp( code=9, rgb=(255, 85, 85), hsl=(0, 1, 0.6666666666666666), hex="#ff5555", names=["light red", "Fluorescent Red"], ), ColorProp( code=10, rgb=(85, 255, 85), hsl=(120, 1, 0.6666666666666666), hex="#55ff55", names=["light green", "Puyo Blob Green"], ), ColorProp( code=11, rgb=(255, 255, 85), hsl=(60, 1, 0.6666666666666666), hex="#ffff55", names=["yellow", "Pīlā Yellow"], ), ColorProp( code=12, rgb=(85, 85, 255), hsl=(240, 1, 0.6666666666666666), hex="#5555ff", names=["light blue", "Shady Neon Blue"], ), ColorProp( code=13, rgb=(255, 85, 255), hsl=(300, 1, 0.6666666666666666), hex="#ff55ff", names=["light purple", "Ultimate Pink"], ), ColorProp( code=14, rgb=(85, 255, 255), hsl=(180, 1, 0.6666666666666666), hex="#55ffff", names=["light cyan", "Electric Sheep"], ), ColorProp( code=15, rgb=(255, 255, 255), hsl=(0, 0, 1), hex="#ffffff", names=["white", "css:white"], ), ColorProp( code=16, rgb=(0, 0, 0), hsl=(0, 0, 0), hex="#000000", names=["black", "css:black"], ), ColorProp( code=17, rgb=(0, 0, 95), hsl=(240, 1, 0.18627450980392157), hex="#00005f", names=["blue 4"], ), ColorProp( code=18, rgb=(0, 0, 135), hsl=(240, 1, 0.2647058823529412), hex="#000087", names=["blue 3", "css:darkblue", "Midnight in Tokyo"], ), ColorProp( code=19, rgb=(0, 0, 175), hsl=(240, 1, 0.3431372549019608), hex="#0000af", names=["blue 2"], ), ColorProp( 
code=20, rgb=(0, 0, 215), hsl=(240, 1, 0.4215686274509804), hex="#0000d7", names=["blue 1", "css:mediumblue", "Bluealicious"], ), ColorProp( code=21, rgb=(0, 0, 255), hsl=(240, 1, 0.5), hex="#0000ff", names=["blue", "css:blue"], ), ColorProp( code=22, rgb=(0, 95, 0), hsl=(120, 1, 0.18627450980392157), hex="#005f00", names=["green 4", "css:darkgreen", "Cucumber", "Pakistan Green"], ), ColorProp( code=23, rgb=(0, 95, 95), hsl=(180, 1, 0.18627450980392157), hex="#005f5f", names=["blue stone"], ), ColorProp( code=24, rgb=(0, 95, 135), hsl=(197.77777777777777, 1, 0.2647058823529412), hex="#005f87", names=["orient"], ), ColorProp( code=25, rgb=(0, 95, 175), hsl=(207.42857142857144, 1, 0.3431372549019608), hex="#005faf", names=[], ), ColorProp( code=26, rgb=(0, 95, 215), hsl=(213.48837209302326, 1, 0.4215686274509804), hex="#005fd7", names=[], ), ColorProp( code=27, rgb=(0, 95, 255), hsl=(217.64705882352942, 1, 0.5), hex="#005fff", names=[], ), ColorProp( code=28, rgb=(0, 135, 0), hsl=(120, 1, 0.2647058823529412), hex="#008700", names=["green 3", "css:green", "Fine Pine"], ), ColorProp( code=29, rgb=(0, 135, 95), hsl=(162.22222222222223, 1, 0.2647058823529412), hex="#00875f", names=["Absinthe Turquoise", "Golf Green", "Chagall Green"], ), ColorProp( code=30, rgb=(0, 135, 135), hsl=(180, 1, 0.2647058823529412), hex="#008787", names=["css:darkcyan", "Green Moblin"], ), ColorProp( code=31, rgb=(0, 135, 175), hsl=(193.71428571428572, 1, 0.3431372549019608), hex="#0087af", names=["Stomy Shower"], ), ColorProp( code=32, rgb=(0, 135, 215), hsl=(202.32558139534882, 1, 0.4215686274509804), hex="#0087d7", names=[], ), ColorProp( code=33, rgb=(0, 135, 255), hsl=(208.23529411764707, 1, 0.5), hex="#0087ff", names=["Too Blue to be True"], ), ColorProp( code=34, rgb=(0, 175, 0), hsl=(120, 1, 0.3431372549019608), hex="#00af00", names=["green 2"], ), ColorProp( code=35, rgb=(0, 175, 95), hsl=(152.57142857142856, 1, 0.3431372549019608), hex="#00af5f", names=[], ), ColorProp( code=36, rgb=(0, 175, 135), hsl=(166.28571428571428, 1, 0.3431372549019608), hex="#00af87", names=[], ), ColorProp( code=37, rgb=(0, 175, 175), hsl=(180, 1, 0.3431372549019608), hex="#00afaf", names=[], ), ColorProp( code=38, rgb=(0, 175, 215), hsl=(191.1627906976744, 1, 0.4215686274509804), hex="#00afd7", names=[], ), ColorProp( code=39, rgb=(0, 175, 255), hsl=(198.8235294117647, 1, 0.5), hex="#00afff", names=[], ), ColorProp( code=40, rgb=(0, 215, 0), hsl=(120, 1, 0.4215686274509804), hex="#00d700", names=["green 1"], ), ColorProp( code=41, rgb=(0, 215, 95), hsl=(146.51162790697674, 1, 0.4215686274509804), hex="#00d75f", names=[], ), ColorProp( code=42, rgb=(0, 215, 135), hsl=(157.67441860465118, 1, 0.4215686274509804), hex="#00d787", names=[], ), ColorProp( code=43, rgb=(0, 215, 175), hsl=(168.8372093023256, 1, 0.4215686274509804), hex="#00d7af", names=[], ), ColorProp( code=44, rgb=(0, 215, 215), hsl=(180, 1, 0.4215686274509804), hex="#00d7d7", names=[], ), ColorProp( code=45, rgb=(0, 215, 255), hsl=(189.41176470588235, 1, 0.5), hex="#00d7ff", names=[], ), ColorProp( code=46, rgb=(0, 255, 0), hsl=(120, 1, 0.5), hex="#00ff00", names=["green", "css:lime"], ), ColorProp( code=47, rgb=(0, 255, 95), hsl=(142.35294117647058, 1, 0.5), hex="#00ff5f", names=[], ), ColorProp( code=48, rgb=(0, 255, 135), hsl=(151.76470588235293, 1, 0.5), hex="#00ff87", names=["css:springgreen"], ), ColorProp( code=49, rgb=(0, 255, 175), hsl=(161.1764705882353, 1, 0.5), hex="#00ffaf", names=[], ), ColorProp( code=50, rgb=(0, 255, 215), hsl=(170.58823529411765, 1, 
0.5), hex="#00ffd7", names=[], ), ColorProp( code=51, rgb=(0, 255, 255), hsl=(180, 1, 0.5), hex="#00ffff", names=["cyan", "css:cyan", "css:aqua"], ), ColorProp( code=52, rgb=(95, 0, 0), hsl=(0, 1, 0.18627450980392157), hex="#5f0000", names=["red 5", "Spikey Red"], ), ColorProp( code=53, rgb=(95, 0, 95), hsl=(300, 1, 0.18627450980392157), hex="#5f005f", names=[], ), ColorProp( code=54, rgb=(95, 0, 135), hsl=(282.22222222222223, 1, 0.2647058823529412), hex="#5f0087", names=[], ), ColorProp( code=55, rgb=(95, 0, 175), hsl=(272.57142857142856, 1, 0.3431372549019608), hex="#5f00af", names=[], ), ColorProp( code=56, rgb=(95, 0, 215), hsl=(266.51162790697674, 1, 0.4215686274509804), hex="#5f00d7", names=[], ), ColorProp( code=57, rgb=(95, 0, 255), hsl=(262.3529411764706, 1, 0.5), hex="#5f00ff", names=[], ), ColorProp( code=58, rgb=(95, 95, 0), hsl=(60, 1, 0.18627450980392157), hex="#5f5f00", names=[], ), ColorProp( code=59, rgb=(95, 95, 95), hsl=(0, 0, 0.37254901960784315), hex="#5f5f5f", names=["Rhine Castle", "Shades On"], ), ColorProp( code=60, rgb=(95, 95, 135), hsl=(240, 0.17391304347826086, 0.45098039215686275), hex="#5f5f87", names=[], ), ColorProp( code=61, rgb=(95, 95, 175), hsl=(240, 0.3333333333333333, 0.5294117647058824), hex="#5f5faf", names=[], ), ColorProp( code=62, rgb=(95, 95, 215), hsl=(240, 0.6000000000000001, 0.607843137254902), hex="#5f5fd7", names=[], ), ColorProp( code=63, rgb=(95, 95, 255), hsl=(240, 1, 0.6862745098039216), hex="#5f5fff", names=[], ), ColorProp( code=64, rgb=(95, 135, 0), hsl=(77.77777777777777, 1, 0.2647058823529412), hex="#5f8700", names=[], ), ColorProp( code=65, rgb=(95, 135, 95), hsl=(120, 0.17391304347826086, 0.45098039215686275), hex="#5f875f", names=["glade green"], ), ColorProp( code=66, rgb=(95, 135, 135), hsl=(180, 0.17391304347826086, 0.45098039215686275), hex="#5f8787", names=[], ), ColorProp( code=67, rgb=(95, 135, 175), hsl=(210, 0.3333333333333333, 0.5294117647058824), hex="#5f87af", names=[], ), ColorProp( code=68, rgb=(95, 135, 215), hsl=(220, 0.6000000000000001, 0.607843137254902), hex="#5f87d7", names=[], ), ColorProp( code=69, rgb=(95, 135, 255), hsl=(225, 1, 0.6862745098039216), hex="#5f87ff", names=[], ), ColorProp( code=70, rgb=(95, 175, 0), hsl=(87.42857142857144, 1, 0.3431372549019608), hex="#5faf00", names=[], ), ColorProp( code=71, rgb=(95, 175, 95), hsl=(120, 0.3333333333333333, 0.5294117647058824), hex="#5faf5f", names=[], ), ColorProp( code=72, rgb=(95, 175, 135), hsl=(150, 0.3333333333333333, 0.5294117647058824), hex="#5faf87", names=["Verdigris Green"], ), ColorProp( code=73, rgb=(95, 175, 175), hsl=(180, 0.3333333333333333, 0.5294117647058824), hex="#5fafaf", names=[], ), ColorProp( code=74, rgb=(95, 175, 215), hsl=(200, 0.6000000000000001, 0.607843137254902), hex="#5fafd7", names=["tradewind"], ), ColorProp( code=75, rgb=(95, 175, 255), hsl=(210, 1, 0.6862745098039216), hex="#5fafff", names=[], ), ColorProp( code=76, rgb=(95, 215, 0), hsl=(93.48837209302326, 1, 0.4215686274509804), hex="#5fd700", names=[], ), ColorProp( code=77, rgb=(95, 215, 95), hsl=(120, 0.6000000000000001, 0.607843137254902), hex="#5fd75f", names=[], ), ColorProp( code=78, rgb=(95, 215, 135), hsl=(140, 0.6000000000000001, 0.607843137254902), hex="#5fd787", names=[], ), ColorProp( code=79, rgb=(95, 215, 175), hsl=(160, 0.6000000000000001, 0.607843137254902), hex="#5fd7af", names=[], ), ColorProp( code=80, rgb=(95, 215, 215), hsl=(180, 0.6000000000000001, 0.607843137254902), hex="#5fd7d7", names=[], ), ColorProp( code=81, rgb=(95, 215, 255), hsl=(195, 
1, 0.6862745098039216), hex="#5fd7ff", names=[], ), ColorProp( code=82, rgb=(95, 255, 0), hsl=(97.6470588235294, 1, 0.5), hex="#5fff00", names=[], ), ColorProp( code=83, rgb=(95, 255, 95), hsl=(120, 1, 0.6862745098039216), hex="#5fff5f", names=[], ), ColorProp( code=84, rgb=(95, 255, 135), hsl=(135, 1, 0.6862745098039216), hex="#5fff87", names=[], ), ColorProp( code=85, rgb=(95, 255, 175), hsl=(150, 1, 0.6862745098039216), hex="#5fffaf", names=[], ), ColorProp( code=86, rgb=(95, 255, 215), hsl=(165, 1, 0.6862745098039216), hex="#5fffd7", names=[], ), ColorProp( code=87, rgb=(95, 255, 255), hsl=(180, 1, 0.6862745098039216), hex="#5fffff", names=[], ), ColorProp( code=88, rgb=(135, 0, 0), hsl=(0, 1, 0.2647058823529412), hex="#870000", names=["red 4", "css:darkred", "Chanticleer"], ), ColorProp( code=89, rgb=(135, 0, 95), hsl=(317.77777777777777, 1, 0.2647058823529412), hex="#87005f", names=[], ), ColorProp( code=90, rgb=(135, 0, 135), hsl=(300, 1, 0.2647058823529412), hex="#870087", names=["css:darkmagenta", "Mardi Gras"], ), ColorProp( code=91, rgb=(135, 0, 175), hsl=(286.2857142857143, 1, 0.3431372549019608), hex="#8700af", names=["Shade of Violet"], ), ColorProp( code=92, rgb=(135, 0, 215), hsl=(277.6744186046512, 1, 0.4215686274509804), hex="#8700d7", names=[], ), ColorProp( code=93, rgb=(135, 0, 255), hsl=(271.7647058823529, 1, 0.5), hex="#8700ff", names=["electric violet"], ), ColorProp( code=94, rgb=(135, 95, 0), hsl=(42.22222222222222, 1, 0.2647058823529412), hex="#875f00", names=["Rat Brown"], ), ColorProp( code=95, rgb=(135, 95, 95), hsl=(0, 0.17391304347826086, 0.45098039215686275), hex="#875f5f", names=["Rabbit Paws"], ), ColorProp( code=96, rgb=(135, 95, 135), hsl=(300, 0.17391304347826086, 0.45098039215686275), hex="#875f87", names=[], ), ColorProp( code=97, rgb=(135, 95, 175), hsl=(270, 0.3333333333333333, 0.5294117647058824), hex="#875faf", names=[], ), ColorProp( code=98, rgb=(135, 95, 215), hsl=(260, 0.6000000000000001, 0.607843137254902), hex="#875fd7", names=[], ), ColorProp( code=99, rgb=(135, 95, 255), hsl=(255, 1, 0.6862745098039216), hex="#875fff", names=[], ), ColorProp( code=100, rgb=(135, 135, 0), hsl=(60, 1, 0.2647058823529412), hex="#878700", names=["css:olive"], ), ColorProp( code=101, rgb=(135, 135, 95), hsl=(60, 0.17391304347826086, 0.45098039215686275), hex="#87875f", names=["clay creek"], ), ColorProp( code=102, rgb=(135, 135, 135), hsl=(0, 0, 0.5294117647058824), hex="#878787", names=[], ), ColorProp( code=103, rgb=(135, 135, 175), hsl=(240, 0.2, 0.607843137254902), hex="#8787af", names=[], ), ColorProp( code=104, rgb=(135, 135, 215), hsl=(240, 0.5000000000000001, 0.6862745098039216), hex="#8787d7", names=[], ), ColorProp( code=105, rgb=(135, 135, 255), hsl=(240, 1, 0.7647058823529411), hex="#8787ff", names=[], ), ColorProp( code=106, rgb=(135, 175, 0), hsl=(73.71428571428572, 1, 0.3431372549019608), hex="#87af00", names=[], ), ColorProp( code=107, rgb=(135, 175, 95), hsl=(90, 0.3333333333333333, 0.5294117647058824), hex="#87af5f", names=[], ), ColorProp( code=108, rgb=(135, 175, 135), hsl=(120, 0.2, 0.607843137254902), hex="#87af87", names=[], ), ColorProp( code=109, rgb=(135, 175, 175), hsl=(180, 0.2, 0.607843137254902), hex="#87afaf", names=[], ), ColorProp( code=110, rgb=(135, 175, 215), hsl=(210, 0.5000000000000001, 0.6862745098039216), hex="#87afd7", names=[], ), ColorProp( code=111, rgb=(135, 175, 255), hsl=(220, 1, 0.7647058823529411), hex="#87afff", names=[], ), ColorProp( code=112, rgb=(135, 215, 0), hsl=(82.32558139534883, 1, 0.4215686274509804), 
hex="#87d700", names=[], ), ColorProp( code=113, rgb=(135, 215, 95), hsl=(100, 0.6000000000000001, 0.607843137254902), hex="#87d75f", names=[], ), ColorProp( code=114, rgb=(135, 215, 135), hsl=(120, 0.5000000000000001, 0.6862745098039216), hex="#87d787", names=[], ), ColorProp( code=115, rgb=(135, 215, 175), hsl=(150, 0.5000000000000001, 0.6862745098039216), hex="#87d7af", names=[], ), ColorProp( code=116, rgb=(135, 215, 215), hsl=(180, 0.5000000000000001, 0.6862745098039216), hex="#87d7d7", names=["Island Oasis"], ), ColorProp( code=117, rgb=(135, 215, 255), hsl=(200, 1, 0.7647058823529411), hex="#87d7ff", names=["css:lightskyblue"], ), ColorProp( code=118, rgb=(135, 255, 0), hsl=(88.23529411764707, 1, 0.5), hex="#87ff00", names=["css:chartreuse", "Lasting Lime"], ), ColorProp( code=119, rgb=(135, 255, 95), hsl=(105, 1, 0.6862745098039216), hex="#87ff5f", names=[], ), ColorProp( code=120, rgb=(135, 255, 135), hsl=(120, 1, 0.7647058823529411), hex="#87ff87", names=[], ), ColorProp( code=121, rgb=(135, 255, 175), hsl=(140, 1, 0.7647058823529411), hex="#87ffaf", names=[], ), ColorProp( code=122, rgb=(135, 255, 215), hsl=(160, 1, 0.7647058823529411), hex="#87ffd7", names=["css:aquamarine"], ), ColorProp( code=123, rgb=(135, 255, 255), hsl=(180, 1, 0.7647058823529411), hex="#87ffff", names=["Glitter Shower"], ), ColorProp( code=124, rgb=(175, 0, 0), hsl=(0, 1, 0.3431372549019608), hex="#af0000", names=["red 3", "bright red"], ), ColorProp( code=125, rgb=(175, 0, 95), hsl=(327.42857142857144, 1, 0.3431372549019608), hex="#af005f", names=[], ), ColorProp( code=126, rgb=(175, 0, 135), hsl=(313.7142857142857, 1, 0.3431372549019608), hex="#af0087", names=[], ), ColorProp( code=127, rgb=(175, 0, 175), hsl=(300, 1, 0.3431372549019608), hex="#af00af", names=[], ), ColorProp( code=128, rgb=(175, 0, 215), hsl=(288.83720930232556, 1, 0.4215686274509804), hex="#af00d7", names=[], ), ColorProp( code=129, rgb=(175, 0, 255), hsl=(281.1764705882353, 1, 0.5), hex="#af00ff", names=[], ), ColorProp( code=130, rgb=(175, 95, 0), hsl=(32.57142857142857, 1, 0.3431372549019608), hex="#af5f00", names=["Orange Brown"], ), ColorProp( code=131, rgb=(175, 95, 95), hsl=(0, 0.3333333333333333, 0.5294117647058824), hex="#af5f5f", names=["Italian Villa", "Poppy Prose", "Sienna Red"], ), ColorProp( code=132, rgb=(175, 95, 135), hsl=(330, 0.3333333333333333, 0.5294117647058824), hex="#af5f87", names=[], ), ColorProp( code=133, rgb=(175, 95, 175), hsl=(300, 0.3333333333333333, 0.5294117647058824), hex="#af5faf", names=[], ), ColorProp( code=134, rgb=(175, 95, 215), hsl=(280, 0.6000000000000001, 0.607843137254902), hex="#af5fd7", names=[], ), ColorProp( code=135, rgb=(175, 95, 255), hsl=(270, 1, 0.6862745098039216), hex="#af5fff", names=[], ), ColorProp( code=136, rgb=(175, 135, 0), hsl=(46.285714285714285, 1, 0.3431372549019608), hex="#af8700", names=[], ), ColorProp( code=137, rgb=(175, 135, 95), hsl=(30, 0.3333333333333333, 0.5294117647058824), hex="#af875f", names=["Clay Ochre", "Roman Coin", "Light Oak Brown"], ), ColorProp( code=138, rgb=(175, 135, 135), hsl=(0, 0.2, 0.607843137254902), hex="#af8787", names=[], ), ColorProp( code=139, rgb=(175, 135, 175), hsl=(300, 0.2, 0.607843137254902), hex="#af87af", names=[], ), ColorProp( code=140, rgb=(175, 135, 215), hsl=(270, 0.5000000000000001, 0.6862745098039216), hex="#af87d7", names=[], ), ColorProp( code=141, rgb=(175, 135, 255), hsl=(260, 1, 0.7647058823529411), hex="#af87ff", names=[], ), ColorProp( code=142, rgb=(175, 175, 0), hsl=(60, 1, 0.3431372549019608), hex="#afaf00", 
names=[], ), ColorProp( code=143, rgb=(175, 175, 95), hsl=(60, 0.3333333333333333, 0.5294117647058824), hex="#afaf5f", names=["Palm"], ), ColorProp( code=144, rgb=(175, 175, 135), hsl=(60, 0.2, 0.607843137254902), hex="#afaf87", names=["Lively Ivy", "Wall Green"], ), ColorProp( code=145, rgb=(175, 175, 175), hsl=(0, 0, 0.6862745098039216), hex="#afafaf", names=["silver chalice", "Smoke Screen"], ), ColorProp( code=146, rgb=(175, 175, 215), hsl=(240, 0.3333333333333334, 0.7647058823529411), hex="#afafd7", names=[], ), ColorProp( code=147, rgb=(175, 175, 255), hsl=(240, 1, 0.8431372549019608), hex="#afafff", names=[], ), ColorProp( code=148, rgb=(175, 215, 0), hsl=(71.16279069767441, 1, 0.4215686274509804), hex="#afd700", names=["King Lime"], ), ColorProp( code=149, rgb=(175, 215, 95), hsl=(80.00000000000001, 0.6000000000000001, 0.607843137254902), hex="#afd75f", names=[], ), ColorProp( code=150, rgb=(175, 215, 135), hsl=(90, 0.5000000000000001, 0.6862745098039216), hex="#afd787", names=[], ), ColorProp( code=151, rgb=(175, 215, 175), hsl=(120, 0.3333333333333334, 0.7647058823529411), hex="#afd7af", names=[], ), ColorProp( code=152, rgb=(175, 215, 215), hsl=(180, 0.3333333333333334, 0.7647058823529411), hex="#afd7d7", names=["Rivers Edge"], ), ColorProp( code=153, rgb=(175, 215, 255), hsl=(210, 1, 0.8431372549019608), hex="#afd7ff", names=[], ), ColorProp( code=154, rgb=(175, 255, 0), hsl=(78.82352941176471, 1, 0.5), hex="#afff00", names=["Lime Acid"], ), ColorProp( code=155, rgb=(175, 255, 95), hsl=(90, 1, 0.6862745098039216), hex="#afff5f", names=[], ), ColorProp( code=156, rgb=(175, 255, 135), hsl=(100, 1, 0.7647058823529411), hex="#afff87", names=[], ), ColorProp( code=157, rgb=(175, 255, 175), hsl=(120, 1, 0.8431372549019608), hex="#afffaf", names=[], ), ColorProp( code=158, rgb=(175, 255, 215), hsl=(150, 1, 0.8431372549019608), hex="#afffd7", names=["Mintastic"], ), ColorProp( code=159, rgb=(175, 255, 255), hsl=(180, 1, 0.8431372549019608), hex="#afffff", names=["Celeste", "Frostbite", "Italian Sky Blue"], ), ColorProp( code=160, rgb=(215, 0, 0), hsl=(0, 1, 0.4215686274509804), hex="#d70000", names=["red 2", "Rosso Corsa", "Hot Fever"], ), ColorProp( code=161, rgb=(215, 0, 95), hsl=(333.48837209302326, 1, 0.4215686274509804), hex="#d7005f", names=[], ), ColorProp( code=162, rgb=(215, 0, 135), hsl=(322.3255813953488, 1, 0.4215686274509804), hex="#d70087", names=[], ), ColorProp( code=163, rgb=(215, 0, 175), hsl=(311.16279069767444, 1, 0.4215686274509804), hex="#d700af", names=[], ), ColorProp( code=164, rgb=(215, 0, 215), hsl=(300, 1, 0.4215686274509804), hex="#d700d7", names=[], ), ColorProp( code=165, rgb=(215, 0, 255), hsl=(290.5882352941176, 1, 0.5), hex="#d700ff", names=[], ), ColorProp( code=166, rgb=(215, 95, 0), hsl=(26.511627906976745, 1, 0.4215686274509804), hex="#d75f00", names=[], ), ColorProp( code=167, rgb=(215, 95, 95), hsl=(0, 0.6000000000000001, 0.607843137254902), hex="#d75f5f", names=["Spiced Coral", "Deep Sea Coral", "Roman"], ), ColorProp( code=168, rgb=(215, 95, 135), hsl=(340, 0.6000000000000001, 0.607843137254902), hex="#d75f87", names=[], ), ColorProp( code=169, rgb=(215, 95, 175), hsl=(320, 0.6000000000000001, 0.607843137254902), hex="#d75faf", names=[], ), ColorProp( code=170, rgb=(215, 95, 215), hsl=(300, 0.6000000000000001, 0.607843137254902), hex="#d75fd7", names=[], ), ColorProp( code=171, rgb=(215, 95, 255), hsl=(285, 1, 0.6862745098039216), hex="#d75fff", names=[], ), ColorProp( code=172, rgb=(215, 135, 0), hsl=(37.674418604651166, 1, 
0.4215686274509804), hex="#d78700", names=[], ), ColorProp( code=173, rgb=(215, 135, 95), hsl=(20, 0.6000000000000001, 0.607843137254902), hex="#d7875f", names=[], ), ColorProp( code=174, rgb=(215, 135, 135), hsl=(0, 0.5000000000000001, 0.6862745098039216), hex="#d78787", names=["Peaches of Immortality", "Copperfield"], ), ColorProp( code=175, rgb=(215, 135, 175), hsl=(330, 0.5000000000000001, 0.6862745098039216), hex="#d787af", names=[], ), ColorProp( code=176, rgb=(215, 135, 215), hsl=(300, 0.5000000000000001, 0.6862745098039216), hex="#d787d7", names=[], ), ColorProp( code=177, rgb=(215, 135, 255), hsl=(280, 1, 0.7647058823529411), hex="#d787ff", names=[], ), ColorProp( code=178, rgb=(215, 175, 0), hsl=(48.83720930232558, 1, 0.4215686274509804), hex="#d7af00", names=["Palomino Gold"], ), ColorProp( code=179, rgb=(215, 175, 95), hsl=(40, 0.6000000000000001, 0.607843137254902), hex="#d7af5f", names=["Sell Gold", "Butterscotch Bliss", "Equator"], ), ColorProp( code=180, rgb=(215, 175, 135), hsl=(30, 0.5000000000000001, 0.6862745098039216), hex="#d7af87", names=["css:tan", "Santa Fe Tan", "Calico", "Caramel Cloud"], ), ColorProp( code=181, rgb=(215, 175, 175), hsl=(0, 0.3333333333333334, 0.7647058823529411), hex="#d7afaf", names=["Mary Rose"], ), ColorProp( code=182, rgb=(215, 175, 215), hsl=(300, 0.3333333333333334, 0.7647058823529411), hex="#d7afd7", names=[], ), ColorProp( code=183, rgb=(215, 175, 255), hsl=(270, 1, 0.8431372549019608), hex="#d7afff", names=[], ), ColorProp( code=184, rgb=(215, 215, 0), hsl=(60, 1, 0.4215686274509804), hex="#d7d700", names=[], ), ColorProp( code=185, rgb=(215, 215, 95), hsl=(60, 0.6000000000000001, 0.607843137254902), hex="#d7d75f", names=[], ), ColorProp( code=186, rgb=(215, 215, 135), hsl=(60, 0.5000000000000001, 0.6862745098039216), hex="#d7d787", names=[], ), ColorProp( code=187, rgb=(215, 215, 175), hsl=(60, 0.3333333333333334, 0.7647058823529411), hex="#d7d7af", names=["Green Mesh"], ), ColorProp( code=188, rgb=(215, 215, 215), hsl=(0, 0, 0.8431372549019608), hex="#d7d7d7", names=["gray 3"], ), ColorProp( code=189, rgb=(215, 215, 255), hsl=(240, 1, 0.9215686274509804), hex="#d7d7ff", names=[], ), ColorProp( code=190, rgb=(215, 255, 0), hsl=(69.41176470588235, 1, 0.5), hex="#d7ff00", names=[], ), ColorProp( code=191, rgb=(215, 255, 95), hsl=(75, 1, 0.6862745098039216), hex="#d7ff5f", names=[], ), ColorProp( code=192, rgb=(215, 255, 135), hsl=(80.00000000000001, 1, 0.7647058823529411), hex="#d7ff87", names=[], ), ColorProp( code=193, rgb=(215, 255, 175), hsl=(90, 1, 0.8431372549019608), hex="#d7ffaf", names=[], ), ColorProp( code=194, rgb=(215, 255, 215), hsl=(120, 1, 0.9215686274509804), hex="#d7ffd7", names=["snowy mint"], ), ColorProp( code=195, rgb=(215, 255, 255), hsl=(180, 1, 0.9215686274509804), hex="#d7ffff", names=["css:lightcyan", "Refreshing Primer"], ), ColorProp( code=196, rgb=(255, 0, 0), hsl=(0, 1, 0.5), hex="#ff0000", names=["red", "css:red"], ), ColorProp( code=197, rgb=(255, 0, 95), hsl=(337.6470588235294, 1, 0.5), hex="#ff005f", names=["Flaming Hot Flamingoes"], ), ColorProp( code=198, rgb=(255, 0, 135), hsl=(328.2352941176471, 1, 0.5), hex="#ff0087", names=["Fancy Fuchsia"], ), ColorProp( code=199, rgb=(255, 0, 175), hsl=(318.8235294117647, 1, 0.5), hex="#ff00af", names=["Mean Girls Lipstick"], ), ColorProp( code=200, rgb=(255, 0, 215), hsl=(309.4117647058824, 1, 0.5), hex="#ff00d7", names=[], ), ColorProp( code=201, rgb=(255, 0, 255), hsl=(300, 1, 0.5), hex="#ff00ff", names=["css:fuchsia", "css:magenta"], ), ColorProp( code=202, 
rgb=(255, 95, 0), hsl=(22.352941176470587, 1, 0.5), hex="#ff5f00", names=["Vivid Orange"], ), ColorProp( code=203, rgb=(255, 95, 95), hsl=(0, 1, 0.6862745098039216), hex="#ff5f5f", names=[], ), ColorProp( code=204, rgb=(255, 95, 135), hsl=(345, 1, 0.6862745098039216), hex="#ff5f87", names=[], ), ColorProp( code=205, rgb=(255, 95, 175), hsl=(330, 1, 0.6862745098039216), hex="#ff5faf", names=[], ), ColorProp( code=206, rgb=(255, 95, 215), hsl=(315, 1, 0.6862745098039216), hex="#ff5fd7", names=[], ), ColorProp( code=207, rgb=(255, 95, 255), hsl=(300, 1, 0.6862745098039216), hex="#ff5fff", names=[], ), ColorProp( code=208, rgb=(255, 135, 0), hsl=(31.764705882352942, 1, 0.5), hex="#ff8700", names=["css:darkorange", "Mandarin Jelly"], ), ColorProp( code=209, rgb=(255, 135, 95), hsl=(15, 1, 0.6862745098039216), hex="#ff875f", names=[], ), ColorProp( code=210, rgb=(255, 135, 135), hsl=(0, 1, 0.7647058823529411), hex="#ff8787", names=["geraldine"], ), ColorProp( code=211, rgb=(255, 135, 175), hsl=(340, 1, 0.7647058823529411), hex="#ff87af", names=[], ), ColorProp( code=212, rgb=(255, 135, 215), hsl=(320, 1, 0.7647058823529411), hex="#ff87d7", names=["Pink Delight"], ), ColorProp( code=213, rgb=(255, 135, 255), hsl=(300, 1, 0.7647058823529411), hex="#ff87ff", names=["Darling Bud"], ), ColorProp( code=214, rgb=(255, 175, 0), hsl=(41.1764705882353, 1, 0.5), hex="#ffaf00", names=["css:orange", "Gold Fusion"], ), ColorProp( code=215, rgb=(255, 175, 95), hsl=(30, 1, 0.6862745098039216), hex="#ffaf5f", names=["Vintage Orange"], ), ColorProp( code=216, rgb=(255, 175, 135), hsl=(20, 1, 0.7647058823529411), hex="#ffaf87", names=["Spice Pink"], ), ColorProp( code=217, rgb=(255, 175, 175), hsl=(0, 1, 0.8431372549019608), hex="#ffafaf", names=["cornflower lilac"], ), ColorProp( code=218, rgb=(255, 175, 215), hsl=(330, 1, 0.8431372549019608), hex="#ffafd7", names=[], ), ColorProp( code=219, rgb=(255, 175, 255), hsl=(300, 1, 0.8431372549019608), hex="#ffafff", names=[], ), ColorProp( code=220, rgb=(255, 215, 0), hsl=(50.588235294117645, 1, 0.5), hex="#ffd700", names=["css:gold"], ), ColorProp( code=221, rgb=(255, 215, 95), hsl=(45, 1, 0.6862745098039216), hex="#ffd75f", names=["dandelion"], ), ColorProp( code=222, rgb=(255, 215, 135), hsl=(40, 1, 0.7647058823529411), hex="#ffd787", names=["Workout Routine"], ), ColorProp( code=223, rgb=(255, 215, 175), hsl=(30, 1, 0.8431372549019608), hex="#ffd7af", names=["css:navajowhite", "light apricot"], ), ColorProp( code=224, rgb=(255, 215, 215), hsl=(0, 1, 0.9215686274509804), hex="#ffd7d7", names=["cosmos"], ), ColorProp( code=225, rgb=(255, 215, 255), hsl=(300, 1, 0.9215686274509804), hex="#ffd7ff", names=[], ), ColorProp( code=226, rgb=(255, 255, 0), hsl=(60, 1, 0.5), hex="#ffff00", names=["css:yellow"], ), ColorProp( code=227, rgb=(255, 255, 95), hsl=(60, 1, 0.6862745098039216), hex="#ffff5f", names=[], ), ColorProp( code=228, rgb=(255, 255, 135), hsl=(60, 1, 0.7647058823529411), hex="#ffff87", names=["Cinque Foil"], ), ColorProp( code=229, rgb=(255, 255, 175), hsl=(60, 1, 0.8431372549019608), hex="#ffffaf", names=["portafino"], ), ColorProp( code=230, rgb=(255, 255, 215), hsl=(60, 1, 0.9215686274509804), hex="#ffffd7", names=["css:cornsilk", "cumulus"], ), ColorProp( code=231, rgb=(255, 255, 255), hsl=(0, 0, 1), hex="#ffffff", names=["white", "css:white"], ), ColorProp( code=232, rgb=(8, 8, 8), hsl=(0, 0, 0.03137254901960784), hex="#080808", names=["gray 24", "cod gray", "Reversed Grey"], ), ColorProp( code=233, rgb=(18, 18, 18), hsl=(0, 0, 0.07058823529411765), 
hex="#121212", names=["gray 23", "Dark Tone Ink"], ), ColorProp( code=234, rgb=(28, 28, 28), hsl=(0, 0, 0.10980392156862745), hex="#1c1c1c", names=["gray 22", "Eerie Black"], ), ColorProp( code=235, rgb=(38, 38, 38), hsl=(0, 0, 0.14901960784313725), hex="#262626", names=["gray 21", "Nero"], ), ColorProp( code=236, rgb=(48, 48, 48), hsl=(0, 0, 0.18823529411764706), hex="#303030", names=["gray 20", "mine shaft"], ), ColorProp( code=237, rgb=(58, 58, 58), hsl=(0, 0, 0.22745098039215686), hex="#3a3a3a", names=["gray 19", "Dead Pixel"], ), ColorProp( code=238, rgb=(68, 68, 68), hsl=(0, 0, 0.26666666666666666), hex="#444444", names=["gray 18", "Goshawk Grey"], ), ColorProp( code=239, rgb=(78, 78, 78), hsl=(0, 0, 0.3058823529411765), hex="#4e4e4e", names=["gray 17", "Black Oak"], ), ColorProp( code=240, rgb=(88, 88, 88), hsl=(0, 0, 0.34509803921568627), hex="#585858", names=["gray 16", "Shadow Mountain"], ), ColorProp( code=241, rgb=(98, 98, 98), hsl=(0, 0, 0.3843137254901961), hex="#626262", names=["gray 15"], ), ColorProp( code=242, rgb=(108, 108, 108), hsl=(0, 0, 0.4235294117647059), hex="#6c6c6c", names=["gray 14", "css:dimgray", "dove gray"], ), ColorProp( code=243, rgb=(118, 118, 118), hsl=(0, 0, 0.4627450980392157), hex="#767676", names=["gray 13", "Steel Wool", "Sonic Silver"], ), ColorProp( code=244, rgb=(128, 128, 128), hsl=(0, 0, 0.5019607843137255), hex="#808080", names=["gray 12", "css:gray", "css:grey"], ), ColorProp( code=245, rgb=(138, 138, 138), hsl=(0, 0, 0.5411764705882353), hex="#8a8a8a", names=["gray 11"], ), ColorProp( code=246, rgb=(148, 148, 148), hsl=(0, 0, 0.5803921568627451), hex="#949494", names=["gray 10"], ), ColorProp( code=247, rgb=(158, 158, 158), hsl=(0, 0, 0.6196078431372549), hex="#9e9e9e", names=["gray 9"], ), ColorProp( code=248, rgb=(168, 168, 168), hsl=(0, 0, 0.6588235294117647), hex="#a8a8a8", names=["gray 8", "css:darkgrey"], ), ColorProp( code=249, rgb=(178, 178, 178), hsl=(0, 0, 0.6980392156862745), hex="#b2b2b2", names=["gray 7"], ), ColorProp( code=250, rgb=(188, 188, 188), hsl=(0, 0, 0.7372549019607844), hex="#bcbcbc", names=["gray 6", "css:silver", "Dust to Dust"], ), ColorProp( code=251, rgb=(198, 198, 198), hsl=(0, 0, 0.7764705882352941), hex="#c6c6c6", names=["gray 5", "Silver Polish"], ), ColorProp( code=252, rgb=(208, 208, 208), hsl=(0, 0, 0.8156862745098039), hex="#d0d0d0", names=["gray 4", "css:lightgray", "Ancestral Water"], ), ColorProp( code=253, rgb=(218, 218, 218), hsl=(0, 0, 0.8549019607843137), hex="#dadada", names=["gray 2", "css:gainsboro", "alto"], ), ColorProp( code=254, rgb=(228, 228, 228), hsl=(0, 0, 0.8941176470588236), hex="#e4e4e4", names=["gray 1", "mercury"], ), ColorProp( code=255, rgb=(238, 238, 238), hsl=(0, 0, 0.9333333333333333), hex="#eeeeee", names=["gray 0", "gallery"], ), ] pyglossary-5.0.9/pyglossary/ui/tools/000077500000000000000000000000001476751035500177475ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/ui/tools/__init__.py000066400000000000000000000000001476751035500220460ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/ui/tools/colors.py000066400000000000000000000003221476751035500216170ustar00rootroot00000000000000__all__ = ["green", "red", "reset", "yellow"] redCode = 1 greenCode = 2 yellowCode = 3 red = f"\x1b[38;5;{redCode}m" green = f"\x1b[38;5;{greenCode}m" yellow = f"\x1b[38;5;{yellowCode}m" reset = "\x1b[0;0;0m" pyglossary-5.0.9/pyglossary/ui/tools/diff_glossary.py000077500000000000000000000171001476751035500231560ustar00rootroot00000000000000#!/usr/bin/env python # mypy: ignore-errors 
from __future__ import annotations import atexit import difflib import os import os.path import shlex import sys from subprocess import PIPE, Popen from typing import TYPE_CHECKING from pyglossary.core import log from pyglossary.glossary_v2 import Glossary from pyglossary.ui.tools.colors import ( green, red, reset, yellow, ) from pyglossary.ui.tools.format_entry import formatEntry from pyglossary.ui.tools.word_diff import ( formatDiff, xmlDiff, ) if TYPE_CHECKING: from collections.abc import Iterator from pyglossary.glossary_types import EntryType __all__ = ["diffGlossary"] Glossary.init() log.setVerbosity(1) entrySep = f"\n{'_' * 40}\n\n" noInfo = os.getenv("GLOSSARY_DIFF_NO_INFO") == "1" def formatInfoValueDiff(diff: Iterator[str]) -> str: a = "" b = "" for part in diff: if part[0] == " ": a += part[2:] b += part[2:] continue if part[0] == "-": a += red + part[2:] + reset continue if part[0] == "+": b += green + part[2:] + reset continue return a + "\n" + b def diffGlossary( # noqa: PLR0912, PLR0913 filename1: str, filename2: str, format1: str | None = None, format2: str | None = None, header: str = "", pager: bool = True, ) -> None: glos1 = Glossary(ui=None) if not glos1.directRead(filename1, formatName=format1): return glos2 = Glossary(ui=None) if not glos2.directRead(filename2, formatName=format2): return if pager: pagerCmd = ["less", "-R"] if os.getenv("PAGER"): pagerCmd = shlex.split(os.getenv("PAGER")) proc = Popen( pagerCmd, stdin=PIPE, ) def write(msg: str) -> None: proc.stdin.write(msg.encode("utf-8")) else: proc = None def write(msg: str) -> None: print(msg, end="") if header: write(header + "\n") iter1 = iter(glos1) iter2 = iter(glos2) # infoIter1 = iter(sorted(glos1.iterInfo())) # infoIter2 = iter(sorted(glos2.iterInfo())) if noInfo: infoIter1 = iter([]) infoIter2 = iter([]) else: infoIter1 = glos1.iterInfo() infoIter2 = glos2.iterInfo() index1 = -1 index2 = -1 def nextEntry1() -> None: nonlocal entry1, index1 entry1 = next(iter1) index1 += 1 def nextEntry2() -> None: nonlocal entry2, index2 entry2 = next(iter2) index2 += 1 def printEntry(color: str, prefix: str, index: int, entry: EntryType) -> None: formatted = ( f"{color}{prefix}#{index} " + formatEntry(entry).replace("\n", "\n" + color) + entrySep ) write(formatted) def printInfo(color: str, prefix: str, pair: tuple[str, str]) -> None: key, value = pair spaces = " " * (len(prefix) + 7) valueColor = color + spaces + value.replace("\n", "\n" + spaces + color) formatted = f"{color}{prefix} Info: {key}\n{valueColor}" + entrySep write(formatted) def printChangedEntry(entry1: EntryType, entry2: EntryType) -> None: defiDiff = formatDiff(xmlDiff(entry1.defi, entry2.defi)) entry1._defi = defiDiff if index1 < 0: ids = "" elif index1 == index2: ids = f"#{index1}" else: ids = f"A#{index1} B#{index2}" formatted = f"=== {yellow}{ids}{reset} " + formatEntry(entry1) + entrySep write(formatted) def printChangedInfo(key: str, value1: str, value2: str) -> None: valueDiff = formatInfoValueDiff(xmlDiff(value1, value2)) printInfo(yellow, "=== ", (key, valueDiff)) infoPair1 = None infoPair2 = None def infoStep() -> None: nonlocal infoPair1, infoPair2 if infoPair1 is None: infoPair1 = next(infoIter1) if infoPair2 is None: infoPair2 = next(infoIter2) if infoPair1 == infoPair2: infoPair1, infoPair2 = None, None return if infoPair1[0] == infoPair2[0]: printChangedInfo(infoPair1[0], infoPair1[1], infoPair2[1]) infoPair1, infoPair2 = None, None return if infoPair1[0] < infoPair2[0]: printInfo(red, "--- A: ", infoPair1) infoPair1 = None return
printInfo(green, "+++ B: ", infoPair2) infoPair2 = None def printAltsChangedEntry( entry1: EntryType, entry2: EntryType, showDefi: bool = True, ) -> None: ids = f"#{index1}" if index1 == index2 else f"A#{index1} B#{index2}" header = f"=== {yellow}{ids}{reset} " altsDiff = difflib.ndiff( [f"Alt: {alt}\n" for alt in entry1.l_word[1:]], [f"Alt: {alt}\n" for alt in entry2.l_word[1:]], linejunk=None, charjunk=None, ) if entry1.l_word[0] == entry2.l_word[0]: firstWordLine = f">> {entry1.l_word[0]}" else: firstWordLine = f">> {entry1.l_word[0]} (A)\n>> {entry2.l_word[0]} (B)" entryFormatted = "\n".join( [ firstWordLine, formatDiff(altsDiff), entry1.defi if showDefi else "", ], ) formatted = header + entryFormatted + entrySep write(formatted) count = 0 entry1 = None entry2 = None def step() -> None: nonlocal count, entry1, entry2 if entry1 is None: nextEntry1() if entry2 is None: nextEntry2() words1 = entry1.l_word words2 = entry2.l_word if words1 == words2: if entry1.defi == entry2.defi: entry1, entry2 = None, None return printChangedEntry(entry1, entry2) entry1, entry2 = None, None return if entry1.defi == entry2.defi and (words1[0] in words2 or words2[0] in words1): printAltsChangedEntry(entry1, entry2) entry1, entry2 = None, None return if words1 < words2: printEntry(red, "--- A", index1, entry1) entry1 = None else: printEntry(green, "+++ B", index2, entry2) entry2 = None if (count + 1) % 50 == 0: sys.stdout.flush() count += 1 def run() -> None: # noqa: PLR0912 nonlocal index1, index2 while True: try: infoStep() except StopIteration: break except (OSError, BrokenPipeError): break if infoPair1: printInfo(red, "--- A: ", infoPair1) if infoPair2: printInfo(green, "+++ B: ", infoPair2) for pair in infoIter1: printInfo(red, "--- A: ", pair) for pair in infoIter2: printInfo(green, "+++ B: ", pair) while True: try: step() except StopIteration: break except (OSError, BrokenPipeError): break if entry1: printEntry(red, "--- A", index1, entry1) index1 += 1 if entry2: printEntry(green, "+++ B", index2, entry2) index2 += 1 for entry in iter1: printEntry(red, "--- A", index1, entry) index1 += 1 for entry in iter2: printEntry(green, "+++ B", index2, entry) index2 += 1 try: run() except (OSError, BrokenPipeError): pass # noqa: S110 except Exception as e: print(e) finally: if proc: proc.communicate() # proc.wait() # proc.terminate() sys.stdin.flush() sys.stdout.flush() # NOTE: make sure to set GIT_PAGER or config core.pager # for example GIT_PAGER=less # or GIT_PAGER='less -R -S -N' def gitDiffMain() -> None: # print(sys.argv[1:]) # arguments: # path old_file old_hex old_mode new_file new_hex new_mode old_hex = sys.argv[3][:7] new_hex = sys.argv[6][:7] filename1 = sys.argv[2] filename2 = sys.argv[1] header = f"{'_' * 80}\n\n### File: {filename2} ({old_hex}..{new_hex})\n" resDir = filename2 + "_res" if os.path.isdir(resDir): resDirTmp = filename1 + "_res" os.symlink(os.path.realpath(resDir), resDirTmp) atexit.register(os.remove, resDirTmp) diffGlossary( filename1, filename2, format1=None, format2=None, pager=False, header=header, ) def main() -> None: import os if os.getenv("GIT_DIFF_PATH_COUNTER"): return gitDiffMain() filename1 = sys.argv[1] filename2 = sys.argv[2] format1 = None format2 = None if len(sys.argv) > 3: format1 = sys.argv[3] if len(sys.argv) > 4: format2 = sys.argv[4] filename1 = os.path.expanduser(filename1) filename2 = os.path.expanduser(filename2) diffGlossary( filename1, filename2, format1=format1, format2=format2, pager=True, ) return None if __name__ == "__main__": main() 
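# Git integration sketch: git sets GIT_DIFF_PATH_COUNTER when it invokes
# GIT_EXTERNAL_DIFF, which is how main() above decides to dispatch to
# gitDiffMain() with the seven arguments (path old_file old_hex old_mode
# new_file new_hex new_mode). For example (paths are illustrative):
#   GIT_EXTERNAL_DIFF=pyglossary/ui/tools/diff_glossary.py GIT_PAGER='less -R' git diff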
pyglossary-5.0.9/pyglossary/ui/tools/format_entry.py000066400000000000000000000006661476751035500230420ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from pyglossary.glossary_types import EntryType __all__ = ["formatEntry"] def formatEntry(entry: EntryType) -> str: words = entry.l_word headword = "" if words: headword = words[0] lines = [ f">> {headword}", ] if len(words) > 1: lines += [f"Alt: {alt}" for alt in words[1:]] lines.append(f"\n{entry.defi}") return "\n".join(lines) pyglossary-5.0.9/pyglossary/ui/tools/view_glossary.py000077500000000000000000000057641476751035500232350ustar00rootroot00000000000000#!/usr/bin/env python # mypy: ignore-errors from __future__ import annotations import argparse import os.path import shlex import sys from subprocess import PIPE, Popen from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Callable from pyglossary.glossary_types import EntryType from pyglossary.core import log from pyglossary.glossary_v2 import Glossary from pyglossary.ui.tools.colors import reset, yellow from pyglossary.ui.tools.format_entry import formatEntry Glossary.init() log.setVerbosity(1) noColor = bool(os.getenv("NO_COLOR")) if noColor: yellow = reset = "" # noqa: F811 def getEntryHighlighter() -> Callable[[EntryType], None] | None: if noColor: return None try: import pygments # noqa: F401 except ModuleNotFoundError: return None from pygments import highlight from pygments.formatters import Terminal256Formatter as Formatter from pygments.lexers import HtmlLexer, XmlLexer formatter = Formatter() h_lexer = HtmlLexer() x_lexer = XmlLexer() def highlightEntry(entry: EntryType) -> None: entry.detectDefiFormat() if entry.defiFormat == "h": entry._defi = highlight(entry.defi, h_lexer, formatter) return if entry.defiFormat == "x": entry._defi = highlight(entry.defi, x_lexer, formatter) return return highlightEntry def viewGlossary( filename: str, formatName: str | None = None, glos: Glossary | None = None, noRes: bool = False, ) -> None: highlightEntry = getEntryHighlighter() if glos is None: glos = Glossary(ui=None) if not glos.directRead(filename, formatName=formatName): return pagerCmd = ["less", "-R"] if os.getenv("PAGER"): pagerCmd = shlex.split(os.getenv("PAGER")) proc = Popen( pagerCmd, stdin=PIPE, ) index = 0 entrySep = "_" * 50 def handleEntry(entry: EntryType) -> None: nonlocal index if noRes and entry.isData(): return if highlightEntry: highlightEntry(entry) entryStr = ( f"{yellow}#{index}{reset} " + formatEntry(entry) + "\n" + entrySep + "\n\n" ) proc.stdin.write(entryStr.encode("utf-8")) if (index + 1) % 50 == 0: sys.stdout.flush() index += 1 try: for entry in glos: try: handleEntry(entry) except (OSError, BrokenPipeError): break except (OSError, BrokenPipeError): pass # noqa: S110 except Exception as e: print(e) finally: proc.communicate() # proc.wait() # proc.terminate() sys.stdin.flush() sys.stdout.flush() def main() -> None: parser = argparse.ArgumentParser( prog=sys.argv[0], add_help=True, # allow_abbrev=False, ) parser.add_argument( "--format", dest="formatName", default=None, help="format name", ) parser.add_argument( "--no-res", dest="noRes", action="store_true", default=False, help="do not automatically show resources / files", ) parser.add_argument( "filename", action="store", default="", nargs=1, ) args = parser.parse_args() viewGlossary( os.path.expanduser(args.filename[0]), formatName=args.formatName, noRes=args.noRes, ) if __name__ == "__main__": main() 
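# Usage sketch (illustrative; the file path and format name are examples,
# not taken from this file):
#
#     ./view_glossary.py ~/dicts/foo.txt --format=Tabfile --no-res
#
# Like the diff tool above, the pager is taken from the PAGER environment
# variable when set (falling back to "less -R"), and definitions are
# syntax-highlighted with pygments when that package is installed.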
pyglossary-5.0.9/pyglossary/ui/tools/word_diff.py000066400000000000000000000032531476751035500222670ustar00rootroot00000000000000from __future__ import annotations import difflib import re import sys from typing import TYPE_CHECKING from pyglossary.ui.tools.colors import green, red, reset if TYPE_CHECKING: from collections.abc import Iterator __all__ = ["formatDiff", "xmlDiff"] zwnj = "\u200c" wordRE = re.compile(r"(\W)", re.MULTILINE) xmlTagRE = re.compile( "<[^<>]*>", re.IGNORECASE | re.MULTILINE, ) def plainWordSplit(text: str) -> list[str]: return [word for word in wordRE.split(text) if word] def xmlWordSplit(text: str) -> list[str]: pos = 0 words = [] for m in xmlTagRE.finditer(text): start, end = m.span() match = m.group() if start > pos: words += plainWordSplit(text[pos:start]) words.append(match) pos = end if pos < len(text): words += plainWordSplit(text[pos:]) return words def xmlDiff(text1: str, text2: str) -> Iterator[str]: words1 = xmlWordSplit(text1) words2 = xmlWordSplit(text2) return difflib.ndiff(words1, words2, linejunk=None, charjunk=None) def formatDiff(diff: Iterator[str]) -> str: res = "" for part in diff: if part[0] == " ": res += part[2:] continue if part[0] == "-": res += red + part[2:] + reset + zwnj continue if part[0] == "+": res += green + part[2:] + reset continue return res def main_word_split() -> None: text = sys.argv[1] print(text) for word in xmlWordSplit(text): print(f"word: {word!r}") def main() -> None: filename1 = sys.argv[1] filename2 = sys.argv[2] with open(filename1, encoding="utf-8") as _file: text1 = _file.read() with open(filename2, encoding="utf-8") as _file: text2 = _file.read() print(formatDiff(xmlDiff(text1, text2))) if __name__ == "__main__": main() pyglossary-5.0.9/pyglossary/ui/ui_cmd.py000066400000000000000000000213371476751035500204270ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # ui_cmd.py # # Copyright © 2008-2021 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see <http://www.gnu.org/licenses/>.
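# Example of the option-string syntax parsed by parseFormatOptionsStr below
# (the option names are illustrative):
#
#     parseFormatOptionsStr("encoding=utf-8;resources=False")
#     # -> {"encoding": "utf-8", "resources": "False"}
#
# Values are returned as plain strings; evaluating and validating them
# against each plugin's Option definitions is left to the caller.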
from __future__ import annotations import os import sys from os.path import join from typing import TYPE_CHECKING, Any from pyglossary.core import dataDir, log from pyglossary.glossary_v2 import ConvertArgs, Error, Glossary from .base import UIBase, fread from .wcwidth import wcswidth if TYPE_CHECKING: import logging from collections.abc import Mapping __all__ = ["COMMAND", "UI", "parseFormatOptionsStr", "printHelp"] def wc_ljust(text: str, length: int, padding: str = " ") -> str: return text + padding * max(0, (length - wcswidth(text))) if os.sep == "\\": # Operating system is Windows startBold = "" startUnderline = "" endFormat = "" else: startBold = "\x1b[1m" # Start Bold # len=4 startUnderline = "\x1b[4m" # Start Underline # len=4 endFormat = "\x1b[0;0;0m" # End Format # len=8 # redOnGray = "\x1b[0;1;31;47m" COMMAND = "pyglossary" def getColWidth(subject: str, strings: list[str]) -> int: return max(len(x) for x in [subject] + strings) def getFormatsTable(names: list[str], header: str) -> str: descriptions = [Glossary.plugins[name].description for name in names] extensions = [" ".join(Glossary.plugins[name].extensions) for name in names] nameWidth = getColWidth("Name", names) descriptionWidth = getColWidth("Description", descriptions) extensionsWidth = getColWidth("Extensions", extensions) lines = [ "\n", startBold + header + endFormat, " | ".join( [ "Name".center(nameWidth), "Description".center(descriptionWidth), "Extensions".center(extensionsWidth), ], ), "-+-".join( [ "-" * nameWidth, "-" * descriptionWidth, "-" * extensionsWidth, ], ), ] for index, name in enumerate(names): lines.append( " | ".join( [ name.ljust(nameWidth), descriptions[index].ljust(descriptionWidth), extensions[index].ljust(extensionsWidth), ], ), ) return "\n".join(lines) def printHelp() -> None: import string text = fread(join(dataDir, "help")) text = ( text.replace("<b>", startBold) .replace("<u>", startUnderline) .replace("</b>", endFormat) .replace("</u>", endFormat) ) text = string.Template(text).substitute( CMD=COMMAND, ) text += getFormatsTable(Glossary.readFormats, "Supported input formats:") text += getFormatsTable(Glossary.writeFormats, "Supported output formats:") print(text) # TODO: raise exception instead of returning None def parseFormatOptionsStr(st: str) -> dict[str, str] | None: """Prints error and returns None if failed to parse one option.""" st = st.strip() if not st: return {} opt: dict[str, str] = {} parts = st.split(";") for part in parts: if not part: continue eq = part.find("=") if eq < 1: log.critical(f"bad option syntax: {part!r}") return None key = part[:eq].strip() if not key: log.critical(f"bad option syntax: {part!r}") return None value = part[eq + 1 :].strip() opt[key] = value return opt class NullObj: def __getattr__(self, attr: str) -> NullObj: return self def __setattr__(self, attr: str, value: Any) -> None: pass def __setitem__(self, key: str, value: Any) -> None: pass def __call__( self, *args: tuple[Any], **kwargs: Mapping[str, Any], ) -> None: pass def __bool__(self) -> bool: return False class UI(UIBase): def __init__( self, progressbar: bool = True, ) -> None: UIBase.__init__(self) # log.debug(self.config) self.pbar = NullObj() self._toPause = False self._resetLogFormatter = None self._progressbar = progressbar def onSigInt( self, *_args: tuple[Any], ) -> None: log.info("") if self._toPause: log.info("Operation Canceled") sys.exit(0) else: self._toPause = True log.info("Please wait...") def setText(self, text: str) -> None: self.pbar.widgets[0] = text def fixLogger(self) -> None: for h
in log.handlers: if h.name == "std": self.fixLogHandler(h) return def fillMessage(self, msg: str) -> str: term_width = self.pbar.term_width if not term_width: return msg return "\r" + wc_ljust(msg, term_width) def fixLogHandler(self, h: logging.Handler) -> None: def reset() -> None: h.formatter.fill = None self._resetLogFormatter = reset h.formatter.fill = self.fillMessage def progressInit(self, title: str) -> None: try: from .pbar_tqdm import createProgressBar except ModuleNotFoundError: from .pbar_legacy import createProgressBar self.pbar = createProgressBar(title) self.fixLogger() def progress( self, ratio: float, text: str = "", # noqa: ARG002 ) -> None: self.pbar.update(ratio) def progressEnd(self) -> None: self.pbar.finish() if self._resetLogFormatter: self._resetLogFormatter() def reverseLoop( self, *_args: tuple[Any], **kwargs: Mapping[Any], ) -> None: from pyglossary.reverse import reverseGlossary reverseKwArgs: dict[str, Any] = {} for key in ( "words", "matchWord", "showRel", "includeDefs", "reportStep", "saveStep", "maxNum", "minRel", "minWordLen", ): try: reverseKwArgs[key] = self.config["reverse_" + key] except KeyError: pass reverseKwArgs.update(kwargs) if not self._toPause: log.info("Reversing glossary... (Press Ctrl+C to pause/stop)") for _ in reverseGlossary(self.glos, **reverseKwArgs): if self._toPause: log.info( "Reverse is paused. Press Enter to continue, and Ctrl+C to exit", ) input() self._toPause = False # PLR0912 Too many branches (19 > 12) def run( # noqa: PLR0912, PLR0913 self, inputFilename: str = "", outputFilename: str = "", inputFormat: str = "", outputFormat: str = "", reverse: bool = False, config: dict[str, Any] | None = None, readOptions: dict[str, Any] | None = None, writeOptions: dict[str, Any] | None = None, convertOptions: dict[str, Any] | None = None, glossarySetAttrs: dict[str, Any] | None = None, ) -> bool: if config is None: config = {} if readOptions is None: readOptions = {} if writeOptions is None: writeOptions = {} if convertOptions is None: convertOptions = {} if glossarySetAttrs is None: glossarySetAttrs = {} self.config = config if inputFormat: # noqa: SIM102 # inputFormat = inputFormat.capitalize() if inputFormat not in Glossary.readFormats: log.error(f"invalid read format {inputFormat}") if outputFormat: # noqa: SIM102 # outputFormat = outputFormat.capitalize() if outputFormat not in Glossary.writeFormats: log.error(f"invalid write format {outputFormat}") log.error(f"try: {COMMAND} --help") return False if not outputFilename: if reverse: pass elif outputFormat: try: ext = Glossary.plugins[outputFormat].extensions[0] except (KeyError, IndexError): log.error(f"invalid write format {outputFormat}") log.error(f"try: {COMMAND} --help") return False outputFilename = os.path.splitext(inputFilename)[0] + ext else: log.error("neither output file nor output format is given") log.error(f"try: {COMMAND} --help") return False glos = self.glos = Glossary(ui=self) glos.config = self.config glos.progressbar = self._progressbar for attr, value in glossarySetAttrs.items(): setattr(glos, attr, value) if reverse: import signal signal.signal(signal.SIGINT, self.onSigInt) readOptions["direct"] = True if not glos.read( inputFilename, formatName=inputFormat, **readOptions, ): log.error("reading input file was failed!") return False self.setText("Reversing: ") self.pbar.update_step = 0.1 self.reverseLoop(savePath=outputFilename) return True try: finalOutputFile = self.glos.convert( ConvertArgs( inputFilename, inputFormat=inputFormat, outputFilename=outputFilename, 
outputFormat=outputFormat, readOptions=readOptions, writeOptions=writeOptions, **convertOptions, ), ) except Error as e: log.critical(str(e)) glos.cleanup() return False return bool(finalOutputFile) pyglossary-5.0.9/pyglossary/ui/ui_cmd_interactive.py000066400000000000000000000742361476751035500230260ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # ui_cmd_interactive.py # # Copyright © 2008-2022 Saeed Rasooli (ilius) # This file is part of PyGlossary project, https://github.com/ilius/pyglossary # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. Or on Debian systems, from /usr/share/common-licenses/GPL # If not, see <http://www.gnu.org/licenses/>. from __future__ import annotations """ To use this user interface: sudo pip3 install prompt_toolkit. """ # GitHub repo for prompt_toolkit # https://github.com/prompt-toolkit/python-prompt-toolkit # The code for Python's cmd.Cmd was very ugly and hard to understand last I # checked. But we don't use cmd module here, and nor does prompt_toolkit. # Completion func for Python's readline, silently (and stupidly) hides any # exception, and only shows the print if it's in the first line of function. # very awkward! # We also don't use readline module, and nor does prompt_toolkit. # Looks like prompt_toolkit works directly with sys.stdin, sys.stdout # and sys.stderr.
# prompt_toolkit also supports ncurses-like dialogs with buttons and widgets, # but I prefer this kind of UI with auto-completion and history import argparse import json import logging import os import shlex from os.path import ( abspath, dirname, isabs, isdir, join, relpath, ) from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from collections.abc import Iterable from prompt_toolkit.completion import CompleteEvent from prompt_toolkit.document import Document from prompt_toolkit.formatted_text import StyleAndTextTuples from prompt_toolkit.key_binding.key_processor import KeyPressEvent from pyglossary.option import Option from pyglossary.plugin_prop import PluginProp from prompt_toolkit import ANSI from prompt_toolkit import prompt as promptLow from prompt_toolkit.auto_suggest import AutoSuggestFromHistory from prompt_toolkit.completion import ( Completion, PathCompleter, WordCompleter, ) from prompt_toolkit.history import FileHistory from prompt_toolkit.key_binding import KeyBindings from prompt_toolkit.keys import Keys from prompt_toolkit.shortcuts import PromptSession, confirm from pyglossary import core from pyglossary.core import confDir from pyglossary.glossary_v2 import Error, Glossary from pyglossary.sort_keys import lookupSortKey, namedSortKeyList from pyglossary.ui import ui_cmd from pyglossary.ui.termcolors import colors __all__ = ["UI"] endFormat = "\x1b[0;0;0m" class MiniCheckBoxPrompt: def __init__( self, formatted: StyleAndTextTuples, value: bool = False, ) -> None: self.formatted = formatted self.value = value def __pt_formatted_text__(self) -> StyleAndTextTuples: # noqa: PLW3201 return self.formatted + [("", "[x]" if self.value else "[ ]")] log = logging.getLogger("pyglossary") indent = "\t" cmdiConfDir = join(confDir, "cmdi") histDir = join(cmdiConfDir, "history") for direc in (cmdiConfDir, histDir): os.makedirs(direc, mode=0o700, exist_ok=True) if __name__ == "__main__": Glossary.init() pluginByDesc = {plugin.description: plugin for plugin in Glossary.plugins.values()} readFormatDescList = [ Glossary.plugins[_format].description for _format in Glossary.readFormats ] writeFormatDescList = [ Glossary.plugins[_format].description for _format in Glossary.writeFormats ] convertOptionsFlags = { "direct": ("indirect", "direct"), "sqlite": ("", "sqlite"), "sort": ("no-sort", "sort"), } infoOverrideFlags = { "sourceLang": "source-lang", "targetLang": "target-lang", "name": "name", } def dataToPrettyJson( data: dict[str, Any] | list[Any], ensure_ascii: bool = False, sort_keys: bool = False, ) -> str: return json.dumps( data, sort_keys=sort_keys, indent=2, ensure_ascii=ensure_ascii, ) def prompt( message: ANSI | str, multiline: bool = False, **kwargs, ) -> str: if kwargs.get("default", "") is None: kwargs["default"] = "" text = promptLow(message=message, **kwargs) if multiline and text == "!m": print("Entering Multi-line mode, press Alt+ENTER to end") text = promptLow( message="", multiline=True, **kwargs, ) return text # noqa: RET504 back = "back" class MyPathCompleter(PathCompleter): def __init__( self, reading: bool, # noqa: ARG002 fs_action_names: list[str] | None = None, **kwargs, ) -> None: PathCompleter.__init__( self, file_filter=self.file_filter, **kwargs, ) self.fs_action_names = fs_action_names or [] @staticmethod def file_filter(_filename: str) -> bool: # filename is full/absolute file path return True # def get_completions_exception(document, complete_event, e): # log.error(f"Exception in get_completions: {e}") def get_completions( self, document: Document, 
complete_event: CompleteEvent, ) -> Iterable[Completion]: text = document.text_before_cursor for action in self.fs_action_names: if action.startswith(text): yield Completion( text=action, start_position=-len(text), display=action, ) yield from PathCompleter.get_completions( self, document=document, complete_event=complete_event, ) class AbsolutePathHistory(FileHistory): def load_history_strings(self) -> Iterable[str]: # pwd = os.getcwd() pathList = FileHistory.load_history_strings(self) return [relpath(p) for p in pathList] def store_string(self, string: str) -> None: FileHistory.store_string(self, abspath(string)) class UI(ui_cmd.UI): def __init__( self, progressbar: bool = True, ) -> None: self._inputFilename = "" self._outputFilename = "" self._inputFormat = "" self._outputFormat = "" self.config: dict[str, Any] = {} self._readOptions = {} self._writeOptions = {} self._convertOptions = {} ui_cmd.UI.__init__( self, progressbar=progressbar, ) self.ls_parser = argparse.ArgumentParser(add_help=False) self.ls_parser.add_argument( "-l", "--long", action="store_true", dest="long", help="use a long listing format", ) self.ls_parser.add_argument( "--help", action="store_true", dest="help", help="display help", ) self.ls_usage = ( "Usage: !ls [--help] [-l] [FILE/DIRECTORY]...\n\n" "optional arguments:\n" " --help show this help message and exit\n" " -l, --long use a long listing format\n" ) self._fsActions = { "!pwd": (self.fs_pwd, ""), "!ls": (self.fs_ls, self.ls_usage), "!..": (self.fs_cd_parent, ""), "!cd": (self.fs_cd, ""), } self._finalActions = { "formats": self.askFormats, "read-options": self.askReadOptions, "write-options": self.askWriteOptions, "reset-read-options": self.resetReadOptions, "reset-write-options": self.resetWriteOptions, "config": self.askConfig, "indirect": self.setIndirect, "sqlite": self.setSQLite, "no-progressbar": self.setNoProgressbar, "sort": self.setSort, "sort-key": self.setSortKey, "show-options": self.showOptions, "back": None, } @staticmethod def fs_pwd(args: list[str]) -> None: if args: print(f"extra arguments: {args}") print(os.getcwd()) @staticmethod def get_ls_l( arg: str, st: os.stat_result | None = None, parentDir: str = "", sizeWidth: int = 0, ) -> str: import grp import pwd import stat import time argPath = arg if parentDir: argPath = join(parentDir, arg) if st is None: st = os.lstat(argPath) # os.lstat does not follow sym links, like "ls" command details = [ stat.filemode(st.st_mode), pwd.getpwuid(st.st_uid).pw_name, grp.getgrgid(st.st_gid).gr_name, str(st.st_size).rjust(sizeWidth), time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(st.st_mtime)), arg, ] if stat.S_ISLNK(st.st_mode): details.append(f"-> {os.readlink(argPath)}") return " ".join(details) def fs_ls(self, args: list[str]) -> None: opts, args = self.ls_parser.parse_known_args(args=args) if opts.help: print(self.ls_usage) return if not args: args = [os.getcwd()] showTitle = len(args) > 1 # Note: isdir and isfile funcs follow sym links, so no worry about links for argI, arg in enumerate(args): if argI > 0: print() if not isdir(arg): print(self.get_ls_l(arg)) continue if showTitle: print(f"> List of directory {arg!r}:") if not opts.long: for path in os.listdir(arg): if isdir(path): print(f"{path}/") else: print(f"{path}") continue contents = os.listdir(arg) statList = [os.lstat(join(arg, _path)) for _path in contents] maxFileSize = max(st.st_size for st in statList) sizeWidth = len(str(maxFileSize)) for pathI, path_ in enumerate(contents): print( self.get_ls_l( path_, parentDir=arg, 
st=statList[pathI], sizeWidth=sizeWidth, ), ) @staticmethod def fs_cd_parent(args: list[str]) -> None: if args: log.error("This command does not take arguments") return newDir = dirname(os.getcwd()) os.chdir(newDir) print(f"Changed current directory to: {newDir}") @staticmethod def fs_cd(args: list[str]) -> None: if len(args) != 1: log.error("This command takes exactly one argument") return newDir = args[0] if not isabs(newDir): newDir = abspath(newDir) os.chdir(newDir) print(f"Changed current directory to: {newDir}") def formatPromptMsg( self, level: int, msg: str, colon: str = ":", ) -> tuple[str, bool]: indent_ = self.promptIndentStr * level if core.noColor: return f"{indent_} {msg}{colon} ", False if self.promptIndentColor >= 0: indent_ = f"\x1b[38;5;{self.promptIndentColor}m{indent_}{endFormat}" if self.promptMsgColor >= 0: msg = f"\x1b[38;5;{self.promptMsgColor}m{msg}{endFormat}" return f"{indent_} {msg}{colon} ", True def formatPromptMsgStyleList( self, level: int, msg: str, colon: str = ":", ) -> StyleAndTextTuples: indent_ = self.promptIndentStr * level if core.noColor: return [("", f"{indent_} {msg}{colon} ")] indentStyle = "" if self.promptIndentColor >= 0: indentStyle = "fg:" + colors[self.promptIndentColor].hex msgStyle = "" if self.promptMsgColor >= 0: msgStyle = "fg:" + colors[self.promptMsgColor].hex return [ (indentStyle, f"{indent_} "), (msgStyle, msg), ("", f"{colon} "), ] def prompt(self, level: int, msg: str, colon: str = ":", **kwargs) -> str: msg2, colored = self.formatPromptMsg(level, msg, colon) if colored: msg2 = ANSI(msg2) return prompt(msg2, **kwargs) def checkbox_prompt( self, level: int, msg: str, default: bool, colon: str = ":", ) -> bool: """Create a `PromptSession` object for the 'confirm' function.""" # msg, colored = self.formatPromptMsg(level, msg, colon) bindings = KeyBindings() check = MiniCheckBoxPrompt( formatted=self.formatPromptMsgStyleList(level, msg, colon=colon), value=default, ) @bindings.add(" ") def space(_event: KeyPressEvent) -> None: check.value = not check.value # cursor_pos = check.formatMessage().find("[") + 1 # cur_cursor_pos = session.default_buffer.cursor_position # print(f"{cur_cursor_pos=}, {cursor_pos=}") # session.default_buffer.cursor_position = cursor_pos @bindings.add(Keys.Any) def _(_event: KeyPressEvent) -> None: """Disallow inserting other text.""" complete_message = check session: PromptSession[bool] = PromptSession( complete_message, key_bindings=bindings, ) session.prompt() return check.value def askFile( self, kind: str, histName: str, varName: str, reading: bool, ) -> str: from shlex import split as shlex_split history = AbsolutePathHistory(join(histDir, histName)) auto_suggest = AutoSuggestFromHistory() # Note: isdir and isfile funcs follow sym links, so no worry about links completer = MyPathCompleter( reading=reading, fs_action_names=list(self._fsActions), ) default = getattr(self, varName) while True: filename = self.prompt( 1, kind, history=history, auto_suggest=auto_suggest, completer=completer, default=default, ) if not filename: continue try: parts = shlex_split(filename) except ValueError: # file name can have single/double quote setattr(self, varName, filename) return filename if parts[0] in self._fsActions: actionFunc, usage = self._fsActions[parts[0]] try: actionFunc(parts[1:]) except Exception: log.exception("") if usage: print("\n" + usage) continue setattr(self, varName, filename) return filename raise ValueError(f"{kind} is not given") def askInputFile(self) -> str: return self.askFile( "Input file", 
"filename-input", "_inputFilename", True, ) def askOutputFile(self) -> str: return self.askFile( "Output file", "filename-output", "_outputFilename", False, ) @staticmethod def pluginByNameOrDesc(value: str) -> PluginProp | None: plugin = pluginByDesc.get(value) if plugin: return plugin plugin = Glossary.plugins.get(value) if plugin: return plugin log.error(f"internal error: invalid format name/desc {value!r}") return None def askInputFormat(self) -> str: history = FileHistory(join(histDir, "format-input")) auto_suggest = AutoSuggestFromHistory() completer = WordCompleter( readFormatDescList + Glossary.readFormats, ignore_case=True, match_middle=True, sentence=True, ) while True: value = self.prompt( 1, "Input format", history=history, auto_suggest=auto_suggest, completer=completer, default=self._inputFormat, ) if not value: continue plugin = self.pluginByNameOrDesc(value) if plugin: return plugin.name raise ValueError("input format is not given") def askOutputFormat(self) -> str: history = FileHistory(join(histDir, "format-output")) auto_suggest = AutoSuggestFromHistory() completer = WordCompleter( writeFormatDescList + Glossary.writeFormats, ignore_case=True, match_middle=True, sentence=True, ) while True: value = self.prompt( 1, "Output format", history=history, auto_suggest=auto_suggest, completer=completer, default=self._outputFormat, ) if not value: continue plugin = self.pluginByNameOrDesc(value) if plugin: return plugin.name raise ValueError("output format is not given") def finish(self) -> None: pass # TODO: how to handle \r and \n in NewlineOption.values? @staticmethod def getOptionValueSuggestValues(option: Option) -> list[str] | None: if option.values: return [str(x) for x in option.values] if option.typ == "bool": return ["True", "False"] return None def getOptionValueCompleter(self, option: Option) -> WordCompleter | None: values = self.getOptionValueSuggestValues(option) if values: return WordCompleter( values, ignore_case=True, match_middle=True, sentence=True, ) return None # PLR0912 Too many branches (15 > 12) def askReadOptions(self) -> None: # noqa: PLR0912 options = Glossary.formatsReadOptions.get(self._inputFormat) if options is None: log.error(f"internal error: invalid format {self._inputFormat!r}") return optionsProp = Glossary.plugins[self._inputFormat].optionsProp history = FileHistory(join(histDir, f"read-options-{self._inputFormat}")) auto_suggest = AutoSuggestFromHistory() completer = WordCompleter( list(options), ignore_case=True, match_middle=True, sentence=True, ) while True: try: optName = self.prompt( 2, "ReadOption: Name (ENTER if done)", history=history, auto_suggest=auto_suggest, completer=completer, ) except (KeyboardInterrupt, EOFError): return if not optName: return option = optionsProp[optName] valueCompleter = self.getOptionValueCompleter(option) default = self._readOptions.get(optName) if default is None: default = options[optName] print(f"Comment: {option.longComment}") while True: if option.typ == "bool": try: valueNew = self.checkbox_prompt( 3, f"ReadOption: {optName}", default=default, ) except (KeyboardInterrupt, EOFError): break print(f"Set read-option: {optName} = {valueNew!r}") self._readOptions[optName] = valueNew break try: value = self.prompt( 3, f"ReadOption: {optName}", colon=" =", history=FileHistory(join(histDir, f"option-value-{optName}")), auto_suggest=AutoSuggestFromHistory(), default=str(default), completer=valueCompleter, ) except (KeyboardInterrupt, EOFError): break if value == "" and option.typ != "str": # noqa: PLC1901 if 
optName in self._readOptions: print(f"Unset read-option {optName!r}") del self._readOptions[optName] break valueNew, ok = option.evaluate(value) if not ok or not option.validate(valueNew): log.error( f"Invalid read option value {optName}={value!r}" f" for format {self._inputFormat}", ) continue print(f"Set read-option: {optName} = {valueNew!r}") self._readOptions[optName] = valueNew break # PLR0912 Too many branches (15 > 12) def askWriteOptions(self) -> None: # noqa: PLR0912 options = Glossary.formatsWriteOptions.get(self._outputFormat) if options is None: log.error(f"internal error: invalid format {self._outputFormat!r}") return optionsProp = Glossary.plugins[self._outputFormat].optionsProp history = FileHistory(join(histDir, f"write-options-{self._outputFormat}")) auto_suggest = AutoSuggestFromHistory() completer = WordCompleter( list(options), ignore_case=True, match_middle=True, sentence=True, ) while True: try: optName = self.prompt( 2, "WriteOption: Name (ENTER if done)", history=history, auto_suggest=auto_suggest, completer=completer, ) except (KeyboardInterrupt, EOFError): return if not optName: return option = optionsProp[optName] print(f"Comment: {option.longComment}") valueCompleter = self.getOptionValueCompleter(option) default = self._writeOptions.get(optName) if default is None: default = options[optName] while True: if option.typ == "bool": try: valueNew = self.checkbox_prompt( 3, f"WriteOption: {optName}", default=default, ) except (KeyboardInterrupt, EOFError): break print(f"Set write-option: {optName} = {valueNew!r}") self._writeOptions[optName] = valueNew break try: value = self.prompt( 3, f"WriteOption: {optName}", colon=" =", history=FileHistory(join(histDir, f"option-value-{optName}")), auto_suggest=AutoSuggestFromHistory(), default=str(default), completer=valueCompleter, ) except (KeyboardInterrupt, EOFError): break if value == "" and option.typ != "str": # noqa: PLC1901 if optName in self._writeOptions: print(f"Unset write-option {optName!r}") del self._writeOptions[optName] break valueNew, ok = option.evaluate(value) if not ok or not option.validate(valueNew): log.error( f"Invalid write option value {optName}={value!r}" f" for format {self._outputFormat}", ) continue print(f"Set write-option: {optName} = {valueNew!r}") self._writeOptions[optName] = valueNew break def resetReadOptions(self) -> None: self._readOptions = {} def resetWriteOptions(self) -> None: self._writeOptions = {} def askConfigValue(self, configKey: str, option: Option) -> str: default = self.config.get(configKey, "") if option.typ == "bool": return str( self.checkbox_prompt( 3, f"Config: {configKey}", default=bool(default), ), ) return self.prompt( 3, f"Config: {configKey}", colon=" =", history=FileHistory(join(histDir, f"config-value-{configKey}")), auto_suggest=AutoSuggestFromHistory(), default=str(default), completer=self.getOptionValueCompleter(option), ) def askConfig(self) -> None: configKeys = sorted(self.configDefDict) history = FileHistory(join(histDir, "config-key")) auto_suggest = AutoSuggestFromHistory() completer = WordCompleter( configKeys, ignore_case=True, match_middle=True, sentence=True, ) while True: try: configKey = self.prompt( 2, "Config: Key (ENTER if done)", history=history, auto_suggest=auto_suggest, completer=completer, ) except (KeyboardInterrupt, EOFError): return if not configKey: return option = self.configDefDict[configKey] while True: try: value = self.askConfigValue(configKey, option) except (KeyboardInterrupt, EOFError): break if value == "" and option.typ != 
"str": # noqa: PLC1901 if configKey in self.config: print(f"Unset config {configKey!r}") del self.config[configKey] break valueNew, ok = option.evaluate(value) if not ok or not option.validate(valueNew): log.error( f"Invalid config value {configKey}={value!r}", ) continue print(f"Set config: {configKey} = {valueNew!r}") self.config[configKey] = valueNew self.config[configKey] = valueNew break def showOptions(self) -> None: print(f"readOptions = {self._readOptions}") print(f"writeOptions = {self._writeOptions}") print(f"convertOptions = {self._convertOptions}") print(f"config = {self.config}") print() def setIndirect(self) -> None: self._convertOptions["direct"] = False self._convertOptions["sqlite"] = None print("Switched to indirect mode") def setSQLite(self) -> None: self._convertOptions["direct"] = None self._convertOptions["sqlite"] = True print("Switched to SQLite mode") def setNoProgressbar(self) -> None: self._glossarySetAttrs["progressbar"] = False print("Disabled progress bar") def setSort(self) -> None: try: value = self.checkbox_prompt( 2, "Enable Sort", default=self._convertOptions.get("sort", False), ) except (KeyboardInterrupt, EOFError): return self._convertOptions["sort"] = value def setSortKey(self) -> None: completer = WordCompleter( [_sk.name for _sk in namedSortKeyList], ignore_case=False, match_middle=True, sentence=True, ) default = self._convertOptions.get("sortKeyName", "") sortKeyName = self.prompt( 2, "SortKey", history=FileHistory(join(histDir, "sort-key")), auto_suggest=AutoSuggestFromHistory(), default=default, completer=completer, ) if not sortKeyName: if "sortKeyName" in self._convertOptions: del self._convertOptions["sortKeyName"] return if not lookupSortKey(sortKeyName): log.error(f"invalid {sortKeyName = }") return self._convertOptions["sortKeyName"] = sortKeyName if not self._convertOptions.get("sort"): self.setSort() def askFinalAction(self) -> str | None: history = FileHistory(join(histDir, "action")) auto_suggest = AutoSuggestFromHistory() completer = WordCompleter( list(self._finalActions), ignore_case=False, match_middle=True, sentence=True, ) while True: action = self.prompt( 1, "Select action (ENTER to convert)", history=history, auto_suggest=auto_suggest, completer=completer, ) if not action: return None if action not in self._finalActions: log.error(f"invalid action: {action}") continue return action def askFinalOptions(self) -> bool | str: while True: try: action = self.askFinalAction() except (KeyboardInterrupt, EOFError): return False except Exception: log.exception("") return False if action == back: return back if action is None: return True # convert actionFunc = self._finalActions[action] if actionFunc is None: return True # convert actionFunc() return True # convert def getRunKeywordArgs(self) -> dict: return { "inputFilename": self._inputFilename, "outputFilename": self._outputFilename, "inputFormat": self._inputFormat, "outputFormat": self._outputFormat, "config": self.config, "readOptions": self._readOptions, "writeOptions": self._writeOptions, "convertOptions": self._convertOptions, "glossarySetAttrs": self._glossarySetAttrs, } def checkInputFormat(self, forceAsk: bool = False) -> None: if not forceAsk: try: inputArgs = Glossary.detectInputFormat(self._inputFilename) except Error: pass else: inputFormat = inputArgs.formatName self._inputFormat = inputFormat return self._inputFormat = self.askInputFormat() def checkOutputFormat(self, forceAsk: bool = False) -> None: if not forceAsk: try: outputArgs = Glossary.detectOutputFormat( 
filename=self._outputFilename, inputFilename=self._inputFilename, ) except Error: pass else: self._outputFormat = outputArgs.formatName return self._outputFormat = self.askOutputFormat() def askFormats(self) -> None: self.checkInputFormat(forceAsk=True) self.checkOutputFormat(forceAsk=True) def askInputOutputAgain(self) -> None: self.askInputFile() self.checkInputFormat(forceAsk=True) self.askOutputFile() self.checkOutputFormat(forceAsk=True) def printNonInteractiveCommand(self) -> None: # noqa: PLR0912 cmd = [ ui_cmd.COMMAND, self._inputFilename, self._outputFilename, f"--read-format={self._inputFormat}", f"--write-format={self._outputFormat}", ] if self._readOptions: optionsJson = json.dumps(self._readOptions, ensure_ascii=True) cmd += ["--json-read-options", optionsJson] if self._writeOptions: optionsJson = json.dumps(self._writeOptions, ensure_ascii=True) cmd += ["--json-write-options", optionsJson] if self.config: for key, value in self.config.items(): if value is None: continue if value == self.savedConfig.get(key): continue option = self.configDefDict.get(key) if option is None: log.error(f"config key {key} was not found") if not option.hasFlag: log.error(f"config key {key} has no command line flag") flag = option.customFlag if not flag: flag = key.replace("_", "-") if option.typ == "bool": if not value: flag = f"no-{flag}" cmd.append(f"--{flag}") else: cmd.append(f"--{flag}={value}") if self._convertOptions: if "infoOverride" in self._convertOptions: infoOverride = self._convertOptions.pop("infoOverride") for key, value in infoOverride.items(): flag = infoOverrideFlags.get(key) if not flag: log.error(f"unknown key {key} in infoOverride") continue cmd.append(f"--{flag}={value}") if "sortKeyName" in self._convertOptions: value = self._convertOptions.pop("sortKeyName") cmd.append(f"--sort-key={value}") for key, value in self._convertOptions.items(): if value is None: continue if key not in convertOptionsFlags: log.error(f"unknown key {key} in convertOptions") continue ftup = convertOptionsFlags[key] if ftup is None: continue if isinstance(value, bool): flag = ftup[int(value)] if flag: cmd.append(f"--{flag}") else: flag = ftup[0] cmd.append(f"--{flag}={value}") if ( "progressbar" in self._glossarySetAttrs and not self._glossarySetAttrs["progressbar"] ): cmd.append("--no-progress-bar") print() print( "If you want to repeat this conversion later, you can use this command:", ) # shlex.join is added in Python 3.8 print(shlex.join(cmd)) def setConfigAttrs(self) -> None: config = self.config self.promptIndentStr = config.get("cmdi.prompt.indent.str", ">") self.promptIndentColor = config.get("cmdi.prompt.indent.color", 2) self.promptMsgColor = config.get("cmdi.prompt.msg.color", -1) self.msgColor = config.get("cmdi.msg.color", -1) # PLR0912 Too many branches (19 > 12) def main(self, again: bool = False) -> None: # noqa: PLR0912 if again or not self._inputFilename: try: self.askInputFile() except (KeyboardInterrupt, EOFError): return None if again or not self._inputFormat: try: self.checkInputFormat() except (KeyboardInterrupt, EOFError): return None if again or not self._outputFilename: try: self.askOutputFile() except (KeyboardInterrupt, EOFError): return None if again or not self._outputFormat: try: self.checkOutputFormat() except (KeyboardInterrupt, EOFError): return None while True: status = self.askFinalOptions() if status == back: self.askInputOutputAgain() continue if not status: return None try: succeed = ui_cmd.UI.run(self, **self.getRunKeywordArgs()) except Exception: 
log.exception("") else: self.printNonInteractiveCommand() if succeed: return succeed print("Press Control + C to exit") def run( # noqa: PLR0913 self, inputFilename: str = "", outputFilename: str = "", inputFormat: str = "", outputFormat: str = "", reverse: bool = False, config: dict[str, Any] | None = None, readOptions: dict[str, Any] | None = None, writeOptions: dict[str, Any] | None = None, convertOptions: dict[str, Any] | None = None, glossarySetAttrs: dict[str, Any] | None = None, ) -> bool: if reverse: raise NotImplementedError("Reverse is not implemented in this UI") self._inputFilename = inputFilename self._outputFilename = outputFilename self._inputFormat = inputFormat self._outputFormat = outputFormat self._readOptions = readOptions or {} self._writeOptions = writeOptions or {} self._convertOptions = convertOptions or {} self._glossarySetAttrs = glossarySetAttrs or {} if not self._progressbar: self._glossarySetAttrs["progressbar"] = False self.loadConfig() self.savedConfig = self.config.copy() self.config = config or {} del inputFilename, outputFilename, inputFormat, outputFormat del config, readOptions, writeOptions, convertOptions self.setConfigAttrs() self.main() try: while ( self.prompt( level=1, msg="Press enter to exit, 'a' to convert again", default="", ) == "a" ): self.main(again=True) except KeyboardInterrupt: pass if self.config != self.savedConfig and confirm("Save Config?"): self.saveConfig() return True pyglossary-5.0.9/pyglossary/ui/ui_gtk3.py000066400000000000000000001312421476751035500205310ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # ui_gtk.py # # Copyright © 2008-2022 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
from __future__ import annotations import logging import traceback from os.path import abspath, isfile from typing import TYPE_CHECKING, Any import gi from pyglossary import core from pyglossary.glossary_v2 import ConvertArgs, Error, Glossary from pyglossary.sort_keys import defaultSortKeyName, namedSortKeyList from pyglossary.text_utils import urlToPath from .base import ( UIBase, aboutText, authors, licenseText, logo, ) from .dependency import checkDepends from .version import getVersion gi.require_version("Gtk", "3.0") from .gtk3_utils import gdk, gtk # noqa: E402 from .gtk3_utils.about import AboutWidget # noqa: E402 from .gtk3_utils.dialog import MyDialog # noqa: E402 from .gtk3_utils.resize_button import ResizeButton # noqa: E402 from .gtk3_utils.utils import ( # noqa: E402 HBox, VBox, dialog_add_button, imageFromFile, pack, rgba_parse, showInfo, ) if TYPE_CHECKING: from collections.abc import Callable from pyglossary.plugin_prop import PluginProp # from gi.repository import GdkPixbuf log = logging.getLogger("pyglossary") gtk.Window.set_default_icon_from_file(logo) _ = str # later replace with translator function pluginByDesc = {plugin.description: plugin for plugin in Glossary.plugins.values()} readDesc = [ plugin.description for plugin in Glossary.plugins.values() if plugin.canRead ] writeDesc = [ plugin.description for plugin in Glossary.plugins.values() if plugin.canWrite ] def getMonitor() -> gdk.Monitor | None: display = gdk.Display.get_default() monitor = display.get_monitor_at_point(1, 1) if monitor is not None: log.debug("getMonitor: using get_monitor_at_point") return monitor monitor = display.get_primary_monitor() if monitor is not None: log.debug("getMonitor: using get_primary_monitor") return monitor monitor = display.get_monitor_at_window(gdk.get_default_root_window()) if monitor is not None: log.debug("getMonitor: using get_monitor_at_window") return monitor return None def getWorkAreaSize() -> tuple[int, int] | None: monitor = getMonitor() if monitor is None: return None rect = monitor.get_workarea() return rect.width, rect.height class FormatDialog(gtk.Dialog): def __init__( self, descList: list[str], parent: gtk.Widget | None = None, **kwargs, ) -> None: gtk.Dialog.__init__(self, parent=parent, **kwargs) self.descList = descList self.items = descList self.activeDesc = "" ## self.connect("response", lambda _w, _e: self.hide()) dialog_add_button( self, "gtk-cancel", "_Cancel", gtk.ResponseType.CANCEL, ) dialog_add_button( self, "gtk-ok", "_OK", gtk.ResponseType.OK, ) ### treev = gtk.TreeView() treeModel = gtk.ListStore(str) treev.set_headers_visible(False) treev.set_model(treeModel) treev.connect("row-activated", self.rowActivated) # treev.connect("response", self.onResponse) ### self.treev = treev ############# cell = gtk.CellRendererText(editable=False) col = gtk.TreeViewColumn( title="Descriptin", cell_renderer=cell, text=0, ) col.set_property("expand", True) col.set_resizable(True) treev.append_column(col) self.descCol = col ############ hbox = HBox(spacing=15) hbox.set_border_width(10) pack(hbox, gtk.Label("Search:")) entry = self.entry = gtk.Entry() pack(hbox, entry, 1, 1) pack(self.vbox, hbox) ### entry.connect("changed", self.onEntryChange) ############ self.swin = swin = gtk.ScrolledWindow() swin.add(treev) swin.set_policy(gtk.PolicyType.NEVER, gtk.PolicyType.AUTOMATIC) pack(self.vbox, swin, 1, 1) self.vbox.show_all() ## treev.set_can_focus(True) # no need, just to be safe treev.set_can_default(True) treev.set_receives_default(True) # print("can_focus:", 
treev.get_can_focus()) # print("can_default:", treev.get_can_default()) # print("receives_default:", treev.get_receives_default()) #### self.updateTree() self.resize(400, 400) self.connect("realize", self.onRealize) def onRealize(self, _widget: Any = None) -> None: if self.activeDesc: self.treev.grab_focus() else: self.entry.grab_focus() def onEntryChange(self, entry: gtk.Entry) -> None: text = entry.get_text().strip() if not text: self.items = self.descList self.updateTree() return text = text.lower() descList = self.descList items1 = [] items2 = [] for desc in descList: if desc.lower().startswith(text): items1.append(desc) elif text in desc.lower(): items2.append(desc) self.items = items1 + items2 self.updateTree() def setCursor(self, desc: str) -> None: model = self.treev.get_model() iter_ = model.iter_children(None) while iter_ is not None: if model.get_value(iter_, 0) == desc: path = model.get_path(iter_) self.treev.set_cursor(path, self.descCol, False) self.treev.scroll_to_cell(path) return iter_ = model.iter_next(iter_) def updateTree(self) -> None: model = self.treev.get_model() model.clear() for desc in self.items: model.append([desc]) if self.activeDesc: self.setCursor(self.activeDesc) def getActive(self) -> PluginProp | None: iter_ = self.treev.get_selection().get_selected()[1] if iter_ is None: return None model = self.treev.get_model() desc = model.get_value(iter_, 0) return pluginByDesc[desc] def setActive(self, plugin: PluginProp) -> None: if plugin is None: self.activeDesc = "" return desc = plugin.description self.activeDesc = desc self.setCursor(desc) def rowActivated( self, treev: gtk.TreeView, path: gtk.GtkTreePath, _col: Any, ) -> None: model = treev.get_model() iter_ = model.get_iter(path) desc = model.get_value(iter_, 0) self.activeDesc = desc self.response(gtk.ResponseType.OK) # def onResponse class FormatButton(gtk.Button): noneLabel = "[Select Format]" dialogTitle = "Select Format" def __init__(self, descList: list[str], parent: gtk.Widget | None = None) -> None: gtk.Button.__init__(self) self.set_label(self.noneLabel) ### self.descList = descList self._parent = parent self.activePlugin = None ### self.connect("clicked", self.onClick) def onChanged(self, _obj: Any = None) -> None: pass def onClick(self, _button: Any = None) -> None: dialog = FormatDialog( descList=self.descList, parent=self._parent, title=self.dialogTitle, ) dialog.setActive(self.activePlugin) if dialog.run() != gtk.ResponseType.OK: return plugin = dialog.getActive() self.activePlugin = plugin if plugin: self.set_label(plugin.description) else: self.set_label(self.noneLabel) self.onChanged() def getActive(self) -> str: if self.activePlugin is None: return "" return self.activePlugin.name def setActive(self, formatName: str) -> None: plugin = Glossary.plugins[formatName] self.activePlugin = plugin self.set_label(plugin.description) self.onChanged() class FormatOptionsDialog(gtk.Dialog): commentLen = 60 def __init__( self, formatName: str, options: list[str], optionsValues: dict[str, Any], parent: gtk.Widget | None = None, ) -> None: gtk.Dialog.__init__(self, parent=parent) optionsProp = Glossary.plugins[formatName].optionsProp self.optionsProp = optionsProp ## self.connect("response", lambda _w, _e: self.hide()) dialog_add_button( self, "gtk-cancel", "_Cancel", gtk.ResponseType.CANCEL, ) dialog_add_button( self, "gtk-ok", "_OK", gtk.ResponseType.OK, ) ### treev = gtk.TreeView() treeModel = gtk.ListStore( bool, # enable str, # name str, # comment str, # value ) treev.set_headers_clickable(True) 
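# Columns of treeModel above: 0 = enabled (bool), 1 = option name,
# 2 = comment, 3 = value. The value is kept as a string and is only
# evaluated/validated (via each Option's evaluate()) in getOptionsValues()
# after the dialog is accepted.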
treev.set_model(treeModel) treev.connect("row-activated", self.rowActivated) treev.connect("button-press-event", self.treeviewButtonPress) ### self.treev = treev ############# cell = gtk.CellRendererToggle() # cell.set_property("activatable", True) cell.connect("toggled", self.enableToggled) col = gtk.TreeViewColumn(title="Enable", cell_renderer=cell) col.add_attribute(cell, "active", 0) # cell.set_active(False) col.set_property("expand", False) col.set_resizable(True) treev.append_column(col) ### col = gtk.TreeViewColumn( title="Name", cell_renderer=gtk.CellRendererText(), text=1, ) col.set_property("expand", False) col.set_resizable(True) treev.append_column(col) ### cell = gtk.CellRendererText(editable=True) self.valueCell = cell self.valueCol = 3 cell.connect("edited", self.valueEdited) col = gtk.TreeViewColumn( title="Value", cell_renderer=cell, text=self.valueCol, ) col.set_property("expand", True) col.set_resizable(True) col.set_min_width(200) treev.append_column(col) ### col = gtk.TreeViewColumn( title="Comment", cell_renderer=gtk.CellRendererText(), text=2, ) col.set_property("expand", False) col.set_resizable(False) treev.append_column(col) ############# for name in options: prop = optionsProp[name] comment = prop.longComment if len(comment) > self.commentLen: comment = comment[: self.commentLen] + "..." if prop.typ != "bool" and not prop.values: comment += " (double-click to edit)" treeModel.append( [ name in optionsValues, # enable name, # name comment, # comment str(optionsValues.get(name, "")), # value ], ) ############ pack(self.vbox, treev, 1, 1) self.vbox.show_all() def enableToggled(self, cell: gtk.CellRenderer, path: gtk.TreePath) -> None: # enable is column 0 model = self.treev.get_model() active = not cell.get_active() itr = model.get_iter(path) model.set_value(itr, 0, active) def valueEdited(self, _cell: Any, path: gtk.TreePath, rawValue: str) -> None: # value is column 3 model = self.treev.get_model() itr = model.get_iter(path) optName = model.get_value(itr, 1) prop = self.optionsProp[optName] if not prop.customValue: return enable = True if rawValue == "" and prop.typ != "str": # noqa: PLC1901 enable = False elif not prop.validateRaw(rawValue): log.error(f"invalid {prop.typ} value: {optName} = {rawValue!r}") return model.set_value(itr, self.valueCol, rawValue) model.set_value(itr, 0, enable) def rowActivated(self, _treev: Any, path: gtk.TreePath, _col: Any) -> bool: # forceMenu=True because we can not enter edit mode # if double-clicked on a cell other than Value return self.valueCellClicked(path, forceMenu=True) def treeviewButtonPress(self, treev: gtk.TreeView, gevent: gdk.ButtonEvent) -> bool: if gevent.button != 1: return False pos_t = treev.get_path_at_pos(int(gevent.x), int(gevent.y)) if not pos_t: return False # pos_t == path, col, xRel, yRel path = pos_t[0] col = pos_t[1] # cell = col.get_cells()[0] if col.get_title() == "Value": return self.valueCellClicked(path) return False def valueItemActivate(self, item: gtk.MenuItem, itr: gtk.TreeIter) -> None: # value is column 3 value = item.get_label() model = self.treev.get_model() model.set_value(itr, self.valueCol, value) model.set_value(itr, 0, True) # enable it def valueCustomOpenDialog(self, itr: gtk.TreeIter, optName: str) -> None: model = self.treev.get_model() prop = self.optionsProp[optName] currentValue = model.get_value(itr, self.valueCol) optDesc = optName if prop.comment: optDesc += f" ({prop.comment})" label = gtk.Label(label=f"Value for {optDesc}") dialog = gtk.Dialog(parent=self, title="Option 
Value") dialog.connect("response", lambda _w, _e: dialog.hide()) dialog_add_button( dialog, "gtk-cancel", "_Cancel", gtk.ResponseType.CANCEL, ) dialog_add_button( dialog, "gtk-ok", "_OK", gtk.ResponseType.OK, ) pack(dialog.vbox, label, 0, 0) entry = gtk.Entry() entry.set_text(currentValue) entry.connect("activate", lambda _w: dialog.response(gtk.ResponseType.OK)) pack(dialog.vbox, entry, 0, 0) dialog.vbox.show_all() if dialog.run() != gtk.ResponseType.OK: return value = entry.get_text() model.set_value(itr, self.valueCol, value) model.set_value(itr, 0, True) # enable it def valueItemCustomActivate( self, _item: gtk.MenuItem, itr: gtk.TreeIter, ) -> None: model = self.treev.get_model() optName = model.get_value(itr, 1) self.valueCustomOpenDialog(itr, optName) def valueCellClicked(self, path: gtk.TreePath, forceMenu: bool = False) -> bool: """ Returns True if event is handled, False if not handled (need to enter edit mode). """ model = self.treev.get_model() itr = model.get_iter(path) optName = model.get_value(itr, 1) prop = self.optionsProp[optName] if prop.typ == "bool": rawValue = model.get_value(itr, self.valueCol) if rawValue == "": # noqa: PLC1901 value = False else: value, isValid = prop.evaluate(rawValue) if not isValid: log.error(f"invalid {optName} = {rawValue!r}") value = False model.set_value(itr, self.valueCol, str(not value)) model.set_value(itr, 0, True) # enable it return True propValues = prop.values if not propValues: if forceMenu: propValues = [] else: return False menu = gtk.Menu() if prop.customValue: item = gtk.MenuItem("[Custom Value]") item.connect("activate", self.valueItemCustomActivate, itr) item.show() menu.append(item) groupedValues = None if len(propValues) > 10: groupedValues = prop.groupValues() if groupedValues: for groupName, values in groupedValues.items(): item = gtk.MenuItem() item.set_label(groupName) if values is None: item.connect("activate", self.valueItemActivate, itr) else: subMenu = gtk.Menu() for subValue in values: subItem = gtk.MenuItem(label=str(subValue)) subItem.connect("activate", self.valueItemActivate, itr) subItem.show() subMenu.append(subItem) item.set_submenu(subMenu) item.show() menu.append(item) else: for value in propValues: item = gtk.MenuItem(value) item.connect("activate", self.valueItemActivate, itr) item.show() menu.append(item) etime = gtk.get_current_event_time() menu.popup(None, None, None, None, 3, etime) return True def getOptionsValues(self) -> dict[str, Any]: model = self.treev.get_model() optionsValues: dict[str, Any] = {} for row in model: if not row[0]: # not enable continue optName = row[1] rawValue = row[3] prop = self.optionsProp[optName] value, isValid = prop.evaluate(rawValue) if not isValid: log.error(f"invalid option value {optName} = {rawValue}") continue optionsValues[optName] = value return optionsValues class FormatBox(FormatButton): def __init__(self, descList: list[str], parent: gtk.Widget = None) -> None: FormatButton.__init__(self, descList, parent=parent) self.optionsValues = {} self.optionsButton = gtk.Button(label="Options") self.optionsButton.set_image( gtk.Image.new_from_icon_name( "gtk-preferences", gtk.IconSize.BUTTON, ), ) self.optionsButton.connect("clicked", self.optionsButtonClicked) self.dependsButton = gtk.Button(label="Install dependencies") self.dependsButton.pkgNames = [] self.dependsButton.connect("clicked", self.dependsButtonClicked) def setOptionsValues(self, optionsValues: dict[str, Any]) -> None: self.optionsValues = optionsValues def kind(self) -> str: """Return 'r' or 'w'.""" raise 
NotImplementedError def getActiveOptions(self) -> list[str] | None: raise NotImplementedError def optionsButtonClicked(self, _button: Any) -> None: formatName = self.getActive() options = self.getActiveOptions() if options is None: return dialog = FormatOptionsDialog( formatName, options, self.optionsValues, parent=self._parent, ) dialog.set_title("Options for " + formatName) if dialog.run() != gtk.ResponseType.OK: dialog.destroy() return self.optionsValues = dialog.getOptionsValues() dialog.destroy() def dependsButtonClicked(self, button: gtk.Button) -> None: formatName = self.getActive() pkgNames = button.pkgNames if not pkgNames: print("All dependencies are stattisfied for " + formatName) return pkgNamesStr = " ".join(pkgNames) msg = f"Run the following command:\n{core.pip} install {pkgNamesStr}" showInfo( msg, title="Dependencies for " + formatName, selectable=True, parent=self._parent, ) self.onChanged(self) def onChanged(self, _widget: Any = None) -> None: name = self.getActive() if not name: self.optionsButton.set_visible(False) return self.optionsValues.clear() self.optionsButton.set_visible(bool(self.getActiveOptions())) kind = self.kind() plugin = Glossary.plugins[name] if kind == "r": depends = plugin.readDepends elif kind == "w": depends = plugin.writeDepends else: raise RuntimeError(f"invalid {kind=}") uninstalled = checkDepends(depends) self.dependsButton.pkgNames = uninstalled self.dependsButton.set_visible(bool(uninstalled)) class InputFormatBox(FormatBox): dialogTitle = "Select Input Format" def __init__(self, **kwargs) -> None: FormatBox.__init__(self, readDesc, **kwargs) def kind(self) -> str: """Return 'r' or 'w'.""" return "r" def getActiveOptions(self) -> list[str] | None: formatName = self.getActive() if not formatName: return None return list(Glossary.formatsReadOptions[formatName]) class OutputFormatBox(FormatBox): dialogTitle = "Select Output Format" def __init__(self, **kwargs) -> None: FormatBox.__init__(self, writeDesc, **kwargs) def kind(self) -> str: """Return 'r' or 'w'.""" return "w" def getActiveOptions(self) -> list[str] | None: return list(Glossary.formatsWriteOptions[self.getActive()]) class GtkTextviewLogHandler(logging.Handler): def __init__(self, ui: UI, textview_dict: dict[str, gtk.TextView]) -> None: logging.Handler.__init__(self) self.ui = ui self.buffers = {} for levelNameCap in log.levelNamesCap[:-1]: levelName = levelNameCap.upper() textview = textview_dict[levelName] buff = textview.get_buffer() tag = gtk.TextTag.new(levelName) buff.get_tag_table().add(tag) self.buffers[levelName] = buff def getTag(self, levelname: str) -> gtk.TextTag: return self.buffers[levelname].get_tag_table().lookup(levelname) def setColor(self, levelname: str, rgba: gdk.RGBA) -> None: self.getTag(levelname).set_property("foreground-rgba", rgba) # foreground-gdk is deprecated since Gtk 3.4 def emit(self, record: logging.LogRecord) -> None: msg = "" if record.getMessage(): msg = self.format(record) # msg = msg.replace("\x00", "") if record.exc_info: type_, value, tback = record.exc_info tback_text = "".join( traceback.format_exception(type_, value, tback), ) if msg: msg += "\n" msg += tback_text buff = self.buffers[record.levelname] buff.insert_with_tags_by_name( buff.get_end_iter(), msg + "\n", record.levelname, ) if record.levelno == logging.CRITICAL: self.ui.status(record.getMessage()) class GtkSingleTextviewLogHandler(GtkTextviewLogHandler): def __init__(self, ui: UI, textview: gtk.TextView) -> None: GtkTextviewLogHandler.__init__( self, ui, { "CRITICAL": textview, 
"ERROR": textview, "WARNING": textview, "INFO": textview, "DEBUG": textview, "TRACE": textview, }, ) class BrowseButton(gtk.Button): def __init__( self, setFilePathFunc: Callable[[str], None], label: str = "Browse", actionSave: bool = False, title: str = "Select File", ) -> None: gtk.Button.__init__(self) self.set_label(label) self.set_image( gtk.Image.new_from_icon_name( "document-save" if actionSave else "document-open", gtk.IconSize.BUTTON, ), ) self.actionSave = actionSave self.setFilePathFunc = setFilePathFunc self.title = title self.connect("clicked", self.onClick) def onClick(self, _widget: Any) -> None: fcd = gtk.FileChooserNative( transient_for=( self.get_root() if hasattr(self, "get_root") else self.get_toplevel() ), action=gtk.FileChooserAction.SAVE if self.actionSave else gtk.FileChooserAction.OPEN, title=self.title, ) fcd.connect("response", lambda _w, _e: fcd.hide()) fcd.connect( "file-activated", lambda _w: fcd.response(gtk.ResponseType.ACCEPT), ) if fcd.run() == gtk.ResponseType.ACCEPT: self.setFilePathFunc(fcd.get_filename()) fcd.destroy() sortKeyNameByDesc = {_sk.desc: _sk.name for _sk in namedSortKeyList} sortKeyNames = [_sk.name for _sk in namedSortKeyList] class SortOptionsBox(gtk.Box): def __init__(self, ui: UI) -> None: gtk.Box.__init__(self, orientation=gtk.Orientation.VERTICAL) self.ui = ui ### hbox = gtk.HBox() sortCheck = gtk.CheckButton("Sort entries by") sortKeyCombo = gtk.ComboBoxText() for _sk in namedSortKeyList: sortKeyCombo.append_text(_sk.desc) sortKeyCombo.set_active(sortKeyNames.index(defaultSortKeyName)) sortKeyCombo.set_border_width(0) sortKeyCombo.set_sensitive(False) # sortKeyCombo.connect("changed", self.sortKeyComboChanged) self.sortCheck = sortCheck self.sortKeyCombo = sortKeyCombo sortCheck.connect("clicked", self.onSortCheckClicked) pack(hbox, sortCheck, 0, 0, padding=5) pack(hbox, sortKeyCombo, 0, 0, padding=5) pack(self, hbox, 0, 0, padding=5) ### hbox = self.encodingHBox = gtk.HBox() encodingCheck = self.encodingCheck = gtk.CheckButton(label="Sort Encoding") encodingEntry = self.encodingEntry = gtk.Entry() encodingEntry.set_text("utf-8") encodingEntry.set_width_chars(15) pack(hbox, gtk.Label(label=" ")) pack(hbox, encodingCheck, 0, 0) pack(hbox, encodingEntry, 0, 0, padding=5) pack(self, hbox, 0, 0, padding=5) ### # RadioButton in Gtk3 is very unstable, # I could not make set_group work at all! # encodingRadio.get_group() == [encodingRadio] # localeRadio.set_group(encodingRadio) says: # TypeError: Must be sequence, not RadioButton # localeRadio.set_group([encodingRadio]) causes a crash!! 
# but localeRadio.join_group(encodingRadio) works, # so does group= argument to RadioButton() # Note: RadioButton does not exist in Gtk 4.0, # you have to use CheckButton with its new set_group() method hbox = self.localeHBox = gtk.HBox() localeEntry = self.localeEntry = gtk.Entry() localeEntry.set_width_chars(15) pack(hbox, gtk.Label(label=" ")) pack(hbox, gtk.Label(label="Sort Locale"), 0, 0) pack(hbox, localeEntry, 0, 0, padding=5) pack(self, hbox, 0, 0, padding=5) ### self.show_all() def onSortCheckClicked(self, check: gtk.CheckButton) -> None: sort = check.get_active() self.sortKeyCombo.set_sensitive(sort) self.encodingHBox.set_sensitive(sort) self.localeHBox.set_sensitive(sort) def updateWidgets(self) -> None: convertOptions = self.ui.convertOptions sort = convertOptions.get("sort") self.sortCheck.set_active(sort) self.sortKeyCombo.set_sensitive(sort) self.encodingHBox.set_sensitive(sort) self.localeHBox.set_sensitive(sort) sortKeyName = convertOptions.get("sortKeyName") if sortKeyName: sortKeyName, _, localeName = sortKeyName.partition(":") if sortKeyName: self.sortKeyCombo.set_active(sortKeyNames.index(sortKeyName)) self.localeEntry.set_text(localeName) if "sortEncoding" in convertOptions: self.encodingCheck.set_active(True) self.encodingEntry.set_text(convertOptions["sortEncoding"]) def applyChanges(self) -> None: convertOptions = self.ui.convertOptions sort = self.sortCheck.get_active() if not sort: for param in ("sort", "sortKeyName", "sortEncoding"): if param in convertOptions: del convertOptions[param] return sortKeyDesc = self.sortKeyCombo.get_active_text() sortKeyName = sortKeyNameByDesc[sortKeyDesc] sortLocale = self.localeEntry.get_text() if sortLocale: sortKeyName = f"{sortKeyName}:{sortLocale}" convertOptions["sort"] = True convertOptions["sortKeyName"] = sortKeyName if self.encodingCheck.get_active(): convertOptions["sortEncoding"] = self.encodingEntry.get_text() class GeneralOptionsDialog(gtk.Dialog): def onDeleteEvent(self, _widget: Any, _event: Any) -> bool: self.hide() return True def onResponse(self, _widget: Any, _event: Any) -> bool: self.applyChanges() self.hide() return True def __init__(self, ui: UI, **kwargs) -> None: gtk.Dialog.__init__( self, transient_for=ui, **kwargs, ) self.set_title("General Options") self.ui = ui ## self.resize(600, 500) self.connect("delete-event", self.onDeleteEvent) ## self.connect("response", self.onResponse) dialog_add_button( self, "gtk-ok", "_OK", gtk.ResponseType.OK, ) ## hpad = 10 vpad = 5 ## self.sortOptionsBox = SortOptionsBox(ui) pack(self.vbox, self.sortOptionsBox, 0, 0, padding=vpad) ## hbox = gtk.HBox() self.sqliteCheck = gtk.CheckButton(label="SQLite mode") pack(hbox, self.sqliteCheck, 0, 0, padding=hpad) pack(self.vbox, hbox, 0, 0, padding=vpad) ## self.configParams = { "save_info_json": False, "lower": False, "skip_resources": False, "rtl": False, "enable_alts": True, "cleanup": True, "remove_html_all": True, } self.configCheckButtons = {} configDefDict = UIBase.configDefDict for param in self.configParams: hbox = gtk.HBox() comment = configDefDict[param].comment comment = comment.split("\n")[0] checkButton = gtk.CheckButton( label=comment, ) self.configCheckButtons[param] = checkButton pack(hbox, checkButton, 0, 0, padding=hpad) pack(self.vbox, hbox, 0, 0, padding=vpad) ## self.updateWidgets() self.vbox.show_all() def getSQLite(self) -> bool: convertOptions = self.ui.convertOptions sqlite = convertOptions.get("sqlite") if sqlite is not None: return sqlite return self.ui.config.get("auto_sqlite", True) def 
updateWidgets(self) -> None: config = self.ui.config self.sortOptionsBox.updateWidgets() self.sqliteCheck.set_active(self.getSQLite()) for param, check in self.configCheckButtons.items(): default = self.configParams[param] check.set_active(config.get(param, default)) def applyChanges(self) -> None: # print("applyChanges") self.sortOptionsBox.applyChanges() convertOptions = self.ui.convertOptions config = self.ui.config convertOptions["sqlite"] = self.sqliteCheck.get_active() for param, check in self.configCheckButtons.items(): config[param] = check.get_active() class GeneralOptionsButton(gtk.Button): def __init__(self, ui: UI) -> None: gtk.Button.__init__(self, label="General Options") self.ui = ui self.connect("clicked", self.onClick) self.dialog = None def onClick(self, _widget: Any) -> None: if self.dialog is None: self.dialog = GeneralOptionsDialog(self.ui) self.dialog.present() class UI(gtk.Dialog, MyDialog, UIBase): def status(self, msg: str) -> None: # try: # _id = self.statusMsgDict[msg] # except KeyError: # _id = self.statusMsgDict[msg] = self.statusNewId # self.statusNewId += 1 id_ = self.statusBar.get_context_id(msg) self.statusBar.push(id_, msg) def __init__( self, progressbar: bool = True, ) -> None: gtk.Dialog.__init__(self) UIBase.__init__(self) self.set_title("PyGlossary (Gtk3)") ### self.progressbarEnable = progressbar ##### screenSize = getWorkAreaSize() if screenSize: winSize = min(800, screenSize[0] - 50, screenSize[1] - 50) self.resize(winSize, winSize) # print(f"{screenSize = }") ##### self.connect("delete-event", self.onDeleteEvent) self.pages = [] # self.statusNewId = 0 # self.statusMsgDict = {}## message -> id ##### self.convertOptions = {} ##### self.styleProvider = gtk.CssProvider() gtk.StyleContext.add_provider_for_screen( gdk.Screen.get_default(), self.styleProvider, gtk.STYLE_PROVIDER_PRIORITY_APPLICATION, ) css = "check {min-width: 1.25em; min-height: 1.25em;}\n" self.styleProvider.load_from_data(css.encode("utf-8")) ##### self.assert_quit = False self.path = "" # ____________________ Tab 1 - Convert ____________________ # labelSizeGroup = gtk.SizeGroup(mode=gtk.SizeGroupMode.HORIZONTAL) buttonSizeGroup = gtk.SizeGroup(mode=gtk.SizeGroupMode.HORIZONTAL) #### vbox = VBox() vbox.label = _("Convert") vbox.icon = "" # "*.png" self.pages.append(vbox) ###### hbox = HBox(spacing=3) hbox.label = gtk.Label(label=_("Input File:")) pack(hbox, hbox.label) labelSizeGroup.add_widget(hbox.label) hbox.label.set_property("xalign", 0) self.convertInputEntry = gtk.Entry() pack(hbox, self.convertInputEntry, 1, 1) button = BrowseButton( self.convertInputEntry.set_text, label="Browse", actionSave=False, title="Select Input File", ) pack(hbox, button) buttonSizeGroup.add_widget(button) pack(vbox, hbox) ## self.convertInputEntry.connect( "changed", self.convertInputEntryChanged, ) ### hbox = HBox(spacing=3) hbox.label = gtk.Label(label=_("Input Format:")) pack(hbox, hbox.label) labelSizeGroup.add_widget(hbox.label) hbox.label.set_property("xalign", 0) self.convertInputFormatCombo = InputFormatBox(parent=self) buttonSizeGroup.add_widget(self.convertInputFormatCombo.optionsButton) pack(hbox, self.convertInputFormatCombo) pack(hbox, gtk.Label(), 1, 1) pack(hbox, self.convertInputFormatCombo.dependsButton) pack(hbox, self.convertInputFormatCombo.optionsButton) pack(vbox, hbox) ##### vbox.sep1 = gtk.Label(label="") vbox.sep1.show() pack(vbox, vbox.sep1) ##### hbox = HBox(spacing=3) hbox.label = gtk.Label(label=_("Output File:")) pack(hbox, hbox.label) 
labelSizeGroup.add_widget(hbox.label) hbox.label.set_property("xalign", 0) self.convertOutputEntry = gtk.Entry() pack(hbox, self.convertOutputEntry, 1, 1) button = BrowseButton( self.convertOutputEntry.set_text, label="Browse", actionSave=True, title="Select Output File", ) pack(hbox, button) buttonSizeGroup.add_widget(button) pack(vbox, hbox) ## self.convertOutputEntry.connect( "changed", self.convertOutputEntryChanged, ) ### hbox = HBox(spacing=3) hbox.label = gtk.Label(label=_("Output Format:")) pack(hbox, hbox.label) labelSizeGroup.add_widget(hbox.label) hbox.label.set_property("xalign", 0) self.convertOutputFormatCombo = OutputFormatBox(parent=self) buttonSizeGroup.add_widget(self.convertOutputFormatCombo.optionsButton) pack(hbox, self.convertOutputFormatCombo) pack(hbox, gtk.Label(), 1, 1) pack(hbox, self.convertOutputFormatCombo.dependsButton) pack(hbox, self.convertOutputFormatCombo.optionsButton) pack(vbox, hbox) ##### hbox = HBox(spacing=10) label = gtk.Label(label="") pack(hbox, label, 1, 1, 5) ## button = GeneralOptionsButton(self) button.set_size_request(300, 40) pack(hbox, button, 0, 0, 0) ## self.convertButton = gtk.Button() self.convertButton.set_label("Convert") self.convertButton.connect("clicked", self.convertClicked) self.convertButton.set_size_request(300, 40) pack(hbox, self.convertButton, 0, 0, 10) ## pack(vbox, hbox, 0, 0, 15) ##### self.convertConsoleTextview = textview = gtk.TextView() swin = gtk.ScrolledWindow() swin.set_policy(gtk.PolicyType.AUTOMATIC, gtk.PolicyType.AUTOMATIC) swin.set_border_width(0) swin.add(textview) pack(vbox, swin, 1, 1) # ____________________ Tab 2 - Reverse ____________________ # self.reverseStatus = "" #### labelSizeGroup = gtk.SizeGroup(mode=gtk.SizeGroupMode.HORIZONTAL) #### vbox = VBox() vbox.label = _("Reverse") vbox.icon = "" # "*.png" # self.pages.append(vbox) ###### hbox = HBox(spacing=3) hbox.label = gtk.Label(label=_("Input Format:")) pack(hbox, hbox.label) labelSizeGroup.add_widget(hbox.label) hbox.label.set_property("xalign", 0) self.reverseInputFormatCombo = InputFormatBox() pack(hbox, self.reverseInputFormatCombo) pack(vbox, hbox) ### hbox = HBox(spacing=3) hbox.label = gtk.Label(label=_("Input File:")) pack(hbox, hbox.label) labelSizeGroup.add_widget(hbox.label) hbox.label.set_property("xalign", 0) self.reverseInputEntry = gtk.Entry() pack(hbox, self.reverseInputEntry, 1, 1) button = BrowseButton( self.reverseInputEntry.set_text, label="Browse", actionSave=False, title="Select Input File", ) pack(hbox, button) pack(vbox, hbox) ## self.reverseInputEntry.connect( "changed", self.reverseInputEntryChanged, ) ##### vbox.sep1 = gtk.Label(label="") vbox.sep1.show() pack(vbox, vbox.sep1) ##### hbox = HBox(spacing=3) hbox.label = gtk.Label(label=_("Output Tabfile:")) pack(hbox, hbox.label) labelSizeGroup.add_widget(hbox.label) hbox.label.set_property("xalign", 0) self.reverseOutputEntry = gtk.Entry() pack(hbox, self.reverseOutputEntry, 1, 1) button = BrowseButton( self.reverseOutputEntry.set_text, label="Browse", actionSave=True, title="Select Output File", ) pack(hbox, button) pack(vbox, hbox) ## self.reverseOutputEntry.connect( "changed", self.reverseOutputEntryChanged, ) ##### hbox = HBox(spacing=3) label = gtk.Label(label="") pack(hbox, label, 1, 1, 5) ### self.reverseStartButton = gtk.Button() self.reverseStartButton.set_label(_("Start")) self.reverseStartButton.connect("clicked", self.reverseStartClicked) pack(hbox, self.reverseStartButton, 1, 1, 2) ### self.reversePauseButton = gtk.Button() 
self.reversePauseButton.set_label(_("Pause")) self.reversePauseButton.set_sensitive(False) self.reversePauseButton.connect("clicked", self.reversePauseClicked) pack(hbox, self.reversePauseButton, 1, 1, 2) ### self.reverseResumeButton = gtk.Button() self.reverseResumeButton.set_label(_("Resume")) self.reverseResumeButton.set_sensitive(False) self.reverseResumeButton.connect("clicked", self.reverseResumeClicked) pack(hbox, self.reverseResumeButton, 1, 1, 2) ### self.reverseStopButton = gtk.Button() self.reverseStopButton.set_label(_("Stop")) self.reverseStopButton.set_sensitive(False) self.reverseStopButton.connect("clicked", self.reverseStopClicked) pack(hbox, self.reverseStopButton, 1, 1, 2) ### pack(vbox, hbox, 0, 0, 5) ###### about = AboutWidget( logo=logo, header=f"PyGlossary\nVersion {getVersion()}", # about=summary, about=f'{aboutText}\n{core.homePage}', authors="\n".join(authors), license_text=licenseText, ) about.label = _("About") about.icon = "" # "*.png" self.pages.append(about) ##### # ____________________________________________________________ # notebook = gtk.Notebook() self.notebook = notebook ######### for vbox in self.pages: label = gtk.Label(label=vbox.label) label.set_use_underline(True) vb = VBox(spacing=3) if vbox.icon: vbox.image = imageFromFile(vbox.icon) pack(vb, vbox.image) pack(vb, label) vb.show_all() notebook.append_page(vbox, vb) try: notebook.set_tab_reorderable(vbox, True) except AttributeError: pass ####################### pack(self.vbox, notebook, 1, 1) # for i in ui.pagesOrder: # try: # j = pagesOrder[i] # except IndexError: # continue # notebook.reorder_child(self.pages[i], j) # ____________________________________________________________ # handler = GtkSingleTextviewLogHandler(self, textview) log.addHandler(handler) ### textview.override_background_color( gtk.StateFlags.NORMAL, gdk.RGBA(0, 0, 0, 1), ) ### handler.setColor("CRITICAL", rgba_parse("red")) handler.setColor("ERROR", rgba_parse("red")) handler.setColor("WARNING", rgba_parse("yellow")) handler.setColor("INFO", rgba_parse("white")) handler.setColor("DEBUG", rgba_parse("white")) handler.setColor("TRACE", rgba_parse("white")) ### textview.get_buffer().set_text("Output & Error Console:\n") textview.set_editable(False) # ____________________________________________________________ # self.progressTitle = "" self.progressBar = pbar = gtk.ProgressBar() pbar.set_fraction(0) # pbar.set_text(_("Progress Bar")) # pbar.get_style_context() # pbar.set_property("height-request", 20) pack(self.vbox, pbar, 0, 0) ############ hbox = HBox(spacing=5) clearButton = gtk.Button( use_stock=gtk.STOCK_CLEAR, always_show_image=True, label=_("Clear"), ) clearButton.show_all() # image = gtk.Image() # image.set_from_stock(gtk.STOCK_CLEAR, gtk.IconSize.MENU) # clearButton.add(image) clearButton.set_border_width(0) clearButton.connect("clicked", self.consoleClearButtonClicked) clearButton.set_tooltip_text("Clear Console") pack(hbox, clearButton, 0, 0) #### # hbox.sepLabel1 = gtk.Label(label="") # pack(hbox, hbox.sepLabel1, 1, 1) ###### hbox.verbosityLabel = gtk.Label(label=_("Verbosity:")) pack(hbox, hbox.verbosityLabel, 0, 0) ## self.verbosityCombo = combo = gtk.ComboBoxText() for level, levelName in enumerate(log.levelNamesCap): combo.append_text(f"{level} - {_(levelName)}") combo.set_active(log.getVerbosity()) combo.set_border_width(0) combo.connect("changed", self.verbosityComboChanged) pack(hbox, combo, 0, 0) #### # hbox.sepLabel2 = gtk.Label(label="") # pack(hbox, hbox.sepLabel2, 1, 1) #### self.statusBar = 
gtk.Statusbar() pack(hbox, self.statusBar, 1, 1) #### hbox.resizeButton = ResizeButton(self) pack(hbox, hbox.resizeButton, 0, 0) ###### pack(self.vbox, hbox, 0, 0) # ____________________________________________________________ # self.vbox.show_all() notebook.set_current_page(0) # Convert tab self.convertInputFormatCombo.dependsButton.hide() self.convertOutputFormatCombo.dependsButton.hide() self.convertInputFormatCombo.optionsButton.hide() self.convertOutputFormatCombo.optionsButton.hide() ######## self.status("Select input file") def run( # noqa: PLR0913 self, inputFilename: str = "", outputFilename: str = "", inputFormat: str = "", outputFormat: str = "", reverse: bool = False, config: dict[str, Any] | None = None, readOptions: dict[str, Any] | None = None, writeOptions: dict[str, Any] | None = None, convertOptions: dict[str, Any] | None = None, glossarySetAttrs: dict[str, Any] | None = None, ) -> None: self.config = config if inputFilename: self.convertInputEntry.set_text(abspath(inputFilename)) if outputFilename: self.convertOutputEntry.set_text(abspath(outputFilename)) if inputFormat: self.convertInputFormatCombo.setActive(inputFormat) if outputFormat: self.convertOutputFormatCombo.setActive(outputFormat) if reverse: log.error("Gtk interface does not support Reverse feature") if readOptions: self.convertInputFormatCombo.setOptionsValues(readOptions) if writeOptions: self.convertOutputFormatCombo.setOptionsValues(writeOptions) self.convertOptions = convertOptions if convertOptions: log.debug(f"Using {convertOptions=}") self._glossarySetAttrs = glossarySetAttrs or {} self.convertInputEntry.grab_focus() gtk.Dialog.present(self) gtk.main() def onDeleteEvent(self, _widget: Any, _event: Any) -> None: self.destroy() # gtk.main_quit() # if called while converting, main_quit does not exit program, # it keeps printing warnings, # and makes you close the terminal or force kill the process gtk.main_quit() def consoleClearButtonClicked(self, _widget: Any = None) -> None: self.convertConsoleTextview.get_buffer().set_text("") def verbosityComboChanged(self, _widget: Any = None) -> None: verbosity = self.verbosityCombo.get_active() # or int(self.verbosityCombo.get_active_text()) log.setVerbosity(verbosity) def convertClicked(self, _widget: Any = None) -> None: inPath = self.convertInputEntry.get_text() if not inPath: log.critical("Input file path is empty!") return inFormat = self.convertInputFormatCombo.getActive() outPath = self.convertOutputEntry.get_text() if not outPath: log.critical("Output file path is empty!") return outFormat = self.convertOutputFormatCombo.getActive() while gtk.events_pending(): gtk.main_iteration_do(False) self.convertButton.set_sensitive(False) self.progressTitle = "Converting" readOptions = self.convertInputFormatCombo.optionsValues writeOptions = self.convertOutputFormatCombo.optionsValues glos = Glossary(ui=self) glos.config = self.config glos.progressbar = self.progressbarEnable for attr, value in self._glossarySetAttrs.items(): setattr(glos, attr, value) log.debug(f"readOptions: {readOptions}") log.debug(f"writeOptions: {writeOptions}") log.debug(f"convertOptions: {self.convertOptions}") log.debug(f"config: {self.config}") try: glos.convert( ConvertArgs( inPath, inputFormat=inFormat, outputFilename=outPath, outputFormat=outFormat, readOptions=readOptions, writeOptions=writeOptions, **self.convertOptions, ), ) self.status("Convert finished") except Error as e: log.critical(str(e)) glos.cleanup() finally: self.convertButton.set_sensitive(True) self.assert_quit = False 
self.progressTitle = "" def convertInputEntryChanged(self, _widget: Any = None) -> None: inPath = self.convertInputEntry.get_text() inFormat = self.convertInputFormatCombo.getActive() if inPath.startswith("file://"): inPath = urlToPath(inPath) self.convertInputEntry.set_text(inPath) if self.config["ui_autoSetFormat"] and not inFormat: try: inputArgs = Glossary.detectInputFormat(inPath) except Error: pass else: self.convertInputFormatCombo.setActive(inputArgs.formatName) if not isfile(inPath): return self.status("Select output file") def convertOutputEntryChanged(self, _widget: Any = None) -> None: outPath = self.convertOutputEntry.get_text() outFormat = self.convertOutputFormatCombo.getActive() if not outPath: return if outPath.startswith("file://"): outPath = urlToPath(outPath) self.convertOutputEntry.set_text(outPath) if self.config["ui_autoSetFormat"] and not outFormat: try: outputArgs = Glossary.detectOutputFormat( filename=outPath, inputFilename=self.convertInputEntry.get_text(), ) except Error: pass else: outFormat = outputArgs.formatName self.convertOutputFormatCombo.setActive(outFormat) if outFormat: self.status('Press "Convert"') else: self.status("Select output format") def reverseLoad(self) -> None: pass def reverseStartLoop(self) -> None: pass def reverseStart(self) -> None: if not self.reverseLoad(): return ### self.reverseStatus = "doing" self.reverseStartLoop() ### self.reverseStartButton.set_sensitive(False) self.reversePauseButton.set_sensitive(True) self.reverseResumeButton.set_sensitive(False) self.reverseStopButton.set_sensitive(True) def reverseStartClicked(self, _widget: Any = None) -> None: self.waitingDo(self.reverseStart) def reversePause(self) -> None: self.reverseStatus = "pause" ### self.reverseStartButton.set_sensitive(False) self.reversePauseButton.set_sensitive(False) self.reverseResumeButton.set_sensitive(True) self.reverseStopButton.set_sensitive(True) def reversePauseClicked(self, _widget: Any = None) -> None: self.waitingDo(self.reversePause) def reverseResume(self) -> None: self.reverseStatus = "doing" ### self.reverseStartButton.set_sensitive(False) self.reversePauseButton.set_sensitive(True) self.reverseResumeButton.set_sensitive(False) self.reverseStopButton.set_sensitive(True) def reverseResumeClicked(self, _widget: Any = None) -> None: self.waitingDo(self.reverseResume) def reverseStop(self) -> None: self.reverseStatus = "stop" ### self.reverseStartButton.set_sensitive(True) self.reversePauseButton.set_sensitive(False) self.reverseResumeButton.set_sensitive(False) self.reverseStopButton.set_sensitive(False) def reverseStopClicked(self, _widget: Any = None) -> None: self.waitingDo(self.reverseStop) def reverseInputEntryChanged(self, _widget: Any = None) -> None: inPath = self.reverseInputEntry.get_text() if inPath.startswith("file://"): inPath = urlToPath(inPath) self.reverseInputEntry.set_text(inPath) if ( self.config["ui_autoSetFormat"] and not self.reverseInputFormatCombo.getActive() ): try: inputArgs = Glossary.detectInputFormat(inPath) except Error: pass else: inFormat = inputArgs[1] self.reverseInputFormatCombo.setActive(inFormat) def reverseOutputEntryChanged(self, _widget: Any = None) -> None: pass def progressInit(self, title: str) -> None: self.progressTitle = title def progress(self, ratio: float, text: str = "") -> None: if not text: text = "%" + str(int(ratio * 100)) text += " - " + self.progressTitle self.progressBar.set_fraction(ratio) # self.progressBar.set_text(text) # not working self.status(text) while gtk.events_pending(): 
gtk.main_iteration_do(False) pyglossary-5.0.9/pyglossary/ui/ui_gtk4/000077500000000000000000000000001476751035500201555ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/ui/ui_gtk4/__init__.py000066400000000000000000000002271476751035500222670ustar00rootroot00000000000000from __future__ import annotations import gi gi.require_version("Gtk", "4.0") gi.require_version("Gdk", "4.0") from .ui import UI __all__ = ["UI"] pyglossary-5.0.9/pyglossary/ui/ui_gtk4/about.py000066400000000000000000000106601476751035500216440ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2020 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations from gi.repository import Gtk as gtk from .utils import ( VBox, imageFromFile, pack, ) __all__ = ["AboutWidget"] class AboutTabTitleBox(gtk.Box): def __init__(self, title: str, icon: str) -> None: gtk.Box.__init__(self, orientation=gtk.Orientation.VERTICAL) self.set_spacing(10) pack(self, VBox(), expand=0) if icon: image = imageFromFile(icon) image.get_pixel_size() image.set_size_request(24, 24) # I don't know how to stop Gtk from resizing the image # I should probably use svg files to avoid blurry images pack(self, image, expand=0) if title: pack(self, gtk.Label(label=title), expand=0) pack(self, VBox(), expand=0) self.set_size_request(60, 60) # def do_get_preferred_height_for_width(self, size: int) -> tuple[int, int]: # height = int(size * 1.5) # return height, height # returns: (minimum: int, natural: int, # minimum_baseline: int, natural_baseline: int) # def do_measure(self, orientation, for_size): # return (for_size, for_size, for_size, for_size) class AboutWidget(gtk.Box): def __init__( # noqa: PLR0913 self, logo: str = "", header: str = "", about: str = "", authors: str = "", license_text: str = "", **_kwargs, ) -> None: gtk.Box.__init__(self, orientation=gtk.Orientation.VERTICAL) self.set_spacing(15) ## headerBox = gtk.Box(orientation=gtk.Orientation.HORIZONTAL) if logo: pack(headerBox, imageFromFile(logo)) headerLabel = gtk.Label(label=header) headerLabel.set_selectable(True) pack(headerBox, headerLabel) headerBox.show() pack(self, headerBox) ## notebook = gtk.Notebook() self.notebook = notebook pack(self, notebook, expand=True) notebook.set_tab_pos(gtk.PositionType.LEFT) ## tab1_about = self.newTabLabelWidget(about) tab2_authors = self.newTabWidgetTextView(authors) tab3_license = self.newTabWidgetTextView(license_text) ## tabs = [ (tab1_about, self.newTabTitle("About", "dialog-information-22.png")), (tab2_authors, self.newTabTitle("Authors", "author-22.png")), (tab3_license, self.newTabTitle("License", "license-22.png")), ] ## for widget, titleW in tabs: notebook.append_page(widget, titleW) ## self.show() # Something does not work with TextView @staticmethod def newTabWidgetTextView( text: str, wrap: bool = False, justification: gtk.Justification | None = None, ) -> gtk.ScrolledWindow: tv = gtk.TextView() if 
wrap: tv.set_wrap_mode(gtk.WrapMode.WORD) if justification is not None: tv.set_justification(justification) tv.set_cursor_visible(False) # tv.set_border_width(10) buf = tv.get_buffer() # buf.insert_markup(buf.get_end_iter(), markup=text, # len=len(text.encode("utf-8"))) buf.set_text(text) tv.show() swin = gtk.ScrolledWindow() swin.set_policy(gtk.PolicyType.AUTOMATIC, gtk.PolicyType.AUTOMATIC) # swin.set_border_width(0) swin.set_child(tv) return swin @staticmethod def newTabLabelWidget( text: str, # wrap: bool = False, # justification: "gtk.Justification | None" = None, ) -> gtk.ScrolledWindow: box = VBox() # box.set_border_width(10) label = gtk.Label() label.set_selectable(True) label.set_xalign(0) label.set_yalign(0) pack(box, label, 0, 0) # if wrap: # tv.set_wrap_mode(gtk.WrapMode.WORD) # if justification is not None: # tv.set_justification(justification) # label.set_cursor_visible(False) # label.set_border_width(10) label.set_markup(text) label.show() swin = gtk.ScrolledWindow() swin.set_policy(gtk.PolicyType.AUTOMATIC, gtk.PolicyType.AUTOMATIC) # swin.set_border_width(0) swin.set_child(box) return swin @staticmethod def newTabTitle(title: str, icon: str) -> AboutTabTitleBox: return AboutTabTitleBox(title, icon) pyglossary-5.0.9/pyglossary/ui/ui_gtk4/browse.py000066400000000000000000000045421476751035500220350ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2008-2025 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
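# The gtk.FileDialog API used below (Gtk 4.10+) is asynchronous: open() and
# save() take a callback, and the selection is retrieved in that callback
# with open_finish()/save_finish(), which raise glib.GError when the user
# cancels. Minimal usage sketch (names are illustrative, not part of this
# module):
#
#     def _onOpen(dialog: gtk.FileDialog, task: gio.Task) -> None:
#         file = dialog.open_finish(task)  # raises glib.GError if cancelled
#         if file is not None:
#             print(file.get_path())
#
#     dialog = gtk.FileDialog.new()
#     dialog.set_title("Select File")
#     dialog.open(parent=window, cancellable=None, callback=_onOpen)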
from __future__ import annotations from typing import TYPE_CHECKING, Any from gi.repository import Gio as gio from gi.repository import GLib as glib from gi.repository import Gtk as gtk if TYPE_CHECKING: from collections.abc import Callable class BrowseButton(gtk.Button): def __init__( self, setFilePathFunc: Callable[[str], None], label: str = "Browse", actionSave: bool = False, title: str = "Select File", ) -> None: gtk.Button.__init__(self) self.set_label(label) # TODO: self.set_icon_name # self.set_image(gtk.Image.new_from_icon_name( # "document-save" if actionSave else "document-open", # gtk.IconSize.BUTTON, # )) self.actionSave = actionSave self.setFilePathFunc = setFilePathFunc self.title = title self.connect("clicked", self.onClick) def onFiledialogOpen( self, filedialog: gtk.FileDialog, task: gio.Task, ) -> None: try: file = filedialog.open_finish(task) except glib.GError: return if file is None: return self.setFilePathFunc(file.get_path()) def onFiledialogSave( self, filedialog: gtk.FileDialog, task: gio.Task, ) -> None: try: file = filedialog.save_finish(task) except glib.GError: return if file is None: return self.setFilePathFunc(file.get_path()) def onClick(self, _widget: Any) -> None: dialog = gtk.FileDialog.new() dialog.set_title(self.title) # dialog.set_initial_folder(dir_name) if self.actionSave: dialog.save( parent=self.get_root(), cancellable=None, callback=self.onFiledialogSave, ) else: dialog.open( parent=self.get_root(), cancellable=None, callback=self.onFiledialogOpen, ) pyglossary-5.0.9/pyglossary/ui/ui_gtk4/dialog.py000066400000000000000000000025701476751035500217720ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2016-2017 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from collections.abc import Callable from .utils import gtk_event_iteration_loop class MyDialog: def startWaiting(self) -> None: self.queue_draw() self.vbox.set_sensitive(False) self.set_cursor_from_name("wait") # gdk.CursorType was removed in Gtk 4; use a named cursor gtk_event_iteration_loop() def endWaiting(self) -> None: self.set_cursor_from_name(None) self.vbox.set_sensitive(True) def waitingDo(self, func: Callable, *args, **kwargs) -> None: # noqa: ANN002 self.startWaiting() try: func(*args, **kwargs) finally: self.endWaiting() pyglossary-5.0.9/pyglossary/ui/ui_gtk4/format_widgets.py000066400000000000000000000446221476751035500235530ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2008-2025 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version.
# # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations import logging from typing import TYPE_CHECKING, Any from gi.repository import Gio as gio from gi.repository import Gtk as gtk from pyglossary import core from pyglossary.glossary_v2 import Glossary from pyglossary.ui.dependency import checkDepends from .utils import ( HBox, dialog_add_button, pack, showInfo, ) if TYPE_CHECKING: from collections.abc import Callable from pyglossary.plugin_prop import PluginProp pluginByDesc = {plugin.description: plugin for plugin in Glossary.plugins.values()} readDesc = [ plugin.description for plugin in Glossary.plugins.values() if plugin.canRead ] writeDesc = [ plugin.description for plugin in Glossary.plugins.values() if plugin.canWrite ] log = logging.getLogger("pyglossary") class FormatDialog(gtk.Dialog): def __init__( self, descList: list[str], parent: gtk.Widget | None = None, **kwargs, ) -> None: gtk.Dialog.__init__(self, transient_for=parent, **kwargs) self.set_default_size(400, 400) self.vbox = self.get_content_area() ## self.descList = descList self.items = descList self.activeDesc = "" ## self.connect("response", lambda _w, _e: self.hide()) dialog_add_button( self, "gtk-cancel", "_Cancel", gtk.ResponseType.CANCEL, ) dialog_add_button( self, "gtk-ok", "_OK", gtk.ResponseType.OK, ) ### treev = gtk.TreeView() treeModel = gtk.ListStore(str) treev.set_headers_visible(False) treev.set_model(treeModel) treev.connect("row-activated", self.rowActivated) # treev.connect("response", self.onResponse) ### self.treev = treev ############# cell = gtk.CellRendererText(editable=False) col = gtk.TreeViewColumn( title="Description", cell_renderer=cell, text=0, ) col.set_property("expand", True) col.set_resizable(True) treev.append_column(col) self.descCol = col ############ hbox = HBox(spacing=15) hbox.get_style_context().add_class("margin_05") pack(hbox, gtk.Label(label="Search:")) entry = self.entry = gtk.Entry() pack(hbox, entry, 1, 1) pack(self.vbox, hbox) ### entry.connect("changed", self.onEntryChange) ############ self.swin = swin = gtk.ScrolledWindow() swin.set_child(treev) swin.set_policy(gtk.PolicyType.NEVER, gtk.PolicyType.AUTOMATIC) pack(self.vbox, swin, 1, 1) self.vbox.show() ## treev.set_can_focus(True) # no need, just to be safe # treev.set_can_default(True) treev.set_receives_default(True) # print("can_focus:", treev.get_can_focus()) # print("can_default:", treev.get_can_default()) # print("receives_default:", treev.get_receives_default()) #### self.updateTree() self.connect("realize", self.onRealize) def onRealize(self, _widget: Any = None) -> None: if self.activeDesc: self.treev.grab_focus() else: self.entry.grab_focus() def onEntryChange(self, entry: gtk.Entry) -> None: text = entry.get_text().strip() if not text: self.items = self.descList self.updateTree() return text = text.lower() descList = self.descList items1 = [] items2 = [] for desc in descList: if desc.lower().startswith(text): items1.append(desc) elif text in desc.lower(): items2.append(desc) self.items = items1 + items2 self.updateTree() def setCursor(self, desc: str) -> None: model = self.treev.get_model() iter_ = model.iter_children(None) while iter_ is not None: if
model.get_value(iter_, 0) == desc: path = model.get_path(iter_) self.treev.set_cursor(path, self.descCol, False) self.treev.scroll_to_cell(path) return iter_ = model.iter_next(iter_) def updateTree(self) -> None: model = self.treev.get_model() model.clear() for desc in self.items: model.append([desc]) if self.activeDesc: self.setCursor(self.activeDesc) def getActive(self) -> PluginProp | None: iter_ = self.treev.get_selection().get_selected()[1] if iter_ is None: return None model = self.treev.get_model() desc = model.get_value(iter_, 0) return pluginByDesc[desc] def setActive(self, plugin: PluginProp | None) -> None: if plugin is None: self.activeDesc = "" return desc = plugin.description self.activeDesc = desc self.setCursor(desc) def rowActivated( self, treev: gtk.TreeView, path: gtk.TreePath, _col: Any, ) -> None: model = treev.get_model() iter_ = model.get_iter(path) desc = model.get_value(iter_, 0) self.activeDesc = desc self.response(gtk.ResponseType.OK) # def onResponse class FormatButton(gtk.Button): noneLabel = "[Select Format]" dialogTitle = "Select Format" def __init__(self, descList: list[str], parent: gtk.Widget | None = None) -> None: gtk.Button.__init__(self) self.set_label(self.noneLabel) ### self.descList = descList self._parent = parent self.activePlugin = None ### self.connect("clicked", self.onClick) def onChanged(self, _obj: Any = None) -> None: pass def onDialogResponse(self, dialog: gtk.Dialog, response_id: int) -> None: print(f"onDialogResponse: {dialog}, {response_id}") if response_id != gtk.ResponseType.OK: return plugin = dialog.getActive() self.activePlugin = plugin if plugin: self.set_label(plugin.description) else: self.set_label(self.noneLabel) self.onChanged() def onClick(self, _button: Any = None) -> None: dialog = FormatDialog( descList=self.descList, parent=self._parent, title=self.dialogTitle, ) dialog.setActive(self.activePlugin) dialog.connect("response", self.onDialogResponse) dialog.present() def getActive(self) -> str: if self.activePlugin is None: return "" return self.activePlugin.name def setActive(self, formatName: str) -> None: plugin = Glossary.plugins[formatName] self.activePlugin = plugin self.set_label(plugin.description) self.onChanged() class FormatOptionsDialog(gtk.Dialog): commentLen = 60 def __init__( self, app: gtk.Application, formatName: str, options: list[str], optionsValues: dict[str, Any], **kwargs, ) -> None: self.app = app gtk.Dialog.__init__(self, **kwargs) self.vbox = self.get_content_area() ## optionsProp = Glossary.plugins[formatName].optionsProp self.optionsProp = optionsProp self.formatName = formatName self.actionIds = set() ## self.connect("response", lambda _w, _e: self.hide()) dialog_add_button( self, "gtk-cancel", "_Cancel", gtk.ResponseType.CANCEL, ) dialog_add_button( self, "gtk-ok", "_OK", gtk.ResponseType.OK, ) ### treev = gtk.TreeView() treeModel = gtk.ListStore( bool, # enable str, # name str, # comment str, # value ) treev.set_headers_clickable(True) treev.set_model(treeModel) treev.connect("row-activated", self.rowActivated) gesture = gtk.GestureClick.new() gesture.connect("pressed", self.treeviewButtonPress) treev.add_controller(gesture) ### self.treev = treev ############# cell = gtk.CellRendererToggle() # cell.set_property("activatable", True) cell.connect("toggled", self.enableToggled) col = gtk.TreeViewColumn(title="Enable", cell_renderer=cell) col.add_attribute(cell, "active", 0) # cell.set_active(False) col.set_property("expand", False) col.set_resizable(True) treev.append_column(col) ### col =
gtk.TreeViewColumn( title="Name", cell_renderer=gtk.CellRendererText(), text=1, ) col.set_property("expand", False) col.set_resizable(True) treev.append_column(col) ### cell = gtk.CellRendererText(editable=True) self.valueCell = cell self.valueCol = 3 cell.connect("edited", self.valueEdited) col = gtk.TreeViewColumn( title="Value", cell_renderer=cell, text=self.valueCol, ) col.set_property("expand", True) col.set_resizable(True) col.set_min_width(200) treev.append_column(col) ### col = gtk.TreeViewColumn( title="Comment", cell_renderer=gtk.CellRendererText(), text=2, ) col.set_property("expand", False) col.set_resizable(False) treev.append_column(col) ############# for name in options: prop = optionsProp[name] comment = prop.longComment if len(comment) > self.commentLen: comment = comment[: self.commentLen] + "..." if prop.typ != "bool" and not prop.values: comment += " (double-click to edit)" treeModel.append( [ name in optionsValues, # enable name, # name comment, # comment str(optionsValues.get(name, "")), # value ], ) ############ pack(self.vbox, treev, 1, 1) self.vbox.show() def enableToggled(self, cell: gtk.CellRenderer, path: gtk.TreePath) -> None: # enable is column 0 model = self.treev.get_model() active = not cell.get_active() itr = model.get_iter(path) model.set_value(itr, 0, active) def valueEdited(self, _cell: Any, path: gtk.TreePath, rawValue: str) -> None: # value is column 3 model = self.treev.get_model() itr = model.get_iter(path) optName = model.get_value(itr, 1) prop = self.optionsProp[optName] if not prop.customValue: return enable = True if rawValue == "" and prop.typ != "str": # noqa: PLC1901 enable = False elif not prop.validateRaw(rawValue): log.error(f"invalid {prop.typ} value: {optName} = {rawValue!r}") return model.set_value(itr, self.valueCol, rawValue) model.set_value(itr, 0, enable) def rowActivated(self, _treev: Any, path: gtk.TreePath, _col: Any) -> bool: # forceMenu=True because we can not enter edit mode # if double-clicked on a cell other than Value return self.valueCellClicked(path, forceMenu=True) def treeviewButtonPress(self, _gesture: Any, _n_press: Any, x: int, y: int) -> bool: # if gevent.button != 1: # return False pos_t = self.treev.get_path_at_pos(int(x), int(y)) if not pos_t: return False # pos_t == path, col, xRel, yRel path = pos_t[0] col = pos_t[1] # cell = col.get_cells()[0] if col.get_title() == "Value": return self.valueCellClicked(path) return False def valueItemActivate(self, item: gio.MenuItem, itr: gtk.TreeIter) -> None: # value is column 3 value = item.get_label() model = self.treev.get_model() model.set_value(itr, self.valueCol, value) model.set_value(itr, 0, True) # enable it def valueCustomOpenDialog(self, itr: gtk.TreeIter, optName: str) -> None: model = self.treev.get_model() prop = self.optionsProp[optName] currentValue = model.get_value(itr, self.valueCol) optDesc = optName if prop.comment: optDesc += f" ({prop.comment})" label = gtk.Label(label=f"Value for {optDesc}") dialog = gtk.Dialog(transient_for=self, title="Option Value") dialog.connect("response", lambda _w, _e: dialog.hide()) dialog_add_button( dialog, "gtk-cancel", "_Cancel", gtk.ResponseType.CANCEL, ) dialog_add_button( dialog, "gtk-ok", "_OK", gtk.ResponseType.OK, ) pack(dialog.vbox, label) entry = gtk.Entry() entry.set_text(currentValue) entry.connect("activate", lambda _w: dialog.response(gtk.ResponseType.OK)) pack(dialog.vbox, entry) dialog.vbox.show() dialog.connect("response", self.valueCustomDialogResponse, entry) dialog.present() def 
valueCustomDialogResponse( self, _dialog: Any, response_id: int, entry: gtk.Entry, ) -> None: if response_id != gtk.ResponseType.OK: return model = self.treev.get_model() value = entry.get_text() print(model, value) # FIXME # model.set_value(itr, self.valueCol, value) # model.set_value(itr, 0, True) # enable it def valueItemCustomActivate( self, _item: gtk.MenuItem, itr: gtk.TreeIter, ) -> None: model = self.treev.get_model() optName = model.get_value(itr, 1) self.valueCustomOpenDialog(itr, optName) def addAction( self, path: gtk.TreePath, name: str, callback: Callable, *args, # noqa: ANN002 ) -> str: actionId = self.formatName + "_" + str(path[0]) + "_" + name if actionId not in self.actionIds: action = gio.SimpleAction(name=actionId) action.connect("activate", callback, *args) self.app.add_action(action) return "app." + actionId def valueCellClicked(self, path: gtk.TreePath, forceMenu: bool = False) -> bool: """ Returns True if event is handled, False if not handled (need to enter edit mode). """ model = self.treev.get_model() itr = model.get_iter(path) optName = model.get_value(itr, 1) prop = self.optionsProp[optName] if prop.typ == "bool": rawValue = model.get_value(itr, self.valueCol) if rawValue == "": # noqa: PLC1901 value = False else: value, isValid = prop.evaluate(rawValue) if not isValid: log.error(f"invalid {optName} = {rawValue!r}") value = False model.set_value(itr, self.valueCol, str(not value)) model.set_value(itr, 0, True) # enable it return True propValues = prop.values if not propValues: if forceMenu: propValues = [] else: return False menu = gtk.PopoverMenu() menu.set_parent(self) menuM = menu.get_menu_model() # gio.MenuModel if prop.customValue: item = gio.MenuItem() item.set_label("[Custom Value]") item.set_detailed_action( self.addAction( path, "__custom__", self.valueItemCustomActivate, itr, ), ) menuM.append_item(item) groupedValues = None if len(propValues) > 10: groupedValues = prop.groupValues() if groupedValues: for groupName, values in groupedValues.items(): item = gio.MenuItem() item.set_label(groupName) if values is None: item.set_detailed_action( self.addAction( path, groupName, self.valueItemActivat, itr, ), ) else: subMenu = gio.Menu() for subValue in values: subItem = gio.MenuItem() subItem.set_label(str(subValue)) item.set_detailed_action( self.addAction( path, groupName, self.valueItemActivate, itr, ), ) subMenu.append_item(subItem) item.set_submenu(subMenu) item.show() menu.append_item(item) else: for value in propValues: item = gio.MenuItem() item.set_label(value) item.connect("activate", self.valueItemActivate, itr) item.show() menu.append_item(item) # etime = gtk.get_current_event_time() menu.popup() return True def getOptionsValues(self) -> dict[str, Any]: model = self.treev.get_model() optionsValues: dict[str, Any] = {} for row in model: if not row[0]: # not enable continue optName = row[1] rawValue = row[3] prop = self.optionsProp[optName] value, isValid = prop.evaluate(rawValue) if not isValid: log.error(f"invalid option value {optName} = {rawValue}") continue optionsValues[optName] = value return optionsValues class FormatBox(FormatButton): def __init__( self, app: gtk.Application, descList: list[str], parent: gtk.Widget | None = None, ) -> None: self.app = app FormatButton.__init__(self, descList, parent=parent) self.optionsValues = {} self.optionsButton = gtk.Button(label="Options") # TODO: self.optionsButton.set_icon_name # self.optionsButton.set_image(gtk.Image.new_from_icon_name( # "gtk-preferences", # gtk.IconSize.BUTTON, # )) 
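# In Gtk 4, set_icon_name() would replace the "Options" label; showing an
# icon next to the label needs a custom child instead (a sketch, not wired
# in here; the icon name is illustrative):
#     box = gtk.Box(spacing=4)
#     box.append(gtk.Image.new_from_icon_name("preferences-system"))
#     box.append(gtk.Label(label="Options"))
#     self.optionsButton.set_child(box)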
self.optionsButton.connect("clicked", self.optionsButtonClicked) self.dependsButton = gtk.Button(label="Install dependencies") self.dependsButton.pkgNames = [] self.dependsButton.connect("clicked", self.dependsButtonClicked) def setOptionsValues(self, optionsValues: dict[str, Any]) -> None: self.optionsValues = optionsValues def kind(self) -> str: """Return 'r' or 'w'.""" raise NotImplementedError def getActiveOptions(self) -> list[str] | None: raise NotImplementedError def optionsButtonClicked(self, _button: Any) -> None: formatName = self.getActive() options = self.getActiveOptions() if options is None: return dialog = FormatOptionsDialog( self.app, formatName, options, self.optionsValues, transient_for=self._parent, ) dialog.set_title("Options for " + formatName) dialog.connect("response", self.optionsDialogResponse) dialog.present() def optionsDialogResponse( self, dialog: FormatOptionsDialog, response_id: gtk.ResponseType, ) -> None: if response_id == gtk.ResponseType.OK: self.optionsValues = dialog.getOptionsValues() dialog.destroy() def dependsButtonClicked(self, button: gtk.Button) -> None: formatName = self.getActive() pkgNames = button.pkgNames if not pkgNames: print("All dependencies are stattisfied for " + formatName) return pkgNamesStr = " ".join(pkgNames) msg = f"Run the following command:\n{core.pip} install {pkgNamesStr}" showInfo( msg, title="Dependencies for " + formatName, selectable=True, parent=self._parent, ) self.onChanged(self) def onChanged(self, _obj: Any = None) -> None: name = self.getActive() if not name: self.optionsButton.set_visible(False) return self.optionsValues.clear() self.optionsButton.set_visible(bool(self.getActiveOptions())) kind = self.kind() plugin = Glossary.plugins[name] if kind == "r": depends = plugin.readDepends elif kind == "w": depends = plugin.writeDepends else: raise RuntimeError(f"invalid {kind=}") uninstalled = checkDepends(depends) self.dependsButton.pkgNames = uninstalled self.dependsButton.set_visible(bool(uninstalled)) class InputFormatBox(FormatBox): dialogTitle = "Select Input Format" def __init__(self, app: gtk.Application, **kwargs) -> None: FormatBox.__init__(self, app, readDesc, **kwargs) def kind(self) -> str: """Return 'r' or 'w'.""" return "r" def getActiveOptions(self) -> list[str] | None: formatName = self.getActive() if not formatName: return None return list(Glossary.formatsReadOptions[formatName]) class OutputFormatBox(FormatBox): dialogTitle = "Select Output Format" def __init__(self, app: gtk.Application, **kwargs) -> None: FormatBox.__init__(self, app, writeDesc, **kwargs) def kind(self) -> str: """Return 'r' or 'w'.""" return "w" def getActiveOptions(self) -> list[str] | None: return list(Glossary.formatsWriteOptions[self.getActive()]) pyglossary-5.0.9/pyglossary/ui/ui_gtk4/general_options.py000066400000000000000000000072541476751035500237270ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2008-2025 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. 
# # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations from typing import Any from gi.repository import Gtk as gtk from pyglossary.ui.base import UIBase from .sort_options import SortOptionsBox from .utils import ( HBox, dialog_add_button, pack, ) class GeneralOptionsDialog(gtk.Dialog): def onCloseRequest(self, _widget: Any) -> bool: self.hide() return True def onResponse(self, _widget: Any, _event: Any) -> bool: self.applyChanges() self.hide() return True def __init__(self, mainWin: gtk.Window, **kwargs: Any) -> None: gtk.Dialog.__init__( self, transient_for=mainWin, **kwargs, ) self.set_title("General Options") self.mainWin = mainWin ## self.vbox = self.get_content_area() self.vbox.set_spacing(5) ## self.set_default_size(600, 500) self.connect("close-request", self.onCloseRequest) ## self.connect("response", self.onResponse) dialog_add_button( self, "gtk-ok", "_OK", gtk.ResponseType.OK, ) ## hpad = 10 ## self.sortOptionsBox = SortOptionsBox(mainWin) pack(self.vbox, self.sortOptionsBox) ## hbox = HBox(spacing=hpad) self.sqliteCheck = gtk.CheckButton(label="SQLite mode") pack(hbox, self.sqliteCheck) pack(self.vbox, hbox) ## self.configParams = { "save_info_json": False, "lower": False, "skip_resources": False, "rtl": False, "enable_alts": True, "cleanup": True, "remove_html_all": True, } self.configCheckButtons = {} configDefDict = UIBase.configDefDict for param in self.configParams: hbox = HBox(spacing=hpad) comment = configDefDict[param].comment checkButton = gtk.CheckButton( label=comment.split("\n")[0], ) self.configCheckButtons[param] = checkButton pack(hbox, checkButton) pack(self.vbox, hbox) ## self.updateWidgets() self.vbox.show() def getSQLite(self) -> bool: convertOptions = self.mainWin.convertOptions sqlite = convertOptions.get("sqlite") if sqlite is not None: return sqlite return self.mainWin.config.get("auto_sqlite", True) def updateWidgets(self) -> None: config = self.mainWin.config self.sortOptionsBox.updateWidgets() self.sqliteCheck.set_active(self.getSQLite()) for param, check in self.configCheckButtons.items(): default = self.configParams[param] check.set_active(config.get(param, default)) def applyChanges(self) -> None: # print("applyChanges") self.sortOptionsBox.applyChanges() convertOptions = self.mainWin.convertOptions config = self.mainWin.config convertOptions["sqlite"] = self.sqliteCheck.get_active() for param, check in self.configCheckButtons.items(): config[param] = check.get_active() class GeneralOptionsButton(gtk.Button): def __init__(self, mainWin: gtk.Window) -> None: gtk.Button.__init__(self, label="General Options") self.mainWin = mainWin self.connect("clicked", self.onClick) self.dialog = None def onClick(self, _widget: Any) -> None: if self.dialog is None: self.dialog = GeneralOptionsDialog(self.mainWin) self.dialog.present() pyglossary-5.0.9/pyglossary/ui/ui_gtk4/log_handler.py000066400000000000000000000052351476751035500230120ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2008-2025 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the 
Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations import logging import traceback from typing import Protocol from gi.repository import Gdk as gdk from gi.repository import Gtk as gtk log = logging.getLogger("pyglossary") class MainWinType(Protocol): def status(self, msg: str) -> None: ... class GtkTextviewLogHandler(logging.Handler): def __init__( self, mainWin: MainWinType, textview_dict: dict[str, gtk.TextView], ) -> None: logging.Handler.__init__(self) self.mainWin = mainWin self.buffers = {} for levelNameCap in log.levelNamesCap[:-1]: levelName = levelNameCap.upper() textview = textview_dict[levelName] buff = textview.get_buffer() tag = gtk.TextTag.new(levelName) buff.get_tag_table().add(tag) self.buffers[levelName] = buff def getTag(self, levelname: str) -> gtk.TextTag: return self.buffers[levelname].get_tag_table().lookup(levelname) def setColor(self, levelname: str, rgba: gdk.RGBA) -> None: self.getTag(levelname).set_property("foreground-rgba", rgba) # foreground-gdk is deprecated since Gtk 3.4 def emit(self, record: logging.LogRecord) -> None: msg = "" if record.getMessage(): msg = self.format(record) # msg = msg.replace("\x00", "") if record.exc_info: type_, value, tback = record.exc_info tback_text = "".join( traceback.format_exception(type_, value, tback), ) if msg: msg += "\n" msg += tback_text buff = self.buffers[record.levelname] buff.insert_with_tags_by_name( buff.get_end_iter(), msg + "\n", record.levelname, ) if record.levelno == logging.CRITICAL: self.mainWin.status(record.getMessage()) class GtkSingleTextviewLogHandler(GtkTextviewLogHandler): def __init__(self, mainWin: MainWinType, textview: gtk.TextView) -> None: GtkTextviewLogHandler.__init__( self, mainWin, { "CRITICAL": textview, "ERROR": textview, "WARNING": textview, "INFO": textview, "DEBUG": textview, "TRACE": textview, }, ) pyglossary-5.0.9/pyglossary/ui/ui_gtk4/mainwin.py000066400000000000000000000373771476751035500222120ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2008-2025 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
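# Wiring sketch for the handler defined in log_handler.py (mirrors what the
# Gtk3 UI does; mainWin and textview are assumed to exist):
#
#     handler = GtkSingleTextviewLogHandler(mainWin, textview)
#     handler.setColor("CRITICAL", rgba_parse("red"))
#     handler.setColor("ERROR", rgba_parse("red"))
#     log.addHandler(handler)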
from __future__ import annotations

import logging
from os.path import abspath, isfile
from typing import Any

from gi.repository import Gdk as gdk
from gi.repository import Gtk as gtk

from pyglossary import core
from pyglossary.glossary_v2 import ConvertArgs, Error, Glossary
from pyglossary.text_utils import urlToPath
from pyglossary.ui.base import (
	UIBase,
	aboutText,
	authors,
	licenseText,
	logo,
)
from pyglossary.ui.version import getVersion

from .about import AboutWidget
from .browse import BrowseButton
from .format_widgets import InputFormatBox, OutputFormatBox
from .general_options import GeneralOptionsButton
from .log_handler import GtkSingleTextviewLogHandler
from .utils import (
	HBox,
	VBox,
	getWorkAreaSize,
	gtk_event_iteration_loop,
	imageFromFile,
	pack,
	rgba_parse,
)

# from gi.repository import GdkPixbuf

log = logging.getLogger("pyglossary")

_ = str  # later replace with translator function

# GTK 4 has removed the GtkContainer::border-width property
# (together with the rest of GtkContainer).
# Use other means to influence the spacing of your containers,
# such as the CSS margin and padding properties on child widgets,
# or the CSS border-spacing property on containers.


class MainWindow(gtk.ApplicationWindow):
	# @property
	# def config(self):
	# 	return self.ui.config

	css = """
	textview.console text {
		background-color: rgb(0, 0, 0);
	}
	check {
		min-width: 1.25em;
		min-height: 1.25em;
	}
	.margin_03 {
		margin-top: 0.3em;
		margin-right: 0.3em;
		margin-bottom: 0.3em;
		margin-left: 0.3em;
	}
	.margin_05 {
		margin-top: 0.5em;
		margin-right: 0.5em;
		margin-bottom: 0.5em;
		margin-left: 0.5em;
	}
	.margin_10 {
		margin-top: 1em;
		margin-right: 1em;
		margin-bottom: 1em;
		margin-left: 1em;
	}
	"""

	def status(self, msg: str) -> None:
		# try:
		# 	_id = self.statusMsgDict[msg]
		# except KeyError:
		# 	_id = self.statusMsgDict[msg] = self.statusNewId
		# 	self.statusNewId += 1
		id_ = self.statusBar.get_context_id(msg)
		self.statusBar.push(id_, msg)

	def __init__(
		self,
		ui: UIBase,
		app: gtk.Application,
		progressbar: bool = True,
		**kwargs,
	) -> None:
		self.app = app
		self.ui = ui
		#####
		gtk.ApplicationWindow.__init__(self, application=app, **kwargs)
		self.set_title("PyGlossary (Gtk4)")
		self.progressbarEnable = progressbar
		#####
		self.vbox = VBox()
		self.set_child(self.vbox)
		#####
		screenW, screenH = getWorkAreaSize(self)
		winSize = min(800, screenW - 50, screenH - 50)
		self.set_default_size(winSize, winSize)
		#####
		# gesture = gtk.GestureClick.new()
		# gesture.connect("pressed", self.onButtonPress)
		# self.add_controller(gesture)
		###
		ckey = gtk.EventControllerKey()
		ckey.connect("key-pressed", self.onKeyPress)
		self.add_controller(ckey)
		####
		self.connect("close-request", self.onCloseRequest)
		####
		self.pages = []
		# self.statusNewId = 0
		# self.statusMsgDict = {}  ## message -> id
		#####
		self.convertOptions = {}
		#####
		self.styleProvider = gtk.CssProvider()
		gtk.StyleContext.add_provider_for_display(
			gdk.Display.get_default(),
			self.styleProvider,
			gtk.STYLE_PROVIDER_PRIORITY_APPLICATION,
		)
		# gtk.StyleContext.add_provider_for_screen(
		# 	gdk.Screen.get_default(),
		# 	self.styleProvider,
		# 	gtk.STYLE_PROVIDER_PRIORITY_APPLICATION,
		# )
		self.styleProvider.load_from_data(self.css, len(self.css.encode("utf-8")))
		#####
		self.assert_quit = False
		self.path = ""
		# ____________________ Tab 1 - Convert ____________________ #
		labelSizeGroup = gtk.SizeGroup(mode=gtk.SizeGroupMode.HORIZONTAL)
		buttonSizeGroup = gtk.SizeGroup(mode=gtk.SizeGroupMode.HORIZONTAL)
		####
		vbox = VBox(spacing=5)
		vbox.label = _("Convert")
		vbox.icon = ""  # "*.png"
		self.pages.append(vbox)
		######
		hbox = \
HBox(spacing=3) hbox.label = gtk.Label(label=_("Input File:")) pack(hbox, hbox.label) labelSizeGroup.add_widget(hbox.label) hbox.label.set_property("xalign", 0) self.convertInputEntry = gtk.Entry() pack(hbox, self.convertInputEntry, 1, 1) button = BrowseButton( self.convertInputEntry.set_text, label="Browse", actionSave=False, title="Select Input File", ) pack(hbox, button) buttonSizeGroup.add_widget(button) pack(vbox, hbox) ## self.convertInputEntry.connect( "changed", self.convertInputEntryChanged, ) ### hbox = HBox(spacing=3) hbox.label = gtk.Label(label=_("Input Format:")) pack(hbox, hbox.label) labelSizeGroup.add_widget(hbox.label) hbox.label.set_property("xalign", 0) self.convertInputFormatCombo = InputFormatBox(self.app, parent=self) buttonSizeGroup.add_widget(self.convertInputFormatCombo.optionsButton) pack(hbox, self.convertInputFormatCombo) pack(hbox, gtk.Label(), 1, 1) pack(hbox, self.convertInputFormatCombo.dependsButton) pack(hbox, self.convertInputFormatCombo.optionsButton) pack(vbox, hbox) ##### hbox = HBox() hbox.get_style_context().add_class("margin_03") pack(vbox, hbox) ##### hbox = HBox(spacing=3) hbox.label = gtk.Label(label=_("Output File:")) pack(hbox, hbox.label) labelSizeGroup.add_widget(hbox.label) hbox.label.set_property("xalign", 0) self.convertOutputEntry = gtk.Entry() pack(hbox, self.convertOutputEntry, 1, 1) button = BrowseButton( self.convertOutputEntry.set_text, label="Browse", actionSave=True, title="Select Output File", ) pack(hbox, button) buttonSizeGroup.add_widget(button) pack(vbox, hbox) ## self.convertOutputEntry.connect( "changed", self.convertOutputEntryChanged, ) ### hbox = HBox(spacing=3) hbox.label = gtk.Label(label=_("Output Format:")) pack(hbox, hbox.label) labelSizeGroup.add_widget(hbox.label) hbox.label.set_property("xalign", 0) self.convertOutputFormatCombo = OutputFormatBox(self.app, parent=self) buttonSizeGroup.add_widget(self.convertOutputFormatCombo.optionsButton) pack(hbox, self.convertOutputFormatCombo) pack(hbox, gtk.Label(), 1, 1) pack(hbox, self.convertOutputFormatCombo.dependsButton) pack(hbox, self.convertOutputFormatCombo.optionsButton) pack(vbox, hbox) ##### hbox = HBox(spacing=10) hbox.get_style_context().add_class("margin_03") label = gtk.Label(label="") pack(hbox, label, expand=True) ## button = GeneralOptionsButton(self) button.set_size_request(300, 40) pack(hbox, button) ## self.convertButton = gtk.Button() self.convertButton.set_label("Convert") self.convertButton.connect("clicked", self.convertClicked) self.convertButton.set_size_request(300, 40) pack(hbox, self.convertButton) ## pack(vbox, hbox) ##### self.convertConsoleTextview = textview = gtk.TextView() swin = gtk.ScrolledWindow() swin.set_policy(gtk.PolicyType.AUTOMATIC, gtk.PolicyType.AUTOMATIC) swin.set_child(textview) pack(vbox, swin, expand=True) ###### about = AboutWidget( logo=logo, header=f"PyGlossary\nVersion {getVersion()}", # about=summary, about=f'{aboutText}\n{core.homePage}', authors="\n".join(authors), license_text=licenseText, ) about.label = _("About") about.icon = "" # "*.png" self.pages.append(about) ##### # ____________________________________________________________ # notebook = gtk.Notebook() self.notebook = notebook ######### for vbox in self.pages: label = gtk.Label(label=vbox.label) label.set_use_underline(True) vb = VBox(spacing=3) if vbox.icon: vbox.image = imageFromFile(vbox.icon) pack(vb, vbox.image) pack(vb, label) vb.show() notebook.append_page(vbox, vb) try: notebook.set_tab_reorderable(vbox, True) except AttributeError: pass 
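		# Note: set_tab_reorderable may be missing depending on the GTK 4 /
		# PyGObject version, hence the AttributeError guard above. A hedged
		# equivalent of that guard, for reference:
		#	if hasattr(notebook, "set_tab_reorderable"):
		#		notebook.set_tab_reorderable(vbox, True)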
####################### pack(self.vbox, notebook, 1, 1) # for i in ui.pagesOrder: # try: # j = pagesOrder[i] # except IndexError: # continue # notebook.reorder_child(self.pages[i], j) # ____________________________________________________________ # ########## textview.get_style_context().add_class("console") handler = GtkSingleTextviewLogHandler(self, textview) log.addHandler(handler) ### handler.setColor("CRITICAL", rgba_parse("red")) handler.setColor("ERROR", rgba_parse("red")) handler.setColor("WARNING", rgba_parse("yellow")) handler.setColor("INFO", rgba_parse("white")) handler.setColor("DEBUG", rgba_parse("white")) handler.setColor("TRACE", rgba_parse("white")) ### textview.get_buffer().set_text("Output & Error Console:\n") textview.set_editable(False) # ____________________________________________________________ # self.progressTitle = "" self.progressBar = pbar = gtk.ProgressBar() pbar.set_fraction(0) # pbar.set_text(_("Progress Bar")) # pbar.get_style_context() # pbar.set_property("height-request", 20) pack(self.vbox, pbar) ############ hbox = HBox(spacing=5) clearButton = gtk.Button( # always_show_image=True, label=_("Clear"), # icon_name="clear", ) clearButton.show() # image = gtk.Image() # image.set_icon_name(...) # clearButton.add(image) clearButton.connect("clicked", self.consoleClearButtonClicked) clearButton.set_tooltip_text("Clear Console") pack(hbox, clearButton) #### # hbox.sepLabel1 = gtk.Label(label="") # pack(hbox, hbox.sepLabel1, 1, 1) ###### hbox.verbosityLabel = gtk.Label(label=_("Verbosity:")) pack(hbox, hbox.verbosityLabel) ## self.verbosityCombo = combo = gtk.ComboBoxText() for level, levelName in enumerate(log.levelNamesCap): combo.append_text(f"{level} - {_(levelName)}") combo.set_active(log.getVerbosity()) combo.connect("changed", self.verbosityComboChanged) pack(hbox, combo) #### # hbox.sepLabel2 = gtk.Label(label="") # pack(hbox, hbox.sepLabel2, 1, 1) #### self.statusBar = gtk.Statusbar() pack(hbox, self.statusBar, 1, 1) #### # ResizeButton does not work in Gtk 4.0 # hbox.resizeButton = ResizeButton(self) # pack(hbox, hbox.resizeButton) ###### pack(self.vbox, hbox) # ____________________________________________________________ # self.vbox.show() notebook.set_current_page(0) # Convert tab self.convertInputFormatCombo.dependsButton.hide() self.convertOutputFormatCombo.dependsButton.hide() self.convertInputFormatCombo.optionsButton.hide() self.convertOutputFormatCombo.optionsButton.hide() ######## self.status("Select input file") def run( # noqa: PLR0913 self, inputFilename: str = "", outputFilename: str = "", inputFormat: str = "", outputFormat: str = "", reverse: bool = False, config: dict[str, Any] | None = None, readOptions: dict[str, Any] | None = None, writeOptions: dict[str, Any] | None = None, convertOptions: dict[str, Any] | None = None, glossarySetAttrs: dict[str, Any] | None = None, ) -> None: self.config = config if inputFilename: self.convertInputEntry.set_text(abspath(inputFilename)) if outputFilename: self.convertOutputEntry.set_text(abspath(outputFilename)) if inputFormat: self.convertInputFormatCombo.setActive(inputFormat) if outputFormat: self.convertOutputFormatCombo.setActive(outputFormat) if reverse: log.error("Gtk interface does not support Reverse feature") if readOptions: self.convertInputFormatCombo.setOptionsValues(readOptions) if writeOptions: self.convertOutputFormatCombo.setOptionsValues(writeOptions) self.convertOptions = convertOptions if convertOptions: log.debug(f"Using {convertOptions=}") self._glossarySetAttrs = 
glossarySetAttrs or {} self.present() def exitApp(self) -> None: self.destroy() # unlike Gtk3, no need for sys.exit or gtk.main_quit (which does not exist) def onCloseRequest(self, _widget: Any) -> None: self.exitApp() def onKeyPress( self, _ckey: gtk.EventControllerKey, keyval: int, _keycode: int, _state: gdk.ModifierType, ) -> None: if keyval == gdk.KEY_Escape: self.exitApp() def onButtonPress( self, gesture: gtk.GestureClick, _n_press: Any, _x: int, _y: int, ) -> None: print(f"MainWindow.onButtonPress: {gesture}") def consoleClearButtonClicked(self, _widget: Any = None) -> None: self.convertConsoleTextview.get_buffer().set_text("") def verbosityComboChanged(self, _widget: Any = None) -> None: verbosity = self.verbosityCombo.get_active() # or int(self.verbosityCombo.get_active_text()) log.setVerbosity(verbosity) def convertClicked(self, _widget: Any = None) -> None: inPath = self.convertInputEntry.get_text() if not inPath: log.critical("Input file path is empty!") return inFormat = self.convertInputFormatCombo.getActive() outPath = self.convertOutputEntry.get_text() if not outPath: log.critical("Output file path is empty!") return outFormat = self.convertOutputFormatCombo.getActive() gtk_event_iteration_loop() self.convertButton.set_sensitive(False) self.progressTitle = "Converting" readOptions = self.convertInputFormatCombo.optionsValues writeOptions = self.convertOutputFormatCombo.optionsValues glos = Glossary(ui=self.ui) glos.config = self.config glos.progressbar = self.progressbarEnable for attr, value in self._glossarySetAttrs.items(): setattr(glos, attr, value) log.debug(f"readOptions: {readOptions}") log.debug(f"writeOptions: {writeOptions}") log.debug(f"convertOptions: {self.convertOptions}") log.debug(f"config: {self.config}") try: glos.convert( ConvertArgs( inPath, inputFormat=inFormat, outputFilename=outPath, outputFormat=outFormat, readOptions=readOptions, writeOptions=writeOptions, **self.convertOptions, ), ) self.status("Convert finished") except Error as e: log.critical(str(e)) glos.cleanup() finally: self.convertButton.set_sensitive(True) self.assert_quit = False self.progressTitle = "" def convertInputEntryChanged(self, _widget: Any = None) -> None: inPath = self.convertInputEntry.get_text() inFormat = self.convertInputFormatCombo.getActive() if inPath.startswith("file://"): inPath = urlToPath(inPath) self.convertInputEntry.set_text(inPath) if self.config["ui_autoSetFormat"] and not inFormat: try: inputArgs = Glossary.detectInputFormat(inPath) except Error: pass else: self.convertInputFormatCombo.setActive(inputArgs.formatName) if not isfile(inPath): return self.status("Select output file") def convertOutputEntryChanged(self, _widget: Any = None) -> None: outPath = self.convertOutputEntry.get_text() outFormat = self.convertOutputFormatCombo.getActive() if not outPath: return if outPath.startswith("file://"): outPath = urlToPath(outPath) self.convertOutputEntry.set_text(outPath) if self.config["ui_autoSetFormat"] and not outFormat: try: outputArgs = Glossary.detectOutputFormat( filename=outPath, inputFilename=self.convertInputEntry.get_text(), ) except Error: pass else: outFormat = outputArgs.formatName self.convertOutputFormatCombo.setActive(outFormat) if outFormat: self.status('Press "Convert"') else: self.status("Select output format") def progressInit(self, title: str) -> None: self.progressTitle = title def progress(self, ratio: float, text: str = "") -> None: if not text: text = "%" + str(int(ratio * 100)) text += " - " + self.progressTitle 
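		# Worked example of the formatting above: with ratio=0.25 and
		# progressTitle="Converting", text becomes "%25 - Converting"
		# (the percent sign is prefixed, not appended).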
self.progressBar.set_fraction(ratio) # self.progressBar.set_text(text) # not working self.status(text) gtk_event_iteration_loop() pyglossary-5.0.9/pyglossary/ui/ui_gtk4/resize_button.py000066400000000000000000000027631476751035500234330ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2016-2017 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations from . import gdk, gtk from .utils import imageFromFile class ResizeButton(gtk.Box): def __init__( self, win: gtk.Window, edge: gdk.SurfaceEdge = gdk.SurfaceEdge.SOUTH_EAST, ) -> None: gtk.Box.__init__(self) self.win = win self.edge = edge ### self.image = imageFromFile("resize.png") self.append(self.image) gesture = gtk.GestureClick.new() gesture.connect("pressed", self.buttonPress) self.add_controller(gesture) def buttonPress(self, gesture: gtk.EventController, button, x, y) -> None: # noqa: ANN001 # Gesture is subclass of EventController pass # FIXME # self.win.begin_resize( # self.edge, # button, # int(gevent.x_root), # int(gevent.y_root), # gesture.get_current_event_time(), # ) pyglossary-5.0.9/pyglossary/ui/ui_gtk4/sort_options.py000066400000000000000000000070251476751035500232750ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2008-2025 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations from typing import Any from gi.repository import Gtk as gtk from pyglossary.sort_keys import defaultSortKeyName, namedSortKeyList from .utils import HBox, pack # log = logging.getLogger("pyglossary") sortKeyNameByDesc = {_sk.desc: _sk.name for _sk in namedSortKeyList} sortKeyNames = [_sk.name for _sk in namedSortKeyList] # Gtk.CheckButton is not a subclass of Gtk.Button! 
LOL class SortOptionsBox(gtk.Box): def __init__(self, mainWin: gtk.Window) -> None: gtk.Box.__init__(self, orientation=gtk.Orientation.VERTICAL) self.mainWin = mainWin ### self.set_spacing(5) ### hbox = HBox(spacing=5) sortCheck = gtk.CheckButton(label="Sort entries by") sortKeyCombo = gtk.ComboBoxText() for _sk in namedSortKeyList: sortKeyCombo.append_text(_sk.desc) sortKeyCombo.set_active(sortKeyNames.index(defaultSortKeyName)) sortKeyCombo.set_sensitive(False) # sortKeyCombo.connect("changed", self.sortKeyComboChanged) self.sortCheck = sortCheck self.sortKeyCombo = sortKeyCombo sortCheck.connect("toggled", self.onSortCheckToggled) pack(hbox, sortCheck) pack(hbox, sortKeyCombo) pack(self, hbox) ### hbox = self.encodingHBox = HBox(spacing=5) encodingRadio = self.encodingRadio = gtk.CheckButton(label="Sort Encoding") encodingEntry = self.encodingEntry = gtk.Entry() encodingEntry.set_text("utf-8") encodingEntry.set_width_chars(15) pack(hbox, gtk.Label(label=" ")) pack(hbox, encodingRadio) pack(hbox, encodingEntry) pack(self, hbox) encodingRadio.set_active(True) ### sortRadioSizeGroup = gtk.SizeGroup(mode=gtk.SizeGroupMode.HORIZONTAL) sortRadioSizeGroup.add_widget(encodingRadio) ### self.show() def onSortCheckToggled(self, *_args: Any) -> None: sort = self.sortCheck.get_active() self.sortKeyCombo.set_sensitive(sort) self.encodingHBox.set_sensitive(sort) def updateWidgets(self) -> None: convertOptions = self.mainWin.convertOptions sort = convertOptions.get("sort") self.sortCheck.set_active(sort) self.sortKeyCombo.set_sensitive(sort) self.encodingHBox.set_sensitive(sort) sortKeyName = convertOptions.get("sortKeyName") if sortKeyName: self.sortKeyCombo.set_active(sortKeyNames.index(sortKeyName)) sortEncoding = convertOptions.get("sortEncoding", "utf-8") self.encodingEntry.set_text(sortEncoding) def applyChanges(self) -> None: convertOptions = self.mainWin.convertOptions sort = self.sortCheck.get_active() if not sort: for param in ("sort", "sortKeyName", "sortEncoding"): if param in convertOptions: del convertOptions[param] return sortKeyDesc = self.sortKeyCombo.get_active_text() convertOptions["sort"] = sort convertOptions["sortKeyName"] = sortKeyNameByDesc[sortKeyDesc] if self.encodingRadio.get_active(): convertOptions["sortEncoding"] = self.encodingEntry.get_text() pyglossary-5.0.9/pyglossary/ui/ui_gtk4/ui.py000066400000000000000000000030261476751035500211450ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2008-2025 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
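# An illustrative sketch of how the UI class below is driven; the real entry
# point lives elsewhere in pyglossary.ui, and the filenames are made up:
#
#	ui = UI(progressbar=True)
#	ui.run(inputFilename="dict.ifo", outputFilename="dict.txt")
#	# run() stashes the kwargs, gtk.Application.run() fires do_activate(),
#	# and do_activate() builds MainWindow and forwards the stashed kwargs.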
from __future__ import annotations import logging from gi.repository import Gio as gio from gi.repository import Gtk as gtk from pyglossary.ui.base import UIBase from .mainwin import MainWindow log = logging.getLogger("pyglossary") # gtk.Window.set_default_icon_from_file(logo) # removed in Gtk 4.0 class UI(UIBase, gtk.Application): def __init__( self, progressbar: bool = True, ) -> None: UIBase.__init__(self) gtk.Application.__init__( self, application_id="apps.pyglossary", flags=gio.ApplicationFlags.FLAGS_NONE, ) self.progressbar = progressbar self.runArgs = {} def run(self, **kwargs) -> None: self.runArgs = kwargs gtk.Application.run(self) def do_activate(self) -> None: MainWindow( ui=self, app=self, progressbar=self.progressbar, ).run(**self.runArgs) pyglossary-5.0.9/pyglossary/ui/ui_gtk4/utils.py000066400000000000000000000132261476751035500216730ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # # Copyright © 2016-2019 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations from os.path import isabs, join from typing import TYPE_CHECKING, Any from gi.repository import Gdk as gdk # noqa: I001 from gi.repository import GLib as glib from gi.repository import Gtk as gtk from pyglossary.core import appResDir if TYPE_CHECKING: from collections.abc import Callable __all__ = [ "HBox", "VBox", "dialog_add_button", "gtk_event_iteration_loop", "imageFromFile", "pack", "rgba_parse", "showInfo", ] def getWorkAreaSize(_w: Any) -> tuple[int, int]: display = gdk.Display.get_default() # monitor = display.get_monitor_at_surface(w.get_surface()) # if monitor is None: monitor = display.get_primary_monitor() rect = monitor.get_workarea() return rect.width, rect.height def gtk_event_iteration_loop() -> None: ctx = glib.MainContext.default() try: while ctx.pending(): ctx.iteration(True) except KeyboardInterrupt: pass def VBox(**kwargs) -> gtk.Box: return gtk.Box(orientation=gtk.Orientation.VERTICAL, **kwargs) def HBox(**kwargs) -> gtk.Box: return gtk.Box(orientation=gtk.Orientation.HORIZONTAL, **kwargs) def imageFromFile(path: str) -> gtk.Image: # the file must exist if not isabs(path): path = join(appResDir, path) im = gtk.Image() im.set_from_file(path) return im def imageFromIconName(iconName: str, size: int, nonStock: bool = False) -> gtk.Image: # So gtk.Image.new_from_stock is deprecated # And the doc says we should use gtk.Image.new_from_icon_name # which does NOT have the same functionality! # because not all stock items are existing in all themes (even popular themes) # and new_from_icon_name does not seem to look in other (non-default) themes! 
	# So for now we use new_from_stock, unless it's not a stock item
	# But we do not use either of these two outside this function
	# So that it's easy to switch
	if nonStock:
		return gtk.Image.new_from_icon_name(iconName)
	try:
		return gtk.Image.new_from_stock(iconName, size)
	except Exception:
		return gtk.Image.new_from_icon_name(iconName)


def rgba_parse(colorStr: str) -> gdk.RGBA:
	rgba = gdk.RGBA()
	if not rgba.parse(colorStr):
		raise ValueError(f"bad color string {colorStr!r}")
	return rgba


def pack(
	box: gtk.Box | gtk.CellLayout,
	child: gtk.Widget | gtk.CellRenderer,
	expand: bool = False,
	fill: bool = False,  # noqa: ARG001
	padding: int = 0,
) -> None:  # noqa: ARG001
	if padding > 0:
		print(f"pack: padding={padding} ignored")
	if isinstance(box, gtk.Box):
		box.append(child)
		if expand:
			if box.get_orientation() == gtk.Orientation.VERTICAL:
				child.set_vexpand(True)
			else:
				child.set_hexpand(True)
		# FIXME: what to do with: fill, padding
	elif isinstance(box, gtk.CellLayout):
		box.pack_start(child, expand)
	else:
		raise TypeError(f"pack: unknown type {type(box)}")


def dialog_add_button(
	dialog: gtk.Dialog,
	_iconName: str,  # TODO: remove
	label: str,
	resId: int,
	onClicked: Callable | None = None,
	tooltip: str = "",
) -> None:
	button = gtk.Button(
		label=label,
		use_underline=True,
		# icon_name=iconName,
	)
	# fixed bug: used to ignore resId and pass gtk.ResponseType.OK
	dialog.add_action_widget(
		button,
		resId,
	)
	# `label` is a plain str here; the handler and tooltip go on the button
	if onClicked:
		button.connect("clicked", onClicked)
	if tooltip:
		button.set_tooltip_text(tooltip)


def showMsg(  # noqa: PLR0913
	msg: str,
	iconName: str = "",
	transient_for: gtk.Widget | None = None,
	title: str = "",
	borderWidth: int = 10,  # noqa: ARG001
	iconSize: gtk.IconSize = gtk.IconSize.LARGE,
	selectable: bool = False,
) -> None:
	win = gtk.Dialog(
		transient_for=transient_for,
	)
	# flags=0 makes it skip task bar
	if title:
		win.set_title(title)
	hbox = HBox(spacing=10)
	# hbox.set_border_width(borderWidth)
	if iconName:
		# win.set_icon(...)
		pack(hbox, imageFromIconName(iconName, iconSize))
	label = gtk.Label(label=msg)
	# set_line_wrap(True) makes the window go crazy tall (taller than screen)
	# and that's the reason for label.set_size_request and win.resize
	# label.set_line_wrap(True)
	# label.set_line_wrap_mode(pango.WrapMode.WORD)
	label.set_size_request(500, 1)
	if selectable:
		label.set_selectable(True)
	pack(hbox, label)
	hbox.show()
	content_area = win.get_content_area()
	pack(content_area, hbox)
	dialog_add_button(
		win,
		"gtk-close",
		"_Close",
		gtk.ResponseType.OK,
	)

	def onResponse(_w: Any, _response_id: int) -> None:
		win.destroy()

	win.connect("response", onResponse)
	# win.resize(600, 1)
	win.show()


def showError(msg, **kwargs) -> None:  # noqa: ANN001
	# gtk-dialog-error is deprecated since version 3.10:
	# Use named icon “dialog-error”.
	showMsg(msg, iconName="gtk-dialog-error", **kwargs)


def showWarning(msg, **kwargs) -> None:  # noqa: ANN001
	# gtk-dialog-warning is deprecated since version 3.10:
	# Use named icon “dialog-warning”.
	showMsg(msg, iconName="gtk-dialog-warning", **kwargs)


def showInfo(msg, **kwargs) -> None:  # noqa: ANN001
	# gtk-dialog-info is deprecated since version 3.10:
	# Use named icon “dialog-information”.
showMsg(msg, iconName="gtk-dialog-info", **kwargs) pyglossary-5.0.9/pyglossary/ui/ui_qt.py000066400000000000000000000026031476751035500203030ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # ui_qk.py # # Copyright © 2010-2019 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations from os.path import join from PyQt4 import QtGui as qt from pyglossary.glossary_v2 import * from .base import * noneItem = 'Not Selected' class UI(qt.QWidget, UIBase): def __init__(self) -> None: qt.QWidget.__init__(self) UIBase.__init__(self) self.setWindowTitle('PyGlossary (Qt)') self.setWindowIcon(qt.QIcon(join(uiDir, 'pyglossary.png'))) ###################### self.running = False self.glos = Glossary(ui=self) self.glos.config = self.config self.pathI = '' self.pathO = '' self.fcd_dir = join(homeDir, 'Desktop') ###################### vbox = qt.QVBoxLayout() self.setLayout(vbox) pyglossary-5.0.9/pyglossary/ui/ui_tk.py000066400000000000000000001155551476751035500203100ustar00rootroot00000000000000# -*- coding: utf-8 -*- # mypy: ignore-errors # ui_tk.py # # Copyright © 2009-2021 Saeed Rasooli (ilius) # # This program is a free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # You can get a copy of GNU General Public License along this program # But you can always get it from http://www.gnu.org/licenses/gpl.txt # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from __future__ import annotations import logging import os import tkinter as tk import traceback from os.path import abspath, isfile, join, splitext from tkinter import filedialog, ttk from tkinter import font as tkFont from typing import TYPE_CHECKING, Any, Literal from pyglossary import core from pyglossary.core import confDir, homeDir from pyglossary.glossary_v2 import ConvertArgs, Error, Glossary from pyglossary.text_utils import urlToPath from .base import ( UIBase, aboutText, authors, licenseText, logo, ) from .version import getVersion if TYPE_CHECKING: from collections.abc import Callable from tkinter.font import Font log = logging.getLogger("pyglossary") pluginByDesc = {plugin.description: plugin for plugin in Glossary.plugins.values()} readDesc = [ plugin.description for plugin in Glossary.plugins.values() if plugin.canRead ] writeDesc = [ plugin.description for plugin in Glossary.plugins.values() if plugin.canWrite ] def set_window_icon(window) -> None: window.iconphoto( True, tk.PhotoImage(file=logo), ) def decodeGeometry(gs): """ Example for gs: "253x252+30+684" returns (x, y, w, h). 
""" p = gs.split("+") w, h = p[0].split("x") return (int(p[1]), int(p[2]), int(w), int(h)) def encodeGeometry(x, y, w, h) -> str: return f"{w}x{h}+{x}+{y}" def encodeLocation(x, y) -> str: return f"+{x}+{y}" def centerWindow(win) -> None: """ Centers a tkinter window :param win: the root or Toplevel window to center. """ win.update_idletasks() width = win.winfo_width() frm_width = win.winfo_rootx() - win.winfo_x() win_width = width + 2 * frm_width height = win.winfo_height() titlebar_height = win.winfo_rooty() - win.winfo_y() win_height = height + titlebar_height + frm_width x = win.winfo_screenwidth() // 2 - win_width // 2 y = win.winfo_screenheight() // 2 - win_height // 2 win.geometry(encodeGeometry(x, y, width, height)) win.deiconify() def newButton(*args, **kwargs): button = ttk.Button(*args, **kwargs) def onEnter(_event) -> None: button.invoke() button.bind("", onEnter) button.bind("", onEnter) return button def newLabelWithImage(parent, file=""): image = tk.PhotoImage(file=file) label = ttk.Label(parent, image=image) label.image = image # keep a reference! return label def newReadOnlyText( parent, text="", borderwidth=10, font=None, ): height = len(text.strip().split("\n")) widget = tk.Text( parent, height=height, borderwidth=borderwidth, font=font, ) widget.insert(1.0, text) widget.pack() # widget.bind("", lambda e: break) widget.configure(state="disabled") return widget class TkTextLogHandler(logging.Handler): def __init__(self, tktext) -> None: logging.Handler.__init__(self) ##### tktext.tag_config("CRITICAL", foreground="#ff0000") tktext.tag_config("ERROR", foreground="#ff0000") tktext.tag_config("WARNING", foreground="#ffff00") tktext.tag_config("INFO", foreground="#00ff00") tktext.tag_config("DEBUG", foreground="#ffffff") tktext.tag_config("TRACE", foreground="#ffffff") ### self.tktext = tktext def emit(self, record) -> None: msg = "" if record.getMessage(): msg = self.format(record) ### if record.exc_info: type_, value, tback = record.exc_info tback_text = "".join( traceback.format_exception(type_, value, tback), ) if msg: msg += "\n" msg += tback_text ### self.tktext.insert( "end", msg + "\n", record.levelname, ) # Monkey-patch Tkinter # http://stackoverflow.com/questions/5191830/python-exception-logging def CallWrapper__call__(self, *args): """Apply first function SUBST to arguments, than FUNC.""" if self.subst: args = self.subst(*args) try: return self.func(*args) except Exception: log.exception("Exception in Tkinter callback:") tk.CallWrapper.__call__ = CallWrapper__call__ class ProgressBar(ttk.Frame): def __init__( # noqa: PLR0913 self, rootWin, min_: float, max_: float, width: int, height: int, appearance: str, # "sunken" fillColor: str, background: str, labelColor: str, labelFont: str, value: float = 0, ) -> None: self.min = min_ self.max = max_ self.width = width self.height = height self.value = value ttk.Frame.__init__(self, rootWin, relief=appearance) self.canvas = tk.Canvas( self, height=height, width=width, bd=0, highlightthickness=0, background=background, ) self.scale = self.canvas.create_rectangle( 0, 0, width, height, fill=fillColor, ) self.label = self.canvas.create_text( width / 2, height / 2, text="", anchor="c", fill=labelColor, font=labelFont, ) self.update() self.bind("", self.update) self.canvas.pack(side="top", fill="x", expand=False) def updateProgress(self, value, max_=None, text="") -> None: if max_: self.max = max_ self.value = value self.update(None, text) def update(self, event=None, labelText="") -> None: if event: # instance of tkinter.Event 
width = getattr(event, "width", None) or int(self.winfo_width()) if width != self.width: # window is resized self.canvas.coords(self.label, width / 2, self.height / 2) self.width = width else: width = self.width self.canvas.coords( self.scale, 0, 0, width * max(min(self.value, self.max), self.min) / self.max, self.height, ) if labelText: # TODO: replace below `// 10` with a number based on current font size self.canvas.itemconfig( self.label, text=labelText[: width // 10], ) self.canvas.update_idletasks() # class VerticalProgressBar(ProgressBar): # def update(self, event=None, labelText="") -> None: # ... # self.canvas.coords( # self.scale, # 0, # self.height * (1 - value / self.max), # width, # self.height, # ) class FormatDialog(tk.Toplevel): def __init__( # noqa: PLR0913 self, descList: list[str], title: str, onOk: Callable, button: FormatButton, activeDesc: str = "", ) -> None: tk.Toplevel.__init__(self) # bg="#0f0" does not work self.descList = descList self.items = self.descList self.onOk = onOk self.activeDesc = activeDesc self.lastSearch = None self.resizable(width=True, height=True) if title: self.title(title) set_window_icon(self) self.bind("", lambda _e: self.destroy()) px, py, pw, ph = decodeGeometry(button.winfo_toplevel().geometry()) width = 400 height = 400 self.geometry( encodeGeometry( px + pw // 2 - width // 2, py + ph // 2 - height // 2, width, height, ), ) entryBox = ttk.Frame(master=self) label = ttk.Label(master=entryBox, text="Search: ") label.pack(side="left") entry = self.entry = ttk.Entry(master=entryBox) entry.pack(fill="x", expand=True, side="left") entryBox.pack(fill="x", padx=5, pady=5) entry.bind("", self.onEntryKeyRelease) entry.focus() treevBox = ttk.Frame(master=self) treev = self.treev = ttk.Treeview( master=treevBox, columns=["Description"], show="", ) treev.bind("", self.onTreeDoubleClick) treev.pack( side="left", fill="both", expand=True, ) vsb = ttk.Scrollbar( master=treevBox, orient="vertical", command=treev.yview, ) vsb.pack(side="right", fill="y") treevBox.pack( fill="both", expand=True, padx=5, pady=5, ) treev.configure(yscrollcommand=vsb.set) self.updateTree() buttonBox = ttk.Frame(master=self) cancelButton = newButton( buttonBox, text="Cancel", command=self.cancelClicked, ) cancelButton.pack(side="right") okButton = newButton( buttonBox, text=" OK ", command=self.okClicked, # bg="#ff0000", # activebackground="#ff5050", ) okButton.pack(side="right") buttonBox.pack(fill="x") self.bind("", self.onReturnPress) self.bind("", self.onReturnPress) self.bind("", self.onDownPress) self.bind("", self.onUpPress) # self.bind("", self.onKeyPress) def setActiveRow(self, desc) -> None: self.treev.selection_set(desc) self.treev.see(desc) def updateTree(self) -> None: treev = self.treev current = treev.get_children() if current: treev.delete(*current) for desc in self.items: treev.insert("", "end", values=[desc], iid=desc) # iid should be rowId if self.activeDesc in self.items: self.setActiveRow(self.activeDesc) def onEntryKeyRelease(self, _event) -> None: text = self.entry.get().strip() if text == self.lastSearch: return if not text: self.items = self.descList self.updateTree() self.lastSearch = text return text = text.lower() descList = self.descList items1 = [] items2 = [] for desc in descList: if desc.lower().startswith(text): items1.append(desc) elif text in desc.lower(): items2.append(desc) self.items = items1 + items2 self.updateTree() self.lastSearch = text def onTreeDoubleClick(self, _event) -> None: self.okClicked() def cancelClicked(self) -> None: 
self.destroy() def onReturnPress(self, _event) -> None: self.okClicked() def onDownPress(self, _event) -> None: treev = self.treev selection = treev.selection() if selection: nextDesc = treev.next(selection[0]) if nextDesc: self.setActiveRow(nextDesc) elif self.items: self.setActiveRow(self.items[0]) treev.focus() def onUpPress(self, _event) -> None: treev = self.treev treev.focus() selection = treev.selection() if not selection: if self.items: self.setActiveRow(self.items[0]) return nextDesc = treev.prev(selection[0]) if nextDesc: self.setActiveRow(nextDesc) def onKeyPress(self, event) -> None: print(f"FormatDialog: onKeyPress: {event}") def okClicked(self) -> None: treev = self.treev selectedList = treev.selection() desc = selectedList[0] if selectedList else "" self.onOk(desc) self.destroy() class FormatButton(ttk.Button): noneLabel = "[Select Format]" def __init__( self, descList: list[str], dialogTitle: str, onChange: Callable, master=None, ) -> None: self.var = tk.StringVar() self.var.set(self.noneLabel) ttk.Button.__init__( self, master=master, textvariable=self.var, command=self.onClick, ) self.descList = descList self.dialogTitle = dialogTitle self._onChange = onChange self.activeDesc = "" self.bind("", self.onEnter) self.bind("", self.onEnter) def onEnter(self, _event=None) -> None: self.invoke() def onChange(self, desc) -> None: self.setValue(desc) self._onChange(desc) def get(self): return self.activeDesc def setValue(self, desc) -> None: if desc: self.var.set(desc) else: self.var.set(self.noneLabel) self.activeDesc = desc def onClick(self) -> None: dialog = FormatDialog( descList=self.descList, title=self.dialogTitle, onOk=self.onChange, button=self, activeDesc=self.activeDesc, ) dialog.focus() class FormatOptionsDialog(tk.Toplevel): commentLen = 60 kindFormatsOptions = { "Read": Glossary.formatsReadOptions, "Write": Glossary.formatsWriteOptions, } def __init__( self, formatName, kind, values, master=None, # noqa: ARG002 ) -> None: tk.Toplevel.__init__(self) # bg="#0f0" does not work self.resizable(width=True, height=True) self.title(kind + " Options") set_window_icon(self) self.bind("", lambda _e: self.destroy()) self.menu = None self.format = formatName self.kind = kind self.values = values self.options = list(self.kindFormatsOptions[kind][formatName]) self.optionsProp = Glossary.plugins[formatName].optionsProp self.createOptionsList() buttonBox = ttk.Frame(self) okButton = newButton( buttonBox, text=" OK ", command=self.okClicked, # bg="#ff0000", # activebackground="#ff5050", ) okButton.pack(side="right") buttonBox.pack(fill="x") def createOptionsList(self) -> None: values = self.values self.valueCol = "#3" cols = [ "Enable", # bool "Name", # str "Value", # str "Comment", # str ] treev = self.treev = ttk.Treeview( master=self, columns=cols, show="headings", ) for col in cols: treev.heading( col, text=col, # command=lambda c=col: sortby(treev, c, 0), ) # adjust the column's width to the header string treev.column( col, width=tkFont.Font().measure(col.title()), ) ### treev.bind( "", # "<>", # event.x and event.y are zero self.treeClicked, ) treev.pack(fill="x", expand=True) ### for optName in self.options: prop = self.optionsProp[optName] comment = prop.longComment if len(comment) > self.commentLen: comment = comment[: self.commentLen] + "..." 
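			# e.g. with commentLen=60, a 75-character option comment is shown
			# as its first 60 characters followed by "..."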
row = [ int(optName in values), optName, str(values.get(optName, "")), comment, ] treev.insert("", "end", values=row, iid=optName) # iid should be rowId # adjust column's width if necessary to fit each value for col_i, valueTmp in enumerate(row): value = str(valueTmp) if col_i == 3: value = value.zfill(20) # to reserve window width, because it's hard to resize it later col_w = tkFont.Font().measure(value) if treev.column(cols[col_i], width=None) < col_w: treev.column(cols[col_i], width=col_w) def valueMenuItemCustomSelected( self, treev, formatName: str, optName: str, menu=None, ) -> None: if menu: menu.destroy() self.menu = None value = treev.set(optName, self.valueCol) dialog = tk.Toplevel(master=treev) # bg="#0f0" does not work dialog.resizable(width=True, height=True) dialog.title(optName) set_window_icon(dialog) dialog.bind("", lambda _e: dialog.destroy()) px, py, pw, ph = decodeGeometry(treev.winfo_toplevel().geometry()) width = 300 height = 100 dialog.geometry( encodeGeometry( px + pw // 2 - width // 2, py + ph // 2 - height // 2, width, height, ), ) frame = ttk.Frame(master=dialog) label = ttk.Label(master=frame, text="Value for " + optName) label.pack() entry = ttk.Entry(master=frame) entry.insert(0, value) entry.pack(fill="x") prop = Glossary.plugins[formatName].optionsProp[optName] def customOkClicked(_event=None) -> None: rawValue = entry.get() if not prop.validateRaw(rawValue): log.error(f"invalid {prop.typ} value: {optName} = {rawValue!r}") return treev.set(optName, self.valueCol, rawValue) treev.set(optName, "#1", "1") # enable it col_w = tkFont.Font().measure(rawValue) if treev.column("Value", width=None) < col_w: treev.column("Value", width=col_w) dialog.destroy() entry.bind("", customOkClicked) label = ttk.Label(master=frame) label.pack(fill="x") customOkbutton = newButton( frame, text=" OK ", command=customOkClicked, # bg="#ff0000", # activebackground="#ff5050", ) customOkbutton.pack(side="right") ### frame.pack(fill="x") dialog.focus() def valueMenuItemSelected(self, optName, menu, value) -> None: treev = self.treev treev.set(optName, self.valueCol, value) treev.set(optName, "#1", "1") # enable it col_w = tkFont.Font().measure(value) if treev.column("Value", width=None) < col_w: treev.column("Value", width=col_w) menu.destroy() self.menu = None def valueCellClicked(self, event, optName) -> None: if not optName: return treev = self.treev prop = self.optionsProp[optName] propValues = prop.values if not propValues: if prop.customValue: self.valueMenuItemCustomSelected(treev, self.format, optName, None) else: log.error( f"invalid option {optName}, values={propValues}" f", customValue={prop.customValue}", ) return if prop.typ == "bool": rawValue = treev.set(optName, self.valueCol) if rawValue == "": # noqa: PLC1901 value = False else: value, isValid = prop.evaluate(rawValue) if not isValid: log.error(f"invalid {optName} = {rawValue!r}") value = False treev.set(optName, self.valueCol, str(not value)) treev.set(optName, "#1", "1") # enable it return menu = tk.Menu( master=treev, title=optName, tearoff=False, ) self.menu = menu # to destroy it later if prop.customValue: menu.add_command( label="[Custom Value]", command=lambda: self.valueMenuItemCustomSelected( treev, self.format, optName, menu, ), ) groupedValues = None if len(propValues) > 10: groupedValues = prop.groupValues() maxItemW = 0 def valueMenuItemSelectedCommand(value): def callback() -> None: self.valueMenuItemSelected(optName, menu, value) return callback if groupedValues: for groupName, subValues in 
groupedValues.items():
				if subValues is None:
					menu.add_command(
						label=str(groupName),
						command=valueMenuItemSelectedCommand(groupName),
					)
					maxItemW = max(maxItemW, tkFont.Font().measure(str(groupName)))
				else:
					subMenu = tk.Menu(tearoff=False)
					for subValue in subValues:
						subMenu.add_command(
							label=str(subValue),
							command=valueMenuItemSelectedCommand(subValue),
						)
					menu.add_cascade(label=groupName, menu=subMenu)
					maxItemW = max(maxItemW, tkFont.Font().measure(groupName))
		else:
			for valueTmp in propValues:
				value = str(valueTmp)
				menu.add_command(
					label=value,
					command=valueMenuItemSelectedCommand(value),
				)

		def close() -> None:
			menu.destroy()
			self.menu = None

		menu.add_command(
			label="[Close]",
			command=close,
		)
		try:
			menu.tk_popup(
				event.x_root,
				event.y_root,
			)
			# do not pass the third argument (entry), so that the menu
			# appears where the pointer is on its top-left corner
		finally:
			# make sure to release the grab (Tk 8.0a1 only)
			menu.grab_release()

	def treeClicked(self, event) -> None:
		treev = self.treev
		if self.menu:
			self.menu.destroy()
			self.menu = None
			return
		optName = treev.identify_row(event.y)  # optName is rowId
		if not optName:
			return
		col = treev.identify_column(event.x)  # "#1" to self.valueCol
		if col == "#1":
			value = treev.set(optName, col)
			treev.set(optName, col, 1 - int(value))
			return
		if col == self.valueCol:
			self.valueCellClicked(event, optName)

	def okClicked(self) -> None:
		treev = self.treev
		for optName in self.options:
			enable = bool(int(treev.set(optName, "#1")))
			if not enable:
				if optName in self.values:
					del self.values[optName]
				continue
			rawValue = treev.set(optName, self.valueCol)
			prop = self.optionsProp[optName]
			value, isValid = prop.evaluate(rawValue)
			if not isValid:
				log.error(f"invalid option value {optName} = {rawValue}")
				continue
			self.values[optName] = value
		self.destroy()


class FormatOptionsButton(ttk.Button):
	def __init__(
		self,
		kind: Literal["Read", "Write"],
		values: dict,
		formatInput: FormatButton,
		master=None,
	) -> None:
		ttk.Button.__init__(
			self,
			master=master,
			text="Options",
			command=self.buttonClicked,
			# bg="#f0f000",
			# activebackground="#f6f622",
		)
		self.kind = kind
		self.values = values
		self.formatInput = formatInput

	def setOptionsValues(self, values) -> None:
		self.values = values

	def buttonClicked(self) -> None:
		formatD = self.formatInput.get()
		if not formatD:
			return
		dialog = FormatOptionsDialog(
			pluginByDesc[formatD].name,
			self.kind,
			self.values,
			master=self,
		)
		# x, y, w, h = decodeGeometry(dialog.geometry())
		w, h = 380, 250
		# w and h are rough estimated width and height of `dialog`
		px, py, pw, ph = decodeGeometry(self.winfo_toplevel().geometry())
		# move dialog without changing the size
		dialog.geometry(
			encodeLocation(
				px + pw // 2 - w // 2,
				py + ph // 2 - h // 2,
			),
		)
		dialog.focus()


class VerticalNotebook(ttk.Frame):
	def __init__(
		self,
		parent: tk.Widget,
		font: Font | None = None,
		**kwargs,
	):
		ttk.Frame.__init__(self, parent, **kwargs)
		self.rowconfigure(0, weight=1)
		self.columnconfigure(2, weight=1)
		# scrollable tabs
		self._listbox = tk.Listbox(
			self,
			width=1,
			highlightthickness=0,
			relief="raised",
			justify="center",
			font=font,
		)
		self._listbox.configure()
		# list of widgets associated with the tabs
		self._tabs = []
		self._current_tab = None  # currently displayed tab
		self._listbox.grid(row=0, column=1, sticky="ns")
		# binding to display the selected tab
		self._listbox.bind("<<ListboxSelect>>", self._on_listbox_select)
		self._maxWidth = 0

	# add tab
	def add(self, widget: tk.Widget, text: str):
		self._listbox.insert("end", text)
		# resize listbox to be large enough to show all tab labels
		self._maxWidth = \
max(self._maxWidth, len(text)) self._listbox.configure( width=self._maxWidth + 2, ) index = len(self._tabs) self._tabs.append(widget) if self._current_tab is None: self.switch_tab(index) def switch_tab(self, index: int): self._show_tab_index(index) self._listbox.selection_clear(0, "end") self._listbox.selection_set(index) self._listbox.see(index) def _show_tab_index(self, index: int): widget = self._tabs[index] if self._current_tab is not None: self._current_tab.grid_remove() self._current_tab = widget widget.grid(in_=self, column=2, row=0, sticky="ewns") def _on_listbox_select(self, _event=None): selection = self._listbox.curselection() if not selection: return index = selection[0] if index >= len(self._tabs): print(f"{index=}") return self._show_tab_index(index) class UI(tk.Frame, UIBase): fcd_dir_save_path = join(confDir, "ui-tk-fcd-dir") def __init__( self, progressbar: bool = True, ) -> None: rootWin = self.rootWin = tk.Tk() # a hack that hides the window until we move it to the center of screen if os.sep == "\\": # Windows rootWin.attributes("-alpha", 0.0) else: # Linux rootWin.withdraw() tk.Frame.__init__(self, rootWin) UIBase.__init__(self) rootWin.title("PyGlossary (Tkinter)") rootWin.resizable(True, False) # self.progressbarEnable = progressbar ######## set_window_icon(rootWin) rootWin.bind("", lambda _e: rootWin.quit()) ######### # Linux: ('clam', 'alt', 'default', 'classic') # Windows: ('winnative', 'clam', 'alt', 'default', 'classic', 'vista', # 'xpnative') style = ttk.Style() style.configure("TButton", borderwidth=3) # style.theme_use("default") # there is no tk.Style() ######## self.pack(fill="x") # rootWin.bind("", self.resized) ####################### defaultFont = tkFont.nametofont("TkDefaultFont") if core.sysName in {"linux", "freebsd"}: defaultFont.configure(size=int(defaultFont.cget("size") * 1.4)) #### self.bigFont = defaultFont.copy() self.bigFont.configure(size=int(defaultFont.cget("size") * 1.6)) # self.biggerFont = defaultFont.copy() # self.biggerFont.configure(size=int(defaultFont.cget("size") * 1.8)) ###################### self.glos = Glossary(ui=self) self.glos.config = self.config self.glos.progressbar = progressbar self._convertOptions = {} self.pathI = "" self.pathO = "" fcd_dir = join(homeDir, "Desktop") if isfile(self.fcd_dir_save_path): try: with open(self.fcd_dir_save_path, encoding="utf-8") as fp: fcd_dir = fp.read().strip("\n") except Exception: log.exception("") self.fcd_dir = fcd_dir ###################### notebook = ttk.Notebook(self) convertFrame = ttk.Frame(notebook, height=200) ################### row = 0 label = ttk.Label(convertFrame, text="Input File: ") label.grid( row=row, column=0, sticky=tk.W, padx=5, ) ## entry = ttk.Entry(convertFrame) entry.grid( row=row, column=1, columnspan=2, sticky=tk.W + tk.E, padx=0, ) entry.bind_all("", self.anyEntryChanged) self.entryInputConvert = entry ## button = newButton( convertFrame, text="Browse", command=self.browseInputConvert, # bg="#f0f000", # activebackground="#f6f622", ) button.grid( row=row, column=3, sticky=tk.W + tk.E, padx=5, ) ###################### row += 1 label = ttk.Label(convertFrame, text="Input Format: ") label.grid( row=row, column=0, sticky=tk.W, padx=5, ) ## self.formatButtonInputConvert = FormatButton( master=convertFrame, descList=readDesc, dialogTitle="Select Input Format", onChange=self.inputFormatChanged, ) self.formatButtonInputConvert.grid( row=row, column=1, columnspan=2, sticky=tk.W, padx=0, ) ## self.readOptions: dict[str, Any] = {} self.writeOptions: dict[str, Any] = {} ## 
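		# Both dicts above are shared by reference with the FormatOptionsButton
		# widgets created below, whose dialogs mutate them in place; that is
		# why inputFormatChanged()/outputFormatChanged() reset them with
		# .clear() instead of rebinding a new dict.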
self.readOptionsButton = FormatOptionsButton( "Read", self.readOptions, self.formatButtonInputConvert, master=convertFrame, ) self.inputFormatRow = row ###################### row += 1 label = ttk.Label(convertFrame) label.grid( row=row, column=0, sticky=tk.W, ) ###################### row += 1 label = ttk.Label(convertFrame, text="Output Format: ") label.grid( row=row, column=0, sticky=tk.W, padx=5, ) ## self.formatButtonOutputConvert = FormatButton( master=convertFrame, descList=writeDesc, dialogTitle="Select Output Format", onChange=self.outputFormatChanged, ) self.formatButtonOutputConvert.grid( row=row, column=1, columnspan=2, sticky=tk.W, padx=0, ) ## self.writeOptionsButton = FormatOptionsButton( "Write", self.writeOptions, self.formatButtonOutputConvert, master=convertFrame, ) self.outputFormatRow = row ################### row += 1 label = ttk.Label(convertFrame, text="Output File: ") label.grid( row=row, column=0, sticky=tk.W, padx=5, ) ## entry = ttk.Entry(convertFrame) entry.grid( row=row, column=1, columnspan=2, sticky=tk.W + tk.E, padx=0, ) entry.bind_all("", self.anyEntryChanged) self.entryOutputConvert = entry ## button = newButton( convertFrame, text="Browse", command=self.browseOutputConvert, # bg="#f0f000", # activebackground="#f6f622", ) button.grid( row=row, column=3, sticky=tk.W + tk.E, padx=5, ) ################### row += 1 button = newButton( convertFrame, text="Convert", command=self.convert, # background="#00e000", # activebackground="#22f022", # borderwidth=7, # font=self.biggerFont, # padx=5, # pady=5, ) button.grid( row=row, column=2, columnspan=3, sticky=tk.W + tk.E + tk.S, padx=5, pady=5, ) # print(f"row number for Convert button: {row}") ################# row += 1 console = tk.Text( convertFrame, height=15, background="#000", foreground="#fff", ) console.bind("", self.consoleKeyPress) # self.consoleH = 15 # sbar = Tix.Scrollbar( # convertFrame, # orien=Tix.VERTICAL, # command=console.yview # ) # sbar.grid (row=row, column=1) # console["yscrollcommand"] = sbar.set console.grid( row=row, column=0, columnspan=4, sticky=tk.W + tk.E, padx=5, pady=0, ) log.addHandler( TkTextLogHandler(console), ) console.insert("end", "Console:\n") #### self.console = console ################## aboutFrame = ttk.Frame(notebook) versionFrame = ttk.Frame(aboutFrame, borderwidth=5) newLabelWithImage(versionFrame, file=logo).pack( side="left", fill="both", expand=False ) ttk.Label(versionFrame, text=f"PyGlossary\nVersion {getVersion()}").pack( side="left", fill="both", expand=False ) versionFrame.pack(side="top", fill="x") ## aboutNotebook = VerticalNotebook(aboutFrame, font=self.bigFont) aboutAboutFrame = ttk.Frame() newReadOnlyText( aboutAboutFrame, text=f"{aboutText}\nHome page: {core.homePage}", font=("DejaVu Sans", 11, ""), ).pack(fill="both", expand=True) aboutAboutFrame.pack(side="top", fill="x") aboutNotebook.add(aboutAboutFrame, "About") authorsFrame = ttk.Frame() authorsText = "\n".join(authors).replace("\t", " ") newReadOnlyText( authorsFrame, text=authorsText, font=("DejaVu Sans", 11, ""), ).pack(fill="both", expand=True) aboutNotebook.add(authorsFrame, "Authors") licenseFrame = ttk.Frame() newReadOnlyText( licenseFrame, text=licenseText, font=("DejaVu Sans", 11, ""), ).pack(fill="both", expand=True) aboutNotebook.add(licenseFrame, "License") aboutNotebook.pack(fill="both", expand=True) # aboutNotebook.show_tab_index(0) statusBarFrame = self.statusBarFrame = ttk.Frame(convertFrame) statusBarFrame.grid( row=row + 1, column=0, columnspan=4, sticky=tk.W + tk.E, padx=5, pady=0, ) 
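		# statusBarFrame is filled left to right below: a Clear button for the
		# console, the Verbosity selector (0 = critical only, 5 = trace), and
		# the ProgressBar, which run() later packs into the remaining space.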
clearB = newButton( statusBarFrame, text="Clear", command=self.console_clear, # how to set borderwidth using style? # bg="black", # fg="#ffff00", # activebackground="#333333", # activeforeground="#ffff00", # borderwidth=3, # height=2, ) clearB.pack(side="left") #### label = ttk.Label(statusBarFrame, text="Verbosity") label.pack(side="left") ## comboVar = tk.StringVar() combo = ttk.OptionMenu( statusBarFrame, comboVar, log.getVerbosity(), # default "0", "1", "2", "3", "4", "5", ) comboVar.trace_add("write", self.verbosityChanged) combo.pack(side="left") self.verbosityCombo = comboVar comboVar.set(log.getVerbosity()) notebook.add(convertFrame, text="Convert", underline=-1) notebook.add(aboutFrame, text="About", underline=-1) # convertFrame.pack(fill="x") # convertFrame.grid(sticky=tk.W + tk.E + tk.N + tk.S) ###################### tk.Grid.columnconfigure(convertFrame, 0, weight=1) tk.Grid.columnconfigure(convertFrame, 1, weight=30) tk.Grid.columnconfigure(convertFrame, 2, weight=20) tk.Grid.columnconfigure(convertFrame, 3, weight=1) tk.Grid.rowconfigure(convertFrame, 0, weight=50) tk.Grid.rowconfigure(convertFrame, 1, weight=50) tk.Grid.rowconfigure(convertFrame, 2, weight=1) tk.Grid.rowconfigure(convertFrame, 3, weight=50) tk.Grid.rowconfigure(convertFrame, 4, weight=50) tk.Grid.rowconfigure(convertFrame, 5, weight=1) tk.Grid.rowconfigure(convertFrame, 6, weight=50) # _________________________________________________________________ # notebook.pack(fill="both", expand=True) def textSelectAll(self, tktext) -> None: tktext.tag_add(tk.SEL, "1.0", tk.END) tktext.mark_set(tk.INSERT, "1.0") tktext.see(tk.INSERT) def consoleKeyPress(self, e) -> str | None: # print(e.state, e.keysym) if e.state > 0: if e.keysym == "c": return None if e.keysym == "a": self.textSelectAll(self.console) return "break" if e.keysym == "Escape": return None return "break" def verbosityChanged(self, _index, _value, _op) -> None: log.setVerbosity( int(self.verbosityCombo.get()), ) # def resized(self, event): # self.rootWin.winfo_height() - self.winfo_height() # log.debug(dh, self.consoleH) # if dh > 20: # self.consoleH += 1 # self.console["height"] = self.consoleH # self.console["width"] = int(self.console["width"]) + 1 # self.console.grid() # for x in dir(self): # if "info" in x: # log.debug(x) def inputFormatChanged(self, *_args) -> None: formatDesc = self.formatButtonInputConvert.get() if not formatDesc: return self.readOptions.clear() # reset the options, DO NOT re-assign if Glossary.formatsReadOptions[pluginByDesc[formatDesc].name]: self.readOptionsButton.grid( row=self.inputFormatRow, column=3, sticky=tk.W + tk.E, padx=5, pady=0, ) else: self.readOptionsButton.grid_forget() def outputFormatChanged(self, *_args) -> None: formatDesc = self.formatButtonOutputConvert.get() if not formatDesc: return formatName = pluginByDesc[formatDesc].name plugin = Glossary.plugins.get(formatName) if not plugin: log.error(f"plugin {formatName} not found") return self.writeOptions.clear() # reset the options, DO NOT re-assign if Glossary.formatsWriteOptions[formatName]: self.writeOptionsButton.grid( row=self.outputFormatRow, column=3, sticky=tk.W + tk.E, padx=5, pady=0, ) else: self.writeOptionsButton.grid_forget() pathI = self.entryInputConvert.get() if ( pathI and not self.entryOutputConvert.get() and self.formatButtonInputConvert.get() and plugin.extensionCreate ): pathNoExt, _ext = splitext(pathI) self.entryOutputConvert.insert( 0, pathNoExt + plugin.extensionCreate, ) def anyEntryChanged(self, _event=None) -> None: 
self.inputEntryChanged() self.outputEntryChanged() def inputEntryChanged(self, _event=None) -> None: # char = event.keysym pathI = self.entryInputConvert.get() if self.pathI == pathI: return if pathI.startswith("file://"): pathI = urlToPath(pathI) self.entryInputConvert.delete(0, "end") self.entryInputConvert.insert(0, pathI) if self.config["ui_autoSetFormat"]: formatDesc = self.formatButtonInputConvert.get() if not formatDesc: try: inputArgs = Glossary.detectInputFormat(pathI) except Error: pass else: plugin = Glossary.plugins.get(inputArgs.formatName) if plugin: self.formatButtonInputConvert.setValue(plugin.description) self.inputFormatChanged() self.pathI = pathI def outputEntryChanged(self, _event=None) -> None: pathO = self.entryOutputConvert.get() if self.pathO == pathO: return if pathO.startswith("file://"): pathO = urlToPath(pathO) self.entryOutputConvert.delete(0, "end") self.entryOutputConvert.insert(0, pathO) if self.config["ui_autoSetFormat"]: formatDesc = self.formatButtonOutputConvert.get() if not formatDesc: try: outputArgs = Glossary.detectOutputFormat( filename=pathO, inputFilename=self.entryInputConvert.get(), ) except Error: pass else: self.formatButtonOutputConvert.setValue( Glossary.plugins[outputArgs.formatName].description, ) self.outputFormatChanged() self.pathO = pathO def save_fcd_dir(self) -> None: if not self.fcd_dir: return with open(self.fcd_dir_save_path, mode="w", encoding="utf-8") as fp: fp.write(self.fcd_dir) def browseInputConvert(self) -> None: path = filedialog.askopenfilename(initialdir=self.fcd_dir) if path: self.entryInputConvert.delete(0, "end") self.entryInputConvert.insert(0, path) self.inputEntryChanged() self.fcd_dir = os.path.dirname(path) self.save_fcd_dir() def browseOutputConvert(self) -> None: path = filedialog.asksaveasfilename() if path: self.entryOutputConvert.delete(0, "end") self.entryOutputConvert.insert(0, path) self.outputEntryChanged() self.fcd_dir = os.path.dirname(path) self.save_fcd_dir() def convert(self): inPath = self.entryInputConvert.get() if not inPath: log.critical("Input file path is empty!") return None inFormatDesc = self.formatButtonInputConvert.get() # if not inFormatDesc: # log.critical("Input format is empty!");return inFormat = pluginByDesc[inFormatDesc].name if inFormatDesc else "" outPath = self.entryOutputConvert.get() if not outPath: log.critical("Output file path is empty!") return None outFormatDesc = self.formatButtonOutputConvert.get() if not outFormatDesc: log.critical("Output format is empty!") return None outFormat = pluginByDesc[outFormatDesc].name for attr, value in self._glossarySetAttrs.items(): setattr(self.glos, attr, value) try: finalOutputFile = self.glos.convert( ConvertArgs( inPath, inputFormat=inFormat, outputFilename=outPath, outputFormat=outFormat, readOptions=self.readOptions, writeOptions=self.writeOptions, **self._convertOptions, ), ) except Error as e: log.critical(str(e)) self.glos.cleanup() return False # if finalOutputFile: # self.status("Convert finished") # else: # self.status("Convert failed") return bool(finalOutputFile) def run( # noqa: PLR0913 self, inputFilename: str = "", outputFilename: str = "", inputFormat: str = "", outputFormat: str = "", reverse: bool = False, config: dict[str, Any] | None = None, readOptions: dict[str, Any] | None = None, writeOptions: dict[str, Any] | None = None, convertOptions: dict[str, Any] | None = None, glossarySetAttrs: dict[str, Any] | None = None, ) -> None: config = config or {} self.config = config if inputFilename: 
self.entryInputConvert.insert(0, abspath(inputFilename)) self.inputEntryChanged() if outputFilename: self.entryOutputConvert.insert(0, abspath(outputFilename)) self.outputEntryChanged() if inputFormat: self.formatButtonInputConvert.setValue( Glossary.plugins[inputFormat].description, ) self.inputFormatChanged() if outputFormat: self.formatButtonOutputConvert.setValue( Glossary.plugins[outputFormat].description, ) self.outputFormatChanged() if reverse: log.error("Tkinter interface does not support Reverse feature") pbar = ProgressBar( self.statusBarFrame, min_=0, max_=100, width=700, height=28, appearance="sunken", fillColor=config.get("tk.progressbar.color.fill", "blue"), background=config.get("tk.progressbar.color.background", "gray"), labelColor=config.get("tk.progressbar.color.text", "yellow"), labelFont=config.get("tk.progressbar.font", "Sans"), ) pbar.pack(side="left", fill="x", expand=True, padx=10) self.pbar = pbar pbar.pack(fill="x") self.progressTitle = "" # _________________________________________________________________ # centerWindow(self.rootWin) # show the window if os.sep == "\\": # Windows self.rootWin.attributes("-alpha", 1.0) else: # Linux self.rootWin.deiconify() # must be before setting self.readOptions and self.writeOptions self.anyEntryChanged() if readOptions: self.readOptionsButton.setOptionsValues(readOptions) self.readOptions = readOptions if writeOptions: self.writeOptionsButton.setOptionsValues(writeOptions) self.writeOptions = writeOptions self._convertOptions = convertOptions if convertOptions: log.info(f"Using {convertOptions=}") self._glossarySetAttrs = glossarySetAttrs or {} # inputFilename and readOptions are for DB Editor # which is not implemented self.mainloop() def progressInit(self, title) -> None: self.progressTitle = title def progress(self, ratio, text="") -> None: if not text: text = "%" + str(int(ratio * 100)) text += " - " + self.progressTitle self.pbar.updateProgress(ratio * 100, None, text) # self.pbar.value = ratio * 100 # self.pbar.update() self.rootWin.update() def console_clear(self, _event=None) -> None: self.console.delete("1.0", "end") self.console.insert("end", "Console:\n") # def reverseBrowseInput(self): # pass # def reverseBrowseOutput(self): # pass # def reverseLoad(self): # pass if __name__ == "__main__": import sys _path = sys.argv[1] if len(sys.argv) > 1 else "" _ui = UI(_path) _ui.run() pyglossary-5.0.9/pyglossary/ui/ui_web/000077500000000000000000000000001476751035500200615ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/ui/ui_web/__init__.py000066400000000000000000000001151476751035500221670ustar00rootroot00000000000000from pyglossary.ui.ui_web.ui_controller import WebUI as UI __all__ = ["UI"] pyglossary-5.0.9/pyglossary/ui/ui_web/browse.html000066400000000000000000000224711476751035500222560ustar00rootroot00000000000000 PyGlossary Browser
          pyglossary-5.0.9/pyglossary/ui/ui_web/favicon.ico000066400000000000000000000410761476751035500222110ustar00rootroot00000000000000[binary ICO icon data omitted] PyGlossary Web
          Input file
          Output file
          ⓘ How to convert a dictionary?

          1. Paste the full path to a dictionary file on your local file system in the Input file field.
          2. Select the input format if it is not detected automatically.
          3. Paste the full path for the converted output file in the Output file field.
          4. Select the target format to convert to.
          5. Click the Convert button and wait for the conversion to complete. For large files, the operation can take several minutes. (A scripted alternative is sketched below.)
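
          If you prefer scripting, the same conversion can be driven from Python. The sketch below is a minimal example assuming PyGlossary's documented top-level API; the file names are placeholders:

          # Minimal sketch, assuming the documented `pyglossary` top-level API;
          # "input.txt" and "output.ifo" are placeholder paths, and the formats
          # are auto-detected from the file extensions.
          from pyglossary import ConvertArgs, Glossary

          Glossary.init()  # load the format plugins once per process

          glos = Glossary()
          glos.convert(ConvertArgs(
              inputFilename="input.txt",
              outputFilename="output.ifo",
          ))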


          Conversion Options

          Please enter valid JSON for custom conversion options.

          Example config (see available options in the plugin options reference):

          + Click for an example

          {
           "convertOptions": {
            "sortKeyName": "headword_lower:es_ES",
            "sortEncoding": "utf-8",
           },
           "readOptions": {
            "encoding": "utf-8",
            "example_color": "blue"
           },
           "writeOptions": {
            "resources": false
           }
          }
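
          For reference, these option groups mirror how the UI code invokes Glossary.convert(): readOptions and writeOptions are passed through unchanged, and each key under "convertOptions" becomes a keyword argument of ConvertArgs. A rough Python sketch (paths are placeholders):

          from pyglossary import ConvertArgs, Glossary

          Glossary.init()
          glos = Glossary()
          glos.convert(ConvertArgs(
              "input.txt",                  # placeholder input path
              outputFilename="output.txt",  # placeholder output path
              readOptions={"encoding": "utf-8", "example_color": "blue"},
              writeOptions={"resources": False},
              sortKeyName="headword_lower:es_ES",  # from "convertOptions"
              sortEncoding="utf-8",
          ))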

          pyglossary-5.0.9/pyglossary/ui/ui_web/minimal-theme-switcher.js000066400000000000000000000035501476751035500247760ustar00rootroot00000000000000/*! * Minimal theme switcher * * Pico.css - https://picocss.com * Copyright 2019-2024 - Licensed under MIT */ const themeSwitcher = { // Config _scheme: "auto", menuTarget: "details.dropdown", buttonsTarget: "a[data-theme-switcher]", buttonAttribute: "data-theme-switcher", rootAttribute: "data-theme", localStorageKey: "picoPreferredColorScheme", // Init init() { this.scheme = this.schemeFromLocalStorage; this.initSwitchers(); }, // Get color scheme from local storage get schemeFromLocalStorage() { return window.localStorage?.getItem(this.localStorageKey) ?? this._scheme; }, // Preferred color scheme get preferredColorScheme() { return window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light"; }, // Init switchers initSwitchers() { const buttons = document.querySelectorAll(this.buttonsTarget); buttons.forEach((button) => { button.addEventListener( "click", (event) => { event.preventDefault(); // Set scheme this.scheme = button.getAttribute(this.buttonAttribute); // Close dropdown document.querySelector(this.menuTarget)?.removeAttribute("open"); }, false ); }); }, // Set scheme set scheme(scheme) { if (scheme == "auto") { this._scheme = this.preferredColorScheme; } else if (scheme == "dark" || scheme == "light") { this._scheme = scheme; } this.applyScheme(); this.schemeToLocalStorage(); }, // Get scheme get scheme() { return this._scheme; }, // Apply scheme applyScheme() { document.querySelector("html")?.setAttribute(this.rootAttribute, this.scheme); }, // Store scheme to local storage schemeToLocalStorage() { window.localStorage?.setItem(this.localStorageKey, this.scheme); }, }; // Init themeSwitcher.init();pyglossary-5.0.9/pyglossary/ui/ui_web/pico.green.min.css000066400000000000000000002404221476751035500234120ustar00rootroot00000000000000@charset "UTF-8";/*! 
* Pico CSS ✨ v2.0.6 (https://picocss.com) * Copyright 2019-2024 - Licensed under MIT */:root{--pico-font-family-emoji:"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";--pico-font-family-sans-serif:system-ui,"Segoe UI",Roboto,Oxygen,Ubuntu,Cantarell,Helvetica,Arial,"Helvetica Neue",sans-serif,var(--pico-font-family-emoji);--pico-font-family-monospace:ui-monospace,SFMono-Regular,"SF Mono",Menlo,Consolas,"Liberation Mono",monospace,var(--pico-font-family-emoji);--pico-font-family:var(--pico-font-family-sans-serif);--pico-line-height:1.5;--pico-font-weight:400;--pico-font-size:100%;--pico-text-underline-offset:0.1rem;--pico-border-radius:0.25rem;--pico-border-width:0.0625rem;--pico-outline-width:0.125rem;--pico-transition:0.2s ease-in-out;--pico-spacing:1rem;--pico-typography-spacing-vertical:1rem;--pico-block-spacing-vertical:var(--pico-spacing);--pico-block-spacing-horizontal:var(--pico-spacing);--pico-grid-column-gap:var(--pico-spacing);--pico-grid-row-gap:var(--pico-spacing);--pico-form-element-spacing-vertical:0.75rem;--pico-form-element-spacing-horizontal:1rem;--pico-group-box-shadow:0 0 0 rgba(0, 0, 0, 0);--pico-group-box-shadow-focus-with-button:0 0 0 var(--pico-outline-width) var(--pico-primary-focus);--pico-group-box-shadow-focus-with-input:0 0 0 0.0625rem var(--pico-form-element-border-color);--pico-modal-overlay-backdrop-filter:blur(0.375rem);--pico-nav-element-spacing-vertical:1rem;--pico-nav-element-spacing-horizontal:0.5rem;--pico-nav-link-spacing-vertical:0.5rem;--pico-nav-link-spacing-horizontal:0.5rem;--pico-nav-breadcrumb-divider:">";--pico-icon-checkbox:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='rgb(255, 255, 255)' stroke-width='4' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='20 6 9 17 4 12'%3E%3C/polyline%3E%3C/svg%3E");--pico-icon-minus:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='rgb(255, 255, 255)' stroke-width='4' stroke-linecap='round' stroke-linejoin='round'%3E%3Cline x1='5' y1='12' x2='19' y2='12'%3E%3C/line%3E%3C/svg%3E");--pico-icon-chevron:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='rgb(136, 145, 164)' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='6 9 12 15 18 9'%3E%3C/polyline%3E%3C/svg%3E");--pico-icon-date:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='rgb(136, 145, 164)' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Crect x='3' y='4' width='18' height='18' rx='2' ry='2'%3E%3C/rect%3E%3Cline x1='16' y1='2' x2='16' y2='6'%3E%3C/line%3E%3Cline x1='8' y1='2' x2='8' y2='6'%3E%3C/line%3E%3Cline x1='3' y1='10' x2='21' y2='10'%3E%3C/line%3E%3C/svg%3E");--pico-icon-time:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='rgb(136, 145, 164)' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Ccircle cx='12' cy='12' r='10'%3E%3C/circle%3E%3Cpolyline points='12 6 12 12 16 14'%3E%3C/polyline%3E%3C/svg%3E");--pico-icon-search:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='rgb(136, 145, 164)' stroke-width='1.5' stroke-linecap='round' 
stroke-linejoin='round'%3E%3Ccircle cx='11' cy='11' r='8'%3E%3C/circle%3E%3Cline x1='21' y1='21' x2='16.65' y2='16.65'%3E%3C/line%3E%3C/svg%3E");--pico-icon-close:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='rgb(136, 145, 164)' stroke-width='3' stroke-linecap='round' stroke-linejoin='round'%3E%3Cline x1='18' y1='6' x2='6' y2='18'%3E%3C/line%3E%3Cline x1='6' y1='6' x2='18' y2='18'%3E%3C/line%3E%3C/svg%3E");--pico-icon-loading:url("data:image/svg+xml,%3Csvg fill='none' height='24' width='24' viewBox='0 0 24 24' xmlns='http://www.w3.org/2000/svg' %3E%3Cstyle%3E g %7B animation: rotate 2s linear infinite; transform-origin: center center; %7D circle %7B stroke-dasharray: 75,100; stroke-dashoffset: -5; animation: dash 1.5s ease-in-out infinite; stroke-linecap: round; %7D @keyframes rotate %7B 0%25 %7B transform: rotate(0deg); %7D 100%25 %7B transform: rotate(360deg); %7D %7D @keyframes dash %7B 0%25 %7B stroke-dasharray: 1,100; stroke-dashoffset: 0; %7D 50%25 %7B stroke-dasharray: 44.5,100; stroke-dashoffset: -17.5; %7D 100%25 %7B stroke-dasharray: 44.5,100; stroke-dashoffset: -62; %7D %7D %3C/style%3E%3Cg%3E%3Ccircle cx='12' cy='12' r='10' fill='none' stroke='rgb(136, 145, 164)' stroke-width='4' /%3E%3C/g%3E%3C/svg%3E")}@media (min-width:576px){:root{--pico-font-size:106.25%}}@media (min-width:768px){:root{--pico-font-size:112.5%}}@media (min-width:1024px){:root{--pico-font-size:118.75%}}@media (min-width:1280px){:root{--pico-font-size:125%}}@media (min-width:1536px){:root{--pico-font-size:131.25%}}a{--pico-text-decoration:underline}a.contrast,a.secondary{--pico-text-decoration:underline}small{--pico-font-size:0.875em}h1,h2,h3,h4,h5,h6{--pico-font-weight:700}h1{--pico-font-size:2rem;--pico-line-height:1.125;--pico-typography-spacing-top:3rem}h2{--pico-font-size:1.75rem;--pico-line-height:1.15;--pico-typography-spacing-top:2.625rem}h3{--pico-font-size:1.5rem;--pico-line-height:1.175;--pico-typography-spacing-top:2.25rem}h4{--pico-font-size:1.25rem;--pico-line-height:1.2;--pico-typography-spacing-top:1.874rem}h5{--pico-font-size:1.125rem;--pico-line-height:1.225;--pico-typography-spacing-top:1.6875rem}h6{--pico-font-size:1rem;--pico-line-height:1.25;--pico-typography-spacing-top:1.5rem}tfoot td,tfoot th,thead td,thead th{--pico-font-weight:600;--pico-border-width:0.1875rem}code,kbd,pre,samp{--pico-font-family:var(--pico-font-family-monospace)}kbd{--pico-font-weight:bolder}:where(select,textarea),input:not([type=submit],[type=button],[type=reset],[type=checkbox],[type=radio],[type=file]){--pico-outline-width:0.0625rem}[type=search]{--pico-border-radius:5rem}[type=checkbox],[type=radio]{--pico-border-width:0.125rem}[type=checkbox][role=switch]{--pico-border-width:0.1875rem}details.dropdown summary:not([role=button]){--pico-outline-width:0.0625rem}nav details.dropdown summary:focus-visible{--pico-outline-width:0.125rem}[role=search]{--pico-border-radius:5rem}[role=group]:has(button.secondary:focus,[type=submit].secondary:focus,[type=button].secondary:focus,[role=button].secondary:focus),[role=search]:has(button.secondary:focus,[type=submit].secondary:focus,[type=button].secondary:focus,[role=button].secondary:focus){--pico-group-box-shadow-focus-with-button:0 0 0 var(--pico-outline-width) 
var(--pico-secondary-focus)}[role=group]:has(button.contrast:focus,[type=submit].contrast:focus,[type=button].contrast:focus,[role=button].contrast:focus),[role=search]:has(button.contrast:focus,[type=submit].contrast:focus,[type=button].contrast:focus,[role=button].contrast:focus){--pico-group-box-shadow-focus-with-button:0 0 0 var(--pico-outline-width) var(--pico-contrast-focus)}[role=group] [role=button],[role=group] [type=button],[role=group] [type=submit],[role=group] button,[role=search] [role=button],[role=search] [type=button],[role=search] [type=submit],[role=search] button{--pico-form-element-spacing-horizontal:2rem}details summary[role=button]:not(.outline)::after{filter:brightness(0) invert(1)}[aria-busy=true]:not(input,select,textarea):is(button,[type=submit],[type=button],[type=reset],[role=button]):not(.outline)::before{filter:brightness(0) invert(1)}:root:not([data-theme=dark]),[data-theme=light]{--pico-background-color:#fff;--pico-color:#373c44;--pico-text-selection-color:rgba(71, 164, 23, 0.25);--pico-muted-color:#646b79;--pico-muted-border-color:#e7eaf0;--pico-primary:#33790f;--pico-primary-background:#398712;--pico-primary-border:var(--pico-primary-background);--pico-primary-underline:rgba(51, 121, 15, 0.5);--pico-primary-hover:#265e09;--pico-primary-hover-background:#33790f;--pico-primary-hover-border:var(--pico-primary-hover-background);--pico-primary-hover-underline:var(--pico-primary-hover);--pico-primary-focus:rgba(71, 164, 23, 0.5);--pico-primary-inverse:#fff;--pico-secondary:#5d6b89;--pico-secondary-background:#525f7a;--pico-secondary-border:var(--pico-secondary-background);--pico-secondary-underline:rgba(93, 107, 137, 0.5);--pico-secondary-hover:#48536b;--pico-secondary-hover-background:#48536b;--pico-secondary-hover-border:var(--pico-secondary-hover-background);--pico-secondary-hover-underline:var(--pico-secondary-hover);--pico-secondary-focus:rgba(93, 107, 137, 0.25);--pico-secondary-inverse:#fff;--pico-contrast:#181c25;--pico-contrast-background:#181c25;--pico-contrast-border:var(--pico-contrast-background);--pico-contrast-underline:rgba(24, 28, 37, 0.5);--pico-contrast-hover:#000;--pico-contrast-hover-background:#000;--pico-contrast-hover-border:var(--pico-contrast-hover-background);--pico-contrast-hover-underline:var(--pico-secondary-hover);--pico-contrast-focus:rgba(93, 107, 137, 0.25);--pico-contrast-inverse:#fff;--pico-box-shadow:0.0145rem 0.029rem 0.174rem rgba(129, 145, 181, 0.01698),0.0335rem 0.067rem 0.402rem rgba(129, 145, 181, 0.024),0.0625rem 0.125rem 0.75rem rgba(129, 145, 181, 0.03),0.1125rem 0.225rem 1.35rem rgba(129, 145, 181, 0.036),0.2085rem 0.417rem 2.502rem rgba(129, 145, 181, 0.04302),0.5rem 1rem 6rem rgba(129, 145, 181, 0.06),0 0 0 0.0625rem rgba(129, 145, 181, 0.015);--pico-h1-color:#2d3138;--pico-h2-color:#373c44;--pico-h3-color:#424751;--pico-h4-color:#4d535e;--pico-h5-color:#5c6370;--pico-h6-color:#646b79;--pico-mark-background-color:#fde7c0;--pico-mark-color:#0f1114;--pico-ins-color:#1d6a54;--pico-del-color:#883935;--pico-blockquote-border-color:var(--pico-muted-border-color);--pico-blockquote-footer-color:var(--pico-muted-color);--pico-button-box-shadow:0 0 0 rgba(0, 0, 0, 0);--pico-button-hover-box-shadow:0 0 0 rgba(0, 0, 0, 0);--pico-table-border-color:var(--pico-muted-border-color);--pico-table-row-stripped-background-color:rgba(111, 120, 135, 
0.0375);--pico-code-background-color:#f3f5f7;--pico-code-color:#646b79;--pico-code-kbd-background-color:var(--pico-color);--pico-code-kbd-color:var(--pico-background-color);--pico-form-element-background-color:#fbfcfc;--pico-form-element-selected-background-color:#dfe3eb;--pico-form-element-border-color:#cfd5e2;--pico-form-element-color:#23262c;--pico-form-element-placeholder-color:var(--pico-muted-color);--pico-form-element-active-background-color:#fff;--pico-form-element-active-border-color:var(--pico-primary-border);--pico-form-element-focus-color:var(--pico-primary-border);--pico-form-element-disabled-opacity:0.5;--pico-form-element-invalid-border-color:#b86a6b;--pico-form-element-invalid-active-border-color:#c84f48;--pico-form-element-invalid-focus-color:var(--pico-form-element-invalid-active-border-color);--pico-form-element-valid-border-color:#4c9b8a;--pico-form-element-valid-active-border-color:#279977;--pico-form-element-valid-focus-color:var(--pico-form-element-valid-active-border-color);--pico-switch-background-color:#bfc7d9;--pico-switch-checked-background-color:var(--pico-primary-background);--pico-switch-color:#fff;--pico-switch-thumb-box-shadow:0 0 0 rgba(0, 0, 0, 0);--pico-range-border-color:#dfe3eb;--pico-range-active-border-color:#bfc7d9;--pico-range-thumb-border-color:var(--pico-background-color);--pico-range-thumb-color:var(--pico-secondary-background);--pico-range-thumb-active-color:var(--pico-primary-background);--pico-accordion-border-color:var(--pico-muted-border-color);--pico-accordion-active-summary-color:var(--pico-primary-hover);--pico-accordion-close-summary-color:var(--pico-color);--pico-accordion-open-summary-color:var(--pico-muted-color);--pico-card-background-color:var(--pico-background-color);--pico-card-border-color:var(--pico-muted-border-color);--pico-card-box-shadow:var(--pico-box-shadow);--pico-card-sectioning-background-color:#fbfcfc;--pico-dropdown-background-color:#fff;--pico-dropdown-border-color:#eff1f4;--pico-dropdown-box-shadow:var(--pico-box-shadow);--pico-dropdown-color:var(--pico-color);--pico-dropdown-hover-background-color:#eff1f4;--pico-loading-spinner-opacity:0.5;--pico-modal-overlay-background-color:rgba(232, 234, 237, 0.75);--pico-progress-background-color:#dfe3eb;--pico-progress-color:var(--pico-primary-background);--pico-tooltip-background-color:var(--pico-contrast-background);--pico-tooltip-color:var(--pico-contrast-inverse);--pico-icon-valid:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='rgb(76, 155, 138)' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='20 6 9 17 4 12'%3E%3C/polyline%3E%3C/svg%3E");--pico-icon-invalid:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='rgb(200, 79, 72)' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Ccircle cx='12' cy='12' r='10'%3E%3C/circle%3E%3Cline x1='12' y1='8' x2='12' y2='12'%3E%3C/line%3E%3Cline x1='12' y1='16' x2='12.01' y2='16'%3E%3C/line%3E%3C/svg%3E");color-scheme:light}:root:not([data-theme=dark]) input:is([type=submit],[type=button],[type=reset],[type=checkbox],[type=radio],[type=file]),[data-theme=light] input:is([type=submit],[type=button],[type=reset],[type=checkbox],[type=radio],[type=file]){--pico-form-element-focus-color:var(--pico-primary-focus)}@media only screen and 
(prefers-color-scheme:dark){:root:not([data-theme]){--pico-background-color:#13171f;--pico-color:#c2c7d0;--pico-text-selection-color:rgba(78, 179, 27, 0.1875);--pico-muted-color:#7b8495;--pico-muted-border-color:#202632;--pico-primary:#4eb31b;--pico-primary-background:#398712;--pico-primary-border:var(--pico-primary-background);--pico-primary-underline:rgba(78, 179, 27, 0.5);--pico-primary-hover:#5dd121;--pico-primary-hover-background:#409614;--pico-primary-hover-border:var(--pico-primary-hover-background);--pico-primary-hover-underline:var(--pico-primary-hover);--pico-primary-focus:rgba(78, 179, 27, 0.375);--pico-primary-inverse:#fff;--pico-secondary:#969eaf;--pico-secondary-background:#525f7a;--pico-secondary-border:var(--pico-secondary-background);--pico-secondary-underline:rgba(150, 158, 175, 0.5);--pico-secondary-hover:#b3b9c5;--pico-secondary-hover-background:#5d6b89;--pico-secondary-hover-border:var(--pico-secondary-hover-background);--pico-secondary-hover-underline:var(--pico-secondary-hover);--pico-secondary-focus:rgba(144, 158, 190, 0.25);--pico-secondary-inverse:#fff;--pico-contrast:#dfe3eb;--pico-contrast-background:#eff1f4;--pico-contrast-border:var(--pico-contrast-background);--pico-contrast-underline:rgba(223, 227, 235, 0.5);--pico-contrast-hover:#fff;--pico-contrast-hover-background:#fff;--pico-contrast-hover-border:var(--pico-contrast-hover-background);--pico-contrast-hover-underline:var(--pico-contrast-hover);--pico-contrast-focus:rgba(207, 213, 226, 0.25);--pico-contrast-inverse:#000;--pico-box-shadow:0.0145rem 0.029rem 0.174rem rgba(7, 9, 12, 0.01698),0.0335rem 0.067rem 0.402rem rgba(7, 9, 12, 0.024),0.0625rem 0.125rem 0.75rem rgba(7, 9, 12, 0.03),0.1125rem 0.225rem 1.35rem rgba(7, 9, 12, 0.036),0.2085rem 0.417rem 2.502rem rgba(7, 9, 12, 0.04302),0.5rem 1rem 6rem rgba(7, 9, 12, 0.06),0 0 0 0.0625rem rgba(7, 9, 12, 0.015);--pico-h1-color:#f0f1f3;--pico-h2-color:#e0e3e7;--pico-h3-color:#c2c7d0;--pico-h4-color:#b3b9c5;--pico-h5-color:#a4acba;--pico-h6-color:#8891a4;--pico-mark-background-color:#014063;--pico-mark-color:#fff;--pico-ins-color:#62af9a;--pico-del-color:#ce7e7b;--pico-blockquote-border-color:var(--pico-muted-border-color);--pico-blockquote-footer-color:var(--pico-muted-color);--pico-button-box-shadow:0 0 0 rgba(0, 0, 0, 0);--pico-button-hover-box-shadow:0 0 0 rgba(0, 0, 0, 0);--pico-table-border-color:var(--pico-muted-border-color);--pico-table-row-stripped-background-color:rgba(111, 120, 135, 
0.0375);--pico-code-background-color:#1a1f28;--pico-code-color:#8891a4;--pico-code-kbd-background-color:var(--pico-color);--pico-code-kbd-color:var(--pico-background-color);--pico-form-element-background-color:#1c212c;--pico-form-element-selected-background-color:#2a3140;--pico-form-element-border-color:#2a3140;--pico-form-element-color:#e0e3e7;--pico-form-element-placeholder-color:#8891a4;--pico-form-element-active-background-color:#1a1f28;--pico-form-element-active-border-color:var(--pico-primary-border);--pico-form-element-focus-color:var(--pico-primary-border);--pico-form-element-disabled-opacity:0.5;--pico-form-element-invalid-border-color:#964a50;--pico-form-element-invalid-active-border-color:#b7403b;--pico-form-element-invalid-focus-color:var(--pico-form-element-invalid-active-border-color);--pico-form-element-valid-border-color:#2a7b6f;--pico-form-element-valid-active-border-color:#16896a;--pico-form-element-valid-focus-color:var(--pico-form-element-valid-active-border-color);--pico-switch-background-color:#333c4e;--pico-switch-checked-background-color:var(--pico-primary-background);--pico-switch-color:#fff;--pico-switch-thumb-box-shadow:0 0 0 rgba(0, 0, 0, 0);--pico-range-border-color:#202632;--pico-range-active-border-color:#2a3140;--pico-range-thumb-border-color:var(--pico-background-color);--pico-range-thumb-color:var(--pico-secondary-background);--pico-range-thumb-active-color:var(--pico-primary-background);--pico-accordion-border-color:var(--pico-muted-border-color);--pico-accordion-active-summary-color:var(--pico-primary-hover);--pico-accordion-close-summary-color:var(--pico-color);--pico-accordion-open-summary-color:var(--pico-muted-color);--pico-card-background-color:#181c25;--pico-card-border-color:var(--pico-card-background-color);--pico-card-box-shadow:var(--pico-box-shadow);--pico-card-sectioning-background-color:#1a1f28;--pico-dropdown-background-color:#181c25;--pico-dropdown-border-color:#202632;--pico-dropdown-box-shadow:var(--pico-box-shadow);--pico-dropdown-color:var(--pico-color);--pico-dropdown-hover-background-color:#202632;--pico-loading-spinner-opacity:0.5;--pico-modal-overlay-background-color:rgba(8, 9, 10, 0.75);--pico-progress-background-color:#202632;--pico-progress-color:var(--pico-primary-background);--pico-tooltip-background-color:var(--pico-contrast-background);--pico-tooltip-color:var(--pico-contrast-inverse);--pico-icon-valid:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='rgb(42, 123, 111)' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='20 6 9 17 4 12'%3E%3C/polyline%3E%3C/svg%3E");--pico-icon-invalid:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='rgb(150, 74, 80)' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Ccircle cx='12' cy='12' r='10'%3E%3C/circle%3E%3Cline x1='12' y1='8' x2='12' y2='12'%3E%3C/line%3E%3Cline x1='12' y1='16' x2='12.01' y2='16'%3E%3C/line%3E%3C/svg%3E");color-scheme:dark}:root:not([data-theme]) input:is([type=submit],[type=button],[type=reset],[type=checkbox],[type=radio],[type=file]){--pico-form-element-focus-color:var(--pico-primary-focus)}:root:not([data-theme]) details summary[role=button].contrast:not(.outline)::after{filter:brightness(0)}:root:not([data-theme]) 
[aria-busy=true]:not(input,select,textarea).contrast:is(button,[type=submit],[type=button],[type=reset],[role=button]):not(.outline)::before{filter:brightness(0)}}[data-theme=dark]{--pico-background-color:#13171f;--pico-color:#c2c7d0;--pico-text-selection-color:rgba(78, 179, 27, 0.1875);--pico-muted-color:#7b8495;--pico-muted-border-color:#202632;--pico-primary:#4eb31b;--pico-primary-background:#398712;--pico-primary-border:var(--pico-primary-background);--pico-primary-underline:rgba(78, 179, 27, 0.5);--pico-primary-hover:#5dd121;--pico-primary-hover-background:#409614;--pico-primary-hover-border:var(--pico-primary-hover-background);--pico-primary-hover-underline:var(--pico-primary-hover);--pico-primary-focus:rgba(78, 179, 27, 0.375);--pico-primary-inverse:#fff;--pico-secondary:#969eaf;--pico-secondary-background:#525f7a;--pico-secondary-border:var(--pico-secondary-background);--pico-secondary-underline:rgba(150, 158, 175, 0.5);--pico-secondary-hover:#b3b9c5;--pico-secondary-hover-background:#5d6b89;--pico-secondary-hover-border:var(--pico-secondary-hover-background);--pico-secondary-hover-underline:var(--pico-secondary-hover);--pico-secondary-focus:rgba(144, 158, 190, 0.25);--pico-secondary-inverse:#fff;--pico-contrast:#dfe3eb;--pico-contrast-background:#eff1f4;--pico-contrast-border:var(--pico-contrast-background);--pico-contrast-underline:rgba(223, 227, 235, 0.5);--pico-contrast-hover:#fff;--pico-contrast-hover-background:#fff;--pico-contrast-hover-border:var(--pico-contrast-hover-background);--pico-contrast-hover-underline:var(--pico-contrast-hover);--pico-contrast-focus:rgba(207, 213, 226, 0.25);--pico-contrast-inverse:#000;--pico-box-shadow:0.0145rem 0.029rem 0.174rem rgba(7, 9, 12, 0.01698),0.0335rem 0.067rem 0.402rem rgba(7, 9, 12, 0.024),0.0625rem 0.125rem 0.75rem rgba(7, 9, 12, 0.03),0.1125rem 0.225rem 1.35rem rgba(7, 9, 12, 0.036),0.2085rem 0.417rem 2.502rem rgba(7, 9, 12, 0.04302),0.5rem 1rem 6rem rgba(7, 9, 12, 0.06),0 0 0 0.0625rem rgba(7, 9, 12, 0.015);--pico-h1-color:#f0f1f3;--pico-h2-color:#e0e3e7;--pico-h3-color:#c2c7d0;--pico-h4-color:#b3b9c5;--pico-h5-color:#a4acba;--pico-h6-color:#8891a4;--pico-mark-background-color:#014063;--pico-mark-color:#fff;--pico-ins-color:#62af9a;--pico-del-color:#ce7e7b;--pico-blockquote-border-color:var(--pico-muted-border-color);--pico-blockquote-footer-color:var(--pico-muted-color);--pico-button-box-shadow:0 0 0 rgba(0, 0, 0, 0);--pico-button-hover-box-shadow:0 0 0 rgba(0, 0, 0, 0);--pico-table-border-color:var(--pico-muted-border-color);--pico-table-row-stripped-background-color:rgba(111, 120, 135, 
0.0375);--pico-code-background-color:#1a1f28;--pico-code-color:#8891a4;--pico-code-kbd-background-color:var(--pico-color);--pico-code-kbd-color:var(--pico-background-color);--pico-form-element-background-color:#1c212c;--pico-form-element-selected-background-color:#2a3140;--pico-form-element-border-color:#2a3140;--pico-form-element-color:#e0e3e7;--pico-form-element-placeholder-color:#8891a4;--pico-form-element-active-background-color:#1a1f28;--pico-form-element-active-border-color:var(--pico-primary-border);--pico-form-element-focus-color:var(--pico-primary-border);--pico-form-element-disabled-opacity:0.5;--pico-form-element-invalid-border-color:#964a50;--pico-form-element-invalid-active-border-color:#b7403b;--pico-form-element-invalid-focus-color:var(--pico-form-element-invalid-active-border-color);--pico-form-element-valid-border-color:#2a7b6f;--pico-form-element-valid-active-border-color:#16896a;--pico-form-element-valid-focus-color:var(--pico-form-element-valid-active-border-color);--pico-switch-background-color:#333c4e;--pico-switch-checked-background-color:var(--pico-primary-background);--pico-switch-color:#fff;--pico-switch-thumb-box-shadow:0 0 0 rgba(0, 0, 0, 0);--pico-range-border-color:#202632;--pico-range-active-border-color:#2a3140;--pico-range-thumb-border-color:var(--pico-background-color);--pico-range-thumb-color:var(--pico-secondary-background);--pico-range-thumb-active-color:var(--pico-primary-background);--pico-accordion-border-color:var(--pico-muted-border-color);--pico-accordion-active-summary-color:var(--pico-primary-hover);--pico-accordion-close-summary-color:var(--pico-color);--pico-accordion-open-summary-color:var(--pico-muted-color);--pico-card-background-color:#181c25;--pico-card-border-color:var(--pico-card-background-color);--pico-card-box-shadow:var(--pico-box-shadow);--pico-card-sectioning-background-color:#1a1f28;--pico-dropdown-background-color:#181c25;--pico-dropdown-border-color:#202632;--pico-dropdown-box-shadow:var(--pico-box-shadow);--pico-dropdown-color:var(--pico-color);--pico-dropdown-hover-background-color:#202632;--pico-loading-spinner-opacity:0.5;--pico-modal-overlay-background-color:rgba(8, 9, 10, 0.75);--pico-progress-background-color:#202632;--pico-progress-color:var(--pico-primary-background);--pico-tooltip-background-color:var(--pico-contrast-background);--pico-tooltip-color:var(--pico-contrast-inverse);--pico-icon-valid:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='rgb(42, 123, 111)' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='20 6 9 17 4 12'%3E%3C/polyline%3E%3C/svg%3E");--pico-icon-invalid:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='rgb(150, 74, 80)' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Ccircle cx='12' cy='12' r='10'%3E%3C/circle%3E%3Cline x1='12' y1='8' x2='12' y2='12'%3E%3C/line%3E%3Cline x1='12' y1='16' x2='12.01' y2='16'%3E%3C/line%3E%3C/svg%3E");color-scheme:dark}[data-theme=dark] input:is([type=submit],[type=button],[type=reset],[type=checkbox],[type=radio],[type=file]){--pico-form-element-focus-color:var(--pico-primary-focus)}[data-theme=dark] details summary[role=button].contrast:not(.outline)::after{filter:brightness(0)}[data-theme=dark] 
[aria-busy=true]:not(input,select,textarea).contrast:is(button,[type=submit],[type=button],[type=reset],[role=button]):not(.outline)::before{filter:brightness(0)}[type=checkbox],[type=radio],[type=range],progress{accent-color:var(--pico-primary)}*,::after,::before{box-sizing:border-box;background-repeat:no-repeat}::after,::before{text-decoration:inherit;vertical-align:inherit}:where(:root){-webkit-tap-highlight-color:transparent;-webkit-text-size-adjust:100%;-moz-text-size-adjust:100%;text-size-adjust:100%;background-color:var(--pico-background-color);color:var(--pico-color);font-weight:var(--pico-font-weight);font-size:var(--pico-font-size);line-height:var(--pico-line-height);font-family:var(--pico-font-family);text-underline-offset:var(--pico-text-underline-offset);text-rendering:optimizeLegibility;overflow-wrap:break-word;-moz-tab-size:4;-o-tab-size:4;tab-size:4}body{width:100%;margin:0}main{display:block}body>footer,body>header,body>main{padding-block:var(--pico-block-spacing-vertical)}section{margin-bottom:var(--pico-block-spacing-vertical)}.container,.container-fluid{width:100%;margin-right:auto;margin-left:auto;padding-right:var(--pico-spacing);padding-left:var(--pico-spacing)}@media (min-width:576px){.container{max-width:510px;padding-right:0;padding-left:0}}@media (min-width:768px){.container{max-width:700px}}@media (min-width:1024px){.container{max-width:950px}}@media (min-width:1280px){.container{max-width:1200px}}@media (min-width:1536px){.container{max-width:1450px}}.grid{grid-column-gap:var(--pico-grid-column-gap);grid-row-gap:var(--pico-grid-row-gap);display:grid;grid-template-columns:1fr}@media (min-width:768px){.grid{grid-template-columns:repeat(auto-fit,minmax(0%,1fr))}}.grid>*{min-width:0}.overflow-auto{overflow:auto}b,strong{font-weight:bolder}sub,sup{position:relative;font-size:.75em;line-height:0;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}address,blockquote,dl,ol,p,pre,table,ul{margin-top:0;margin-bottom:var(--pico-typography-spacing-vertical);color:var(--pico-color);font-style:normal;font-weight:var(--pico-font-weight)}h1,h2,h3,h4,h5,h6{margin-top:0;margin-bottom:var(--pico-typography-spacing-vertical);color:var(--pico-color);font-weight:var(--pico-font-weight);font-size:var(--pico-font-size);line-height:var(--pico-line-height);font-family:var(--pico-font-family)}h1{--pico-color:var(--pico-h1-color)}h2{--pico-color:var(--pico-h2-color)}h3{--pico-color:var(--pico-h3-color)}h4{--pico-color:var(--pico-h4-color)}h5{--pico-color:var(--pico-h5-color)}h6{--pico-color:var(--pico-h6-color)}:where(article,address,blockquote,dl,figure,form,ol,p,pre,table,ul)~:is(h1,h2,h3,h4,h5,h6){margin-top:var(--pico-typography-spacing-top)}p{margin-bottom:var(--pico-typography-spacing-vertical)}hgroup{margin-bottom:var(--pico-typography-spacing-vertical)}hgroup>*{margin-top:0;margin-bottom:0}hgroup>:not(:first-child):last-child{--pico-color:var(--pico-muted-color);--pico-font-weight:unset;font-size:1rem}:where(ol,ul) li{margin-bottom:calc(var(--pico-typography-spacing-vertical) * .25)}:where(dl,ol,ul) :where(dl,ol,ul){margin:0;margin-top:calc(var(--pico-typography-spacing-vertical) * .25)}ul li{list-style:square}mark{padding:.125rem .25rem;background-color:var(--pico-mark-background-color);color:var(--pico-mark-color);vertical-align:baseline}blockquote{display:block;margin:var(--pico-typography-spacing-vertical) 0;padding:var(--pico-spacing);border-right:none;border-left:.25rem solid var(--pico-blockquote-border-color);border-inline-start:0.25rem solid 
var(--pico-blockquote-border-color);border-inline-end:none}blockquote footer{margin-top:calc(var(--pico-typography-spacing-vertical) * .5);color:var(--pico-blockquote-footer-color)}abbr[title]{border-bottom:1px dotted;text-decoration:none;cursor:help}ins{color:var(--pico-ins-color);text-decoration:none}del{color:var(--pico-del-color)}::-moz-selection{background-color:var(--pico-text-selection-color)}::selection{background-color:var(--pico-text-selection-color)}:where(a:not([role=button])),[role=link]{--pico-color:var(--pico-primary);--pico-background-color:transparent;--pico-underline:var(--pico-primary-underline);outline:0;background-color:var(--pico-background-color);color:var(--pico-color);-webkit-text-decoration:var(--pico-text-decoration);text-decoration:var(--pico-text-decoration);text-decoration-color:var(--pico-underline);text-underline-offset:0.125em;transition:background-color var(--pico-transition),color var(--pico-transition),box-shadow var(--pico-transition),-webkit-text-decoration var(--pico-transition);transition:background-color var(--pico-transition),color var(--pico-transition),text-decoration var(--pico-transition),box-shadow var(--pico-transition);transition:background-color var(--pico-transition),color var(--pico-transition),text-decoration var(--pico-transition),box-shadow var(--pico-transition),-webkit-text-decoration var(--pico-transition)}:where(a:not([role=button])):is([aria-current]:not([aria-current=false]),:hover,:active,:focus),[role=link]:is([aria-current]:not([aria-current=false]),:hover,:active,:focus){--pico-color:var(--pico-primary-hover);--pico-underline:var(--pico-primary-hover-underline);--pico-text-decoration:underline}:where(a:not([role=button])):focus-visible,[role=link]:focus-visible{box-shadow:0 0 0 var(--pico-outline-width) var(--pico-primary-focus)}:where(a:not([role=button])).secondary,[role=link].secondary{--pico-color:var(--pico-secondary);--pico-underline:var(--pico-secondary-underline)}:where(a:not([role=button])).secondary:is([aria-current]:not([aria-current=false]),:hover,:active,:focus),[role=link].secondary:is([aria-current]:not([aria-current=false]),:hover,:active,:focus){--pico-color:var(--pico-secondary-hover);--pico-underline:var(--pico-secondary-hover-underline)}:where(a:not([role=button])).contrast,[role=link].contrast{--pico-color:var(--pico-contrast);--pico-underline:var(--pico-contrast-underline)}:where(a:not([role=button])).contrast:is([aria-current]:not([aria-current=false]),:hover,:active,:focus),[role=link].contrast:is([aria-current]:not([aria-current=false]),:hover,:active,:focus){--pico-color:var(--pico-contrast-hover);--pico-underline:var(--pico-contrast-hover-underline)}a[role=button]{display:inline-block}button{margin:0;overflow:visible;font-family:inherit;text-transform:none}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button}[role=button],[type=button],[type=file]::file-selector-button,[type=reset],[type=submit],button{--pico-background-color:var(--pico-primary-background);--pico-border-color:var(--pico-primary-border);--pico-color:var(--pico-primary-inverse);--pico-box-shadow:var(--pico-button-box-shadow, 0 0 0 rgba(0, 0, 0, 0));padding:var(--pico-form-element-spacing-vertical) var(--pico-form-element-spacing-horizontal);border:var(--pico-border-width) solid 
var(--pico-border-color);border-radius:var(--pico-border-radius);outline:0;background-color:var(--pico-background-color);box-shadow:var(--pico-box-shadow);color:var(--pico-color);font-weight:var(--pico-font-weight);font-size:1rem;line-height:var(--pico-line-height);text-align:center;text-decoration:none;cursor:pointer;-webkit-user-select:none;-moz-user-select:none;user-select:none;transition:background-color var(--pico-transition),border-color var(--pico-transition),color var(--pico-transition),box-shadow var(--pico-transition)}[role=button]:is(:hover,:active,:focus),[role=button]:is([aria-current]:not([aria-current=false])),[type=button]:is(:hover,:active,:focus),[type=button]:is([aria-current]:not([aria-current=false])),[type=file]::file-selector-button:is(:hover,:active,:focus),[type=file]::file-selector-button:is([aria-current]:not([aria-current=false])),[type=reset]:is(:hover,:active,:focus),[type=reset]:is([aria-current]:not([aria-current=false])),[type=submit]:is(:hover,:active,:focus),[type=submit]:is([aria-current]:not([aria-current=false])),button:is(:hover,:active,:focus),button:is([aria-current]:not([aria-current=false])){--pico-background-color:var(--pico-primary-hover-background);--pico-border-color:var(--pico-primary-hover-border);--pico-box-shadow:var(--pico-button-hover-box-shadow, 0 0 0 rgba(0, 0, 0, 0));--pico-color:var(--pico-primary-inverse)}[role=button]:focus,[role=button]:is([aria-current]:not([aria-current=false])):focus,[type=button]:focus,[type=button]:is([aria-current]:not([aria-current=false])):focus,[type=file]::file-selector-button:focus,[type=file]::file-selector-button:is([aria-current]:not([aria-current=false])):focus,[type=reset]:focus,[type=reset]:is([aria-current]:not([aria-current=false])):focus,[type=submit]:focus,[type=submit]:is([aria-current]:not([aria-current=false])):focus,button:focus,button:is([aria-current]:not([aria-current=false])):focus{--pico-box-shadow:var(--pico-button-hover-box-shadow, 0 0 0 rgba(0, 0, 0, 0)),0 0 0 var(--pico-outline-width) var(--pico-primary-focus)}[type=button],[type=reset],[type=submit]{margin-bottom:var(--pico-spacing)}:is(button,[type=submit],[type=button],[role=button]).secondary,[type=file]::file-selector-button,[type=reset]{--pico-background-color:var(--pico-secondary-background);--pico-border-color:var(--pico-secondary-border);--pico-color:var(--pico-secondary-inverse);cursor:pointer}:is(button,[type=submit],[type=button],[role=button]).secondary:is([aria-current]:not([aria-current=false]),:hover,:active,:focus),[type=file]::file-selector-button:is([aria-current]:not([aria-current=false]),:hover,:active,:focus),[type=reset]:is([aria-current]:not([aria-current=false]),:hover,:active,:focus){--pico-background-color:var(--pico-secondary-hover-background);--pico-border-color:var(--pico-secondary-hover-border);--pico-color:var(--pico-secondary-inverse)}:is(button,[type=submit],[type=button],[role=button]).secondary:focus,:is(button,[type=submit],[type=button],[role=button]).secondary:is([aria-current]:not([aria-current=false])):focus,[type=file]::file-selector-button:focus,[type=file]::file-selector-button:is([aria-current]:not([aria-current=false])):focus,[type=reset]:focus,[type=reset]:is([aria-current]:not([aria-current=false])):focus{--pico-box-shadow:var(--pico-button-hover-box-shadow, 0 0 0 rgba(0, 0, 0, 0)),0 0 0 var(--pico-outline-width) 
var(--pico-secondary-focus)}:is(button,[type=submit],[type=button],[role=button]).contrast{--pico-background-color:var(--pico-contrast-background);--pico-border-color:var(--pico-contrast-border);--pico-color:var(--pico-contrast-inverse)}:is(button,[type=submit],[type=button],[role=button]).contrast:is([aria-current]:not([aria-current=false]),:hover,:active,:focus){--pico-background-color:var(--pico-contrast-hover-background);--pico-border-color:var(--pico-contrast-hover-border);--pico-color:var(--pico-contrast-inverse)}:is(button,[type=submit],[type=button],[role=button]).contrast:focus,:is(button,[type=submit],[type=button],[role=button]).contrast:is([aria-current]:not([aria-current=false])):focus{--pico-box-shadow:var(--pico-button-hover-box-shadow, 0 0 0 rgba(0, 0, 0, 0)),0 0 0 var(--pico-outline-width) var(--pico-contrast-focus)}:is(button,[type=submit],[type=button],[role=button]).outline,[type=reset].outline{--pico-background-color:transparent;--pico-color:var(--pico-primary);--pico-border-color:var(--pico-primary)}:is(button,[type=submit],[type=button],[role=button]).outline:is([aria-current]:not([aria-current=false]),:hover,:active,:focus),[type=reset].outline:is([aria-current]:not([aria-current=false]),:hover,:active,:focus){--pico-background-color:transparent;--pico-color:var(--pico-primary-hover);--pico-border-color:var(--pico-primary-hover)}:is(button,[type=submit],[type=button],[role=button]).outline.secondary,[type=reset].outline{--pico-color:var(--pico-secondary);--pico-border-color:var(--pico-secondary)}:is(button,[type=submit],[type=button],[role=button]).outline.secondary:is([aria-current]:not([aria-current=false]),:hover,:active,:focus),[type=reset].outline:is([aria-current]:not([aria-current=false]),:hover,:active,:focus){--pico-color:var(--pico-secondary-hover);--pico-border-color:var(--pico-secondary-hover)}:is(button,[type=submit],[type=button],[role=button]).outline.contrast{--pico-color:var(--pico-contrast);--pico-border-color:var(--pico-contrast)}:is(button,[type=submit],[type=button],[role=button]).outline.contrast:is([aria-current]:not([aria-current=false]),:hover,:active,:focus){--pico-color:var(--pico-contrast-hover);--pico-border-color:var(--pico-contrast-hover)}:where(button,[type=submit],[type=reset],[type=button],[role=button])[disabled],:where(fieldset[disabled]) :is(button,[type=submit],[type=button],[type=reset],[role=button]){opacity:.5;pointer-events:none}:where(table){width:100%;border-collapse:collapse;border-spacing:0;text-indent:0}td,th{padding:calc(var(--pico-spacing)/ 2) var(--pico-spacing);border-bottom:var(--pico-border-width) solid var(--pico-table-border-color);background-color:var(--pico-background-color);color:var(--pico-color);font-weight:var(--pico-font-weight);text-align:left;text-align:start}tfoot td,tfoot th{border-top:var(--pico-border-width) solid var(--pico-table-border-color);border-bottom:0}table.striped tbody tr:nth-child(odd) td,table.striped tbody tr:nth-child(odd) th{background-color:var(--pico-table-row-stripped-background-color)}:where(audio,canvas,iframe,img,svg,video){vertical-align:middle}audio,video{display:inline-block}audio:not([controls]){display:none;height:0}:where(iframe){border-style:none}img{max-width:100%;height:auto;border-style:none}:where(svg:not([fill])){fill:currentColor}svg:not(:root){overflow:hidden}code,kbd,pre,samp{font-size:.875em;font-family:var(--pico-font-family)}pre 
code{font-size:inherit;font-family:inherit}pre{-ms-overflow-style:scrollbar;overflow:auto}code,kbd,pre{border-radius:var(--pico-border-radius);background:var(--pico-code-background-color);color:var(--pico-code-color);font-weight:var(--pico-font-weight);line-height:initial}code,kbd{display:inline-block;padding:.375rem}pre{display:block;margin-bottom:var(--pico-spacing);overflow-x:auto}pre>code{display:block;padding:var(--pico-spacing);background:0 0;line-height:var(--pico-line-height)}kbd{background-color:var(--pico-code-kbd-background-color);color:var(--pico-code-kbd-color);vertical-align:baseline}figure{display:block;margin:0;padding:0}figure figcaption{padding:calc(var(--pico-spacing) * .5) 0;color:var(--pico-muted-color)}hr{height:0;margin:var(--pico-typography-spacing-vertical) 0;border:0;border-top:1px solid var(--pico-muted-border-color);color:inherit}[hidden],template{display:none!important}canvas{display:inline-block}input,optgroup,select,textarea{margin:0;font-size:1rem;line-height:var(--pico-line-height);font-family:inherit;letter-spacing:inherit}input{overflow:visible}select{text-transform:none}legend{max-width:100%;padding:0;color:inherit;white-space:normal}textarea{overflow:auto}[type=checkbox],[type=radio]{padding:0}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}[type=search]::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}::-moz-focus-inner{padding:0;border-style:none}:-moz-focusring{outline:0}:-moz-ui-invalid{box-shadow:none}::-ms-expand{display:none}[type=file],[type=range]{padding:0;border-width:0}input:not([type=checkbox],[type=radio],[type=range]){height:calc(1rem * var(--pico-line-height) + var(--pico-form-element-spacing-vertical) * 2 + var(--pico-border-width) * 2)}fieldset{width:100%;margin:0;margin-bottom:var(--pico-spacing);padding:0;border:0}fieldset legend,label{display:block;margin-bottom:calc(var(--pico-spacing) * .375);color:var(--pico-color);font-weight:var(--pico-form-label-font-weight,var(--pico-font-weight))}fieldset legend{margin-bottom:calc(var(--pico-spacing) * .5)}button[type=submit],input:not([type=checkbox],[type=radio]),select,textarea{width:100%}input:not([type=checkbox],[type=radio],[type=range],[type=file]),select,textarea{-webkit-appearance:none;-moz-appearance:none;appearance:none;padding:var(--pico-form-element-spacing-vertical) var(--pico-form-element-spacing-horizontal)}input,select,textarea{--pico-background-color:var(--pico-form-element-background-color);--pico-border-color:var(--pico-form-element-border-color);--pico-color:var(--pico-form-element-color);--pico-box-shadow:none;border:var(--pico-border-width) solid var(--pico-border-color);border-radius:var(--pico-border-radius);outline:0;background-color:var(--pico-background-color);box-shadow:var(--pico-box-shadow);color:var(--pico-color);font-weight:var(--pico-font-weight);transition:background-color var(--pico-transition),border-color var(--pico-transition),color var(--pico-transition),box-shadow 
var(--pico-transition)}:where(select,textarea):not([readonly]):is(:active,:focus),input:not([type=submit],[type=button],[type=reset],[type=checkbox],[type=radio],[readonly]):is(:active,:focus){--pico-background-color:var(--pico-form-element-active-background-color)}:where(select,textarea):not([readonly]):is(:active,:focus),input:not([type=submit],[type=button],[type=reset],[role=switch],[readonly]):is(:active,:focus){--pico-border-color:var(--pico-form-element-active-border-color)}:where(select,textarea):not([readonly]):focus,input:not([type=submit],[type=button],[type=reset],[type=range],[type=file],[readonly]):focus{--pico-box-shadow:0 0 0 var(--pico-outline-width) var(--pico-form-element-focus-color)}:where(fieldset[disabled]) :is(input:not([type=submit],[type=button],[type=reset]),select,textarea),input:not([type=submit],[type=button],[type=reset])[disabled],label[aria-disabled=true],select[disabled],textarea[disabled]{opacity:var(--pico-form-element-disabled-opacity);pointer-events:none}label[aria-disabled=true] input[disabled]{opacity:1}:where(input,select,textarea):not([type=checkbox],[type=radio],[type=date],[type=datetime-local],[type=month],[type=time],[type=week],[type=range])[aria-invalid]{padding-right:calc(var(--pico-form-element-spacing-horizontal) + 1.5rem)!important;padding-left:var(--pico-form-element-spacing-horizontal);padding-inline-start:var(--pico-form-element-spacing-horizontal)!important;padding-inline-end:calc(var(--pico-form-element-spacing-horizontal) + 1.5rem)!important;background-position:center right .75rem;background-size:1rem auto;background-repeat:no-repeat}:where(input,select,textarea):not([type=checkbox],[type=radio],[type=date],[type=datetime-local],[type=month],[type=time],[type=week],[type=range])[aria-invalid=false]:not(select){background-image:var(--pico-icon-valid)}:where(input,select,textarea):not([type=checkbox],[type=radio],[type=date],[type=datetime-local],[type=month],[type=time],[type=week],[type=range])[aria-invalid=true]:not(select){background-image:var(--pico-icon-invalid)}:where(input,select,textarea)[aria-invalid=false]{--pico-border-color:var(--pico-form-element-valid-border-color)}:where(input,select,textarea)[aria-invalid=false]:is(:active,:focus){--pico-border-color:var(--pico-form-element-valid-active-border-color)!important}:where(input,select,textarea)[aria-invalid=false]:is(:active,:focus):not([type=checkbox],[type=radio]){--pico-box-shadow:0 0 0 var(--pico-outline-width) var(--pico-form-element-valid-focus-color)!important}:where(input,select,textarea)[aria-invalid=true]{--pico-border-color:var(--pico-form-element-invalid-border-color)}:where(input,select,textarea)[aria-invalid=true]:is(:active,:focus){--pico-border-color:var(--pico-form-element-invalid-active-border-color)!important}:where(input,select,textarea)[aria-invalid=true]:is(:active,:focus):not([type=checkbox],[type=radio]){--pico-box-shadow:0 0 0 var(--pico-outline-width) var(--pico-form-element-invalid-focus-color)!important}[dir=rtl] :where(input,select,textarea):not([type=checkbox],[type=radio]):is([aria-invalid],[aria-invalid=true],[aria-invalid=false]){background-position:center left 
.75rem}input::-webkit-input-placeholder,input::placeholder,select:invalid,textarea::-webkit-input-placeholder,textarea::placeholder{color:var(--pico-form-element-placeholder-color);opacity:1}input:not([type=checkbox],[type=radio]),select,textarea{margin-bottom:var(--pico-spacing)}select::-ms-expand{border:0;background-color:transparent}select:not([multiple],[size]){padding-right:calc(var(--pico-form-element-spacing-horizontal) + 1.5rem);padding-left:var(--pico-form-element-spacing-horizontal);padding-inline-start:var(--pico-form-element-spacing-horizontal);padding-inline-end:calc(var(--pico-form-element-spacing-horizontal) + 1.5rem);background-image:var(--pico-icon-chevron);background-position:center right .75rem;background-size:1rem auto;background-repeat:no-repeat}select[multiple] option:checked{background:var(--pico-form-element-selected-background-color);color:var(--pico-form-element-color)}[dir=rtl] select:not([multiple],[size]){background-position:center left .75rem}textarea{display:block;resize:vertical}textarea[aria-invalid]{--pico-icon-height:calc(1rem * var(--pico-line-height) + var(--pico-form-element-spacing-vertical) * 2 + var(--pico-border-width) * 2);background-position:top right .75rem!important;background-size:1rem var(--pico-icon-height)!important}:where(input,select,textarea,fieldset,.grid)+small{display:block;width:100%;margin-top:calc(var(--pico-spacing) * -.75);margin-bottom:var(--pico-spacing);color:var(--pico-muted-color)}:where(input,select,textarea,fieldset,.grid)[aria-invalid=false]+small{color:var(--pico-ins-color)}:where(input,select,textarea,fieldset,.grid)[aria-invalid=true]+small{color:var(--pico-del-color)}label>:where(input,select,textarea){margin-top:calc(var(--pico-spacing) * .25)}label:has([type=checkbox],[type=radio]){width:-moz-fit-content;width:fit-content;cursor:pointer}[type=checkbox],[type=radio]{-webkit-appearance:none;-moz-appearance:none;appearance:none;width:1.25em;height:1.25em;margin-top:-.125em;margin-inline-end:.5em;border-width:var(--pico-border-width);vertical-align:middle;cursor:pointer}[type=checkbox]::-ms-check,[type=radio]::-ms-check{display:none}[type=checkbox]:checked,[type=checkbox]:checked:active,[type=checkbox]:checked:focus,[type=radio]:checked,[type=radio]:checked:active,[type=radio]:checked:focus{--pico-background-color:var(--pico-primary-background);--pico-border-color:var(--pico-primary-border);background-image:var(--pico-icon-checkbox);background-position:center;background-size:.75em auto;background-repeat:no-repeat}[type=checkbox]~label,[type=radio]~label{display:inline-block;margin-bottom:0;cursor:pointer}[type=checkbox]~label:not(:last-of-type),[type=radio]~label:not(:last-of-type){margin-inline-end:1em}[type=checkbox]:indeterminate{--pico-background-color:var(--pico-primary-background);--pico-border-color:var(--pico-primary-border);background-image:var(--pico-icon-minus);background-position:center;background-size:.75em auto;background-repeat:no-repeat}[type=radio]{border-radius:50%}[type=radio]:checked,[type=radio]:checked:active,[type=radio]:checked:focus{--pico-background-color:var(--pico-primary-inverse);border-width:.35em;background-image:none}[type=checkbox][role=switch]{--pico-background-color:var(--pico-switch-background-color);--pico-color:var(--pico-switch-color);width:2.25em;height:1.25em;border:var(--pico-border-width) solid 
var(--pico-border-color);border-radius:1.25em;background-color:var(--pico-background-color);line-height:1.25em}[type=checkbox][role=switch]:not([aria-invalid]){--pico-border-color:var(--pico-switch-background-color)}[type=checkbox][role=switch]:before{display:block;aspect-ratio:1;height:100%;border-radius:50%;background-color:var(--pico-color);box-shadow:var(--pico-switch-thumb-box-shadow);content:"";transition:margin .1s ease-in-out}[type=checkbox][role=switch]:focus{--pico-background-color:var(--pico-switch-background-color);--pico-border-color:var(--pico-switch-background-color)}[type=checkbox][role=switch]:checked{--pico-background-color:var(--pico-switch-checked-background-color);--pico-border-color:var(--pico-switch-checked-background-color);background-image:none}[type=checkbox][role=switch]:checked::before{margin-inline-start:calc(2.25em - 1.25em)}[type=checkbox][role=switch][disabled]{--pico-background-color:var(--pico-border-color)}[type=checkbox][aria-invalid=false]:checked,[type=checkbox][aria-invalid=false]:checked:active,[type=checkbox][aria-invalid=false]:checked:focus,[type=checkbox][role=switch][aria-invalid=false]:checked,[type=checkbox][role=switch][aria-invalid=false]:checked:active,[type=checkbox][role=switch][aria-invalid=false]:checked:focus{--pico-background-color:var(--pico-form-element-valid-border-color)}[type=checkbox]:checked:active[aria-invalid=true],[type=checkbox]:checked:focus[aria-invalid=true],[type=checkbox]:checked[aria-invalid=true],[type=checkbox][role=switch]:checked:active[aria-invalid=true],[type=checkbox][role=switch]:checked:focus[aria-invalid=true],[type=checkbox][role=switch]:checked[aria-invalid=true]{--pico-background-color:var(--pico-form-element-invalid-border-color)}[type=checkbox][aria-invalid=false]:checked,[type=checkbox][aria-invalid=false]:checked:active,[type=checkbox][aria-invalid=false]:checked:focus,[type=checkbox][role=switch][aria-invalid=false]:checked,[type=checkbox][role=switch][aria-invalid=false]:checked:active,[type=checkbox][role=switch][aria-invalid=false]:checked:focus,[type=radio][aria-invalid=false]:checked,[type=radio][aria-invalid=false]:checked:active,[type=radio][aria-invalid=false]:checked:focus{--pico-border-color:var(--pico-form-element-valid-border-color)}[type=checkbox]:checked:active[aria-invalid=true],[type=checkbox]:checked:focus[aria-invalid=true],[type=checkbox]:checked[aria-invalid=true],[type=checkbox][role=switch]:checked:active[aria-invalid=true],[type=checkbox][role=switch]:checked:focus[aria-invalid=true],[type=checkbox][role=switch]:checked[aria-invalid=true],[type=radio]:checked:active[aria-invalid=true],[type=radio]:checked:focus[aria-invalid=true],[type=radio]:checked[aria-invalid=true]{--pico-border-color:var(--pico-form-element-invalid-border-color)}[type=color]::-webkit-color-swatch-wrapper{padding:0}[type=color]::-moz-focus-inner{padding:0}[type=color]::-webkit-color-swatch{border:0;border-radius:calc(var(--pico-border-radius) * .5)}[type=color]::-moz-color-swatch{border:0;border-radius:calc(var(--pico-border-radius) * .5)}input:not([type=checkbox],[type=radio],[type=range],[type=file]):is([type=date],[type=datetime-local],[type=month],[type=time],[type=week]){--pico-icon-position:0.75rem;--pico-icon-width:1rem;padding-right:calc(var(--pico-icon-width) + var(--pico-icon-position));background-image:var(--pico-icon-date);background-position:center right var(--pico-icon-position);background-size:var(--pico-icon-width) 
auto;background-repeat:no-repeat}input:not([type=checkbox],[type=radio],[type=range],[type=file])[type=time]{background-image:var(--pico-icon-time)}[type=date]::-webkit-calendar-picker-indicator,[type=datetime-local]::-webkit-calendar-picker-indicator,[type=month]::-webkit-calendar-picker-indicator,[type=time]::-webkit-calendar-picker-indicator,[type=week]::-webkit-calendar-picker-indicator{width:var(--pico-icon-width);margin-right:calc(var(--pico-icon-width) * -1);margin-left:var(--pico-icon-position);opacity:0}@-moz-document url-prefix(){[type=date],[type=datetime-local],[type=month],[type=time],[type=week]{padding-right:var(--pico-form-element-spacing-horizontal)!important;background-image:none!important}}[dir=rtl] :is([type=date],[type=datetime-local],[type=month],[type=time],[type=week]){text-align:right}[type=file]{--pico-color:var(--pico-muted-color);margin-left:calc(var(--pico-outline-width) * -1);padding:calc(var(--pico-form-element-spacing-vertical) * .5) 0;padding-left:var(--pico-outline-width);border:0;border-radius:0;background:0 0}[type=file]::file-selector-button{margin-right:calc(var(--pico-spacing)/ 2);padding:calc(var(--pico-form-element-spacing-vertical) * .5) var(--pico-form-element-spacing-horizontal)}[type=file]:is(:hover,:active,:focus)::file-selector-button{--pico-background-color:var(--pico-secondary-hover-background);--pico-border-color:var(--pico-secondary-hover-border)}[type=file]:focus::file-selector-button{--pico-box-shadow:var(--pico-button-hover-box-shadow, 0 0 0 rgba(0, 0, 0, 0)),0 0 0 var(--pico-outline-width) var(--pico-secondary-focus)}[type=range]{-webkit-appearance:none;-moz-appearance:none;appearance:none;width:100%;height:1.25rem;background:0 0}[type=range]::-webkit-slider-runnable-track{width:100%;height:.375rem;border-radius:var(--pico-border-radius);background-color:var(--pico-range-border-color);-webkit-transition:background-color var(--pico-transition),box-shadow var(--pico-transition);transition:background-color var(--pico-transition),box-shadow var(--pico-transition)}[type=range]::-moz-range-track{width:100%;height:.375rem;border-radius:var(--pico-border-radius);background-color:var(--pico-range-border-color);-moz-transition:background-color var(--pico-transition),box-shadow var(--pico-transition);transition:background-color var(--pico-transition),box-shadow var(--pico-transition)}[type=range]::-ms-track{width:100%;height:.375rem;border-radius:var(--pico-border-radius);background-color:var(--pico-range-border-color);-ms-transition:background-color var(--pico-transition),box-shadow var(--pico-transition);transition:background-color var(--pico-transition),box-shadow var(--pico-transition)}[type=range]::-webkit-slider-thumb{-webkit-appearance:none;width:1.25rem;height:1.25rem;margin-top:-.4375rem;border:2px solid var(--pico-range-thumb-border-color);border-radius:50%;background-color:var(--pico-range-thumb-color);cursor:pointer;-webkit-transition:background-color var(--pico-transition),transform var(--pico-transition);transition:background-color var(--pico-transition),transform var(--pico-transition)}[type=range]::-moz-range-thumb{-webkit-appearance:none;width:1.25rem;height:1.25rem;margin-top:-.4375rem;border:2px solid var(--pico-range-thumb-border-color);border-radius:50%;background-color:var(--pico-range-thumb-color);cursor:pointer;-moz-transition:background-color var(--pico-transition),transform var(--pico-transition);transition:background-color var(--pico-transition),transform 
var(--pico-transition)}[type=range]::-ms-thumb{-webkit-appearance:none;width:1.25rem;height:1.25rem;margin-top:-.4375rem;border:2px solid var(--pico-range-thumb-border-color);border-radius:50%;background-color:var(--pico-range-thumb-color);cursor:pointer;-ms-transition:background-color var(--pico-transition),transform var(--pico-transition);transition:background-color var(--pico-transition),transform var(--pico-transition)}[type=range]:active,[type=range]:focus-within{--pico-range-border-color:var(--pico-range-active-border-color);--pico-range-thumb-color:var(--pico-range-thumb-active-color)}[type=range]:active::-webkit-slider-thumb{transform:scale(1.25)}[type=range]:active::-moz-range-thumb{transform:scale(1.25)}[type=range]:active::-ms-thumb{transform:scale(1.25)}input:not([type=checkbox],[type=radio],[type=range],[type=file])[type=search]{padding-inline-start:calc(var(--pico-form-element-spacing-horizontal) + 1.75rem);background-image:var(--pico-icon-search);background-position:center left calc(var(--pico-form-element-spacing-horizontal) + .125rem);background-size:1rem auto;background-repeat:no-repeat}input:not([type=checkbox],[type=radio],[type=range],[type=file])[type=search][aria-invalid]{padding-inline-start:calc(var(--pico-form-element-spacing-horizontal) + 1.75rem)!important;background-position:center left 1.125rem,center right .75rem}input:not([type=checkbox],[type=radio],[type=range],[type=file])[type=search][aria-invalid=false]{background-image:var(--pico-icon-search),var(--pico-icon-valid)}input:not([type=checkbox],[type=radio],[type=range],[type=file])[type=search][aria-invalid=true]{background-image:var(--pico-icon-search),var(--pico-icon-invalid)}[dir=rtl] :where(input):not([type=checkbox],[type=radio],[type=range],[type=file])[type=search]{background-position:center right 1.125rem}[dir=rtl] :where(input):not([type=checkbox],[type=radio],[type=range],[type=file])[type=search][aria-invalid]{background-position:center right 1.125rem,center left .75rem}details{display:block;margin-bottom:var(--pico-spacing)}details summary{line-height:1rem;list-style-type:none;cursor:pointer;transition:color var(--pico-transition)}details summary:not([role]){color:var(--pico-accordion-close-summary-color)}details summary::-webkit-details-marker{display:none}details summary::marker{display:none}details summary::-moz-list-bullet{list-style-type:none}details summary::after{display:block;width:1rem;height:1rem;margin-inline-start:calc(var(--pico-spacing,1rem) * .5);float:right;transform:rotate(-90deg);background-image:var(--pico-icon-chevron);background-position:right center;background-size:1rem auto;background-repeat:no-repeat;content:"";transition:transform var(--pico-transition)}details summary:focus{outline:0}details summary:focus:not([role]){color:var(--pico-accordion-active-summary-color)}details summary:focus-visible:not([role]){outline:var(--pico-outline-width) solid var(--pico-primary-focus);outline-offset:calc(var(--pico-spacing,1rem) * 0.5);color:var(--pico-primary)}details summary[role=button]{width:100%;text-align:left}details summary[role=button]::after{height:calc(1rem * var(--pico-line-height,1.5))}details[open]>summary{margin-bottom:var(--pico-spacing)}details[open]>summary:not([role]):not(:focus){color:var(--pico-accordion-open-summary-color)}details[open]>summary::after{transform:rotate(0)}[dir=rtl] details summary{text-align:right}[dir=rtl] details summary::after{float:left;background-position:left 
center}article{margin-bottom:var(--pico-block-spacing-vertical);padding:var(--pico-block-spacing-vertical) var(--pico-block-spacing-horizontal);border-radius:var(--pico-border-radius);background:var(--pico-card-background-color);box-shadow:var(--pico-card-box-shadow)}article>footer,article>header{margin-right:calc(var(--pico-block-spacing-horizontal) * -1);margin-left:calc(var(--pico-block-spacing-horizontal) * -1);padding:calc(var(--pico-block-spacing-vertical) * .66) var(--pico-block-spacing-horizontal);background-color:var(--pico-card-sectioning-background-color)}article>header{margin-top:calc(var(--pico-block-spacing-vertical) * -1);margin-bottom:var(--pico-block-spacing-vertical);border-bottom:var(--pico-border-width) solid var(--pico-card-border-color);border-top-right-radius:var(--pico-border-radius);border-top-left-radius:var(--pico-border-radius)}article>footer{margin-top:var(--pico-block-spacing-vertical);margin-bottom:calc(var(--pico-block-spacing-vertical) * -1);border-top:var(--pico-border-width) solid var(--pico-card-border-color);border-bottom-right-radius:var(--pico-border-radius);border-bottom-left-radius:var(--pico-border-radius)}details.dropdown{position:relative;border-bottom:none}details.dropdown summary::after,details.dropdown>a::after,details.dropdown>button::after{display:block;width:1rem;height:calc(1rem * var(--pico-line-height,1.5));margin-inline-start:.25rem;float:right;transform:rotate(0) translateX(.2rem);background-image:var(--pico-icon-chevron);background-position:right center;background-size:1rem auto;background-repeat:no-repeat;content:""}nav details.dropdown{margin-bottom:0}details.dropdown summary:not([role]){height:calc(1rem * var(--pico-line-height) + var(--pico-form-element-spacing-vertical) * 2 + var(--pico-border-width) * 2);padding:var(--pico-form-element-spacing-vertical) var(--pico-form-element-spacing-horizontal);border:var(--pico-border-width) solid var(--pico-form-element-border-color);border-radius:var(--pico-border-radius);background-color:var(--pico-form-element-background-color);color:var(--pico-form-element-placeholder-color);line-height:inherit;cursor:pointer;-webkit-user-select:none;-moz-user-select:none;user-select:none;transition:background-color var(--pico-transition),border-color var(--pico-transition),color var(--pico-transition),box-shadow var(--pico-transition)}details.dropdown summary:not([role]):active,details.dropdown summary:not([role]):focus{border-color:var(--pico-form-element-active-border-color);background-color:var(--pico-form-element-active-background-color)}details.dropdown summary:not([role]):focus{box-shadow:0 0 0 var(--pico-outline-width) var(--pico-form-element-focus-color)}details.dropdown summary:not([role]):focus-visible{outline:0}details.dropdown summary:not([role])[aria-invalid=false]{--pico-form-element-border-color:var(--pico-form-element-valid-border-color);--pico-form-element-active-border-color:var(--pico-form-element-valid-focus-color);--pico-form-element-focus-color:var(--pico-form-element-valid-focus-color)}details.dropdown summary:not([role])[aria-invalid=true]{--pico-form-element-border-color:var(--pico-form-element-invalid-border-color);--pico-form-element-active-border-color:var(--pico-form-element-invalid-focus-color);--pico-form-element-focus-color:var(--pico-form-element-invalid-focus-color)}nav details.dropdown{display:inline;margin:calc(var(--pico-nav-element-spacing-vertical) * -1) 0}nav details.dropdown summary::after{transform:rotate(0) translateX(0)}nav details.dropdown 
summary:not([role]){height:calc(1rem * var(--pico-line-height) + var(--pico-nav-link-spacing-vertical) * 2);padding:calc(var(--pico-nav-link-spacing-vertical) - var(--pico-border-width) * 2) var(--pico-nav-link-spacing-horizontal)}nav details.dropdown summary:not([role]):focus-visible{box-shadow:0 0 0 var(--pico-outline-width) var(--pico-primary-focus)}details.dropdown summary+ul{display:flex;z-index:99;position:absolute;left:0;flex-direction:column;width:100%;min-width:-moz-fit-content;min-width:fit-content;margin:0;margin-top:var(--pico-outline-width);padding:0;border:var(--pico-border-width) solid var(--pico-dropdown-border-color);border-radius:var(--pico-border-radius);background-color:var(--pico-dropdown-background-color);box-shadow:var(--pico-dropdown-box-shadow);color:var(--pico-dropdown-color);white-space:nowrap;opacity:0;transition:opacity var(--pico-transition),transform 0s ease-in-out 1s}details.dropdown summary+ul[dir=rtl]{right:0;left:auto}details.dropdown summary+ul li{width:100%;margin-bottom:0;padding:calc(var(--pico-form-element-spacing-vertical) * .5) var(--pico-form-element-spacing-horizontal);list-style:none}details.dropdown summary+ul li:first-of-type{margin-top:calc(var(--pico-form-element-spacing-vertical) * .5)}details.dropdown summary+ul li:last-of-type{margin-bottom:calc(var(--pico-form-element-spacing-vertical) * .5)}details.dropdown summary+ul li a{display:block;margin:calc(var(--pico-form-element-spacing-vertical) * -.5) calc(var(--pico-form-element-spacing-horizontal) * -1);padding:calc(var(--pico-form-element-spacing-vertical) * .5) var(--pico-form-element-spacing-horizontal);overflow:hidden;border-radius:0;color:var(--pico-dropdown-color);text-decoration:none;text-overflow:ellipsis}details.dropdown summary+ul li a:active,details.dropdown summary+ul li a:focus,details.dropdown summary+ul li a:focus-visible,details.dropdown summary+ul li a:hover,details.dropdown summary+ul li a[aria-current]:not([aria-current=false]){background-color:var(--pico-dropdown-hover-background-color)}details.dropdown summary+ul li label{width:100%}details.dropdown summary+ul li:has(label):hover{background-color:var(--pico-dropdown-hover-background-color)}details.dropdown[open] summary{margin-bottom:0}details.dropdown[open] summary+ul{transform:scaleY(1);opacity:1;transition:opacity var(--pico-transition),transform 0s ease-in-out 0s}details.dropdown[open] summary::before{display:block;z-index:1;position:fixed;width:100vw;height:100vh;inset:0;background:0 0;content:"";cursor:default}label>details.dropdown{margin-top:calc(var(--pico-spacing) * .25)}[role=group],[role=search]{display:inline-flex;position:relative;width:100%;margin-bottom:var(--pico-spacing);border-radius:var(--pico-border-radius);box-shadow:var(--pico-group-box-shadow,0 0 0 transparent);vertical-align:middle;transition:box-shadow var(--pico-transition)}[role=group] input:not([type=checkbox],[type=radio]),[role=group] select,[role=group]>*,[role=search] input:not([type=checkbox],[type=radio]),[role=search] select,[role=search]>*{position:relative;flex:1 1 auto;margin-bottom:0}[role=group] input:not([type=checkbox],[type=radio]):not(:first-child),[role=group] select:not(:first-child),[role=group]>:not(:first-child),[role=search] input:not([type=checkbox],[type=radio]):not(:first-child),[role=search] select:not(:first-child),[role=search]>:not(:first-child){margin-left:0;border-top-left-radius:0;border-bottom-left-radius:0}[role=group] input:not([type=checkbox],[type=radio]):not(:last-child),[role=group] 
select:not(:last-child),[role=group]>:not(:last-child),[role=search] input:not([type=checkbox],[type=radio]):not(:last-child),[role=search] select:not(:last-child),[role=search]>:not(:last-child){border-top-right-radius:0;border-bottom-right-radius:0}[role=group] input:not([type=checkbox],[type=radio]):focus,[role=group] select:focus,[role=group]>:focus,[role=search] input:not([type=checkbox],[type=radio]):focus,[role=search] select:focus,[role=search]>:focus{z-index:2}[role=group] [role=button]:not(:first-child),[role=group] [type=button]:not(:first-child),[role=group] [type=reset]:not(:first-child),[role=group] [type=submit]:not(:first-child),[role=group] button:not(:first-child),[role=group] input:not([type=checkbox],[type=radio]):not(:first-child),[role=group] select:not(:first-child),[role=search] [role=button]:not(:first-child),[role=search] [type=button]:not(:first-child),[role=search] [type=reset]:not(:first-child),[role=search] [type=submit]:not(:first-child),[role=search] button:not(:first-child),[role=search] input:not([type=checkbox],[type=radio]):not(:first-child),[role=search] select:not(:first-child){margin-left:calc(var(--pico-border-width) * -1)}[role=group] [role=button],[role=group] [type=button],[role=group] [type=reset],[role=group] [type=submit],[role=group] button,[role=search] [role=button],[role=search] [type=button],[role=search] [type=reset],[role=search] [type=submit],[role=search] button{width:auto}@supports selector(:has(*)){[role=group]:has(button:focus,[type=submit]:focus,[type=button]:focus,[role=button]:focus),[role=search]:has(button:focus,[type=submit]:focus,[type=button]:focus,[role=button]:focus){--pico-group-box-shadow:var(--pico-group-box-shadow-focus-with-button)}[role=group]:has(button:focus,[type=submit]:focus,[type=button]:focus,[role=button]:focus) input:not([type=checkbox],[type=radio]),[role=group]:has(button:focus,[type=submit]:focus,[type=button]:focus,[role=button]:focus) select,[role=search]:has(button:focus,[type=submit]:focus,[type=button]:focus,[role=button]:focus) input:not([type=checkbox],[type=radio]),[role=search]:has(button:focus,[type=submit]:focus,[type=button]:focus,[role=button]:focus) select{border-color:transparent}[role=group]:has(input:not([type=submit],[type=button]):focus,select:focus),[role=search]:has(input:not([type=submit],[type=button]):focus,select:focus){--pico-group-box-shadow:var(--pico-group-box-shadow-focus-with-input)}[role=group]:has(input:not([type=submit],[type=button]):focus,select:focus) [role=button],[role=group]:has(input:not([type=submit],[type=button]):focus,select:focus) [type=button],[role=group]:has(input:not([type=submit],[type=button]):focus,select:focus) [type=submit],[role=group]:has(input:not([type=submit],[type=button]):focus,select:focus) button,[role=search]:has(input:not([type=submit],[type=button]):focus,select:focus) [role=button],[role=search]:has(input:not([type=submit],[type=button]):focus,select:focus) [type=button],[role=search]:has(input:not([type=submit],[type=button]):focus,select:focus) [type=submit],[role=search]:has(input:not([type=submit],[type=button]):focus,select:focus) button{--pico-button-box-shadow:0 0 0 var(--pico-border-width) var(--pico-primary-border);--pico-button-hover-box-shadow:0 0 0 var(--pico-border-width) var(--pico-primary-hover-border)}[role=group] [role=button]:focus,[role=group] [type=button]:focus,[role=group] [type=reset]:focus,[role=group] [type=submit]:focus,[role=group] button:focus,[role=search] [role=button]:focus,[role=search] 
[type=button]:focus,[role=search] [type=reset]:focus,[role=search] [type=submit]:focus,[role=search] button:focus{box-shadow:none}}[role=search]>:first-child{border-top-left-radius:5rem;border-bottom-left-radius:5rem}[role=search]>:last-child{border-top-right-radius:5rem;border-bottom-right-radius:5rem}[aria-busy=true]:not(input,select,textarea,html){white-space:nowrap}[aria-busy=true]:not(input,select,textarea,html)::before{display:inline-block;width:1em;height:1em;background-image:var(--pico-icon-loading);background-size:1em auto;background-repeat:no-repeat;content:"";vertical-align:-.125em}[aria-busy=true]:not(input,select,textarea,html):not(:empty)::before{margin-inline-end:calc(var(--pico-spacing) * .5)}[aria-busy=true]:not(input,select,textarea,html):empty{text-align:center}[role=button][aria-busy=true],[type=button][aria-busy=true],[type=reset][aria-busy=true],[type=submit][aria-busy=true],a[aria-busy=true],button[aria-busy=true]{pointer-events:none}:root{--pico-scrollbar-width:0px}dialog{display:flex;z-index:999;position:fixed;top:0;right:0;bottom:0;left:0;align-items:center;justify-content:center;width:inherit;min-width:100%;height:inherit;min-height:100%;padding:0;border:0;-webkit-backdrop-filter:var(--pico-modal-overlay-backdrop-filter);backdrop-filter:var(--pico-modal-overlay-backdrop-filter);background-color:var(--pico-modal-overlay-background-color);color:var(--pico-color)}dialog article{width:100%;max-height:calc(100vh - var(--pico-spacing) * 2);margin:var(--pico-spacing);overflow:auto}@media (min-width:576px){dialog article{max-width:510px}}@media (min-width:768px){dialog article{max-width:700px}}dialog article>header>*{margin-bottom:0}dialog article>header .close,dialog article>header :is(a,button)[rel=prev]{margin:0;margin-left:var(--pico-spacing);padding:0;float:right}dialog article>footer{text-align:right}dialog article>footer [role=button],dialog article>footer button{margin-bottom:0}dialog article>footer [role=button]:not(:first-of-type),dialog article>footer button:not(:first-of-type){margin-left:calc(var(--pico-spacing) * .5)}dialog article .close,dialog article :is(a,button)[rel=prev]{display:block;width:1rem;height:1rem;margin-top:calc(var(--pico-spacing) * -1);margin-bottom:var(--pico-spacing);margin-left:auto;border:none;background-image:var(--pico-icon-close);background-position:center;background-size:auto 1rem;background-repeat:no-repeat;background-color:transparent;opacity:.5;transition:opacity var(--pico-transition)}dialog article .close:is([aria-current]:not([aria-current=false]),:hover,:active,:focus),dialog article :is(a,button)[rel=prev]:is([aria-current]:not([aria-current=false]),:hover,:active,:focus){opacity:1}dialog:not([open]),dialog[open=false]{display:none}.modal-is-open{padding-right:var(--pico-scrollbar-width,0);overflow:hidden;pointer-events:none;touch-action:none}.modal-is-open dialog{pointer-events:auto;touch-action:auto}:where(.modal-is-opening,.modal-is-closing) dialog,:where(.modal-is-opening,.modal-is-closing) dialog>article{animation-duration:.2s;animation-timing-function:ease-in-out;animation-fill-mode:both}:where(.modal-is-opening,.modal-is-closing) dialog{animation-duration:.8s;animation-name:modal-overlay}:where(.modal-is-opening,.modal-is-closing) dialog>article{animation-delay:.2s;animation-name:modal}.modal-is-closing dialog,.modal-is-closing dialog>article{animation-delay:0s;animation-direction:reverse}@keyframes modal-overlay{from{-webkit-backdrop-filter:none;backdrop-filter:none;background-color:transparent}}@keyframes 
modal{from{transform:translateY(-100%);opacity:0}}:where(nav li)::before{float:left;content:"​"}nav,nav ul{display:flex}nav{justify-content:space-between;overflow:visible}nav ol,nav ul{align-items:center;margin-bottom:0;padding:0;list-style:none}nav ol:first-of-type,nav ul:first-of-type{margin-left:calc(var(--pico-nav-element-spacing-horizontal) * -1)}nav ol:last-of-type,nav ul:last-of-type{margin-right:calc(var(--pico-nav-element-spacing-horizontal) * -1)}nav li{display:inline-block;margin:0;padding:var(--pico-nav-element-spacing-vertical) var(--pico-nav-element-spacing-horizontal)}nav li :where(a,[role=link]){display:inline-block;margin:calc(var(--pico-nav-link-spacing-vertical) * -1) calc(var(--pico-nav-link-spacing-horizontal) * -1);padding:var(--pico-nav-link-spacing-vertical) var(--pico-nav-link-spacing-horizontal);border-radius:var(--pico-border-radius)}nav li :where(a,[role=link]):not(:hover){text-decoration:none}nav li [role=button],nav li [type=button],nav li button,nav li input:not([type=checkbox],[type=radio],[type=range],[type=file]),nav li select{height:auto;margin-right:inherit;margin-bottom:0;margin-left:inherit;padding:calc(var(--pico-nav-link-spacing-vertical) - var(--pico-border-width) * 2) var(--pico-nav-link-spacing-horizontal)}nav[aria-label=breadcrumb]{align-items:center;justify-content:start}nav[aria-label=breadcrumb] ul li:not(:first-child){margin-inline-start:var(--pico-nav-link-spacing-horizontal)}nav[aria-label=breadcrumb] ul li a{margin:calc(var(--pico-nav-link-spacing-vertical) * -1) 0;margin-inline-start:calc(var(--pico-nav-link-spacing-horizontal) * -1)}nav[aria-label=breadcrumb] ul li:not(:last-child)::after{display:inline-block;position:absolute;width:calc(var(--pico-nav-link-spacing-horizontal) * 4);margin:0 calc(var(--pico-nav-link-spacing-horizontal) * -1);content:var(--pico-nav-breadcrumb-divider);color:var(--pico-muted-color);text-align:center;text-decoration:none;white-space:nowrap}nav[aria-label=breadcrumb] a[aria-current]:not([aria-current=false]){background-color:transparent;color:inherit;text-decoration:none;pointer-events:none}aside li,aside nav,aside ol,aside ul{display:block}aside li{padding:calc(var(--pico-nav-element-spacing-vertical) * .5) var(--pico-nav-element-spacing-horizontal)}aside li a{display:block}aside li [role=button]{margin:inherit}[dir=rtl] nav[aria-label=breadcrumb] ul li:not(:last-child) ::after{content:"\\"}progress{display:inline-block;vertical-align:baseline}progress{-webkit-appearance:none;-moz-appearance:none;display:inline-block;appearance:none;width:100%;height:.5rem;margin-bottom:calc(var(--pico-spacing) * .5);overflow:hidden;border:0;border-radius:var(--pico-border-radius);background-color:var(--pico-progress-background-color);color:var(--pico-progress-color)}progress::-webkit-progress-bar{border-radius:var(--pico-border-radius);background:0 0}progress[value]::-webkit-progress-value{background-color:var(--pico-progress-color);-webkit-transition:inline-size var(--pico-transition);transition:inline-size var(--pico-transition)}progress::-moz-progress-bar{background-color:var(--pico-progress-color)}@media (prefers-reduced-motion:no-preference){progress:indeterminate{background:var(--pico-progress-background-color) linear-gradient(to right,var(--pico-progress-color) 30%,var(--pico-progress-background-color) 30%) top left/150% 150% no-repeat;animation:progress-indeterminate 1s linear 
infinite}progress:indeterminate[value]::-webkit-progress-value{background-color:transparent}progress:indeterminate::-moz-progress-bar{background-color:transparent}}@media (prefers-reduced-motion:no-preference){[dir=rtl] progress:indeterminate{animation-direction:reverse}}@keyframes progress-indeterminate{0%{background-position:200% 0}100%{background-position:-200% 0}}[data-tooltip]{position:relative}[data-tooltip]:not(a,button,input){border-bottom:1px dotted;text-decoration:none;cursor:help}[data-tooltip]::after,[data-tooltip]::before,[data-tooltip][data-placement=top]::after,[data-tooltip][data-placement=top]::before{display:block;z-index:99;position:absolute;bottom:100%;left:50%;padding:.25rem .5rem;overflow:hidden;transform:translate(-50%,-.25rem);border-radius:var(--pico-border-radius);background:var(--pico-tooltip-background-color);content:attr(data-tooltip);color:var(--pico-tooltip-color);font-style:normal;font-weight:var(--pico-font-weight);font-size:.875rem;text-decoration:none;text-overflow:ellipsis;white-space:nowrap;opacity:0;pointer-events:none}[data-tooltip]::after,[data-tooltip][data-placement=top]::after{padding:0;transform:translate(-50%,0);border-top:.3rem solid;border-right:.3rem solid transparent;border-left:.3rem solid transparent;border-radius:0;background-color:transparent;content:"";color:var(--pico-tooltip-background-color)}[data-tooltip][data-placement=bottom]::after,[data-tooltip][data-placement=bottom]::before{top:100%;bottom:auto;transform:translate(-50%,.25rem)}[data-tooltip][data-placement=bottom]:after{transform:translate(-50%,-.3rem);border:.3rem solid transparent;border-bottom:.3rem solid}[data-tooltip][data-placement=left]::after,[data-tooltip][data-placement=left]::before{top:50%;right:100%;bottom:auto;left:auto;transform:translate(-.25rem,-50%)}[data-tooltip][data-placement=left]:after{transform:translate(.3rem,-50%);border:.3rem solid transparent;border-left:.3rem solid}[data-tooltip][data-placement=right]::after,[data-tooltip][data-placement=right]::before{top:50%;right:auto;bottom:auto;left:100%;transform:translate(.25rem,-50%)}[data-tooltip][data-placement=right]:after{transform:translate(-.3rem,-50%);border:.3rem solid transparent;border-right:.3rem solid}[data-tooltip]:focus::after,[data-tooltip]:focus::before,[data-tooltip]:hover::after,[data-tooltip]:hover::before{opacity:1}@media (hover:hover) and (pointer:fine){[data-tooltip]:focus::after,[data-tooltip]:focus::before,[data-tooltip]:hover::after,[data-tooltip]:hover::before{--pico-tooltip-slide-to:translate(-50%, -0.25rem);transform:translate(-50%,.75rem);animation-duration:.2s;animation-fill-mode:forwards;animation-name:tooltip-slide;opacity:0}[data-tooltip]:focus::after,[data-tooltip]:hover::after{--pico-tooltip-caret-slide-to:translate(-50%, 0rem);transform:translate(-50%,-.25rem);animation-name:tooltip-caret-slide}[data-tooltip][data-placement=bottom]:focus::after,[data-tooltip][data-placement=bottom]:focus::before,[data-tooltip][data-placement=bottom]:hover::after,[data-tooltip][data-placement=bottom]:hover::before{--pico-tooltip-slide-to:translate(-50%, 0.25rem);transform:translate(-50%,-.75rem);animation-name:tooltip-slide}[data-tooltip][data-placement=bottom]:focus::after,[data-tooltip][data-placement=bottom]:hover::after{--pico-tooltip-caret-slide-to:translate(-50%, 
-0.3rem);transform:translate(-50%,-.5rem);animation-name:tooltip-caret-slide}[data-tooltip][data-placement=left]:focus::after,[data-tooltip][data-placement=left]:focus::before,[data-tooltip][data-placement=left]:hover::after,[data-tooltip][data-placement=left]:hover::before{--pico-tooltip-slide-to:translate(-0.25rem, -50%);transform:translate(.75rem,-50%);animation-name:tooltip-slide}[data-tooltip][data-placement=left]:focus::after,[data-tooltip][data-placement=left]:hover::after{--pico-tooltip-caret-slide-to:translate(0.3rem, -50%);transform:translate(.05rem,-50%);animation-name:tooltip-caret-slide}[data-tooltip][data-placement=right]:focus::after,[data-tooltip][data-placement=right]:focus::before,[data-tooltip][data-placement=right]:hover::after,[data-tooltip][data-placement=right]:hover::before{--pico-tooltip-slide-to:translate(0.25rem, -50%);transform:translate(-.75rem,-50%);animation-name:tooltip-slide}[data-tooltip][data-placement=right]:focus::after,[data-tooltip][data-placement=right]:hover::after{--pico-tooltip-caret-slide-to:translate(-0.3rem, -50%);transform:translate(-.05rem,-50%);animation-name:tooltip-caret-slide}}@keyframes tooltip-slide{to{transform:var(--pico-tooltip-slide-to);opacity:1}}@keyframes tooltip-caret-slide{50%{opacity:0}to{transform:var(--pico-tooltip-caret-slide-to);opacity:1}}[aria-controls]{cursor:pointer}[aria-disabled=true],[disabled]{cursor:not-allowed}[aria-hidden=false][hidden]{display:initial}[aria-hidden=false][hidden]:not(:focus){clip:rect(0,0,0,0);position:absolute}[tabindex],a,area,button,input,label,select,summary,textarea{-ms-touch-action:manipulation}[dir=rtl]{direction:rtl}@media (prefers-reduced-motion:reduce){:not([aria-busy=true]),:not([aria-busy=true])::after,:not([aria-busy=true])::before{background-attachment:initial!important;animation-duration:1ms!important;animation-delay:-1ms!important;animation-iteration-count:1!important;scroll-behavior:auto!important;transition-delay:0s!important;transition-duration:0s!important}}pyglossary-5.0.9/pyglossary/ui/ui_web/ui_controller.py000066400000000000000000000105411476751035500233140ustar00rootroot00000000000000from __future__ import annotations import json import logging import webbrowser from pathlib import Path from typing import Any from pyglossary.glossary_v2 import ConvertArgs, Glossary from pyglossary.ui.base import UIBase from pyglossary.ui.ui_web.websocket_main import create_server log = logging.getLogger("pyglossary.web") HOST = "127.0.0.1" PORT = 1984 class WebUI(UIBase): def __init__(self, progressbar: bool = True) -> None: UIBase.__init__(self) self._toPause = False self._resetLogFormatter = None self._progressbar = progressbar self.server = None def progressInit(self, title: str) -> None: self.server.send_message_to_all( json.dumps({"type": "progress", "text": title or "", "ratio": 0}) ) def progress(self, ratio: float, text: str = "") -> None: if not text: text = f"{int(ratio * 100)!s}%" self.server.send_message_to_all( json.dumps({"type": "progress", "text": text, "ratio": ratio}) ) def run( # noqa: PLR0912, PLR0913 self, inputFilename: str, outputFilename: str, inputFormat: str, outputFormat: str, reverse: bool = False, config: dict[str, Any] | None = None, readOptions: dict[str, Any] | None = None, writeOptions: dict[str, Any] | None = None, convertOptions: dict[str, Any] | None = None, glossarySetAttrs: dict[str, Any] | None = None, ) -> bool: if reverse: raise ValueError("reverse is not supported") self.inputFilename = inputFilename self.outputFilename = outputFilename 
self.inputFormat = inputFormat self.outputFormat = outputFormat self.config = config or {} self.readOptions = readOptions or {} self.writeOptions = writeOptions or {} self.convertOptions = convertOptions or {} self.glossarySetAttrs = glossarySetAttrs or {} try: self.server = create_server(host=HOST, port=PORT) self.server.ui_controller = self url = self.server.url log.info(url) webbrowser.open(url) self.server.run_forever() except OSError as e: if "Address already in use" in str(e): print(f"Server already running:\n{e!s}\n Use Menu -> Exit to stop") webbrowser.open(f"http://{HOST}:{PORT}/") return False raise e from None return True def getPayloadStr(self, payload: dict[str, Any], name: str) -> str: value = payload.get(name) if value is None: return "" if not isinstance(value, str): raise ValueError(f"{name} must be string") return value def getPayloadDict(self, payload: dict[str, Any], name: str) -> dict: value = payload.get(name) if value is None: return {} if not isinstance(value, dict): raise ValueError(f"{name} must be a dict") return value def start_convert_job(self, payload: dict[str, Any]) -> bool: glos = Glossary(ui=self) inputFilename = ( self.getPayloadStr(payload, "inputFilename") or self.inputFilename ) if not inputFilename: raise ValueError("inputFilename is missing") inputFormat = self.getPayloadStr(payload, "inputFormat") or self.inputFormat if not inputFormat: raise ValueError("inputFormat is missing") outputFilename = ( self.getPayloadStr(payload, "outputFilename") or self.outputFilename ) if not outputFilename: raise ValueError("outputFilename is missing") outputFormat = self.getPayloadStr(payload, "outputFormat") or self.outputFormat if not outputFormat: raise ValueError("outputFormat is missing") readOptions = self.getPayloadDict(payload, "readOptions") or self.readOptions writeOptions = self.getPayloadDict(payload, "writeOptions") or self.writeOptions convertOptions = ( self.getPayloadDict(payload, "convertOptions") or self.convertOptions ) log.debug(f"readOptions: {readOptions}") log.debug(f"writeOptions: {writeOptions}") log.debug(f"convertOptions: {convertOptions}") log.debug(f"config: {self.config}") glos.config = self.config for attr, value in self.glossarySetAttrs.items(): setattr(glos, attr, value) try: finalOutputFile = glos.convert( ConvertArgs( # allow ~ in paths inputFilename=str(Path(inputFilename).expanduser().resolve()), inputFormat=inputFormat, outputFilename=str(Path(outputFilename).expanduser().resolve()), outputFormat=outputFormat, readOptions=readOptions, writeOptions=writeOptions, **convertOptions, ), ) except Exception as e: log.critical(str(e)) glos.cleanup() return False log.info("Convert finished") return bool(finalOutputFile) pyglossary-5.0.9/pyglossary/ui/ui_web/weblog.py000066400000000000000000000040071476751035500217130ustar00rootroot00000000000000# Based on: https://github.com/Pithikos/python-websocket-server # Copyright (c) 2024 Saeed Rasooli # Copyright (c) 2024 https://github.com/glowinthedark (https://legbehindneck.com) # Copyright (c) 2018 Johan Hanssen Seferidis # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright
notice and this permission notice shall be included in all # copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from __future__ import annotations import logging import traceback from typing import TYPE_CHECKING, Protocol if TYPE_CHECKING: class ServerType(Protocol): def send_message_to_all(self, msg: str | dict) -> None: ... class WebLogHandler(logging.Handler): def __init__(self, server: ServerType) -> None: logging.Handler.__init__(self) self.srv = server def emit(self, record: logging.LogRecord) -> None: msg = "" if record.getMessage(): msg = self.format(record) msg = msg.replace("\x00", "") if record.exc_info: type_, value, tback = record.exc_info tback_text = "".join( traceback.format_exception(type_, value, tback), ) if msg: msg += "\n" msg += tback_text self.srv.send_message_to_all({"type": "info", "text": msg}) pyglossary-5.0.9/pyglossary/ui/ui_web/websocket_handler.py000066400000000000000000000332111476751035500241160ustar00rootroot00000000000000# Based on: https://github.com/Pithikos/python-websocket-server # Copyright (c) 2024 Saeed Rasooli # Copyright (c) 2024 https://github.com/glowinthedark (https://legbehindneck.com) # Copyright (c) 2018 Johan Hanssen Seferidis # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
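# --- Hedged aside (illustration only; not part of the original header) -----
# The handshake implemented below follows RFC 6455: the server appends the
# fixed GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11" to the client's
# Sec-WebSocket-Key, SHA-1 hashes the result, and returns the base64-encoded
# digest as Sec-WebSocket-Accept (see calculate_response_key()). A minimal
# standalone sketch, using the test vector from RFC 6455 section 1.3:
#
#   from base64 import b64encode
#   from hashlib import sha1
#
#   key = "dGhlIHNhbXBsZSBub25jZQ=="  # the RFC's sample nonce
#   accept = b64encode(
#       sha1((key + "258EAFA5-E914-47DA-95CA-C5AB0DC85B11").encode()).digest()
#   ).decode("ascii")
#   assert accept == "s3pPLMBiTxaQ9kYGzzhZRbK+xOo="
# ---------------------------------------------------------------------------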
from __future__ import annotations import errno import json import logging import os import posixpath import struct import threading from base64 import b64encode from hashlib import sha1 from http import HTTPStatus from http.server import SimpleHTTPRequestHandler from pathlib import Path from typing import TYPE_CHECKING from urllib.parse import unquote from pyglossary.glossary_v2 import Glossary from pyglossary.ui.ui_web.websocket_server import ( CLOSE_STATUS_NORMAL, DEFAULT_CLOSE_REASON, FIN, MASKED, OPCODE, OPCODE_BINARY, OPCODE_CLOSE_CONN, OPCODE_CONTINUATION, OPCODE_PING, OPCODE_PONG, OPCODE_TEXT, PAYLOAD_LEN, PAYLOAD_LEN_EXT16, PAYLOAD_LEN_EXT64, ) if TYPE_CHECKING: import socket as socketlib import socketserver from typing import Any log = logging.getLogger("pyglossary.web.server") class HTTPWebSocketHandler(SimpleHTTPRequestHandler): browse_roots = [] @classmethod def add_browse_root(cls, path: str) -> None: """Additional browse roots for css/js/etc resources.""" cls.browse_roots.append(path) def __init__( self, socket: socketlib.socket, addr: tuple[str, int], # (ip: str, port: int) server: socketserver.BaseServer, *args, # noqa: ANN001, ANN002 **kwargs, ) -> None: if hasattr(self, "_send_lock"): raise RuntimeError("_send_lock already exists") self._send_lock = threading.Lock() self.server = server webroot = str(Path(__file__).parent) self.browse_roots.append(webroot) super().__init__( socket, addr, server, *args, **kwargs, directory=webroot, ) def translate_path(self, path: str) -> str: """ Overlay of https://github.com/python/cpython/blob/47c5a0f307cff3ed477528536e8de095c0752efa/Lib/http/server.py#L841 patched to support multiple browse roots Translate a /-separated PATH to the local filename syntax. Components that mean special things to the local file system (e.g. drive or directory names) are ignored. (XXX They should probably be diagnosed.) 
""" # abandon query parameters if self.command == "GET": path = path.split("?", 1)[0] path = path.split("#", 1)[0] # Handle explicit trailing slash when normalizing trailing_slash = path.rstrip().endswith("/") try: path = unquote(path, errors="surrogatepass") except UnicodeDecodeError: path = unquote(path) path = posixpath.normpath(path) # normpath already replaces // (or /// etc) with / pathParts = path.split("/") # Iterate through each browsing root to find a matching path for root in self.browse_roots: rootPath = os.path.join(root, *pathParts) # Normalize path and check if the file exists if os.path.exists(rootPath): if trailing_slash and os.path.isdir(rootPath): rootPath += "/" return rootPath # If no valid path found in any root, send 404 self.send_error(HTTPStatus.NOT_FOUND, "Not found") return "" # fallback to super for other methods return super().translate_path(path) def do_GET(self) -> None: if self.path == "/config": self.send_config() else: super().do_GET() def send_config(self) -> None: self.send_response(HTTPStatus.OK) self.send_header("Content-Type", "application/json") self.end_headers() READ = 1 # 01 WRITE = 2 # 10 conversion_config = { name: { "desc": plug.description, "can": (READ * plug.canRead) | (WRITE * plug.canWrite), "ext": plug.ext, } for name, plug in Glossary.plugins.items() } self.wfile.write(json.dumps(conversion_config).encode()) def do_POST(self) -> None: # custom ajax action for /convert POST if self.path == "/convert": self.handle_convert_job() return self.send_response(HTTPStatus.BAD_REQUEST) self.send_header("Content-Type", "application/json") self.end_headers() json.dump( { "value": f"{self.path}: POST unsupported", }, self.wfile, ) def setup(self) -> None: SimpleHTTPRequestHandler.setup(self) self.keep_alive = True self.handshake_done = False self.valid_client = False def set_keep_alive(self, keep_alive: bool) -> None: self.keep_alive = keep_alive def handle(self) -> None: self.close_connection = True try: self.handle_one_request() while not self.close_connection: self.handle_one_request() except Exception as e: self.log_error(str(e)) def handle_ws(self) -> None: while self.keep_alive: if not self.handshake_done: self.handshake() elif self.valid_client: self.read_next_message() def handle_convert_job(self) -> None: try: payload: dict[str, Any] = json.loads( self.rfile.read(int(self.headers.get("Content-Length", 0))) ) except json.JSONDecodeError: self.json_decode_error() return except Exception as e: self.internal_exception(e) return log.debug(f"Handle convert request from {self.client_address[0]}") log.debug(f"POST PAYLOAD {payload}") try: self.server.ui_controller.start_convert_job(payload) except ValueError as e: self.validation_exception(e) return self.send_response(HTTPStatus.OK) self.send_header("Content-type", "text/html") self.end_headers() self.wfile.write(b"POST successful") def validation_exception(self, e: Exception) -> None: self.send_response(HTTPStatus.BAD_REQUEST) self.send_header("Content-type", "application/json") self.end_headers() json.dump({"error": str(e)}, self.wfile) def json_decode_error(self) -> None: self.send_response(HTTPStatus.BAD_REQUEST) self.send_header("Content-type", "application/json") self.end_headers() self.wfile.write(b"Invalid JSON data.") def internal_exception(self, e: Exception) -> None: log.error(e) self.send_response(HTTPStatus.INTERNAL_SERVER_ERROR) # Internal Server Error self.send_header("Content-type", "text/html") self.end_headers() self.wfile.write(f"Error: {e!s}".encode()) def 
_handle_one_request(self) -> None: self.raw_requestline = self.rfile.readline(65537) if len(self.raw_requestline) > 65536: self.requestline = "" self.request_version = "" self.command = "" self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG) return if not self.raw_requestline: self.close_connection = True return if not self.parse_request(): # An error code has been sent, just exit return if self.path.startswith("/ws") and self.headers.get("upgrade") == "websocket": self.handle_ws() return mname = "do_" + self.command if not hasattr(self, mname): self.send_error( HTTPStatus.NOT_IMPLEMENTED, f"Unsupported method ({self.command})", ) return method = getattr(self, mname) method() self.wfile.flush() # actually send the response if not already done. def handle_one_request(self) -> None: """ Handle a single HTTP/WS request. Override ootb method to delegate to WebSockets handler based on /ws path and presence of custom header: "upgrade: websocket". """ try: self._handle_one_request() except TimeoutError as e: # a read or a write timed out. Discard this connection self.log_error("Request timed out: %r", e) self.close_connection = True def read_bytes(self, num: int) -> bytes: return self.rfile.read(num) def read_next_message(self) -> None: try: b1, b2 = self.read_bytes(2) except OSError as e: # to be replaced with ConnectionResetError for py3 if e.errno == errno.ECONNRESET: log.info("Client closed connection.") self.keep_alive = 0 return b1, b2 = 0, 0 except ValueError: b1, b2 = 0, 0 opcode = b1 & OPCODE masked = b2 & MASKED payload_length = b2 & PAYLOAD_LEN if opcode == OPCODE_CLOSE_CONN: log.info("Client asked to close connection.") self.keep_alive = 0 return if not masked: log.warning("Client must always be masked.") self.keep_alive = 0 return if opcode == OPCODE_CONTINUATION: log.warning("Continuation frames are not supported.") return if opcode == OPCODE_BINARY: log.warning("Binary frames are not supported.") return if opcode == OPCODE_TEXT: opcode_handler = self.server.message_received_handler elif opcode == OPCODE_PING: opcode_handler = self.server.ping_received_handler elif opcode == OPCODE_PONG: opcode_handler = self.server.pong_received_handler else: log.warning(f"Unknown opcode {opcode:#x}.") self.keep_alive = 0 return if payload_length == 126: payload_length = struct.unpack(">H", self.rfile.read(2))[0] elif payload_length == 127: payload_length = struct.unpack(">Q", self.rfile.read(8))[0] masks = self.read_bytes(4) message_bytes = bytearray() for message_byte in self.read_bytes(payload_length): message_byte ^= masks[len(message_bytes) % 4] # noqa: PLW2901 message_bytes.append(message_byte) opcode_handler(self, message_bytes.decode("utf8")) def send_message(self, message: str | bytes) -> None: self.send_text(message) def send_pong(self, message: str | bytes) -> None: self.send_text(message, OPCODE_PONG) def send_close( self, status: int = CLOSE_STATUS_NORMAL, reason: bytes = DEFAULT_CLOSE_REASON, ) -> None: """ Send CLOSE to client. 
Args: status: Status as defined in https://datatracker.ietf.org/doc/html/rfc6455#section-7.4.1 reason: Text with reason of closing the connection """ if status < CLOSE_STATUS_NORMAL or status > 1015: raise Exception(f"CLOSE status must be between 1000 and 1015, got {status}") header = bytearray() payload = struct.pack("!H", status) + reason payload_length = len(payload) assert payload_length <= 125, ( "We only support short closing reasons at the moment" ) # Send CLOSE with status & reason header.append(FIN | OPCODE_CLOSE_CONN) header.append(payload_length) with self._send_lock: try: self.request.send(header + payload) except Exception as e: self.log_error(f"ws: CLOSE not sent - client disconnected! {e!s}") def send_text(self, message: str | bytes, opcode: int = OPCODE_TEXT) -> bool | None: """ Important: Fragmented(=continuation) messages are not supported since their usage cases are limited - when we don't know the payload length. """ # Validate message if isinstance(message, bytes): # this is slower but ensures we have UTF-8 message = try_decode_UTF8(message) if not message: log.warning("Can't send message, message is not valid UTF-8") return False elif not isinstance(message, str): log.warning( "Can't send message, message has to be a string or bytes. " f"Got {type(message)}" ) return False header = bytearray() payload = encode_to_UTF8(message) payload_length = len(payload) # Normal payload if payload_length <= 125: header.append(FIN | opcode) header.append(payload_length) # Extended payload elif payload_length >= 126 and payload_length <= 65535: header.append(FIN | opcode) header.append(PAYLOAD_LEN_EXT16) header.extend(struct.pack(">H", payload_length)) # Huge extended payload elif payload_length < 18446744073709551616: header.append(FIN | opcode) header.append(PAYLOAD_LEN_EXT64) header.extend(struct.pack(">Q", payload_length)) else: raise Exception("Message is too big. 
Consider breaking it into chunks.") with self._send_lock: self.request.send(header + payload) # type: ignore return None def handshake(self) -> None: key = self.headers.get("sec-websocket-key") if key is None: log.warning("Client tried to connect but was missing a key") self.keep_alive = False return response = self.make_handshake_response(key) with self._send_lock: self.handshake_done = self.request.send(response.encode()) self.valid_client = True self.server.new_client_handler(self) @classmethod def make_handshake_response(cls, key: str) -> str: return ( "HTTP/1.1 101 Switching Protocols\r\n" "Upgrade: websocket\r\n" "Connection: Upgrade\r\n" f"Sec-WebSocket-Accept: {cls.calculate_response_key(key)}\r\n" "\r\n" ) @classmethod def calculate_response_key(cls, key: str) -> str: seed = sha1(key.encode() + b"258EAFA5-E914-47DA-95CA-C5AB0DC85B11") response_key = b64encode(seed.digest()).strip() return response_key.decode("ASCII") def finish(self) -> None: if not self.valid_client: return self.server.client_left_handler(self) self.connection.close() def encode_to_UTF8(data: str) -> bytes: try: return data.encode("UTF-8") except UnicodeEncodeError as e: log.error(f"Could not encode data to UTF-8 -- {e}") return b"" except Exception as e: raise e def try_decode_UTF8(data: bytes) -> str | None: try: return data.decode("utf-8") except UnicodeDecodeError: return None except Exception as e: raise e pyglossary-5.0.9/pyglossary/ui/ui_web/websocket_main.py000066400000000000000000000141001476751035500234210ustar00rootroot00000000000000# Based on: https://github.com/Pithikos/python-websocket-server # Copyright (c) 2024 Saeed Rasooli # Copyright (c) 2024 https://github.com/glowinthedark (https://legbehindneck.com) # Copyright (c) 2018 Johan Hanssen Seferidis # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from __future__ import annotations import base64 import json import logging import os.path from pathlib import Path from typing import TYPE_CHECKING, Any, Protocol from pyglossary.glossary_v2 import Glossary from pyglossary.ui.ui_web.weblog import WebLogHandler from pyglossary.ui.ui_web.websocket_handler import HTTPWebSocketHandler from pyglossary.ui.ui_web.websocket_server import HttpWebsocketServer if TYPE_CHECKING: from pyglossary.glossary_types import EntryType class ServerType(Protocol): def send_message_to_all(self, msg: str | dict) -> None: ... def shutdown(self) -> None: ...
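# --- Hedged usage sketch (illustration only; not used by this module) ------
# The custom endpoints documented below can be exercised with nothing but the
# standard library. The payload field names mirror what
# WebUI.start_convert_job() reads from the POST body; the filenames and
# format names here are made-up examples, and the host/port assume the
# HOST/PORT defaults from ui_controller.
#
#   import json
#   from urllib.request import Request, urlopen
#
#   plugins = json.load(urlopen("http://127.0.0.1:1984/config"))  # GET /config
#   payload = {
#       "inputFilename": "example.ifo",    # hypothetical input file
#       "inputFormat": "Stardict",
#       "outputFilename": "example.txt",   # hypothetical output file
#       "outputFormat": "Tabfile",
#       "readOptions": {},
#       "writeOptions": {},
#       "convertOptions": {},
#   }
#   req = Request(
#       "http://127.0.0.1:1984/convert",   # POST /convert starts the job
#       data=json.dumps(payload).encode(),
#       headers={"Content-Type": "application/json"},
#   )
#   print(urlopen(req).read())             # b"POST successful" on success
# ---------------------------------------------------------------------------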
MAX_IMAGE_SIZE = 512000 DEFAULT_MAX_BROWSE_ENTRIES = 42 log = logging.getLogger("pyglossary.web.server") log.setLevel(logging.DEBUG) """ Custom endpoints: - ws://localhost:1984/ws : 2-way client-server communication - GET /config : Returns plugins metadata as JSON - POST /convert : Starts a conversion job; takes JSON with paths + formats """ # ======================= IMPLEMENTATION SECTION ========================= def new_client(client: dict[str, Any], server: ServerType) -> None: client_id = client.get("id", "n/a") print(f"New client connected and was given id {client_id}") server.send_message_to_all( {"type": "info", "text": f"ws: client id 🔗: {client_id}"} ) # Called on client disconnecting def client_left(client: dict[str, Any], server: ServerType) -> None: log.info(f"{server}: Client({(client and client.get('id')) or -1}) disconnected") # Callback invoked when client sends a message def message_received(client: dict[str, Any], server: ServerType, message: str) -> None: if message == "ping": print(f"Client({client.get('id')}) said: {message}") server.send_message_to_all({"type": "info", "text": "ws: pong ✔️"}) elif "browse" in message: try: handle_browse_request(client, server, message) except Exception as e: log.error(f"{e!s} handling client message {client}") elif message == "exit": try: server.send_message_to_all( {"type": "info", "text": "\n\nws: shutdown request received ✔️"} ) server.shutdown() except Exception as e: log.warning(str(e)) def browse_check_entry(entry: EntryType, wordQuery: str) -> str | None: # get first max entries if no word or filter until max results if wordQuery and not entry.s_word.lower().startswith(wordQuery.lower()): return None html_entry = None if entry.defiFormat in {"h", "m", "x"}: return f"""
{entry.s_word}
{entry.defi}
""" html_entry = f"📎
{entry.s_word} ({entry.size()})
          " if ( entry.isData() and entry.size() < MAX_IMAGE_SIZE and entry.s_word.lower().endswith((".jpg", "jpeg", ".png")) ): extension = Path(entry.s_word).suffix[1:] html_entry += f""" {entry.s_word} """ return html_entry def handle_browse_request( client: dict[str, Any], server: ServerType, message: str, ) -> None: log.debug(f"processing client #{client} message") params = json.loads(message) wordQuery = params.get("word") glossary_path = params.get("path") glossary_format = params.get("format") max_results = int(params.get("max", DEFAULT_MAX_BROWSE_ENTRIES)) if not glossary_path or not os.path.exists(glossary_path): log.error(f"invalid PATH: '{glossary_path}'") server.send_message_to_all( {"type": "browse", "error": f"invalid path: '{glossary_path}'"} ) return glos_path = Path(glossary_path).expanduser().resolve() # add parent folder as a browse root to allow resolution of # .css/.js/.jpg resources for .mdx files HTTPWebSocketHandler.add_browse_root(str(glos_path.parent)) glos = Glossary(ui=None) if not glos.directRead(glossary_path, formatName=glossary_format): server.send_message_to_all( { "type": "browse", "error": f"Error reading {glossary_path} with format {glossary_format}", } ) num_results = 0 for entry in glos: html_entry = browse_check_entry(entry, wordQuery) if not html_entry: continue num_results += 1 try: server.send_message_to_all( { "type": "browse", "data": html_entry, "num": num_results, "max": max_results, } ) except Exception as e: server.send_message_to_all( {"type": "browse", "error": f"exception: '{e!s}'"} ) finally: server.send_message_to_all( { "type": "browse", "data": f"
          Total: {num_results}", "num": num_results, "max": max_results, } ) if num_results >= max_results: break def create_server(host: str, port: int) -> HttpWebsocketServer: server = HttpWebsocketServer( HTTPWebSocketHandler, log, host=host, port=port, ) log.addHandler(WebLogHandler(server)) server.set_fn_new_client(new_client) server.set_fn_client_left(client_left) server.set_fn_message_received(message_received) return server pyglossary-5.0.9/pyglossary/ui/ui_web/websocket_server.py000066400000000000000000000232541476751035500240150ustar00rootroot00000000000000# Based on: https://github.com/Pithikos/python-websocket-server # Copyright (c) 2024 Saeed Rasooli # Copyright (c) 2024 https://github.com/glowinthedark (https://legbehindneck.com) # Copyright (c) 2018 Johan Hanssen Seferidis # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from __future__ import annotations import json import sys import threading from http.server import HTTPServer from socketserver import ThreadingMixIn from typing import TYPE_CHECKING, Any, Protocol if TYPE_CHECKING: import logging from collections.abc import Callable class ServerType(Protocol): def send_message_to_all(self, msg: str | dict) -> None: ... def shutdown(self) -> None: ... class HandlerType(Protocol): def send_pong(self, message: str | bytes) -> None: ... def send_close( self, status: int, reason: bytes, ) -> None: ... def set_keep_alive(self, keep_alive: bool) -> None: ... def finish(self) -> None: ... """ +-+-+-+-+-------+-+-------------+-------------------------------+ 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-------+-+-------------+-------------------------------+ |F|R|R|R| opcode|M| Payload len | Extended payload length | |I|S|S|S| (4) |A| (7) | (16/64) | |N|V|V|V| |S| | (if payload len==126/127) | | |1|2|3| |K| | | +-+-+-+-+-------+-+-------------+ - - - - - - - - - - - - - - - + | Extended payload length continued, if payload len == 127 | + - - - - - - - - - - - - - - - +-------------------------------+ | Payload Data continued ... 
| +---------------------------------------------------------------+ """ FIN = 0x80 OPCODE = 0x0F MASKED = 0x80 PAYLOAD_LEN = 0x7F PAYLOAD_LEN_EXT16 = 0x7E PAYLOAD_LEN_EXT64 = 0x7F OPCODE_CONTINUATION = 0x0 OPCODE_TEXT = 0x1 OPCODE_BINARY = 0x2 OPCODE_CLOSE_CONN = 0x8 OPCODE_PING = 0x9 OPCODE_PONG = 0xA CLOSE_STATUS_NORMAL = 1000 DEFAULT_CLOSE_REASON = b"" class API: def run_forever(self, threaded: bool = False) -> None: raise NotImplementedError def new_client(self, client: dict[str, Any], server: ServerType) -> None: pass def client_left(self, client: dict[str, Any], server: ServerType) -> None: pass def message_received( self, client: dict[str, Any], server: ServerType, message: str, ) -> None: pass def set_fn_new_client( self, fn: Callable[[dict[str, Any], ServerType], None], ) -> None: self.new_client = fn def set_fn_client_left( self, fn: Callable[[dict[str, Any], ServerType], None], ) -> None: self.client_left = fn def set_fn_message_received(self, fn) -> None: # noqa: ANN001 self.message_received = fn def send_message(self, client: dict[str, Any], msg: str | bytes) -> None: self._unicast(client, msg) def send_message_to_all(self, msg: str | dict) -> None: if isinstance(msg, str): self._multicast(msg) else: self._multicast(json.dumps(msg)) def deny_new_connections( self, status: int = CLOSE_STATUS_NORMAL, reason: int = DEFAULT_CLOSE_REASON, ) -> None: self._deny_new_connections(status, reason) def allow_new_connections(self) -> None: self._allow_new_connections() def shutdown_gracefully( self, status: int = CLOSE_STATUS_NORMAL, reason: int = DEFAULT_CLOSE_REASON, ) -> None: self._shutdown_gracefully(status, reason) def shutdown_abruptly(self) -> None: self._shutdown_abruptly() def disconnect_clients_gracefully( self, status: int = CLOSE_STATUS_NORMAL, reason: int = DEFAULT_CLOSE_REASON, ) -> None: self._disconnect_clients_gracefully(status, reason) def disconnect_clients_abruptly(self) -> None: self._disconnect_clients_abruptly() class HttpWebsocketServer(ThreadingMixIn, HTTPServer, API): """ A websocket server waiting for clients to connect. Args: port(int): Port to bind to host(str): Hostname or IP to listen for connections. By default 127.0.0.1 is being used. To accept connections from any client, you should use 0.0.0.0. Properties: clients(list): A list of connected clients. A client is a dictionary like below. 
{ 'id' : id, 'handler' : handler, 'address' : (addr, port) } """
allow_reuse_address = True daemon_threads = True # comment to keep threads alive until finished
def __init__( self, handlerClass: type, logger: logging.Logger, host: str = "127.0.0.1", port: int = 0, ) -> None: # server's own logger HTTPServer.__init__(self, (host, port), handlerClass) self.host = host self.port = self.socket.getsockname()[1] self.clients = [] self.id_counter = 0 self.thread = None self.headers = None self.ui_controller = None self.logger = logger self._deny_clients = False
@property def url(self) -> str: return f"http://{self.host}:{self.port}/"
def info(self, *args, **kwargs) -> None: # noqa: ANN002 self.logger.info(*args, **kwargs)
def error(self, *args, **kwargs) -> None: # noqa: ANN002 self.logger.error(*args, **kwargs)
def exception(self, *args, **kwargs) -> None: # noqa: ANN002 self.logger.error(*args, **kwargs)
def run_forever(self, threaded: bool = False) -> None: cls_name = self.__class__.__name__ try: self.info(f"Listening on http://{self.host}:{self.port}/") if threaded: self.daemon = True self.thread = threading.Thread( target=super().serve_forever, daemon=True, ) self.info(f"Starting {cls_name} on thread {self.thread.name}.") self.thread.start() else: self.thread = threading.current_thread() self.info(f"Starting {cls_name} on main thread.") super().serve_forever() except KeyboardInterrupt: self.server_close() self.info("Server terminated.") except Exception as e: self.exception(str(e), exc_info=True) sys.exit(1)
def message_received_handler(self, handler: HandlerType, msg: str) -> None: self.message_received(self.handler_to_client(handler), self, msg)
def ping_received_handler(self, handler: HandlerType, msg: str) -> None: handler.send_pong(msg)
def pong_received_handler(self, handler: HandlerType, msg: str) -> None: pass
def new_client_handler(self, handler: HandlerType) -> None: if self._deny_clients: status = self._deny_clients["status"] reason = self._deny_clients["reason"] handler.send_close(status, reason) self._terminate_client_handler(handler) return self.id_counter += 1 client = { "id": self.id_counter, "handler": handler, "address": handler.client_address, } self.clients.append(client) self.new_client(client, self)
def client_left_handler(self, handler: HandlerType) -> None: client = self.handler_to_client(handler) if not client: self.logger.warning("client handler was not found") return self.client_left(client, self) if client in self.clients: self.clients.remove(client)
def _unicast(self, receiver_client: dict[str, Any], msg: str | bytes) -> None: receiver_client["handler"].send_message(msg)
def _multicast(self, msg: str | bytes) -> None: for client in self.clients: try: self._unicast(client, msg) except Exception as e: self.error(str(e))
def handler_to_client(self, handler: HandlerType) -> dict[str, Any] | None: for client in self.clients: if client["handler"] == handler: return client return None
def _terminate_client_handler(self, handler: HandlerType) -> None: handler.set_keep_alive(False) handler.finish()
def _terminate_client_handlers(self) -> None: """Ensures the request handler for each client is terminated correctly.""" for client in self.clients: self._terminate_client_handler(client["handler"])
def _shutdown_gracefully( self, status: int = CLOSE_STATUS_NORMAL, reason: bytes = DEFAULT_CLOSE_REASON, ) -> None: """Send a CLOSE handshake to all connected clients before terminating server.""" self.keep_alive = False self._disconnect_clients_gracefully(status, reason)
self.server_close() self.shutdown() def _shutdown_abruptly(self) -> None: """Terminate server without sending a CLOSE handshake.""" self.keep_alive = False self._disconnect_clients_abruptly() self.server_close() self.shutdown() def _disconnect_clients_gracefully( self, status: int = CLOSE_STATUS_NORMAL, reason: bytes = DEFAULT_CLOSE_REASON, ) -> None: """Terminate clients gracefully without shutting down the server.""" for client in self.clients: client["handler"].send_close(status, reason) self._terminate_client_handlers() def _disconnect_clients_abruptly(self) -> None: """ Terminate clients abruptly (no CLOSE handshake) without shutting down the server. """ self._terminate_client_handlers() def _deny_new_connections( self, status: int, reason: bytes, ) -> None: self._deny_clients = { "status": status, "reason": reason, } def _allow_new_connections(self) -> None: self._deny_clients = False pyglossary-5.0.9/pyglossary/ui/version.py000066400000000000000000000016451476751035500206540ustar00rootroot00000000000000from __future__ import annotations import sys from os.path import isdir, join from pyglossary import core __all__ = ["getVersion"] def getGitVersion(gitDir: str) -> str: import subprocess try: outputB, _err = subprocess.Popen( [ "git", "--git-dir", gitDir, "describe", "--always", ], stdout=subprocess.PIPE, ).communicate() except Exception as e: sys.stderr.write(str(e) + "\n") return "" # if _err is None: return outputB.decode("utf-8").strip() def getVersion() -> str: from pyglossary.core import rootDir gitDir = join(rootDir, ".git") if isdir(gitDir): version = getGitVersion(gitDir) if version: return version return core.VERSION def getPipSafeVersion() -> str: from pyglossary.core import rootDir gitDir = join(rootDir, ".git") if isdir(gitDir): version = getGitVersion(gitDir) if version: return "-".join(version.split("-")[:2]) return core.VERSION pyglossary-5.0.9/pyglossary/ui/wcwidth/000077500000000000000000000000001476751035500202605ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/ui/wcwidth/LICENSE000066400000000000000000000024521476751035500212700ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2014 Jeff Quast Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Markus Kuhn -- 2007-05-26 (Unicode 5.0) Permission to use, copy, modify, and distribute this software for any purpose and without fee is hereby granted. The author disclaims all warranties with regard to this software. 
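# Editorial note on version.py above: getVersion() prefers `git describe
# --always` output when running from a git checkout, falling back to
# core.VERSION otherwise. `git describe` prints either a bare tag ("5.0.9")
# or "<tag>-<commits-since-tag>-g<short-hash>"; getPipSafeVersion() keeps only
# the first two dash-separated fields so pip accepts the result. A worked
# example with a hypothetical describe output:
_described = "5.0.9-12-gac17fde"  # hypothetical `git describe` output
assert "-".join(_described.split("-")[:2]) == "5.0.9-12"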
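# Editorial sketch: driving the ws://<host>:<port>/ws endpoint declared in
# websocket_main.py above. The JSON fields (word, path, format, max) follow
# handle_browse_request(); routing in message_received() only checks that the
# text contains "browse". The `websockets` client library, the file path and
# the format name below are illustrative assumptions, not pyglossary
# dependencies or guarantees:
#
#     import asyncio, json
#     import websockets  # third-party client, demo only
#
#     async def browse_demo() -> None:
#         async with websockets.connect("ws://127.0.0.1:1984/ws") as ws:
#             await ws.send(json.dumps({
#                 "type": "browse",
#                 "word": "hello",
#                 "path": "/path/to/some.mdx",  # hypothetical glossary file
#                 "format": "OctopusMdict",  # hypothetical format name
#                 "max": 10,
#             }))
#             print(await ws.recv())  # first {"type": "browse", ...} reply
#
#     asyncio.run(browse_demo())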
pyglossary-5.0.9/pyglossary/ui/wcwidth/__init__.py000066400000000000000000000004111476751035500223650ustar00rootroot00000000000000'\nWcwidth module.\n\nhttps://github.com/jquast/wcwidth\n' from.wcwidth import ZERO_WIDTH,WIDE_EASTASIAN,VS16_NARROW_TO_WIDE,wcwidth,wcswidth,_bisearch,list_versions,_wcmatch_version,_wcversion_value __all__='wcwidth','wcswidth','list_versions' __version__='0.2.13'pyglossary-5.0.9/pyglossary/ui/wcwidth/table_vs16.py000066400000000000000000000033011476751035500225750ustar00rootroot00000000000000'\nExports VS16_NARROW_TO_WIDE table keyed by supporting unicode version level.\n\nThis code generated by wcwidth/bin/update-tables.py on 2023-11-07 16:43:49 UTC.\n' VS16_NARROW_TO_WIDE={'9.0.0':((35,35),(42,42),(48,57),(169,169),(174,174),(8252,8252),(8265,8265),(8482,8482),(8505,8505),(8596,8601),(8617,8618),(9000,9000),(9167,9167),(9197,9199),(9201,9202),(9208,9210),(9410,9410),(9642,9643),(9654,9654),(9664,9664),(9723,9724),(9728,9732),(9742,9742),(9745,9745),(9752,9752),(9757,9757),(9760,9760),(9762,9763),(9766,9766),(9770,9770),(9774,9775),(9784,9786),(9792,9792),(9794,9794),(9823,9824),(9827,9827),(9829,9830),(9832,9832),(9851,9851),(9854,9854),(9874,9874),(9876,9879),(9881,9881),(9883,9884),(9888,9888),(9895,9895),(9904,9905),(9928,9928),(9935,9935),(9937,9937),(9939,9939),(9961,9961),(9968,9969),(9972,9972),(9975,9977),(9986,9986),(9992,9993),(9996,9997),(9999,9999),(10002,10002),(10004,10004),(10006,10006),(10013,10013),(10017,10017),(10035,10036),(10052,10052),(10055,10055),(10083,10084),(10145,10145),(10548,10549),(11013,11015),(127344,127345),(127358,127359),(127777,127777),(127780,127788),(127798,127798),(127869,127869),(127894,127895),(127897,127899),(127902,127903),(127947,127950),(127956,127967),(127987,127987),(127989,127989),(127991,127991),(128063,128063),(128065,128065),(128253,128253),(128329,128330),(128367,128368),(128371,128377),(128391,128391),(128394,128397),(128400,128400),(128421,128421),(128424,128424),(128433,128434),(128444,128444),(128450,128452),(128465,128467),(128476,128478),(128481,128481),(128483,128483),(128488,128488),(128495,128495),(128499,128499),(128506,128506),(128715,128715),(128717,128719),(128736,128741),(128745,128745),(128752,128752),(128755,128755))}pyglossary-5.0.9/pyglossary/ui/wcwidth/table_wide.py000066400000000000000000000477201476751035500227430ustar00rootroot00000000000000'\nExports WIDE_EASTASIAN table keyed by supporting unicode version level.\n\nThis code generated by wcwidth/bin/update-tables.py on 2024-01-06 01:39:49 UTC.\n' 
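# Editorial sketch: each entry below maps a Unicode version string to a sorted
# tuple of inclusive (start, end) codepoint ranges. wcwidth's _bisearch()
# helper (re-exported in __init__.py above) binary-searches these ranges,
# returning 1 on a hit and 0 otherwise; the same contract, restated with the
# stdlib on a tiny illustrative table:
import bisect as _bisect

def _in_ranges(ucs: int, table: tuple[tuple[int, int], ...]) -> int:
    # index of the last range whose start is <= ucs, or -1 if none qualifies
    idx = _bisect.bisect_right([lo for lo, _hi in table], ucs) - 1
    return int(idx >= 0 and table[idx][1] >= ucs)

assert _in_ranges(0x4E00, ((4352, 4447), (19968, 40891))) == 1  # CJK ideograph: wide
assert _in_ranges(0x41, ((4352, 4447), (19968, 40891))) == 0  # "A" falls in no range: narrow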
WIDE_EASTASIAN={'4.1.0':((4352,4441),(4447,4447),(9001,9002),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12588),(12593,12686),(12688,12727),(12736,12751),(12784,12830),(12832,12867),(12880,13054),(13056,19893),(19968,40891),(40960,42124),(42128,42182),(44032,55203),(63744,64045),(64048,64106),(64112,64217),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(131072,196605),(196608,262141)),'5.0.0':((4352,4441),(4447,4447),(9001,9002),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12588),(12593,12686),(12688,12727),(12736,12751),(12784,12830),(12832,12867),(12880,13054),(13056,19893),(19968,40891),(40960,42124),(42128,42182),(44032,55203),(63744,64045),(64048,64106),(64112,64217),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(131072,196605),(196608,262141)),'5.1.0':((4352,4441),(4447,4447),(9001,9002),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12589),(12593,12686),(12688,12727),(12736,12771),(12784,12830),(12832,12867),(12880,13054),(13056,19893),(19968,40899),(40960,42124),(42128,42182),(44032,55203),(63744,64045),(64048,64106),(64112,64217),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(131072,196605),(196608,262141)),'5.2.0':((4352,4447),(9001,9002),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12589),(12593,12686),(12688,12727),(12736,12771),(12784,12830),(12832,12871),(12880,13054),(13056,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(127488,127488),(127504,127537),(127552,127560),(131072,196605),(196608,262141)),'6.0.0':((4352,4447),(9001,9002),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12589),(12593,12686),(12688,12730),(12736,12771),(12784,12830),(12832,12871),(12880,13054),(13056,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(110592,110593),(127488,127490),(127504,127546),(127552,127560),(127568,127569),(131072,196605),(196608,262141)),'6.1.0':((4352,4447),(9001,9002),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12589),(12593,12686),(12688,12730),(12736,12771),(12784,12830),(12832,12871),(12880,13054),(13056,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(110592,110593),(127488,127490),(127504,127546),(127552,127560),(127568,127569),(131072,196605),(196608,262141)),'6.2.0':((4352,4447),(9001,9002),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12589),(12593,12686),(12688,12730),(12736,12771),(12784,12830),(12832,12871),(12880,13054),(13056,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(110592,110593),(127488,127490),(127504,127546),(127552,127560),(127568,127569),(131072,196605),(196608,262141)),'6.3.0':((4352,4447),(9
001,9002),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12589),(12593,12686),(12688,12730),(12736,12771),(12784,12830),(12832,12871),(12880,13054),(13056,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(110592,110593),(127488,127490),(127504,127546),(127552,127560),(127568,127569),(131072,196605),(196608,262141)),'7.0.0':((4352,4447),(9001,9002),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12589),(12593,12686),(12688,12730),(12736,12771),(12784,12830),(12832,12871),(12880,13054),(13056,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(110592,110593),(127488,127490),(127504,127546),(127552,127560),(127568,127569),(131072,196605),(196608,262141)),'8.0.0':((4352,4447),(9001,9002),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12589),(12593,12686),(12688,12730),(12736,12771),(12784,12830),(12832,12871),(12880,13054),(13056,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(110592,110593),(127488,127490),(127504,127546),(127552,127560),(127568,127569),(131072,196605),(196608,262141)),'9.0.0':((4352,4447),(8986,8987),(9001,9002),(9193,9196),(9200,9200),(9203,9203),(9725,9726),(9748,9749),(9800,9811),(9855,9855),(9875,9875),(9889,9889),(9898,9899),(9917,9918),(9924,9925),(9934,9934),(9940,9940),(9962,9962),(9970,9971),(9973,9973),(9978,9978),(9981,9981),(9989,9989),(9994,9995),(10024,10024),(10060,10060),(10062,10062),(10067,10069),(10071,10071),(10133,10135),(10160,10160),(10175,10175),(11035,11036),(11088,11088),(11093,11093),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12589),(12593,12686),(12688,12730),(12736,12771),(12784,12830),(12832,12871),(12880,13054),(13056,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(94176,94176),(94208,100332),(100352,101106),(110592,110593),(126980,126980),(127183,127183),(127374,127374),(127377,127386),(127488,127490),(127504,127547),(127552,127560),(127568,127569),(127744,127776),(127789,127797),(127799,127868),(127870,127891),(127904,127946),(127951,127955),(127968,127984),(127988,127988),(127992,127994),(128000,128062),(128064,128064),(128066,128252),(128255,128317),(128331,128334),(128336,128359),(128378,128378),(128405,128406),(128420,128420),(128507,128591),(128640,128709),(128716,128716),(128720,128722),(128747,128748),(128756,128758),(129296,129310),(129312,129319),(129328,129328),(129331,129342),(129344,129355),(129360,129374),(129408,129425),(129472,129472),(131072,196605),(196608,262141)),'10.0.0':((4352,4447),(8986,8987),(9001,9002),(9193,9196),(9200,9200),(9203,9203),(9725,9726),(9748,9749),(9800,9811),(9855,9855),(9875,9875),(9889,9889),(9898,9899),(9917,9918),(9924,9925),(9934,9934),(9940,9940),(9962,9962),(9970,9971),(9973,9973),(9978,9978),(9981,9981),(9989,9989),(9994,9995),(10024,10024),(10060,10060),(10062,10062),(10067,10069),(10071,10071),(10133,10135),(10160,10160),(10175,10175),(11035,11036),(11088,11088),(1109
3,11093),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12590),(12593,12686),(12688,12730),(12736,12771),(12784,12830),(12832,12871),(12880,13054),(13056,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(94176,94177),(94208,100332),(100352,101106),(110592,110878),(110960,111355),(126980,126980),(127183,127183),(127374,127374),(127377,127386),(127488,127490),(127504,127547),(127552,127560),(127568,127569),(127584,127589),(127744,127776),(127789,127797),(127799,127868),(127870,127891),(127904,127946),(127951,127955),(127968,127984),(127988,127988),(127992,127994),(128000,128062),(128064,128064),(128066,128252),(128255,128317),(128331,128334),(128336,128359),(128378,128378),(128405,128406),(128420,128420),(128507,128591),(128640,128709),(128716,128716),(128720,128722),(128747,128748),(128756,128760),(129296,129342),(129344,129356),(129360,129387),(129408,129431),(129472,129472),(129488,129510),(131072,196605),(196608,262141)),'11.0.0':((4352,4447),(8986,8987),(9001,9002),(9193,9196),(9200,9200),(9203,9203),(9725,9726),(9748,9749),(9800,9811),(9855,9855),(9875,9875),(9889,9889),(9898,9899),(9917,9918),(9924,9925),(9934,9934),(9940,9940),(9962,9962),(9970,9971),(9973,9973),(9978,9978),(9981,9981),(9989,9989),(9994,9995),(10024,10024),(10060,10060),(10062,10062),(10067,10069),(10071,10071),(10133,10135),(10160,10160),(10175,10175),(11035,11036),(11088,11088),(11093,11093),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12591),(12593,12686),(12688,12730),(12736,12771),(12784,12830),(12832,12871),(12880,13054),(13056,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(94176,94177),(94208,100337),(100352,101106),(110592,110878),(110960,111355),(126980,126980),(127183,127183),(127374,127374),(127377,127386),(127488,127490),(127504,127547),(127552,127560),(127568,127569),(127584,127589),(127744,127776),(127789,127797),(127799,127868),(127870,127891),(127904,127946),(127951,127955),(127968,127984),(127988,127988),(127992,127994),(128000,128062),(128064,128064),(128066,128252),(128255,128317),(128331,128334),(128336,128359),(128378,128378),(128405,128406),(128420,128420),(128507,128591),(128640,128709),(128716,128716),(128720,128722),(128747,128748),(128756,128761),(129296,129342),(129344,129392),(129395,129398),(129402,129402),(129404,129442),(129456,129465),(129472,129474),(129488,129535),(131072,196605),(196608,262141)),'12.0.0':((4352,4447),(8986,8987),(9001,9002),(9193,9196),(9200,9200),(9203,9203),(9725,9726),(9748,9749),(9800,9811),(9855,9855),(9875,9875),(9889,9889),(9898,9899),(9917,9918),(9924,9925),(9934,9934),(9940,9940),(9962,9962),(9970,9971),(9973,9973),(9978,9978),(9981,9981),(9989,9989),(9994,9995),(10024,10024),(10060,10060),(10062,10062),(10067,10069),(10071,10071),(10133,10135),(10160,10160),(10175,10175),(11035,11036),(11088,11088),(11093,11093),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12591),(12593,12686),(12688,12730),(12736,12771),(12784,12830),(12832,12871),(12880,13054),(13056,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(94176,9
4179),(94208,100343),(100352,101106),(110592,110878),(110928,110930),(110948,110951),(110960,111355),(126980,126980),(127183,127183),(127374,127374),(127377,127386),(127488,127490),(127504,127547),(127552,127560),(127568,127569),(127584,127589),(127744,127776),(127789,127797),(127799,127868),(127870,127891),(127904,127946),(127951,127955),(127968,127984),(127988,127988),(127992,127994),(128000,128062),(128064,128064),(128066,128252),(128255,128317),(128331,128334),(128336,128359),(128378,128378),(128405,128406),(128420,128420),(128507,128591),(128640,128709),(128716,128716),(128720,128722),(128725,128725),(128747,128748),(128756,128762),(128992,129003),(129293,129393),(129395,129398),(129402,129442),(129445,129450),(129454,129482),(129485,129535),(129648,129651),(129656,129658),(129664,129666),(129680,129685),(131072,196605),(196608,262141)),'12.1.0':((4352,4447),(8986,8987),(9001,9002),(9193,9196),(9200,9200),(9203,9203),(9725,9726),(9748,9749),(9800,9811),(9855,9855),(9875,9875),(9889,9889),(9898,9899),(9917,9918),(9924,9925),(9934,9934),(9940,9940),(9962,9962),(9970,9971),(9973,9973),(9978,9978),(9981,9981),(9989,9989),(9994,9995),(10024,10024),(10060,10060),(10062,10062),(10067,10069),(10071,10071),(10133,10135),(10160,10160),(10175,10175),(11035,11036),(11088,11088),(11093,11093),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12591),(12593,12686),(12688,12730),(12736,12771),(12784,12830),(12832,12871),(12880,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(94176,94179),(94208,100343),(100352,101106),(110592,110878),(110928,110930),(110948,110951),(110960,111355),(126980,126980),(127183,127183),(127374,127374),(127377,127386),(127488,127490),(127504,127547),(127552,127560),(127568,127569),(127584,127589),(127744,127776),(127789,127797),(127799,127868),(127870,127891),(127904,127946),(127951,127955),(127968,127984),(127988,127988),(127992,127994),(128000,128062),(128064,128064),(128066,128252),(128255,128317),(128331,128334),(128336,128359),(128378,128378),(128405,128406),(128420,128420),(128507,128591),(128640,128709),(128716,128716),(128720,128722),(128725,128725),(128747,128748),(128756,128762),(128992,129003),(129293,129393),(129395,129398),(129402,129442),(129445,129450),(129454,129482),(129485,129535),(129648,129651),(129656,129658),(129664,129666),(129680,129685),(131072,196605),(196608,262141)),'13.0.0':((4352,4447),(8986,8987),(9001,9002),(9193,9196),(9200,9200),(9203,9203),(9725,9726),(9748,9749),(9800,9811),(9855,9855),(9875,9875),(9889,9889),(9898,9899),(9917,9918),(9924,9925),(9934,9934),(9940,9940),(9962,9962),(9970,9971),(9973,9973),(9978,9978),(9981,9981),(9989,9989),(9994,9995),(10024,10024),(10060,10060),(10062,10062),(10067,10069),(10071,10071),(10133,10135),(10160,10160),(10175,10175),(11035,11036),(11088,11088),(11093,11093),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12591),(12593,12686),(12688,12771),(12784,12830),(12832,12871),(12880,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(94176,94179),(94208,100343),(100352,101589),(101632,101640),(110592,110878),(110928,110930),(110948,110951),(110960,111355),(126980,126980),(127183,127183),(127374,127374),(127377,127386),(127488,127490),(127504,127547),
(127552,127560),(127568,127569),(127584,127589),(127744,127776),(127789,127797),(127799,127868),(127870,127891),(127904,127946),(127951,127955),(127968,127984),(127988,127988),(127992,127994),(128000,128062),(128064,128064),(128066,128252),(128255,128317),(128331,128334),(128336,128359),(128378,128378),(128405,128406),(128420,128420),(128507,128591),(128640,128709),(128716,128716),(128720,128722),(128725,128727),(128747,128748),(128756,128764),(128992,129003),(129292,129338),(129340,129349),(129351,129400),(129402,129483),(129485,129535),(129648,129652),(129656,129658),(129664,129670),(129680,129704),(129712,129718),(129728,129730),(129744,129750),(131072,196605),(196608,262141)),'14.0.0':((4352,4447),(8986,8987),(9001,9002),(9193,9196),(9200,9200),(9203,9203),(9725,9726),(9748,9749),(9800,9811),(9855,9855),(9875,9875),(9889,9889),(9898,9899),(9917,9918),(9924,9925),(9934,9934),(9940,9940),(9962,9962),(9970,9971),(9973,9973),(9978,9978),(9981,9981),(9989,9989),(9994,9995),(10024,10024),(10060,10060),(10062,10062),(10067,10069),(10071,10071),(10133,10135),(10160,10160),(10175,10175),(11035,11036),(11088,11088),(11093,11093),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12591),(12593,12686),(12688,12771),(12784,12830),(12832,12871),(12880,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(94176,94179),(94208,100343),(100352,101589),(101632,101640),(110576,110579),(110581,110587),(110589,110590),(110592,110882),(110928,110930),(110948,110951),(110960,111355),(126980,126980),(127183,127183),(127374,127374),(127377,127386),(127488,127490),(127504,127547),(127552,127560),(127568,127569),(127584,127589),(127744,127776),(127789,127797),(127799,127868),(127870,127891),(127904,127946),(127951,127955),(127968,127984),(127988,127988),(127992,127994),(128000,128062),(128064,128064),(128066,128252),(128255,128317),(128331,128334),(128336,128359),(128378,128378),(128405,128406),(128420,128420),(128507,128591),(128640,128709),(128716,128716),(128720,128722),(128725,128727),(128733,128735),(128747,128748),(128756,128764),(128992,129003),(129008,129008),(129292,129338),(129340,129349),(129351,129535),(129648,129652),(129656,129660),(129664,129670),(129680,129708),(129712,129722),(129728,129733),(129744,129753),(129760,129767),(129776,129782),(131072,196605),(196608,262141)),'15.0.0':((4352,4447),(8986,8987),(9001,9002),(9193,9196),(9200,9200),(9203,9203),(9725,9726),(9748,9749),(9800,9811),(9855,9855),(9875,9875),(9889,9889),(9898,9899),(9917,9918),(9924,9925),(9934,9934),(9940,9940),(9962,9962),(9970,9971),(9973,9973),(9978,9978),(9981,9981),(9989,9989),(9994,9995),(10024,10024),(10060,10060),(10062,10062),(10067,10069),(10071,10071),(10133,10135),(10160,10160),(10175,10175),(11035,11036),(11088,11088),(11093,11093),(11904,11929),(11931,12019),(12032,12245),(12272,12283),(12288,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12591),(12593,12686),(12688,12771),(12784,12830),(12832,12871),(12880,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(94176,94179),(94208,100343),(100352,101589),(101632,101640),(110576,110579),(110581,110587),(110589,110590),(110592,110882),(110898,110898),(110928,110930),(110933,110933),(110948,110951),(110960,111355),(126980,126980),(127183,127183),(127374,127374),(127377,127386),(12
7488,127490),(127504,127547),(127552,127560),(127568,127569),(127584,127589),(127744,127776),(127789,127797),(127799,127868),(127870,127891),(127904,127946),(127951,127955),(127968,127984),(127988,127988),(127992,127994),(128000,128062),(128064,128064),(128066,128252),(128255,128317),(128331,128334),(128336,128359),(128378,128378),(128405,128406),(128420,128420),(128507,128591),(128640,128709),(128716,128716),(128720,128722),(128725,128727),(128732,128735),(128747,128748),(128756,128764),(128992,129003),(129008,129008),(129292,129338),(129340,129349),(129351,129535),(129648,129660),(129664,129672),(129680,129725),(129727,129733),(129742,129755),(129760,129768),(129776,129784),(131072,196605),(196608,262141)),'15.1.0':((4352,4447),(8986,8987),(9001,9002),(9193,9196),(9200,9200),(9203,9203),(9725,9726),(9748,9749),(9800,9811),(9855,9855),(9875,9875),(9889,9889),(9898,9899),(9917,9918),(9924,9925),(9934,9934),(9940,9940),(9962,9962),(9970,9971),(9973,9973),(9978,9978),(9981,9981),(9989,9989),(9994,9995),(10024,10024),(10060,10060),(10062,10062),(10067,10069),(10071,10071),(10133,10135),(10160,10160),(10175,10175),(11035,11036),(11088,11088),(11093,11093),(11904,11929),(11931,12019),(12032,12245),(12272,12329),(12336,12350),(12353,12438),(12443,12543),(12549,12591),(12593,12686),(12688,12771),(12783,12830),(12832,12871),(12880,19903),(19968,42124),(42128,42182),(43360,43388),(44032,55203),(63744,64255),(65040,65049),(65072,65106),(65108,65126),(65128,65131),(65281,65376),(65504,65510),(94176,94179),(94208,100343),(100352,101589),(101632,101640),(110576,110579),(110581,110587),(110589,110590),(110592,110882),(110898,110898),(110928,110930),(110933,110933),(110948,110951),(110960,111355),(126980,126980),(127183,127183),(127374,127374),(127377,127386),(127488,127490),(127504,127547),(127552,127560),(127568,127569),(127584,127589),(127744,127776),(127789,127797),(127799,127868),(127870,127891),(127904,127946),(127951,127955),(127968,127984),(127988,127988),(127992,127994),(128000,128062),(128064,128064),(128066,128252),(128255,128317),(128331,128334),(128336,128359),(128378,128378),(128405,128406),(128420,128420),(128507,128591),(128640,128709),(128716,128716),(128720,128722),(128725,128727),(128732,128735),(128747,128748),(128756,128764),(128992,129003),(129008,129008),(129292,129338),(129340,129349),(129351,129535),(129648,129660),(129664,129672),(129680,129725),(129727,129733),(129742,129755),(129760,129768),(129776,129784),(131072,196605),(196608,262141))}pyglossary-5.0.9/pyglossary/ui/wcwidth/table_zero.py000066400000000000000000001700511476751035500227640ustar00rootroot00000000000000'\nExports ZERO_WIDTH table keyed by supporting unicode version level.\n\nThis code generated by wcwidth/bin/update-tables.py on 2024-01-04 07:14:52 UTC.\n' 
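# Editorial note: ZERO_WIDTH below lists non-spacing codepoints (combining
# marks, format controls and similar); wcwidth() reports 0 columns for these,
# so decomposed text still measures its rendered width. The stdlib agrees on
# the combining-mark cases:
import unicodedata as _ud

assert _ud.combining("\u0301") != 0  # U+0301 COMBINING ACUTE ACCENT attaches to the previous cell
assert _ud.combining("a") == 0  # ordinary letters occupy their own cell
# Hedged usage of the public API re-exported in __init__.py (to be called from
# application code, not at import time here):
#     from pyglossary.ui.wcwidth import wcwidth, wcswidth
#     wcwidth("\u0301")       # -> 0
#     wcswidth("cafe\u0301")  # -> 4; decomposed "café" fills four cells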
ZERO_WIDTH={'4.1.0':((0,0),(173,173),(768,879),(1155,1158),(1160,1161),(1425,1465),(1467,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1539),(1552,1557),(1611,1630),(1648,1648),(1750,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2305,2307),(2364,2364),(2366,2381),(2385,2388),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2672,2673),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2817,2819),(2876,2876),(2878,2883),(2887,2888),(2891,2893),(2902,2903),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3073,3075),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3202,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3330,3331),(3390,3395),(3398,3400),(3402,3405),(3415,3415),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3769),(3771,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3984,3991),(3993,4028),(4038,4038),(4140,4146),(4150,4153),(4182,4185),(4448,4607),(4959,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6157),(6313,6313),(6432,6443),(6448,6459),(6576,6592),(6600,6601),(6679,6683),(7616,7619),(8203,8207),(8232,8238),(8288,8291),(8298,8303),(8400,8427),(12330,12335),(12441,12442),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(55216,55295),(64286,64286),(65024,65039),(65056,65059),(65279,65279),(65529,65531),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(917505,917505),(917536,917631),(917760,917999)),'5.0.0':((0,0),(173,173),(768,879),(1155,1158),(1160,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1539),(1552,1557),(1611,1630),(1648,1648),(1750,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2305,2307),(2364,2364),(2366,2381),(2385,2388),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2672,2673),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2817,2819),(2876,2876),(2878,2883),(2887,2888),(2891,2893),(2902,2903),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3073,3075),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3202,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3330,3331),(3390,3395),(3398,3400),(3402,3405),(3415,3415),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3769),(3771,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3984,3991),(3993,4028),(4038,4038),(4140,4146),(4150,4153),(4182,4185),(4448,4607),(4959,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6157),(6313,6313),(6432,6443),(6448,6459),(6576,6592),(6600,6601),(6679,6683),(6912,6916),(6964,6980),(7019,7027),(7616,7626),(7678,7679),(8203,8207),(8232,8238),(8288,8291),(8298,8303),(8400,8431),(12330,12335),(12441,12442),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(55216,55295),(64286,64286),(65024,65039),(65056,65059),(65279,65279),(65529,65531),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),
(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(917505,917505),(917536,917631),(917760,917999)),'5.1.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1539),(1552,1562),(1611,1630),(1648,1648),(1750,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2305,2307),(2364,2364),(2366,2381),(2385,2388),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2902,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3073,3075),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3202,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3330,3331),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3769),(3771,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3984,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4448,4607),(4959,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6157),(6313,6313),(6432,6443),(6448,6459),(6576,6592),(6600,6601),(6679,6683),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7082),(7204,7223),(7616,7654),(7678,7679),(8203,8207),(8232,8238),(8288,8292),(8298,8303),(8400,8432),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42620,42621),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43136,43137),(43188,43204),(43302,43309),(43335,43347),(43561,43574),(43587,43587),(43596,43597),(55216,55295),(64286,64286),(65024,65039),(65056,65062),(65279,65279),(65529,65531),(66045,66045),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(917505,917505),(917536,917631),(917760,917999)),'5.2.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1539),(1552,1562),(1611,1630),(1648,1648),(1750,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2304,2307),(2364,2364),(2366,2382),(2385,2389),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2902,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3073,3075),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3202,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3330,3331),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3769),(3771,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3984,3991),(3993,4028),(4038,4038),(41
39,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4959,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6157),(6313,6313),(6432,6443),(6448,6459),(6576,6592),(6600,6601),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7082),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7410,7410),(7616,7654),(7677,7679),(8203,8207),(8232,8238),(8288,8292),(8298,8303),(8400,8432),(11503,11505),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42620,42621),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43136,43137),(43188,43204),(43232,43249),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43561,43574),(43587,43587),(43596,43597),(43643,43643),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65062),(65279,65279),(65529,65531),(66045,66045),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(69760,69762),(69808,69818),(69821,69821),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(917505,917505),(917536,917631),(917760,917999)),'6.0.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1539),(1552,1562),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2304,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2902,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3073,3075),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3202,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3330,3331),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3769),(3771,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6157),(6313,6313),(6432,6443),(6448,6459),(6576,6592),(6600,6601),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7082),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7410,7410),(7616,7654),(7676,7679),(8203,8207),(8232,8238),(8288,8292),(8298,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42620,42621),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43136,43137),(43188,43204),(43232,43249),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43561,43574),(43587,43587),(43596,43597),(43643,43643),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),
(65056,65062),(65279,65279),(65529,65531),(66045,66045),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(69632,69634),(69688,69702),(69760,69762),(69808,69818),(69821,69821),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(917505,917505),(917536,917631),(917760,917999)),'6.1.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1540),(1552,1562),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2276,2302),(2304,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2902,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3073,3075),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3202,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3330,3331),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3769),(3771,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6157),(6313,6313),(6432,6443),(6448,6459),(6576,6592),(6600,6601),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7410,7412),(7616,7654),(7676,7679),(8203,8207),(8232,8238),(8288,8292),(8298,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42655,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43136,43137),(43188,43204),(43232,43249),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43561,43574),(43587,43587),(43596,43597),(43643,43643),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65062),(65279,65279),(65529,65531),(66045,66045),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(69632,69634),(69688,69702),(69760,69762),(69808,69818),(69821,69821),(69888,69890),(69927,69940),(70016,70018),(70067,70080),(71339,71351),(94033,94078),(94095,94098),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(917505,917505),(917536,917631),(917760,917999)),'6.2.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1540),(1552,1562),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2276,2302),(2304,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(
2530,2531),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2902,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3073,3075),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3202,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3330,3331),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3769),(3771,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6157),(6313,6313),(6432,6443),(6448,6459),(6576,6592),(6600,6601),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7410,7412),(7616,7654),(7676,7679),(8203,8207),(8232,8238),(8288,8292),(8298,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42655,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43136,43137),(43188,43204),(43232,43249),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43561,43574),(43587,43587),(43596,43597),(43643,43643),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65062),(65279,65279),(65529,65531),(66045,66045),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(69632,69634),(69688,69702),(69760,69762),(69808,69818),(69821,69821),(69888,69890),(69927,69940),(70016,70018),(70067,70080),(71339,71351),(94033,94078),(94095,94098),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(917505,917505),(917536,917631),(917760,917999)),'6.3.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1540),(1552,1562),(1564,1564),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2276,2302),(2304,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2902,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3073,3075),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3202,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3330,3331),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3769),(3771,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3
974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6158),(6313,6313),(6432,6443),(6448,6459),(6576,6592),(6600,6601),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7410,7412),(7616,7654),(7676,7679),(8203,8207),(8232,8238),(8288,8292),(8294,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42655,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43136,43137),(43188,43204),(43232,43249),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43561,43574),(43587,43587),(43596,43597),(43643,43643),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65062),(65279,65279),(65529,65531),(66045,66045),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(69632,69634),(69688,69702),(69760,69762),(69808,69818),(69821,69821),(69888,69890),(69927,69940),(70016,70018),(70067,70080),(71339,71351),(94033,94078),(94095,94098),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(917505,917505),(917536,917631),(917760,917999)),'7.0.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1541),(1552,1562),(1564,1564),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2276,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2902,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3072,3075),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3201,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3329,3331),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3769),(3771,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6158),(6313,6313),(6432,6443),(6448,6459),(6576,6592),(6600,6601),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6832,6846),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7410,7412),(7416,7417),(7616,7669),(7676,7679),(8203,8207),(8232,8238),(8288,8292),(8294,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42655,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(
43136,43137),(43188,43204),(43232,43249),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43493,43493),(43561,43574),(43587,43587),(43596,43597),(43643,43645),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65069),(65279,65279),(65529,65531),(66045,66045),(66272,66272),(66422,66426),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(68325,68326),(69632,69634),(69688,69702),(69759,69762),(69808,69818),(69821,69821),(69888,69890),(69927,69940),(70003,70003),(70016,70018),(70067,70080),(70188,70199),(70367,70378),(70401,70403),(70460,70460),(70462,70468),(70471,70472),(70475,70477),(70487,70487),(70498,70499),(70502,70508),(70512,70516),(70832,70851),(71087,71093),(71096,71104),(71216,71232),(71339,71351),(92912,92916),(92976,92982),(94033,94078),(94095,94098),(113821,113822),(113824,113827),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(125136,125142),(917505,917505),(917536,917631),(917760,917999)),'8.0.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1541),(1552,1562),(1564,1564),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2275,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2902,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3072,3075),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3201,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3329,3331),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3769),(3771,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6158),(6313,6313),(6432,6443),(6448,6459),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6832,6846),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7410,7412),(7416,7417),(7616,7669),(7676,7679),(8203,8207),(8232,8238),(8288,8292),(8294,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42654,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43136,43137),(43188,43204),(43232,43249),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43493,43493),(43561,43574),(43587,43587),(43596,43597),(43643,43645),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65071),(65279,65279),(65529,65531),(66045,66045),(66272,66272),(66422,66426),(68097,68099),(681
01,68102),(68108,68111),(68152,68154),(68159,68159),(68325,68326),(69632,69634),(69688,69702),(69759,69762),(69808,69818),(69821,69821),(69888,69890),(69927,69940),(70003,70003),(70016,70018),(70067,70080),(70090,70092),(70188,70199),(70367,70378),(70400,70403),(70460,70460),(70462,70468),(70471,70472),(70475,70477),(70487,70487),(70498,70499),(70502,70508),(70512,70516),(70832,70851),(71087,71093),(71096,71104),(71132,71133),(71216,71232),(71339,71351),(71453,71467),(92912,92916),(92976,92982),(94033,94078),(94095,94098),(113821,113822),(113824,113827),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(121344,121398),(121403,121452),(121461,121461),(121476,121476),(121499,121503),(121505,121519),(125136,125142),(127995,127999),(917505,917505),(917536,917631),(917760,917999)),'9.0.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1541),(1552,1562),(1564,1564),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2260,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2902,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3072,3075),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3201,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3329,3331),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3769),(3771,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6158),(6277,6278),(6313,6313),(6432,6443),(6448,6459),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6832,6846),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7410,7412),(7416,7417),(7616,7669),(7675,7679),(8203,8207),(8232,8238),(8288,8292),(8294,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42654,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43136,43137),(43188,43205),(43232,43249),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43493,43493),(43561,43574),(43587,43587),(43596,43597),(43643,43645),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65071),(65279,65279),(65529,65531),(66045,66045),(66272,66272),(66422,66426),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(68325,68326),(69632,69634),(69688,69702),(69759,69762),(69808,69818),(69821,69821),(69888,69890),(69927,69940),(70003,70003),(70016,70018),(70067,70080),(70090,70092),(70188,70199),(70206,70206),(70367,70378),(70400,70
403),(70460,70460),(70462,70468),(70471,70472),(70475,70477),(70487,70487),(70498,70499),(70502,70508),(70512,70516),(70709,70726),(70832,70851),(71087,71093),(71096,71104),(71132,71133),(71216,71232),(71339,71351),(71453,71467),(72751,72758),(72760,72767),(72850,72871),(72873,72886),(92912,92916),(92976,92982),(94033,94078),(94095,94098),(113821,113822),(113824,113827),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(121344,121398),(121403,121452),(121461,121461),(121476,121476),(121499,121503),(121505,121519),(122880,122886),(122888,122904),(122907,122913),(122915,122916),(122918,122922),(125136,125142),(125252,125258),(127995,127999),(917505,917505),(917536,917631),(917760,917999)),'10.0.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1541),(1552,1562),(1564,1564),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2260,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2810,2815),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2902,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3072,3075),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3201,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3328,3331),(3387,3388),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3769),(3771,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6158),(6277,6278),(6313,6313),(6432,6443),(6448,6459),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6832,6846),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7410,7412),(7415,7417),(7616,7673),(7675,7679),(8203,8207),(8232,8238),(8288,8292),(8294,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42654,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43136,43137),(43188,43205),(43232,43249),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43493,43493),(43561,43574),(43587,43587),(43596,43597),(43643,43645),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65071),(65279,65279),(65529,65531),(66045,66045),(66272,66272),(66422,66426),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(68325,68326),(69632,69634),(69688,69702),(69759,69762),(69808,69818),(69821,69821),(69888,69890),(69927,69940),(70003,70003),(70016,70018),(70067,70080),(70090,70092),(70188,70199),(70206,70206),(70367,70378),(70400,70403),(70460,70460),(70462,70468),(70471,70472),(70475,70477),(7048
7,70487),(70498,70499),(70502,70508),(70512,70516),(70709,70726),(70832,70851),(71087,71093),(71096,71104),(71132,71133),(71216,71232),(71339,71351),(71453,71467),(72193,72202),(72243,72249),(72251,72254),(72263,72263),(72273,72283),(72330,72345),(72751,72758),(72760,72767),(72850,72871),(72873,72886),(73009,73014),(73018,73018),(73020,73021),(73023,73029),(73031,73031),(92912,92916),(92976,92982),(94033,94078),(94095,94098),(113821,113822),(113824,113827),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(121344,121398),(121403,121452),(121461,121461),(121476,121476),(121499,121503),(121505,121519),(122880,122886),(122888,122904),(122907,122913),(122915,122916),(122918,122922),(125136,125142),(125252,125258),(127995,127999),(917505,917505),(917536,917631),(917760,917999)),'11.0.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1541),(1552,1562),(1564,1564),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2045,2045),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2259,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2558,2558),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2810,2815),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2902,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3072,3076),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3201,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3328,3331),(3387,3388),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3769),(3771,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6158),(6277,6278),(6313,6313),(6432,6443),(6448,6459),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6832,6846),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7410,7412),(7415,7417),(7616,7673),(7675,7679),(8203,8207),(8232,8238),(8288,8292),(8294,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42654,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43136,43137),(43188,43205),(43232,43249),(43263,43263),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43493,43493),(43561,43574),(43587,43587),(43596,43597),(43643,43645),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65071),(65279,65279),(65529,65531),(66045,66045),(66272,66272),(66422,66426),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(68325,68326),(68900,68903),(69446,69456),(69632,69634),(69688,69702),(69759,69762),(69808,69818),(69821,69821),(69837,69837),(69888,69890),(69927,69940),(6995
7,69958),(70003,70003),(70016,70018),(70067,70080),(70089,70092),(70188,70199),(70206,70206),(70367,70378),(70400,70403),(70459,70460),(70462,70468),(70471,70472),(70475,70477),(70487,70487),(70498,70499),(70502,70508),(70512,70516),(70709,70726),(70750,70750),(70832,70851),(71087,71093),(71096,71104),(71132,71133),(71216,71232),(71339,71351),(71453,71467),(71724,71738),(72193,72202),(72243,72249),(72251,72254),(72263,72263),(72273,72283),(72330,72345),(72751,72758),(72760,72767),(72850,72871),(72873,72886),(73009,73014),(73018,73018),(73020,73021),(73023,73029),(73031,73031),(73098,73102),(73104,73105),(73107,73111),(73459,73462),(92912,92916),(92976,92982),(94033,94078),(94095,94098),(113821,113822),(113824,113827),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(121344,121398),(121403,121452),(121461,121461),(121476,121476),(121499,121503),(121505,121519),(122880,122886),(122888,122904),(122907,122913),(122915,122916),(122918,122922),(125136,125142),(125252,125258),(127995,127999),(917505,917505),(917536,917631),(917760,917999)),'12.0.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1541),(1552,1562),(1564,1564),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2045,2045),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2259,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2558,2558),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2810,2815),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2902,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3072,3076),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3201,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3328,3331),(3387,3388),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6158),(6277,6278),(6313,6313),(6432,6443),(6448,6459),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6832,6846),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7412,7412),(7415,7417),(7616,7673),(7675,7679),(8203,8207),(8232,8238),(8288,8292),(8294,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42654,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43136,43137),(43188,43205),(43232,43249),(43263,43263),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43493,43493),(43561,43574),(43587,43587),(43596,43597),(43643,43645),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65071),(65279,65279),(65529,65531),(66045,66045),(66
272,66272),(66422,66426),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(68325,68326),(68900,68903),(69446,69456),(69632,69634),(69688,69702),(69759,69762),(69808,69818),(69821,69821),(69837,69837),(69888,69890),(69927,69940),(69957,69958),(70003,70003),(70016,70018),(70067,70080),(70089,70092),(70188,70199),(70206,70206),(70367,70378),(70400,70403),(70459,70460),(70462,70468),(70471,70472),(70475,70477),(70487,70487),(70498,70499),(70502,70508),(70512,70516),(70709,70726),(70750,70750),(70832,70851),(71087,71093),(71096,71104),(71132,71133),(71216,71232),(71339,71351),(71453,71467),(71724,71738),(72145,72151),(72154,72160),(72164,72164),(72193,72202),(72243,72249),(72251,72254),(72263,72263),(72273,72283),(72330,72345),(72751,72758),(72760,72767),(72850,72871),(72873,72886),(73009,73014),(73018,73018),(73020,73021),(73023,73029),(73031,73031),(73098,73102),(73104,73105),(73107,73111),(73459,73462),(78896,78904),(92912,92916),(92976,92982),(94031,94031),(94033,94087),(94095,94098),(113821,113822),(113824,113827),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(121344,121398),(121403,121452),(121461,121461),(121476,121476),(121499,121503),(121505,121519),(122880,122886),(122888,122904),(122907,122913),(122915,122916),(122918,122922),(123184,123190),(123628,123631),(125136,125142),(125252,125258),(127995,127999),(917505,917505),(917536,917631),(917760,917999)),'12.1.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1541),(1552,1562),(1564,1564),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2045,2045),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2259,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2558,2558),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2810,2815),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2902,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3072,3076),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3201,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3328,3331),(3387,3388),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3458,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6158),(6277,6278),(6313,6313),(6432,6443),(6448,6459),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6832,6846),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7412,7412),(7415,7417),(7616,7673),(7675,7679),(8203,8207),(8232,8238),(8288,8292),(8294,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42654,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43136,43137),(43188,43205),(43232,43249),(43263,4326
3),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43493,43493),(43561,43574),(43587,43587),(43596,43597),(43643,43645),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65071),(65279,65279),(65529,65531),(66045,66045),(66272,66272),(66422,66426),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(68325,68326),(68900,68903),(69446,69456),(69632,69634),(69688,69702),(69759,69762),(69808,69818),(69821,69821),(69837,69837),(69888,69890),(69927,69940),(69957,69958),(70003,70003),(70016,70018),(70067,70080),(70089,70092),(70188,70199),(70206,70206),(70367,70378),(70400,70403),(70459,70460),(70462,70468),(70471,70472),(70475,70477),(70487,70487),(70498,70499),(70502,70508),(70512,70516),(70709,70726),(70750,70750),(70832,70851),(71087,71093),(71096,71104),(71132,71133),(71216,71232),(71339,71351),(71453,71467),(71724,71738),(72145,72151),(72154,72160),(72164,72164),(72193,72202),(72243,72249),(72251,72254),(72263,72263),(72273,72283),(72330,72345),(72751,72758),(72760,72767),(72850,72871),(72873,72886),(73009,73014),(73018,73018),(73020,73021),(73023,73029),(73031,73031),(73098,73102),(73104,73105),(73107,73111),(73459,73462),(78896,78904),(92912,92916),(92976,92982),(94031,94031),(94033,94087),(94095,94098),(113821,113822),(113824,113827),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(121344,121398),(121403,121452),(121461,121461),(121476,121476),(121499,121503),(121505,121519),(122880,122886),(122888,122904),(122907,122913),(122915,122916),(122918,122922),(123184,123190),(123628,123631),(125136,125142),(125252,125258),(127995,127999),(917505,917505),(917536,917631),(917760,917999)),'13.0.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1541),(1552,1562),(1564,1564),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2045,2045),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2259,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2558,2558),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2810,2815),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2901,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3072,3076),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3201,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3328,3331),(3387,3388),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3457,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3772),(3784,3789),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5908),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6158),(6277,6278),(6313,6313),(6432,6443),(6448,6459),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6832,6848),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400)
,(7405,7405),(7412,7412),(7415,7417),(7616,7673),(7675,7679),(8203,8207),(8232,8238),(8288,8292),(8294,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42654,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43052,43052),(43136,43137),(43188,43205),(43232,43249),(43263,43263),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43493,43493),(43561,43574),(43587,43587),(43596,43597),(43643,43645),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65071),(65279,65279),(65529,65531),(66045,66045),(66272,66272),(66422,66426),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(68325,68326),(68900,68903),(69291,69292),(69446,69456),(69632,69634),(69688,69702),(69759,69762),(69808,69818),(69821,69821),(69837,69837),(69888,69890),(69927,69940),(69957,69958),(70003,70003),(70016,70018),(70067,70080),(70089,70092),(70094,70095),(70188,70199),(70206,70206),(70367,70378),(70400,70403),(70459,70460),(70462,70468),(70471,70472),(70475,70477),(70487,70487),(70498,70499),(70502,70508),(70512,70516),(70709,70726),(70750,70750),(70832,70851),(71087,71093),(71096,71104),(71132,71133),(71216,71232),(71339,71351),(71453,71467),(71724,71738),(71984,71989),(71991,71992),(71995,71998),(72000,72000),(72002,72003),(72145,72151),(72154,72160),(72164,72164),(72193,72202),(72243,72249),(72251,72254),(72263,72263),(72273,72283),(72330,72345),(72751,72758),(72760,72767),(72850,72871),(72873,72886),(73009,73014),(73018,73018),(73020,73021),(73023,73029),(73031,73031),(73098,73102),(73104,73105),(73107,73111),(73459,73462),(78896,78904),(92912,92916),(92976,92982),(94031,94031),(94033,94087),(94095,94098),(94180,94180),(94192,94193),(113821,113822),(113824,113827),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(121344,121398),(121403,121452),(121461,121461),(121476,121476),(121499,121503),(121505,121519),(122880,122886),(122888,122904),(122907,122913),(122915,122916),(122918,122922),(123184,123190),(123628,123631),(125136,125142),(125252,125258),(127995,127999),(917505,917505),(917536,917631),(917760,917999)),'14.0.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1541),(1552,1562),(1564,1564),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2045,2045),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2192,2193),(2200,2207),(2250,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2558,2558),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2810,2815),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2901,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3072,3076),(3132,3132),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3201,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3328,3331),(3387,3388),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3457,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3772),(3784,3789),(3864,3865),(3893,3
893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5909),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6159),(6277,6278),(6313,6313),(6432,6443),(6448,6459),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6832,6862),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7412,7412),(7415,7417),(7616,7679),(8203,8207),(8232,8238),(8288,8292),(8294,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42654,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43052,43052),(43136,43137),(43188,43205),(43232,43249),(43263,43263),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43493,43493),(43561,43574),(43587,43587),(43596,43597),(43643,43645),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65071),(65279,65279),(65529,65531),(66045,66045),(66272,66272),(66422,66426),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(68325,68326),(68900,68903),(69291,69292),(69446,69456),(69506,69509),(69632,69634),(69688,69702),(69744,69744),(69747,69748),(69759,69762),(69808,69818),(69821,69821),(69826,69826),(69837,69837),(69888,69890),(69927,69940),(69957,69958),(70003,70003),(70016,70018),(70067,70080),(70089,70092),(70094,70095),(70188,70199),(70206,70206),(70367,70378),(70400,70403),(70459,70460),(70462,70468),(70471,70472),(70475,70477),(70487,70487),(70498,70499),(70502,70508),(70512,70516),(70709,70726),(70750,70750),(70832,70851),(71087,71093),(71096,71104),(71132,71133),(71216,71232),(71339,71351),(71453,71467),(71724,71738),(71984,71989),(71991,71992),(71995,71998),(72000,72000),(72002,72003),(72145,72151),(72154,72160),(72164,72164),(72193,72202),(72243,72249),(72251,72254),(72263,72263),(72273,72283),(72330,72345),(72751,72758),(72760,72767),(72850,72871),(72873,72886),(73009,73014),(73018,73018),(73020,73021),(73023,73029),(73031,73031),(73098,73102),(73104,73105),(73107,73111),(73459,73462),(78896,78904),(92912,92916),(92976,92982),(94031,94031),(94033,94087),(94095,94098),(94180,94180),(94192,94193),(113821,113822),(113824,113827),(118528,118573),(118576,118598),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(121344,121398),(121403,121452),(121461,121461),(121476,121476),(121499,121503),(121505,121519),(122880,122886),(122888,122904),(122907,122913),(122915,122916),(122918,122922),(123184,123190),(123566,123566),(123628,123631),(125136,125142),(125252,125258),(127995,127999),(917505,917505),(917536,917631),(917760,917999)),'15.0.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1541),(1552,1562),(1564,1564),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2045,2045),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2192,2193),(2200,2207),(2250,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2558,2558),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2
748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2810,2815),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2901,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3072,3076),(3132,3132),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3201,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3315,3315),(3328,3331),(3387,3388),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3457,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3772),(3784,3790),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5909),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6159),(6277,6278),(6313,6313),(6432,6443),(6448,6459),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6832,6862),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7412,7412),(7415,7417),(7616,7679),(8203,8207),(8232,8238),(8288,8292),(8294,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42654,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43052,43052),(43136,43137),(43188,43205),(43232,43249),(43263,43263),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43493,43493),(43561,43574),(43587,43587),(43596,43597),(43643,43645),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65071),(65279,65279),(65529,65531),(66045,66045),(66272,66272),(66422,66426),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(68325,68326),(68900,68903),(69291,69292),(69373,69375),(69446,69456),(69506,69509),(69632,69634),(69688,69702),(69744,69744),(69747,69748),(69759,69762),(69808,69818),(69821,69821),(69826,69826),(69837,69837),(69888,69890),(69927,69940),(69957,69958),(70003,70003),(70016,70018),(70067,70080),(70089,70092),(70094,70095),(70188,70199),(70206,70206),(70209,70209),(70367,70378),(70400,70403),(70459,70460),(70462,70468),(70471,70472),(70475,70477),(70487,70487),(70498,70499),(70502,70508),(70512,70516),(70709,70726),(70750,70750),(70832,70851),(71087,71093),(71096,71104),(71132,71133),(71216,71232),(71339,71351),(71453,71467),(71724,71738),(71984,71989),(71991,71992),(71995,71998),(72000,72000),(72002,72003),(72145,72151),(72154,72160),(72164,72164),(72193,72202),(72243,72249),(72251,72254),(72263,72263),(72273,72283),(72330,72345),(72751,72758),(72760,72767),(72850,72871),(72873,72886),(73009,73014),(73018,73018),(73020,73021),(73023,73029),(73031,73031),(73098,73102),(73104,73105),(73107,73111),(73459,73462),(73472,73473),(73475,73475),(73524,73530),(73534,73538),(78896,78912),(78919,78933),(92912,92916),(92976,92982),(94031,94031),(94033,94087),(94095,94098),(94180,94180),(94192,94193),(113821,113822),(113824,113827),(118528,118573),(118576,118598),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(121344,121398),(121403,121452),(121461,121461),(121476,121476),(121499,121503),(121505,121519),(122880,122886),(122888,122904),(122907,122913),(122915,122916),(122918,122922),(123023,123023),(123184,123190),(123566,123566),(12362
8,123631),(124140,124143),(125136,125142),(125252,125258),(127995,127999),(917505,917505),(917536,917631),(917760,917999)),'15.1.0':((0,0),(173,173),(768,879),(1155,1161),(1425,1469),(1471,1471),(1473,1474),(1476,1477),(1479,1479),(1536,1541),(1552,1562),(1564,1564),(1611,1631),(1648,1648),(1750,1757),(1759,1764),(1767,1768),(1770,1773),(1807,1807),(1809,1809),(1840,1866),(1958,1968),(2027,2035),(2045,2045),(2070,2073),(2075,2083),(2085,2087),(2089,2093),(2137,2139),(2192,2193),(2200,2207),(2250,2307),(2362,2364),(2366,2383),(2385,2391),(2402,2403),(2433,2435),(2492,2492),(2494,2500),(2503,2504),(2507,2509),(2519,2519),(2530,2531),(2558,2558),(2561,2563),(2620,2620),(2622,2626),(2631,2632),(2635,2637),(2641,2641),(2672,2673),(2677,2677),(2689,2691),(2748,2748),(2750,2757),(2759,2761),(2763,2765),(2786,2787),(2810,2815),(2817,2819),(2876,2876),(2878,2884),(2887,2888),(2891,2893),(2901,2903),(2914,2915),(2946,2946),(3006,3010),(3014,3016),(3018,3021),(3031,3031),(3072,3076),(3132,3132),(3134,3140),(3142,3144),(3146,3149),(3157,3158),(3170,3171),(3201,3203),(3260,3260),(3262,3268),(3270,3272),(3274,3277),(3285,3286),(3298,3299),(3315,3315),(3328,3331),(3387,3388),(3390,3396),(3398,3400),(3402,3405),(3415,3415),(3426,3427),(3457,3459),(3530,3530),(3535,3540),(3542,3542),(3544,3551),(3570,3571),(3633,3633),(3636,3642),(3655,3662),(3761,3761),(3764,3772),(3784,3790),(3864,3865),(3893,3893),(3895,3895),(3897,3897),(3902,3903),(3953,3972),(3974,3975),(3981,3991),(3993,4028),(4038,4038),(4139,4158),(4182,4185),(4190,4192),(4194,4196),(4199,4205),(4209,4212),(4226,4237),(4239,4239),(4250,4253),(4448,4607),(4957,4959),(5906,5909),(5938,5940),(5970,5971),(6002,6003),(6068,6099),(6109,6109),(6155,6159),(6277,6278),(6313,6313),(6432,6443),(6448,6459),(6679,6683),(6741,6750),(6752,6780),(6783,6783),(6832,6862),(6912,6916),(6964,6980),(7019,7027),(7040,7042),(7073,7085),(7142,7155),(7204,7223),(7376,7378),(7380,7400),(7405,7405),(7412,7412),(7415,7417),(7616,7679),(8203,8207),(8232,8238),(8288,8292),(8294,8303),(8400,8432),(11503,11505),(11647,11647),(11744,11775),(12330,12335),(12441,12442),(42607,42610),(42612,42621),(42654,42655),(42736,42737),(43010,43010),(43014,43014),(43019,43019),(43043,43047),(43052,43052),(43136,43137),(43188,43205),(43232,43249),(43263,43263),(43302,43309),(43335,43347),(43392,43395),(43443,43456),(43493,43493),(43561,43574),(43587,43587),(43596,43597),(43643,43645),(43696,43696),(43698,43700),(43703,43704),(43710,43711),(43713,43713),(43755,43759),(43765,43766),(44003,44010),(44012,44013),(55216,55295),(64286,64286),(65024,65039),(65056,65071),(65279,65279),(65529,65531),(66045,66045),(66272,66272),(66422,66426),(68097,68099),(68101,68102),(68108,68111),(68152,68154),(68159,68159),(68325,68326),(68900,68903),(69291,69292),(69373,69375),(69446,69456),(69506,69509),(69632,69634),(69688,69702),(69744,69744),(69747,69748),(69759,69762),(69808,69818),(69821,69821),(69826,69826),(69837,69837),(69888,69890),(69927,69940),(69957,69958),(70003,70003),(70016,70018),(70067,70080),(70089,70092),(70094,70095),(70188,70199),(70206,70206),(70209,70209),(70367,70378),(70400,70403),(70459,70460),(70462,70468),(70471,70472),(70475,70477),(70487,70487),(70498,70499),(70502,70508),(70512,70516),(70709,70726),(70750,70750),(70832,70851),(71087,71093),(71096,71104),(71132,71133),(71216,71232),(71339,71351),(71453,71467),(71724,71738),(71984,71989),(71991,71992),(71995,71998),(72000,72000),(72002,72003),(72145,72151),(72154,72160),(72164,72164),(72193,72202),(72243,72249),(72251,72254),(72263,72263),
(72273,72283),(72330,72345),(72751,72758),(72760,72767),(72850,72871),(72873,72886),(73009,73014),(73018,73018),(73020,73021),(73023,73029),(73031,73031),(73098,73102),(73104,73105),(73107,73111),(73459,73462),(73472,73473),(73475,73475),(73524,73530),(73534,73538),(78896,78912),(78919,78933),(92912,92916),(92976,92982),(94031,94031),(94033,94087),(94095,94098),(94180,94180),(94192,94193),(113821,113822),(113824,113827),(118528,118573),(118576,118598),(119141,119145),(119149,119170),(119173,119179),(119210,119213),(119362,119364),(121344,121398),(121403,121452),(121461,121461),(121476,121476),(121499,121503),(121505,121519),(122880,122886),(122888,122904),(122907,122913),(122915,122916),(122918,122922),(123023,123023),(123184,123190),(123566,123566),(123628,123631),(124140,124143),(125136,125142),(125252,125258),(127995,127999),(917505,917505),(917536,917631),(917760,917999))}pyglossary-5.0.9/pyglossary/ui/wcwidth/unicode_versions.py000066400000000000000000000012271476751035500242120ustar00rootroot00000000000000'\nExports function list_versions() for unicode version level support.\n\nThis code generated by wcwidth/bin/update-tables.py on 2023-09-14 15:45:33 UTC.\n' def list_versions():'\n Return Unicode version levels supported by this module release.\n\n Any of the version strings returned may be used as keyword argument\n ``unicode_version`` to the ``wcwidth()`` family of functions.\n\n :returns: Supported Unicode version numbers in ascending sorted order.\n :rtype: list[str]\n ';return'4.1.0','5.0.0','5.1.0','5.2.0','6.0.0','6.1.0','6.2.0','6.3.0','7.0.0','8.0.0','9.0.0','10.0.0','11.0.0','12.0.0','12.1.0','13.0.0','14.0.0','15.0.0','15.1.0'pyglossary-5.0.9/pyglossary/ui/wcwidth/wcwidth.py000066400000000000000000000217021476751035500223050ustar00rootroot00000000000000'\nThis is a python implementation of wcwidth() and wcswidth().\n\nhttps://github.com/jquast/wcwidth\n\nfrom Markus Kuhn\'s C code, retrieved from:\n\n http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c\n\nThis is an implementation of wcwidth() and wcswidth() (defined in\nIEEE Std 1002.1-2001) for Unicode.\n\nhttp://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html\nhttp://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html\n\nIn fixed-width output devices, Latin characters all occupy a single\n"cell" position of equal width, whereas ideographic CJK characters\noccupy two such cells. Interoperability between terminal-line\napplications and (teletype-style) character terminals using the\nUTF-8 encoding requires agreement on which character should advance\nthe cursor by how many cell positions. No established formal\nstandards exist at present on which Unicode character shall occupy\nhow many cell positions on character terminals. These routines are\na first attempt of defining such behavior based on simple rules\napplied to data provided by the Unicode Consortium.\n\nFor some graphical characters, the Unicode standard explicitly\ndefines a character-cell width via the definition of the East Asian\nFullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.\nIn all these cases, there is no ambiguity about which width a\nterminal shall use. 
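As a minimal illustration — assuming the wcwidth() and wcswidth()
functions defined below behave like upstream jquast/wcwidth — a
Narrow (Na) Latin letter measures one cell, a Wide (W) CJK character
two, and a combining mark zero:

>>> wcwidth(u'a')           # Narrow (Na): one cell
1
>>> wcwidth(u'コ')          # Wide (W): two cells
2
>>> wcwidth(u'\u0301')      # combining mark: zero cells
0
>>> wcswidth(u'コンニチハ')   # five Wide characters
10
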
For characters in the East Asian Ambiguous (A)\nclass, the width choice depends purely on a preference of backward\ncompatibility with either historic CJK or Western practice.\nChoosing single-width for these characters is easy to justify as\nthe appropriate long-term solution, as the CJK practice of\ndisplaying these characters as double-width comes from historic\nimplementation simplicity (8-bit encoded characters were displayed\nsingle-width and 16-bit ones double-width, even for Greek,\nCyrillic, etc.) and not any typographic considerations.\n\nMuch less clear is the choice of width for the Not East Asian\n(Neutral) class. Existing practice does not dictate a width for any\nof these characters. It would nevertheless make sense\ntypographically to allocate two character cells to characters such\nas for instance EM SPACE or VOLUME INTEGRAL, which cannot be\nrepresented adequately with a single-width glyph. The following\nroutines at present merely assign a single-cell width to all\nneutral characters, in the interest of simplicity. This is not\nentirely satisfactory and should be reconsidered before\nestablishing a formal standard in this area. At the moment, the\ndecision which Not East Asian (Neutral) characters should be\nrepresented by double-width glyphs cannot yet be answered by\napplying a simple rule from the Unicode database content. Setting\nup a proper standard for the behavior of UTF-8 character terminals\nwill require a careful analysis not only of each Unicode character,\nbut also of each presentation form, something the author of these\nroutines has avoided to do so far.\n\nhttp://www.unicode.org/unicode/reports/tr11/\n\nLatest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c\n' from __future__ import division _B='auto' _A=None import os,sys,warnings from.table_vs16 import VS16_NARROW_TO_WIDE from.table_wide import WIDE_EASTASIAN from.table_zero import ZERO_WIDTH from.unicode_versions import list_versions try:from functools import lru_cache except ImportError:from backports.functools_lru_cache import lru_cache _PY3=sys.version_info[0]>=3 def _bisearch(ucs,table): '\n Auxiliary function for binary search in interval table.\n\n :arg int ucs: Ordinal value of unicode character.\n :arg list table: List of starting and ending ranges of ordinal values,\n in form of ``[(start, end), ...]``.\n :rtype: int\n :returns: 1 if ordinal value ucs is found within lookup table, else 0.\n ';B=ucs;A=table;E=0;C=len(A)-1 if BA[C][1]:return 0 while C>=E: D=(E+C)//2 if B>A[D][1]:E=D+1 elif B=(9,0,0):F+=_bisearch(ord(B),VS16_NARROW_TO_WIDE['9.0.0']);B=_A A+=1;continue D=wcwidth(C,G) if D<0:return D if D>0:B=C F+=D;A+=1 return F @lru_cache(maxsize=128) def _wcversion_value(ver_string):'\n Integer-mapped value of given dotted version string.\n\n :param str ver_string: Unicode version string, of form ``n.n.n``.\n :rtype: tuple(int)\n :returns: tuple of digit tuples, ``tuple(int, [...])``.\n ';A=tuple(map(int,ver_string.split('.')));return A @lru_cache(maxsize=8) def _wcmatch_version(given_version): "\n Return nearest matching supported Unicode version level.\n\n If an exact match is not determined, the nearest lowest version level is\n returned after a warning is emitted. 
For example, given supported levels\n ``4.1.0`` and ``5.0.0``, and a version string of ``4.9.9``, then ``4.1.0``\n is selected and returned:\n\n >>> _wcmatch_version('4.9.9')\n '4.1.0'\n >>> _wcmatch_version('8.0')\n '8.0.0'\n >>> _wcmatch_version('1')\n '4.1.0'\n\n :param str given_version: given version for compare, may be ``auto``\n (default), to select Unicode Version from Environment Variable,\n ``UNICODE_VERSION``. If the environment variable is not set, then the\n latest is used.\n :rtype: str\n :returns: unicode string, or non-unicode ``str`` type for python 2\n when given ``version`` is also type ``str``.\n ";G='latest';A=given_version;D=not _PY3 and isinstance(A,str) if D:B=list(map(lambda ucs:ucs.encode(),list_versions())) else:B=list_versions() C=B[-1] if A in(_B,_B):A=os.environ.get('UNICODE_VERSION',G if not D else C.encode()) if A in(G,G):return C if not D else C.encode() if A in B:return A if not D else A.encode() try:E=_wcversion_value(A) except ValueError:warnings.warn("UNICODE_VERSION value, {given_version!r}, is invalid. Value should be in form of `integer[.]+', the latest supported unicode version {latest_version!r} has been inferred.".format(given_version=A,latest_version=C));return C if not D else C.encode() F=B[0];J=_wcversion_value(F) if E<=J:warnings.warn('UNICODE_VERSION value, {given_version!r}, is lower than any available unicode version. Returning lowest version level, {earliest_version!r}'.format(given_version=A,earliest_version=F));return F if not D else F.encode() for(H,K)in enumerate(B): try:I=_wcversion_value(B[H+1]) except IndexError:return C if not D else C.encode() if E==I[:len(E)]:return B[H+1] if I>E:return K assert False,('Code path unreachable',A,B)pyglossary-5.0.9/pyglossary/ui_type.py000066400000000000000000000004031476751035500202170ustar00rootroot00000000000000__all__ = ["UIType"] class UIType: def progressInit(self, title: str) -> None: raise NotImplementedError def progress(self, ratio: float, text: str = "") -> None: raise NotImplementedError def progressEnd(self) -> None: raise NotImplementedError pyglossary-5.0.9/pyglossary/xdxf/000077500000000000000000000000001476751035500171435ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/xdxf/__init__.py000066400000000000000000000000001476751035500212420ustar00rootroot00000000000000pyglossary-5.0.9/pyglossary/xdxf/css_js_transform.py000066400000000000000000000307071476751035500231030ustar00rootroot00000000000000from __future__ import annotations import logging from io import BytesIO from typing import TYPE_CHECKING, cast if TYPE_CHECKING: from pyglossary.lxml_types import Element, T_htmlfile log = logging.getLogger("pyglossary") __all__ = [ "XdxfTransformer", ] class XdxfTransformer: def __init__(self, encoding: str = "utf-8") -> None: self._encoding = encoding self._childTagWriteMapping = { "br": self._write_br, "u": self._write_basic_format, "i": self._write_basic_format, "b": self._write_basic_format, "sub": self._write_basic_format, "sup": self._write_basic_format, "tt": self._write_basic_format, "big": self._write_basic_format, "small": self._write_basic_format, "blockquote": self._write_blockquote, "tr": self._write_tr, "k": self._write_k, "sr": self._write_sr, "ex": self._write_example, "mrkd": self._write_mrkd, "kref": self._write_kref, "iref": self._write_iref, "pos": self._write_pos, "abr": self._write_abr, "abbr": self._write_abbr, "dtrn": self._write_dtrn, "co": self._write_co, "c": self._write_c, "rref": self._write_rref, "def": self._write_def, "deftext": self._write_deftext, 
"span": self._write_span, "gr": self._write_gr, "ex_orig": self._write_ex_orig, "categ": self._write_categ, "opt": self._write_opt, "img": self._write_img, "etm": self._write_etm, } @staticmethod def tostring(elem: Element) -> str: from lxml import etree as ET return ( ET.tostring( elem, method="html", pretty_print=True, ) .decode("utf-8") .strip() ) @staticmethod def hasPrevText(prev: str | Element | None) -> bool: if isinstance(prev, str): return True if prev is None: return False if prev.tag == "k": return False if prev.tag in { "dtrn", "def", "span", "co", "i", "b", "sub", "sup", "tt", "big", "small", }: return True if prev.text: # noqa: SIM103 return True # print(prev) return False def writeString( # noqa: PLR0913 self, hf: T_htmlfile, child: str, parent: Element, prev: str | Element | None, stringSep: str | None = None, ) -> None: from lxml import etree as ET def addSep() -> None: if stringSep is None: hf.write(ET.Element("br")) else: hf.write(stringSep) hasPrev = self.hasPrevText(prev) trail = False if parent.tag in {"ar", "font"}: if child.startswith("\n"): child = child.lstrip("\n") if hasPrev: hf.write(ET.Element("br")) elif child.endswith("\n"): child = child.rstrip("\n") trail = True if not hasPrev: child = child.lstrip() elif child.startswith("\n"): # child = child.lstrip() if hasPrev: addSep() lines = [line for line in child.split("\n") if line] for index, line in enumerate(lines): if index > 0: # and line[0] not in ".,;)" addSep() hf.write(line) if trail: addSep() def _write_example(self, hf: T_htmlfile, elem: Element) -> None: prev = None stringSep = " " with hf.element( # noqa: PLR1702 "div", attrib={"class": elem.tag}, ): for child in elem.xpath("child::node()"): if isinstance(child, str): # if not child.strip(): # continue self.writeString(hf, child, elem, prev, stringSep=stringSep) continue if child.tag == "iref": with hf.element("div"): self._write_iref(hf, child) # NESTED 5 continue if child.tag == "ex_orig": with hf.element("span", attrib={"class": child.tag}): self.writeChildrenOf(hf, child, stringSep=stringSep) continue if child.tag == "ex_tran": ex_trans = elem.xpath("./ex_tran") if ex_trans.index(child) == 0: # when several translations, make HTML unordered list of them if len(ex_trans) > 1: with hf.element("ul", attrib={}): for ex_tran in ex_trans: with hf.element("li", attrib={}): self._write_ex_transl(hf, ex_tran) else: self._write_ex_transl(hf, child) continue # log.warning(f"unknown tag {child.tag} inside ") self.writeChild(hf, child, elem, prev, stringSep=stringSep) prev = child def _write_ex_orig(self, hf: T_htmlfile, child: Element) -> None: # TODO NOT REACHABLE log.warning("---- _write_ex_orig") with hf.element("i"): self.writeChildrenOf(hf, child) def _write_ex_transl(self, hf: T_htmlfile, child: Element) -> None: with hf.element("span", attrib={"class": child.tag}): self.writeChildrenOf(hf, child) def _write_iref(self, hf: T_htmlfile, child: Element) -> None: iref_url = child.attrib.get("href", "") if iref_url.endswith((".mp3", ".wav", ".aac", ".ogg")): # with hf.element("audio", src=iref_url): with hf.element( "a", attrib={ "class": "iref", "href": iref_url, }, ): hf.write("🔊") return with hf.element( "a", attrib={ "class": "iref", "href": child.attrib.get("href", child.text or ""), }, ): self.writeChildrenOf(hf, child, stringSep=" ") def _write_blockquote(self, hf: T_htmlfile, child: Element) -> None: with hf.element("div", attrib={"class": "m"}): self.writeChildrenOf(hf, child) def _write_tr(self, hf: T_htmlfile, child: Element) -> None: from lxml 
import etree as ET hf.write("[") self.writeChildrenOf(hf, child) hf.write("]") hf.write(ET.Element("br")) def _write_k(self, hf: T_htmlfile, child: Element) -> None: index = child.getparent().index(child) if index == 0: with hf.element("div", attrib={"class": child.tag}): # with hf.element(glos.titleTag(child.text)): # ^ no glos object here! self.writeChildrenOf(hf, child) # TODO Lenny: show other forms in a collapsible list # else: # with (hf.element("span", attrib={"class": child.tag})): # hf.write(str(index)) # self.writeChildrenOf(hf, child) def _write_mrkd(self, hf: T_htmlfile, child: Element) -> None: # noqa: PLR6301 if not child.text: return with hf.element("span", attrib={"class": child.tag}): hf.write(child.text) def _write_kref(self, hf: T_htmlfile, child: Element) -> None: if not child.text: log.warning(f"kref with no text: {self.tostring(child)}") return with hf.element( "a", attrib={ "class": "kref", "href": f"bword://{child.attrib.get('k', child.text)}", }, ): hf.write(child.text) def _write_sr(self, hf: T_htmlfile, child: Element) -> None: with hf.element("div", attrib={"class": child.tag}): self.writeChildrenOf(hf, child) def _write_pos(self, hf: T_htmlfile, child: Element) -> None: with hf.element("span", attrib={"class": child.tag}): self.writeChildrenOf(hf, child) def _write_abr(self, hf: T_htmlfile, child: Element) -> None: with hf.element("span", attrib={"class": "abbr"}): self.writeChildrenOf(hf, child) def _write_abbr(self, hf: T_htmlfile, child: Element) -> None: # noqa: PLR6301 with hf.element("span", attrib={"class": child.tag}): self.writeChildrenOf(hf, child) def _write_dtrn(self, hf: T_htmlfile, child: Element) -> None: self.writeChildrenOf(hf, child, sep=" ") def _write_co(self, hf: T_htmlfile, child: Element) -> None: with hf.element("span", attrib={"class": child.tag}): hf.write("(") self.writeChildrenOf(hf, child, sep=" ") hf.write(")") def _write_basic_format(self, hf: T_htmlfile, child: Element) -> None: with hf.element(child.tag): self.writeChildrenOf(hf, child) # if child.text is not None: # hf.write(child.text.strip("\n")) def _write_br(self, hf: T_htmlfile, child: Element) -> None: from lxml import etree as ET hf.write(ET.Element("br")) self.writeChildrenOf(hf, child) def _write_c(self, hf: T_htmlfile, child: Element) -> None: color = child.attrib.get("c", "green") with hf.element("font", color=color): self.writeChildrenOf(hf, child) def _write_rref(self, _hf: T_htmlfile, child: Element) -> None: if not child.text: log.warning(f"rref with no text: {self.tostring(child)}") return def _write_def(self, hf: T_htmlfile, elem: Element) -> None: has_nested_def = False has_deftext = False for child in elem.iterchildren(): if child.tag == "def": has_nested_def = True if child.tag == "deftext": has_deftext = True if elem.getparent().tag == "ar": # this is a root if has_nested_def: with hf.element("ol"): self.writeChildrenOf(hf, elem) else: with hf.element("div"): self.writeChildrenOf(hf, elem) elif has_deftext: with hf.element("li"): self.writeChildrenOf(hf, elem) elif has_nested_def: with hf.element("li"): with hf.element("ol"): self.writeChildrenOf(hf, elem) else: with hf.element("li"): self.writeChildrenOf(hf, elem) def _write_deftext(self, hf: T_htmlfile, child: Element) -> None: with hf.element("span", attrib={"class": child.tag}): self.writeChildrenOf(hf, child, stringSep=" ", sep=" ") def _write_span(self, hf: T_htmlfile, child: Element) -> None: with hf.element("span"): self.writeChildrenOf(hf, child) def _write_gr(self, hf: T_htmlfile, child: Element) 
-> None: with hf.element("div", attrib={"class": child.tag}): self.writeChildrenOf(hf, child) def _write_categ(self, hf: T_htmlfile, child: Element) -> None: with hf.element("span", style="background-color: green;"): self.writeChildrenOf(hf, child, stringSep=" ") def _write_opt(self, hf: T_htmlfile, child: Element) -> None: # noqa: PLR6301 if child.text: hf.write(" (") hf.write(child.text) hf.write(")") def _write_img(self, hf: T_htmlfile, child: Element) -> None: # noqa: PLR6301 with hf.element("img", attrib=dict(child.attrib)): pass def _write_etm(self, hf: T_htmlfile, child: Element) -> None: # noqa: PLR6301 # Etymology (history and origin) # TODO: formatting? hf.write(child.text or "") def writeChildElem( # noqa: PLR0913 self, hf: T_htmlfile, child: Element, parent: Element, # noqa: ARG002 prev: str | Element | None, stringSep: str | None = None, # noqa: ARG002 ) -> None: func = self._childTagWriteMapping.get(child.tag, None) if func is not None: func(hf, child) return if child.tag == "ex_transl" and prev is not None: if isinstance(prev, str): pass elif prev.tag == "ex_orig": if child.text != prev.text: with hf.element("i"): self.writeChildrenOf(hf, child) return log.warning(f"unknown tag {child.tag}") self.writeChildrenOf(hf, child) def writeChild( # noqa: PLR0913 self, hf: T_htmlfile, child: str | Element, parent: Element, prev: str | Element | None, stringSep: str | None = None, ) -> None: if isinstance(child, str): self.writeString(hf, child, parent, prev, stringSep=stringSep) else: self.writeChildElem( hf=hf, child=child, parent=parent, prev=prev, stringSep=stringSep, ) def shouldAddSep( # noqa: PLR6301 self, child: str | Element, prev: str | Element, ) -> bool: if isinstance(child, str): return not (len(child) > 0 and child[0] in ".,;)") if child.tag in {"sub", "sup"}: return False if isinstance(prev, str): pass elif prev.tag in {"sub", "sup"}: return False return True def writeChildrenOf( self, hf: T_htmlfile, elem: Element, sep: str | None = None, stringSep: str | None = None, ) -> None: prev = None for child in elem.xpath("child::node()"): if sep and prev is not None and self.shouldAddSep(child, prev): hf.write(sep) self.writeChild(hf, child, elem, prev, stringSep=stringSep) prev = child @staticmethod def stringify_children(elem: Element) -> str: from itertools import chain from lxml.etree import tostring children = [ chunk for chunk in chain( (elem.text,), chain.from_iterable( (tostring(child, with_tail=False), child.tail) for child in elem.getchildren() ), (elem.tail,), ) if chunk ] normalized_children = "" for chunk in children: if isinstance(chunk, str): normalized_children += chunk if isinstance(chunk, bytes): normalized_children += chunk.decode(encoding="utf-8") return normalized_children def transform(self, article: Element) -> str: from lxml import etree as ET # encoding = self._encoding f = BytesIO() with ET.htmlfile(f, encoding="utf-8") as hf: with hf.element("div", attrib={"class": "article"}): self.writeChildrenOf(cast("T_htmlfile", hf), article) text = f.getvalue().decode("utf-8") text = text.replace("
          ", "
          ") # for compatibility return text # noqa: RET504 def transformByInnerString(self, articleInnerStr: str) -> str: from lxml import etree as ET return self.transform( ET.fromstring(f"{articleInnerStr}"), ) pyglossary-5.0.9/pyglossary/xdxf/transform.py000066400000000000000000000272411476751035500215360ustar00rootroot00000000000000from __future__ import annotations import logging from io import BytesIO from typing import TYPE_CHECKING, cast if TYPE_CHECKING: from pyglossary.lxml_types import Element, T_htmlfile log = logging.getLogger("pyglossary") __all__ = [ "XdxfTransformer", ] class XdxfTransformer: _gram_color: str = "green" _example_padding: int = 10 def __init__(self, encoding: str = "utf-8") -> None: self._encoding = encoding self._childTagWriteMapping = { "br": self._write_br, "u": self._write_basic_format, "i": self._write_basic_format, "b": self._write_basic_format, "sub": self._write_basic_format, "sup": self._write_basic_format, "tt": self._write_basic_format, "big": self._write_basic_format, "small": self._write_basic_format, "blockquote": self._write_blockquote, "tr": self._write_tr, "k": self._write_k, "sr": self._write_sr, "ex": self._write_example, "mrkd": self._write_mrkd, "kref": self._write_kref, "iref": self._write_iref, "pos": self._write_pos, "abr": self._write_abr, "dtrn": self._write_dtrn, "co": self._write_co, "c": self._write_c, "rref": self._write_rref, "def": self._write_def, "deftext": self._write_deftext, "span": self._write_span, "abbr_def": self._write_abbr_def, "gr": self._write_gr, "ex_orig": self._write_ex_orig, "categ": self._write_categ, "opt": self._write_opt, "img": self._write_img, "abbr": self._write_abbr, "etm": self._write_etm, } @staticmethod def tostring(elem: Element) -> str: from lxml import etree as ET return ( ET.tostring( elem, method="html", pretty_print=True, ) .decode("utf-8") .strip() ) @staticmethod def hasPrevText(prev: str | Element | None) -> bool: if isinstance(prev, str): return True if prev is None: return False if prev.tag == "k": return False if prev.tag in { "dtrn", "def", "span", "co", "i", "b", "sub", "sup", "tt", "big", "small", }: return True if prev.text: # noqa: SIM103 return True # print(prev) return False def writeString( # noqa: PLR0913 self, hf: T_htmlfile, child: str, parent: Element, prev: str | Element | None, stringSep: str | None = None, ) -> None: from lxml import etree as ET def addSep() -> None: if stringSep is None: hf.write(ET.Element("br")) else: hf.write(stringSep) hasPrev = self.hasPrevText(prev) trail = False if parent.tag in {"ar", "font"}: if child.startswith("\n"): child = child.lstrip("\n") if hasPrev: hf.write(ET.Element("br")) elif child.endswith("\n"): child = child.rstrip("\n") trail = True if not hasPrev: child = child.lstrip() elif child.startswith("\n"): child = child.lstrip() if hasPrev: addSep() child = child.rstrip() lines = [line for line in child.split("\n") if line] for index, line in enumerate(lines): if index > 0: # and line[0] not in ".,;)" addSep() hf.write(line) if trail: addSep() def _write_example(self, hf: T_htmlfile, elem: Element) -> None: children = elem.xpath("child::node()") if not children: return if not isinstance(children, list): log.warning(f"unexpected {children=}") return prev = None stringSep = " " with hf.element( "div", attrib={ "class": "example", "style": f"padding: {self._example_padding}px 0px;", }, ): for child in children: if isinstance(child, str): # if not child.strip(): # continue self.writeString(hf, child, elem, prev, stringSep=stringSep) 
continue if isinstance(child, bytes | tuple): # TODO log.warning(f"unexpected {child=}") continue if not child: continue if child.tag == "iref": with hf.element("div"): self._write_iref(hf, child) # NESTED 5 continue if child.tag in {"ex_orig", "ex_tran"}: with hf.element("div"): self.writeChildrenOf(hf, child, stringSep=stringSep) # NESTED 5 continue # log.warning(f"unknown tag {child.tag} inside ") self.writeChild(hf, child, elem, prev, stringSep=stringSep) prev = child def _write_iref(self, hf: T_htmlfile, child: Element) -> None: iref_url = child.attrib.get("href", "") if iref_url.endswith((".mp3", ".wav", ".aac", ".ogg")): # with hf.element("audio", src=iref_url): with hf.element( "a", attrib={ "class": "iref", "href": iref_url, }, ): hf.write("🔊") return with hf.element( "a", attrib={ "class": "iref", "href": child.attrib.get("href", child.text or ""), }, ): self.writeChildrenOf(hf, child, stringSep=" ") def _write_blockquote(self, hf: T_htmlfile, child: Element) -> None: with hf.element("div", attrib={"class": "m"}): self.writeChildrenOf(hf, child) def _write_tr(self, hf: T_htmlfile, child: Element) -> None: from lxml import etree as ET hf.write("[") self.writeChildrenOf(hf, child) hf.write("]") hf.write(ET.Element("br")) def _write_k(self, hf: T_htmlfile, child: Element) -> None: with hf.element("div", attrib={"class": child.tag}): # with hf.element(glos.titleTag(child.text)): # ^ no glos object here! with hf.element("b"): self.writeChildrenOf(hf, child) def _write_mrkd(self, hf: T_htmlfile, child: Element) -> None: # noqa: PLR6301 if not child.text: return with hf.element("span", attrib={"class": child.tag}): with hf.element("b"): hf.write(child.text) def _write_kref(self, hf: T_htmlfile, child: Element) -> None: if not child.text: log.warning(f"kref with no text: {self.tostring(child)}") return with hf.element( "a", attrib={ "class": "kref", "href": f"bword://{child.attrib.get('k', child.text)}", }, ): hf.write(child.text) def _write_sr(self, hf: T_htmlfile, child: Element) -> None: with hf.element("div", attrib={"class": child.tag}): self.writeChildrenOf(hf, child) def _write_pos(self, hf: T_htmlfile, child: Element) -> None: with hf.element("span", attrib={"class": child.tag}): with hf.element("font", color="green"): with hf.element("i"): self.writeChildrenOf(hf, child) # NESTED 5 def _write_abr(self, hf: T_htmlfile, child: Element) -> None: with hf.element("span", attrib={"class": child.tag}): with hf.element("font", color="green"): with hf.element("i"): self.writeChildrenOf(hf, child) # NESTED 5 def _write_dtrn(self, hf: T_htmlfile, child: Element) -> None: self.writeChildrenOf(hf, child, sep=" ") def _write_co(self, hf: T_htmlfile, child: Element) -> None: self.writeChildrenOf(hf, child, sep=" ") def _write_basic_format(self, hf: T_htmlfile, child: Element) -> None: with hf.element(child.tag): self.writeChildrenOf(hf, child) # if child.text is not None: # hf.write(child.text.strip("\n")) def _write_br(self, hf: T_htmlfile, child: Element) -> None: from lxml import etree as ET hf.write(ET.Element("br")) self.writeChildrenOf(hf, child) def _write_c(self, hf: T_htmlfile, child: Element) -> None: color = child.attrib.get("c", "green") with hf.element("font", color=color): self.writeChildrenOf(hf, child) def _write_rref(self, _hf: T_htmlfile, child: Element) -> None: if not child.text: log.warning(f"rref with no text: {self.tostring(child)}") return def _write_def(self, hf: T_htmlfile, child: Element) -> None: # TODO: create a list (ol / ul) unless it has one item only # like 
FreeDict reader with hf.element("div"): self.writeChildrenOf(hf, child) def _write_deftext(self, hf: T_htmlfile, child: Element) -> None: self.writeChildrenOf(hf, child, stringSep=" ", sep=" ") def _write_span(self, hf: T_htmlfile, child: Element) -> None: with hf.element("span"): self.writeChildrenOf(hf, child) def _write_abbr_def(self, hf: T_htmlfile, child: Element) -> None: # _type = child.attrib.get("type", "") # {"": "", "grm": "grammatical", "stl": "stylistical", # "knl": "area/field of knowledge", "aux": "subsidiary" # "oth": "others"}[_type] self.writeChildrenOf(hf, child) def _write_gr(self, hf: T_htmlfile, child: Element) -> None: from lxml import etree as ET with hf.element("font", color=self._gram_color): hf.write(child.text or "") hf.write(ET.Element("br")) def _write_ex_orig(self, hf: T_htmlfile, child: Element) -> None: with hf.element("i"): self.writeChildrenOf(hf, child) # def _write_ex_transl(self, hf: T_htmlfile, child: Element) -> None: def _write_categ(self, hf: T_htmlfile, child: Element) -> None: with hf.element("span", style="background-color: green;"): self.writeChildrenOf(hf, child, stringSep=" ") def _write_opt(self, hf: T_htmlfile, child: Element) -> None: # noqa: PLR6301 if child.text: hf.write(" (") hf.write(child.text) hf.write(")") def _write_img(self, hf: T_htmlfile, child: Element) -> None: # noqa: PLR6301 with hf.element("img", attrib=dict(child.attrib)): pass def _write_abbr(self, hf: T_htmlfile, child: Element) -> None: # noqa: PLR6301 # FIXME: may need an space or newline before it with hf.element("i"): hf.write(child.text or "") def _write_etm(self, hf: T_htmlfile, child: Element) -> None: # noqa: PLR6301 # Etymology (history and origin) # TODO: formatting? hf.write(child.text or "") def writeChildElem( # noqa: PLR0913 self, hf: T_htmlfile, child: Element, parent: Element, # noqa: ARG002 prev: str | Element | None, stringSep: str | None = None, # noqa: ARG002 ) -> None: func = self._childTagWriteMapping.get(child.tag, None) if func is not None: func(hf, child) return if child.tag == "ex_transl" and prev is not None: if isinstance(prev, str): pass elif prev.tag == "ex_orig": if child.text != prev.text: with hf.element("i"): self.writeChildrenOf(hf, child) return log.warning(f"unknown tag {child.tag}") self.writeChildrenOf(hf, child) def writeChild( # noqa: PLR0913 self, hf: T_htmlfile, child: str | Element, parent: Element, prev: str | Element | None, stringSep: str | None = None, ) -> None: if isinstance(child, str): if not child.strip(): return self.writeString(hf, child, parent, prev, stringSep=stringSep) return self.writeChildElem( hf=hf, child=child, parent=parent, prev=prev, stringSep=stringSep, ) def shouldAddSep( # noqa: PLR6301 self, child: str | Element, prev: str | Element, ) -> bool: if isinstance(child, str): return not (len(child) > 0 and child[0] in ".,;)") if child.tag in {"sub", "sup"}: return False if isinstance(prev, str): pass elif prev.tag in {"sub", "sup"}: return False return True def writeChildrenOf( self, hf: T_htmlfile, elem: Element, sep: str | None = None, stringSep: str | None = None, ) -> None: children = elem.xpath("child::node()") if not children: return if not isinstance(children, list): log.warning(f"unexpceted {children=}") return prev = None for child in children: if sep and prev is not None and self.shouldAddSep(child, prev): hf.write(sep) if isinstance(child, bytes | tuple): log.warning(f"unexpected {child=}") continue self.writeChild(hf, child, elem, prev, stringSep=stringSep) prev = child def transform(self, 
article: Element) -> str: from lxml import etree as ET # encoding = self._encoding f = BytesIO() with ET.htmlfile(f, encoding="utf-8") as hf: with hf.element("div", attrib={"class": "article"}): self.writeChildrenOf(cast("T_htmlfile", hf), article) text = f.getvalue().decode("utf-8") text = text.replace("
          ", "
          ") # for compatibility return text # noqa: RET504 def transformByInnerString(self, articleInnerStr: str) -> str: from lxml import etree as ET return self.transform( ET.fromstring(f"{articleInnerStr}"), ) pyglossary-5.0.9/pyglossary/xdxf/xdxf.css000066400000000000000000000017531476751035500206340ustar00rootroot00000000000000div.k { font-weight: 700; font-size: 150%; } span.k { font-size: 100%; } .gr { color: green; } ol { list-style-type: decimal; padding-left: 20px; } ol > li > ol > li > ol { list-style-type: lower-latin; } .ex { margin: 0px 0px 0px 20px; color: #888888; } .ex i { color: red; } .ex_orig { font-weight: 700; } .ex .mrkd { text-decoration: underline; } .co { color: #888888; font-style: italic; } .abbr { color: green; font-style: italic; text-decoration: underline; text-decoration-style: dotted; } .pos { color: red; font-style: italic; } .abbr_popup { background: #feffca; border: 1px solid rgba(0,0,0,.15); border-radius: 2px; box-shadow: 2px 2px 3px rgba(0,0,0,.1),0 2px 0 rgba(255,255,255,.4) inset,0 -2px 0 rgba(242,85,0,1) inset; cursor: pointer; display: none; font-size: 100%; font-style: normal; padding: .05em .6em .2em; position: absolute; z-index: 999; margin-bottom: 100px; }pyglossary-5.0.9/pyglossary/xdxf/xdxf.js000066400000000000000000000035221476751035500204540ustar00rootroot00000000000000 prepare_tooltips() // iterate over all tags that can show tooltip function prepare_tooltips() { var pos_elems = document.querySelectorAll(".pos"); var abbr_elems = document.querySelectorAll(".abbr"); iterate_over_abbr_elems(pos_elems) iterate_over_abbr_elems(abbr_elems) } function iterate_over_abbr_elems(elems) { for (var i = 0; i < elems.length; i++) { var elem = elems[i]; if (abbr_map.has(elem.textContent)) { elem.classList.add("abbr"); elem.classList.remove("pos"); elem.addEventListener("mouseover", show_popup); elem.addEventListener("mouseout", hide_popup); } else { elem.classList.add("pos"); elem.classList.remove("abbr"); } } } function show_popup(event) { var pos_elem = event.target var pos_text = pos_elem.textContent var s = document.createElement("small"); s.classList.add("abbr_popup"); s.innerHTML = abbr_map.get(pos_text) pos_elem.parentNode.insertBefore(s, pos_elem.nextSibling); if (s.offsetWidth > 200) { if ((pos_elem.offsetLeft + 200) > document.body.offsetWidth) { s.style.left = pos_elem.offsetLeft - ((pos_elem.offsetLeft + 200) - document.body.offsetWidth) + 'px'; } else { s.style.left = pos_elem.offsetLeft + 'px'; } } else { if ((pos_elem.offsetLeft + s.offsetWidth) > document.body.offsetWidth) { s.style.left = pos_elem.offsetLeft - ((pos_elem.offsetLeft + s.offsetWidth) - document.body.offsetWidth) + 'px'; } else { s.style.left = pos_elem.offsetLeft + 'px'; } } s.style.display = 'block'; } function hide_popup(event) { var popups = document.getElementsByClassName('abbr_popup'); for (var i = 0; i < popups.length; ++i) { popups[i].remove(); } } pyglossary-5.0.9/pyglossary/xdxf/xdxf.xsl000066400000000000000000000112611476751035500206450ustar00rootroot00000000000000

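A minimal usage sketch for the XdxfTransformer defined above in pyglossary/xdxf/transform.py (illustrative only — this is not a file in the archive; the sample XDXF fragment and the expected output are assumptions, and lxml must be installed):

# hypothetical usage sketch, not part of pyglossary-5.0.9
from pyglossary.xdxf.transform import XdxfTransformer

transformer = XdxfTransformer(encoding="utf-8")
# transformByInnerString() wraps the fragment in an <ar> element,
# parses it with lxml, and renders it to HTML through transform()
html = transformer.transformByInnerString(
	"<k>apple</k><def><deftext>a round fruit</deftext></def>",
)
print(html)  # expected to start with: <div class="article">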
          pyglossary-5.0.9/pyglossary/xdxf/xsl_transform.py000066400000000000000000000026401476751035500224200ustar00rootroot00000000000000from __future__ import annotations import logging from os.path import join from typing import TYPE_CHECKING if TYPE_CHECKING: from lxml.etree import _XSLTResultTree from pyglossary.lxml_types import Element from pyglossary import core from pyglossary.core import rootDir log = logging.getLogger("pyglossary") __all__ = [ "XslXdxfTransformer", ] class XslXdxfTransformer: _gram_color: str = "green" _example_padding: int = 10 def __init__(self, encoding: str = "utf-8") -> None: try: from lxml import etree as ET except ModuleNotFoundError as e: e.msg += f", run `{core.pip} install lxml` to install" raise e with open( join(rootDir, "pyglossary", "xdxf", "xdxf.xsl"), encoding="utf-8", ) as f: xslt_txt = f.read() xslt = ET.XML(xslt_txt) self._transform = ET.XSLT(xslt) self._encoding = encoding @staticmethod def tostring(elem: _XSLTResultTree | Element) -> str: from lxml import etree as ET return ( ET.tostring( elem, method="html", pretty_print=True, ) .decode("utf-8") .strip() ) def transform(self, article: Element) -> str: result_tree = self._transform(article) text = self.tostring(result_tree) text = text.replace("
          ", "
          ") return text # noqa: RET504 def transformByInnerString(self, articleInnerStr: str) -> str: from lxml import etree as ET return self.transform( ET.fromstring(f"{articleInnerStr}"), ) pyglossary-5.0.9/pyglossary/xml_utils.py000066400000000000000000000007771476751035500205770ustar00rootroot00000000000000# from xml.sax.saxutils import escape as xml_escape # from xml.sax.saxutils import unescape as xml_unescape from __future__ import annotations __all__ = ["xml_escape"] def xml_escape(data: str, quotation: bool = True) -> str: """Escape &, <, and > in a string of data.""" # must do ampersand first data = data.replace("&", "&") data = data.replace(">", ">") data = data.replace("<", "<") if quotation: data = data.replace('"', """).replace("'", "'") return data # noqa: RET504 pyglossary-5.0.9/pyproject.toml000066400000000000000000000321561476751035500167010ustar00rootroot00000000000000[tool.ruff.format] quote-style = "double" indent-style = "tab" skip-magic-trailing-comma = false line-ending = "lf" docstring-code-format = false exclude = ["pyglossary/plugin_lib/ripemd128.py"] [tool.ruff] line-length = 88 target-version = "py310" # Exclude a variety of commonly ignored directories. exclude = [ "pyglossary/repro_zipfile/", "whitelist.py", # for vulture "pyglossary/plugins/babylon_bgl/bgl_gzip.py", "pyglossary/plugins/testformat", "pyglossary/ui/gtk*_utils/__init__.py", "pyglossary/ui/ui_qt.py", "pyglossary/ui/progressbar/", "pyglossary/reverse.py", "wcwidth*", ".direnv", ".eggs", ".git", ".mypy_cache", ".nox", ".pants.d", ".ruff_cache", ".tox", ".venv", "__pypackages__", "_build", "buck-out", "build", "dist", "venv", ] [tool.ruff.lint] select = [ "ANN", # flake8-annotationsq "F", # Pyflakes "E", # pycodestyle Error "W", # pycodestyle Warning "C90", # mccabe: C901: {name} is too complex ({complexity}) "I", # isort: unsorted-imports, missing-required-import "D", # pydocstyle "B", # flake8-bugbear "UP", # pyupgrade "YTT", # flake8-2020 "ASYNC1", # flake8-trio "BLE", # flake8-blind-except "B", # flake8-bugbear "A", # flake8-builtins "COM", # flake8-commas # "CPY", # flake8-copyright --preview "C4", # flake8-comprehensions "DTZ", # flake8-datetimez "T10", # flake8-debugger "DJ", # flake8-django "EXE", # flake8-executable "FA", # flake8-future-annotations "ISC", # flake8-implicit-str-concat "ICN", # flake8-import-conventions "G", # flake8-logging-format "INP", # flake8-no-pep420 "PIE", # flake8-pie "T20", # flake8-print "PYI", # flake8-pyi "PT", # flake8-pytest-style "Q", # flake8-quotes "RSE", # flake8-raise "RET", # flake8-return "SLF", # flake8-self "SLOT", # flake8-slots "SIM", # flake8-simplify "TID", # flake8-tidy-imports "TCH", # flake8-type-checking "INT", # flake8-gettext "ARG", # flake8-unused-arguments # "PTH", # flake8-use-pathlib "PD", # pandas-vet "PGH", # pygrep-hooks "PL", # Pylint # "TRY", # tryceratops, they all sound BS # "FLY", # flynt "NPY", # NumPy-specific rules "AIR", # Airflow "PERF", # Perflint "FURB", # refurb --preview "LOG", # flake8-logging "RUF", # Ruff-specific rules ] ignore = [ "ANN003", # Missing type annotation for `**kwargs`, 15 remaining "PLR0917", # Too many positional arguments (x/5) "PLR0914", # Too many local variables (x/15) "ANN401", # Dynamically typed expressions (typing.Any) are disallowed in ... 
"PYI042", # Type alias `...` should be CamelCase FIXME "RUF039", # First argument to `re.compile()` is not raw string "FURB189", # FURB189 Subclassing `dict` can be error prone, use `collections.UserDict` instead # FURB189 Subclassing `str` can be error prone, use `collections.UserStr` instead "COM812", # Trailing comma missing "SLF", # Private member accessed "PYI034", # py3.11: `__iadd__` methods in classes like `SqEntryList` usually return `self` at runtime "DTZ001", # The use of `datetime.datetime()` without `tzinfo` argument is not allowed "DTZ005", # The use of `datetime.datetime.now()` without `tz` argument is not allowed "PGH003", # Use specific rule codes when ignoring type issues "PLR0915", # Too many statements "PLR0911", # Too many return statements (x > 6) "PLR2004", # Magic value used in comparison, consider replacing `...` with a constant variable "FURB166", # Use of `int` with explicit `base=16` after removing prefix "FURB103", # `open` and `write` should be replaced by `Path(... "PLC0415", # `import` should be at the top-level of a file "PLW0603", # Using the global statement to update `mockLog` is discouraged "PT027", # Use `pytest.raises` instead of unittest-style `assertRaises`, why? "PD011", # Use `.to_numpy()` instead of `.values`, WTF? "ICN001", # `tkinter` should be imported as `tk`, WTF? "RUF005", # Consider `[*_list, x]` instead of concatenation "PT009", # Use a regular `assert` instead of unittest-style `assertEqual`, why? "RUF012", # Mutable class attributes should be annotated with `typing.ClassVar` "BLE001", # Do not catch blind exception: `Exception` "G004", # Logging statement uses f-string, WTF? "TRY400", # Use `logging.exception` instead of `logging.error` "TRY003", # Avoid specifying long messages outside the exception class, ??? "RUF100", # Unused `noqa` directive (non-enabled: ...) "FURB101", # `open` and `read` should be replaced by `Path(rootConfJsonFile).read_text()` "B019", # Use of `functools.lru_cache` or `functools.cache` on methods can lead to memory leaks "D100", # Missing docstring in public module "D101", # Missing docstring in public class "D102", # Missing docstring in public method "D103", # Missing docstring in public function "D104", # Missing docstring in public package "D105", # Missing docstring in magic method "D107", # Missing docstring in `__init__` "D205", # 1 blank line required between summary line and description "D206", # Docstring should be indented with spaces, not tabs "D211", # (Do not enable) no-blank-line-before-class "D212", # multi-line-summary-first-line, conflicts with D213:multi-line-summary-second-line "D401", # First line of docstring should be in imperative mood "D417", # Missing argument descriptions in the docstring "E402", # Module level import not at top of file "E721", # Do not compare types, use `isinstance()` "SIM105", # Use contextlib.suppress({exception}) instead of try-except-pass "SIM117", # Use a single with statement with multiple contexts... "UP009", # UTF-8 encoding declaration is unnecessary "UP037", # Remove quotes from type annotation "SIM115", # Use context handler for opening files "W191", # Indentation contains tabs ] # Allow autofix for all enabled rules (when `--fix`) is provided. unfixable = [] # Allow unused variables when underscore-prefixed. dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" mccabe.max-complexity = 13 # Unlike Flake8, default to a complexity level of 10. 
[tool.ruff.lint.per-file-ignores]
"pyglossary/ui/termcolors.py" = [
	"PYI024", # Use `typing.NamedTuple` instead of `collections.namedtuple`
]
"pyglossary/sort_modules/random.py" = [
	"A005", # Module `random` shadows a Python standard-library module
]
"pyglossary/glossary.py" = ["ANN"]
"*_types.py" = [
	"TC003", # Move standard library import `...` into a type-checking block
]
"pyglossary/plugins/**/*.py" = [
	"PLR0904", # Too many public methods
	"C90", # mccabe: C901: {name} is too complex ({complexity})
]
"slob.py" = [
	"C901", # `...` is too complex (14 > 13)
	"ANN",
]
"html_dir.py" = [
	"C901", # `...` is too complex
]
"zimfile.py" = [
	"C901", # `...` is too complex
]
"pyglossary/plugins/babylon_bgl/*.py" = [
	"C901", # `...` is too complex
]
"pyglossary/html_utils.py" = ["RUF003"]
"persian_utils.py" = ["RUF001"]
"ru.py" = ["RUF001", "RUF003"]
"pyglossary/plugins/dikt_json/*.py" = ["RUF003"]
"pyglossary/plugin_lib/*.py" = [
	"ANN",
	"PT018", # Assertion should be broken down into multiple parts
	"D",
	"RUF015", # Prefer `next(zip(*_list, strict=False))` over single element slice
	"PLR2004", # Magic value used in comparison, consider replacing `...` with a constant variable
]
"scripts/wiki-formats.py" = ["E501"]
"pyglossary/io_utils.py" = ["ANN"]
"pyglossary/plugins/babylon_bgl/reader_debug.py" = ["ANN", "FURB"]
"pyglossary/ui/ui_tk.py" = ["ANN"]
"pyglossary/ui/**/*.py" = [
	"T201",
	"PERF203",
	"PLR0904", # Too many public methods
	"PLR0912", # Too many branches
	"PLR0915", # Too many statements
	"PLR6301", # Method `...` could be a function, class method, or static method
	"C90", # mccabe: C901: {name} is too complex ({complexity})
]
"tests/*.py" = [
	"ANN",
	"T201",
	"PLR0915", # Too many statements
	"PLR6301", # Method `...` could be a function, class method, or static method
	"E501", # Line too long
]
"*_test.py" = [
	"ANN",
	"T201",
	"RUF001", # String contains ambiguous ... (ARABIC LETTER ...). Did you mean `l` ...
	"PLR0904", # Too many public methods
]
"test.py" = ["ANN", "T201"]
"scripts/*.py" = ["ANN", "T201", "INP001"]
"scripts/*/*.py" = ["ANN", "T201", "INP001"]
"doc/lib-examples/*.py" = ["ANN", "INP"]

[tool.mypy]
exclude = [
	# '.*/plugin_lib/.*',
]

[tool.refurb]
ignore = [
	117, # Replace `open(filename, "rb")` with `filename.open("rb")`
	184, # Assignment statement should be chained
	101, # Replace `with open(x, ...) as f: y = f.read()` with `y = Path(x).read_bytes()`
	103, # Replace `with open(x, ...) as f: f.write(y)` with `Path(x).write_bytes(y)`
	104, # Replace `os.getcwd()` with `Path.cwd()`
	107, # Replace `try: ... except OSError: pass` with `with suppress(OSError): ...`
	141, # Replace `os.path.exists(x)` with `Path(x).exists()`
	144, # Replace `os.remove(x)` with `Path(x).unlink()`
	146, # Replace `os.path.isfile(x)` with `Path(x).is_file()`
	150, # Replace `os.makedirs(x)` with `Path(x).mkdir(parents=True)`
	155, # Replace `os.path.getsize(x)` with `Path(x).stat().st_size`
]
# refurb has no exclude param!
#load = ["some_module"] #quiet = true [tool.pylint.messages_control] max-line-length = 88 disable = [ "no-member", "no-name-in-module", "missing-module-docstring", "bad-indentation", "invalid-name", "logging-fstring-interpolation", "too-many-arguments", "broad-exception-caught", "missing-function-docstring", "unused-argument", "import-outside-toplevel", "missing-class-docstring", "too-many-instance-attributes", "fixme", "redefined-builtin", "pointless-statement", "abstract-method", "unidiomatic-typecheck", "attribute-defined-outside-init", "unspecified-encoding", "super-init-not-called", "redefined-slots-in-subclass", "redefined-outer-name", "wrong-import-position", "too-few-public-methods", "too-many-lines", "too-many-public-methods", "too-many-statements", "too-many-locals", "too-many-branches", "too-many-return-statements", "unused-import", "import-error", "protected-access", "consider-using-with", "disallowed-name", "useless-return", "method-cache-max-size-none", "global-statement", "R0801", # Similar lines in 2 files "ungrouped-imports", # C0412: Imports from package pyglossary are not grouped "inconsistent-return-statements", # R1710: Either all return statements in a function should return an expression, or none of them should "too-many-ancestors", # R0901: Too many ancestors ] [tool.pylint.master] ignore-paths = [ "^pyglossary/reverse.py$", "^pyglossary/ui/progressbar/.*", "^pyglossary/ui/ui_qt.py$", "^pyglossary/ui/wcwidth/", ] [tool.vulture] exclude = [ "build/", "tests/", "*_test.py", "test.py", "pyglossary/ui/", "*_types.py", "pyglossary/ui_type.py", "pyglossary/reverse.py", "doc/lib-examples/", "pyglossary/plugin_lib/", ] # ignore_decorators = ["@require_*"] ignore_names = [ "_*", "Generator", "GLOSSARY_API_VERSION", # "Iterable", "AnyStr", # "RawEntryType", # "EntryListType", ] make_whitelist = true min_confidence = 60 # paths = [] sort_by_size = false verbose = false [tool.import-analyzer] exclude = ["pyglossary/ui/wcwidth/", "build/"] [tool.pyright] pythonVersion = "3.10" pythonPlatform = "Linux" reportMissingImports = "error" reportMissingTypeStubs = false exclude = [ "pyglossary/slob.py", "setup.py", "whitelist.py", # for vulture # "pyglossary/ui/gtk4_utils/*", # "pyglossary/ui/gtk3_utils/*", "pyglossary/plugins/babylon_bgl/bgl_gzip.py", "pyglossary/plugins/testformat.py", # "pyglossary/plugin_lib/*", "pyglossary/ui/gtk*_utils/__init__.py", "pyglossary/ui/ui_qt.py", "pyglossary/ui/progressbar/", "pyglossary/reverse.py", "wcwidth*", ".direnv", ".eggs", ".git", ".mypy_cache", ".nox", ".pants.d", ".ruff_cache", ".tox", ".venv", "__pypackages__", "_build", "buck-out", "build", "dist", "venv", ] [tool.fixit] disable = ["fixit.rules.no_namedtuple"] [[tool.fixit.overrides]] path = "pyglossary/reverse.py" disable = ["fixit.rules"] [[tool.fixit.overrides]] path = "pyglossary/ui/wcwidth/" disable = ["fixit.rules"] [[tool.fixit.overrides]] path = "pyglossary/ui/progressbar/" disable = ["fixit.rules"] [build-system] requires = [ "setuptools", # min version? # "setuptools_scm[toml] >= 4, <6", # "setuptools_scm_git_archive", # "wheel >= 0.29.0", ] build-backend = "setuptools.build_meta" [project] name = "pyglossary" version = "5.0.9" description = "A tool for converting dictionary files aka glossaries." 
readme = "README.md" authors = [{ name = "Saeed Rasooli", email = "saeed.gnu@gmail.com" }] license = { text = "GPLv3+" } keywords = ["dictionary", "glossary"] classifiers = [ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", "Operating System :: OS Independent", "Typing :: Typed", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] requires-python = ">= 3.10" dependencies = [] [project.optional-dependencies] all = ["PyICU", "lxml", "beautifulsoup4"] [project.urls] Homepage = "https://github.com/ilius/pyglossary" Issues = "https://github.com/ilius/pyglossary/issues" Changelog = "https://github.com/ilius/pyglossary/tree/master/doc/releases" pyglossary-5.0.9/res/000077500000000000000000000000001476751035500145475ustar00rootroot00000000000000pyglossary-5.0.9/res/author-22.png000066400000000000000000000017271476751035500170070ustar00rootroot00000000000000PNG  IHDRĴl;sBIT|d pHYs9tEXtSoftwarewww.inkscape.org<TIDAT8]h[eo6YI:!ŭMaPTd+.vDQ7٨_ln8:dq5M$&&=qгsw#@ i:sssd2vHp$b,p@H)3VPM=g⤂tQNzcxx VAQ!V=%~$mW_j|# q[= sn\0śM^ ^U7-\m亣[zy!S\rw'-tvyn0C;H ǿ_.*/ݙ3껏bB Ã<|唚bvf?a3j:j:o,:M 1k>| ܦRv쎎j7!>, '8s/𘏸]]SXs'[D_/_#]y_Zw,tpw σadx/W<s[Ztm}]3 nY %p\Tp\rvUz;șSj1Կ}rȡdn( //`k1YkY[ZOCRt *ݣ{T~d`n^*t=>YknݴqY\ KcEcF~̽ TxwvC(h*,G&47!.[ ^?U)ijR(8b-f>y4 E0Eg wv9E]4íDJ--5`! BQ {Y65x?qRVW#1~?>hUQTHd lFJ|4rrhӶ dT$ab(YnM?|Vqw'w^p1psIQ$ .,xa~z?[_=xƾL- +.u{>|+LՃ(1]AL^v7ךӋ`.Ky~ˇO>=@ь!6N2H1X\MF] ێ=vR ֶ2!`ߢhk_hR F澥GM]s¿FY 9JWuA}'Obb```i{C=/sUȴe`##Ed(z%0^A])?R' 5gh6:4(6iR300|ytKB7EIENDB`pyglossary-5.0.9/res/pyglossary-256x256.png000066400000000000000000000651571476751035500204460ustar00rootroot00000000000000PNG  IHDR\rf pHYsetEXtSoftwarewww.inkscape.org< IDATxyl}&ګ#ieQ-ْeYg2OA8@IA0AI1I1Y-KD"%q_߾{ǹ֩U{uBuݺuVu};RS&SGSpS<88SpS, À!P(*4I {0ȈTm5w^^QQĩ8őT˧gd]R&crdf%(]VJHy|Dž`+h?G;8cSAHᧉ>Raq/|.]ӠK d D&ɐ9.<+7FJ8)Z8|jHޗ@爦]L}104$A:Q BLJl/ [.b<&1/ᗯ/f}$KU.w ?qé8Ş񩅧fd~>*jF&S,JC7LD @24"C9<ہz}Δ<;yL]d 9HYA)io$*B %̓&N]SlRYUWԲBX,IP@&QJ̌9.\ہ帛֕d*&abqKfe,C$hBH:J)lFoo Ʀ1W#C S /E~=jI-c,&2)U @(dJARr:}.+2 SUL,͡n)]QMdm# Þ2 2 ~?*55Db8+,@/ܷuR*1KSű1R,itIL)P%FUqL,a&gQYe%;.Kt2n,Žv d).x8)@|O9oQ 3L.S\*CW5( %B@98\ˆk]5tFgfQ@q MK4{!lH1Y @.KQR J)PJ0U @ST3E.U٫?8d ucF6`L2K(Ǡ*T"A(@O.\ˆձ]3 P] * 3ȏAQg7 h `YD#IlP}!zE "K<|!Ne3Hj)z3 NP&ϩ1q|RCӠ@E edwCcmlgӺzDivgf1~f)&*PU5MLLzNdnBvJ)dYF6a" þZ=Mv$I(x٧^! mAAπRN@SpL!]3z.`̍㨔+P@)D !܉#{\,gP@aD$:c#yn:O4d2M! HN9M~~1(g>_$B@[PeoXTG>q_e10rc_R&0$B )t,d7KTf16?i&as *4MKx.8ك[&M !!$ǬDvBH5! k3"sӨ[LXQM@A'{/d 9:1ju2J2HB 4pLݳ& (N8=#c&ZcYy0 DDQ0 ]׹ΏEQQd17E)M !x߅=QCP-n3݁ կ~U_x|jSF1f\NR@TT(OA|mvӗ$0]ؙiQCq 3MH͵e,u'I5;a.VhqQPVǻ޸Zs!S dVj,@aBM"=8o<cF.3e$d20B ⅀ذ[f2STK(LC3$8zBtяڒR)=Iǭ~ٹN=m !\k}Cc"'f09>RlHm꭛?/~@Y }Hvx\ OLM\*CU( Y4^xՆ]Vd&Ks(L ?5M +DH+GVخ/ y&-@}+3^0_o;WkV߀:=dKdu cc( 0 [. emZ"?S:_@]?_hT?RO1BQ=*5JDy>xh\N4{슮0BkyNzU%IJMIȞΧN)(nNgirz cgW!!ˁ5-6z nˆq@DU0=O?cSx/peg>isdi,MLT,2 "@n=c03 cD{k6위bLZ[d-!$qici\Ǎ_~wZmm(+83=)T( Љ Y eîa7ᵙ>@6T<[O_1/Ws-fm5>aKzά+2=;r]!Q Նhjn&Qcla1'Ƒ(#7VwNRBHrLQ.M 7A<=6Gc{u@+_ǝ7c%d׉j 3󘚘db *c|vah1D[+(~o FT!l>ɞ)ly 9EdLRHY::6:fvӳQccQa&|Z$9bpN$;'v+Ġ'j3N4)gX] D#Dg~ 33X(mh܇SoZom~!< T$ڹ\z)[?0y|~b aB B`ъh%Σ8;8rcгfb뺾\;savOA do/6׈濘OX+/+7QorO˘Auj 4IEmnB #ڄh3p"! 
pyglossary-5.0.9/res/pyglossary.svg [SVG image, image/svg+xml; markup omitted]
pyglossary-5.0.9/res/resize.png [binary PNG icon; data omitted]
pyglossary-5.0.9/res/resources.xml
Saeed Rasooli GNU GPLv2 Created based on QStarDict logo and Python logo.
Python logo is a trademark of Python Software Foundation: https://www.python.org/psf/trademarks/ Saeed Rasooli GNU GPLv2 pyglossary.svg Saeed Rasooli GNU GPLv2 pyglossary.svg Oxygen Icons GNU LGPL v3 scalable/status/dialog-information.svg Oxygen Icons GNU LGPL v3 https://github.com/kamalx/oxygen-icons-svg/blob/master/icons/actions/transform-scale.svg www.svgrepo.com Public Domain https://www.svgrepo.com/svg/24604/user www.svgrepo.com Public Domain https://www.svgrepo.com/svg/112407/license pyglossary-5.0.9/run-with-docker.sh000077500000000000000000000030131476751035500173340ustar00rootroot00000000000000#!/bin/bash set -e function shouldBuild() { imageName=$1 if [ "$REBUILD" = 1 ]; then return 0 fi imageCreated=$(docker inspect -f '{{ .Created }}' "$imageName" 2>/dev/null) if [ -z "$imageCreated" ]; then return 0 fi imageAge=$(($(/bin/date +%s) - $(/bin/date +%s -d "$imageCreated"))) if [ -z "$imageAge" ]; then return 0 fi echo "Existing $imageName image is $imageAge seconds old" if [[ "$imageAge" -gt 604800 ]]; then # more than a week old return 0 fi return 1 } cd $(dirname "$0") if [ -n "$1" ]; then version="$1" else version=$(./scripts/version) fi echo "PyGlossary version: $version" set -x #./scripts/create-conf-dir.py if shouldBuild "pyglossary:$version"; then docker build . -f Dockerfile -t "pyglossary:$version" -t pyglossary:latest fi #cacheDir="$HOME/.cache/minideb" #mkdir -p "$cacheDir/var_cache" #mkdir -p "$cacheDir/usr_local_lib" #echo "Docker's cache is being stored in $cacheDir" #docker run -it \ # --volume $cacheDir/var_cache:/var/cache \ # --volume $cacheDir/usr_local_lib:/usr/local/lib \ # --volume $HOME:/root/ \ # pyglossary:$version \ # bash -c '/opt/pyglossary/scripts/docker-deb-setup.sh; python3 /opt/pyglossary/main.py --cmd' # /opt/pyglossary/scripts/docker-deb-setup.sh #imageId=$(docker images -q pyglossary:$version) #docker commit $imageId pyglossary:$version || true # FIXME: gives error: container not found docker run -it \ --user "$(id -u):$(id -g)" \ --volume "$HOME:/home/$USER" \ --env "HOME=/home/$USER" \ --workdir "/home/$USER" \ "pyglossary:$version" pyglossary-5.0.9/scripts/000077500000000000000000000000001476751035500154455ustar00rootroot00000000000000pyglossary-5.0.9/scripts/appledict/000077500000000000000000000000001476751035500174125ustar00rootroot00000000000000pyglossary-5.0.9/scripts/appledict/fix-css.py000066400000000000000000000007741476751035500213500ustar00rootroot00000000000000import sys from os.path import dirname, realpath, splitext sys.path.insert(0, dirname(dirname(dirname(realpath(__file__))))) from pyglossary.apple_utils import substituteAppleCSS for fpath in sys.argv[1:]: if fpath.endswith("-fixed.css"): continue fpathNoExt, _ = splitext(fpath) fpathNew = fpathNoExt + "-fixed.css" with open(fpath, "rb") as _file: text = _file.read() text = substituteAppleCSS(text) with open(fpathNew, "wb") as _file: _file.write(text) print("Created", fpathNew) print() pyglossary-5.0.9/scripts/autofix-plugin-types000066400000000000000000000014551476751035500215120ustar00rootroot00000000000000#!/bin/bash set -e #sed -i -E 's/(def open\(.*\)):$/\1 -> None:/g' "$@" #sed -i -E 's/(def close\(.*\)):$/\1 -> None:/g' "$@" #sed -i -E 's/(def clear\(.*\)):$/\1 -> None:/g' "$@" #sed -i -E 's/(def _clear\(.*\)):$/\1 -> None:/g' "$@" #sed -i -E 's/(def finish\(.*\)):$/\1 -> None:/g' "$@" #sed -i -E 's/(def __len__\(.*\)):$/\1 -> int:/g' "$@" #sed -i -E 's/(def __iter__\(.*\)):$/\1 -> "Iterator[EntryType]":/g' "$@" #sed -i -E 's/(def write.*\)):$/\1 -> None:/g' "$@" sed -i 
-E 's/(def is.*\)):$/\1 -> bool:/g' "$@" #sed -i -E 's/(def setMetadata\(.*\)):$/\1 -> None:/g' "$@" #sed -i -E 's/(def .* filename)([^:])/\1: str\2/g' "$@" #sed -i -E 's/(def .*, glos)([^:])/\1: "GlossaryType"\2/g' "$@" #sed -i -E 's/(def .*header)([,)=])/\1: str\2/g' "$@" #sed -i -E 's/(def .* word)([^:])/\1: str\2/g' "$@" pyglossary-5.0.9/scripts/check-missing-types000077500000000000000000000004321476751035500212600ustar00rootroot00000000000000#!/bin/bash set -e if [ -z "$1" ]; then cd $(dirname "$0")/.. set . fi if [ -z "$NO_COLOR" ]; then export CLICOLOR_FORCE=1 fi ruff check --fix "$@" ruff check --select ANN "$@" | grep -v 'Missing type annotation for `\*\*' | grep -v _test.py | grep -v _debug.py | less -R pyglossary-5.0.9/scripts/check-style000077500000000000000000000004041476751035500176040ustar00rootroot00000000000000#!/bin/bash set -x if [ -z "$1" ]; then cd $(dirname "$0")/.. || exit set . fi # ruff check "$@" | grep -E --color=always '\:[0-9]+\:' | less -R # no 'grep -P' on *BSD / Mac if [ -z "$NO_COLOR" ]; then export CLICOLOR_FORCE=1 fi ruff check "$@" | less -R pyglossary-5.0.9/scripts/check-style-slow000066400000000000000000000016031476751035500205650ustar00rootroot00000000000000#!/bin/bash IGNORE= function ignore() { IGNORE="$IGNORE,$1" } ignore W191 "indentation contains tabs" #ignore W503 "line break occurred before a binary operator" ignore W504 "line break after binary operator" #ignore E117 "over-indented" #ignore E261 "at least two spaces before inline comment" #ignore E262 "inline comment should start with '# '" #ignore E265 "block comment should start with '# '" ignore E402 "module level import not at top of file" #ignore E702 "multiple statements on one line (semicolon)" #ignore F403 "'from module import *' used; unable to detect undefined names" TOOL="pycodestyle --max-line-length=100" if which flake8; then TOOL=flake8 fi # pycodestyle #if [ -z $2 ] ; then $TOOL "--ignore=$IGNORE" "$@" | grep -v wcwidth | sed -E 's/([0-9]+):([0-9]+)/\1 /g' | sed 's|^./||g' | less -N #else # pycodestyle --select "$@" | grep --color=always -P ':\d*:' 2>&1 #fi pyglossary-5.0.9/scripts/config-doc.py000077500000000000000000000101551476751035500200340ustar00rootroot00000000000000#!/usr/bin/env python import json import re import sys from os.path import abspath, dirname, join from mako.template import Template rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.ui.base import UIBase ui = UIBase() ui.loadConfig(user=False) # ui.configDefDict re_flag = re.compile("(\\s)(--[a-z\\-]+)") template = Template( """${paramsTable} ${"Configuration Files"} ${"-------------------"} The default configuration values are stored in `config.json <./../config.json/>`_ file in source/installation directory. The user configuration file - if exists - will override default configuration values. The location of this file depends on the operating system: - Linux or BSD: ``~/.pyglossary/config.json`` - Mac: ``~/Library/Preferences/PyGlossary/config.json`` - Windows: ``C:\\Users\\USERNAME\\AppData\\Roaming\\PyGlossary\\config.json`` ${"Using as library"} ${"----------------"} When you use PyGlossary as a library, neither of ``config.json`` files are loaded. So if you want to change the config, you should set ``glos.config`` property (which you can do only once for each instance of ``Glossary``). For example: .. 
code:: python glos = Glossary() glos.config = { "lower": True, } """, ) with open(join(rootDir, "scripts/term-colors.json"), encoding="utf-8") as _file: termColors = json.load(_file) def codeValue(x): s = str(x) if s: return "``" + s + "``" return "" def tableRowSep(width, c="-"): return "+" + c + f"{c}+{c}".join([c * w for w in width]) + c + "+" def renderTable(rows): """rows[0] must be headers.""" colN = len(rows[0]) width = [ max(max(len(line) for line in row[i].split("\n")) for row in rows) for i in range(colN) ] rowSep = tableRowSep(width, "-") headerSep = tableRowSep(width, "=") lines = [rowSep] for rowI, row in enumerate(rows): newRows = [] for colI, cell in enumerate(row): for lineI, line in enumerate(cell.split("\n")): if lineI >= len(newRows): newRows.append([" " * width[colI] for colI in range(colN)]) newRows[lineI][colI] = line.ljust(width[colI], " ") lines += ["| " + " | ".join(row) + " |" for row in newRows] if rowI == 0: lines.append(headerSep) else: lines.append(rowSep) # widthsStr = ", ".join([str(w) for w in width]) # header = f".. table:: my table\n\t:widths: {widthsStr}\n\n" # return header + "\n".join(["\t" + line for line in lines]) return "\n".join(lines) def getCommandFlagsMD(name, opt): if name.startswith("color.enable.cmd."): return "``--no-color``" if not opt.hasFlag: return "" flag = opt.customFlag if not flag: flag = name.replace("_", "-") if opt.falseComment: return f"| ``--{flag}``\n| ``--no-{flag}``" # return f"- ``--{flag}``\n- ``--no-{flag}``" return f"``--{flag}``" def optionComment(name, opt): comment = opt.comment comment = re_flag.sub("\\1``\\2``", comment) if name.startswith("color.cmd."): comment = f"| {comment}\n| See `term-colors.md <./term-colors.md/>`_" return comment # noqa: RET504 def jsonCodeValue(value): # if isinstance(value, str): # return codeValue(value) return codeValue(json.dumps(value)) def defaultOptionValue(name, _opt, images): value = ui.config[name] valueMD = jsonCodeValue(value) if name.startswith("color.cmd."): hex_ = termColors[str(value)].lstrip("#") imageI = f"image{len(images)}" images.append( f".. 
|{imageI}| image:: https://via.placeholder.com/20/{hex_}/000000?text=+", ) valueMD += f"\n|{imageI}|" return valueMD title = "Configuration Parameters" title += "\n" + len(title) * "-" + "\n" images = [] paramsTable = title + renderTable( [ ( "Name", "Command Flags", "Type", "Default", "Comment", ), ] + [ ( codeValue(name), getCommandFlagsMD(name, opt), opt.typ, defaultOptionValue(name, opt, images), optionComment(name, opt), ) for name, opt in ui.configDefDict.items() if not opt.disabled ], ) text = template.render( codeValue=codeValue, ui=ui, paramsTable=paramsTable, ) text += "\n" for image in images: text += "\n" + image with open(join(rootDir, "doc", "config.rst"), mode="w", encoding="utf-8") as _file: _file.write(text) pyglossary-5.0.9/scripts/create-conf-dir.py000077500000000000000000000003631476751035500207660ustar00rootroot00000000000000#!/usr/bin/env python3 import os import sys from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.core import confDir os.makedirs(confDir, mode=0o755, exist_ok=True) pyglossary-5.0.9/scripts/diff-glossary000077500000000000000000000006201476751035500201420ustar00rootroot00000000000000#!/usr/bin/env bash set -e myPath=$(realpath "$0") myDir1=$(dirname "$myPath") rootDir=$(dirname "$myDir1") # There is a bug in pyenv 'python' script that splits up a (quoted) arguemnt that has spaces # So there is no way of passing a filename with spaces # That's why I changed `python` to `python3` to avoid pyenv PYTHONPATH=$rootDir python3 "$rootDir/pyglossary/ui/tools/diff_glossary.py" "$@" pyglossary-5.0.9/scripts/doc-pypi-links.sh000077500000000000000000000001241476751035500206430ustar00rootroot00000000000000#!/bin/bash grep -roh 'https://pypi.org/project/[^)]*' doc/p/ | sort | uniq --count pyglossary-5.0.9/scripts/docker-deb-setup.sh000077500000000000000000000007051476751035500211430ustar00rootroot00000000000000#!/bin/bash rm /etc/apt/apt.conf.d/docker-clean set -e apt-get update apt-get install --yes python3 apt-get install --yes python3-pip apt-get install --yes python3-lxml apt-get install --yes python3-lzo apt-get install --yes python3-icu apt-get install --yes pkg-config rm /usr/lib/python*/EXTERNALLY-MANAGED pip3 install prompt_toolkit pip3 install beautifulsoup4 pip3 install marisa-trie pip3 install 'libzim>=1.0' pip3 install 'mistune==3.0.1' pyglossary-5.0.9/scripts/dump.py000077500000000000000000000004421476751035500167670ustar00rootroot00000000000000#!/usr/bin/env python3 import sys from pprint import pformat from pyglossary.glossary import Glossary glos = Glossary() glos.read(sys.argv[1]) for entry in glos: print("Words: " + pformat(entry.l_word)) print("Definitions: " + pformat(entry.defis)) print("-------------------------") pyglossary-5.0.9/scripts/entry-filters-doc.py000077500000000000000000000036361476751035500214040ustar00rootroot00000000000000#!/usr/bin/python3 import sys from os.path import abspath, dirname, join from mako.template import Template rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.entry_filters import entryFiltersRules from pyglossary.ui.base import UIBase ui = UIBase() ui.loadConfig(user=False) template = Template( """${entryFiltersTable} """, ) def codeValue(x): s = str(x) if s: return "`" + s + "`" return "" def yesNo(x): if x is True: return "Yes" if x is False: return "No" return "" def renderCell(value): return str(value).replace("\n", "\\n").replace("\t", "\\t") def renderTable(rows): """rows[0] must be 
headers.""" rows = [[renderCell(cell) for cell in row] for row in rows] width = [max(len(row[i]) for row in rows) for i in range(len(rows[0]))] rows = [ [cell.ljust(width[i], " ") for i, cell in enumerate(row)] for rowI, row in enumerate(rows) ] rows.insert(1, ["-" * colWidth for colWidth in width]) return "\n".join(["| " + " | ".join(row) + " |" for row in rows]) def getCommandFlagsMD(name): if name is None: return "" opt = ui.configDefDict[name] flag = name.replace("_", "-") if opt.falseComment: return f"`--{flag}`
          `--no-{flag}`" return f"`--{flag}`" for configParam, default, filterClass in entryFiltersRules: if configParam is None: continue assert ui.config[configParam] == default assert filterClass.name == configParam entryFiltersTable = "## Entry Filters\n\n" + renderTable( [ ( "Name", "Default Enabled", "Command Flags", "Description", ), ] + [ ( codeValue(filterClass.name), yesNo(bool(default)), getCommandFlagsMD(configParam), filterClass.desc, ) for configParam, default, filterClass in entryFiltersRules ], ) text = template.render( entryFiltersTable=entryFiltersTable, ) with open( join(rootDir, "doc", "entry-filters.md"), mode="w", encoding="utf-8", ) as _file: _file.write(text) pyglossary-5.0.9/scripts/format-code000077500000000000000000000001561476751035500175750ustar00rootroot00000000000000#!/bin/bash ruff format "$@" git diff --name-only | xargs -n1 sed -i -E $'s|^\t+ | |g' ruff check --fix "$@" pyglossary-5.0.9/scripts/gen.sh000077500000000000000000000004041476751035500165530ustar00rootroot00000000000000#!/usr/bin/env bash set -e myDir1=$(dirname "$0") # to handle rename of a plugin: rm $myDir1/../doc/p/*.md || true set -x python "$myDir1/plugin-index.py" python "$myDir1/plugin-doc.py" python "$myDir1/config-doc.py" python "$myDir1/entry-filters-doc.py" pyglossary-5.0.9/scripts/get-unlisted-formats.sh000077500000000000000000000005561476751035500220670ustar00rootroot00000000000000#!/usr/bin/env bash set -e rootDir=$(dirname $(dirname "$0")) mkdir -p "$HOME/.cache/pyglossary/tmp" cd "$HOME/.cache/pyglossary/tmp" grep -oP 'doc/p/.*?\.md' "$rootDir/README.md" | sed 's|.*/||' | sort | uniq >formats-1 grep -oP '/.*?\.md' "$rootDir/doc/p/__index__.md" | sed 's|.*/||' | sort >formats-2 diff formats-1 formats-2 || true rm formats-1 formats-2 pyglossary-5.0.9/scripts/glos-find-bar-words.py000077500000000000000000000011741476751035500216050ustar00rootroot00000000000000#!/usr/bin/python3 import sys from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary import Glossary def hasBar(entry): return any("|" in word for word in entry.l_word) Glossary.init( # usePluginsJson=False, ) for direct in (True, False): print(f"\n-------- {direct=}") glos = Glossary() glos.config = { "enable_alts": True, } glos.read( filename=sys.argv[1], direct=direct, ) for entry in glos: if hasBar(entry): print(f"+++ {entry.l_word!r} -> {entry.defi[:60]}") continue # print(f"--- {entry.l_word!r} -> {entry.defi[:60]}") pyglossary-5.0.9/scripts/mypy-deps.sh000077500000000000000000000000721476751035500177320ustar00rootroot00000000000000python -m pip install types-lxml types-polib types-psutil pyglossary-5.0.9/scripts/plist-to-json.py000077500000000000000000000010241476751035500205410ustar00rootroot00000000000000#!/usr/bin/env python import json import sys import biplist plistPath = sys.argv[1] try: data = biplist.readPlist(plistPath) except (biplist.InvalidPlistException, biplist.NotBinaryPlistException): try: import plistlib with open(plistPath, mode="rb") as plist_file: data = plistlib.loads(plist_file.read()) except Exception as e: raise OSError( "'Info.plist' file is malformed, " f"Please provide 'Contents/' with a correct 'Info.plist'. 
{e}", ) from e print(json.dumps(data, indent="\t", sort_keys=True)) pyglossary-5.0.9/scripts/plugin-doc.py000077500000000000000000000170641476751035500200730ustar00rootroot00000000000000#!/usr/bin/python3 import sys from os.path import abspath, dirname, join from pathlib import Path import tomllib as toml from mako.template import Template rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.core import userPluginsDir from pyglossary.glossary import Glossary from pyglossary.sort_keys import defaultSortKeyName Glossary.init( # usePluginsJson=False, ) """ Mako template engine: https://docs.makotemplates.org/en/latest/ https://github.com/sqlalchemy/mako https://pypi.org/project/Mako/ Package python3-mako in Debian repos """ template = Template( """${"##"} ${description} ${topTables} % if readDependsLinks and readDependsLinks == writeDependsLinks: ${"### Dependencies for reading and writing"} PyPI Links: ${readDependsLinks} To install, run: ```sh ${readDependsCmd} ``` % else: % if readDependsLinks: ${"### Dependencies for reading"} PyPI Links: ${readDependsLinks} To install, run: ```sh ${readDependsCmd} ``` % endif % if writeDependsLinks: ${"### Dependencies for writing"} PyPI Links: ${writeDependsLinks} To install, run ```sh ${writeDependsCmd} ``` % endif % endif % if extraDocs: % for title, text in extraDocs: ${f"### {title}"} ${text.replace('(./doc/', '(../')} % endfor % endif ${toolsTable} """, ) def codeValue(x): s = str(x) if s: return "`" + s + "`" return "" def yesNo(x): if x is True: return "Yes" if x is False: return "No" return "" def kindEmoji(kind): if not kind: return "" return { "text": "📝", "binary": "🔢", "directory": "📁", "package": "📦", }[kind] def renderLink(title, url): if "(" in title or ")" in title: url = f"<{url}>" title = title.replace("|", "-") return f"[{title}]({url})" def pypiLink(pypiName): urlPath = pypiName.replace("==", "/") urlPath = urlPath.replace(">", "%3E") return renderLink( pypiName.replace("==", " "), f"https://pypi.org/project/{urlPath}", ) def makeDependsDoc(cls): if not (cls and getattr(cls, "depends", None)): return "", "" links = ", ".join([pypiLink(pypiName) for pypiName in cls.depends.values()]) cmd = "pip3 install " + " ".join( cls.depends.values(), ) return links, cmd def sortKeyName(p): value = p.sortKeyName if value: return codeValue(value) return "(" + codeValue(defaultSortKeyName) + ")" def renderCell(value): return str(value).replace("\n", "\\n").replace("\t", "\\t") def renderTable(rows): """rows[0] must be headers.""" rows = [[renderCell(cell) for cell in row] for row in rows] width = [max(len(row[i]) for row in rows) for i in range(len(rows[0]))] rows = [ [cell.ljust(width[i], " ") for i, cell in enumerate(row)] for rowI, row in enumerate(rows) ] rows.insert(1, ["-" * colWidth for colWidth in width]) return "\n".join(["| " + " | ".join(row) + " |" for row in rows]) def renderTableNoPadding(rows): """rows[0] must be headers.""" rows = [[renderCell(cell) for cell in row] for row in rows] width = [len(x) for x in rows[0]] rows.insert(1, ["-" * colWidth for colWidth in width]) return "\n".join(["| " + " | ".join(row) + " |" for row in rows]) def renderRWOptions(options): return renderTable( [("Name", "Default", "Type", "Comment")] + [ ( optName, codeValue(default), optionsType[optName], optionsComment[optName], ) for optName, default in options.items() ], ) def pluginIsActive(p): if not p.enable: return False if not (p.canRead or p.canWrite): return False return userPluginsDirPath not in p.path.parents 
def getToolSourceLink(tool): url = tool.get("source") if not url: return "―" _, title = url.split("://") if title.startswith("github.com/"): title = "@" + title[len("github.com/") :] return renderLink(title, url) userPluginsDirPath = Path(userPluginsDir) plugins = [p for p in Glossary.plugins.values() if pluginIsActive(p)] pluginsDir = join(rootDir, "pyglossary", "plugins") for p in plugins: module = p.module optionsProp = p.optionsProp wiki = module.wiki wiki_md = "―" if wiki: if wiki.startswith("https://github.com/"): wiki_title = "@" + wiki[len("https://github.com/") :] else: wiki_title = wiki.split("/")[-1].replace("_", " ") wiki_md = renderLink(wiki_title, wiki) website_md = "―" website = module.website if website: if isinstance(website, str): website_md = website else: try: url, title = website except ValueError: raise ValueError(f"{website = }") from None website_md = renderLink(title, url) ( readDependsLinks, readDependsCmd, ) = makeDependsDoc(getattr(module, "Reader", None)) ( writeDependsLinks, writeDependsCmd, ) = makeDependsDoc(getattr(module, "Writer", None)) extraDocs = getattr(module, "extraDocs", []) toolsFile = join(pluginsDir, p.moduleName, "tools.toml") try: with open(toolsFile, "rb") as _file: tools_toml = toml.load(_file) except FileNotFoundError: tools = [] except Exception as e: print(f"\nFile: {toolsFile}") raise e else: for toolName, tool in tools_toml.items(): tool.update({"name": toolName}) tools = tools_toml.values() table = [ ("Attribute", "Value"), ("Name", p.name), ("snake_case_name", p.lname), ("Description", p.description), ("Extensions", ", ".join([codeValue(ext) for ext in p.extensions])), ("Read support", yesNo(p.canRead)), ("Write support", yesNo(p.canWrite)), ("Single-file", yesNo(p.singleFile)), ("Kind", f"{kindEmoji(module.kind)} {module.kind}"), ] if p.canWrite: table += [ ("Sort-on-write", p.sortOnWrite.desc), ("Sort key", sortKeyName(p)), ] table += [ ("Wiki", wiki_md), ("Website", website_md), ] generalInfoTable = "### General Information\n\n" + renderTable(table) topTables = generalInfoTable try: optionsType = {optName: opt.typ for optName, opt in optionsProp.items()} except Exception: print(f"{optionsProp = }") raise optionsComment = { optName: opt.comment.replace("\n", "
          ") for optName, opt in optionsProp.items() } readOptions = p.getReadOptions() if readOptions: topTables += "\n\n### Read options\n\n" + renderRWOptions(readOptions) writeOptions = p.getWriteOptions() if writeOptions: topTables += "\n\n### Write options\n\n" + renderRWOptions(writeOptions) toolsTable = "" if tools: toolsTable = "### Dictionary Applications/Tools\n\n" + renderTable( [ ( "Name & Website", "Source code", "License", "Platforms", "Language", ), ] + [ ( f"[{tool['name']}]({tool['web']})", getToolSourceLink(tool), tool["license"], ", ".join(tool["platforms"]), tool.get("plang", ""), ) for tool in tools ], ) text = template.render( description=p.description, codeValue=codeValue, yesNo=yesNo, topTables=topTables, optionsProp=optionsProp, readOptions=readOptions, writeOptions=writeOptions, optionsComment=optionsComment, optionsType=optionsType, readDependsLinks=readDependsLinks, readDependsCmd=readDependsCmd, writeDependsLinks=writeDependsLinks, writeDependsCmd=writeDependsCmd, extraDocs=extraDocs, toolsTable=toolsTable, ) for _i in range(3): text = text.replace("\n\n\n", "\n\n") if text.endswith("\n\n"): text = text[:-1] with open( join(rootDir, "doc", "p", f"{p.lname}.md"), mode="w", encoding="utf-8", newline="\n", ) as _file: _file.write(text) indexText = renderTableNoPadding( [("Description", "Name", "Doc Link")] + [ ( p.description, p.name, renderLink(f"{p.lname}.md", f"./{p.lname}.md"), ) for p in plugins ], ) with open( join(rootDir, "doc", "p", "__index__.md"), mode="w", encoding="utf-8", newline="\n", ) as _file: _file.write(indexText + "\n") pyglossary-5.0.9/scripts/plugin-index.py000077500000000000000000000031571476751035500204330ustar00rootroot00000000000000#!/usr/bin/python3 import json import sys from os.path import abspath, dirname, join from pathlib import Path from typing import Any rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.core import userPluginsDir from pyglossary.flags import DEFAULT_NO from pyglossary.glossary import Glossary Glossary.init( usePluginsJson=False, skipDisabledPlugins=False, ) userPluginsDirPath = Path(userPluginsDir) plugins = [ p for p in Glossary.plugins.values() if userPluginsDirPath not in p.path.parents ] data = [] for p in plugins: canRead = p.canRead canWrite = p.canWrite item: dict[str, Any] = { "module": p.module.__name__, "lname": p.lname, "name": p.name, "description": p.description, "extensions": p.extensions, "singleFile": p.singleFile, "optionsProp": {name: opt.toDict() for name, opt in p.optionsProp.items()}, "canRead": canRead, "canWrite": canWrite, } if p.sortOnWrite != DEFAULT_NO: item["sortOnWrite"] = p.sortOnWrite if p.sortKeyName: item["sortKeyName"] = p.sortKeyName if canRead: item["readOptions"] = p.getReadOptions() if canWrite: item["writeOptions"] = p.getWriteOptions() if not p.enable: item["enable"] = False if p.readDepends: item["readDepends"] = p.readDepends if p.writeDepends: item["writeDepends"] = p.writeDepends if p.readCompressions: item["readCompressions"] = p.readCompressions data.append(item) jsonText = json.dumps( data, sort_keys=False, indent="\t", ensure_ascii=True, ) with open( join(rootDir, "plugins-meta", "index.json"), mode="w", encoding="utf-8", newline="\n", ) as _file: _file.write(jsonText) pyglossary-5.0.9/scripts/plugin-validate.py000077500000000000000000000011111476751035500211010ustar00rootroot00000000000000#!/usr/bin/python3 import sys from os.path import abspath, dirname from pathlib import Path rootDir = dirname(dirname(abspath(__file__))) 
sys.path.insert(0, rootDir) from pyglossary.core import userPluginsDir from pyglossary.glossary import Glossary Glossary.init( usePluginsJson=False, skipDisabledPlugins=False, ) userPluginsDirPath = Path(userPluginsDir) plugins = [ p for p in Glossary.plugins.values() if userPluginsDirPath not in p.path.parents ] data = [] for p in plugins: module = p.module # print(module.__file__) p.checkModule(module) p.checkModuleMore(module) pyglossary-5.0.9/scripts/release-new-version-github000077500000000000000000000031001476751035500225370ustar00rootroot00000000000000#!/bin/bash set -e function yes_or_no { while true; do read -r -p "$* [y/n]: " yn case $yn in [Yy]*) return 0 ;; [Nn]*) echo "Aborted" return 1 ;; esac done } # function pip-has-version() { # pip $PIP_OPTS index versions pyglossary --pre --ignore-requires-python | grep "$1," # } CUR_VERSION=$($(dirname "$0")/version-core) VERSION=$1 if [ -z "$VERSION" ]; then echo "Usage: $0 VERSION" echo "Current version: $CUR_VERSION" exit 1 fi set -x $(dirname "$0")/version-set.py "$VERSION" git add setup.py pyglossary/core.py pyproject.toml about _license-dialog git commit -m "version $VERSION" || echo "------ Already committed" git -p show || true git -p log || true echo "Pushing to origin..." git push echo "Waiting for pypi release..." while ! pip $PIP_OPTS install "pyglossary==$VERSION"; do sleep 5 done echo "-------------- Check version in GUI: --------------" ~/.local/bin/pyglossary || true yes_or_no "Continue creating the release?" || exit 1 echo "Creating tag $VERSION" if ! git tag -a -m "version $VERSION" "$VERSION"; then echo "------ Already tagged" CUR_VERSION=$(git describe --abbrev=0 --tags $(git rev-list --tags --skip=1 --max-count=1)) fi echo "Pushing tag to origin..." git push origin "$VERSION" MD_PATH=$(realpath $(dirname $0)/../doc/releases/$VERSION.md) echo -e "## What's Changed\n" >"$MD_PATH" git log --pretty='- %h %s' --reverse "$CUR_VERSION..$VERSION" >>"$MD_PATH" echo -e "\n\n**Full Changelog**: https://github.com/ilius/pyglossary/compare/$CUR_VERSION...$VERSION\n" >>"$MD_PATH" echo "Created $MD_PATH" xdg-open "$MD_PATH" pyglossary-5.0.9/scripts/release-new-version-local000077500000000000000000000021111476751035500223500ustar00rootroot00000000000000#!/bin/bash set -e function yes_or_no { while true; do read -p "$* [y/n]: " yn case $yn in [Yy]*) return 0 ;; [Nn]*) echo "Aborted" return 1 ;; esac done } CUR_VERSION=$($(dirname $0)/version-core) VERSION=$1 if [ -z $VERSION ]; then echo "Usage: $0 VERSION" echo "Current version: $CUR_VERSION" exit 1 fi set -x $(dirname $0)/version-set.py $VERSION sudo rm -rf dist build mkdir dist build python3 setup.py sdist bdist_wheel pip3 install ./dist/*.whl -U --user --force-reinstall du -k dist/* echo "-------------- Check version in GUI: --------------" ~/.local/bin/pyglossary yes_or_no "Continue creating the release?" || exit 1 git add setup.py pyglossary/core.py pyproject.toml about _license-dialog git commit -m "version $VERSION" || echo "------ Already committed" git -p show || true git -p log || true echo "Pushing to origin..." git push echo "Creating tag $VERSION" git tag -a -m "version $VERSION" $VERSION echo "Pushing tag to origin..." 
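# Editor's note: unlike release-new-version-github above, which waits for a
# pypi release to appear (presumably published by CI), this local variant
# uploads the dist/ artifacts itself via twine right after pushing the tag.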
git push origin $VERSION echo "Publishing to pypi" python3 -m twine upload --repository pypi dist/* --verbose pyglossary-5.0.9/scripts/term-colors.json000066400000000000000000000112241476751035500206060ustar00rootroot00000000000000{ "0": "#000000", "1": "#aa0000", "2": "#00aa00", "3": "#aa5500", "4": "#0000aa", "5": "#aa00aa", "6": "#00aaaa", "7": "#b9b9b9", "8": "#555555", "9": "#ff5555", "10": "#55ff55", "11": "#ffff55", "12": "#5555ff", "13": "#ff55ff", "14": "#55ffff", "15": "#ffffff", "16": "#000000", "17": "#00005f", "18": "#000087", "19": "#0000af", "20": "#0000d7", "21": "#0000ff", "22": "#005f00", "23": "#005f5f", "24": "#005f87", "25": "#005faf", "26": "#005fd7", "27": "#005fff", "28": "#008700", "29": "#00875f", "30": "#008787", "31": "#0087af", "32": "#0087d7", "33": "#0087ff", "34": "#00af00", "35": "#00af5f", "36": "#00af87", "37": "#00afaf", "38": "#00afd7", "39": "#00afff", "40": "#00d700", "41": "#00d75f", "42": "#00d787", "43": "#00d7af", "44": "#00d7d7", "45": "#00d7ff", "46": "#00ff00", "47": "#00ff5f", "48": "#00ff87", "49": "#00ffaf", "50": "#00ffd7", "51": "#00ffff", "52": "#5f0000", "53": "#5f005f", "54": "#5f0087", "55": "#5f00af", "56": "#5f00d7", "57": "#5f00ff", "58": "#5f5f00", "59": "#5f5f5f", "60": "#5f5f87", "61": "#5f5faf", "62": "#5f5fd7", "63": "#5f5fff", "64": "#5f8700", "65": "#5f875f", "66": "#5f8787", "67": "#5f87af", "68": "#5f87d7", "69": "#5f87ff", "70": "#5faf00", "71": "#5faf5f", "72": "#5faf87", "73": "#5fafaf", "74": "#5fafd7", "75": "#5fafff", "76": "#5fd700", "77": "#5fd75f", "78": "#5fd787", "79": "#5fd7af", "80": "#5fd7d7", "81": "#5fd7ff", "82": "#5fff00", "83": "#5fff5f", "84": "#5fff87", "85": "#5fffaf", "86": "#5fffd7", "87": "#5fffff", "88": "#870000", "89": "#87005f", "90": "#870087", "91": "#8700af", "92": "#8700d7", "93": "#8700ff", "94": "#875f00", "95": "#875f5f", "96": "#875f87", "97": "#875faf", "98": "#875fd7", "99": "#875fff", "100": "#878700", "101": "#87875f", "102": "#878787", "103": "#8787af", "104": "#8787d7", "105": "#8787ff", "106": "#87af00", "107": "#87af5f", "108": "#87af87", "109": "#87afaf", "110": "#87afd7", "111": "#87afff", "112": "#87d700", "113": "#87d75f", "114": "#87d787", "115": "#87d7af", "116": "#87d7d7", "117": "#87d7ff", "118": "#87ff00", "119": "#87ff5f", "120": "#87ff87", "121": "#87ffaf", "122": "#87ffd7", "123": "#87ffff", "124": "#af0000", "125": "#af005f", "126": "#af0087", "127": "#af00af", "128": "#af00d7", "129": "#af00ff", "130": "#af5f00", "131": "#af5f5f", "132": "#af5f87", "133": "#af5faf", "134": "#af5fd7", "135": "#af5fff", "136": "#af8700", "137": "#af875f", "138": "#af8787", "139": "#af87af", "140": "#af87d7", "141": "#af87ff", "142": "#afaf00", "143": "#afaf5f", "144": "#afaf87", "145": "#afafaf", "146": "#afafd7", "147": "#afafff", "148": "#afd700", "149": "#afd75f", "150": "#afd787", "151": "#afd7af", "152": "#afd7d7", "153": "#afd7ff", "154": "#afff00", "155": "#afff5f", "156": "#afff87", "157": "#afffaf", "158": "#afffd7", "159": "#afffff", "160": "#d70000", "161": "#d7005f", "162": "#d70087", "163": "#d700af", "164": "#d700d7", "165": "#d700ff", "166": "#d75f00", "167": "#d75f5f", "168": "#d75f87", "169": "#d75faf", "170": "#d75fd7", "171": "#d75fff", "172": "#d78700", "173": "#d7875f", "174": "#d78787", "175": "#d787af", "176": "#d787d7", "177": "#d787ff", "178": "#d7af00", "179": "#d7af5f", "180": "#d7af87", "181": "#d7afaf", "182": "#d7afd7", "183": "#d7afff", "184": "#d7d700", "185": "#d7d75f", "186": "#d7d787", "187": "#d7d7af", "188": "#d7d7d7", "189": "#d7d7ff", "190": 
"#d7ff00", "191": "#d7ff5f", "192": "#d7ff87", "193": "#d7ffaf", "194": "#d7ffd7", "195": "#d7ffff", "196": "#ff0000", "197": "#ff005f", "198": "#ff0087", "199": "#ff00af", "200": "#ff00d7", "201": "#ff00ff", "202": "#ff5f00", "203": "#ff5f5f", "204": "#ff5f87", "205": "#ff5faf", "206": "#ff5fd7", "207": "#ff5fff", "208": "#ff8700", "209": "#ff875f", "210": "#ff8787", "211": "#ff87af", "212": "#ff87d7", "213": "#ff87ff", "214": "#ffaf00", "215": "#ffaf5f", "216": "#ffaf87", "217": "#ffafaf", "218": "#ffafd7", "219": "#ffafff", "220": "#ffd700", "221": "#ffd75f", "222": "#ffd787", "223": "#ffd7af", "224": "#ffd7d7", "225": "#ffd7ff", "226": "#ffff00", "227": "#ffff5f", "228": "#ffff87", "229": "#ffffaf", "230": "#ffffd7", "231": "#ffffff", "232": "#080808", "233": "#121212", "234": "#1c1c1c", "235": "#262626", "236": "#303030", "237": "#3a3a3a", "238": "#444444", "239": "#4e4e4e", "240": "#585858", "241": "#626262", "242": "#6c6c6c", "243": "#767676", "244": "#808080", "245": "#8a8a8a", "246": "#949494", "247": "#9e9e9e", "248": "#a8a8a8", "249": "#b2b2b2", "250": "#bcbcbc", "251": "#c6c6c6", "252": "#d0d0d0", "253": "#dadada", "254": "#e4e4e4", "255": "#eeeeee" }pyglossary-5.0.9/scripts/test-cover-html-plugin.sh000077500000000000000000000024401476751035500223350ustar00rootroot00000000000000#!/usr/bin/env bash set -e pluginLookup="$1" if [ -z "$pluginLookup" ]; then echo 'Must give plugins l_name as argument, for example "stardict" or "octopus_mdict"' exit 1 fi rootDir=$(dirname $(realpath $(dirname "$0"))) echo $rootDir cd $rootDir/pyglossary/plugins/ pluginLname=$(ls -1d $pluginLookup* | grep -v 'cover' | sort | head -n1 | sed 's/\.py$//') if [ -z "$pluginLname" ]; then echo "Did not find a plugin matching '$pluginLookup'" exit 1 fi if [ -f "$rootDir/pyglossary/plugins/${pluginLname}.py" ]; then filePaths="$rootDir/pyglossary/plugins/${pluginLname}.py" elif [ -d "$rootDir/pyglossary/plugins/${pluginLname}" ]; then filePaths="$rootDir/pyglossary/plugins/${pluginLname}/*.py" else echo "Did not find a plugin matching '$pluginLookup'" exit 1 fi echo "Using plugin name '$pluginLname'" dataFile="$rootDir/pyglossary/plugins/${pluginLname}.cover" outDir="$rootDir/pyglossary/plugins/${pluginLname}.coverhtml" mkdir -p $outDir # echo "file://$outDir/index.html" cd "$rootDir/tests" set -x coverage run --data-file="$dataFile" -m unittest "g_${pluginLname}_test.py" coverage html --data-file="$dataFile" \ --include="$filePaths" \ --directory=$outDir || echo "'coverage html' failed with $?" set +x if [ -f "$outDir/index.html" ]; then echo "file://$outDir/index.html" fi pyglossary-5.0.9/scripts/test-cover-html.sh000077500000000000000000000005511476751035500210420ustar00rootroot00000000000000#!/usr/bin/env bash set -e rootDir=$(dirname $(realpath $(dirname "$0"))) echo "file://$rootDir/tests/htmlcov/index.html" cd "$rootDir/tests" coverage run -m unittest ./*_test.py coverage html --include="$rootDir/pyglossary/*" --omit="$rootDir/pyglossary/plugin_lib/*" || echo "'coverage html' failed with $?" 
echo "file://$rootDir/tests/htmlcov/index.html" pyglossary-5.0.9/scripts/test-deps.sh000077500000000000000000000002251476751035500177130ustar00rootroot00000000000000python -m pip install \ freezegun \ PyICU \ lxml==5.3 \ beautifulsoup4 \ python-idzip \ polib \ html5lib \ mistune \ marisa-trie \ biplist pyglossary-5.0.9/scripts/test-glossary.sh000077500000000000000000000003311476751035500206210ustar00rootroot00000000000000#!/usr/bin/env bash set -e rootDir=$(dirname $(dirname "$0")) echo "$rootDir/tests/glossary_test.py" python3 "$rootDir/tests/glossary_test.py" find "$rootDir/tests" -name "g_*_test.py" -print -exec python3 '{}' \; pyglossary-5.0.9/scripts/test.sh000077500000000000000000000006201476751035500167610ustar00rootroot00000000000000#!/usr/bin/env bash set -e rootDir=$(dirname $(dirname "$0")) rootDirAbs=$(realpath $rootDir) echo "$rootDir/tests" cd "$rootDir/tests" # python -m unittest *_test.py for F in *_test.py; do echo "$F" python -m unittest "$F" done echo "$rootDirAbs/tests/deprecated/" cd "$rootDirAbs/tests/deprecated/" for F in *_test.py; do echo "$F" python -W ignore::DeprecationWarning -m unittest "$F" done pyglossary-5.0.9/scripts/tools-py2toml.py000077500000000000000000000014671476751035500205760ustar00rootroot00000000000000#!/usr/bin/python3 import sys from os.path import abspath, dirname, join from pathlib import Path import toml rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.core import userPluginsDir from pyglossary.glossary import Glossary Glossary.init( # usePluginsJson=False, ) userPluginsDirPath = Path(userPluginsDir) plugins = [ p for p in Glossary.plugins.values() if userPluginsDirPath not in p.path.parents ] toolsDir = join(rootDir, "plugins-meta", "tools") for p in plugins: module = p.module optionsProp = p.optionsProp tools = {} for tool in getattr(p.module, "tools", []): tools[tool.pop("name")] = tool # if not tools: # continue # pprint(tools) with open(join(toolsDir, f"{p.lname}.toml"), mode="w", encoding="utf-8") as _file: toml.dump(tools, _file) pyglossary-5.0.9/scripts/type-checker-deps.sh000066400000000000000000000001361476751035500213150ustar00rootroot00000000000000#!/bin/bash python3 -m pip install lxml-stubs types-beautifulsoup4 types-psutil types-polib pyglossary-5.0.9/scripts/version000077500000000000000000000005001476751035500170530ustar00rootroot00000000000000#!/bin/bash set -e sourceDir=$(dirname "$0")/.. gitDir="$sourceDir/.git" if [ -d "$gitDir" ]; then git --git-dir "$gitDir" describe --always exit 0 fi while read -r line; do if [[ $line = VERSION* ]]; then echo "$line" | sed 's/VERSION\s*=\s*//' | sed 's/"//g' exit 0 fi done <"$sourceDir/pyglossary/core.py" pyglossary-5.0.9/scripts/version-core000077500000000000000000000003261476751035500200070ustar00rootroot00000000000000#!/bin/bash set -e sourceDir=$(dirname "$0")/.. 
while read -r line; do if [[ $line = VERSION* ]]; then echo "$line" | sed 's/VERSION\s*=\s*//' | sed 's/"//g' exit 0 fi done <"$sourceDir/pyglossary/core.py" pyglossary-5.0.9/scripts/version-set.py000077500000000000000000000022711476751035500203020ustar00rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- import sys from datetime import datetime from os.path import abspath, dirname, join from packaging.version import parse def main(): version = sys.argv[1] parse(version) versionQuoted = f'"{version}"' rootDir = dirname(dirname(abspath(__file__))) replaceVar(join(rootDir, "pyglossary/core.py"), "VERSION", versionQuoted) replaceVar(join(rootDir, "setup.py"), "VERSION", versionQuoted) replaceVar(join(rootDir, "pyproject.toml"), "version", versionQuoted) # update copyright year number for fname in ("about", "_license-dialog"): with open(fname, encoding="utf-8") as file: text = file.read() pos = text.find("© ") text = text[: pos + 7] + str(datetime.now().year) + text[pos + 11 :] with open(fname, "w", encoding="utf-8") as file: file.write(text) def replaceVar(fname: str, name: str, value: str) -> None: prefix = name + " = " lines = [] with open(fname, encoding="utf-8") as _file: for _line in _file: line = _line if line.startswith(prefix): line = f"{name} = {value}\n" lines.append(line) with open(fname, mode="w", encoding="utf-8") as _file: _file.writelines(lines) if __name__ == "__main__": main() pyglossary-5.0.9/scripts/view-glossary000077500000000000000000000006201476751035500202040ustar00rootroot00000000000000#!/usr/bin/env bash set -e myPath=$(realpath "$0") myDir1=$(dirname "$myPath") rootDir=$(dirname "$myDir1") # There is a bug in pyenv 'python' script that splits up a (quoted) arguemnt that has spaces # So there is no way of passing a filename with spaces # That's why I changed `python` to `python3` to avoid pyenv PYTHONPATH=$rootDir python3 "$rootDir/pyglossary/ui/tools/view_glossary.py" "$@" pyglossary-5.0.9/scripts/view-glossary-plaintext000077500000000000000000000011201476751035500222060ustar00rootroot00000000000000#!/usr/bin/env python import os import sys from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.glossary_v2 import Glossary from pyglossary.ui.tools.view_glossary import viewGlossary def main() -> None: filename = sys.argv[1] formatName = None if len(sys.argv) > 2: formatName = sys.argv[2] glos = Glossary(ui=None) glos.updateEntryFilters() glos.removeHtmlTagsAll() filename = os.path.expanduser(filename) viewGlossary(filename, formatName=formatName, glos=glos) if __name__ == "__main__": main() pyglossary-5.0.9/scripts/wiki-formats.py000077500000000000000000000057041476751035500204440ustar00rootroot00000000000000#!/usr/bin/env python3 import os import sys from os.path import join from pathlib import Path from mako.template import Template rootDir = join( os.getenv("HOME"), "pyglossary", ) sys.path.insert(0, rootDir) from pyglossary.core import userPluginsDir from pyglossary.glossary import Glossary Glossary.init( # usePluginsJson=False, ) """ Mako template engine: https://docs.makotemplates.org/en/latest/ https://github.com/sqlalchemy/mako https://pypi.org/project/Mako/ Package python3-mako in Debian repos """ hasIconSet = { "aard2_slob", "appledict_bin", "appledict", "babylon_bgl", "cc_cedict", "csv", "dicformids", "dict_cc", "dict_cc_split", "digitalnk", "dsl", "epub2", "jmdict", "kobo", "lingoes_ldf", "mobi", "octopus_mdict", "sql", "stardict", "tabfile", "wiktionary_dump", "zim", } 
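# Editor's note (hedged sketch): `hasIconSet` drives the icon column of the
# generated Formats.md -- `iconImg()` below returns an inline <img> tag only
# for the lnames listed here. A standalone illustration; the base_url
# parameter and the `<lname>.png` filename shape are assumptions, not taken
# from this script:
def _icon_cell_sketch(lname: str, base_url: str) -> str:
	if lname not in hasIconSet:
		return ""
	return f'<img src="{base_url}/{lname}.png" height="32"/>'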
def pluginIsActive(p): if not p.enable: return False if not (p.canRead or p.canWrite): return False return userPluginsDirPath not in p.path.parents def codeValue(x): s = str(x) if s: return "`" + s + "`" return "" def yesNo(x): if x is True: return "Yes" if x is False: return "No" return "" def iconImg(p): if p.lname not in hasIconSet: return "" return ( '' ) def kindEmoji(p): kind = p.module.kind if not kind: return "" return { "text": "📝", "binary": "🔢", "directory": "📁", "package": "📦", }[kind] willNotSupportRead = { "epub2", "kobo", "mobi", # "html_dir", "info", "sql", } willNotSupportWrite = { "appledict_bin", "babylon_bgl", "cc_cedict", "cc_kedict", "freedict", "jmdict", "octopus_mdict", "wiktionary_dump", "xdxf", "wiktextract", "jmnedict", } def readCheck(p): if p.lname in willNotSupportRead: return "❌" return "✔" if p.canRead else "" def writeCheck(p): if p.lname in willNotSupportWrite: return "❌" return "✔" if p.canWrite else "" template = Template( """ | | Description | | Read | Write| Doc Link | |:-:| ----------- |:-:|:----:|:----:| -------- | % for p in plugins: | ${iconImg(p)} | ${p.description} | ${kindEmoji(p)} | ${readCheck(p)} | ${writeCheck(p)} | [${p.lname}.md](https://github.com/ilius/pyglossary/blob/master/doc/p/${p.lname}.md) | % endfor Legend: - 📁 Directory - 📝 Text file - 📦 Package/archive file - 🔢 Binary file - ✔ Supported - ❌ Will not be supported """, ) # wiki = module.wiki # wiki_md = "―" # if module.wiki: # wiki_title = wiki.split("/")[-1].replace("_", " ") # wiki_md = f"[{wiki_title}]({wiki})" # website_md = "―" # if module.website: # website_md = module.website userPluginsDirPath = Path(userPluginsDir) plugins = [p for p in Glossary.plugins.values() if pluginIsActive(p)] text = template.render( plugins=plugins, iconImg=iconImg, kindEmoji=kindEmoji, readCheck=readCheck, writeCheck=writeCheck, ) with open("Formats.md", mode="w", encoding="utf-8") as _file: _file.write(text) pyglossary-5.0.9/scripts/wiktextract/000077500000000000000000000000001476751035500200165ustar00rootroot00000000000000pyglossary-5.0.9/scripts/wiktextract/extract-schema.py000066400000000000000000000074311476751035500233050ustar00rootroot00000000000000import json import sys from collections import Counter from dataclasses import dataclass from typing import Any @dataclass(slots=True) class Node: Type: str = "" Dict: "dict[str, Node] | None" = None KeyScore: "Counter | None" = None ListOf: "Node | None" = None def keyScoreList(self): return [f"{count:.1f}: {key}" for key, count in self.KeyScore.most_common()] @property def __dict__(self): if self.Dict: assert self.ListOf is None keys = [key for key, _ in self.KeyScore.most_common()] try: keys.remove("word") except ValueError: pass else: keys.insert(0, "word") return { "__dict__": {key: self.Dict[key].__dict__ for key in keys}, # "__key_score__": self.keyScoreList(), } if self.ListOf: return {"__list_of__": self.ListOf.__dict__} return self.Type schema = Node(Type="dict") valueSet: "dict[str, set]" = {} def addToValueSet(value: "str | int | float | bool", path: list[str]): if isinstance(value, str) and "://" in value: return pathStr = ".".join(path) if pathStr in valueSet: valueSet[pathStr].add(value) return valueSet[pathStr] = {value} def getSchemaNode(path: list[str]): node = schema for name in path: if name == "[]": node.Type = "list" if not node.ListOf: node.ListOf = Node() node = node.ListOf continue node.Type = "dict" if not node.Dict: node.Dict = {} node.KeyScore = Counter() if name in node.Dict: node = node.Dict[name] else: newNode = 
Node() node.Dict[name] = newNode node = newNode return node def updateSchema(type_: str, path: list[str]): node = getSchemaNode(path) prevType = node.Type if prevType and prevType != type_: print( f"mismatch types for path={'.'.join(path)}, {prevType} and {type_}", ) node.Type = type_ def parseList(data: list[Any], path: list[str], node: Node): node.Type = "list" if not node.ListOf: node.ListOf = Node() if not data: return itemsPath = path + ["[]"] itemTypes = set() for item in data: itemTypes.add(type(item).__name__) if isinstance(item, dict): parseDict(item, itemsPath, node.ListOf) continue if isinstance(item, list): parseList(item, itemsPath, node.ListOf) continue if isinstance(item, str | int | float | bool): addToValueSet(item, path) itemTypesStr = " | ".join(sorted(itemTypes)) updateSchema(itemTypesStr, path + ["[]"]) def parseDict(data: "dict[str, Any]", path: list[str], node: Node): if not node.Dict: node.Dict = {} node.KeyScore = Counter() for index, (key, value) in enumerate(data.items()): node.KeyScore[key] += min(1, 50 - index) / 50 if key in node.Dict: childNode = node.Dict[key] else: childNode = node.Dict[key] = Node() if isinstance(value, dict): parseDict(value, path + [key], childNode) continue if isinstance(value, list): parseList(value, path + [key], childNode) continue if isinstance(value, str | int | float | bool): updateSchema(type(value).__name__, path + [key]) addToValueSet(value, path + [key]) jsonl_path = sys.argv[1] with open(jsonl_path, encoding="utf-8") as _file: for line in _file: line = line.strip() # noqa: PLW2901 if not line: continue try: data = json.loads(line) except Exception: print(f"bad line: {line}") continue parseDict(data, [], schema) with open(f"{jsonl_path}.schema.json", mode="w", encoding="utf-8") as _file: json.dump( schema.__dict__, _file, indent="\t", ) commonValuesList = [ (key, sorted(values)) for key, values in valueSet.items() if len(values) < 20 and len(str(values)) < 100 ] def commonValuesSortKey(item): _key, values = item return abs(len(values) - 5) commonValuesList.sort(key=commonValuesSortKey) with open(f"{jsonl_path}-common-values.json", mode="w", encoding="utf-8") as _file: json.dump(dict(commonValuesList), _file, indent="\t") pyglossary-5.0.9/scripts/wiktextract/sort-jsonl.py000077500000000000000000000006121476751035500225040ustar00rootroot00000000000000#!/usr/bin/env python # read json lines from stdin, # sort them by "word" key and print import operator import sys from json import loads data: "list[tuple[str, str]]" = [] for line in sys.stdin: line = line.strip() # noqa: PLW2901 if not line: continue row = loads(line) data.append((row.get("word"), line)) data.sort(key=operator.itemgetter(0)) for _, line in data: print(line) pyglossary-5.0.9/setup.py000077500000000000000000000072001476751035500154720ustar00rootroot00000000000000#!/usr/bin/env python3 import glob import logging import os import re import sys from os.path import dirname, exists, isdir, join from setuptools import setup from setuptools.command.install import install VERSION = "5.0.9" log = logging.getLogger("root") relRootDir = "share/pyglossary" def getGitVersion(gitDir: str) -> str: import subprocess try: outputB, _err = subprocess.Popen( [ "git", "--git-dir", gitDir, "describe", "--always", ], stdout=subprocess.PIPE, ).communicate() except Exception as e: sys.stderr.write(str(e) + "\n") return "" # if _err is None: return outputB.decode("utf-8").strip() def getPipSafeVersion() -> str: gitDir = ".git" if isdir(gitDir): version = getGitVersion(gitDir) if version: 
return "-".join(version.split("-")[:2]) return VERSION class my_install(install): def run(self) -> None: install.run(self) if os.sep == "/": binPath = join(self.install_scripts, "pyglossary") log.info(f"creating script file {binPath!r}") if not exists(self.install_scripts): os.makedirs(self.install_scripts) # let it fail on wrong permissions. elif not isdir(self.install_scripts): raise OSError( "installation path already exists " f"but is not a directory: {self.install_scripts}", ) open(binPath, "w", encoding="ascii").write("""#!/usr/bin/env -S python3 -O import sys from os.path import dirname sys.path.insert(0, dirname(__file__)) from pyglossary.ui.main import main main()""") os.chmod(binPath, 0o755) root_data_file_names = [ "about", "LICENSE", "_license-dialog", "Dockerfile", "pyproject.toml", "help", "AUTHORS", "config.json", ] sep = "\\\\" if os.sep == "\\" else os.sep package_data = { "": root_data_file_names, "plugins-meta": [ "index.json", "tools/*", ], "pyglossary": [ "*.py", "xdxf.xsl", "res/*", "plugins/*", "langs/*", "plugin_lib/*.py", "plugin_lib/py*/*.py", "sort_modules/*.py", "ui/*.py", "ui/progressbar/*.py", "ui/gtk3_utils/*.py", "ui/gtk4_utils/*.py", "ui/tools/*.py", "ui/wcwidth/*.py", "ui/ui_web/*.py", "ui/ui_web/*.html", "ui/ui_web/*.ico", "ui/ui_web/*.css", "ui/ui_web/*.js", "xdxf/xdxf.xsl", "xdxf/*.py", "repro_zipfile/*.py", ] + [ # safest way found so far to include every resource of plugins # producing plugins/pkg/*, plugins/pkg/sub1/*, ... except .pyc/.pyo re.sub( rf"^.*?pyglossary{sep}(?=plugins)", "", join(dirpath, fname), ) for top in glob.glob( join(dirname(__file__), "pyglossary", "plugins"), ) for dirpath, _, files in os.walk(top) for fname in files if not fname.endswith((".pyc", ".pyo")) ], } with open("README.md", encoding="utf-8") as fh: long_description = fh.read() setup( name="pyglossary", version=getPipSafeVersion(), python_requires=">=3.10.0", cmdclass={ "install": my_install, }, description="A tool for converting dictionary files aka glossaries.", long_description_content_type="text/markdown", long_description=long_description, author="Saeed Rasooli", author_email="saeed.gnu@gmail.com", license="GPLv3+", url="https://github.com/ilius/pyglossary", packages=[ "pyglossary", ], entry_points={ "console_scripts": [ "pyglossary = pyglossary.ui.main:main", ], }, package_data=package_data, # without data_files `pip install --user .` makes a broken installation data_files=[ (relRootDir, root_data_file_names), (f"{relRootDir}/plugins-meta", ["plugins-meta/index.json"]), (f"{relRootDir}/res", glob.glob("res/*")), ], extras_require={ "full": [ "lxml", "beautifulsoup4", "PyICU", "PyYAML", "marisa-trie", "libzim", "python-lzo", "html5lib", ], }, ) pyglossary-5.0.9/tests/000077500000000000000000000000001476751035500151205ustar00rootroot00000000000000pyglossary-5.0.9/tests/__init__.py000066400000000000000000000000001476751035500172170ustar00rootroot00000000000000pyglossary-5.0.9/tests/apple_utils_test.py000066400000000000000000000021711476751035500210530ustar00rootroot00000000000000import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.apple_utils import substituteAppleCSS class Test_substituteAppleCSS(unittest.TestCase): def test_remove(self): css = b""".test { -webkit-text-combine: horizontal; color: black } .test2 { -apple-color-filter: none; }""" fixed_expected = b""".test {color: black } .test2 { }""" fixed_actual = substituteAppleCSS(css) 
self.assertEqual(fixed_actual, fixed_expected) def test_1(self): css = b"""html.apple_display-separateview { -webkit-column-width: 25em; -webkit-column-rule-color: LightGrey; -webkit-column-rule-style: solid; -webkit-column-rule-width: 1px; } span.sn { -webkit-text-combine: horizontal; vertical-align: -6%; } """ fixed_expected = b"""html.apple_display-separateview { column-width: 25em; column-rule-color: LightGrey; column-rule-style: solid; column-rule-width: 1px; } span.sn { vertical-align: -6%; } """ fixed_actual = substituteAppleCSS(css) self.assertEqual(fixed_actual, fixed_expected) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/deprecated/000077500000000000000000000000001476751035500172205ustar00rootroot00000000000000pyglossary-5.0.9/tests/deprecated/__init__.py000066400000000000000000000000001476751035500213170ustar00rootroot00000000000000pyglossary-5.0.9/tests/deprecated/g_legacy_csv_test.py000066400000000000000000000027461476751035500232670ustar00rootroot00000000000000import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(dirname(abspath(__file__)))) sys.path.insert(0, rootDir) from glossary_test import TestGlossaryBase from pyglossary.glossary import Glossary as GlossaryLegacy class TestLegacyGlossaryCSV(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "100-en-de-v4.csv": "2890fb3e", "100-en-fa.csv": "eb8b0474", "100-en-fa-semicolon.csv": "b3f04599", "100-ja-en.csv": "7af18cf3", }, ) def convert_csv_txt_rw(self, fname, fname2, infoOverride=None): inputFilename = self.downloadFile(f"{fname}.csv") outputFilename = self.newTempFilePath(f"{fname}-2.txt") expectedFilename = self.downloadFile(f"{fname2}.txt") glos = self.glos = GlossaryLegacy() # using glos.convert will add "input_file_size" info key # perhaps add another optional argument to glos.convert named infoOverride rRes = glos.read(inputFilename, direct=True) self.assertTrue(rRes) if infoOverride: for key, value in infoOverride.items(): glos.setInfo(key, value) wRes = glos.write(outputFilename, format="Tabfile") self.assertEqual(outputFilename, wRes) self.compareTextFiles(outputFilename, expectedFilename) glos.cleanup() def test_convert_csv_txt_4(self): self.convert_csv_txt_rw( "100-en-fa", "100-en-fa", infoOverride={"input_file_size": None}, ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/deprecated/glossary_errors_test.py000066400000000000000000000304621476751035500240750ustar00rootroot00000000000000import logging import os import sys import unittest from os.path import abspath, dirname, isfile, join, relpath rootDir = dirname(dirname(dirname(abspath(__file__)))) sys.path.insert(0, rootDir) from glossary_test import TestGlossaryBase, appTmpDir from pyglossary.core_test import getMockLogger from pyglossary.glossary import Glossary from pyglossary.os_utils import rmtree __all__ = ["TestGlossaryErrors", "TestGlossaryErrorsBase"] Glossary.init() class MyStr(str): __slots__ = [] class TestGlossaryErrorsBase(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.mockLog = getMockLogger() def setUp(self): TestGlossaryBase.setUp(self) self.mockLog.clear() def tearDown(self): TestGlossaryBase.tearDown(self) method = self._testMethodName self.assertEqual(0, self.mockLog.printRemainingErrors(method)) warnCount = self.mockLog.printRemainingwWarnings(method) if warnCount > 0: print( f"Got {warnCount} unhandled warnings " f"from 
{self.__class__.__name__}: {self._testMethodName}\n", ) def assertLogCritical(self, errorMsg): self.assertIsNotNone( self.mockLog.popLog( logging.CRITICAL, errorMsg, ), msg=f"did not find critical log {errorMsg!r}", ) def assertLogError(self, errorMsg): self.assertIsNotNone( self.mockLog.popLog( logging.ERROR, errorMsg, ), msg=f"did not find error log {errorMsg!r}", ) def assertLogWarning(self, errorMsg): self.assertIsNotNone( self.mockLog.popLog( logging.WARNING, errorMsg, ), msg=f"did not find warning log {errorMsg!r}", ) def osRoot(): if os.sep == "\\": return "C:\\" return "/" if os.sep == "\\": osNoSuchFileOrDir = "[WinError 3] The system cannot find the path specified:" else: osNoSuchFileOrDir = "[Errno 2] No such file or directory:" class TestGlossaryErrors(TestGlossaryErrorsBase): def test_loadPlugins_invalidDir(self): path = join(osRoot(), "abc", "def", "ghe") Glossary.loadPlugins(path) self.assertLogCritical(f"Invalid plugin directory: {path!r}") def test_detectInputFormat_err1(self): res = Glossary.detectInputFormat( filename="", formatName="", ) self.assertIsNone(res) self.assertLogCritical("Unable to detect input format!") def test_detectInputFormat_err2(self): res = Glossary.detectInputFormat( filename="test.abcd", formatName="", ) self.assertIsNone(res) self.assertLogCritical("Unable to detect input format!") def test_detectInputFormat_err3(self): res = Glossary.detectInputFormat( filename="test.sql", formatName="", ) self.assertIsNone(res) self.assertLogCritical("plugin Sql does not support reading") def test_detectInputFormat_err4(self): res = Glossary.detectInputFormat( filename="test", formatName="FooBar", ) self.assertIsNone(res) self.assertLogCritical("Invalid format 'FooBar'") def test_detectInputFormat_ok1(self): res = Glossary.detectInputFormat( filename="test1.txt.gz", formatName="", ) self.assertEqual(res, ("test1.txt.gz", "Tabfile", "")) def test_detectInputFormat_ok2(self): res = Glossary.detectInputFormat( filename="test2.txt.zip", formatName="", ) self.assertEqual(res, ("test2.txt", "Tabfile", "zip")) def test_detectOutputFormat_err1(self): res = Glossary.detectOutputFormat( filename="", formatName="", inputFilename="", ) self.assertIsNone(res) self.assertLogCritical("Invalid filename ''") def test_detectOutputFormat_err2(self): res = Glossary.detectOutputFormat( filename="test", formatName="FooBar", inputFilename="", ) self.assertIsNone(res) self.assertLogCritical("Invalid format FooBar") def test_detectOutputFormat_err3(self): res = Glossary.detectOutputFormat( filename="", formatName="", inputFilename="test", ) self.assertIsNone(res) self.assertLogCritical("No filename nor format is given for output file") def test_detectOutputFormat_err4_1(self): res = Glossary.detectOutputFormat( filename="", formatName="BabylonBgl", inputFilename="test3.txt", ) self.assertIsNone(res) self.assertLogCritical("plugin BabylonBgl does not support writing") def test_detectOutputFormat_err4_2(self): res = Glossary.detectOutputFormat( filename="test.bgl", formatName="", inputFilename="", ) self.assertIsNone(res) self.assertLogCritical("plugin BabylonBgl does not support writing") def test_detectOutputFormat_err5(self): res = Glossary.detectOutputFormat( filename="test", formatName="", inputFilename="", ) self.assertIsNone(res) self.assertLogCritical("Unable to detect output format!") def test_detectOutputFormat_err6(self): res = Glossary.detectOutputFormat( filename="test", formatName="Tabfile", inputFilename="", addExt=True, ) self.assertEqual(res, ("test", "Tabfile", "")) 
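# Editor's note: unlike the err1..err5 cases above, addExt=True with an empty
# inputFilename still returns a usable (filename, format, compression) tuple;
# only an error is logged, which the assertion below pops off the mock log.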
self.assertLogError("inputFilename is empty") def test_init_infoBadType(self): try: Glossary(info=["a"]) except Exception as e: self.assertEqual(str(type(e)), "") self.assertEqual( str(e), "Glossary: `info` has invalid type, dict or OrderedDict expected", ) else: self.fail("did not raise an exception") def test_cleanup_removed(self): glos = Glossary() tmpFname = "test_cleanup_removed" entry = glos.newDataEntry(tmpFname, b"test") tmpFpath = entry._tmpPath self.assertTrue(bool(tmpFpath), msg="entry tmpPath is empty") self.assertTrue(isfile(tmpFpath), msg=f"tmp file does not exist: {tmpFpath}") rmtree(appTmpDir) glos.cleanup() self.assertLogError(f"no such file or directory: {appTmpDir}") def test_lang_err_get_source(self): glos = Glossary() glos.setInfo("sourcelang", "test") self.assertEqual(glos.sourceLangName, "") self.assertLogError("unknown language 'test'") def test_lang_err_get_target(self): glos = Glossary() glos.setInfo("targetlang", "test") self.assertEqual(glos.targetLangName, "") self.assertLogError("unknown language 'test'") def test_lang_err_set_source(self): glos = Glossary() glos.sourceLangName = "foobar" self.assertLogError("unknown language 'foobar'") self.assertEqual(glos.sourceLangName, "") def test_lang_err_set_target(self): glos = Glossary() glos.targetLangName = "foobar" self.assertLogError("unknown language 'foobar'") self.assertEqual(glos.targetLangName, "") def test_lang_err_setObj_source(self): glos = Glossary() try: glos.sourceLang = "foobar" except TypeError as e: self.assertEqual(str(e), "invalid lang='foobar', must be a Lang object") else: self.fail("must raise a TypeError") def test_lang_err_setObj_target(self): glos = Glossary() try: glos.targetLang = "foobar" except TypeError as e: self.assertEqual(str(e), "invalid lang='foobar', must be a Lang object") else: self.fail("must raise a TypeError") def test_config_attr_set_twice(self): glos = Glossary() glos.config = {"lower": True} self.assertEqual(glos.getConfig("lower", False), True) glos.config = {"lower": False} self.assertLogError("glos.config is set more than once") self.assertEqual(glos.getConfig("lower", False), True) def test_iter_empty(self): glos = Glossary() self.assertEqual(list(glos), []) def test_convert_typeErr_1(self): glos = Glossary() try: glos.convert( inputFilename=MyStr(""), ) except TypeError as e: self.assertEqual(str(e), "inputFilename must be str") else: self.fail("must raise TypeError") def test_convert_typeErr_2(self): glos = Glossary() try: glos.convert( inputFilename="", outputFilename=MyStr(""), ) except TypeError as e: self.assertEqual(str(e), "outputFilename must be str") else: self.fail("must raise TypeError") def test_convert_typeErr_3(self): glos = Glossary() try: glos.convert( inputFilename="", outputFilename="", inputFormat=MyStr(""), ) except TypeError as e: self.assertEqual(str(e), "inputFormat must be str") else: self.fail("must raise TypeError") def test_convert_typeErr_4(self): glos = Glossary() try: glos.convert( inputFilename="", outputFilename="", inputFormat="", outputFormat=MyStr(""), ) except TypeError as e: self.assertEqual(str(e), "outputFormat must be str") else: self.fail("must raise TypeError") def test_read_typeErr_1(self): glos = Glossary() try: glos.read( filename=MyStr(""), ) except TypeError as e: self.assertEqual(str(e), "filename must be str") else: self.fail("must raise TypeError") def test_write_typeErr_1(self): glos = Glossary() try: glos.write( filename=MyStr(""), format="", ) except TypeError as e: self.assertEqual(str(e), "filename must be str") 
else: self.fail("must raise TypeError") def test_write_typeErr_2(self): glos = Glossary() try: glos.write( filename="", format=MyStr(""), ) except TypeError as e: self.assertEqual(str(e), "formatName must be str") else: self.fail("must raise TypeError") def test_convert_sameFilename(self): glos = Glossary() res = glos.convert( inputFilename="test4.txt", outputFilename="test4.txt", ) self.assertIsNone(res) self.assertLogCritical("Input and output files are the same") def test_convert_dirExists(self): glos = Glossary() tempFilePath = self.newTempFilePath("test_convert_dirExists") with open(tempFilePath, mode="w", encoding="utf-8") as _file: _file.write("") res = glos.convert( inputFilename="test5.txt", outputFilename=self.tempDir, outputFormat="Stardict", ) self.assertIsNone(res) self.assertLogCritical( f"Directory already exists and not empty: {relpath(self.tempDir)}", ) def test_convert_fileNotFound(self): glos = Glossary() inputFilename = join(osRoot(), "abc", "def", "test6.txt") res = glos.convert( inputFilename=inputFilename, outputFilename="test2.txt", ) self.assertIsNone(res) self.assertLogCritical( f"[Errno 2] No such file or directory: {inputFilename!r}", ) self.assertLogCritical(f"Reading file {relpath(inputFilename)!r} failed.") def test_convert_unableDetectOutputFormat(self): glos = Glossary() res = glos.convert( inputFilename="test7.txt", outputFilename="test", outputFormat="", ) self.assertIsNone(res) self.assertLogCritical("Unable to detect output format!") def test_convert_writeFileNotFound_txt(self): outputFilename = join( appTmpDir, "test", "7de8cf6f17bc4c9abb439e71adbec95d.txt", ) glos = Glossary() res = glos.convert( inputFilename=self.downloadFile("100-en-fa.txt"), outputFilename=outputFilename, ) self.assertIsNone(res) self.assertLogCritical( f"[Errno 2] No such file or directory: {outputFilename!r}", ) self.assertLogCritical(f"Writing file {relpath(outputFilename)!r} failed.") def test_convert_writeFileNotFound_hdir(self): outputFilename = join(osRoot(), "test", "40e20107f5b04087bfc0ec0d61510017.hdir") glos = Glossary() res = glos.convert( inputFilename=self.downloadFile("100-en-fa.txt"), outputFilename=outputFilename, ) self.assertIsNone(res) self.assertLogCritical( f"{osNoSuchFileOrDir} {outputFilename!r}", ) self.assertLogCritical(f"Writing file {relpath(outputFilename)!r} failed.") def test_convert_invalidSortKeyName(self): glos = self.glos = Glossary() outputFilename = self.newTempFilePath("none.txt") res = glos.convert( inputFilename=self.downloadFile("100-en-fa.txt"), outputFilename=outputFilename, sort=True, sortKeyName="blah", ) self.assertIsNone(res) self.assertLogCritical("invalid sortKeyName = 'blah'") def test_collectDefiFormat_direct(self): fname = "100-en-fa.txt" glos = self.glos = Glossary() glos.read(self.downloadFile(fname), direct=True) res = glos.collectDefiFormat(10) self.assertIsNone(res) self.assertLogError("collectDefiFormat: not supported in direct mode") def test_sortWords_invalidSortKeyName(self): glos = self.glos = Glossary() glos.sortWords( sortKeyName="blah", ) self.assertLogCritical("invalid sortKeyName = 'blah'") # def test_collectDefiFormat_direct(self): # from pyglossary.glossary import Glossary as GlossaryLegacy # fname = "100-en-fa.txt" # glos = self.glos = GlossaryLegacy() # glos.read(self.downloadFile(fname), direct=True) # res = glos.collectDefiFormat(10) # self.assertIsNone(res) # self.assertLogError("collectDefiFormat: not supported in direct mode") if __name__ == "__main__": unittest.main() 
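# Editor's note: scripts/test.sh runs the files under tests/deprecated/ with
# `python -W ignore::DeprecationWarning -m unittest <file>`; invoking this
# module directly (as above) may additionally surface DeprecationWarnings
# from the legacy Glossary API.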
pyglossary-5.0.9/tests/deprecated/glossary_security_test.py000066400000000000000000000033331476751035500244250ustar00rootroot00000000000000import logging import os import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(dirname(abspath(__file__)))) sys.path.insert(0, rootDir) from glossary_errors_test import TestGlossaryErrors from glossary_test import testCacheDir from pyglossary.glossary import Glossary class TestGlossarySecurity(TestGlossaryErrors): def __init__(self, *args, **kwargs): TestGlossaryErrors.__init__(self, *args, **kwargs) self.mockLog.setLevel(logging.INFO) def test_convert_1(self): glos = Glossary() res = glos.convert( inputFilename="os.system('abcd')", outputFilename="os.system('abcd -l')", ) self.assertIsNone(res) self.assertLogCritical("Unable to detect output format!") def test_convert_2(self): glos = Glossary() res = glos.convert( inputFilename="os.system('abcd');test.txt", outputFilename="os.system('abcd -l')", ) self.assertIsNone(res) self.assertLogCritical("Unable to detect output format!") def test_convert_3(self): glos = Glossary() res = glos.convert( inputFilename="os.system('abcd');test.txt", outputFilename="os.system('abcd -l');test.csv", ) self.assertIsNone(res) errMsg = ( "[Errno 2] No such file or directory: " f"\"{testCacheDir}{os.sep}os.system('abcd');test.txt\"" ) errMsg = errMsg.replace("\\", "\\\\") self.assertLogCritical(errMsg) self.assertLogCritical( "Reading file \"os.system('abcd');test.txt\" failed.", ) def test_convert_4(self): glos = Glossary() res = glos.convert( inputFilename="test.txt\nos.system('abcd')", outputFilename="test.csv\nos.system('abcd -l')", ) self.assertIsNone(res) self.assertLogCritical("Unable to detect output format!") if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/deprecated/glossary_test.py000066400000000000000000000727671476751035500225170ustar00rootroot00000000000000import hashlib import json import logging import os import random import sys import tempfile import tracemalloc import unittest import zipfile from os.path import abspath, dirname, isdir, isfile, join from urllib.request import urlopen rootDir = dirname(dirname(dirname(abspath(__file__)))) sys.path.insert(0, rootDir) from typing import TYPE_CHECKING from pyglossary.core import cacheDir, log, tmpDir from pyglossary.glossary import Glossary from pyglossary.os_utils import rmtree from pyglossary.text_utils import crc32hex if TYPE_CHECKING: from collections.abc import Callable __all__ = ["TestGlossaryBase", "appTmpDir", "testCacheDir"] tracemalloc.start() Glossary.init() repo = os.getenv( "PYGLOSSARY_TEST_REPO", "ilius/pyglossary-test/main", ) dataURL = f"https://raw.githubusercontent.com/{repo}/{{filename}}" testCacheDir = join(cacheDir, "test") appTmpDir = join(cacheDir, "tmp") os.makedirs(testCacheDir, exist_ok=True) os.chdir(testCacheDir) os.makedirs(join(tmpDir, "pyglossary"), exist_ok=True) class TestGlossaryBase(unittest.TestCase): def __init__(self, *args, **kwargs): unittest.TestCase.__init__(self, *args, **kwargs) self.maxDiff = None self.dataFileCRC32 = { "004-bar.txt": "6775e590", "004-bar-sort.txt": "fe861123", "006-empty.txt": "07ff224b", "006-empty-filtered.txt": "2b3c1c0f", "100-en-de-v4.txt": "d420a669", "100-en-fa.txt": "f5c53133", "100-ja-en.txt": "93542e89", "100-en-de-v4-remove_font_b.txt": "a3144e2f", "100-en-de-v4.info": "f2cfb284", "100-en-fa.info": "9bddb7bb", "100-en-fa-v2.info": "7c0f646b", "100-ja-en.info": "8cf5403c", "300-rand-en-fa.txt": "586617c8", "res/stardict.png": "7e1447fa", 
"res/test.json": "41f8cf31", } os.environ["CALC_FILE_SIZE"] = "1" def addDirCRC32(self, dirPath: str, files: "dict[str, str]") -> None: for fpath, _hash in files.items(): self.dataFileCRC32[f"{dirPath}/{fpath}"] = _hash # The setUp() and tearDown() methods allow you to define instructions that # will be executed before and after each test method. def setUp(self): self.glos = None self.tempDir = tempfile.mkdtemp(dir=join(tmpDir, "pyglossary")) def tearDown(self): if self.glos is not None: self.glos.cleanup() self.glos.clear() if os.getenv("NO_CLEANUP"): return for direc in ( self.tempDir, appTmpDir, ): if isdir(direc): rmtree(direc) def fixDownloadFilename(self, filename): return filename.replace("/", "__").replace("\\", "__") def downloadFile(self, filename): unixFilename = filename.replace("\\", "/") crc32 = self.dataFileCRC32[unixFilename] fpath = join(testCacheDir, self.fixDownloadFilename(filename)) if isfile(fpath): with open(fpath, mode="rb") as _file: data = _file.read() if crc32hex(data) != crc32: raise RuntimeError(f"CRC32 check failed for existing file: {fpath}") return fpath try: with urlopen(dataURL.format(filename=unixFilename)) as res: data = res.read() except Exception as e: print(f"{filename=}") raise e from None actual_crc32 = crc32hex(data) if actual_crc32 != crc32: raise RuntimeError( f"CRC32 check failed for downloaded file: {filename}: {actual_crc32}", ) with open(fpath, mode="wb") as _file: _file.write(data) return fpath def downloadDir(self, dirName: str, files: list[str]) -> str: dirPath = join(testCacheDir, self.fixDownloadFilename(dirName)) for fileRelPath in files: newFilePath = join(dirPath, fileRelPath) if isfile(newFilePath): # TODO: check crc-32 continue filePath = self.downloadFile(join(dirName, fileRelPath)) os.makedirs(dirname(newFilePath), exist_ok=True) os.rename(filePath, newFilePath) return dirPath def newTempFilePath(self, filename): fpath = join(self.tempDir, filename) if isfile(fpath): os.remove(fpath) return fpath def showGlossaryDiff(self, fpath1, fpath2) -> None: from pyglossary.ui.tools.diff_glossary import diffGlossary diffGlossary(fpath1, fpath2) def compareTextFiles(self, fpath1, fpath2, showDiff=False): self.assertTrue(isfile(fpath1), f"{fpath1 = }") self.assertTrue(isfile(fpath2), f"{fpath2 = }") with open(fpath1, encoding="utf-8") as file1: text1 = file1.read().rstrip("\n") with open(fpath2, encoding="utf-8") as file2: text2 = file2.read().rstrip("\n") try: self.assertEqual( len(text1), len(text2), msg=f"{fpath1} differs from {fpath2} in file size", ) self.assertEqual( text1, text2, msg=f"{fpath1} differs from {fpath2}", ) except AssertionError as e: if showDiff: self.showGlossaryDiff(fpath1, fpath2) raise e from None def compareBinaryFiles(self, fpath1, fpath2): self.assertTrue(isfile(fpath1), f"File {fpath1} does not exist") self.assertTrue(isfile(fpath2), f"File {fpath2} does not exist") with open(fpath1, mode="rb") as file1: data1 = file1.read() with open(fpath2, mode="rb") as file2: data2 = file2.read() self.assertEqual(len(data1), len(data2), msg=f"{fpath1}") self.assertTrue( data1 == data2, msg=f"{fpath1} differs from {fpath2}", ) def compareZipFiles( self, fpath1, fpath2, dataReplaceFuncs: "dict[str, Callable]", ): zf1 = zipfile.ZipFile(fpath1) zf2 = zipfile.ZipFile(fpath2) pathList1 = zf1.namelist() pathList2 = zf2.namelist() if not self.assertEqual(pathList1, pathList2): return for zfpath in pathList1: data1 = zf1.read(zfpath) data2 = zf2.read(zfpath) func = dataReplaceFuncs.get(zfpath) if func is not None: data1 = func(data1) 
data2 = func(data2) self.assertEqual(len(data1), len(data2), msg=f"{zfpath=}") self.assertTrue( data1 == data2, msg=f"{zfpath=}", ) def checkZipFileSha1sum( self, fpath, sha1sumDict: "dict[str, str]", dataReplaceFuncs: "dict[str, Callable] | None" = None, ): if dataReplaceFuncs is None: dataReplaceFuncs = {} zf = zipfile.ZipFile(fpath) # pathList = zf.namelist() for zfpath, expectedSha1 in sha1sumDict.items(): data = zf.read(zfpath) func = dataReplaceFuncs.get(zfpath) if func is not None: data = func(data) actualSha1 = hashlib.sha1(data).hexdigest() self.assertEqual(actualSha1, expectedSha1, msg=f"file: {zfpath}") def convert( # noqa: PLR0913 self, fname, # input file with extension fname2, # output file with extension testId="tmp", # noqa: ARG002 compareText="", compareBinary="", sha1sum=None, md5sum=None, config=None, showDiff=False, **convertArgs, ): inputFilename = self.downloadFile(fname) outputFilename = self.newTempFilePath(fname2) glos = self.glos = Glossary() if config is not None: glos.config = config res = glos.convert( inputFilename=inputFilename, outputFilename=outputFilename, **convertArgs, ) self.assertEqual(outputFilename, res) if compareText: self.compareTextFiles( outputFilename, self.downloadFile(compareText), showDiff=showDiff, ) elif compareBinary: self.compareBinaryFiles(outputFilename, self.downloadFile(compareBinary)) elif sha1sum: with open(outputFilename, mode="rb") as _file: actualSha1 = hashlib.sha1(_file.read()).hexdigest() self.assertEqual(actualSha1, sha1sum) elif md5sum: with open(outputFilename, mode="rb") as _file: actualMd5 = hashlib.md5(_file.read()).hexdigest() self.assertEqual(actualMd5, md5sum) def convert_sqlite_both(self, *args, **kwargs): for sqlite in (None, True, False): self.convert(*args, sqlite=sqlite, **kwargs) class TestGlossary(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "100-en-fa-sort.txt": "d7a82dc8", "100-en-fa-sort-headword.txt": "4067a29f", "100-en-fa-sort-ebook.txt": "aa620d07", "100-en-fa-sort-ebook3.txt": "5a20f140", "100-en-fa-lower.txt": "62178940", "100-en-fa-remove_html_all-v3.txt": "d611c978", "100-en-fa-rtl.txt": "25ede1e8", "300-rand-en-fa-sort-headword-w1256.txt": "06d83bac", "300-rand-en-fa-sort-headword.txt": "df0f8020", "300-rand-en-fa-sort-w1256.txt": "9594aab3", "sort-locale/092-en-fa-alphabet-sample.txt": "b4856532", "sort-locale/092-en-fa-alphabet-sample-sorted-default.txt": "e7b70589", "sort-locale/092-en-fa-alphabet-sample-sorted-en.txt": "3d2bdf73", "sort-locale/092-en-fa-alphabet-sample-sorted-fa.txt": "245419db", "sort-locale/092-en-fa-alphabet-sample-sorted-latin-fa.txt": "261c03c0", }, ) def setUp(self): TestGlossaryBase.setUp(self) self.prevLogLevel = log.level log.setLevel(logging.ERROR) def tearDown(self): TestGlossaryBase.tearDown(self) log.setLevel(self.prevLogLevel) def test__str__1(self): glos = self.glos = Glossary() self.assertEqual(str(glos), "Glossary{filename: '', name: None}") def test__str__2(self): glos = self.glos = Glossary() glos._filename = "test.txt" self.assertEqual(str(glos), "Glossary{filename: 'test.txt', name: None}") def test__str__3(self): glos = self.glos = Glossary() glos.setInfo("title", "Test Title") self.assertEqual( str(glos), "Glossary{filename: '', name: 'Test Title'}", ) def test__str__4(self): glos = self.glos = Glossary() glos._filename = "test.txt" glos.setInfo("title", "Test Title") self.assertEqual( str(glos), "Glossary{filename: 'test.txt', name: 'Test Title'}", ) def 
test_info_1(self): glos = self.glos = Glossary() glos.setInfo("test", "ABC") self.assertEqual(glos.getInfo("test"), "ABC") def test_info_2(self): glos = self.glos = Glossary() glos.setInfo("bookname", "Test Glossary") self.assertEqual(glos.getInfo("title"), "Test Glossary") def test_info_3(self): glos = self.glos = Glossary() glos.setInfo("bookname", "Test Glossary") glos.setInfo("title", "Test 2") self.assertEqual(glos.getInfo("name"), "Test 2") self.assertEqual(glos.getInfo("bookname"), "Test 2") self.assertEqual(glos.getInfo("title"), "Test 2") def test_info_4(self): glos = self.glos = Glossary() glos.setInfo("test", 123) self.assertEqual(glos.getInfo("test"), "123") def test_info_del_1(self): glos = self.glos = Glossary() glos.setInfo("test", "abc") self.assertEqual(glos.getInfo("test"), "abc") glos.setInfo("test", None) self.assertEqual(glos.getInfo("test"), "") def test_info_del_2(self): glos = self.glos = Glossary() glos.setInfo("test", None) self.assertEqual(glos.getInfo("test"), "") def test_setInfo_err1(self): glos = self.glos = Glossary() try: glos.setInfo(1, "a") except TypeError as e: self.assertEqual(str(e), "invalid key=1, must be str") else: self.fail("must raise a TypeError") def test_getInfo_err1(self): glos = self.glos = Glossary() try: glos.getInfo(1) except TypeError as e: self.assertEqual(str(e), "invalid key=1, must be str") else: self.fail("must raise a TypeError") def test_getExtraInfos_1(self): glos = self.glos = Glossary() glos.setInfo("a", "test 1") glos.setInfo("b", "test 2") glos.setInfo("c", "test 3") glos.setInfo("d", "test 4") glos.setInfo("name", "my name") self.assertEqual( glos.getExtraInfos(["b", "c", "title"]), {"a": "test 1", "d": "test 4"}, ) def test_infoKeys_1(self): glos = self.glos = Glossary() glos.setInfo("a", "test 1") glos.setInfo("b", "test 2") glos.setInfo("name", "test name") glos.setInfo("title", "test title") self.assertEqual( glos.infoKeys(), ["a", "b", "name"], ) def test_config_attr_get(self): glos = self.glos = Glossary() try: glos.config # noqa: B018 except NotImplementedError: pass else: self.fail("must raise NotImplementedError") def test_config_attr_set(self): glos = self.glos = Glossary() glos.config = {"lower": True} self.assertEqual(glos.getConfig("lower", False), True) def test_read_txt_1(self): inputFilename = self.downloadFile("100-en-fa.txt") glos = self.glos = Glossary() res = glos.read(filename=inputFilename) self.assertTrue(res) self.assertEqual(glos.sourceLangName, "English") self.assertEqual(glos.targetLangName, "Persian") self.assertIn("Sample: ", glos.getInfo("name")) self.assertEqual(len(glos), 100) def test_read_txt_direct_1(self): inputFilename = self.downloadFile("100-en-fa.txt") glos = self.glos = Glossary() res = glos.read(filename=inputFilename, direct=True) self.assertTrue(res) self.assertEqual(glos.sourceLangName, "English") self.assertEqual(glos.targetLangName, "Persian") self.assertIn("Sample: ", glos.getInfo("name")) self.assertEqual(len(glos), 0) def test_init_infoDict(self): glos = self.glos = Glossary(info={"a": "b"}) self.assertEqual(list(glos.iterInfo()), [("a", "b")]) def test_init_infoOrderedDict(self): from collections import OrderedDict glos = self.glos = Glossary( info=OrderedDict( [ ("y", "z"), ("a", "b"), ("1", "2"), ], ), ) self.assertEqual(list(glos.iterInfo()), [("y", "z"), ("a", "b"), ("1", "2")]) def test_lang_1(self): glos = self.glos = Glossary() self.assertEqual(glos.sourceLangName, "") self.assertEqual(glos.targetLangName, "") glos.sourceLangName = "ru" glos.targetLangName = "de" 
self.assertEqual(glos.sourceLangName, "Russian") self.assertEqual(glos.targetLangName, "German") def test_lang_get_source(self): glos = self.glos = Glossary() glos.setInfo("sourcelang", "farsi") self.assertEqual(glos.sourceLangName, "Persian") def test_lang_get_target(self): glos = self.glos = Glossary() glos.setInfo("targetlang", "malay") self.assertEqual(glos.targetLangName, "Malay") def test_lang_set_source(self): glos = self.glos = Glossary() glos.sourceLangName = "en" self.assertEqual(glos.sourceLangName, "English") def test_lang_set_source_empty(self): glos = self.glos = Glossary() glos.sourceLangName = "" self.assertEqual(glos.sourceLangName, "") def test_lang_set_target(self): glos = self.glos = Glossary() glos.targetLangName = "fa" self.assertEqual(glos.targetLangName, "Persian") def test_lang_set_target_empty(self): glos = self.glos = Glossary() glos.targetLangName = "" self.assertEqual(glos.targetLangName, "") def test_lang_getObj_source(self): glos = self.glos = Glossary() glos.setInfo("sourcelang", "farsi") self.assertEqual(glos.sourceLang.name, "Persian") def test_lang_getObj_target(self): glos = self.glos = Glossary() glos.setInfo("targetlang", "malay") self.assertEqual(glos.targetLang.name, "Malay") def test_lang_detect_1(self): glos = self.glos = Glossary() glos.setInfo("name", "en-fa") glos.detectLangsFromName() self.assertEqual( (glos.sourceLangName, glos.targetLangName), ("English", "Persian"), ) def test_lang_detect_2(self): glos = self.glos = Glossary() glos.setInfo("name", "test-en-fa") glos.detectLangsFromName() self.assertEqual( (glos.sourceLangName, glos.targetLangName), ("English", "Persian"), ) def test_lang_detect_3(self): glos = self.glos = Glossary() glos.setInfo("name", "eng to per") glos.detectLangsFromName() self.assertEqual( (glos.sourceLangName, glos.targetLangName), ("English", "Persian"), ) def test_lang_detect_4(self): glos = self.glos = Glossary() glos.setInfo("name", "Test english to farsi") glos.detectLangsFromName() self.assertEqual( (glos.sourceLangName, glos.targetLangName), ("English", "Persian"), ) def test_lang_detect_5(self): glos = self.glos = Glossary() glos.setInfo("name", "freedict-eng-deu.index") glos.detectLangsFromName() self.assertEqual( (glos.sourceLangName, glos.targetLangName), ("English", "German"), ) def convert_txt_txt( self, fname, # input txt file without extension fname2, # expected output txt file without extension testId="tmp", config=None, **convertArgs, ): self.convert( f"{fname}.txt", f"{fname2}-{testId}.txt", compareText=f"{fname2}.txt", testId=testId, config=config, **convertArgs, ) def convert_to_txtZip( self, fname, # input file with extension fname2, # expected output file without extensions testId="tmp", config=None, **convertArgs, ): inputFilename = self.downloadFile(fname) outputTxtName = f"{fname2}-{testId}.txt" outputFilename = self.newTempFilePath(f"{outputTxtName}.zip") expectedFilename = self.downloadFile(f"{fname2}.txt") glos = self.glos = Glossary() if config is not None: glos.config = config res = glos.convert( inputFilename=inputFilename, outputFilename=outputFilename, **convertArgs, ) self.assertEqual(outputFilename, res) zf = zipfile.ZipFile(outputFilename) self.assertTrue( outputTxtName in zf.namelist(), msg=f"{outputTxtName} not in {zf.namelist()}", ) with open(expectedFilename, encoding="utf-8") as expectedFile: expectedText = expectedFile.read() actualText = zf.read(outputTxtName).decode("utf-8") self.assertEqual(len(actualText), len(expectedText)) self.assertEqual(actualText, expectedText) def 
test_txt_txtZip_1(self): self.convert_to_txtZip( "100-en-fa.txt", "100-en-fa", testId="txt_txtZip_1", infoOverride={"input_file_size": None}, ) def test_sort_1(self): self.convert_txt_txt( "100-en-fa", "100-en-fa-sort", testId="sort_1", sort=True, ) def test_sort_2(self): self.convert_txt_txt( "100-en-fa", "100-en-fa-sort", testId="sort_2", sort=True, sortKeyName="headword_lower", ) def test_sort_3(self): self.convert_txt_txt( "100-en-fa", "100-en-fa-sort-headword", testId="sort_3", sort=True, sortKeyName="headword", ) def test_sort_4(self): self.convert_txt_txt( "300-rand-en-fa", "300-rand-en-fa-sort-headword", testId="sort_4", sort=True, sortKeyName="headword", ) def test_sort_5(self): self.convert_txt_txt( "300-rand-en-fa", "300-rand-en-fa-sort-headword-w1256", testId="sort_5", sort=True, sortKeyName="headword", sortEncoding="windows-1256", ) def test_sort_6(self): self.convert_txt_txt( "300-rand-en-fa", "300-rand-en-fa-sort-w1256", testId="sort_6", sort=True, sortKeyName="headword_lower", sortEncoding="windows-1256", ) def test_sort_7(self): self.convert_txt_txt( "100-en-fa", "100-en-fa-sort-ebook", testId="sort_7", sort=True, sortKeyName="ebook", ) def test_sort_8(self): self.convert_txt_txt( "100-en-fa", "100-en-fa-sort-ebook3", testId="sort_8", sort=True, sortKeyName="ebook_length3", ) def test_lower_1(self): self.convert_txt_txt( "100-en-fa", "100-en-fa-lower", testId="lower_1", config={"lower": True}, ) def test_rtl_1(self): self.convert_txt_txt( "100-en-fa", "100-en-fa-rtl", testId="rtl_1", config={"rtl": True}, ) def test_remove_html_all_1(self): self.convert_txt_txt( "100-en-fa", "100-en-fa-remove_html_all-v3", testId="remove_html_all_1", config={"remove_html_all": True}, ) def test_remove_html_1(self): self.convert_txt_txt( "100-en-de-v4", "100-en-de-v4-remove_font_b", testId="remove_html_1", config={"remove_html": "font,b"}, ) def test_save_info_json(self): fname = "100-en-fa" testId = "save_info_json" infoPath = self.newTempFilePath(f"{fname}-{testId}.info") self.convert_txt_txt( fname, fname, testId=testId, config={"save_info_json": True}, infoOverride={"input_file_size": None}, ) with open(infoPath, encoding="utf8") as _file: infoDict = json.load(_file) with open(self.downloadFile(f"{fname}-v2.info"), encoding="utf8") as _file: infoDictExpected = json.load(_file) for key, value in infoDictExpected.items(): self.assertIn(key, infoDict) self.assertEqual(value, infoDict.get(key)) def test_convert_sqlite_direct_error(self): glos = self.glos = Glossary() try: glos.convert( inputFilename="foo.txt", outputFilename="bar.txt", direct=True, sqlite=True, ) except ValueError as e: self.assertEqual(str(e), "Conflictng arguments: direct=True, sqlite=True") else: self.fail("must raise a ValueError") def test_txt_txt_bar(self): for direct in (None, False, True): self.convert_txt_txt( "004-bar", "004-bar", testId="bar", direct=direct, infoOverride={ "name": None, "input_file_size": None, }, ) def test_txt_txt_bar_sort(self): for sqlite in (None, False, True): self.convert_txt_txt( "004-bar", "004-bar-sort", testId="bar_sort", sort=True, sqlite=sqlite, ) def test_txt_txt_empty_filtered(self): for direct in (None, False, True): self.convert_txt_txt( "006-empty", "006-empty-filtered", testId="empty_filtered", direct=direct, ) def test_txt_txt_empty_filtered_sqlite(self): for sqlite in (None, False, True): self.convert_txt_txt( "006-empty", "006-empty-filtered", testId="empty_filtered_sqlite", sqlite=sqlite, ) def test_dataEntry_save(self): glos = self.glos = Glossary() tmpFname = 
"test_dataEntry_save" entry = glos.newDataEntry(tmpFname, b"test") saveFpath = entry.save(self.tempDir) self.assertTrue( isfile(saveFpath), msg=f"saved file does not exist: {saveFpath}", ) def test_dataEntry_getFileName(self): glos = self.glos = Glossary() tmpFname = "test_dataEntry_getFileName" entry = glos.newDataEntry(tmpFname, b"test") self.assertEqual(entry.getFileName(), tmpFname) def test_cleanup_noFile(self): glos = self.glos = Glossary() glos.cleanup() def test_cleanup_cleanup(self): glos = self.glos = Glossary() tmpFname = "test_cleanup_cleanup" entry = glos.newDataEntry(tmpFname, b"test") tmpFpath = entry._tmpPath self.assertTrue(bool(tmpFpath), msg="entry tmpPath is empty") self.assertTrue( isfile(tmpFpath), msg=f"tmp file does not exist: {tmpFpath}", ) glos.cleanup() self.assertTrue( not isfile(tmpFpath), msg=f"tmp file still exists: {tmpFpath}", ) def test_cleanup_noCleanup(self): glos = self.glos = Glossary() tmpFname = "test_cleanup_noCleanup" entry = glos.newDataEntry(tmpFname, b"test") tmpFpath = entry._tmpPath self.assertTrue(bool(tmpFpath), msg="entry tmpPath is empty") self.assertTrue(isfile(tmpFpath), msg=f"tmp file does not exist: {tmpFpath}") glos.config = {"cleanup": False} glos.cleanup() self.assertTrue(isfile(tmpFpath), msg=f"tmp file does not exist: {tmpFpath}") def addWordsList(self, glos, words, newDefiFunc=str, defiFormat=""): wordsList = [] for index, line in enumerate(words): words = line.rstrip().split("|") wordsList.append(words) glos.addEntryObj( glos.newEntry( words, newDefiFunc(index), defiFormat=defiFormat, ), ) glos.updateIter() return wordsList def addWords(self, glos, wordsStr, **kwargs): return self.addWordsList(glos, wordsStr.split("\n"), **kwargs) tenWordsStr = """comedic tubenose organosol adipocere gid next friend bitter apple caca|ca-ca darkling beetle japonica""" tenWordsStr2 = """comedic Tubenose organosol Adipocere gid Next friend bitter apple Caca|ca-ca darkling beetle Japonica""" tenWordsStrFa = ( "بیمارانه\nگالوانومتر\nنقاهت\nرشک" "مندی\nناکاستنی\nشگفتآفرینی\nچندپاری\nنامبارکی\nآماسش\nانگیزنده" ) def test_addEntries_1(self): glos = self.glos = Glossary() wordsList = self.addWords( glos, self.tenWordsStr, newDefiFunc=lambda _i: str(random.randint(0, 10000)), ) self.assertEqual(wordsList, [entry.l_word for entry in glos]) def test_addEntries_2(self): # entry filters don't apply to loaded entries (added with addEntryObj) glos = self.glos = Glossary() glos.addEntryObj(glos.newEntry(["a"], "test 1")) glos.addEntryObj(glos.newEntry([""], "test 2")) glos.addEntryObj(glos.newEntry(["b"], "test 3")) glos.addEntryObj(glos.newEntry([], "test 4")) glos.updateEntryFilters() glos.updateIter() self.assertEqual( [["a"], [""], ["b"], []], [entry.l_word for entry in glos], ) def test_addEntries_3(self): glos = self.glos = Glossary() glos.addEntryObj(glos.newEntry(["a"], "test 1")) glos.addEntryObj(glos.newEntry(["b"], "test 3")) glos.addEntryObj( glos.newDataEntry( "file.bin", b"hello\x00world", ), ) glos.updateEntryFilters() glos.updateIter() wordListList = [] dataEntries = [] for entry in glos: wordListList.append(entry.l_word) if entry.isData(): dataEntries.append(entry) self.assertEqual( wordListList, [["a"], ["b"], ["file.bin"]], ) self.assertEqual(len(dataEntries), 1) self.assertEqual(dataEntries[0].getFileName(), "file.bin") self.assertEqual(dataEntries[0].data, b"hello\x00world") def test_sortWords_1(self): glos = self.glos = Glossary() wordsList = self.addWords( glos, self.tenWordsStr, newDefiFunc=lambda _i: str(random.randint(0, 10000)), ) 
self.assertEqual(wordsList, [entry.l_word for entry in glos]) glos.sortWords() self.assertEqual(sorted(wordsList), [entry.l_word for entry in glos]) def test_sortWords_2(self): glos = self.glos = Glossary() wordsList = self.addWords( glos, self.tenWordsStr2, newDefiFunc=lambda _i: str(random.randint(0, 10000)), ) self.assertEqual(wordsList, [entry.l_word for entry in glos]) glos.sortWords(sortKeyName="headword") self.assertEqual( [entry.l_word for entry in glos], [ ["Adipocere"], ["Caca", "ca-ca"], ["Japonica"], ["Next friend"], ["Tubenose"], ["bitter apple"], ["comedic"], ["darkling beetle"], ["gid"], ["organosol"], ], ) def test_sortWords_3(self): glos = self.glos = Glossary() wordsList = self.addWords( glos, self.tenWordsStrFa, newDefiFunc=lambda _i: str(random.randint(0, 10000)), ) self.assertEqual(wordsList, [entry.l_word for entry in glos]) glos.sortWords(sortKeyName="headword") ls1 = ["آماسش", "انگیزنده", "بیمارانه", "رشکمندی", "شگفتآفرینی"] ls2 = ["نامبارکی", "ناکاستنی", "نقاهت", "چندپاری", "گالوانومتر"] self.assertEqual( [entry.s_word for entry in glos], ls1 + ls2, ) def test_sortWords_4(self): glos = self.glos = Glossary() wordsList = self.addWords( glos, self.tenWordsStrFa, newDefiFunc=lambda _i: str(random.randint(0, 10000)), ) self.assertEqual(wordsList, [entry.l_word for entry in glos]) glos.sortWords( sortKeyName="headword", sortEncoding="windows-1256", ) ls1 = ["چندپاری", "گالوانومتر", "آماسش", "انگیزنده", "بیمارانه"] ls2 = ["رشکمندی", "شگفتآفرینی", "ناکاستنی", "نامبارکی", "نقاهت"] self.assertEqual( [entry.s_word for entry in glos], ls1 + ls2, ) def test_sortWords_5(self): glos = self.glos = Glossary() alphabetW1256 = "ءآأئابتثجحخدذرزسشصضطظعغـفقكلمنهوىي" alphabetW1256_shuf = "مفزنصـذرخوآظسقلدغطيعحءأتىئاجهضثشكب" wordsList = self.addWordsList( glos, list(alphabetW1256_shuf), newDefiFunc=lambda _i: str(random.randint(0, 10000)), ) self.assertEqual(wordsList, [entry.l_word for entry in glos]) glos.sortWords( sortKeyName="headword", sortEncoding="windows-1256", ) self.assertEqual( [entry.s_word for entry in glos], list(alphabetW1256), ) def test_sortWords_exc_1(self): fname = "100-en-fa.txt" glos = self.glos = Glossary() glos.read(self.downloadFile(fname), direct=True) try: glos.sortWords() except NotImplementedError as e: self.assertEqual(str(e), "can not use sortWords in direct mode") else: self.fail("must raise NotImplementedError") def test_read_filename(self): glos = self.glos = Glossary() glos.read(self.downloadFile("004-bar.txt")) self.assertEqual(glos.filename, join(testCacheDir, "004-bar")) def test_wordTitleStr_em1(self): glos = self.glos = Glossary() self.assertEqual(glos.wordTitleStr(""), "") def test_wordTitleStr_em2(self): glos = self.glos = Glossary() glos._defiHasWordTitle = True self.assertEqual(glos.wordTitleStr("test1"), "") def test_wordTitleStr_b1(self): glos = self.glos = Glossary() self.assertEqual(glos.wordTitleStr("test1"), "<b>test1</b><br>") def test_wordTitleStr_b2(self): glos = self.glos = Glossary() self.assertEqual( glos.wordTitleStr("test1", class_="headword"), '<b class="headword">test1</b><br>', ) def test_wordTitleStr_cjk1(self): glos = self.glos = Glossary() self.assertEqual( glos.wordTitleStr("test1", sample="くりかえし"), "<big>test1</big><br>", ) def test_wordTitleStr_cjk2(self): glos = self.glos = Glossary() self.assertEqual( glos.wordTitleStr("くりかえし"), "<big>くりかえし</big><br>
          ", ) def test_convert_sortLocale_default_1(self): name = "092-en-fa-alphabet-sample" self.convert_sqlite_both( f"sort-locale/{name}.txt", f"{name}-sorted-default.txt", compareText=f"sort-locale/{name}-sorted-default.txt", testId="sorted-default", sort=True, sortKeyName="headword_lower", ) def test_convert_sortLocale_en_1(self): name = "092-en-fa-alphabet-sample" self.convert_sqlite_both( f"sort-locale/{name}.txt", f"{name}-sorted-en.txt", compareText=f"sort-locale/{name}-sorted-en.txt", testId="sorted-en", sort=True, sortKeyName="headword_lower:en_US.UTF-8", ) def test_convert_sortLocale_fa_1(self): name = "092-en-fa-alphabet-sample" self.convert_sqlite_both( f"sort-locale/{name}.txt", f"{name}-sorted-fa.txt", compareText=f"sort-locale/{name}-sorted-fa.txt", testId="sorted-fa", sort=True, sortKeyName="headword_lower:fa_IR.UTF-8", ) def test_convert_sortLocale_fa_2(self): name = "092-en-fa-alphabet-sample" self.convert_sqlite_both( f"sort-locale/{name}.txt", f"{name}-sorted-latin-fa.txt", compareText=f"sort-locale/{name}-sorted-latin-fa.txt", testId="sorted-latin-fa", sort=True, sortKeyName="headword_lower:fa-u-kr-latn-arab", ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/dictzip_test.py000066400000000000000000000055661476751035500202130ustar00rootroot00000000000000import gzip import logging import unittest from pathlib import Path from glossary_v2_errors_test import TestGlossaryErrorsBase from pyglossary.os_utils import runDictzip TEXT = """ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 
""" MISSING_DEP_MARK = "Dictzip compression requires idzip module or dictzip utility," class TestDictzip(TestGlossaryErrorsBase): def setUp(self) -> None: super().setUp() self.test_file_path = Path(self.tempDir) / "test_file.txt" filename = self.test_file_path.name + ".dz" self.result_file_path = self.test_file_path.parent / filename with open(self.test_file_path, "a", encoding="utf-8") as tmp_file: tmp_file.write(TEXT) def skip_on_dep(self, method: str) -> None: warn = self.mockLog.popLog(logging.WARNING, MISSING_DEP_MARK, partial=True) if warn: self.skipTest(f"Missing {method} dependency") def test_idzip_compressed_exists(self) -> None: method = "idzip" runDictzip(self.test_file_path, method) self.skip_on_dep(method) self.assertTrue(self.result_file_path.exists()) self.assertTrue(self.result_file_path.is_file()) def test_idzip_compressed_matches(self) -> None: method = "idzip" runDictzip(self.test_file_path, method) self.skip_on_dep(method) with gzip.open(self.result_file_path, "r") as file: result = file.read().decode() self.assertEqual(result, TEXT) def test_dictzip_compressed_exists(self) -> None: method = "dictzip" runDictzip(self.test_file_path, method) self.skip_on_dep(method) self.assertTrue(self.result_file_path.exists()) self.assertTrue(self.result_file_path.is_file()) def test_dictzip_compressed_matches(self) -> None: method = "dictzip" runDictzip(self.test_file_path, method) self.skip_on_dep(method) with gzip.open(self.result_file_path, "r") as file: result = file.read().decode() self.assertEqual(result, TEXT) def test_dictzip_missing_target(self) -> None: method = "idzip" filename = "/NOT_EXISTED_PATH/file.txt" expected = f"No such file or directory: '{filename}'" runDictzip(filename, method) self.skip_on_dep(method) err = self.mockLog.popLog(logging.ERROR, expected, partial=True) self.assertIsNotNone(err) def test_idzip_missing_target(self) -> None: method = "dictzip" filename = "/NOT_EXISTED_PATH/boilerplate.txt" expected = f'Cannot open "{filename}"' runDictzip(filename, method) self.skip_on_dep(method) err = self.mockLog.popLog(logging.ERROR, expected, partial=True) self.assertIsNotNone(err) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/ebook_kobo_test.py000066400000000000000000000063161476751035500206500ustar00rootroot00000000000000# -*- coding: utf-8 -*- import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.glossary_v2 import Glossary from pyglossary.plugins.ebook_kobo import ( Writer, ) class GetPrefixTest(unittest.TestCase): def case(self, word, prefix): glos = Glossary() w = Writer(glos) self.assertEqual( w.get_prefix(word), prefix, ) def test_examples(self): # examples from https://pgaskin.net/dictutil/dicthtml/prefixes.html self.case("test", "te") self.case("a", "aa") self.case("Èe", "èe") self.case("multiple words", "mu") self.case("àççèñts", "àç") self.case("à", "àa") self.case("ç", "ça") self.case("", "11") self.case(" ", "11") self.case(" x", "xa") self.case(" 123", "11") self.case("x 23", "xa") self.case("д ", "д") self.case("дaд", "дa") self.case("未未", "未未") self.case("未", "未a") self.case(" 未", "11") self.case(" 未", "未a") # the rest of test cases are from # https://github.com/pgaskin/dictutil/blob/master/kobodict/util_test.go def test_dicthtml_en(self): self.case("a-", "11") self.case("-an", "11") self.case("GB", "gb") def test_dicthtml_fr(self): self.case("ébahir", "éb") self.case("a1", "11") self.case("ô", "ôa") self.case("kébab", "ké") 
self.case("aérer", "aé") self.case("living-room", "li") # dicthtml-ja # Note, Kanji not currently implemented, so not testing (note, the logic # is in a separate function, anyways). # self.case("あ", "あ") # self.case("アークとう", "アー") def test_dictword_spaces(self): # generated by dictword-test: spaces self.case(" ", "11") self.case(" ", "11") self.case("\t\t", "11") self.case("\t\f\t", "11") self.case("x ", "xa") self.case(" xx", "xa") # generated by dictword-test: spaces where trim/prefix order matters self.case(" x", "11") self.case(" xy", "11") self.case(" xyz", "11") self.case("x z", "xa") def test_dictword_cyrillic(self): # generated by dictword-test: cyrillic self.case(" д", "д") self.case(" дд", "д") self.case("д", "д") self.case("aд", "aд") self.case("дa", "дa") self.case("aдa", "aд") def test_dictword_uppercase_accented(self): # generated by dictword-test: uppercase accented letters self.case("Ȅe", "ȅe") self.case("eȄ", "eȅ") self.case("Ȅ", "ȅa") self.case("Ȅ!", "11") def test_dictword_cjk(self): # generated by dictword-test: cjk self.case("x未", "x未") self.case("未x", "未x") self.case("xy未", "xy") self.case("还没", "还没") def test_dictword_misc(self): # generated by dictword-test: misc self.case("!", "11") self.case("!!", "11") self.case("!!!", "11") self.case("x!", "11") self.case("x!!", "11") self.case("xx!", "xx") self.case("xxx!", "xx") self.case(" !", "11") self.case(" !!", "11") self.case(" !!!", "11") self.case(" !", "11") self.case(" !!", "11") self.case(" !!!", "11") self.case(" x!", "xa") self.case(" x!!", "xa") self.case(" xx!", "xa") self.case(" xxx!", "xa") def test_synthetic(self): self.case("x\x00y", "xa") self.case("\x00xy", "11") if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/entry_test.py000066400000000000000000000114421476751035500176740ustar00rootroot00000000000000from __future__ import annotations import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.entry import Entry class TestEntryBasic(unittest.TestCase): def test_exc_1(self): try: Entry(b"word", "defi") except TypeError as e: self.assertEqual(str(e), "invalid word type ") else: self.fail("must raise TypeError") def test_exc_2(self): Entry(("word",), "defi") def test_exc_3(self): try: Entry("word", b"defi") except TypeError as e: self.assertEqual(str(e), "invalid defi type ") else: self.fail("must raise TypeError") def test_exc_4(self): try: Entry("word", ("defi",)) except TypeError as e: self.assertEqual(str(e), "invalid defi type ") else: self.fail("must raise TypeError") def test_exc_5(self): try: Entry("word", "defi", "b") except ValueError as e: self.assertEqual(str(e), "invalid defiFormat 'b'") else: self.fail("must raise ValueError") def test_1(self): entry = Entry("test1", "something") self.assertEqual(entry.l_word, ["test1"]) self.assertEqual(entry.defi, "something") def test_2(self): entry = Entry(["test1"], "something") self.assertEqual(entry.l_word, ["test1"]) self.assertEqual(entry.defi, "something") def test_3(self): entry = Entry("test1", ["something"]) self.assertEqual(entry.l_word, ["test1"]) self.assertEqual(entry.defi, "something") def test_repr_1(self): entry = Entry("test1", "something") self.assertEqual( repr(entry), "Entry('test1', 'something', defiFormat='m')", ) def test_repr_2(self): entry = Entry("test1", "something", defiFormat="h") self.assertEqual( repr(entry), "Entry('test1', 'something', defiFormat='h')", ) def test_defiFormat_1(self): entry = Entry("test1", 
"something") self.assertEqual(entry.defiFormat, "m") def test_defiFormat_2(self): entry = Entry("test1", "something", defiFormat="h") self.assertEqual(entry.defiFormat, "h") def test_defiFormat_3(self): entry = Entry("test1", "something", defiFormat="h") entry.defiFormat = "x" self.assertEqual(entry.defiFormat, "x") def test_addAlt_1(self): entry = Entry("test1", "something") self.assertEqual(entry.l_word, ["test1"]) entry.addAlt("test 1") self.assertEqual(entry.l_word, ["test1", "test 1"]) class TestEntryDetectDefiFormat(unittest.TestCase): def test_1(self): entry = Entry("test1", "something") entry.detectDefiFormat() self.assertEqual(entry.defiFormat, "m") def test_2(self): entry = Entry("test1", "something", defiFormat="h") entry.detectDefiFormat() self.assertEqual(entry.defiFormat, "h") def test_3(self): entry = Entry("test1", "something", defiFormat="x") entry.detectDefiFormat() self.assertEqual(entry.defiFormat, "x") def test_4(self): entry = Entry("test1", "something") entry.detectDefiFormat() self.assertEqual(entry.defiFormat, "h") def test_5(self): entry = Entry("test1", "titlesomething") entry.detectDefiFormat() self.assertEqual(entry.defiFormat, "x") class TestEntryStripFullHtml(unittest.TestCase): def __init__(self, *args, **kwargs): unittest.TestCase.__init__(self, *args, **kwargs) def setUp(self): pass def tearDown(self): pass def case( self, word: str, origDefi: str, fixedDefi: str, error: str | None = None, ): entry = Entry(word, origDefi) actualError = entry.stripFullHtml() self.assertEqual(entry.defi, fixedDefi) self.assertEqual(actualError, error) def test_1(self): self.case( word="test1", origDefi="plain text", fixedDefi="plain text", error=None, ) def test_2(self): self.case( word="test2", origDefi="

          simple html text

          ", fixedDefi="

          simple html text

          ", error=None, ) def test_3(self): self.case( word="test3", origDefi=( "simple " "html" ), fixedDefi="simple html", error=None, ) def test_4(self): self.case( word="test4", origDefi="simple html", fixedDefi="simple html", error=None, ) def test_5(self): self.case( word="test5", origDefi="simple html", fixedDefi="simple html", error=" Basque dict (001-headword-with-formatting.dsl) dsl = ( "{[c slategray]}{to }{[/c]}tell " "{[c slategray]}smb{[/c]} how to do " "{[c slategray]}smth{[/c]}\n [m1][trn]" "рассказать кому-либо, как что-либо делать[/trn][/m]" ) txt = ( "tell smb how to do smth\t" 'to tell ' 'smb how to do ' 'smth
          ' '

          ' "рассказать кому-либо, как что-либо делать

          " ) self.convert_string_dsl_txt(dsl, txt) def test_headword_formatting_english(self): dsl = ( "{[c slategray]}{to }{[/c]}tell" " {[c violet]}smb{[/c]} {[u]}how{[/u]}" " to do {[c violet]}smth{[/c]} {[sub]subscript[/sub]}\n" " [m1]1. main meaning[/m]\n" " [m2]a. first submeaning[/m]\n" " [m2]b. second submeaning[/m]\n" ) txt = ( "tell smb how to do smth\t" 'to tell' ' smb how' ' to do smth subscript
          ' '

          1. main meaning

          ' '

          a. first submeaning

          ' '

          b. second submeaning

          ' ) self.convert_string_dsl_txt(dsl, txt) def test_p_unclosed(self): dsl = "headword\n [m1][p]test\n" txt = ( "headword\t" '

          test\\n' ) self.convert_string_dsl_txt(dsl, txt) def test_headword_paran(self): self.convert_string_dsl_txt( "headword with (parenthesis)\n test", "headword with parenthesis|headword with\ttest", ) def test_headword_paran_2(self): self.convert_string_dsl_txt( "(headword with) parenthesis\n test", "headword with parenthesis|parenthesis\ttest", ) def test_headword_paran_escaped(self): self.convert_string_dsl_txt( "headword \\(with escaped parenthesis\\)\n test", "headword (with escaped parenthesis)\ttest", ) def test_headword_paran_escaped_2(self): self.convert_string_dsl_txt( "headword (with escaped right \\) parenthesis)\n test", "headword with escaped right \\\\) parenthesis|headword\ttest", ) def test_headword_curly(self): txt = ( "headword with curly brackets\t" "headword with curly brackets
          test" ) self.convert_string_dsl_txt( "headword with {[b]}curly brackets{[/b]}\n test", txt, ) def test_headword_curly_escaped(self): self.convert_string_dsl_txt( "headword with escaped \\{\\}curly brackets\\{\n test", "headword with escaped {}curly brackets{\ttest", ) def test_double_brackets_1(self): self.convert_string_dsl_txt( "test\n hello [[world]]", "test\thello [world]", ) def test_double_brackets_2(self): self.convert_string_dsl_txt( "test\n hello [[", "test\thello [", ) def test_double_brackets_3(self): self.convert_string_dsl_txt( "test\n hello ]]", "test\thello ]", ) def test_ref_double_ltgt(self): self.convert_string_dsl_txt( "test\n hello <>", 'test\thello world', ) def test_ref_double_ltgt_escaped(self): self.convert_string_dsl_txt( "test\n hello \\<>", "test\thello <<world>>", ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_ebook_epub2_test.py000066400000000000000000000101351476751035500212330ustar00rootroot00000000000000import datetime import hashlib import os import re import sys import unittest from os.path import abspath, dirname from freezegun import freeze_time rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from glossary_v2_test import TestGlossaryBase from pyglossary.glossary_v2 import ConvertArgs, Glossary testTimeEpoch = 1730579400 testTime = datetime.datetime.fromtimestamp(testTimeEpoch, tz=datetime.timezone.utc) class TestGlossaryEPUB2(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "100-en-fa-res.slob": "0216d006", "100-en-fa-res-slob-v2.epub": "304d174d", "100-en-fa-prefix3-v2.epub": "1b7244ca", "300-rand-en-fa-prefix3-v2.epub": "b5dd9ec6", }, ) def setUp(self): TestGlossaryBase.setUp(self) def remove_toc_uid(self, data): return re.sub( b'', b'', data, ) def remove_content_extra(self, data): data = re.sub( b'[0-9a-f]{32}', b'', data, ) return re.sub( b'[0-9-]{10}', b'', data, ) def convert_to_epub( self, inputFname, outputFname, testId, checkZipContents=True, sha1sum="", **convertArgs, ): inputFilename = self.downloadFile(f"{inputFname}") outputFilename = self.newTempFilePath( f"{inputFname.replace('.', '_')}-{testId}.epub", ) if sha1sum: os.environ["EPUB_UUID"] = hashlib.sha1( inputFname.encode("ascii") ).hexdigest() os.environ["EBOOK_CREATION_TIME"] = str(testTimeEpoch) # print(f'{os.environ["EPUB_UUID"]=}') glos = self.glos = Glossary() res = glos.convert( ConvertArgs( inputFilename=inputFilename, outputFilename=outputFilename, **convertArgs, ) ) self.assertEqual(outputFilename, res) if checkZipContents: self.compareZipFiles( outputFilename, self.downloadFile(f"{outputFname}.epub"), { "OEBPS/toc.ncx": self.remove_toc_uid, "OEBPS/content.opf": self.remove_content_extra, }, ) if sha1sum: with open(outputFilename, mode="rb") as _file: actualSha1 = hashlib.sha1(_file.read()).hexdigest() self.assertEqual(sha1sum, actualSha1, f"{outputFilename=}") # sha1sum still depends on current date (but not time) # despite using ReproducibleZipFile and EBOOK_CREATION_TIME env var. not sure why. 
@freeze_time(testTime) def test_convert_txt_epub_1(self): self.convert_to_epub( "100-en-fa.txt", "100-en-fa", testId="a1", checkZipContents=False, sha1sum="beb3ad8227dd9561223cd18d805aff8dd0aef27b", ) def test_convert_to_epub_1(self): self.convert_to_epub( "100-en-fa-res.slob", "100-en-fa-res-slob-v2", testId="1", ) def test_convert_to_epub_2(self): for sort in (True, False): self.convert_to_epub( "100-en-fa-res.slob", "100-en-fa-res-slob-v2", testId="2", sort=sort, ) def test_convert_to_epub_3(self): for sqlite in (True, False): self.convert_to_epub( "100-en-fa-res.slob", "100-en-fa-res-slob-v2", testId="3", sqlite=sqlite, ) def test_convert_to_epub_4(self): for direct in (True, False): self.convert_to_epub( "100-en-fa-res.slob", "100-en-fa-res-slob-v2", testId="4", direct=direct, ) def test_convert_to_epub_5(self): for sqlite in (True, False): self.convert_to_epub( "100-en-fa.txt", "100-en-fa-prefix3-v2", testId="5", sqlite=sqlite, writeOptions={"group_by_prefix_length": 3}, ) def test_convert_to_epub_6(self): self.convert_to_epub( "300-rand-en-fa.txt", "300-rand-en-fa-prefix3-v2", testId="6", sqlite=True, writeOptions={"group_by_prefix_length": 3}, ) def test_convert_to_epub_7(self): self.convert_to_epub( "300-rand-en-fa.txt", "300-rand-en-fa-prefix3-v2", testId="7", sqlite=False, writeOptions={"group_by_prefix_length": 3}, ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_freedict_test.py000066400000000000000000000017441476751035500206320ustar00rootroot00000000000000import unittest from glossary_v2_test import TestGlossaryBase class TestGlossaryFreeDict(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "100-en-de.tei": "542c210e", "100-en-de-v4.txt": "d420a669", "freedict-sample-2024-12-19.tei": "c33b89d5", "freedict-sample-2024-12-19.txt": "2a0a2235", }, ) def convert_tei_txt(self, fname, fname2, **convertArgs): self.convert( f"{fname}.tei", f"{fname}-2.txt", compareText=f"{fname2}.txt", **convertArgs, ) def test_convert_tei_txt_1(self): self.convert_tei_txt( "100-en-de", "100-en-de-v4", readOptions={"auto_comma": False}, ) self.convert_tei_txt( "100-en-de", "100-en-de-v4", readOptions={"auto_comma": True}, ) def test_convert_tei_txt_2(self): self.convert_tei_txt( "freedict-sample-2024-12-19", "freedict-sample-2024-12-19", ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_gettext_po_test.py000066400000000000000000000021071476751035500212210ustar00rootroot00000000000000import os import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from glossary_v2_test import TestGlossaryBase class TestGlossaryGetttestPo(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "100-en-fa.po": "694de186", "100-en-fa.po.txt": "f0c3ea53", }, ) os.environ["CALC_FILE_SIZE"] = "1" def convert_txt_po(self, fname, fname2, **convertArgs): self.convert( f"{fname}.txt", f"{fname}-2.po", compareText=f"{fname2}.po", **convertArgs, ) def convert_po_txt(self, fname, fname2, **convertArgs): self.convert( f"{fname}.po", f"{fname}-2.txt", compareText=f"{fname2}.txt", **convertArgs, ) def test_convert_txt_po_1(self): self.convert_txt_po("100-en-fa", "100-en-fa") # TODO def test_convert_po_txt_1(self): self.convert_po_txt( "100-en-fa", "100-en-fa.po", infoOverride={"input_file_size": None}, ) if __name__ == "__main__": unittest.main() 
pyglossary-5.0.9/tests/g_jmdict_test.py000066400000000000000000000020461476751035500203130ustar00rootroot00000000000000import unittest import lxml from glossary_v2_test import TestGlossaryBase if not lxml.__version__.startswith("5.3."): raise OSError(f"Found lxml=={lxml.__version__}, must use lxml==5.3") class TestGlossaryJMdict(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "050-JMdict-English": "aec9ad8c", "050-JMdict-English-v3.txt": "6068b9a7", }, ) # os.environ["CALC_FILE_SIZE"] = "1" def convert_jmdict_txt(self, fname, fname2, **convertArgs): self.convert( fname, f"{fname}-2.txt", compareText=f"{fname2}.txt", inputFormat="JMDict", **convertArgs, ) # with lxml==5.3.0, for "bword://{word}", `word` is not unicode-escaped by lxml # while lxml < 5.3.0 does escape these unicode characters # that's why 050-JMdict-English-v2 was updated to 050-JMdict-English-v3 def test_convert_jmdict_txt_1(self): self.convert_jmdict_txt( "050-JMdict-English", "050-JMdict-English-v3", ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_json_test.py000066400000000000000000000015231476751035500200110ustar00rootroot00000000000000import unittest from glossary_v2_test import TestGlossaryBase class TestGlossaryJSON(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "004-bar.json": "7e4b2663", "100-en-de-v4.json": "6a20c6f6", "100-en-fa.json": "8d29c1be", "100-ja-en.json": "fab2c106", }, ) def convert_txt_json(self, fname): self.convert( f"{fname}.txt", f"{fname}-2.json", compareText=f"{fname}.json", ) def test_convert_txt_json_0(self): self.convert_txt_json("004-bar") def test_convert_txt_json_1(self): self.convert_txt_json("100-en-fa") def test_convert_txt_json_2(self): self.convert_txt_json("100-en-de-v4") def test_convert_txt_json_3(self): self.convert_txt_json("100-ja-en") if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_kobo_dictfile_test.py000066400000000000000000000027761476751035500216500ustar00rootroot00000000000000import unittest import mistune # noqa: F401, to ensure it's installed from glossary_v2_test import TestGlossaryBase class TestGlossaryDictfile(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "022-en-en.df": "edff6de1", "022-en-en.df.txt": "93a2450f", "022-en-en.df.txt.df": "8e952e56", "res/01cf5b41.gif": "01cf5b41", "res/1f3c1a36.gif": "1f3c1a36", "res/3af9fd5d.gif": "3af9fd5d", "res/6684158d.gif": "6684158d", }, ) def convert_df_txt(self, fname, fname2, resFiles, **convertArgs): resFilesPath = { resFileName: self.newTempFilePath(f"{fname}-2.txt_res/{resFileName}") for resFileName in resFiles } self.convert( f"{fname}.df", f"{fname}-2.txt", compareText=f"{fname2}.txt", **convertArgs, ) for resFileName in resFiles: fpath1 = self.downloadFile(f"res/{resFileName}") fpath2 = resFilesPath[resFileName] self.compareBinaryFiles(fpath1, fpath2) def convert_txt_df(self, fname, fname2, **convertArgs): self.convert( f"{fname}.txt", f"{fname}-2.df", compareText=f"{fname2}.df", **convertArgs, ) def test_convert_df_txt_1(self): self.convert_df_txt( "022-en-en", "022-en-en.df", resFiles=[ "01cf5b41.gif", "1f3c1a36.gif", "3af9fd5d.gif", "6684158d.gif", ], ) def test_convert_txt_df_1(self): self.convert_txt_df( "022-en-en.df", "022-en-en.df.txt", ) if __name__ == "__main__": unittest.main() 
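# The res/*.gif fixtures above are named after their own CRC32 checksum, so
# the expected value in dataFileCRC32 equals the file's basename. A hedged
# sketch of that invariant using only the standard library (assuming
# pyglossary.text_utils.crc32hex behaves like this helper, which matches the
# 8-digit lowercase hex values used throughout these suites):
#
#   import zlib
#
#   def crc32hex(data: bytes) -> str:
#       # unsigned 32-bit CRC rendered as 8 lowercase hex digits
#       return f"{zlib.crc32(data) & 0xFFFFFFFF:08x}"
#
#   with open("01cf5b41.gif", "rb") as f:
#       assert crc32hex(f.read()) == "01cf5b41"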
pyglossary-5.0.9/tests/g_kobo_test.py000066400000000000000000000025261476751035500177760ustar00rootroot00000000000000import gzip import os import unittest from glossary_v2_test import TestGlossaryBase class TestGlossaryKobo(TestGlossaryBase): def setUp(self): if os.getenv("SKIP_MISSING"): try: import marisa_trie # noqa: F401 except ImportError: self.skipTest("skipping module due to missing dependency: marisa_trie") TestGlossaryBase.setUp(self) def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) # self.dataFileCRC32.update({}) def convert_txt_kobo(self, fname, sha1sumDict, **convertArgs): outputFname = f"{fname}-2.kobo.zip" outputFpath = self.newTempFilePath(outputFname) # expectedFpath = self.downloadFile(f"{fname}.kobo.zip") self.convert( f"{fname}.txt", outputFname, **convertArgs, ) dataReplaceFuncs = { _zfname: gzip.decompress for _zfname in sha1sumDict if _zfname != "words" } self.checkZipFileSha1sum( outputFpath, sha1sumDict=sha1sumDict, dataReplaceFuncs=dataReplaceFuncs, ) def test_convert_txt_kobo_1(self): sha1sumDict = { "11.html": "39f0f46560da7398ab0d3b19cc1c2387ecd201dd", "aa.html": "df9460450e8b46e913c57bf39dcc799ffdc2fb33", "ab.html": "be4271a8508dbb499bafd439810af621a7b3474f", "words": "d0f74e854f090fbaa8211bcfd162ad99ec4da0a3", } self.convert_txt_kobo("100-en-fa", sha1sumDict) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_lingoes_ldf_test.py000066400000000000000000000020261476751035500213240ustar00rootroot00000000000000import unittest from glossary_v2_test import TestGlossaryBase class TestGlossaryLingoesLDF(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "004-bar.ldf": "b1aa776d", "100-en-fa.ldf": "503d1a9b", }, ) def convert_txt_ldf(self, fname, fname2, **convertArgs): self.convert( f"{fname}.txt", f"{fname}-2.ldf", compareText=f"{fname2}.ldf", **convertArgs, ) def convert_ldf_txt(self, fname, fname2, **convertArgs): self.convert( f"{fname}.ldf", f"{fname}-2.txt", compareText=f"{fname2}.txt", **convertArgs, ) def test_convert_txt_ldf_1(self): self.convert_txt_ldf( "004-bar", "004-bar", ) def test_convert_txt_ldf_2(self): self.convert_txt_ldf( "100-en-fa", "100-en-fa", ) def test_convert_ldf_txt_1(self): self.convert_ldf_txt( "004-bar", "004-bar", infoOverride={ "name": None, "input_file_size": None, }, ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_quickdic6_test.py000066400000000000000000000026531476751035500207270ustar00rootroot00000000000000import os import unittest from glossary_v2_test import TestGlossaryBase class TestGlossaryQuickDic6(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "100-en-de-v4.txt": "d420a669", "100-en-de-v4.txt.quickdic": "9d4ccc13", "100-en-de-v4.txt.quickdic.txt": "2dc4fc17", "100-en-fa.txt.quickdic": "2bd483df", "100-en-fa.txt.quickdic.txt": "50994fb5", }, ) os.environ["QUICKDIC_CREATION_TIME"] = "1730579400" def convert_txt_quickdic(self, fname, sha1sum, **convertArgs): self.convert( f"{fname}.txt", f"{fname}-2.quickdic", sha1sum=sha1sum, **convertArgs, ) def convert_quickdic_txt(self, fname, fname2, **convertArgs): self.convert( f"{fname}.quickdic", f"{fname}-2.txt", compareText=f"{fname2}.txt", **convertArgs, ) def test_convert_txt_quickdic_1(self): self.convert_txt_quickdic( "100-en-de-v4", "c8d9694624bace08e6e999db75c9156776f257c9", ) def test_convert_quickdic_txt_1(self): 
self.convert_quickdic_txt( "100-en-de-v4.txt", "100-en-de-v4.txt.quickdic", ) def test_convert_txt_quickdic_2(self): self.convert_txt_quickdic( "100-en-fa", "371ac30d5ddedffe0a1c54b8a050aef62e5b91a5", ) def test_convert_quickdic_txt_2(self): self.convert_quickdic_txt( "100-en-fa.txt", "100-en-fa.txt.quickdic", ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_sql_test.py000066400000000000000000000012461476751035500176410ustar00rootroot00000000000000import os import unittest from glossary_v2_test import TestGlossaryBase class TestGlossarySQL(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "100-en-fa.txt": "f5c53133", "100-en-fa.txt-v2.sql": "70cd0514", }, ) os.environ["CALC_FILE_SIZE"] = "1" def convert_txt_sql(self, fname, fname2, **convertArgs): self.convert( f"{fname}.txt", f"{fname}-2.sql", compareText=f"{fname2}.sql", **convertArgs, ) def test_convert_txt_sql_1(self): self.convert_txt_sql( "100-en-fa", "100-en-fa.txt-v2", ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_stardict_merge_syns_test.py000066400000000000000000000046031476751035500231120ustar00rootroot00000000000000import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from g_stardict_test import TestGlossaryStarDictBase from glossary_v2_errors_test import TestGlossaryErrorsBase __all__ = ["TestGlossaryStarDictMergeSyns"] class TestGlossaryStarDictMergeSyns(TestGlossaryStarDictBase): def convert_txt_stardict(self, *args, **kwargs): kwargs["outputFormat"] = "StardictMergeSyns" TestGlossaryStarDictBase.convert_txt_stardict(self, *args, **kwargs) def __init__(self, *args, **kwargs): TestGlossaryErrorsBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "002-plain-html.txt": "75484314", "004-plain-html-alts.txt": "505d4675", "002-plain-html-sd-merge-syns-v2/002-plain-html.dict": "2e9d20d8", "002-plain-html-sd-merge-syns-v2/002-plain-html.idx": "3956ad72", "002-plain-html-sd-merge-syns-v2/002-plain-html.ifo": "1991f125", "004-plain-html-alts-sd-merge-syns-v2/004-plain-html-alts.dict": "889f11f8", "004-plain-html-alts-sd-merge-syns-v2/004-plain-html-alts.idx": "092ba555", "004-plain-html-alts-sd-merge-syns-v2/004-plain-html-alts.ifo": "628abe99", "004-plain-html-alts-sd-merge-syns-v2/004-plain-html-alts.syn": "c07f7111", "100-en-de-v4-sd-merge-syns-v2/100-en-de-v4.dict": "5a97476f", "100-en-de-v4-sd-merge-syns-v2/100-en-de-v4.idx": "a99f29d2", "100-en-de-v4-sd-merge-syns-v2/100-en-de-v4.ifo": "2120708c", "100-en-fa-sd-merge-syns-v2/100-en-fa.dict": "223a0d1d", "100-en-fa-sd-merge-syns-v2/100-en-fa.idx": "13f1c7af", "100-en-fa-sd-merge-syns-v2/100-en-fa.ifo": "248ef828", }, ) def test_convert_txt_stardict_1_merge_syns(self): self.convert_txt_stardict( "100-en-fa", "100-en-fa-sd-merge-syns-v2", syn=False, # dictzip=False, ) def test_convert_txt_stardict_3_merge_syns(self): self.convert_txt_stardict( "100-en-de-v4", "100-en-de-v4-sd-merge-syns-v2", syn=False, # dictzip=False, ) def test_convert_txt_stardict_general_1_merge_syns(self): self.convert_txt_stardict( "002-plain-html", "002-plain-html-sd-merge-syns-v2", syn=False, # dictzip=False, ) def test_convert_txt_stardict_general_2_merge_syns(self): self.convert_txt_stardict( "004-plain-html-alts", "004-plain-html-alts-sd-merge-syns-v2", syn=False, # dictzip=False, ) if __name__ == "__main__": unittest.main() 
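# Before the StarDict suites that follow: the .ifo files they compare with
# compareTextFiles are plain "key=value" metadata. An illustrative sample
# (field names from the StarDict format, as also exercised by the invalid
# sametypesequence test further down; the values here are made up, not taken
# from the fixtures):
#
#   StarDict's dict ifo file
#   version=3.0.0
#   bookname=100-en-fa
#   wordcount=100
#   synwordcount=100
#   idxfilesize=1234
#   sametypesequence=h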
pyglossary-5.0.9/tests/g_stardict_sort_test.py000066400000000000000000000044601476751035500217270ustar00rootroot00000000000000import os import unittest from g_stardict_test import TestGlossaryStarDictBase from glossary_v2_errors_test import TestGlossaryErrorsBase class TestGlossaryStarDictSortCustom(TestGlossaryStarDictBase): def __init__(self, *args, **kwargs): TestGlossaryErrorsBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "100-en-fa-sd-v2/100-en-fa.dict": "223a0d1d", "100-en-fa-sd-v2/100-en-fa.idx": "6df43378", "100-en-fa-sd-v2/100-en-fa.ifo": "bb916827", "100-en-fa-sd-v2/100-en-fa.syn": "1160fa0b", "100-en-fa-sd-v2.txt": "0b8b2ac0", "100-en-fa-sd.txt": "85f9d3fc", }, ) def convert_txt_stardict_enfa( self, fname, **convertArgs, ): self.convert_txt_stardict( fname, fname + "-sd-v2", config={"enable_alts": True}, info={ "sourceLang": "English", "targetLang": "Persian", }, **convertArgs, ) def convert_txt_stardict_enfa_1(self): sortKeyName = "headword" self.convert_txt_stardict_enfa( "100-en-fa", sortKeyName=sortKeyName, sqlite=True, ) self.assertLogWarning( f"Ignoring user-defined sort order {sortKeyName!r}" ", and using sortKey function from Stardict plugin", ) def test_convert_txt_stardict_enfa_2(self): sortKeyName = "ebook" self.convert_txt_stardict_enfa( "100-en-fa", sortKeyName=sortKeyName, sqlite=False, ) self.assertLogWarning( f"Ignoring user-defined sort order {sortKeyName!r}" ", and using sortKey function from Stardict plugin", ) def test_convert_txt_stardict_enfa_3(self): sortKeyName = "stardict:en_US.UTF-8" self.convert_txt_stardict_enfa( "100-en-fa", sortKeyName=sortKeyName, sqlite=True, ) self.assertLogWarning( f"Ignoring user-defined sort order {sortKeyName!r}" ", and using sortKey function from Stardict plugin", ) def test_convert_txt_stardict_enfa_4(self): sortKeyName = "stardict:fa_IR.UTF-8" self.convert_txt_stardict_enfa( "100-en-fa", sortKeyName=sortKeyName, sqlite=False, ) self.assertLogWarning( f"Ignoring user-defined sort order {sortKeyName!r}" ", and using sortKey function from Stardict plugin", ) def test_convert_txt_stardict_enfa_5(self): os.environ["NO_SQLITE"] = "1" self.convert_txt_stardict_enfa("100-en-fa", sqlite=False) del os.environ["NO_SQLITE"] if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_stardict_test.py000066400000000000000000000206511476751035500206600ustar00rootroot00000000000000import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from glossary_v2_errors_test import TestGlossaryErrorsBase from pyglossary.glossary_v2 import ConvertArgs, Error, Glossary __all__ = ["TestGlossaryStarDictBase"] class TestGlossaryStarDictBase(TestGlossaryErrorsBase): def convert_txt_stardict( # noqa: PLR0913 self, fname, sdDirName, syn=True, dictzip=False, config=None, writeOptions=None, info=None, **convertArgs, ): binExtList = ["idx", "dict"] if syn: binExtList.append("syn") inputFilename = self.downloadFile(f"{fname}.txt") outputFilename = self.newTempFilePath(f"{fname}.ifo") otherFiles = {ext: self.newTempFilePath(f"{fname}.{ext}") for ext in binExtList} glos = self.glos = Glossary() if info: for key, value in info.items(): glos.setInfo(key, value) if config is not None: glos.config = config if writeOptions is None: writeOptions = {} writeOptions["dictzip"] = dictzip result = glos.convert( ConvertArgs( inputFilename=inputFilename, outputFilename=outputFilename, writeOptions=writeOptions, **convertArgs, ) ) self.assertEqual(outputFilename, 
result) self.compareTextFiles( outputFilename, self.downloadFile(f"{sdDirName}/{fname}.ifo"), ) for ext in binExtList: self.compareBinaryFiles( otherFiles[ext], self.downloadFile(f"{sdDirName}/{fname}.{ext}"), ) def convert_txt_stardict_zip( # noqa: PLR0913 self, fname, sha1sumDict, dictzip=False, config=None, **convertArgs, ): inputFilename = self.downloadFile(f"{fname}.txt") outputFilename = self.newTempFilePath(f"{fname}.zip") glos = self.glos = Glossary() if config is not None: glos.config = config result = glos.convert( ConvertArgs( inputFilename=inputFilename, outputFilename=outputFilename, outputFormat="Stardict", writeOptions={ "dictzip": dictzip, }, **convertArgs, ) ) self.assertEqual(outputFilename, result) self.checkZipFileSha1sum( outputFilename, sha1sumDict=sha1sumDict, ) def convert_stardict_txt( self, inputFname: str, inputDirName: str, outputFname: str, testId: str, syn=True, **convertArgs, ): binExtList = ["idx", "dict"] if syn: binExtList.append("syn") for ext in binExtList: self.downloadFile(f"{inputDirName}/{inputFname}.{ext}") inputFilename = self.downloadFile(f"{inputDirName}/{inputFname}.ifo") outputFilename = self.newTempFilePath( f"{inputFname}-{testId}.txt", ) expectedFilename = self.downloadFile(f"{outputFname}.txt") glos = self.glos = Glossary() result = glos.convert( ConvertArgs( inputFilename=inputFilename, outputFilename=outputFilename, **convertArgs, ) ) self.assertEqual(outputFilename, result) self.compareTextFiles(outputFilename, expectedFilename) class TestGlossaryStarDict(TestGlossaryStarDictBase): def __init__(self, *args, **kwargs): TestGlossaryErrorsBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "004-bar.sd/004-bar.dict": "9ea397f8", "004-bar.sd/004-bar.idx": "cf9440cf", "004-bar.sd/004-bar.ifo": "ada870e4", "004-bar.sd/004-bar.syn": "286b17bf", "100-en-de-v4-sd-v2/100-en-de-v4.dict": "5a97476f", "100-en-de-v4-sd-v2/100-en-de-v4.idx": "a99f29d2", "100-en-de-v4-sd-v2/100-en-de-v4.ifo": "2120708c", "100-en-fa-sd-v2/100-en-fa.dict": "223a0d1d", "100-en-fa-sd-v2/100-en-fa.idx": "6df43378", "100-en-fa-sd-v2/100-en-fa.ifo": "bb916827", "100-en-fa-sd-v2/100-en-fa.syn": "1160fa0b", "100-en-fa-sd-v2.txt": "0b8b2ac0", # FIXME: remove empty description line from 100-en-fa.ifo # stardict-mixed-types-1.ifo, "stardict-mixed-types-2.ifo "100-ja-en.sd/100-ja-en.dict": "39715f01", "100-ja-en.sd/100-ja-en.idx": "adf0e552", "100-ja-en.sd/100-ja-en.ifo": "b01e368c", "100-ja-en.sd/100-ja-en.syn": "76e6df95", "300-ru-en.txt": "77cfee2f", "300-ru-en.sd/300-ru-en.dict": "8be7fa4c", "300-ru-en.sd/300-ru-en.idx": "1cd30f1a", "300-ru-en.sd/300-ru-en.ifo": "0b135812", "300-ru-en.sd/300-ru-en.syn": "87ee3372", "stardict-mixed-types-2.sd/stardict-mixed-types-2.dict": "2e43237a", "stardict-mixed-types-2.sd/stardict-mixed-types-2.idx": "65a1f9fc", "stardict-mixed-types-2.sd/stardict-mixed-types-2.ifo": "e1063b84", "stardict-mixed-types-2.sd.txt": "94de4bc6", "002-plain-html.txt": "75484314", "002-plain-html.sd/002-plain-html.dict": "2e9d20d8", "002-plain-html.sd/002-plain-html.idx": "3956ad72", "002-plain-html.sd/002-plain-html.ifo": "1991f125", "004-plain-html-alts.txt": "505d4675", "004-plain-html-alts.sd/004-plain-html-alts.dict": "889f11f8", "004-plain-html-alts.sd/004-plain-html-alts.idx": "edbe368d", "004-plain-html-alts.sd/004-plain-html-alts.ifo": "b9b92fa3", "004-plain-html-alts.sd/004-plain-html-alts.syn": "c07f7111", }, ) def test_convert_txt_stardict_0(self): self.convert_txt_stardict( "100-en-fa", "100-en-fa-sd-v2", config={"auto_sqlite": True}, 
direct=True, ) def test_convert_txt_stardict_1(self): for sqlite in (None, False, True): self.convert_txt_stardict( "100-en-fa", "100-en-fa-sd-v2", sqlite=sqlite, ) def test_convert_txt_stardict_1_zip(self): sha1sumDict = { "100-en-fa.dict": "1e462e829f9e2bf854ceac2ef8bc55911460c79e", "100-en-fa.idx": "943005945b35abf3a3e7b80375c76daa87e810f0", "100-en-fa.ifo": "bf12a932385f54dfcf5ab023d89a8dbd7091e60f", "100-en-fa.syn": "fcefc76628fed18b84b9aa83cd7139721b488545", } for sqlite in (None, False, True): self.convert_txt_stardict_zip( "100-en-fa", sha1sumDict=sha1sumDict, sqlite=sqlite, ) def test_convert_txt_stardict_2(self): for sqlite in (None, False, True): self.convert_txt_stardict( "004-bar", "004-bar.sd", sqlite=sqlite, ) def test_convert_txt_stardict_3(self): for sqlite in (None, False, True): self.convert_txt_stardict( "100-en-de-v4", "100-en-de-v4-sd-v2", syn=False, sqlite=sqlite, ) def test_convert_txt_stardict_4(self): for sqlite in (None, False, True): self.convert_txt_stardict( "100-ja-en", "100-ja-en.sd", syn=True, sqlite=sqlite, ) def test_convert_txt_stardict_5(self): for sqlite in (None, False, True): self.convert_txt_stardict( "300-ru-en", "300-ru-en.sd", syn=True, sqlite=sqlite, ) def test_convert_txt_stardict_sqlite_no_alts(self): self.convert_txt_stardict( "100-en-fa", "100-en-fa-sd-v2", config={"enable_alts": False}, sqlite=True, ) self.assertLogWarning( "SQLite mode only works with enable_alts=True, force-enabling it.", ) def test_convert_stardict_txt_1(self): self.convert_stardict_txt( "100-en-fa", "100-en-fa-sd-v2", "100-en-fa-sd-v2", "1", ) def test_convert_stardict_txt_mixed_types_1(self): self.convert_stardict_txt( "stardict-mixed-types-2", "stardict-mixed-types-2.sd", "stardict-mixed-types-2.sd", "mixed-types-1", syn=False, ) def test_convert_stardict_txt_mixed_types_2(self): self.convert_stardict_txt( "stardict-mixed-types-2", "stardict-mixed-types-2.sd", "stardict-mixed-types-2.sd", "mixed-types-1", syn=False, readOptions={"xdxf_to_html": False}, ) def test_convert_txt_stardict_general_1(self): self.convert_txt_stardict( "002-plain-html", "002-plain-html.sd", syn=False, ) def test_convert_txt_stardict_general_2(self): self.convert_txt_stardict( "004-plain-html-alts", "004-plain-html-alts.sd", syn=True, ) class TestGlossaryErrorsStarDict(TestGlossaryErrorsBase): def __init__(self, *args, **kwargs): TestGlossaryErrorsBase.__init__(self, *args, **kwargs) def test_convert_from_stardict_invalid_sametypesequence(self): fname = "foobar" inputFilename = self.newTempFilePath(f"{fname}.ifo") outputFilename = self.newTempFilePath(f"{fname}.txt") with open(inputFilename, mode="w", encoding="utf-8") as _file: _file.write( """StarDict's dict ifo file version=3.0.0 bookname=Test wordcount=123 idxfilesize=1234 sametypesequence=abcd """, ) glos = self.glos = Glossary() with self.assertRaisesRegex( Error, "Invalid sametypesequence = 'abcd'", ): glos.convert( ConvertArgs( inputFilename=inputFilename, outputFilename=outputFilename, ) ) # self.assertLogCritical(f"Reading file {relpath(inputFilename)!r} failed.") if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_stardict_textual_test.py000066400000000000000000000032111476751035500224170ustar00rootroot00000000000000import unittest from glossary_v2_test import TestGlossaryBase class TestGlossaryStarDictTextual(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "100-en-fa-sdt.xml": "48cb3336", "100-en-fa-sdt.xml.txt": "0c9b4025", 
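				# CRC32 checksums of the test fixtures; downloadFile() re-verifies each file against these after fetching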
"stardict-xdxf-2.xml": "b3285d5c", "stardict-xdxf-2.xml-h.txt": "97b3a22b", "stardict-xdxf-2.xml-x.txt": "de63f937", "stardict-mixed-types-2.xml": "51d9ceb2", "stardict-mixed-types-2.xml.txt": "c896cf68", }, ) def convert_txt_sdxml(self, fname, fname2, **convertArgs): self.convert( f"{fname}.txt", f"{fname}-2.xml", compareText=f"{fname2}.xml", outputFormat="StardictTextual", **convertArgs, ) def convert_sdxml_txt(self, fname, fname2, **convertArgs): self.convert( f"{fname}.xml", f"{fname}-2.txt", compareText=f"{fname2}.txt", inputFormat="StardictTextual", **convertArgs, ) def test_convert_txt_sdxml_1(self): self.convert_txt_sdxml( "100-en-fa", "100-en-fa-sdt", ) def test_convert_sdxml_txt_1(self): self.convert_sdxml_txt( "100-en-fa-sdt", "100-en-fa-sdt.xml", ) def test_convert_sdxml_txt_2(self): self.convert_sdxml_txt( "stardict-mixed-types-2", "stardict-mixed-types-2.xml", ) def test_convert_sdxml_txt_3(self): self.convert_sdxml_txt( "stardict-xdxf-2", "stardict-xdxf-2.xml-h", readOptions={"xdxf_to_html": True}, ) def test_convert_sdxml_txt_4(self): self.convert_sdxml_txt( "stardict-xdxf-2", "stardict-xdxf-2.xml-x", readOptions={"xdxf_to_html": False}, ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_wiktextract_test.py000066400000000000000000000041351476751035500214130ustar00rootroot00000000000000import os import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from glossary_v2_test import TestGlossaryBase class TestGlossaryWiktextract(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "wiktextract/10-kaikki-fa-PlacesInIran.jsonl": "f7f4a92f", "wiktextract/10-kaikki-fa-PlacesInIran.txt": "29b20845", "wiktextract/10-kaikki-fa-PlacesInIran-category.txt": "d12fa9c0", "wiktextract/10-kaikki-fa-pos-adv.jsonl": "2ddcbbbd", "wiktextract/10-kaikki-fa-pos-adv.txt": "fbaa9972", "wiktextract/10-kaikki-fa-pos-adv-word_title.txt": "4933de91", "wiktextract/03-kaikki-fa-selection.jsonl": "31223225", "wiktextract/03-kaikki-fa-selection.txt": "f54d1a97", }, ) os.environ["CALC_FILE_SIZE"] = "1" def convert_jsonl_txt(self, fname, fname2, **convertArgs): self.convert( f"wiktextract/{fname}.jsonl", f"{fname}-2.txt", compareText=f"wiktextract/{fname2}.txt", infoOverride={ # without this, glos name would become f"wiktextract__{fname}.jsonl" "name": f"{fname}.jsonl", }, **convertArgs, ) def test_convert_jsonl_txt_1(self): self.convert_jsonl_txt( "10-kaikki-fa-PlacesInIran", "10-kaikki-fa-PlacesInIran", ) def test_convert_jsonl_txt_1_cats(self): self.convert_jsonl_txt( "10-kaikki-fa-PlacesInIran", "10-kaikki-fa-PlacesInIran-category", readOptions={ "categories": True, }, ) def test_convert_jsonl_txt_2(self): self.convert_jsonl_txt( "10-kaikki-fa-pos-adv", "10-kaikki-fa-pos-adv", ) def test_convert_jsonl_txt_2_word_title(self): self.convert_jsonl_txt( "10-kaikki-fa-pos-adv", "10-kaikki-fa-pos-adv-word_title", readOptions={ "word_title": True, }, ) def test_convert_jsonl_txt_3(self): self.convert_jsonl_txt( "03-kaikki-fa-selection", "03-kaikki-fa-selection", ) # testing these features # "antonyms" in sense # "topics" in sense # "form_of" in sense if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_xdxf_css_test.py000066400000000000000000000015251476751035500206630ustar00rootroot00000000000000import unittest from glossary_v2_test import TestGlossaryBase class TestGlossaryXDXF(TestGlossaryBase): def __init__(self, 
*args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "100-cyber_lexicon_en-es.xdxf": "8d9ba394", "100-cyber_lexicon_en-es-css.txt": "be892c84", # "100-cyber_lexicon_en-es-css.txt_res/css/xdxf.css": "206ae89d", # "100-cyber_lexicon_en-es-css.txt_res/js/xdxf.js": "938842f0", }, ) def convert_xdxf_txt(self, fname, fname2, **convertArgs): self.convert( f"{fname}.xdxf", f"{fname}-tmp.txt", compareText=f"{fname2}.txt", inputFormat="XdxfCss", **convertArgs, ) def test_convert_xdxf_txt_1(self): self.convert_xdxf_txt( "100-cyber_lexicon_en-es", "100-cyber_lexicon_en-es-css", ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_xdxf_lax_test.py000066400000000000000000000013141476751035500206530ustar00rootroot00000000000000import unittest from glossary_v2_test import TestGlossaryBase class TestGlossaryXDXFLax(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "100-cyber_lexicon_en-es.xdxf": "8d9ba394", "100-cyber_lexicon_en-es-v3.txt": "4aa05086", }, ) def convert_xdxf_txt(self, fname, fname2, **convertArgs): self.convert( f"{fname}.xdxf", f"{fname}-tmp.txt", compareText=f"{fname2}.txt", inputFormat="XdxfLax", **convertArgs, ) def test_convert_xdxf_txt_1(self): self.convert_xdxf_txt( "100-cyber_lexicon_en-es", "100-cyber_lexicon_en-es-v3", ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_xdxf_test.py000066400000000000000000000012571476751035500200150ustar00rootroot00000000000000import unittest from glossary_v2_test import TestGlossaryBase class TestGlossaryXDXF(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "100-cyber_lexicon_en-es.xdxf": "8d9ba394", "100-cyber_lexicon_en-es-v3.txt": "4aa05086", }, ) def convert_xdxf_txt(self, fname, fname2, **convertArgs): self.convert( f"{fname}.xdxf", f"{fname}-tmp.txt", compareText=f"{fname2}.txt", **convertArgs, ) def test_convert_xdxf_txt_1(self): self.convert_xdxf_txt( "100-cyber_lexicon_en-es", "100-cyber_lexicon_en-es-v3", ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/g_yomichan_test.py000066400000000000000000000036071476751035500206540ustar00rootroot00000000000000import datetime import hashlib import sys import unittest from os.path import abspath, dirname from freezegun import freeze_time rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from glossary_v2_test import TestGlossaryBase from pyglossary.glossary_v2 import ConvertArgs, Glossary testTimeEpoch = 1730579400 testTime = datetime.datetime.fromtimestamp(testTimeEpoch, tz=datetime.timezone.utc) class TestGlossaryYomichan(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.dataFileCRC32.update( { "050-JMdict-English-v3.txt": "6068b9a7", }, ) @freeze_time(testTime) def convert_to_yomichan( self, inputFname, testId, sha1sum="", **convertArgs, ): inputFilename = self.downloadFile(inputFname) outputFilename = self.newTempFilePath( f"{inputFname.replace('.', '_')}-{testId}.zip", ) glos = self.glos = Glossary() res = glos.convert( ConvertArgs( inputFilename=inputFilename, outputFilename=outputFilename, outputFormat="Yomichan", **convertArgs, ) ) self.assertEqual(outputFilename, res) if sha1sum: with open(outputFilename, mode="rb") as _file: actualSha1 = hashlib.sha1(_file.read()).hexdigest() self.assertEqual(sha1sum, actualSha1, f"{outputFilename=}") def 
test_convert_txt_yomichan_1(self): if sys.version_info[:2] == (3, 13): self.skipTest("Skipping test on this Python version") self.convert_to_yomichan( "050-JMdict-English-v3.txt", testId="1", # sha1sum="e54bc12755924586c306831b54a44a3dfd45cf7b", # FIXME ) def test_convert_txt_yomichan_2(self): if sys.version_info[:2] == (3, 13): self.skipTest("Skipping test on this Python version") self.convert_to_yomichan( "100-ja-en.txt", testId="2", # sha1sum="02bf6195eba15d0e76b3b119fa9c57d3f17eb169", # FIXME ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/glossary_v2_errors_test.py000066400000000000000000000271521476751035500224060ustar00rootroot00000000000000import logging import os import sys import unittest from os.path import abspath, dirname, isfile, join, relpath rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from glossary_v2_test import TestGlossaryBase, appTmpDir from pyglossary.core_test import getMockLogger from pyglossary.glossary_v2 import ConvertArgs, Error, Glossary from pyglossary.os_utils import rmtree __all__ = ["TestGlossaryErrors", "TestGlossaryErrorsBase"] Glossary.init() class MyStr(str): __slots__ = [] class TestGlossaryErrorsBase(TestGlossaryBase): def __init__(self, *args, **kwargs): TestGlossaryBase.__init__(self, *args, **kwargs) self.mockLog = getMockLogger() def setUp(self): TestGlossaryBase.setUp(self) self.mockLog.clear() def tearDown(self): TestGlossaryBase.tearDown(self) method = self._testMethodName self.assertEqual(0, self.mockLog.printRemainingErrors(method)) warnCount = self.mockLog.printRemainingwWarnings(method) if warnCount > 0: print( f"Got {warnCount} unhandled warnings " f"from {self.__class__.__name__}: {self._testMethodName}\n", ) def assertLogCritical(self, errorMsg): self.assertIsNotNone( self.mockLog.popLog( logging.CRITICAL, errorMsg, ), msg=f"did not find critical log {errorMsg!r}", ) def assertLogError(self, errorMsg): self.assertIsNotNone( self.mockLog.popLog( logging.ERROR, errorMsg, ), msg=f"did not find error log {errorMsg!r}", ) def assertLogWarning(self, errorMsg): self.assertIsNotNone( self.mockLog.popLog( logging.WARNING, errorMsg, ), msg=f"did not find warning log {errorMsg!r}", ) def osRoot(): if os.sep == "\\": return "C:\\" return "/" if os.sep == "\\": osNoSuchFileOrDir = "[WinError 3] The system cannot find the path specified:" else: osNoSuchFileOrDir = "[Errno 2] No such file or directory:" class TestGlossaryErrors(TestGlossaryErrorsBase): def test_loadPlugins_invalidDir(self): path = join(osRoot(), "abc", "def", "ghe") Glossary.loadPlugins(path) self.assertLogCritical(f"Invalid plugin directory: {path!r}") def test_detectInputFormat_err1(self): err = None try: Glossary.detectInputFormat( filename="", formatName="", ) except Error as e: err = str(e) self.assertEqual(err, "Unable to detect input format!") def test_detectInputFormat_err2(self): err = None try: Glossary.detectInputFormat( filename="test.abcd", formatName="", ) except Error as e: err = str(e) self.assertEqual(err, "Unable to detect input format!") def test_detectInputFormat_err3(self): err = None try: Glossary.detectInputFormat( filename="test.sql", formatName="", ) except Error as e: err = str(e) self.assertEqual(err, "plugin Sql does not support reading") def test_detectInputFormat_err4(self): err = None try: Glossary.detectInputFormat( filename="test", formatName="FooBar", ) except Error as e: err = str(e) self.assertEqual(err, "Invalid format 'FooBar'") def test_detectInputFormat_ok1(self): res = Glossary.detectInputFormat( 
filename="test1.txt.gz", formatName="", ) self.assertEqual(res, ("test1.txt.gz", "Tabfile", "")) def test_detectInputFormat_ok2(self): res = Glossary.detectInputFormat( filename="test2.txt.zip", formatName="", ) self.assertEqual(res, ("test2.txt", "Tabfile", "zip")) def test_detectOutputFormat_err1(self): err = None try: Glossary.detectOutputFormat( filename="", formatName="", inputFilename="", ) except Error as e: err = str(e) self.assertEqual(err, "Invalid filename ''") def test_detectOutputFormat_err2(self): try: Glossary.detectOutputFormat( filename="test", formatName="FooBar", inputFilename="", ) except Error as e: err = str(e) self.assertEqual(err, "Invalid format FooBar") def test_detectOutputFormat_err3(self): err = None try: Glossary.detectOutputFormat( filename="", formatName="", inputFilename="test", ) except Error as e: err = str(e) self.assertEqual(err, "No filename nor format is given for output file") def test_detectOutputFormat_err4_1(self): err = None try: Glossary.detectOutputFormat( filename="", formatName="BabylonBgl", inputFilename="test3.txt", ) except Error as e: err = str(e) self.assertEqual(err, "plugin BabylonBgl does not support writing") def test_detectOutputFormat_err4_2(self): err = None try: Glossary.detectOutputFormat( filename="test.bgl", formatName="", inputFilename="", ) except Error as e: err = str(e) self.assertEqual(err, "plugin BabylonBgl does not support writing") def test_detectOutputFormat_err5(self): err = None try: Glossary.detectOutputFormat( filename="test", formatName="", inputFilename="", ) except Error as e: err = str(e) self.assertEqual(err, "Unable to detect output format!") def test_detectOutputFormat_err6(self): res = Glossary.detectOutputFormat( filename="test", formatName="Tabfile", inputFilename="", addExt=True, ) self.assertEqual(res, ("test", "Tabfile", "")) self.assertLogError("inputFilename is empty") def test_cleanup_removed(self): glos = Glossary() tmpFname = "test_cleanup_removed" entry = glos.newDataEntry(tmpFname, b"test") tmpFpath = entry._tmpPath self.assertTrue(bool(tmpFpath), msg="entry tmpPath is empty") self.assertTrue(isfile(tmpFpath), msg=f"tmp file does not exist: {tmpFpath}") rmtree(appTmpDir) glos.cleanup() self.assertLogError(f"no such file or directory: {appTmpDir}") def test_lang_err_get_source(self): glos = Glossary() glos.setInfo("sourcelang", "test") self.assertEqual(glos.sourceLangName, "") self.assertLogError("unknown language 'test'") def test_lang_err_get_target(self): glos = Glossary() glos.setInfo("targetlang", "test") self.assertEqual(glos.targetLangName, "") self.assertLogError("unknown language 'test'") def test_lang_err_set_source(self): glos = Glossary() glos.sourceLangName = "foobar" self.assertLogError("unknown language 'foobar'") self.assertEqual(glos.sourceLangName, "") def test_lang_err_set_target(self): glos = Glossary() glos.targetLangName = "foobar" self.assertLogError("unknown language 'foobar'") self.assertEqual(glos.targetLangName, "") def test_lang_err_setObj_source(self): glos = Glossary() try: glos.sourceLang = "foobar" except TypeError as e: self.assertEqual(str(e), "invalid lang='foobar', must be a Lang object") else: self.fail("must raise a TypeError") def test_lang_err_setObj_target(self): glos = Glossary() try: glos.targetLang = "foobar" except TypeError as e: self.assertEqual(str(e), "invalid lang='foobar', must be a Lang object") else: self.fail("must raise a TypeError") def test_config_attr_set_twice(self): glos = Glossary() glos.config = {"lower": True} 
self.assertEqual(glos.getConfig("lower", False), True) glos.config = {"lower": False} self.assertLogError("glos.config is set more than once") self.assertEqual(glos.getConfig("lower", False), True) def test_iter_empty(self): glos = Glossary() self.assertEqual(list(glos), []) def test_convert_typeErr_1(self): glos = Glossary() try: glos.convert( ConvertArgs( inputFilename=MyStr(""), ), ) except TypeError as e: self.assertEqual(str(e), "inputFilename must be str") else: self.fail("must raise TypeError") def test_convert_typeErr_2(self): glos = Glossary() try: glos.convert( ConvertArgs( inputFilename="", outputFilename=MyStr(""), ), ) except TypeError as e: self.assertEqual(str(e), "outputFilename must be str") else: self.fail("must raise TypeError") def test_convert_typeErr_3(self): glos = Glossary() try: glos.convert( ConvertArgs( inputFilename="", outputFilename="", inputFormat=MyStr(""), ), ) except TypeError as e: self.assertEqual(str(e), "inputFormat must be str") else: self.fail("must raise TypeError") def test_convert_typeErr_4(self): glos = Glossary() try: glos.convert( ConvertArgs( inputFilename="", outputFilename="", inputFormat="", outputFormat=MyStr(""), ), ) except TypeError as e: self.assertEqual(str(e), "outputFormat must be str") else: self.fail("must raise TypeError") def test_write_typeErr_1(self): glos = Glossary() try: glos.write( filename=MyStr(""), formatName="", ) except TypeError as e: self.assertEqual(str(e), "filename must be str") else: self.fail("must raise TypeError") def test_convert_sameFilename(self): glos = Glossary() err = None try: glos.convert( ConvertArgs( inputFilename="test4.txt", outputFilename="test4.txt", ), ) except Error as e: err = str(e) self.assertEqual(err, "Input and output files are the same") def test_convert_dirExists(self): glos = Glossary() tempFilePath = self.newTempFilePath("test_convert_dirExists") with open(tempFilePath, mode="w", encoding="utf-8") as _file: _file.write("") err = None try: glos.convert( ConvertArgs( inputFilename="test5.txt", outputFilename=self.tempDir, outputFormat="Stardict", ), ) except Error as e: err = str(e) self.assertEqual( err, f"Directory already exists and not empty: {relpath(self.tempDir)}", ) def test_convert_fileNotFound(self): glos = Glossary() inputFilename = join(osRoot(), "abc", "def", "test6.txt") err = None try: glos.convert( ConvertArgs( inputFilename=inputFilename, outputFilename="test2.txt", ), ) except Error as e: err = str(e) self.assertEqual( err, f"[Errno 2] No such file or directory: {inputFilename!r}", ) # self.assertLogCritical(f"Reading file {relpath(inputFilename)!r} failed.") def test_convert_unableDetectOutputFormat(self): glos = Glossary() err = None try: glos.convert( ConvertArgs( inputFilename="test7.txt", outputFilename="test", outputFormat="", ), ) except Error as e: err = str(e) self.assertEqual(err, "Unable to detect output format!") # self.assertLogCritical(f"Writing file {relpath('test')!r} failed.") def test_convert_writeFileNotFound_txt(self): outputFilename = join( appTmpDir, "test", "7de8cf6f17bc4c9abb439e71adbec95d.txt", ) glos = Glossary() err = None try: glos.convert( ConvertArgs( inputFilename=self.downloadFile("100-en-fa.txt"), outputFilename=outputFilename, ), ) except Error as e: err = str(e) self.assertEqual( err, f"[Errno 2] No such file or directory: {outputFilename!r}", ) # self.assertLogCritical(f"Writing file {relpath(outputFilename)!r} failed.") def test_convert_writeFileNotFound_hdir(self): outputFilename = join(osRoot(), "test", 
"40e20107f5b04087bfc0ec0d61510017.hdir") glos = Glossary() err = None try: glos.convert( ConvertArgs( inputFilename=self.downloadFile("100-en-fa.txt"), outputFilename=outputFilename, ), ) except Error as e: err = str(e) self.assertEqual( err, f"{osNoSuchFileOrDir} {outputFilename!r}", ) # self.assertLogCritical(f"Writing file {relpath(outputFilename)!r} failed.") def test_convert_invalidSortKeyName(self): glos = self.glos = Glossary() outputFilename = self.newTempFilePath("none.txt") err = None try: glos.convert( ConvertArgs( inputFilename=self.downloadFile("100-en-fa.txt"), outputFilename=outputFilename, sort=True, sortKeyName="blah", ), ) except Error as e: err = str(e) self.assertEqual(err, "invalid sortKeyName = 'blah'") if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/glossary_v2_security_test.py000066400000000000000000000030261476751035500227330ustar00rootroot00000000000000import logging import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from glossary_v2_errors_test import TestGlossaryErrors from pyglossary.glossary_v2 import ConvertArgs, Error, Glossary, ReadError class TestGlossarySecurity(TestGlossaryErrors): def __init__(self, *args, **kwargs): TestGlossaryErrors.__init__(self, *args, **kwargs) self.mockLog.setLevel(logging.INFO) def test_convert_1(self): glos = Glossary() with self.assertRaisesRegex(Error, "Unable to detect output format!"): glos.convert( ConvertArgs( inputFilename="os.system('abcd')", outputFilename="os.system('abcd -l')", ) ) def test_convert_2(self): glos = Glossary() with self.assertRaisesRegex(Error, "Unable to detect output format!"): glos.convert( ConvertArgs( inputFilename="os.system('abcd');test.txt", outputFilename="os.system('abcd -l')", ) ) def test_convert_3(self): glos = Glossary() with self.assertRaisesRegex(ReadError, "No such file or directory: "): glos.convert( ConvertArgs( inputFilename="os.system('abcd');test.txt", outputFilename="os.system('abcd -l');test.csv", ) ) def test_convert_4(self): glos = Glossary() with self.assertRaisesRegex(Error, "Unable to detect output format!"): glos.convert( ConvertArgs( inputFilename="test.txt\nos.system('abcd')", outputFilename="test.csv\nos.system('abcd -l')", ) ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/glossary_v2_test.py000066400000000000000000000642631476751035500210160ustar00rootroot00000000000000import hashlib import json import logging import os import random import sys import tempfile import tracemalloc import unittest import zipfile from os.path import abspath, dirname, isdir, isfile, join, realpath from urllib.request import urlopen rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from typing import TYPE_CHECKING from pyglossary.core import cacheDir, log, tmpDir from pyglossary.glossary_v2 import ConvertArgs, Glossary from pyglossary.os_utils import rmtree from pyglossary.text_utils import crc32hex if TYPE_CHECKING: from collections.abc import Callable __all__ = ["TestGlossaryBase", "appTmpDir", "testCacheDir"] tracemalloc.start() Glossary.init() repo = os.getenv( "PYGLOSSARY_TEST_REPO", "ilius/pyglossary-test/main", ) dataURL = f"https://raw.githubusercontent.com/{repo}/{{filename}}" testCacheDir = realpath(join(cacheDir, "test")) appTmpDir = join(cacheDir, "tmp") os.makedirs(testCacheDir, exist_ok=True) os.chdir(testCacheDir) os.makedirs(join(tmpDir, "pyglossary"), exist_ok=True) class TestGlossaryBase(unittest.TestCase): def __init__(self, 
*args, **kwargs): unittest.TestCase.__init__(self, *args, **kwargs) self.maxDiff = None self.dataFileCRC32 = { "004-bar.txt": "6775e590", "004-bar-sort.txt": "fe861123", "006-empty.txt": "07ff224b", "006-empty-filtered.txt": "2b3c1c0f", "100-en-de-v4.txt": "d420a669", "100-en-fa.txt": "f5c53133", "100-ja-en.txt": "93542e89", "100-en-de-v4-remove_font_b.txt": "a3144e2f", "100-en-de-v4.sd/100-en-de.dict": "5a97476f", "100-en-de-v4.sd/100-en-de.idx": "a99f29d2", "100-en-de-v4.sd/100-en-de.ifo": "6529871f", "100-en-de-v4.info": "f2cfb284", "100-en-fa.info": "9bddb7bb", "100-en-fa-v2.info": "7c0f646b", "100-ja-en.info": "8cf5403c", "300-rand-en-fa.txt": "586617c8", "res/stardict.png": "7e1447fa", "res/test.json": "41f8cf31", } os.environ["CALC_FILE_SIZE"] = "1" def addDirCRC32(self, dirPath: str, files: "dict[str, str]") -> None: for fpath, _hash in files.items(): self.dataFileCRC32[f"{dirPath}/{fpath}"] = _hash # The setUp() and tearDown() methods allow you to define instructions that # will be executed before and after each test method. def setUp(self): self.glos = None self.tempDir = tempfile.mkdtemp(dir=join(tmpDir, "pyglossary")) def tearDown(self): if self.glos is not None: self.glos.cleanup() self.glos.clear() if os.getenv("NO_CLEANUP"): return for direc in ( self.tempDir, appTmpDir, ): if isdir(direc): rmtree(direc) def fixDownloadFilename(self, filename): return filename.replace("/", "__").replace("\\", "__") def downloadFile(self, filename): unixFilename = filename.replace("\\", "/") crc32 = self.dataFileCRC32[unixFilename] fpath = join(testCacheDir, self.fixDownloadFilename(filename)) if isfile(fpath): with open(fpath, mode="rb") as _file: data = _file.read() if crc32hex(data) != crc32: raise RuntimeError(f"CRC32 check failed for existing file: {fpath!r}") return fpath try: with urlopen(dataURL.format(filename=unixFilename)) as res: data = res.read() except Exception as e: print(f"{filename=}") raise e from None actual_crc32 = crc32hex(data) if actual_crc32 != crc32: raise RuntimeError( f"CRC32 check failed for downloaded file: {filename!r}: {actual_crc32}", ) with open(fpath, mode="wb") as _file: _file.write(data) return fpath def downloadDir(self, dirName: str, files: list[str]) -> str: dirPath = join(testCacheDir, self.fixDownloadFilename(dirName)) for fileRelPath in files: newFilePath = join(dirPath, fileRelPath) if isfile(newFilePath): # TODO: check crc-32 continue filePath = self.downloadFile(join(dirName, fileRelPath)) os.makedirs(dirname(newFilePath), exist_ok=True) os.rename(filePath, newFilePath) return dirPath def newTempFilePath(self, filename): fpath = join(self.tempDir, filename) if isfile(fpath): os.remove(fpath) return fpath def showGlossaryDiff(self, fpath1, fpath2) -> None: from pyglossary.ui.tools.diff_glossary import diffGlossary diffGlossary(fpath1, fpath2) def compareTextFiles(self, fpath1, fpath2, showDiff=False): self.assertTrue(isfile(fpath1), f"{fpath1 = }") self.assertTrue(isfile(fpath2), f"{fpath2 = }") with open(fpath1, encoding="utf-8") as file1: text1 = file1.read().rstrip("\n") with open(fpath2, encoding="utf-8") as file2: text2 = file2.read().rstrip("\n") try: self.assertEqual( len(text1), len(text2), msg=f"{fpath1!r} differs from {fpath2!r} in file size", ) self.assertEqual( text1, text2, msg=f"{fpath1!r} differs from {fpath2!r}", ) except AssertionError as e: if showDiff: self.showGlossaryDiff(fpath1, fpath2) raise e from None def compareBinaryFiles(self, fpath1, fpath2): self.assertTrue(isfile(fpath1), f"File {fpath1!r} does not exist") 
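		# both files must exist before attempting a byte-by-byte comparison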
		self.assertTrue(isfile(fpath2), f"File {fpath2!r} does not exist")
		with open(fpath1, mode="rb") as file1:
			data1 = file1.read()
		with open(fpath2, mode="rb") as file2:
			data2 = file2.read()
		self.assertEqual(len(data1), len(data2), msg=f"{fpath1!r}")
		self.assertTrue(
			data1 == data2,
			msg=f"{fpath1!r} differs from {fpath2!r}",
		)

	def compareZipFiles(
		self,
		fpath1,
		fpath2,
		dataReplaceFuncs: "dict[str, Callable]",
	):
		zf1 = zipfile.ZipFile(fpath1)
		zf2 = zipfile.ZipFile(fpath2)
		pathList1 = zf1.namelist()
		pathList2 = zf2.namelist()
		self.assertEqual(pathList1, pathList2)
		for zfpath in pathList1:
			data1 = zf1.read(zfpath)
			data2 = zf2.read(zfpath)
			func = dataReplaceFuncs.get(zfpath)
			if func is not None:
				data1 = func(data1)
				data2 = func(data2)
			self.assertEqual(len(data1), len(data2), msg=f"{zfpath=}")
			self.assertTrue(
				data1 == data2,
				msg=f"{zfpath=}",
			)

	def checkZipFileSha1sum(
		self,
		fpath,
		sha1sumDict: "dict[str, str]",
		dataReplaceFuncs: "dict[str, Callable] | None" = None,
	):
		if dataReplaceFuncs is None:
			dataReplaceFuncs = {}
		zf = zipfile.ZipFile(fpath)
		# pathList = zf.namelist()
		for zfpath, expectedSha1 in sha1sumDict.items():
			data = zf.read(zfpath)
			func = dataReplaceFuncs.get(zfpath)
			if func is not None:
				data = func(data)
			actualSha1 = hashlib.sha1(data).hexdigest()
			self.assertEqual(actualSha1, expectedSha1, msg=f"file: {zfpath}")

	def convert(  # noqa: PLR0913
		self,
		fname,  # input file with extension
		fname2,  # output file with extension
		testId="tmp",  # noqa: ARG002
		compareText="",
		compareBinary="",
		sha1sum=None,
		md5sum=None,
		config=None,
		showDiff=False,
		**convertKWArgs,
	):
		inputFilename = self.downloadFile(fname)
		outputFilename = self.newTempFilePath(fname2)
		glos = self.glos = Glossary()
		if config is not None:
			glos.config = config
		res = glos.convert(
			ConvertArgs(
				inputFilename=inputFilename,
				outputFilename=outputFilename,
				**convertKWArgs,
			),
		)
		self.assertEqual(outputFilename, res)
		if compareText:
			self.compareTextFiles(
				outputFilename,
				self.downloadFile(compareText),
				showDiff=showDiff,
			)
			return
		if compareBinary:
			self.compareBinaryFiles(outputFilename, self.downloadFile(compareBinary))
			return
		msg = f"{outputFilename=}"
		if sha1sum:
			with open(outputFilename, mode="rb") as _file:
				actualSha1 = hashlib.sha1(_file.read()).hexdigest()
			self.assertEqual(sha1sum, actualSha1, msg)
			return
		if md5sum:
			with open(outputFilename, mode="rb") as _file:
				actualMd5 = hashlib.md5(_file.read()).hexdigest()
			self.assertEqual(md5sum, actualMd5, msg)
			return

	def convert_txt_txt(
		self,
		fname,  # input txt file without extension
		fname2,  # expected output txt file without extension
		fnamePrefix="",
		testId="tmp",
		config=None,
		**convertArgs,
	):
		self.convert(
			f"{fnamePrefix}{fname}.txt",
			f"{fname2}-{testId}.txt",
			compareText=f"{fnamePrefix}{fname2}.txt",
			testId=testId,
			config=config,
			**convertArgs,
		)

	def convert_txt_txt_sort(self, *args, **convertArgs):
		for sqlite in (None, True, False):
			self.convert_txt_txt(*args, sort=True, sqlite=sqlite, **convertArgs)
		os.environ["NO_SQLITE"] = "1"
		self.convert_txt_txt(*args, sort=True, sqlite=False, **convertArgs)
		del os.environ["NO_SQLITE"]


class TestGlossary(TestGlossaryBase):
	def __init__(self, *args, **kwargs):
		TestGlossaryBase.__init__(self, *args, **kwargs)
		self.dataFileCRC32.update(
			{
				"100-en-fa-sort.txt": "d7a82dc8",
				"100-en-fa-sort-headword.txt": "4067a29f",
				"100-en-fa-sort-headword-fa.txt": "d01fcee1",
				"100-en-fa-sort-ebook.txt": "aa620d07",
				"100-en-fa-sort-ebook3.txt": "5a20f140",
				"100-en-fa-lower.txt": "62178940",
				"100-en-fa-remove_html_all-v3.txt": "d611c978",
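				# checksums of the expected outputs for the sort, lower/rtl and HTML-removal tests below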
"100-en-fa-rtl.txt": "25ede1e8", "300-rand-en-fa-sort-headword-w1256.txt": "06d83bac", "300-rand-en-fa-sort-headword.txt": "df0f8020", "300-rand-en-fa-sort-w1256.txt": "9594aab3", "sort-locale/092-en-fa-alphabet-sample.txt": "b4856532", "sort-locale/092-en-fa-alphabet-sample-sorted-default.txt": "e7b70589", "sort-locale/092-en-fa-alphabet-sample-sorted-en.txt": "3d2bdf73", "sort-locale/092-en-fa-alphabet-sample-sorted-fa.txt": "245419db", "sort-locale/092-en-fa-alphabet-sample-sorted-latin-fa.txt": "261c03c0", }, ) def setUp(self): TestGlossaryBase.setUp(self) self.prevLogLevel = log.level log.setLevel(logging.ERROR) def tearDown(self): TestGlossaryBase.tearDown(self) log.setLevel(self.prevLogLevel) def test__str__1(self): glos = self.glos = Glossary() self.assertEqual(str(glos), "Glossary{filename: '', name: None}") def test__str__2(self): glos = self.glos = Glossary() glos._filename = "test.txt" self.assertEqual(str(glos), "Glossary{filename: 'test.txt', name: None}") def test__str__3(self): glos = self.glos = Glossary() glos.setInfo("title", "Test Title") self.assertEqual( str(glos), "Glossary{filename: '', name: 'Test Title'}", ) def test__str__4(self): glos = self.glos = Glossary() glos._filename = "test.txt" glos.setInfo("title", "Test Title") self.assertEqual( str(glos), "Glossary{filename: 'test.txt', name: 'Test Title'}", ) def test_info_1(self): glos = self.glos = Glossary() glos.setInfo("test", "ABC") self.assertEqual(glos.getInfo("test"), "ABC") def test_info_2(self): glos = self.glos = Glossary() glos.setInfo("bookname", "Test Glossary") self.assertEqual(glos.getInfo("title"), "Test Glossary") def test_info_3(self): glos = self.glos = Glossary() glos.setInfo("bookname", "Test Glossary") glos.setInfo("title", "Test 2") self.assertEqual(glos.getInfo("name"), "Test 2") self.assertEqual(glos.getInfo("bookname"), "Test 2") self.assertEqual(glos.getInfo("title"), "Test 2") def test_info_4(self): glos = self.glos = Glossary() glos.setInfo("test", 123) self.assertEqual(glos.getInfo("test"), "123") def test_info_del_1(self): glos = self.glos = Glossary() glos.setInfo("test", "abc") self.assertEqual(glos.getInfo("test"), "abc") glos.setInfo("test", None) self.assertEqual(glos.getInfo("test"), "") def test_info_del_2(self): glos = self.glos = Glossary() glos.setInfo("test", None) self.assertEqual(glos.getInfo("test"), "") def test_setInfo_err1(self): glos = self.glos = Glossary() try: glos.setInfo(1, "a") except TypeError as e: self.assertEqual(str(e), "invalid key=1, must be str") else: self.fail("must raise a TypeError") def test_getInfo_err1(self): glos = self.glos = Glossary() try: glos.getInfo(1) except TypeError as e: self.assertEqual(str(e), "invalid key=1, must be str") else: self.fail("must raise a TypeError") def test_getExtraInfos_1(self): glos = self.glos = Glossary() glos.setInfo("a", "test 1") glos.setInfo("b", "test 2") glos.setInfo("c", "test 3") glos.setInfo("d", "test 4") glos.setInfo("name", "my name") self.assertEqual( glos.getExtraInfos(["b", "c", "title"]), {"a": "test 1", "d": "test 4"}, ) def test_infoKeys_1(self): glos = self.glos = Glossary() glos.setInfo("a", "test 1") glos.setInfo("b", "test 2") glos.setInfo("name", "test name") glos.setInfo("title", "test title") self.assertEqual( glos.infoKeys(), ["a", "b", "name"], ) def test_config_attr_get(self): glos = self.glos = Glossary() try: glos.config # noqa: B018 except NotImplementedError: pass else: self.fail("must raise NotImplementedError") def test_config_attr_set(self): glos = self.glos = Glossary() 
glos.config = {"lower": True} self.assertEqual(glos.getConfig("lower", False), True) def test_directRead_txt_1(self): inputFilename = self.downloadFile("100-en-fa.txt") glos = self.glos = Glossary() res = glos.directRead(filename=inputFilename) self.assertTrue(res) self.assertEqual(glos.sourceLangName, "English") self.assertEqual(glos.targetLangName, "Persian") self.assertIn("Sample: ", glos.getInfo("name")) entryCount = sum(1 for _ in glos) self.assertEqual(entryCount, 100) def test_lang_1(self): glos = self.glos = Glossary() self.assertEqual(glos.sourceLangName, "") self.assertEqual(glos.targetLangName, "") glos.sourceLangName = "ru" glos.targetLangName = "de" self.assertEqual(glos.sourceLangName, "Russian") self.assertEqual(glos.targetLangName, "German") def test_lang_get_source(self): glos = self.glos = Glossary() glos.setInfo("sourcelang", "farsi") self.assertEqual(glos.sourceLangName, "Persian") def test_lang_get_target(self): glos = self.glos = Glossary() glos.setInfo("targetlang", "malay") self.assertEqual(glos.targetLangName, "Malay") def test_lang_set_source(self): glos = self.glos = Glossary() glos.sourceLangName = "en" self.assertEqual(glos.sourceLangName, "English") def test_lang_set_source_empty(self): glos = self.glos = Glossary() glos.sourceLangName = "" self.assertEqual(glos.sourceLangName, "") def test_lang_set_target(self): glos = self.glos = Glossary() glos.targetLangName = "fa" self.assertEqual(glos.targetLangName, "Persian") def test_lang_set_target_empty(self): glos = self.glos = Glossary() glos.targetLangName = "" self.assertEqual(glos.targetLangName, "") def test_lang_getObj_source(self): glos = self.glos = Glossary() glos.setInfo("sourcelang", "farsi") self.assertEqual(glos.sourceLang.name, "Persian") def test_lang_getObj_target(self): glos = self.glos = Glossary() glos.setInfo("targetlang", "malay") self.assertEqual(glos.targetLang.name, "Malay") def test_lang_detect_1(self): glos = self.glos = Glossary() glos.setInfo("name", "en-fa") glos.detectLangsFromName() self.assertEqual( (glos.sourceLangName, glos.targetLangName), ("English", "Persian"), ) def test_lang_detect_2(self): glos = self.glos = Glossary() glos.setInfo("name", "test-en-fa") glos.detectLangsFromName() self.assertEqual( (glos.sourceLangName, glos.targetLangName), ("English", "Persian"), ) def test_lang_detect_3(self): glos = self.glos = Glossary() glos.setInfo("name", "eng to per") glos.detectLangsFromName() self.assertEqual( (glos.sourceLangName, glos.targetLangName), ("English", "Persian"), ) def test_lang_detect_4(self): glos = self.glos = Glossary() glos.setInfo("name", "Test english to farsi") glos.detectLangsFromName() self.assertEqual( (glos.sourceLangName, glos.targetLangName), ("English", "Persian"), ) def test_lang_detect_5(self): glos = self.glos = Glossary() glos.setInfo("name", "freedict-eng-deu.index") glos.detectLangsFromName() self.assertEqual( (glos.sourceLangName, glos.targetLangName), ("English", "German"), ) def convert_to_txtZip( self, fname, # input file with extension fname2, # expected output file without extensions testId="tmp", config=None, **convertKWArgs, ): inputFilename = self.downloadFile(fname) outputTxtName = f"{fname2}-{testId}.txt" outputFilename = self.newTempFilePath(f"{outputTxtName}.zip") expectedFilename = self.downloadFile(f"{fname2}.txt") glos = self.glos = Glossary() if config is not None: glos.config = config res = glos.convert( ConvertArgs( inputFilename=inputFilename, outputFilename=outputFilename, **convertKWArgs, ), ) self.assertEqual(outputFilename, 
res) zf = zipfile.ZipFile(outputFilename) self.assertTrue( outputTxtName in zf.namelist(), msg=f"{outputTxtName} not in {zf.namelist()}", ) with open(expectedFilename, encoding="utf-8") as expectedFile: expectedText = expectedFile.read() actualText = zf.read(outputTxtName).decode("utf-8") self.assertEqual(len(actualText), len(expectedText)) self.assertEqual(actualText, expectedText) def test_txt_txtZip_1(self): self.convert_to_txtZip( "100-en-fa.txt", "100-en-fa", testId="txt_txtZip_1", infoOverride={"input_file_size": None}, ) def test_sort_1(self): self.convert_txt_txt_sort( "100-en-fa", "100-en-fa-sort", testId="sort_1", ) def test_sort_2(self): self.convert_txt_txt_sort( "100-en-fa", "100-en-fa-sort", testId="sort_2", sortKeyName="headword_lower", ) def test_sort_3(self): self.convert_txt_txt_sort( "100-en-fa", "100-en-fa-sort-headword", testId="sort_3", sortKeyName="headword", ) def test_sort_4(self): self.convert_txt_txt_sort( "300-rand-en-fa", "300-rand-en-fa-sort-headword", testId="sort_4", sortKeyName="headword", ) def test_sort_5(self): self.convert_txt_txt_sort( "300-rand-en-fa", "300-rand-en-fa-sort-headword-w1256", testId="sort_5", sortKeyName="headword", sortEncoding="windows-1256", ) def test_sort_6(self): self.convert_txt_txt_sort( "300-rand-en-fa", "300-rand-en-fa-sort-w1256", testId="sort_6", sortKeyName="headword_lower", sortEncoding="windows-1256", ) def test_sort_7(self): self.convert_txt_txt_sort( "100-en-fa", "100-en-fa-sort-ebook", testId="sort_7", sortKeyName="ebook", ) def test_sort_8(self): self.convert_txt_txt_sort( "100-en-fa", "100-en-fa-sort-ebook3", testId="sort_8", sortKeyName="ebook_length3", ) def test_lower_1(self): self.convert_txt_txt( "100-en-fa", "100-en-fa-lower", testId="lower_1", config={"lower": True}, ) def test_rtl_1(self): self.convert_txt_txt( "100-en-fa", "100-en-fa-rtl", testId="rtl_1", config={"rtl": True}, ) def test_remove_html_all_1(self): self.convert_txt_txt( "100-en-fa", "100-en-fa-remove_html_all-v3", testId="remove_html_all_1", config={"remove_html_all": True}, ) def test_remove_html_1(self): self.convert_txt_txt( "100-en-de-v4", "100-en-de-v4-remove_font_b", testId="remove_html_1", config={"remove_html": "font,b"}, ) def test_save_info_json(self): fname = "100-en-fa" testId = "save_info_json" infoPath = self.newTempFilePath(f"{fname}-{testId}.info") self.convert_txt_txt( fname, fname, testId=testId, config={"save_info_json": True}, infoOverride={"input_file_size": None}, ) with open(infoPath, encoding="utf8") as _file: infoDict = json.load(_file) with open(self.downloadFile(f"{fname}-v2.info"), encoding="utf8") as _file: infoDictExpected = json.load(_file) for key, value in infoDictExpected.items(): self.assertIn(key, infoDict) self.assertEqual(value, infoDict.get(key)) def test_convert_sqlite_direct_error(self): glos = self.glos = Glossary() try: glos.convert( ConvertArgs( inputFilename="foo.txt", outputFilename="bar.txt", direct=True, sqlite=True, ), ) except ValueError as e: self.assertEqual(str(e), "Conflictng arguments: direct=True, sqlite=True") else: self.fail("must raise a ValueError") def test_txt_txt_bar(self): for direct in (None, False, True): self.convert_txt_txt( "004-bar", "004-bar", testId="bar", direct=direct, infoOverride={ "name": None, "input_file_size": None, }, ) def test_txt_txt_bar_sort(self): self.convert_txt_txt_sort( "004-bar", "004-bar-sort", testId="bar_sort", ) def test_txt_txt_empty_filtered(self): for direct in (None, False, True): self.convert_txt_txt( "006-empty", "006-empty-filtered", 
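			# the entry with an empty headword should be dropped by the default entry filters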
testId="empty_filtered", direct=direct, ) def test_txt_txt_empty_filtered_sqlite(self): for sqlite in (None, False, True): self.convert_txt_txt( "006-empty", "006-empty-filtered", testId="empty_filtered_sqlite", sqlite=sqlite, ) def test_dataEntry_save(self): glos = self.glos = Glossary() tmpFname = "test_dataEntry_save" entry = glos.newDataEntry(tmpFname, b"test") saveFpath = entry.save(self.tempDir) self.assertTrue( isfile(saveFpath), msg=f"saved file does not exist: {saveFpath}", ) def test_dataEntry_getFileName(self): glos = self.glos = Glossary() tmpFname = "test_dataEntry_getFileName" entry = glos.newDataEntry(tmpFname, b"test") self.assertEqual(entry.getFileName(), tmpFname) def test_cleanup_noFile(self): glos = self.glos = Glossary() glos.cleanup() def test_cleanup_cleanup(self): glos = self.glos = Glossary() tmpFname = "test_cleanup_cleanup" entry = glos.newDataEntry(tmpFname, b"test") tmpFpath = entry._tmpPath self.assertTrue(bool(tmpFpath), msg="entry tmpPath is empty") self.assertTrue( isfile(tmpFpath), msg=f"tmp file does not exist: {tmpFpath}", ) glos.cleanup() self.assertTrue( not isfile(tmpFpath), msg=f"tmp file still exists: {tmpFpath}", ) def test_cleanup_noCleanup(self): glos = self.glos = Glossary() tmpFname = "test_cleanup_noCleanup" entry = glos.newDataEntry(tmpFname, b"test") tmpFpath = entry._tmpPath self.assertTrue(bool(tmpFpath), msg="entry tmpPath is empty") self.assertTrue(isfile(tmpFpath), msg=f"tmp file does not exist: {tmpFpath}") glos.config = {"cleanup": False} glos.cleanup() self.assertTrue(isfile(tmpFpath), msg=f"tmp file does not exist: {tmpFpath}") def addWordsList(self, glos, words, newDefiFunc=str, defiFormat=""): wordsList = [] for index, line in enumerate(words): words = line.rstrip().split("|") wordsList.append(words) glos.addEntry( glos.newEntry( words, newDefiFunc(index), defiFormat=defiFormat, ), ) return wordsList def addWords(self, glos, wordsStr, **kwargs): return self.addWordsList(glos, wordsStr.split("\n"), **kwargs) tenWordsStr = """comedic tubenose organosol adipocere gid next friend bitter apple caca|ca-ca darkling beetle japonica""" tenWordsStr2 = """comedic Tubenose organosol Adipocere gid Next friend bitter apple Caca|ca-ca darkling beetle Japonica""" tenWordsStrFa = ( "بیمارانه\nگالوانومتر\nنقاهت\nرشک" "مندی\nناکاستنی\nشگفتآفرینی\nچندپاری\nنامبارکی\nآماسش\nانگیزنده" ) def test_addEntries_1(self): glos = self.glos = Glossary() wordsList = self.addWords( glos, self.tenWordsStr, newDefiFunc=lambda _i: str(random.randint(0, 10000)), ) self.assertEqual(wordsList, [entry.l_word for entry in glos]) def test_addEntries_2(self): # entry filters don't apply to loaded entries (added with addEntry) glos = self.glos = Glossary() glos.addEntry(glos.newEntry(["a"], "test 1")) glos.addEntry(glos.newEntry([""], "test 2")) glos.addEntry(glos.newEntry(["b"], "test 3")) glos.addEntry(glos.newEntry([], "test 4")) glos.updateEntryFilters() self.assertEqual( [["a"], [""], ["b"], []], [entry.l_word for entry in glos], ) def test_addEntries_3(self): glos = self.glos = Glossary() glos.addEntry(glos.newEntry(["a"], "test 1")) glos.addEntry(glos.newEntry(["b"], "test 3")) glos.addEntry( glos.newDataEntry( "file.bin", b"hello\x00world", ), ) glos.updateEntryFilters() wordListList = [] dataEntries = [] for entry in glos: wordListList.append(entry.l_word) if entry.isData(): dataEntries.append(entry) self.assertEqual( wordListList, [["a"], ["b"], ["file.bin"]], ) self.assertEqual(len(dataEntries), 1) self.assertEqual(dataEntries[0].getFileName(), "file.bin") 
		self.assertEqual(dataEntries[0].data, b"hello\x00world")

	def test_read_filename(self):
		glos = self.glos = Glossary()
		glos.directRead(self.downloadFile("004-bar.txt"))
		self.assertEqual(glos.filename, join(testCacheDir, "004-bar"))

	def test_wordTitleStr_em1(self):
		glos = self.glos = Glossary()
		self.assertEqual(glos.wordTitleStr(""), "")

	def test_wordTitleStr_em2(self):
		glos = self.glos = Glossary()
		glos._defiHasWordTitle = True
		self.assertEqual(glos.wordTitleStr("test1"), "")

	def test_wordTitleStr_b1(self):
		glos = self.glos = Glossary()
		self.assertEqual(glos.wordTitleStr("test1"), "<b>test1</b><br>")

	def test_wordTitleStr_b2(self):
		glos = self.glos = Glossary()
		self.assertEqual(
			glos.wordTitleStr("test1", class_="headword"),
			'<b class="headword">test1</b><br>',
		)

	def test_wordTitleStr_cjk1(self):
		glos = self.glos = Glossary()
		self.assertEqual(
			glos.wordTitleStr("test1", sample="くりかえし"),
			"<big>test1</big><br>",
		)

	def test_wordTitleStr_cjk2(self):
		glos = self.glos = Glossary()
		self.assertEqual(
			glos.wordTitleStr("くりかえし"),
			"<big>くりかえし</big><br>
          ", ) def test_convert_sortLocale_default_1(self): self.convert_txt_txt_sort( "092-en-fa-alphabet-sample", "092-en-fa-alphabet-sample-sorted-default", fnamePrefix="sort-locale/", testId="sorted-default", sortKeyName="headword_lower", ) def test_convert_sortLocale_en_1(self): self.convert_txt_txt_sort( "092-en-fa-alphabet-sample", "092-en-fa-alphabet-sample-sorted-en", fnamePrefix="sort-locale/", testId="sorted-en-headword_lower", sortKeyName="headword_lower:en_US.UTF-8", ) def test_convert_sortLocale_fa_1(self): self.convert_txt_txt_sort( "092-en-fa-alphabet-sample", "092-en-fa-alphabet-sample-sorted-fa", fnamePrefix="sort-locale/", testId="sorted-fa-headword_lower", sortKeyName="headword_lower:fa_IR.UTF-8", ) def test_convert_sortLocale_fa_2(self): self.convert_txt_txt_sort( "092-en-fa-alphabet-sample", "092-en-fa-alphabet-sample-sorted-latin-fa", fnamePrefix="sort-locale/", testId="sorted-latin-fa", sortKeyName="headword_lower:fa-u-kr-latn-arab", ) def test_convert_sortLocale_fa_3(self): self.convert_txt_txt_sort( "100-en-fa", "100-en-fa-sort-headword-fa", testId="sorted-fa-headword", sortKeyName="headword:fa", ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/gregorian_test.py000066400000000000000000000146571476751035500205230ustar00rootroot00000000000000import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary import gregorian def getMonthLen(y: int, m: int) -> int: if m == 12: return gregorian.to_jd(y + 1, 1, 1) - gregorian.to_jd(y, 12, 1) return gregorian.to_jd(y, m + 1, 1) - gregorian.to_jd(y, m, 1) class Testgregorian(unittest.TestCase): def notest_isLeap_negativeYear(self): print() isLeapFunc = gregorian.isLeap for year in range(10, -101, -1): isLeap = isLeapFunc(year) # print(f"{str(year).center(10)} {'L' if isLeap1 else ' '}") print(f'{year}: "{"L" if isLeap else " "}",') # year -> f"{'L' if isLeap33 else ' '}{'L' if isLeap2820 else ' '}" isLeapDict = { -50: " ", -49: " ", -48: "L", -47: " ", -46: " ", -45: " ", -44: "L", -43: " ", -42: " ", -41: " ", -40: "L", -39: " ", -38: " ", -37: " ", -36: "L", -35: " ", -34: " ", -33: " ", -32: "L", -31: " ", -30: " ", -29: " ", -28: "L", -27: " ", -26: " ", -25: " ", -24: "L", -23: " ", -22: " ", -21: " ", -20: "L", -19: " ", -18: " ", -17: " ", -16: "L", -15: " ", -14: " ", -13: " ", -12: "L", -11: " ", -10: " ", -9: " ", -8: "L", -7: " ", -6: " ", -5: " ", -4: "L", -3: " ", -2: " ", -1: " ", 0: "L", 1: " ", 2: " ", 3: " ", 4: "L", 5: " ", 6: " ", 7: " ", 8: "L", 9: " ", 10: " ", 11: " ", 12: "L", 13: " ", 14: " ", 15: " ", 16: "L", 17: " ", 18: " ", 19: " ", 20: "L", 21: " ", 22: " ", 23: " ", 24: "L", 25: " ", 26: " ", 27: " ", 28: "L", 29: " ", 30: " ", 31: " ", 32: "L", 33: " ", 34: " ", 35: " ", 36: "L", 37: " ", 38: " ", 39: " ", 40: "L", 41: " ", 42: " ", 43: " ", 44: "L", 45: " ", 46: " ", 47: " ", 48: "L", 49: " ", 50: " ", 1990: " ", 1991: " ", 1992: "L", 1993: " ", 1994: " ", 1995: " ", 1996: "L", 1997: " ", 1998: " ", 1999: " ", 2000: "L", 2001: " ", 2002: " ", 2003: " ", 2004: "L", 2005: " ", 2006: " ", 2007: " ", 2008: "L", 2009: " ", 2010: " ", 2011: " ", 2012: "L", 2013: " ", 2014: " ", 2015: " ", 2016: "L", 2017: " ", 2018: " ", 2019: " ", 2020: "L", 2021: " ", 2022: " ", 2023: " ", 2024: "L", 2025: " ", 2026: " ", 2027: " ", 2028: "L", 2029: " ", } dateToJdDict = { (-50, 1, 1): 1702798, (-49, 1, 1): 1703163, (-48, 1, 1): 1703528, (-47, 1, 1): 1703894, (-46, 1, 1): 1704259, (-45, 1, 1): 1704624, 
(-44, 1, 1): 1704989, (-43, 1, 1): 1705355, (-42, 1, 1): 1705720, (-41, 1, 1): 1706085, (-40, 1, 1): 1706450, (-39, 1, 1): 1706816, (-38, 1, 1): 1707181, (-37, 1, 1): 1707546, (-36, 1, 1): 1707911, (-35, 1, 1): 1708277, (-34, 1, 1): 1708642, (-33, 1, 1): 1709007, (-32, 1, 1): 1709372, (-31, 1, 1): 1709738, (-30, 1, 1): 1710103, (-29, 1, 1): 1710468, (-28, 1, 1): 1710833, (-27, 1, 1): 1711199, (-26, 1, 1): 1711564, (-25, 1, 1): 1711929, (-24, 1, 1): 1712294, (-23, 1, 1): 1712660, (-22, 1, 1): 1713025, (-21, 1, 1): 1713390, (-20, 1, 1): 1713755, (-19, 1, 1): 1714121, (-18, 1, 1): 1714486, (-17, 1, 1): 1714851, (-16, 1, 1): 1715216, (-15, 1, 1): 1715582, (-14, 1, 1): 1715947, (-13, 1, 1): 1716312, (-12, 1, 1): 1716677, (-11, 1, 1): 1717043, (-10, 1, 1): 1717408, (-9, 1, 1): 1717773, (-8, 1, 1): 1718138, (-7, 1, 1): 1718504, (-6, 1, 1): 1718869, (-5, 1, 1): 1719234, (-4, 1, 1): 1719599, (-3, 1, 1): 1719965, (-2, 1, 1): 1720330, (-1, 1, 1): 1720695, (0, 1, 1): 1721060, (1, 1, 1): 1721426, (2, 1, 1): 1721791, (3, 1, 1): 1722156, (4, 1, 1): 1722521, (5, 1, 1): 1722887, (6, 1, 1): 1723252, (7, 1, 1): 1723617, (8, 1, 1): 1723982, (9, 1, 1): 1724348, (10, 1, 1): 1724713, (11, 1, 1): 1725078, (12, 1, 1): 1725443, (13, 1, 1): 1725809, (14, 1, 1): 1726174, (15, 1, 1): 1726539, (16, 1, 1): 1726904, (17, 1, 1): 1727270, (18, 1, 1): 1727635, (19, 1, 1): 1728000, (20, 1, 1): 1728365, (21, 1, 1): 1728731, (22, 1, 1): 1729096, (23, 1, 1): 1729461, (24, 1, 1): 1729826, (25, 1, 1): 1730192, (26, 1, 1): 1730557, (27, 1, 1): 1730922, (28, 1, 1): 1731287, (29, 1, 1): 1731653, (30, 1, 1): 1732018, (31, 1, 1): 1732383, (32, 1, 1): 1732748, (33, 1, 1): 1733114, (34, 1, 1): 1733479, (35, 1, 1): 1733844, (36, 1, 1): 1734209, (37, 1, 1): 1734575, (38, 1, 1): 1734940, (39, 1, 1): 1735305, (40, 1, 1): 1735670, (41, 1, 1): 1736036, (42, 1, 1): 1736401, (43, 1, 1): 1736766, (44, 1, 1): 1737131, (45, 1, 1): 1737497, (46, 1, 1): 1737862, (47, 1, 1): 1738227, (48, 1, 1): 1738592, (49, 1, 1): 1738958, (50, 1, 1): 1739323, (2015, 1, 1): 2457024, (2015, 2, 1): 2457055, (2015, 3, 1): 2457083, (2015, 4, 1): 2457114, (2015, 5, 1): 2457144, (2015, 6, 1): 2457175, (2015, 7, 1): 2457205, (2015, 8, 1): 2457236, (2015, 9, 1): 2457267, (2015, 10, 1): 2457297, (2015, 11, 1): 2457328, (2015, 12, 1): 2457358, (2016, 1, 1): 2457389, (2016, 2, 1): 2457420, (2016, 3, 1): 2457449, (2016, 4, 1): 2457480, (2016, 5, 1): 2457510, (2016, 6, 1): 2457541, (2016, 7, 1): 2457571, (2016, 8, 1): 2457602, (2016, 9, 1): 2457633, (2016, 10, 1): 2457663, (2016, 11, 1): 2457694, (2016, 12, 1): 2457724, (2017, 1, 1): 2457755, (2017, 2, 1): 2457786, (2017, 3, 1): 2457814, (2017, 4, 1): 2457845, (2017, 5, 1): 2457875, (2017, 6, 1): 2457906, (2017, 7, 1): 2457936, (2017, 8, 1): 2457967, (2017, 9, 1): 2457998, (2017, 10, 1): 2458028, (2017, 11, 1): 2458059, (2017, 12, 1): 2458089, } def test_isLeap(self): for year, isLeapStr in self.isLeapDict.items(): isLeap = isLeapStr == "L" isLeapActual = gregorian.isLeap(year) self.assertEqual( isLeapActual, isLeap, f"{year=}, {isLeap=}, {isLeapActual=}", ) def test_to_jd(self): for date, jd in self.dateToJdDict.items(): jdActual = gregorian.to_jd(*date) self.assertEqual( jdActual, jd, f"{date=}, {jd=}, {jdActual=}", ) def test_convert(self): startYear = 1950 endYear = 2050 for year in range(startYear, endYear): for month in range(1, 13): monthLen = getMonthLen(year, month) for day in range(1, monthLen + 1): date = (year, month, day) jd = gregorian.to_jd(*date) ndate = gregorian.jd_to(jd) self.assertEqual( ndate, date, 
f"{jd=}, {date=}, {ndate=}", ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/html_utils_test.py000066400000000000000000000036761476751035500207310ustar00rootroot00000000000000# -*- coding: utf-8 -*- import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.html_utils import unescape_unicode class UnescapeUnicodeTest(unittest.TestCase): def case(self, text, expected): actual = unescape_unicode(text) self.assertEqual(actual, expected) def test(self): self.case("<", "<") self.case(">", ">") self.case("&", "&") self.case(""", """) self.case("'", "'") self.case(" ", " ") self.case(" ", " ") self.case("<á>", "<á>") self.case("/wəːkiŋtiːm/", "/wəːkiŋtiːm/") # Babylon dictionaries contain a lot of non-standard entity, # references for example, csdot, fllig, nsm, cancer, thlig, # tsdot, upslur... self.case("<&etilde;", "<ẽ") self.case("<⅓", "<⅓") self.case("<⅔", "<⅔") self.case("<ĩ", "<ĩ") self.case("<&ldash;", "<–") self.case("<ů", "<ů") self.case("<ũ", "<ũ") self.case("<&wring;", "<ẘ") self.case("<&xfrac13;", "<⅓") self.case("<ŷ", "<ŷ") self.case("<&ygrave;", "<ỳ") self.case("<&yring;", "<ẙ") self.case("<&ytilde;", "<ỹ") def benchmark_main(): import timeit from random import choice from english_words import english_words_set english_words_list = list(english_words_set) textList = [] for _ in range(20): text = "" for _ in range(10): text += choice(english_words_list) + " " textList.append(text) print("avg length:", sum(len(text) for text in textList) / len(textList)) def run_benchmark1(): for text in textList: unescape_unicode(text) print("benchmark 1:", timeit.timeit("run_benchmark1()", globals=locals())) if __name__ == "__main__": if "-b" in sys.argv: benchmark_main() else: unittest.main() pyglossary-5.0.9/tests/option_test.py000066400000000000000000000144071476751035500200470ustar00rootroot00000000000000from __future__ import annotations import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.option import ( BoolOption, DictOption, FileSizeOption, FloatOption, IntOption, ListOption, StrOption, ) class TestOptionValidateBoolNumber(unittest.TestCase): def caseOK(self, cls, raw: str, value: bool | None): opt = cls() valueActual, ok = opt.evaluate(raw) self.assertTrue(ok, "evaluate failed") self.assertEqual(valueActual, value) ok2 = opt.validate(valueActual) self.assertEqual(ok2, True, "validate failed") def caseFailed(self, cls, raw: str, value: bool | None): opt = cls() valueActual, ok = opt.evaluate(raw) self.assertFalse(ok) self.assertEqual(valueActual, value) def test_bool_ok(self): self.caseOK(BoolOption, "True", True) self.caseOK(BoolOption, "False", False) self.caseOK(BoolOption, "true", True) self.caseOK(BoolOption, "false", False) self.caseOK(BoolOption, "TRUE", True) self.caseOK(BoolOption, "FALSE", False) self.caseOK(BoolOption, "1", True) self.caseOK(BoolOption, "0", False) self.caseOK(BoolOption, "yes", True) self.caseOK(BoolOption, "no", False) self.caseOK(BoolOption, "YES", True) self.caseOK(BoolOption, "NO", False) def test_bool_failed(self): self.caseFailed(BoolOption, "Y", None) self.caseFailed(BoolOption, "N", None) self.caseFailed(BoolOption, "YESS", None) self.caseFailed(BoolOption, "123", None) self.caseFailed(BoolOption, "a", None) def test_int_ok(self): self.caseOK(IntOption, "0", 0) self.caseOK(IntOption, "1", 1) self.caseOK(IntOption, "-1", -1) 
self.caseOK(IntOption, "1234", 1234) def test_int_failed(self): self.caseFailed(IntOption, "abc", None) self.caseFailed(IntOption, "12f", None) self.caseFailed(IntOption, "fff", None) def test_file_size_ok(self): self.caseOK(FileSizeOption, "0", 0) self.caseOK(FileSizeOption, "1", 1) self.caseOK(FileSizeOption, "1234", 1234) self.caseOK(FileSizeOption, "123k", 123000) self.caseOK(FileSizeOption, "123m", 123000000) self.caseOK(FileSizeOption, "1.7g", 1700000000) self.caseOK(FileSizeOption, "123kib", 123 * 1024) self.caseOK(FileSizeOption, "123KiB", 123 * 1024) self.caseOK(FileSizeOption, "123ki", 123 * 1024) self.caseOK(FileSizeOption, "123Ki", 123 * 1024) self.caseOK(FileSizeOption, "123mib", 123 * 1024**2) self.caseOK(FileSizeOption, "123MiB", 123 * 1024**2) self.caseOK(FileSizeOption, "123mi", 123 * 1024**2) self.caseOK(FileSizeOption, "123Mi", 123 * 1024**2) self.caseOK(FileSizeOption, "1.7gib", int(1.7 * 1024**3)) self.caseOK(FileSizeOption, "1.7GiB", int(1.7 * 1024**3)) self.caseOK(FileSizeOption, "1.7gi", int(1.7 * 1024**3)) self.caseOK(FileSizeOption, "1.7Gi", int(1.7 * 1024**3)) def test_file_size_failed(self): self.caseFailed(FileSizeOption, "-1", None) self.caseFailed(FileSizeOption, "123kg", None) self.caseFailed(FileSizeOption, "123k.1", None) def test_float_ok(self): self.caseOK(FloatOption, "0", 0.0) self.caseOK(FloatOption, "1", 1.0) self.caseOK(FloatOption, "-1", -1.0) self.caseOK(FloatOption, "1234", 1234.0) self.caseOK(FloatOption, "1.5", 1.5) self.caseOK(FloatOption, "-7.9", -7.9) def test_float_failed(self): self.caseFailed(FloatOption, "abc", None) self.caseFailed(FloatOption, "12f", None) self.caseFailed(FloatOption, "fff", None) class TestOptionValidateStr(unittest.TestCase): def newTester(self, customValue: bool, values: list[str]): def test(raw: str, valid: bool): opt = StrOption(customValue=customValue, values=values) valueActual, evalOkActual = opt.evaluate(raw) self.assertEqual(evalOkActual, True, "evaluate failed") self.assertEqual(valueActual, raw) validActual = opt.validate(valueActual) self.assertEqual(validActual, valid, "validate failed") return test def test_1(self): test = self.newTester(False, ["a", "b", "c"]) test("a", True) test("b", True) test("c", True) test("d", False) test("123", False) def test_2(self): test = self.newTester(True, ["a", "b", "3"]) test("a", True) test("b", True) test("c", True) test("d", True) test("123", True) class TestOptionValidateDict(unittest.TestCase): def caseOK(self, raw: str, value: dict | None): opt = DictOption() valueActual, ok = opt.evaluate(raw) self.assertTrue(ok, "evaluate failed") self.assertEqual(valueActual, value) ok2 = opt.validate(valueActual) self.assertEqual(ok2, True, "validate failed") def caseEvalFail(self, raw: str): opt = DictOption() valueActual, ok = opt.evaluate(raw) self.assertFalse(ok) self.assertEqual(valueActual, None) def test_dict_ok(self): self.caseOK("", None) self.caseOK("{}", {}) self.caseOK('{"a": 1}', {"a": 1}) self.caseOK('{"a": "b", "123":456}', {"a": "b", "123": 456}) def test_dict_syntaxErr(self): self.caseEvalFail("123abc") self.caseEvalFail("{") self.caseEvalFail("(") self.caseEvalFail('{"a": 1') self.caseEvalFail('{"a": 1]') self.caseEvalFail("][") def test_dict_notDict(self): self.caseEvalFail("123") self.caseEvalFail("[]") self.caseEvalFail("[1, 2, 3]") self.caseEvalFail('["a", 2, 3.5]') self.caseEvalFail("{10, 20, 30}") class TestOptionValidateList(unittest.TestCase): def caseOK(self, raw: str, value: dict | None): opt = ListOption() valueActual, ok = opt.evaluate(raw) 
self.assertTrue(ok, "evaluate failed") self.assertEqual(valueActual, value) ok2 = opt.validate(valueActual) self.assertEqual(ok2, True, "validate failed") def caseEvalFail(self, raw: str): opt = ListOption() valueActual, ok = opt.evaluate(raw) self.assertFalse(ok, f"evaluale did not fail, {valueActual=}") self.assertEqual(valueActual, None) def test_list_ok(self): self.caseOK("", None) self.caseOK("[]", []) self.caseOK('["a", "b"]', ["a", "b"]) self.caseOK("[1, 2, 3]", [1, 2, 3]) self.caseOK('["a", 2, 3.5]', ["a", 2, 3.5]) def test_list_syntaxErr(self): self.caseEvalFail("123abc") self.caseEvalFail("{") self.caseEvalFail("(") self.caseEvalFail('{"a": 1') self.caseEvalFail('{"a": 1]') self.caseEvalFail("][") def test_list_notList(self): self.caseEvalFail("123") self.caseEvalFail("{10, 20, 30}") self.caseEvalFail('{"a": 1}') self.caseEvalFail('{"a": "b", "123":456}') if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/slob_test.py000066400000000000000000000450141476751035500174740ustar00rootroot00000000000000import io import logging import os import random import sys import tempfile import unicodedata import unittest from os.path import abspath, dirname from typing import cast rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) import icu from pyglossary import slob from pyglossary.core_test import MockLogHandler mockLog = MockLogHandler() log = logging.getLogger("pyglossary") log.addHandler(mockLog) class StructReaderWriter(slob.StructWriter): def __init__( self, file: "io.BufferedWriter", reader: "slob.StructReader", encoding: "str | None" = None, ) -> None: super().__init__( file=file, encoding=encoding, ) self._reader = reader def tell(self) -> int: return self._file.tell() def write(self, data: bytes) -> int: return self._file.write(data) def read_byte(self) -> int: return self._reader.read_byte() def read_tiny_text(self) -> str: return self._reader.read_tiny_text() class TagNotFound(Exception): pass def set_tag_value(filename: str, name: str, value: str) -> None: with slob.fopen(filename, "rb+") as file: file.seek(len(slob.MAGIC) + 16) encoding = slob.read_byte_string(file, slob.U_CHAR).decode(slob.UTF8) if slob.encodings.search_function(encoding) is None: raise slob.UnknownEncoding(encoding) reader = StructReaderWriter( file=file, reader=slob.StructReader(file, encoding=encoding), encoding=encoding, ) reader.read_tiny_text() tag_count = reader.read_byte() for _ in range(tag_count): key = reader.read_tiny_text() if key == name: reader.write_tiny_text(value, editable=True) return reader.read_tiny_text() raise TagNotFound(name) class BaseTest(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.TemporaryDirectory(prefix="test") self._writers = [] def tearDown(self): for w in self._writers: w.close() self.tmpdir.cleanup() def _observer(self, event: "slob.WriterEvent"): log.info(f"slob: {event.name}{': ' + event.data if event.data else ''}") def create(self, *args, observer=None, **kwargs): if observer is None: observer = self._observer w = slob.Writer(*args, observer=observer, **kwargs) self._writers.append(w) return w class TestReadWrite(BaseTest): def setUp(self): BaseTest.setUp(self) self.path = os.path.join(self.tmpdir.name, "test.slob") writer = self.create(self.path) self.tags = { "a": "abc", "bb": "xyz123", "ccc": "lkjlk", } for name, value in self.tags.items(): writer.tag(name, value) self.tag2 = "bb", "xyz123" self.blob_encoding = "ascii" self.data = [ (("c", "cc", "ccc"), slob.MIME_TEXT, "Hello C 1"), ("a", slob.MIME_TEXT, "Hello A 
12"), ("z", slob.MIME_TEXT, "Hello Z 123"), ("b", slob.MIME_TEXT, "Hello B 1234"), ("d", slob.MIME_TEXT, "Hello D 12345"), ("uuu", slob.MIME_HTML, "Hello U!"), ((("yy", "frag1"),), slob.MIME_HTML, '
          <h1 id="frag1">Section 1</h1>
          '), ] self.all_keys = [] self.data_as_dict = {} for k, t, v in self.data: if isinstance(k, str): k = (k,) # noqa: PLW2901 for key in k: if isinstance(key, tuple): key, fragment = key # noqa: PLW2901 else: fragment = "" self.all_keys.append(key) self.data_as_dict[key] = (t, v, fragment) writer.add(v.encode(self.blob_encoding), *k, content_type=t) self.all_keys.sort() writer.finalize() self.w = writer def test_header(self): with slob.MultiFileReader(self.path) as f: header = slob.read_header(f) for key, value in self.tags.items(): self.assertEqual(header.tags[key], value) self.assertEqual(self.w.encoding, slob.UTF8) self.assertEqual(header.encoding, self.w.encoding) self.assertEqual(header.compression, self.w.compression) for i, content_type in enumerate(header.content_types): self.assertEqual(self.w.content_types[content_type], i) self.assertEqual(header.blob_count, len(self.data)) def test_content(self): with slob.open(self.path) as r: self.assertEqual(len(r), len(self.all_keys)) self.assertRaises(IndexError, r.__getitem__, len(self.all_keys)) for i, item in enumerate(r): self.assertEqual(item.key, self.all_keys[i]) content_type, value, fragment = self.data_as_dict[item.key] self.assertEqual(item.content_type, content_type) self.assertEqual(item.content.decode(self.blob_encoding), value) self.assertEqual(item.fragment, fragment) class TestSort(BaseTest): def setUp(self): BaseTest.setUp(self) self.path = os.path.join(self.tmpdir.name, "test.slob") writer = self.create(self.path) data = [ "Ф, ф", "Ф ф", "Ф", "Э", "Е е", "г", "н", "ф", "а", "Ф, Ф", "е", "Е", "Ее", "ё", "Ё", "Её", "Е ё", "А", "э", "ы", ] self.data_sorted = sorted(data, key=slob.sortkey(slob.IDENTICAL)) for k in data: v = ";".join(unicodedata.name(c) for c in k) writer.add(v.encode("ascii"), k) writer.finalize() self.r = slob.open(self.path) def test_sort_order(self): for i in range(len(self.r)): self.assertEqual(self.r[i].key, self.data_sorted[i]) def tearDown(self): self.r.close() BaseTest.tearDown(self) class TestSortKey(BaseTest): def setUp(self): BaseTest.setUp(self) self.data = [ "Ф, ф", "Ф ф", "Ф", "Э", "Е е", "г", "н", "ф", "а", "Ф, Ф", "е", "Е", "Ее", "ё", "Ё", "Её", "Е ё", "А", "э", "ы", ] self.data_sorted = [ "а", "А", "г", "е", "Е", "ё", "Ё", "Е е", "Ее", "Е ё", "Её", "н", "ф", "Ф", "Ф ф", "Ф, ф", "Ф, Ф", "ы", "э", "Э", ] def test_sort_order(self): for locName in ( # en_US_POSIX on Mac OS X # https://github.com/ilius/pyglossary/issues/458 "en_US_POSIX", "en_US", "en_CA", "fa_IR.UTF-8", ): icu.Locale.setDefault(icu.Locale(locName)) slob.sortkey.cache_clear() data_sorted = sorted(self.data, key=slob.sortkey(slob.IDENTICAL)) self.assertEqual(self.data_sorted, data_sorted) class TestFind(BaseTest): def setUp(self): BaseTest.setUp(self) self.path = os.path.join(self.tmpdir.name, "test.slob") writer = self.create(self.path) data = [ "Cc", "aA", "aa", "Aa", "Bb", "cc", "Äā", "ăÀ", "a\u00a0a", "a-a", "a\u2019a", "a\u2032a", "a,a", "a a", ] for k in data: v = ";".join(unicodedata.name(c) for c in k) writer.add(v.encode("ascii"), k) writer.finalize() self.r = slob.open(self.path) def get(self, d, key): return [item.content.decode("ascii") for item in d[key]] def test_find_identical(self): d = self.r.as_dict(slob.IDENTICAL) self.assertEqual( self.get(d, "aa"), ["LATIN SMALL LETTER A;LATIN SMALL LETTER A"], ) self.assertEqual( self.get(d, "a-a"), ["LATIN SMALL LETTER A;HYPHEN-MINUS;LATIN SMALL LETTER A"], ) self.assertEqual( self.get(d, "aA"), ["LATIN SMALL LETTER A;LATIN CAPITAL LETTER A"], ) self.assertEqual( 
self.get(d, "Äā"), [ "LATIN CAPITAL LETTER A WITH DIAERESIS;" "LATIN SMALL LETTER A WITH MACRON", ], ) self.assertEqual( self.get(d, "a a"), ["LATIN SMALL LETTER A;SPACE;LATIN SMALL LETTER A"], ) def test_find_quaternary(self): d = self.r.as_dict(slob.QUATERNARY) self.assertEqual( self.get(d, "a\u2032a"), ["LATIN SMALL LETTER A;PRIME;LATIN SMALL LETTER A"], ) self.assertEqual( self.get(d, "a a"), [ "LATIN SMALL LETTER A;SPACE;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;NO-BREAK SPACE;LATIN SMALL LETTER A", ], ) def test_find_tertiary(self): d = self.r.as_dict(slob.TERTIARY) self.assertEqual( self.get(d, "aa"), [ "LATIN SMALL LETTER A;SPACE;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;NO-BREAK SPACE;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;HYPHEN-MINUS;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;COMMA;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;RIGHT SINGLE QUOTATION MARK;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;PRIME;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;LATIN SMALL LETTER A", ], ) def test_find_secondary(self): d = self.r.as_dict(slob.SECONDARY) self.assertEqual( self.get(d, "aa"), [ "LATIN SMALL LETTER A;SPACE;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;NO-BREAK SPACE;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;HYPHEN-MINUS;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;COMMA;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;RIGHT SINGLE QUOTATION MARK;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;PRIME;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;LATIN CAPITAL LETTER A", "LATIN CAPITAL LETTER A;LATIN SMALL LETTER A", ], ) def test_find_primary(self): d = self.r.as_dict(slob.PRIMARY) expected = [ "LATIN SMALL LETTER A;SPACE;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;NO-BREAK SPACE;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;HYPHEN-MINUS;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;COMMA;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;RIGHT SINGLE QUOTATION MARK;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;PRIME;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;LATIN SMALL LETTER A", "LATIN SMALL LETTER A;LATIN CAPITAL LETTER A", "LATIN CAPITAL LETTER A;LATIN SMALL LETTER A", "LATIN SMALL LETTER A WITH BREVE;LATIN CAPITAL LETTER A WITH GRAVE", "LATIN CAPITAL LETTER A WITH DIAERESIS;LATIN SMALL LETTER A WITH MACRON", ] self.assertEqual( self.get(d, "aa"), expected, ) def tearDown(self): self.r.close() BaseTest.tearDown(self) class TestPrefixFind(BaseTest): def setUp(self): BaseTest.setUp(self) self.path = os.path.join(self.tmpdir.name, "test.slob") self.data = ["a", "ab", "abc", "abcd", "abcde"] writer = self.create(self.path) for k in self.data: writer.add(k.encode("ascii"), k) writer.finalize() def test(self): with slob.open(self.path) as r: for i, k in enumerate(self.data): d = r.as_dict(slob.IDENTICAL, len(k)) self.assertEqual( [cast("slob.Blob", v).content.decode("ascii") for v in d[k]], self.data[i:], ) class TestAlias(BaseTest): def setUp(self): BaseTest.setUp(self) self.path = os.path.join(self.tmpdir.name, "test.slob") def test_alias(self): too_many_redirects = [] target_not_found = [] def observer(event): if event.name == "too_many_redirects": too_many_redirects.append(event.data) elif event.name == "alias_target_not_found": target_not_found.append(event.data) w = self.create(self.path, observer=observer) data = ["z", "b", "q", "a", "u", "g", "p", "n"] for k in data: v = ";".join(unicodedata.name(c) for c in k) w.add(v.encode("ascii"), k) w.add_alias("w", "u") w.add_alias("small u", "u") w.add_alias("y1", "y2") w.add_alias("y2", 
"y3") w.add_alias("y3", "z") w.add_alias("ZZZ", "YYY") w.add_alias("l3", "l1") w.add_alias("l1", "l2") w.add_alias("l2", "l3") w.add_alias("a1", ("a", "a-frag1")) w.add_alias("a2", "a1") w.add_alias("a3", ("a2", "a-frag2")) w.add_alias("g1", "g") w.add_alias("g2", ("g1", "g-frag1")) w.add_alias("n or p", "n") w.add_alias("n or p", "p") w.finalize() self.assertEqual(too_many_redirects, ["l1", "l2", "l3"]) self.assertEqual(target_not_found, ["l2", "l3", "l1", "YYY"]) with slob.open(self.path) as r: d = r.as_dict() def get(key): return [item.content.decode("ascii") for item in d[key]] self.assertEqual(get("w"), ["LATIN SMALL LETTER U"]) self.assertEqual(get("small u"), ["LATIN SMALL LETTER U"]) self.assertEqual(get("y1"), ["LATIN SMALL LETTER Z"]) self.assertEqual(get("y2"), ["LATIN SMALL LETTER Z"]) self.assertEqual(get("y3"), ["LATIN SMALL LETTER Z"]) self.assertEqual(get("ZZZ"), []) self.assertEqual(get("l1"), []) self.assertEqual(get("l2"), []) self.assertEqual(get("l3"), []) self.assertEqual(len(list(d["n or p"])), 2) item_a1 = cast("slob.Blob", next(d["a1"])) self.assertEqual(item_a1.content, b"LATIN SMALL LETTER A") self.assertEqual(item_a1.fragment, "a-frag1") item_a2 = cast("slob.Blob", next(d["a2"])) self.assertEqual(item_a2.content, b"LATIN SMALL LETTER A") self.assertEqual(item_a2.fragment, "a-frag1") item_a3 = cast("slob.Blob", next(d["a3"])) self.assertEqual(item_a3.content, b"LATIN SMALL LETTER A") self.assertEqual(item_a3.fragment, "a-frag1") item_g1 = cast("slob.Blob", next(d["g1"])) self.assertEqual(item_g1.content, b"LATIN SMALL LETTER G") self.assertEqual(item_g1.fragment, "") item_g2 = cast("slob.Blob", next(d["g2"])) self.assertEqual(item_g2.content, b"LATIN SMALL LETTER G") self.assertEqual(item_g2.fragment, "g-frag1") class TestBlobId(BaseTest): def test(self): max_i = 2**32 - 1 max_j = 2**16 - 1 i_values = [0, max_i] + [random.randint(1, max_i - 1) for _ in range(100)] j_values = [0, max_j] + [random.randint(1, max_j - 1) for _ in range(100)] for i in i_values: for j in j_values: self.assertEqual(slob.unmeld_ints(slob.meld_ints(i, j)), (i, j)) class TestMultiFileReader(BaseTest): def test_read_all(self): fnames = [] for name in "abcdef": path = os.path.join(self.tmpdir.name, name) fnames.append(path) with slob.fopen(path, "wb") as f: f.write(name.encode(slob.UTF8)) with slob.MultiFileReader(*fnames) as m: self.assertEqual(m.read().decode(slob.UTF8), "abcdef") def test_seek_and_read(self): def mkfile(basename, content): part = os.path.join(self.tmpdir.name, basename) with slob.fopen(part, "wb") as f: f.write(content) return part content = b"abc\nd\nefgh\nij" part1 = mkfile("1", content[:4]) part2 = mkfile("2", content[4:5]) part3 = mkfile("3", content[5:]) with slob.MultiFileReader(part1, part2, part3) as m: self.assertEqual(m.size, len(content)) m.seek(2) self.assertEqual(m.read(2), content[2:4]) m.seek(1) self.assertEqual(m.read(len(content) - 2), content[1:-1]) m.seek(-1, whence=io.SEEK_END) self.assertEqual(m.read(10), content[-1:]) m.seek(4) m.seek(-2, whence=io.SEEK_CUR) self.assertEqual(m.read(3), content[2:5]) class TestFormatErrors(BaseTest): def test_wrong_file_type(self): name = os.path.join(self.tmpdir.name, "1") with slob.fopen(name, "wb") as f: f.write(b"123") self.assertRaises(slob.UnknownFileFormat, slob.open, name) def test_truncated_file(self): name = os.path.join(self.tmpdir.name, "1") writer = self.create(name) writer.add(b"123", "a") writer.add(b"234", "b") writer.finalize() with slob.fopen(name, "rb") as f: all_bytes = f.read() with 
slob.fopen(name, "wb") as f: f.write(all_bytes[:-1]) self.assertRaises(slob.IncorrectFileSize, slob.open, name) with slob.fopen(name, "wb") as f: f.write(all_bytes) f.write(b"\n") self.assertRaises(slob.IncorrectFileSize, slob.open, name) class TestTooLongText(BaseTest): def setUp(self): BaseTest.setUp(self) self.path = os.path.join(self.tmpdir.name, "test.slob") def test_too_long(self): rejected_keys = [] rejected_aliases = [] rejected_alias_targets = [] rejected_tags = [] rejected_content_types = [] def observer(event): if event.name == "key_too_long": rejected_keys.append(event.data) elif event.name == "alias_too_long": rejected_aliases.append(event.data) elif event.name == "alias_target_too_long": rejected_alias_targets.append(event.data) elif event.name == "tag_name_too_long": rejected_tags.append(event.data) elif event.name == "content_type_too_long": rejected_content_types.append(event.data) long_tag_name = "t" * (slob.MAX_TINY_TEXT_LEN + 1) long_tag_value = "v" * (slob.MAX_TINY_TEXT_LEN + 1) long_content_type = "T" * (slob.MAX_TEXT_LEN + 1) long_key = "c" * (slob.MAX_TEXT_LEN + 1) long_frag = "d" * (slob.MAX_TINY_TEXT_LEN + 1) key_with_long_frag = ("d", long_frag) tag_with_long_name = (long_tag_name, "t3 value") tag_with_long_value = ("t1", long_tag_value) long_alias = "f" * (slob.MAX_TEXT_LEN + 1) alias_with_long_frag = ("i", long_frag) long_alias_target = long_key long_alias_target_frag = key_with_long_frag w = self.create(self.path, observer=observer) w.tag(*tag_with_long_value) w.tag("t2", "t2 value") w.tag(*tag_with_long_name) data = ["a", "b", long_key, key_with_long_frag] for k in data: v = k.encode("ascii") if isinstance(k, str) else "#".join(k).encode("ascii") w.add(v, k) w.add_alias("e", "a") w.add_alias(long_alias, "a") w.add_alias(alias_with_long_frag, "a") w.add_alias("g", long_alias_target) w.add_alias("h", long_alias_target_frag) w.add(b"Hello", "hello", content_type=long_content_type) w.finalize() self.assertEqual( rejected_keys, [long_key, key_with_long_frag], ) self.assertEqual( rejected_aliases, [long_alias, alias_with_long_frag], ) self.assertEqual( rejected_alias_targets, [long_alias_target, long_alias_target_frag], ) self.assertEqual( rejected_tags, [tag_with_long_name], ) self.assertEqual( rejected_content_types, [long_content_type], ) with slob.open(self.path) as r: self.assertEqual(r.tags["t2"], "t2 value") self.assertNotIn(tag_with_long_name[0], r.tags) self.assertIn(tag_with_long_value[0], r.tags) self.assertEqual(r.tags[tag_with_long_value[0]], "") d = r.as_dict() self.assertIn("a", d) self.assertIn("b", d) self.assertNotIn(long_key, d) self.assertNotIn(key_with_long_frag[0], d) self.assertIn("e", d) self.assertNotIn(long_alias, d) self.assertNotIn("g", d) self.assertRaises( ValueError, set_tag_value, self.path, "t1", "ы" * 128, ) class TestEditTag(BaseTest): def setUp(self): BaseTest.setUp(self) self.path = os.path.join(self.tmpdir.name, "test.slob") writer = self.create(self.path) writer.tag("a", "123456") writer.tag("b", "654321") writer.finalize() def test_edit_existing_tag(self): with slob.open(self.path) as f: self.assertEqual(f.tags["a"], "123456") self.assertEqual(f.tags["b"], "654321") set_tag_value(self.path, "b", "efg") set_tag_value(self.path, "a", "xyz") with slob.open(self.path) as f: self.assertEqual(f.tags["a"], "xyz") self.assertEqual(f.tags["b"], "efg") def test_edit_nonexisting_tag(self): self.assertRaises(TagNotFound, set_tag_value, self.path, "z", "abc") class TestBinItemNumberLimit(BaseTest): def setUp(self): BaseTest.setUp(self) 
self.path = os.path.join(self.tmpdir.name, "test.slob") def test_writing_more_than_max_number_of_bin_items(self): writer = self.create(self.path) for _ in range(slob.MAX_BIN_ITEM_COUNT + 2): writer.add(b"a", "a") self.assertEqual(writer.bin_count, 2) writer.finalize() if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/stardict_test.py000066400000000000000000000111711476751035500203470ustar00rootroot00000000000000from __future__ import annotations import locale import random import unittest from functools import cmp_to_key from typing import Any def toBytes(s: str | bytes) -> bytes: return bytes(s, "utf-8") if isinstance(s, str) else bytes(s) def sortKeyBytes(ba: bytes) -> Any: assert isinstance(ba, bytes) # ba.lower() + ba is wrong return ( ba.lower(), ba, ) def stardictStrCmp(s1: str, s2: str) -> int: """ Use this function to sort index items in a StarDict dictionary. s1 and s2 must be UTF-8 encoded strings. """ s1 = toBytes(s1) s2 = toBytes(s2) a = asciiStrCaseCmp(s1, s2) if a == 0: return strCmp(s1, s2) return a # the slow way in Python 3 (where there is no cmp arg in list.sort) sortKeyOld = cmp_to_key(stardictStrCmp) # TOO SLOW def asciiStrCaseCmp(ba1: bytes, ba2: bytes) -> int: """ ba1 and ba2 are instances of bytes. Imitates the g_ascii_strcasecmp function of the glib library (gstrfuncs.c file). """ commonLen = min(len(ba1), len(ba2)) for i in range(commonLen): c1 = asciiLower(ba1[i]) c2 = asciiLower(ba2[i]) if c1 != c2: return c1 - c2 return len(ba1) - len(ba2) def strCmp(ba1: bytes, ba2: bytes) -> int: """ ba1 and ba2 are instances of bytes. Imitates strcmp of the standard C library. Attention! You may be tempted to replace this function with the built-in cmp() function. Hold on! Most probably these two functions behave identically now, but cmp does not document how it compares strings. There is no guarantee it will not be changed in the future. Since we need a predictable sorting order in a StarDict dictionary, we need to preserve this function despite the fact that there are other ways to implement it. """ commonLen = min(len(ba1), len(ba2)) for i in range(commonLen): c1 = ba1[i] c2 = ba2[i] if c1 != c2: return c1 - c2 return len(ba1) - len(ba2) def isAsciiAlpha(c: int) -> bool: return ord("A") <= c <= ord("Z") or ord("a") <= c <= ord("z") def isAsciiLower(c: int) -> bool: return ord("a") <= c <= ord("z") def isAsciiUpper(c: int) -> bool: """Imitates the ISUPPER macro of the glib library (gstrfuncs.c file).""" return ord("A") <= c <= ord("Z") def asciiLower(c: int) -> int: """ Returns an int (ASCII character code). Imitates the TOLOWER macro of the glib library (gstrfuncs.c file). This function converts upper case Latin letters to the corresponding lower case letters; other chars are not changed. c must be an integer byte value (0-255), i.e. an individual byte of a non-Unicode string. The following encodings are allowed: single-byte encodings like koi8-r, cp1250, cp1251, cp1252, etc, and UTF-8. Attention! The Python standard library provides the str.lower() method. It is not a correct replacement for this function. For non-Unicode strings, str.lower() is locale dependent: it not only converts Latin letters to lower case, but locale-specific letters will be converted as well.
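	For example, only the ASCII capital letters A-Z are changed:

	>>> asciiLower(ord("A"))  # "A" (65) -> "a" (97)
	97
	>>> asciiLower(ord("a"))  # already lower case, returned unchanged
	97
	>>> asciiLower(0xC9)  # "É" in latin-1, outside A-Z, returned unchanged
	201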
""" return c - ord("A") + ord("a") if isAsciiUpper(c) else c def getRandomBytes(avgLen: float, sigma: float) -> bytes: length = round(random.gauss(avgLen, sigma)) return bytes([random.choice(range(256)) for _ in range(length)]) class AsciiLowerUpperTest(unittest.TestCase): def set_locale_iter(self): for localeName in locale.locale_alias.values(): try: locale.setlocale(locale.LC_ALL, localeName) except Exception as e: if "unsupported locale setting" not in str(e): print(e) continue yield localeName def test_isalpha(self): for _ in self.set_locale_iter(): for code in range(256): self.assertEqual( isAsciiAlpha(code), bytes([code]).isalpha(), ) def test_islower(self): for _ in self.set_locale_iter(): for code in range(256): self.assertEqual( isAsciiLower(code), bytes([code]).islower(), ) def test_isupper(self): for _ in self.set_locale_iter(): for code in range(256): self.assertEqual( isAsciiUpper(code), bytes([code]).isupper(), ) def test_lower(self): for _ in self.set_locale_iter(): for code in range(256): self.assertEqual( asciiLower(code), ord(bytes([code]).lower()), ) class SortRandomTest(unittest.TestCase): def set_locale_iter(self): for localeName in locale.locale_alias.values(): try: locale.setlocale(locale.LC_ALL, localeName) except Exception as e: if "unsupported locale setting" not in str(e): raise e continue # print(localeName) yield localeName def test_sort_1(self): bsList = [getRandomBytes(30, 10) for _ in range(100)] for _ in self.set_locale_iter(): self.assertEqual( sorted( bsList, key=sortKeyOld, ), sorted( bsList, key=sortKeyBytes, ), ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/text_utils_test.py000066400000000000000000000160051476751035500207370ustar00rootroot00000000000000import os import struct import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.text_utils import ( crc32hex, escapeNTB, fixUtf8, joinByBar, replacePostSpaceChar, splitByBar, splitByBarUnescapeNTB, uint32FromBytes, uint32ToBytes, uintFromBytes, unescapeBar, unescapeNTB, urlToPath, ) class TestTextUtils(unittest.TestCase): def test_fixUtf8(self): f = fixUtf8 # Since entries already keep words and defi as string, fixUtf8 does not # do much. It just removes zero bytes between valid characters # (and not within characters) # If there were encoding errors in input file, Reader class would # most likely fail to read and raise exception. # This feature was useful in Python 2.x, but not much anymore! 
self.assertEqual(f("\x00س\x00لام"), "سلام") def test_unescapeNTB(self): self.assertEqual("a", unescapeNTB("a", bar=False)) self.assertEqual("a\t", unescapeNTB("a\\t", bar=False)) self.assertEqual("a\n", unescapeNTB("a\\n", bar=False)) self.assertEqual("\ta", unescapeNTB("\\ta", bar=False)) self.assertEqual("\na", unescapeNTB("\\na", bar=False)) self.assertEqual("a\tb\n", unescapeNTB("a\\tb\\n", bar=False)) self.assertEqual("a\\b", unescapeNTB("a\\\\b", bar=False)) self.assertEqual("a\\\tb", unescapeNTB("a\\\\\\tb", bar=False)) self.assertEqual("a|b\tc", unescapeNTB("a|b\\tc", bar=False)) self.assertEqual("a\\|b\tc", unescapeNTB("a\\|b\\tc", bar=False)) self.assertEqual("a\\|b\tc", unescapeNTB("a\\\\|b\\tc", bar=False)) self.assertEqual("|", unescapeNTB("\\|", bar=True)) self.assertEqual("a|b", unescapeNTB("a\\|b", bar=True)) self.assertEqual("a|b\tc", unescapeNTB("a\\|b\\tc", bar=True)) def test_escapeNTB(self): self.assertEqual(escapeNTB("a", bar=False), "a") self.assertEqual(escapeNTB("a\t", bar=False), "a\\t") self.assertEqual(escapeNTB("a\n", bar=False), "a\\n") self.assertEqual(escapeNTB("\ta", bar=False), "\\ta") self.assertEqual(escapeNTB("\na", bar=False), "\\na") self.assertEqual(escapeNTB("a\tb\n", bar=False), "a\\tb\\n") self.assertEqual(escapeNTB("a\\b", bar=False), "a\\\\b") self.assertEqual(escapeNTB("a\\\tb", bar=False), "a\\\\\\tb") self.assertEqual(escapeNTB("a|b\tc", bar=False), "a|b\\tc") self.assertEqual(escapeNTB("a\\|b\tc", bar=False), "a\\\\|b\\tc") self.assertEqual(escapeNTB("|", bar=True), "\\|") self.assertEqual(escapeNTB("a|b", bar=True), "a\\|b") self.assertEqual(escapeNTB("a|b\tc", bar=True), "a\\|b\\tc") def test_splitByBarUnescapeNTB(self): f = splitByBarUnescapeNTB self.assertEqual(f(""), [""]) self.assertEqual(f("|"), ["", ""]) self.assertEqual(f("a"), ["a"]) self.assertEqual(f("a|"), ["a", ""]) self.assertEqual(f("|a"), ["", "a"]) self.assertEqual(f("a|b"), ["a", "b"]) self.assertEqual(f("a\\|b|c"), ["a|b", "c"]) self.assertEqual(f("a\\\\1|b|c"), ["a\\1", "b", "c"]) # self.assertEqual(f("a\\\\|b|c"), ["a\\", "b", "c"]) # FIXME self.assertEqual(f("a\\\\1|b\\n|c\\t"), ["a\\1", "b\n", "c\t"]) def test_unescapeBar(self): f = unescapeBar self.assertEqual("", f("")) self.assertEqual("|", f("\\|")) self.assertEqual("a|b", f("a\\|b")) self.assertEqual("a|b\tc", f("a\\|b\tc")) self.assertEqual("a|b\\t\\nc", f("a\\|b\\t\\nc")) self.assertEqual("\\", f("\\\\")) self.assertEqual("\\|", f("\\\\\\|")) def test_splitByBar(self): f = splitByBar self.assertEqual(f(""), [""]) self.assertEqual(f("|"), ["", ""]) self.assertEqual(f("a"), ["a"]) self.assertEqual(f("a|"), ["a", ""]) self.assertEqual(f("|a"), ["", "a"]) self.assertEqual(f("a|b"), ["a", "b"]) self.assertEqual(f("a\\|b"), ["a|b"]) self.assertEqual(f("a\\|b|c"), ["a|b", "c"]) self.assertEqual(f("a\\\\1|b|c"), ["a\\1", "b", "c"]) # self.assertEqual(f("a\\\\|b|c"), ["a\\", "b", "c"]) # FIXME def test_joinByBar(self): f = joinByBar self.assertEqual("", f([""])) self.assertEqual("|", f(["", ""])) self.assertEqual("a", f(["a"])) self.assertEqual("a|", f(["a", ""])) self.assertEqual("|a", f(["", "a"])) self.assertEqual("a|b", f(["a", "b"])) self.assertEqual("a\\|b", f(["a|b"])) self.assertEqual("a\\|b|c", f(["a|b", "c"])) self.assertEqual("a\\\\1|b|c", f(["a\\1", "b", "c"])) def test_uint32ToBytes(self): f = uint32ToBytes outOfRangeError = "'I' format requires 0 <= number <= 4294967295" if os.sep == "\\": outOfRangeError = "argument out of range" self.assertEqual(f(0), bytes([0, 0, 0, 0])) self.assertEqual(f(0x3E8), 
bytes([0, 0, 0x03, 0xE8])) self.assertEqual(f(0x186A0), bytes([0, 1, 0x86, 0xA0])) self.assertEqual(f(0x3B9ACA00), bytes([0x3B, 0x9A, 0xCA, 0x00])) self.assertEqual(f(0xFFFFFFFF), bytes([0xFF, 0xFF, 0xFF, 0xFF])) with self.assertRaises(struct.error) as ctx: f(0xFFFFFFFF + 1) self.assertEqual( str(ctx.exception), outOfRangeError, ) with self.assertRaises(struct.error) as ctx: f(10000000000) self.assertEqual( str(ctx.exception), outOfRangeError, ) with self.assertRaises(struct.error) as ctx: f(-1) if sys.version_info >= (3, 12): self.assertEqual( str(ctx.exception), "'I' format requires 0 <= number <= 4294967295", ) else: self.assertEqual(str(ctx.exception), "argument out of range") def test_uint32FromBytes(self): f = uint32FromBytes self.assertEqual(0, f(bytes([0, 0, 0, 0]))) self.assertEqual(0x3E8, f(bytes([0, 0, 0x03, 0xE8]))) self.assertEqual(0x186A0, f(bytes([0, 1, 0x86, 0xA0]))) self.assertEqual(0x3B9ACA00, f(bytes([0x3B, 0x9A, 0xCA, 0x00]))) self.assertEqual(0xFFFFFFFF, f(bytes([0xFF, 0xFF, 0xFF, 0xFF]))) with self.assertRaises(struct.error) as ctx: f(bytes([0x01, 0xFF, 0xFF, 0xFF, 0xFF])) self.assertEqual(str(ctx.exception), "unpack requires a buffer of 4 bytes") def test_uintFromBytes(self): f = uintFromBytes self.assertEqual(0, f(bytes([0, 0, 0, 0]))) self.assertEqual(0x3E8, f(bytes([0, 0, 0x03, 0xE8]))) self.assertEqual(0x186A0, f(bytes([0, 1, 0x86, 0xA0]))) self.assertEqual(0x3B9ACA00, f(bytes([0x3B, 0x9A, 0xCA, 0x00]))) self.assertEqual(0xFFFFFFFF, f(bytes([0xFF, 0xFF, 0xFF, 0xFF]))) self.assertEqual( 0xFFABCDEF5542, f(bytes([0xFF, 0xAB, 0xCD, 0xEF, 0x55, 0x42])), ) def test_crc32hex(self): f = crc32hex self.assertEqual(f(b""), "00000000") self.assertEqual(f(b"\x00"), "d202ef8d") self.assertEqual(f(b"\x00\x00"), "41d912ff") self.assertEqual( f(bytes.fromhex("73c3bbc38b7459360ac3a9c2b3c2a2")), "bbfb1610", ) def test_urlToPath(self): f = urlToPath self.assertEqual( f("https://github.com/ilius/pyglossary"), "https://github.com/ilius/pyglossary", ) self.assertEqual( f("file:///home/test/abc.txt"), "/home/test/abc.txt", ) self.assertEqual( f("file:///home/test/%D8%AA%D8%B3%D8%AA.txt"), "/home/test/تست.txt", ) def test_replacePostSpaceChar(self): f = replacePostSpaceChar self.assertEqual( f("First sentence .Second sentence.", "."), "First sentence. 
Second sentence.", ) self.assertEqual( f("First ,second.", ","), "First, second.", ) if __name__ == "__main__": unittest.main() pyglossary-5.0.9/tests/xml_utils_test.py000066400000000000000000000012201476751035500205440ustar00rootroot00000000000000import sys import unittest from os.path import abspath, dirname rootDir = dirname(dirname(abspath(__file__))) sys.path.insert(0, rootDir) from pyglossary.xml_utils import xml_escape class Test_xml_escape(unittest.TestCase): def test(self): f = xml_escape self.assertEqual(f(""), "") self.assertEqual(f("abc"), "abc") self.assertEqual(f('"a"'), ""a"") self.assertEqual(f("'a'"), "'a'") self.assertEqual(f('"a"', quotation=False), '"a"') self.assertEqual(f("'a'", quotation=False), "'a'") self.assertEqual(f("R&D"), "R&D") self.assertEqual(f("<-->"), "<-->") if __name__ == "__main__": unittest.main() pyglossary-5.0.9/whitelist.py000066400000000000000000000215441476751035500163520ustar00rootroot00000000000000AnyStr # unused import (pyglossary/json_utils.py:6) AnyStr # unused import (pyglossary/text_utils.py:25) EntryListType # unused import (pyglossary/glossary_v2.py:65) exc_tb # unused variable (pyglossary/os_utils.py:54) exc_tb # unused variable (pyglossary/slob.py:1569) exc_tb # unused variable (pyglossary/slob.py:280) exc_tb # unused variable (pyglossary/slob.py:739) exc_type # unused variable (pyglossary/os_utils.py:52) exc_type # unused variable (pyglossary/slob.py:1567) exc_type # unused variable (pyglossary/slob.py:278) exc_type # unused variable (pyglossary/slob.py:737) IOBase # unused import (pyglossary/plugins/freedict.py:4) IOBase # unused import (pyglossary/plugins/wiktextract.py:3) IOBase # unused import (pyglossary/slob.py:34) Iterable # unused import (pyglossary/iter_utils.py:25) Iterable # unused import (pyglossary/plugin_lib/dictdlib.py:29) Iterable # unused import (pyglossary/plugins/csv_plugin.py:22) Iterable # unused import (pyglossary/sq_entry_list.py:27) lxml # unused import (pyglossary/plugins/cc_kedict.py:10) Mapping # unused import (pyglossary/slob.py:31) RawEntryType # unused import (pyglossary/entry_list.py:28) RawEntryType # unused import (pyglossary/entry.py:25) RawEntryType # unused import (pyglossary/glossary_v2.py:65) RawEntryType # unused import (pyglossary/sq_entry_list.py:30) SortKeyType # unused import (pyglossary/sort_keys.py:27) SortKeyType # unused import (pyglossary/sort_modules/dicformids.py:5) SortKeyType # unused import (pyglossary/sort_modules/ebook_length3.py:6) SortKeyType # unused import (pyglossary/sort_modules/ebook.py:4) SortKeyType # unused import (pyglossary/sort_modules/headword_bytes_lower.py:4) SortKeyType # unused import (pyglossary/sort_modules/headword_lower.py:7) SortKeyType # unused import (pyglossary/sort_modules/headword.py:7) SortKeyType # unused import (pyglossary/sort_modules/random.py:7) SortKeyType # unused import (pyglossary/sort_modules/stardict.py:4) SQLiteSortKeyType # unused import (pyglossary/sort_keys.py:27) SQLiteSortKeyType # unused import (pyglossary/sort_modules/dicformids.py:5) SQLiteSortKeyType # unused import (pyglossary/sort_modules/ebook_length3.py:6) SQLiteSortKeyType # unused import (pyglossary/sort_modules/ebook.py:4) SQLiteSortKeyType # unused import (pyglossary/sort_modules/headword_bytes_lower.py:4) SQLiteSortKeyType # unused import (pyglossary/sort_modules/headword_lower.py:7) SQLiteSortKeyType # unused import (pyglossary/sort_modules/headword.py:7) SQLiteSortKeyType # unused import (pyglossary/sort_modules/random.py:7) SQLiteSortKeyType # unused import 
(pyglossary/sort_modules/stardict.py:4) T_Collator # unused import (pyglossary/slob.py:54) T_Collator # unused import (pyglossary/sort_keys.py:26) T_Collator # unused import (pyglossary/sort_modules/headword_lower.py:5) T_Collator # unused import (pyglossary/sort_modules/headword.py:5) T_Collator # unused import (pyglossary/sort_modules/random.py:5) T_htmlfile # unused import (pyglossary/plugins/cc_cedict/conv.py:13) T_htmlfile # unused import (pyglossary/plugins/dict_cc.py:12) T_htmlfile # unused import (pyglossary/plugins/freedict.py:12) T_htmlfile # unused import (pyglossary/plugins/jmdict.py:13) T_htmlfile # unused import (pyglossary/plugins/jmnedict.py:11) T_htmlfile # unused import (pyglossary/plugins/wiktextract.py:11) T_htmlfile # unused import (pyglossary/xdxf/transform.py:6) T_Locale # unused import (pyglossary/sort_keys.py:26) UIType # unused import (pyglossary/glossary_progress.py:6) UIType # unused import (pyglossary/glossary_v2.py:87) _.add_alias # unused method (pyglossary/slob.py:1252) _.addAlt # unused method (pyglossary/entry.py:123) _.addAlt # unused method (pyglossary/entry.py:308) _.addEntryObj # unused method (pyglossary/glossary.py:96) _.bglHeader # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:412) _.bglHeader # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:44) _.closed # unused property (pyglossary/plugins/babylon_bgl/bgl_gzip.py:287) _.closed # unused property (pyglossary/slob.py:290) _.cls_get_prefix # unused method (pyglossary/plugins/ebook_epub2.py:231) DebugBglReader # unused class (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:167) debugReadOptions # unused variable (pyglossary/plugins/babylon_bgl/bgl_reader.py:72) _.defiAsciiCount # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:469) _.defiAsciiCount # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:81) _.defiProcessedCount # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:467) _.defiProcessedCount # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:79) _.defiUtf8Count # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:475) _.defiUtf8Count # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:80) _.detach # unused method (pyglossary/io_utils.py:119) _.detach # unused method (pyglossary/io_utils.py:47) _.directRead # unused method (pyglossary/glossary_v2.py:677) _.dumpBlocks # unused method (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:357) _.dumpMetadata2 # unused method (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:419) escapeNewlines # unused function (pyglossary/plugins/babylon_bgl/bgl_text.py:158) ExcInfoType # unused variable (pyglossary/core.py:25) formatName # unused variable (pyglossary/plugin_handler.py:44) _.getConfig # unused method (pyglossary/glossary_v2.py:540) _.getDefaultDefiFormat # unused method (pyglossary/glossary_v2.py:437) _.getReadExtraOptions # unused method (pyglossary/plugin_prop.py:497) _.getTitleTag # unused method (pyglossary/plugins/freedict.py:187) _.getTitleTag # unused method (pyglossary/plugins/wiktextract.py:656) _.getWriteExtraOptions # unused method (pyglossary/plugin_prop.py:503) _.groupValues # unused method (pyglossary/option.py:133) _.groupValues # unused method (pyglossary/option.py:204) _.groupValues # unused method (pyglossary/option.py:442) _.gzipEndOffset # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:409) _.gzipEndOffset # unused attribute 
(pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:42) _.gzipStartOffset # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:363) _.gzipStartOffset # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:41) _.isatty # unused method (pyglossary/io_utils.py:26) _.isatty # unused method (pyglossary/io_utils.py:98) _.isatty # unused method (pyglossary/slob.py:294) _.keyScoreList # unused method (scripts/wiktextract/extract-schema.py:18) levelNamesCap # unused variable (pyglossary/core.py:103) _.lex_filenum # unused property (pyglossary/plugins/wordnet.py:89) _.longComment # unused property (pyglossary/option.py:76) _.numFiles # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:362) _.numFiles # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:382) _.numFiles # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader_debug.py:40) _.readable # unused method (pyglossary/io_utils.py:101) _.readable # unused method (pyglossary/io_utils.py:29) _.readable # unused method (pyglossary/plugins/babylon_bgl/bgl_gzip.py:333) _.readable # unused method (pyglossary/slob.py:297) _.readinto1 # unused method (pyglossary/io_utils.py:131) _.readinto1 # unused method (pyglossary/io_utils.py:59) _.readinto # unused method (pyglossary/io_utils.py:128) _.readinto # unused method (pyglossary/io_utils.py:56) _.readlines # unused method (pyglossary/io_utils.py:147) _.readlines # unused method (pyglossary/io_utils.py:75) _.rewind # unused method (pyglossary/plugins/babylon_bgl/bgl_gzip.py:325) _.setTimeEnable # unused method (pyglossary/core.py:141) _.setVerbosity # unused method (pyglossary/core.py:118) _.size_content_types # unused method (pyglossary/slob.py:1529) _.size_header # unused method (pyglossary/slob.py:1502) _.size_tags # unused method (pyglossary/slob.py:1522) _.sortWords # unused method (pyglossary/glossary.py:102) _.specialCharPattern # unused attribute (pyglossary/plugins/babylon_bgl/bgl_reader.py:370) _.sub_title_line # unused method (pyglossary/plugins/dsl/__init__.py:273) TextListSymbolCleanup # unused class (pyglossary/entry_filters.py:329) _.titleElement # unused method (pyglossary/glossary.py:39) T_SdList # unused class (pyglossary/plugins/stardict.py:183) _.updateIter # unused method (pyglossary/glossary.py:99) _.validateRaw # unused method (pyglossary/option.py:124) _.writable # unused method (pyglossary/io_utils.py:116) _.writable # unused method (pyglossary/io_utils.py:44) _.writable # unused method (pyglossary/plugins/babylon_bgl/bgl_gzip.py:336) _.writable # unused method (pyglossary/slob.py:321) _.writelines # unused method (pyglossary/io_utils.py:150) _.writelines # unused method (pyglossary/io_utils.py:78) XdxfTransformerType # unused class (pyglossary/plugins/stardict.py:165)