babel-2.17.0/.coveragerc

[report]
exclude_lines =
    NotImplemented
    pragma: no cover
    warnings.warn
    if TYPE_CHECKING:

babel-2.17.0/.github/ISSUE_TEMPLATE.md

## Overview Description

## Steps to Reproduce

1.
2.
3.

## Actual Results

## Expected Results

## Reproducibility

## Additional Information

babel-2.17.0/.github/workflows/ci.yml

name: CI

on:
  push:
    branches:
      - master
      - '*-maint'
    tags:
      - 'v*'
  pull_request:
    branches:
      - master
      - '*-maint'

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: pre-commit/action@v3.0.1
        env:
          RUFF_OUTPUT_FORMAT: github

  test:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os:
          - "ubuntu-24.04"
          - "windows-2022"
          - "macos-14"
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"
          - "3.12"
          - "3.13"
          - "pypy3.10"
    env:
      BABEL_CLDR_NO_DOWNLOAD_PROGRESS: "1"
      BABEL_CLDR_QUIET: "1"
    steps:
      - uses: actions/checkout@v4
      - uses: actions/cache@v4
        with:
          path: cldr
          key: cldr-${{ hashFiles('scripts/*cldr*') }}
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          allow-prereleases: true
          cache: "pip"
          cache-dependency-path: "**/setup.py"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
          python -m pip install 'tox~=4.0' 'tox-gh-actions~=3.0'
      - name: Run test via Tox
        run: tox --skip-missing-interpreters
        env:
          COVERAGE_XML_PATH: ${{ runner.temp }}
          BABEL_TOX_EXTRA_DEPS: pytest-github-actions-annotate-failures
      - uses: codecov/codecov-action@v5
        with:
          directory: ${{ runner.temp }}
          flags: ${{ matrix.os }}-${{ matrix.python-version }}
          token: ${{ secrets.CODECOV_TOKEN }}
          verbose: true

  build:
    runs-on: ubuntu-24.04
    needs: lint
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.13"
          cache: "pip"
          cache-dependency-path: "**/setup.py"
      - run: pip install build -e .
      - run: make import-cldr
      - run: python -m build
      - uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist

  publish:
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
    needs:
      - build
    runs-on: ubuntu-latest
    environment:
      name: release
      url: https://pypi.org/p/babel/
    permissions:
      id-token: write
    steps:
      - uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/
      - name: Publish package distributions to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
          print-hash: true

babel-2.17.0/.gitignore

**/__pycache__
*.egg
*.egg-info
*.pyc
*.pyo
*.so
*.swp
*~
.*cache
.DS_Store
.coverage
.idea
.tox
/venv*
babel/global.dat
babel/global.dat.json
build
dist
docs/_build
test-env
tests/messages/data/project/i18n/en_US
tests/messages/data/project/i18n/fi_BUGGY/LC_MESSAGES/*.mo
tests/messages/data/project/i18n/long_messages.pot
tests/messages/data/project/i18n/temp*

babel-2.17.0/.pre-commit-config.yaml

repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.9.1
    hooks:
      - id: ruff
        args:
          - --fix
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: check-added-large-files
      - id: check-docstring-first
        exclude: (docs/conf.py)
      - id: check-json
      - id: check-yaml
      - id: debug-statements
        exclude: (tests/messages/data/)
      - id: end-of-file-fixer
        exclude: (tests/messages/data/)
      - id: name-tests-test
        args: [ '--django' ]
        exclude: (tests/messages/data/|.*(consts|utils).py)
      - id: requirements-txt-fixer
      - id: trailing-whitespace

babel-2.17.0/.readthedocs.yml

# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
version: 2
build:
  os: ubuntu-22.04
  tools:
    python: "3.11"
  jobs:
    pre_build:
      # Replace any Babel version something may have pulled in
      # with the copy we're working on. We'll also need to build
      # the data files at that point, or date formatting _within_
      # Sphinx will fail.
      - pip install -e .
      - make import-cldr
sphinx:
  configuration: docs/conf.py
formats:
  - epub
  - pdf
python:
  install:
    - requirements: docs/requirements.txt

babel-2.17.0/AUTHORS

Babel is written and maintained by the Babel team and various contributors:

- Aarni Koskela
- Christopher Lenz
- Armin Ronacher
- Alex Morega
- Lasse Schuirmann
- Felix Schwarz
- Pedro Algarvio
- Jeroen Ruigrok van der Werven
- Philip Jenvey
- benselme
- Isaac Jurado
- Tomas R.
- Tobias Bieniek
- Erick Wilder
- Jonah Lawrence
- Michael Birtwell
- Jonas Borgström
- Kevin Deldycke
- Ville Skyttä
- Jon Dufresne
- Hugo van Kemenade
- Jun Omae
- Heungsub Lee
- Jakob Schnitzer
- Sachin Paliwal
- Alex Willmer
- Daniel Neuhäuser
- Miro Hrončok
- Cédric Krier
- Luke Plant
- Jennifer Wang
- Lukas Balaga
- sudheesh001
- Jean Abou Samra
- Niklas Hambüchen
- Changaco
- Xavier Fernandez
- KO.
Mattsson - Sébastien Diemer - alexbodn@gmail.com - saurabhiiit - srisankethu - Erik Romijn - Lukas B - Ryan J Ollos - Arturas Moskvinas - Leonardo Pistone - Hyunjun Kim - wandrew004 - James McKinney - Tomáš Hrnčiar - Gabe Sherman - mattdiaz007 - Dylan Kiss - Daniel Roschka - buhtz - Bohdan Malomuzh - Leonid - Ronan Amicel - Christian Clauss - Best Olunusi - Teo - Ivan Koldakov - Rico Hermans - Daniel - Oleh Prypin - Petr Viktorin - Jean Abou-Samra - Joe Portela - Marc-Etienne Vargenau - Michał Górny - Alex Waygood - Maciej Olko - martin f. krafft - DS/Charlie - lilinjie - Johannes Wilm - Eric L - Przemyslaw Wegrzyn - Lukas Kahwe Smith - Lukas Juhrich - Nikita Sobolev - Raphael Nestler - Frank Harrison - Nehal J Wani - Mohamed Morsy - Krzysztof Jagiełło - Morgan Wahl - farhan5900 - Sigurd Ljødal - Andrii Oriekhov - rachele-collin - Lukas Winkler - Juliette Monsel - Álvaro Mondéjar Rubio - ruro - Alessio Bogon - Nikiforov Konstantin - Abdullah Javed Nesar - Brad Martin - Tyler Kennedy - CyanNani123 - sebleblanc - He Chen - Steve (Gadget) Barnes - Romuald Brunet - Mario Frasca - BT-sschmid - Alberto Mardegan - mondeja - NotAFile - Julien Palard - Brian Cappello - Serban Constantin - Bryn Truscott - Chris - Charly C - PTrottier - xmo-odoo - StevenJ - Jungmo Ku - Simeon Visser - Narendra Vardi - Stefane Fermigier - Narayan Acharya - François Magimel - Wolfgang Doll - Roy Williams - Marc-André Dufresne - Abhishek Tiwari - David Baumgold - Alex Kuzmenko - Georg Schölly - ldwoolley - Rodrigo Ramírez Norambuena - Jakub Wilk - Roman Rader - Max Shenfield - Nicolas Grilly - Kenny Root - Adam Chainz - Sébastien Fievet - Anthony Sottile - Yuriy Shatrov - iamshubh22 - Sven Anderson - Eoin Nugent - Roman Imankulov - David Stanek - Roy Wellington Ⅳ - Florian Schulze - Todd M. Guerra - Joseph Breihan - Craig Loftus - The Gitter Badger - Régis Behmo - Julen Ruiz Aizpuru - astaric - Felix Yan - Philip_Tzou - Jesús Espino - Jeremy Weinstein - James Page - masklinn - Sjoerd Langkemper - Matt Iversen - Alexander A. Dyshev - Dirkjan Ochtman - Nick Retallack - Thomas Waldmann - xen Babel was previously developed under the Copyright of Edgewall Software. The following copyright notice holds true for releases before 2013: "Copyright (c) 2007 - 2011 by Edgewall Software" In addition to the regular contributions Babel includes a fork of Lennart Regebro's tzlocal that originally was licensed under the CC0 license. The original copyright of that project is "Copyright 2013 by Lennart Regebro". babel-2.17.0/CHANGES.rst000066400000000000000000001326021474743505200144670ustar00rootroot00000000000000Babel Changelog =============== Version 2.17.0 -------------- Happy 2025! This release is being made from FOSDEM 2025, in Brussels, Belgium. Thank you to all contributors, new and old, and here's to another great year of internationalization and localization! Features ~~~~~~~~ * CLDR: Babel now uses CLDR 46, by @tomasr8 in :gh:`1145` * Dates: Allow specifying an explicit format in parse_date/parse_time by @tomasr8 in :gh:`1131` * Dates: More alternate characters are now supported by `format_skeleton`. By @tomasr8 in :gh:`1122` * Dates: Support short and narrow formats for format_timedelta when using `add_direction`, by @akx in :gh:`1163` * Messages: .po files now enclose white spaces in filenames like GNU gettext does. 
By @Dunedan in :gh:`1105`, and @tomasr8 in :gh:`1120` * Messages: Initial support for `Message.python_brace_format`, by @tomasr8 in :gh:`1169` * Numbers: LC_MONETARY is now preferred when formatting currencies, by @akx in :gh:`1173` Bugfixes ~~~~~~~~ * Dates: Make seconds optional in `parse_time` time formats by @tomasr8 in :gh:`1141` * Dates: Replace `str.index` with `str.find` by @tomasr8 in :gh:`1130` * Dates: Strip extra leading slashes in `/etc/localtime` by @akx in :gh:`1165` * Dates: Week numbering and formatting of dates with week numbers was repaired by @jun66j5 in :gh:`1179` * General: Improve handling for `locale=None` by @akx in :gh:`1164` * General: Remove redundant assignment in `Catalog.__setitem__` by @tomasr8 in :gh:`1167` * Messages: Fix extracted lineno with nested calls, by @dylankiss in :gh:`1126` * Messages: Fix of list index out of range when translations is empty, by @gabe-sherman in :gh:`1135` * Messages: Fix the way obsolete messages are stored by @tomasr8 in :gh:`1132` * Messages: Simplify `read_mo` logic regarding `catalog.charset` by @tomasr8 in :gh:`1148` * Messages: Use the first matching method & options, rather than first matching method & last options, by @jpmckinney in :gh:`1121` Deprecation and compatibility ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Dates: Fix deprecation warnings for `datetime.utcnow()` by @tomasr8 in :gh:`1119` * Docs: Adjust docs/conf.py to add compatibility with sphinx 8 by @hrnciar in :gh:`1155` * General: Import `Literal` from the typing module by @tomasr8 in :gh:`1175` * General: Replace `OrderedDict` with just `dict` by @tomasr8 in :gh:`1149` * Messages: Mark `wraptext` deprecated; use `TextWrapper` directly in `write_po` by @akx in :gh:`1140` Infrastructure ~~~~~~~~~~~~~~ * Add tzdata as dev dependency and sync with tox.ini by @wandrew004 in :gh:`1159` * Duplicate test code was deleted by @mattdiaz007 in :gh:`1138` * Increase test coverage of the `python_format` checker by @tomasr8 in :gh:`1176` * Small cleanups by @akx in :gh:`1160`, :gh:`1166`, :gh:`1170` and :gh:`1172` * Update CI to use python 3.13 and Ubuntu 24.04 by @tomasr8 in :gh:`1153` Version 2.16.0 -------------- Features ~~~~~~~~ * CLDR: Upgrade to CLDR 45 by @tomasr8 in :gh:`1077` * Lists: Support list format fallbacks by @akx in :gh:`1099` * Messages: Initial support for reading mapping configuration as TOML by @akx in :gh:`1108` Bugfixes ~~~~~~~~ * CLDR: Do not allow substituting alternates or drafts in derived locales by @akx in :gh:`1113` * Core: Allow falling back to modifier-less locale data by @akx in :gh:`1104` * Core: Allow use of importlib.metadata for finding entrypoints by @akx in :gh:`1102` * Dates: Avoid crashing on importing localtime when TZ is malformed by @akx in :gh:`1100` * Messages: Allow parsing .po files that have an extant but empty Language header by @akx in :gh:`1101` * Messages: Fix ``--ignore-dirs`` being incorrectly read (#1094) by @john-psina and @Edwin18 in :gh:`1052` and :gh:`1095` * Messages: Make pgettext search plurals when translation is not found by @tomasr8 in :gh:`1085` Infrastructure ~~~~~~~~~~~~~~ * Replace deprecated `ast.Str` with `ast.Constant` by @tomasr8 in :gh:`1083` * CI fixes by @akx in :gh:`1080`, :gh:`1097`, :gh:`1103`, :gh:`1107` * Test on Python 3.13 beta releases by @akx in * Normalize package name to lower-case in setup.py by @akx in :gh:`1110` Documentation ~~~~~~~~~~~~~ * Add a mention to the docs that `format_skeleton(..., fuzzy=True)` may raise by @tomasr8 in :gh:`1106` * Two hyperlinks (to CLDR) and some typos by 
@buhtz in :gh:`1115` Version 2.15.0 -------------- Python version support ~~~~~~~~~~~~~~~~~~~~~~ * Babel 2.15.0 will require Python 3.8 or newer. (:gh:`1048`) Features ~~~~~~~~ * CLDR: Upgrade to CLDR 44 (:gh:`1071`) (@akx) * Dates: Support for the "fall back to short format" logic for time delta formatting (:gh:`1075`) (@akx) * Message: More versatile .po IO functions (:gh:`1068`) (@akx) * Numbers: Improved support for alternate spaces when parsing numbers (:gh:`1007`) (@ronnix's first contribution) Infrastructure ~~~~~~~~~~~~~~ * Upgrade GitHub Actions (:gh:`1054`) (@cclauss's first contribution) * The Unicode license is now included in `locale-data` and in the documentation (:gh:`1074`) (@akx) Version 2.14.0 -------------- Upcoming deprecation ~~~~~~~~~~~~~~~~~~~~ * This version, Babel 2.14, is the last version of Babel to support Python 3.7. Babel 2.15 will require Python 3.8 or newer. * We had previously announced Babel 2.13 to have been the last version to support Python 3.7, but being able to use CLDR 43 with Python 3.7 was deemed important enough to keep supporting the EOL Python version for one more release. Possibly backwards incompatible changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ``Locale.number_symbols`` will now have first-level keys for each numbering system. Since the implicit default numbering system still is ``"latn"``, what had previously been e.g. ``Locale.number_symbols['decimal']`` is now ``Locale.number_symbols['latn']['decimal']``. * Babel no longer directly depends on either ``distutils`` or ``setuptools``; if you had been using the Babel setuptools command extensions, you would need to explicitly depend on ``setuptools`` – though given you're running ``setup.py`` you probably already do. Features ~~~~~~~~ * CLDR/Numbers: Add support of local numbering systems for number symbols by @kajte in :gh:`1036` * CLDR: Upgrade to CLDR 43 by @rix0rrr in :gh:`1043` * Frontend: Allow last_translator to be passed as an option to extract_message by @AivGitHub in :gh:`1044` * Frontend: Decouple `pybabel` CLI frontend from distutils/setuptools by @akx in :gh:`1041` * Numbers: Improve parsing of malformed decimals by @Olunusib and @akx in :gh:`1042` Infrastructure ~~~~~~~~~~~~~~ * Enforce trailing commas (enable Ruff COM rule and autofix) by @akx in :gh:`1045` * CI: use GitHub output formats by @akx in :gh:`1046` Version 2.13.1 -------------- This is a patch release to fix a few bugs. Fixes ~~~~~ * Fix a typo in ``_locales_to_names`` by @Dl84 in :gh:`1038` (issue :gh:`1037`) * Fix ``setuptools`` dependency for Python 3.12 by @opryprin in :gh:`1033` Version 2.13.0 -------------- Upcoming deprecation (reverted) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * It was previously announced that this version, Babel 2.13, would be the last version of Babel to support Python 3.7. Babel 2.14 will still support Python 3.7. 
Features ~~~~~~~~ * Add flag to ignore POT-Creation-Date for updates by @joeportela in :gh:`999` * Support 't' specifier in keywords by @jeanas in :gh:`1015` * Add f-string parsing for Python 3.12 (PEP 701) by @encukou in :gh:`1027` Fixes ~~~~~ * Various typing-related fixes by @akx in :gh:`979`, in :gh:`978`, :gh:`981`, :gh:`983` * babel.messages.catalog: deduplicate _to_fuzzy_match_key logic by @akx in :gh:`980` * Freeze format_time() tests to a specific date to fix test failures by @mgorny in :gh:`998` * Spelling and grammar fixes by @scop in :gh:`1008` * Renovate lint tools by @akx in :gh:`1017`, :gh:`1028` * Use SPDX license identifier by @vargenau in :gh:`994` * Use aware UTC datetimes internally by @scop in :gh:`1009` New Contributors ~~~~~~~~~~~~~~~~ * @mgorny made their first contribution in :gh:`998` * @vargenau made their first contribution in :gh:`994` * @joeportela made their first contribution in :gh:`999` * @encukou made their first contribution in :gh:`1027` Version 2.12.1 -------------- Fixes ~~~~~ * Version 2.12.0 was missing the ``py.typed`` marker file. Thanks to Alex Waygood for the fix! :gh:`975` * The copyright year in all files was bumped to 2023. Version 2.12.0 -------------- Deprecations & breaking changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Python 3.6 is no longer supported (:gh:`919`) - Aarni Koskela * The `get_next_timezone_transition` function is no more (:gh:`958`) - Aarni Koskela * `Locale.parse()` will no longer return `None`; it will always return a Locale or raise an exception. Passing in `None`, though technically allowed by the typing, will raise. (:gh:`966`) New features ~~~~~~~~~~~~ * CLDR: Babel now uses CLDR 42 (:gh:`951`) - Aarni Koskela * Dates: `pytz` is now optional; Babel will prefer it but will use `zoneinfo` when available. (:gh:`940`) - @ds-cbo * General: Babel now ships type annotations, thanks to Jonah Lawrence's work in multiple PRs. * Locales: @modifiers are now retained when parsing locales (:gh:`947`) - martin f. krafft * Messages: JavaScript template string expression extraction is now smarter. (:gh:`939`) - Johannes Wilm * Numbers: NaN and Infinity are now better supported (:gh:`955`) - Jonah Lawrence * Numbers: Short compact currency formats are now supported (:gh:`926`) - Jonah Lawrence * Numbers: There's now a `Format.compact_decimal` utility function. (:gh:`921`) - Jonah Lawrence Bugfixes ~~~~~~~~ * Dates: The cache for parsed datetime patterns is now bounded (:gh:`967`) - Aarni Koskela * Messages: Fuzzy candidate matching accuracy is improved (:gh:`970`) - Jean Abou Samra * Numbers: Compact singular formats and patterns with no numbers work correctly (:gh:`930`, :gh:`932`) - Jonah Lawrence, Jun Omae Improvements & cleanup ~~~~~~~~~~~~~~~~~~~~~~ * Dates: `babel.dates.UTC` is now an alias for `datetime.timezone.utc` (:gh:`957`) - Aarni Koskela * Dates: `babel.localtime` was slightly cleaned up. (:gh:`952`) - Aarni Koskela * Documentation: Documentation was improved by Maciej Olko, Jonah Lawrence, lilinjie, and Aarni Koskela. * Infrastructure: Babel is now being linted with pre-commit and ruff. - Aarni Koskela Version 2.11.0 -------------- Upcoming deprecation ~~~~~~~~~~~~~~~~~~~~ * This version, Babel 2.11, is the last version of Babel to support Python 3.6. Babel 2.12 will require Python 3.7 or newer. 
Improvements ~~~~~~~~~~~~ * Support for hex escapes in JavaScript string literals :gh:`877` - Przemyslaw Wegrzyn * Add support for formatting decimals in compact form :gh:`909` - Jonah Lawrence * Adapt parse_date to handle ISO dates in ASCII format :gh:`842` - Eric L. * Use `ast` instead of `eval` for Python string extraction :gh:`915` - Aarni Koskela * This also enables extraction from static f-strings. F-strings with expressions are silently ignored (but won't raise an error as they used to). Infrastructure ~~~~~~~~~~~~~~ * Tests: Use regular asserts and ``pytest.raises()`` :gh:`875` – Aarni Koskela * Wheels are now built in GitHub Actions :gh:`888` – Aarni Koskela * Small improvements to the CLDR downloader script :gh:`894` – Aarni Koskela * Remove antiquated `__nonzero__` methods :gh:`896` - Nikita Sobolev * Remove superfluous `__unicode__` declarations :gh:`905` - Lukas Juhrich * Mark package compatible with Python 3.11 :gh:`913` - Aarni Koskela * Quiesce pytest warnings :gh:`916` - Aarni Koskela Bugfixes ~~~~~~~~ * Use email.Message for pofile header parsing instead of the deprecated ``cgi.parse_header`` function. :gh:`876` – Aarni Koskela * Remove determining time zone via systemsetup on macOS :gh:`914` - Aarni Koskela Documentation ~~~~~~~~~~~~~ * Update Python versions in documentation :gh:`898` - Raphael Nestler * Align BSD-3 license with OSI template :gh:`912` - Lukas Kahwe Smith Version 2.10.3 -------------- This is a bugfix release for Babel 2.10.2, which was mistakenly packaged with outdated locale data. Thanks to Michał Górny for pointing this out and Jun Omae for verifying. This and future Babel PyPI packages will be built by a more automated process, which should make problems like this less likely to occur. Version 2.10.2 -------------- This is a bugfix release for Babel 2.10.1. * Fallback count="other" format in format_currency() (:gh:`872`) - Jun Omae * Fix get_period_id() with ``dayPeriodRule`` across 0:00 (:gh:`871`) - Jun Omae * Add support for ``b`` and ``B`` period symbols in time format (:gh:`869`) - Jun Omae * chore(docs/typo): Fixes a minor typo in a function comment (:gh:`864`) - Frank Harrison Version 2.10.1 -------------- This is a bugfix release for Babel 2.10.0. * Messages: Fix ``distutils`` import. Regressed in :gh:`843`. (:gh:`852`) - Nehal J Wani * The wheel file is no longer marked as universal, since Babel only supports Python 3. Version 2.10.0 -------------- Upcoming deprecation ~~~~~~~~~~~~~~~~~~~~ * The ``get_next_timezone_transition()`` function is marked deprecated in this version and will be removed likely as soon as Babel 2.11. No replacement for this function is planned; based on discussion in :gh:`716`, it's likely the function is not used in any real code. (:gh:`852`) - Aarni Koskela, Paul Ganssle Improvements ~~~~~~~~~~~~ * CLDR: Upgrade to CLDR 41.0. (:gh:`853`) - Aarni Koskela * The ``c`` and ``e`` plural form operands introduced in CLDR 40 are parsed, but otherwise unsupported. (:gh:`826`) * Non-nominative forms of units are currently ignored. * Messages: Implement ``--init-missing`` option for ``pybabel update`` (:gh:`785`) - ruro * Messages: For ``extract``, you can now replace the built-in ``.*`` / ``_*`` ignored directory patterns with ones of your own. 
(:gh:`832`) - Aarni Koskela, Kinshuk Dua * Messages: Add ``--check`` to verify if catalogs are up-to-date (:gh:`831`) - Krzysztof Jagiełło * Messages: Add ``--header-comment`` to override default header comment (:gh:`720`) - Mohamed Hafez Morsy, Aarni Koskela * Dates: ``parse_time`` now supports 12-hour clock, and is better at parsing partial times. (:gh:`834`) - Aarni Koskela, David Bauer, Arthur Jovart * Dates: ``parse_date`` and ``parse_time`` now raise ``ParseError``, a subclass of ``ValueError``, in certain cases. (:gh:`834`) - Aarni Koskela * Dates: ``parse_date`` and ``parse_time`` now accept the ``format`` parameter. (:gh:`834`) - Juliette Monsel, Aarni Koskela Infrastructure ~~~~~~~~~~~~~~ * The internal ``babel/_compat.py`` module is no more (:gh:`808`) - Hugo van Kemenade * Python 3.10 is officially supported (:gh:`809`) - Hugo van Kemenade * There's now a friendly GitHub issue template. (:gh:`800`) – Álvaro Mondéjar Rubio * Don't use the deprecated format_number function internally or in tests - Aarni Koskela * Add GitHub URL for PyPi (:gh:`846`) - Andrii Oriekhov * Python 3.12 compatibility: Prefer setuptools imports to distutils imports (:gh:`843`) - Aarni Koskela * Python 3.11 compatibility: Add deprecations to l*gettext variants (:gh:`835`) - Aarni Koskela * CI: Babel is now tested with PyPy 3.7. (:gh:`851`) - Aarni Koskela Bugfixes ~~~~~~~~ * Date formatting: Allow using ``other`` as fallback form (:gh:`827`) - Aarni Koskela * Locales: ``Locale.parse()`` normalizes variant tags to upper case (:gh:`829`) - Aarni Koskela * A typo in the plural format for Maltese is fixed. (:gh:`796`) - Lukas Winkler * Messages: Catalog date parsing is now timezone independent. (:gh:`701`) - rachele-collin * Messages: Fix duplicate locations when writing without lineno (:gh:`837`) - Sigurd Ljødal * Messages: Fix missing trailing semicolon in plural form headers (:gh:`848`) - farhan5900 * CLI: Fix output of ``--list-locales`` to not be a bytes repr (:gh:`845`) - Morgan Wahl Documentation ~~~~~~~~~~~~~ * Documentation is now correctly built again, and up to date (:gh:`830`) - Aarni Koskela Version 2.9.1 ------------- Bugfixes ~~~~~~~~ * The internal locale-data loading functions now validate the name of the locale file to be loaded and only allow files within Babel's data directory. Thank you to Chris Lyne of Tenable, Inc. for discovering the issue! Version 2.9.0 ------------- Upcoming version support changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * This version, Babel 2.9, is the last version of Babel to support Python 2.7, Python 3.4, and Python 3.5. Improvements ~~~~~~~~~~~~ * CLDR: Use CLDR 37 – Aarni Koskela (:gh:`734`) * Dates: Handle ZoneInfo objects in get_timezone_location, get_timezone_name - Alessio Bogon (:gh:`741`) * Numbers: Add group_separator feature in number formatting - Abdullah Javed Nesar (:gh:`726`) Bugfixes ~~~~~~~~ * Dates: Correct default Format().timedelta format to 'long' to mute deprecation warnings – Aarni Koskela * Import: Simplify iteration code in "import_cldr.py" – Felix Schwarz * Import: Stop using deprecated ElementTree methods "getchildren()" and "getiterator()" – Felix Schwarz * Messages: Fix unicode printing error on Python 2 without TTY. – Niklas Hambüchen * Messages: Introduce invariant that _invalid_pofile() takes unicode line. 
– Niklas Hambüchen * Tests: fix tests when using Python 3.9 – Felix Schwarz * Tests: Remove deprecated 'sudo: false' from Travis configuration – Jon Dufresne * Tests: Support Py.test 6.x – Aarni Koskela * Utilities: LazyProxy: Handle AttributeError in specified func – Nikiforov Konstantin (:gh:`724`) * Utilities: Replace usage of parser.suite with ast.parse – Miro Hrončok Documentation ~~~~~~~~~~~~~ * Update parse_number comments – Brad Martin (:gh:`708`) * Add __iter__ to Catalog documentation – @CyanNani123 Version 2.8.1 ------------- This is solely a patch release to make running tests on Py.test 6+ possible. Bugfixes ~~~~~~~~ * Support Py.test 6 - Aarni Koskela (:gh:`747`, :gh:`750`, :gh:`752`) Version 2.8.0 ------------- Improvements ~~~~~~~~~~~~ * CLDR: Upgrade to CLDR 36.0 - Aarni Koskela (:gh:`679`) * Messages: Don't even open files with the "ignore" extraction method - @sebleblanc (:gh:`678`) Bugfixes ~~~~~~~~ * Numbers: Fix formatting very small decimals when quantization is disabled - Lev Lybin, @miluChen (:gh:`662`) * Messages: Attempt to sort all messages – Mario Frasca (:gh:`651`, :gh:`606`) Docs ~~~~ * Add years to changelog - Romuald Brunet * Note that installation requires pytz - Steve (Gadget) Barnes Version 2.7.0 ------------- Possibly incompatible changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ These may be backward incompatible in some cases, as some more-or-less internal APIs have changed. Please feel free to file issues if you bump into anything strange and we'll try to help! * General: Internal uses of ``babel.util.odict`` have been replaced with ``collections.OrderedDict`` from The Python standard library. Improvements ~~~~~~~~~~~~ * CLDR: Upgrade to CLDR 35.1 - Alberto Mardegan, Aarni Koskela (:gh:`626`, :gh:`643`) * General: allow anchoring path patterns to the start of a string - Brian Cappello (:gh:`600`) * General: Bumped version requirement on pytz - @chrisbrake (:gh:`592`) * Messages: `pybabel compile`: exit with code 1 if errors were encountered - Aarni Koskela (:gh:`647`) * Messages: Add omit-header to update_catalog - Cédric Krier (:gh:`633`) * Messages: Catalog update: keep user comments from destination by default - Aarni Koskela (:gh:`648`) * Messages: Skip empty message when writing mo file - Cédric Krier (:gh:`564`) * Messages: Small fixes to avoid crashes on badly formatted .po files - Bryn Truscott (:gh:`597`) * Numbers: `parse_decimal()` `strict` argument and `suggestions` - Charly C (:gh:`590`) * Numbers: don't repeat suggestions in parse_decimal strict - Serban Constantin (:gh:`599`) * Numbers: implement currency formatting with long display names - Luke Plant (:gh:`585`) * Numbers: parse_decimal(): assume spaces are equivalent to non-breaking spaces when not in strict mode - Aarni Koskela (:gh:`649`) * Performance: Cache locale_identifiers() - Aarni Koskela (:gh:`644`) Bugfixes ~~~~~~~~ * CLDR: Skip alt=... 
for week data (minDays, firstDay, weekendStart, weekendEnd) - Aarni Koskela (:gh:`634`) * Dates: Fix wrong weeknumber for 31.12.2018 - BT-sschmid (:gh:`621`) * Locale: Avoid KeyError trying to get data on WindowsXP - mondeja (:gh:`604`) * Locale: get_display_name(): Don't attempt to concatenate variant information to None - Aarni Koskela (:gh:`645`) * Messages: pofile: Add comparison operators to _NormalizedString - Aarni Koskela (:gh:`646`) * Messages: pofile: don't crash when message.locations can't be sorted - Aarni Koskela (:gh:`646`) Tooling & docs ~~~~~~~~~~~~~~ * Docs: Remove all references to deprecated easy_install - Jon Dufresne (:gh:`610`) * Docs: Switch print statement in docs to print function - NotAFile * Docs: Update all pypi.python.org URLs to pypi.org - Jon Dufresne (:gh:`587`) * Docs: Use https URLs throughout project where available - Jon Dufresne (:gh:`588`) * Support: Add testing and document support for Python 3.7 - Jon Dufresne (:gh:`611`) * Support: Test on Python 3.8-dev - Aarni Koskela (:gh:`642`) * Support: Using ABCs from collections instead of collections.abc is deprecated. - Julien Palard (:gh:`609`) * Tests: Fix conftest.py compatibility with pytest 4.3 - Miro Hrončok (:gh:`635`) * Tests: Update pytest and pytest-cov - Miro Hrončok (:gh:`635`) Version 2.6.0 ------------- Possibly incompatible changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ These may be backward incompatible in some cases, as some more-or-less internal APIs have changed. Please feel free to file issues if you bump into anything strange and we'll try to help! * Numbers: Refactor decimal handling code and allow bypass of decimal quantization. (@kdeldycke) (PR :gh:`538`) * Messages: allow processing files that are in locales unknown to Babel (@akx) (PR :gh:`557`) * General: Drop support for EOL Python 2.6 and 3.3 (@hugovk) (PR :gh:`546`) Other changes ~~~~~~~~~~~~~ * CLDR: Use CLDR 33 (@akx) (PR :gh:`581`) * Lists: Add support for various list styles other than the default (@akx) (:gh:`552`) * Messages: Add new PoFileError exception (@Bedrock02) (PR :gh:`532`) * Times: Simplify Linux distro specific explicit timezone setting search (@scop) (PR :gh:`528`) Bugfixes ~~~~~~~~ * CLDR: avoid importing alt=narrow currency symbols (@akx) (PR :gh:`558`) * CLDR: ignore non-Latin numbering systems (@akx) (PR :gh:`579`) * Docs: Fix improper example for date formatting (@PTrottier) (PR :gh:`574`) * Tooling: Fix some deprecation warnings (@akx) (PR :gh:`580`) Tooling & docs ~~~~~~~~~~~~~~ * Add explicit signatures to some date autofunctions (@xmo-odoo) (PR :gh:`554`) * Include license file in the generated wheel package (@jdufresne) (PR :gh:`539`) * Python 3.6 invalid escape sequence deprecation fixes (@scop) (PR :gh:`528`) * Test and document all supported Python versions (@jdufresne) (PR :gh:`540`) * Update copyright header years and authors file (@akx) (PR :gh:`559`) Version 2.5.3 ------------- This is a maintenance release that reverts undesired API-breaking changes that slipped into 2.5.2 (see :gh:`550`). It is based on v2.5.1 (f29eccd) with commits 7cedb84, 29da2d2 and edfb518 cherry-picked on top. 
Version 2.5.2 ------------- Bugfixes ~~~~~~~~ * Revert the unnecessary PyInstaller fixes from 2.5.0 and 2.5.1 (:gh:`533`) (@yagebu) Version 2.5.1 ------------- Minor Improvements and bugfixes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Use a fixed datetime to avoid test failures (:gh:`520`) (@narendravardi) * Parse multi-line __future__ imports better (:gh:`519`) (@akx) * Fix validate_currency docstring (:gh:`522`) * Allow normalize_locale and exists to handle various unexpected inputs (:gh:`523`) (@suhojm) * Make PyInstaller support more robust (:gh:`525`, :gh:`526`) (@thijstriemstra, @akx) Version 2.5.0 ------------- New Features ~~~~~~~~~~~~ * Numbers: Add currency utilities and helpers (:gh:`491`) (@kdeldycke) * Support PyInstaller (:gh:`500`, :gh:`505`) (@wodo) Minor Improvements and bugfixes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Dates: Add __str__ to DateTimePattern (:gh:`515`) (@sfermigier) * Dates: Fix an invalid string to bytes comparison when parsing TZ files on Py3 (:gh:`498`) (@rowillia) * Dates: Formatting zero-padded components of dates is faster (:gh:`517`) (@akx) * Documentation: Fix "Good Commits" link in CONTRIBUTING.md (:gh:`511`) (@naryanacharya6) * Documentation: Fix link to Python gettext module (:gh:`512`) (@Linkid) * Messages: Allow both dash and underscore separated locale identifiers in pofiles (:gh:`489`, :gh:`490`) (@akx) * Messages: Extract Python messages in nested gettext calls (:gh:`488`) (@sublee) * Messages: Fix in-place editing of dir list while iterating (:gh:`476`, :gh:`492`) (@MarcDufresne) * Messages: Stabilize sort order (:gh:`482`) (@xavfernandez) * Time zones: Honor the no-inherit marker for metazone names (:gh:`405`) (@akx) Version 2.4.0 ------------- New Features ~~~~~~~~~~~~ Some of these changes might break your current code and/or tests. 
* CLDR: CLDR 29 is now used instead of CLDR 28 (:gh:`405`) (@akx) * Messages: Add option 'add_location' for location line formatting (:gh:`438`, :gh:`459`) (@rrader, @alxpy) * Numbers: Allow full control of decimal behavior (:gh:`410`) (@etanol) Minor Improvements and bugfixes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Documentation: Improve Date Fields descriptions (:gh:`450`) (@ldwoolley) * Documentation: Typo fixes and documentation improvements (:gh:`406`, :gh:`412`, :gh:`403`, :gh:`440`, :gh:`449`, :gh:`463`) (@zyegfryed, @adamchainz, @jwilk, @akx, @roramirez, @abhishekcs10) * Messages: Default to UTF-8 source encoding instead of ISO-8859-1 (:gh:`399`) (@asottile) * Messages: Ensure messages are extracted in the order they were passed in (:gh:`424`) (@ngrilly) * Messages: Message extraction for JSX files is improved (:gh:`392`, :gh:`396`, :gh:`425`) (@karloskar, @georgschoelly) * Messages: PO file reading supports multi-line obsolete units (:gh:`429`) (@mbirtwell) * Messages: Python message extractor respects unicode_literals in __future__ (:gh:`427`) (@sublee) * Messages: Roundtrip Language headers (:gh:`420`) (@kruton) * Messages: units before obsolete units are no longer erroneously marked obsolete (:gh:`452`) (@mbirtwell) * Numbers: `parse_pattern` now preserves the full original pattern (:gh:`414`) (@jtwang) * Numbers: Fix float conversion in `extract_operands` (:gh:`435`) (@akx) * Plurals: Fix plural forms for Czech and Slovak locales (:gh:`373`) (@ykshatroff) * Plurals: More plural form fixes based on Mozilla and CLDR references (:gh:`431`) (@mshenfield) Internal improvements ~~~~~~~~~~~~~~~~~~~~~ * Local times are constructed correctly in tests (:gh:`411`) (@etanol) * Miscellaneous small improvements (:gh:`437`) (@scop) * Regex flags are extracted from the regex strings (:gh:`462`) (@singingwolfboy) * The PO file reader is now a class and has seen some refactoring (:gh:`429`, :gh:`452`) (@mbirtwell) Version 2.3.4 ------------- (Bugfix release, released on April 22th 2016) Bugfixes ~~~~~~~~ * CLDR: The lxml library is no longer used for CLDR importing, so it should not cause strange failures either. Thanks to @aronbierbaum for the bug report and @jtwang for the fix. (:gh:`393`) * CLI: Every last single CLI usage regression should now be gone, and both distutils and stand-alone CLIs should work as they have in the past. Thanks to @paxswill and @ajaeger for bug reports. (:gh:`389`) Version 2.3.3 ------------- (Bugfix release, released on April 12th 2016) Bugfixes ~~~~~~~~ * CLI: Usage regressions that had snuck in between 2.2 and 2.3 should be no more. (:gh:`386`) Thanks to @ajaeger, @sebdiem and @jcristovao for bug reports and patches. Version 2.3.2 ------------- (Bugfix release, released on April 9th 2016) Bugfixes ~~~~~~~~ * Dates: Period (am/pm) formatting was broken in certain locales (namely zh_TW). Thanks to @jun66j5 for the bug report. 
(:gh:`378`, :gh:`379`) Version 2.3.1 ------------- (Bugfix release because of deployment problems, released on April 8th 2016) Version 2.3 ----------- (Feature release, released on April 8th 2016) Internal improvements ~~~~~~~~~~~~~~~~~~~~~ * The CLI frontend and Distutils commands use a shared implementation (:gh:`311`) * PyPy3 is supported (:gh:`343`) Features ~~~~~~~~ * CLDR: Add an API for territory language data (:gh:`315`) * Core: Character order and measurement system data is imported and exposed (:gh:`368`) * Dates: Add an API for time interval formatting (:gh:`316`) * Dates: More pattern formats and lengths are supported (:gh:`347`) * Dates: Period IDs are imported and exposed (:gh:`349`) * Dates: Support for date-time skeleton formats has been added (:gh:`265`) * Dates: Timezone formatting has been improved (:gh:`338`) * Messages: JavaScript extraction now supports dotted names, ES6 template strings and JSX tags (:gh:`332`) * Messages: npgettext is recognized by default (:gh:`341`) * Messages: The CLI learned to accept multiple domains (:gh:`335`) * Messages: The extraction commands now accept filenames in addition to directories (:gh:`324`) * Units: A new API for unit formatting is implemented (:gh:`369`) Bugfixes ~~~~~~~~ * Core: Mixed-case locale IDs work more reliably (:gh:`361`) * Dates: S...S formats work correctly now (:gh:`360`) * Messages: All messages are now sorted correctly if sorting has been specified (:gh:`300`) * Messages: Fix the unexpected behavior caused by catalog header updating (e0e7ef1) (:gh:`320`) * Messages: Gettext operands are now generated correctly (:gh:`295`) * Messages: Message extraction has been taught to detect encodings better (:gh:`274`) Version 2.2 ----------- (Feature release, released on January 2nd 2016) Bugfixes ~~~~~~~~ * General: Add __hash__ to Locale. (:gh:`303`) (2aa8074) * General: Allow files with BOM if they're UTF-8 (:gh:`189`) (da87edd) * General: localedata directory is now locale-data (:gh:`109`) (2d1882e) * General: odict: Fix pop method (0a9e97e) * General: Removed uses of datetime.date class from .dat files (:gh:`174`) (94f6830) * Messages: Fix plural selection for Chinese (531f666) * Messages: Fix typo and add semicolon in plural_forms (5784501) * Messages: Flatten NullTranslations.files into a list (ad11101) * Times: FixedOffsetTimezone: fix display of negative offsets (d816803) Features ~~~~~~~~ * CLDR: Update to CLDR 28 (:gh:`292`) (9f7f4d0) * General: Add __copy__ and __deepcopy__ to LazyProxy. 
(a1cc3f1) * General: Add official support for Python 3.4 and 3.5 * General: Improve odict performance by making key search O(1) (6822b7f) * Locale: Add an ordinal_form property to Locale (:gh:`270`) (b3f3430) * Locale: Add support for list formatting (37ce4fa, be6e23d) * Locale: Check inheritance exceptions first (3ef0d6d) * Messages: Allow file locations without line numbers (:gh:`279`) (79bc781) * Messages: Allow passing a callable to `extract()` (:gh:`289`) (3f58516) * Messages: Support 'Language' header field of PO files (:gh:`76`) (3ce842b) * Messages: Update catalog headers from templates (e0e7ef1) * Numbers: Properly load and expose currency format types (:gh:`201`) (df676ab) * Numbers: Use cdecimal by default when available (b6169be) * Numbers: Use the CLDR's suggested number of decimals for format_currency (:gh:`139`) (201ed50) * Times: Add format_timedelta(format='narrow') support (edc5eb5) Version 2.1 ----------- (Bugfix/minor feature release, released on September 25th 2015) - Parse and honour the locale inheritance exceptions (:gh:`97`) - Fix Locale.parse using ``global.dat`` incompatible types (:gh:`174`) - Fix display of negative offsets in ``FixedOffsetTimezone`` (:gh:`214`) - Improved odict performance which is used during localization file build, should improve compilation time for large projects - Add support for "narrow" format for ``format_timedelta`` - Add universal wheel support - Support 'Language' header field in .PO files (fixes :gh:`76`) - Test suite enhancements (coverage, broken tests fixed, etc) - Documentation updated Version 2.0 ----------- (Released on July 27th 2015, codename Second Coming) - Added support for looking up currencies that belong to a territory through the :func:`babel.numbers.get_territory_currencies` function. - Improved Python 3 support. - Fixed some broken tests for timezone behavior. - Improved various smaller things for dealing with dates. Version 1.4 ----------- (bugfix release, release date to be decided) - Fixed a bug that caused deprecated territory codes not being converted properly by the subtag resolving. This for instance showed up when trying to use ``und_UK`` as a language code which now properly resolves to ``en_GB``. - Fixed a bug that made it impossible to import the CLDR data from scratch on windows systems. Version 1.3 ----------- (bugfix release, released on July 29th 2013) - Fixed a bug in likely-subtag resolving for some common locales. This primarily makes ``zh_CN`` work again which was broken due to how it was defined in the likely subtags combined with our broken resolving. This fixes :gh:`37`. - Fixed a bug that caused pybabel to break when writing to stdout on Python 3. - Removed a stray print that was causing issues when writing to stdout for message catalogs. Version 1.2 ----------- (bugfix release, released on July 27th 2013) - Included all tests in the tarball. Previously the include skipped past recursive folders. - Changed how tests are invoked and added separate standalone test command. This simplifies testing of the package for linux distributors. Version 1.1 ----------- (bugfix release, released on July 27th 2013) - added dummy version requirements for pytz so that it installs on pip 1.4. - Included tests in the tarball. Version 1.0 ----------- (Released on July 26th 2013, codename Revival) - support python 2.6, 2.7, 3.3+ and pypy - drop all other versions - use tox for testing on different pythons - Added support for the locale plural rules defined by the CLDR. 
- Added `format_timedelta` function to support localized formatting of relative times with strings such as "2 days" or "1 month" (:trac:`126`). - Fixed negative offset handling of Catalog._set_mime_headers (:trac:`165`). - Fixed the case where messages containing square brackets would break with an unpack error. - updated to CLDR 23 - Make the CLDR import script work with Python 2.7. - Fix various typos. - Sort output of list-locales. - Make the POT-Creation-Date of the catalog being updated equal to POT-Creation-Date of the template used to update (:trac:`148`). - Use a more explicit error message if no option or argument (command) is passed to pybabel (:trac:`81`). - Keep the PO-Revision-Date if it is not the default value (:trac:`148`). - Make --no-wrap work by reworking --width's default and mimic xgettext's behaviour of always wrapping comments (:trac:`145`). - Add --project and --version options for commandline (:trac:`173`). - Add a __ne__() method to the Local class. - Explicitly sort instead of using sorted() and don't assume ordering (Jython compatibility). - Removed ValueError raising for string formatting message checkers if the string does not contain any string formatting (:trac:`150`). - Fix Serbian plural forms (:trac:`213`). - Small speed improvement in format_date() (:trac:`216`). - Fix so frontend.CommandLineInterface.run does not accumulate logging handlers (:trac:`227`, reported with initial patch by dfraser) - Fix exception if environment contains an invalid locale setting (:trac:`200`) - use cPickle instead of pickle for better performance (:trac:`225`) - Only use bankers round algorithm as a tie breaker if there are two nearest numbers, round as usual if there is only one nearest number (:trac:`267`, patch by Martin) - Allow disabling cache behaviour in LazyProxy (:trac:`208`, initial patch from Pedro Algarvio) - Support for context-aware methods during message extraction (:trac:`229`, patch from David Rios) - "init" and "update" commands support "--no-wrap" option (:trac:`289`) - fix formatting of fraction in format_decimal() if the input value is a float with more than 7 significant digits (:trac:`183`) - fix format_date() with datetime parameter (:trac:`282`, patch from Xavier Morel) - fix format_decimal() with small Decimal values (:trac:`214`, patch from George Lund) - fix handling of messages containing '\\n' (:trac:`198`) - handle irregular multi-line msgstr (no "" as first line) gracefully (:trac:`171`) - parse_decimal() now returns Decimals not floats, API change (:trac:`178`) - no warnings when running setup.py without installed setuptools (:trac:`262`) - modified Locale.__eq__ method so Locales are only equal if all of their attributes (language, territory, script, variant) are equal - resort to hard-coded message extractors/checkers if pkg_resources is installed but no egg-info was found (:trac:`230`) - format_time() and format_datetime() now accept also floats (:trac:`242`) - add babel.support.NullTranslations class similar to gettext.NullTranslations but with all of Babel's new gettext methods (:trac:`277`) - "init" and "update" commands support "--width" option (:trac:`284`) - fix 'input_dirs' option for setuptools integration (:trac:`232`, initial patch by Étienne Bersac) - ensure .mo file header contains the same information as the source .po file (:trac:`199`) - added support for get_language_name() on the locale objects. - added support for get_territory_name() on the locale objects. - added support for get_script_name() on the locale objects. 
- added pluralization support for currency names and added a '¤¤¤' pattern for currencies that includes the full name. - depend on pytz now and wrap it nicer. This gives us improved support for things like timezone transitions and an overall nicer API. - Added support for explicit charset to PO file reading. - Added experimental Python 3 support. - Added better support for returning timezone names. - Don't throw away a Catalog's obsolete messages when updating it. - Added basic likelySubtag resolving when doing locale parsing and no match can be found. Version 0.9.6 ------------- (released on March 17th 2011) - Backport r493-494: documentation typo fixes. - Make the CLDR import script work with Python 2.7. - Fix various typos. - Fixed Python 2.3 compatibility (:trac:`146`, :trac:`233`). - Sort output of list-locales. - Make the POT-Creation-Date of the catalog being updated equal to POT-Creation-Date of the template used to update (:trac:`148`). - Use a more explicit error message if no option or argument (command) is passed to pybabel (:trac:`81`). - Keep the PO-Revision-Date if it is not the default value (:trac:`148`). - Make --no-wrap work by reworking --width's default and mimic xgettext's behaviour of always wrapping comments (:trac:`145`). - Fixed negative offset handling of Catalog._set_mime_headers (:trac:`165`). - Add --project and --version options for commandline (:trac:`173`). - Add a __ne__() method to the Local class. - Explicitly sort instead of using sorted() and don't assume ordering (Python 2.3 and Jython compatibility). - Removed ValueError raising for string formatting message checkers if the string does not contain any string formatting (:trac:`150`). - Fix Serbian plural forms (:trac:`213`). - Small speed improvement in format_date() (:trac:`216`). - Fix number formatting for locales where CLDR specifies alt or draft items (:trac:`217`) - Fix bad check in format_time (:trac:`257`, reported with patch and tests by jomae) - Fix so frontend.CommandLineInterface.run does not accumulate logging handlers (:trac:`227`, reported with initial patch by dfraser) - Fix exception if environment contains an invalid locale setting (:trac:`200`) Version 0.9.5 ------------- (released on April 6th 2010) - Fixed the case where messages containing square brackets would break with an unpack error. - Backport of r467: Fuzzy matching regarding plurals should *NOT* be checked against len(message.id) because this is always 2, instead, it's should be checked against catalog.num_plurals (:trac:`212`). Version 0.9.4 ------------- (released on August 25th 2008) - Currency symbol definitions that is defined with choice patterns in the CLDR data are no longer imported, so the symbol code will be used instead. - Fixed quarter support in date formatting. - Fixed a serious memory leak that was introduces by the support for CLDR aliases in 0.9.3 (:trac:`128`). - Locale modifiers such as "@euro" are now stripped from locale identifiers when parsing (:trac:`136`). - The system locales "C" and "POSIX" are now treated as aliases for "en_US_POSIX", for which the CLDR provides the appropriate data. Thanks to Manlio Perillo for the suggestion. - Fixed JavaScript extraction for regular expression literals (:trac:`138`) and concatenated strings. - The `Translation` class in `babel.support` can now manage catalogs with different message domains, and exposes the family of `d*gettext` functions (:trac:`137`). 
Version 0.9.3 ------------- (released on July 9th 2008) - Fixed invalid message extraction methods causing an UnboundLocalError. - Extraction method specification can now use a dot instead of the colon to separate module and function name (:trac:`105`). - Fixed message catalog compilation for locales with more than two plural forms (:trac:`95`). - Fixed compilation of message catalogs for locales with more than two plural forms where the translations were empty (:trac:`97`). - The stripping of the comment tags in comments is optional now and is done for each line in a comment. - Added a JavaScript message extractor. - Updated to CLDR 1.6. - Fixed timezone calculations when formatting datetime and time values. - Added a `get_plural` function into the plurals module that returns the correct plural forms for a locale as tuple. - Added support for alias definitions in the CLDR data files, meaning that the chance for items missing in certain locales should be greatly reduced (:trac:`68`). Version 0.9.2 ------------- (released on February 4th 2008) - Fixed catalogs' charset values not being recognized (:trac:`66`). - Numerous improvements to the default plural forms. - Fixed fuzzy matching when updating message catalogs (:trac:`82`). - Fixed bug in catalog updating, that in some cases pulled in translations from different catalogs based on the same template. - Location lines in PO files do no longer get wrapped at hyphens in file names (:trac:`79`). - Fixed division by zero error in catalog compilation on empty catalogs (:trac:`60`). Version 0.9.1 ------------- (released on September 7th 2007) - Fixed catalog updating when a message is merged that was previously simple but now has a plural form, for example by moving from `gettext` to `ngettext`, or vice versa. - Fixed time formatting for 12 am and 12 pm. - Fixed output encoding of the `pybabel --list-locales` command. - MO files are now written in binary mode on windows (:trac:`61`). Version 0.9 ----------- (released on August 20th 2007) - The `new_catalog` distutils command has been renamed to `init_catalog` for consistency with the command-line frontend. - Added compilation of message catalogs to MO files (:trac:`21`). - Added updating of message catalogs from POT files (:trac:`22`). - Support for significant digits in number formatting. - Apply proper "banker's rounding" in number formatting in a cross-platform manner. - The number formatting functions now also work with numbers represented by Python `Decimal` objects (:trac:`53`). - Added extensible infrastructure for validating translation catalogs. - Fixed the extractor not filtering out messages that didn't validate against the keyword's specification (:trac:`39`). - Fixed the extractor raising an exception when encountering an empty string msgid. It now emits a warning to stderr. - Numerous Python message extractor fixes: it now handles nested function calls within a gettext function call correctly, uses the correct line number for multi-line function calls, and other small fixes (tickets :trac:`38` and :trac:`39`). - Improved support for detecting Python string formatting fields in message strings (:trac:`57`). - CLDR upgraded to the 1.5 release. - Improved timezone formatting. - Implemented scientific number formatting. - Added mechanism to lookup locales by alias, for cases where browsers insist on including only the language code in the `Accept-Language` header, and sometimes even the incorrect language code. 
Version 0.8.1 ------------- (released on July 2nd 2007) - `default_locale()` would fail when the value of the `LANGUAGE` environment variable contained multiple language codes separated by colon, as is explicitly allowed by the GNU gettext tools. As the `default_locale()` function is called at the module level in some modules, this bug would completely break importing these modules on systems where `LANGUAGE` is set that way. - The character set specified in PO template files is now respected when creating new catalog files based on that template. This allows the use of characters outside the ASCII range in POT files (:trac:`17`). - The default ordering of messages in generated POT files, which is based on the order those messages are found when walking the source tree, is no longer subject to differences between platforms; directory and file names are now always sorted alphabetically. - The Python message extractor now respects the special encoding comment to be able to handle files containing non-ASCII characters (:trac:`23`). - Added ``N_`` (gettext noop) to the extractor's default keywords. - Made locale string parsing more robust, and also take the script part into account (:trac:`27`). - Added a function to list all locales for which locale data is available. - Added a command-line option to the `pybabel` command which prints out all available locales (:trac:`24`). - The name of the command-line script has been changed from just `babel` to `pybabel` to avoid a conflict with the OpenBabel project (:trac:`34`). Version 0.8 ----------- (released on June 20th 2007) - First public release babel-2.17.0/CONTRIBUTING.md000066400000000000000000000041211474743505200151100ustar00rootroot00000000000000# Babel Contribution Guidelines Welcome to Babel! These guidelines will give you a short overview over how we handle issues and PRs in this repository. Note that they are preliminary and still need proper phrasing - if you'd like to help - be sure to make a PR. Please know that we do appreciate all contributions - bug reports as well as Pull Requests. ## Setting up a development environment and running tests After you've cloned the repository, 1. Set up a Python virtualenv (the methods vary depending on tooling and operating system) and activate it. 2. Install Babel in editable mode with development dependencies: `pip install -e .[dev]` 3. Run `make import-cldr` to import the CLDR database. This will download the CLDR database and convert it to a format that Babel can use. 4. Run `make test` to run the tests. You can also run e.g. `pytest --cov babel .` to run the tests with coverage reporting enabled. You can also use [Tox][tox] to run the tests in separate virtualenvs for all supported Python versions; a `tox.ini` configuration (which is what the CI process uses) is included in the repository. ## On pull requests ### PR Merge Criteria For a PR to be merged, the following statements must hold true: - All CI services pass. (Windows build, linux build, sufficient test coverage.) - All commits must have been reviewed and approved by a babel maintainer who is not the author of the PR. Commits shall comply to the "Good Commits" standards outlined below. To begin contributing have a look at the open [easy issues](https://github.com/python-babel/babel/issues?q=is%3Aopen+is%3Aissue+label%3Adifficulty%2Flow) which could be fixed. ### Correcting PRs Rebasing PRs is preferred over merging master into the source branches again and again cluttering our history. 
If a reviewer has suggestions, the commit shall be amended so the history is
not cluttered by "fixup commits".

### Writing Good Commits

Please see https://api.coala.io/en/latest/Developers/Writing_Good_Commits.html
for guidelines on how to write good commits and proper commit messages.

[tox]: https://tox.wiki/en/latest/

babel-2.17.0/LICENSE

Copyright (c) 2013-2025 by the Babel Team, see AUTHORS for more information.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the copyright holder nor the names of its contributors
    may be used to endorse or promote products derived from this software
    without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

babel-2.17.0/MANIFEST.in

include Makefile CHANGES.rst LICENSE AUTHORS
include conftest.py tox.ini
include babel/global.dat
include babel/locale-data/*.dat
include babel/locale-data/LICENSE*
recursive-include docs *
recursive-exclude docs/_build *
include scripts/*
recursive-include tests *
recursive-exclude tests *.pyc
recursive-exclude tests *.pyo

babel-2.17.0/Makefile

test: import-cldr
	python ${PYTHON_TEST_FLAGS} -m pytest ${PYTEST_FLAGS}

clean: clean-cldr clean-pyc

import-cldr:
	python scripts/download_import_cldr.py

clean-cldr:
	rm -f babel/locale-data/*.dat
	rm -f babel/global.dat

clean-pyc:
	find . -name '*.pyc' -exec rm {} \;
	find . -name '__pycache__' -type d | xargs rm -rf

develop:
	pip install --editable .

tox-test:
	tox

.PHONY: test develop tox-test clean-pyc clean-cldr import-cldr clean standalone-test

babel-2.17.0/README.rst

About Babel
===========

Babel is a Python library that provides an integrated collection of
utilities that assist with internationalizing and localizing Python
applications (in particular web-based applications.)

Details can be found in the HTML files in the ``docs`` folder.

For more information please visit the Babel web site:
https://babel.pocoo.org/

Join the chat at https://gitter.im/python-babel/babel

Contributing to Babel
=====================

If you want to contribute code to Babel, please take a look at our
`CONTRIBUTING.md `__.

If you know your way around Babel's codebase a bit and like to help further,
we would appreciate any help in reviewing pull requests. Please contact us at
https://gitter.im/python-babel/babel if you're interested!
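As a quick taste of the functionality described above, here is a minimal
sketch; it assumes Babel is installed with its CLDR-derived locale data built
(for a source checkout, ``make import-cldr`` takes care of that)::

    from datetime import date

    from babel import Locale
    from babel.dates import format_date
    from babel.numbers import format_decimal

    # Locale objects expose CLDR data such as localized display names.
    locale = Locale.parse('de_DE')
    print(locale.display_name)                            # Deutsch (Deutschland)

    # Locale-aware date and number formatting.
    print(format_date(date(2025, 2, 1), locale=locale))   # 01.02.2025
    print(format_decimal(1234567.89, locale=locale))      # 1.234.567,89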
If you know your way around Babel's codebase a bit and would like to help further, we would appreciate any help in reviewing pull requests. Please contact us at https://gitter.im/python-babel/babel if you're interested! babel-2.17.0/babel/000077500000000000000000000000001474743505200137265ustar00rootroot00000000000000babel-2.17.0/babel/__init__.py000066400000000000000000000015621474743505200160430ustar00rootroot00000000000000""" babel ~~~~~ Integrated collection of utilities that assist in internationalizing and localizing applications. This package is basically composed of two major parts: * tools to build and work with ``gettext`` message catalogs * a Python interface to the CLDR (Common Locale Data Repository), providing access to various locale display names, localized number and date formatting, etc. :copyright: (c) 2013-2025 by the Babel Team. :license: BSD, see LICENSE for more details. """ from babel.core import ( Locale, UnknownLocaleError, default_locale, get_locale_identifier, negotiate_locale, parse_locale, ) __version__ = '2.17.0' __all__ = [ 'Locale', 'UnknownLocaleError', '__version__', 'default_locale', 'get_locale_identifier', 'negotiate_locale', 'parse_locale', ] babel-2.17.0/babel/core.py000066400000000000000000001304731474743505200152400ustar00rootroot00000000000000""" babel.core ~~~~~~~~~~ Core locale representation and locale data access. :copyright: (c) 2013-2025 by the Babel Team. :license: BSD, see LICENSE for more details. """ from __future__ import annotations import os import pickle from collections.abc import Iterable, Mapping from typing import TYPE_CHECKING, Any, Literal from babel import localedata from babel.plural import PluralRule __all__ = [ 'Locale', 'UnknownLocaleError', 'default_locale', 'get_global', 'get_locale_identifier', 'negotiate_locale', 'parse_locale', ] if TYPE_CHECKING: from typing_extensions import TypeAlias _GLOBAL_KEY: TypeAlias = Literal[ "all_currencies", "currency_fractions", "language_aliases", "likely_subtags", "meta_zones", "parent_exceptions", "script_aliases", "territory_aliases", "territory_currencies", "territory_languages", "territory_zones", "variant_aliases", "windows_zone_mapping", "zone_aliases", "zone_territories", ] _global_data: Mapping[_GLOBAL_KEY, Mapping[str, Any]] | None _global_data = None _default_plural_rule = PluralRule({}) def _raise_no_data_error(): raise RuntimeError('The babel data files are not available. ' 'This usually happens because you are using ' 'a source checkout from Babel and you did ' 'not build the data files. Just make sure ' 'to run "python setup.py import_cldr" before ' 'installing the library.') def get_global(key: _GLOBAL_KEY) -> Mapping[str, Any]: """Return the dictionary for the given key in the global data. The global data is stored in the ``babel/global.dat`` file and contains information independent of individual locales. >>> get_global('zone_aliases')['UTC'] u'Etc/UTC' >>> get_global('zone_territories')['Europe/Berlin'] u'DE' The keys available are: - ``all_currencies`` - ``currency_fractions`` - ``language_aliases`` - ``likely_subtags`` - ``meta_zones`` - ``parent_exceptions`` - ``script_aliases`` - ``territory_aliases`` - ``territory_currencies`` - ``territory_languages`` - ``territory_zones`` - ``variant_aliases`` - ``windows_zone_mapping`` - ``zone_aliases`` - ``zone_territories`` .. note:: The internal structure of the data may change between versions. ..
versionadded:: 0.9 :param key: the data key """ global _global_data if _global_data is None: dirname = os.path.join(os.path.dirname(__file__)) filename = os.path.join(dirname, 'global.dat') if not os.path.isfile(filename): _raise_no_data_error() with open(filename, 'rb') as fileobj: _global_data = pickle.load(fileobj) assert _global_data is not None return _global_data.get(key, {}) LOCALE_ALIASES = { 'ar': 'ar_SY', 'bg': 'bg_BG', 'bs': 'bs_BA', 'ca': 'ca_ES', 'cs': 'cs_CZ', 'da': 'da_DK', 'de': 'de_DE', 'el': 'el_GR', 'en': 'en_US', 'es': 'es_ES', 'et': 'et_EE', 'fa': 'fa_IR', 'fi': 'fi_FI', 'fr': 'fr_FR', 'gl': 'gl_ES', 'he': 'he_IL', 'hu': 'hu_HU', 'id': 'id_ID', 'is': 'is_IS', 'it': 'it_IT', 'ja': 'ja_JP', 'km': 'km_KH', 'ko': 'ko_KR', 'lt': 'lt_LT', 'lv': 'lv_LV', 'mk': 'mk_MK', 'nl': 'nl_NL', 'nn': 'nn_NO', 'no': 'nb_NO', 'pl': 'pl_PL', 'pt': 'pt_PT', 'ro': 'ro_RO', 'ru': 'ru_RU', 'sk': 'sk_SK', 'sl': 'sl_SI', 'sv': 'sv_SE', 'th': 'th_TH', 'tr': 'tr_TR', 'uk': 'uk_UA', } class UnknownLocaleError(Exception): """Exception thrown when a locale is requested for which no locale data is available. """ def __init__(self, identifier: str) -> None: """Create the exception. :param identifier: the identifier string of the unsupported locale """ Exception.__init__(self, f"unknown locale {identifier!r}") #: The identifier of the locale that could not be found. self.identifier = identifier class Locale: """Representation of a specific locale. >>> locale = Locale('en', 'US') >>> repr(locale) "Locale('en', territory='US')" >>> locale.display_name u'English (United States)' A `Locale` object can also be instantiated from a raw locale string: >>> locale = Locale.parse('en-US', sep='-') >>> repr(locale) "Locale('en', territory='US')" `Locale` objects provide access to a collection of locale data, such as territory and language names, number and date format patterns, and more: >>> locale.number_symbols['latn']['decimal'] u'.' If a locale is requested for which no locale data is available, an `UnknownLocaleError` is raised: >>> Locale.parse('en_XX') Traceback (most recent call last): ... UnknownLocaleError: unknown locale 'en_XX' For more information see :rfc:`3066`. """ def __init__( self, language: str, territory: str | None = None, script: str | None = None, variant: str | None = None, modifier: str | None = None, ) -> None: """Initialize the locale object from the given identifier components. 
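# A minimal sketch of constructing a Locale and handling missing data, assuming
# Babel is installed with its CLDR data files built:
from babel import Locale, UnknownLocaleError

try:
    locale = Locale('en', 'US')
    print(locale.display_name)  # e.g. 'English (United States)'
except UnknownLocaleError as exc:
    # exc.identifier holds the identifier for which no locale data was found
    print(f"no data for {exc.identifier}")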
>>> locale = Locale('en', 'US') >>> locale.language 'en' >>> locale.territory 'US' :param language: the language code :param territory: the territory (country or region) code :param script: the script code :param variant: the variant code :param modifier: a modifier (following the '@' symbol, sometimes called '@variant') :raise `UnknownLocaleError`: if no locale data is available for the requested locale """ #: the language code self.language = language #: the territory (country or region) code self.territory = territory #: the script code self.script = script #: the variant code self.variant = variant #: the modifier self.modifier = modifier self.__data: localedata.LocaleDataDict | None = None identifier = str(self) identifier_without_modifier = identifier.partition('@')[0] if localedata.exists(identifier): self.__data_identifier = identifier elif localedata.exists(identifier_without_modifier): self.__data_identifier = identifier_without_modifier else: raise UnknownLocaleError(identifier) @classmethod def default(cls, category: str | None = None, aliases: Mapping[str, str] = LOCALE_ALIASES) -> Locale: """Return the system default locale for the specified category. >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LC_MESSAGES']: ... os.environ[name] = '' >>> os.environ['LANG'] = 'fr_FR.UTF-8' >>> Locale.default('LC_MESSAGES') Locale('fr', territory='FR') The following fallbacks to the variable are always considered: - ``LANGUAGE`` - ``LC_ALL`` - ``LC_CTYPE`` - ``LANG`` :param category: one of the ``LC_XXX`` environment variable names :param aliases: a dictionary of aliases for locale identifiers """ # XXX: use likely subtag expansion here instead of the # aliases dictionary. locale_string = default_locale(category, aliases=aliases) return cls.parse(locale_string) @classmethod def negotiate( cls, preferred: Iterable[str], available: Iterable[str], sep: str = '_', aliases: Mapping[str, str] = LOCALE_ALIASES, ) -> Locale | None: """Find the best match between available and requested locale strings. >>> Locale.negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT']) Locale('de', territory='DE') >>> Locale.negotiate(['de_DE', 'en_US'], ['en', 'de']) Locale('de') >>> Locale.negotiate(['de_DE', 'de'], ['en_US']) You can specify the character used in the locale identifiers to separate the different components. This separator is applied to both lists. Also, case is ignored in the comparison: >>> Locale.negotiate(['de-DE', 'de'], ['en-us', 'de-de'], sep='-') Locale('de', territory='DE') :param preferred: the list of locale identifiers preferred by the user :param available: the list of locale identifiers available :param aliases: a dictionary of aliases for locale identifiers :param sep: separator for parsing; e.g. Windows tends to use '-' instead of '_'. """ identifier = negotiate_locale(preferred, available, sep=sep, aliases=aliases) if identifier: return Locale.parse(identifier, sep=sep) return None @classmethod def parse( cls, identifier: Locale | str | None, sep: str = '_', resolve_likely_subtags: bool = True, ) -> Locale: """Create a `Locale` instance for the given locale identifier. >>> l = Locale.parse('de-DE', sep='-') >>> l.display_name u'Deutsch (Deutschland)' If the `identifier` parameter is not a string, but actually a `Locale` object, that object is returned: >>> Locale.parse(l) Locale('de', territory='DE') If the `identifier` parameter is neither of these, such as `None` or an empty string, e.g. 
because a default locale identifier could not be determined, a `TypeError` is raised: >>> Locale.parse(None) Traceback (most recent call last): ... TypeError: ... This also can perform resolving of likely subtags which it does by default. This is for instance useful to figure out the most likely locale for a territory you can use ``'und'`` as the language tag: >>> Locale.parse('und_AT') Locale('de', territory='AT') Modifiers are optional, and always at the end, separated by "@": >>> Locale.parse('de_AT@euro') Locale('de', territory='AT', modifier='euro') :param identifier: the locale identifier string :param sep: optional component separator :param resolve_likely_subtags: if this is specified then a locale will have its likely subtag resolved if the locale otherwise does not exist. For instance ``zh_TW`` by itself is not a locale that exists but Babel can automatically expand it to the full form of ``zh_hant_TW``. Note that this expansion is only taking place if no locale exists otherwise. For instance there is a locale ``en`` that can exist by itself. :raise `ValueError`: if the string does not appear to be a valid locale identifier :raise `UnknownLocaleError`: if no locale data is available for the requested locale :raise `TypeError`: if the identifier is not a string or a `Locale` :raise `ValueError`: if the identifier is not a valid string """ if isinstance(identifier, Locale): return identifier if not identifier: msg = ( f"Empty locale identifier value: {identifier!r}\n\n" f"If you didn't explicitly pass an empty value to a Babel function, " f"this could be caused by there being no suitable locale environment " f"variables for the API you tried to use.", ) if isinstance(identifier, str): raise ValueError(msg) # `parse_locale` would raise a ValueError, so let's do that here raise TypeError(msg) if not isinstance(identifier, str): raise TypeError(f"Unexpected value for identifier: {identifier!r}") parts = parse_locale(identifier, sep=sep) input_id = get_locale_identifier(parts) def _try_load(parts): try: return cls(*parts) except UnknownLocaleError: return None def _try_load_reducing(parts): # Success on first hit, return it. locale = _try_load(parts) if locale is not None: return locale # Now try without script and variant locale = _try_load(parts[:2]) if locale is not None: return locale locale = _try_load(parts) if locale is not None: return locale if not resolve_likely_subtags: raise UnknownLocaleError(input_id) # From here onwards is some very bad likely subtag resolving. This # whole logic is not entirely correct but good enough (tm) for the # time being. This has been added so that zh_TW does not cause # errors for people when they upgrade. Later we should properly # implement ICU like fuzzy locale objects and provide a way to # maximize and minimize locale tags. 
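# A short sketch of the likely-subtag resolution implemented in this part of
# Locale.parse; the exact expansions depend on the bundled CLDR data:
from babel import Locale

print(Locale.parse('zh_TW'))           # typically 'zh_Hant_TW'
print(Locale.parse('und_AT'))          # typically 'de_AT'
print(Locale.parse('de-DE', sep='-'))  # custom separators are supported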
if len(parts) == 5: language, territory, script, variant, modifier = parts else: language, territory, script, variant = parts modifier = None language = get_global('language_aliases').get(language, language) territory = get_global('territory_aliases').get(territory or '', (territory,))[0] script = get_global('script_aliases').get(script or '', script) variant = get_global('variant_aliases').get(variant or '', variant) if territory == 'ZZ': territory = None if script == 'Zzzz': script = None parts = language, territory, script, variant, modifier # First match: try the whole identifier new_id = get_locale_identifier(parts) likely_subtag = get_global('likely_subtags').get(new_id) if likely_subtag is not None: locale = _try_load_reducing(parse_locale(likely_subtag)) if locale is not None: return locale # If we did not find anything so far, try again with a # simplified identifier that is just the language likely_subtag = get_global('likely_subtags').get(language) if likely_subtag is not None: parts2 = parse_locale(likely_subtag) if len(parts2) == 5: language2, _, script2, variant2, modifier2 = parts2 else: language2, _, script2, variant2 = parts2 modifier2 = None locale = _try_load_reducing((language2, territory, script2, variant2, modifier2)) if locale is not None: return locale raise UnknownLocaleError(input_id) def __eq__(self, other: object) -> bool: for key in ('language', 'territory', 'script', 'variant', 'modifier'): if not hasattr(other, key): return False return ( self.language == getattr(other, 'language') and # noqa: B009 self.territory == getattr(other, 'territory') and # noqa: B009 self.script == getattr(other, 'script') and # noqa: B009 self.variant == getattr(other, 'variant') and # noqa: B009 self.modifier == getattr(other, 'modifier') # noqa: B009 ) def __ne__(self, other: object) -> bool: return not self.__eq__(other) def __hash__(self) -> int: return hash((self.language, self.territory, self.script, self.variant, self.modifier)) def __repr__(self) -> str: parameters = [''] for key in ('territory', 'script', 'variant', 'modifier'): value = getattr(self, key) if value is not None: parameters.append(f"{key}={value!r}") return f"Locale({self.language!r}{', '.join(parameters)})" def __str__(self) -> str: return get_locale_identifier((self.language, self.territory, self.script, self.variant, self.modifier)) @property def _data(self) -> localedata.LocaleDataDict: if self.__data is None: self.__data = localedata.LocaleDataDict(localedata.load(self.__data_identifier)) return self.__data def get_display_name(self, locale: Locale | str | None = None) -> str | None: """Return the display name of the locale using the given locale. The display name will include the language, territory, script, and variant, if those are specified. 
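# Because __eq__ and __hash__ are defined above, Locale objects compare by their
# components and can serve as dictionary keys, e.g. for mapping locales to
# catalog files; a small sketch (the file name below is only illustrative):
from babel import Locale

assert Locale('en', 'US') == Locale.parse('en-US', sep='-')
catalogs = {Locale.parse('en_US'): 'messages-en.mo'}  # illustrative file name
print(repr(Locale('en', 'US')))  # "Locale('en', territory='US')"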
>>> Locale('zh', 'CN', script='Hans').get_display_name('en') u'Chinese (Simplified, China)' Modifiers are currently passed through verbatim: >>> Locale('it', 'IT', modifier='euro').get_display_name('en') u'Italian (Italy, euro)' :param locale: the locale to use """ if locale is None: locale = self locale = Locale.parse(locale) retval = locale.languages.get(self.language) if retval and (self.territory or self.script or self.variant): details = [] if self.script: details.append(locale.scripts.get(self.script)) if self.territory: details.append(locale.territories.get(self.territory)) if self.variant: details.append(locale.variants.get(self.variant)) if self.modifier: details.append(self.modifier) detail_string = ', '.join(atom for atom in details if atom) if detail_string: retval += f" ({detail_string})" return retval display_name = property(get_display_name, doc="""\ The localized display name of the locale. >>> Locale('en').display_name u'English' >>> Locale('en', 'US').display_name u'English (United States)' >>> Locale('sv').display_name u'svenska' :type: `unicode` """) def get_language_name(self, locale: Locale | str | None = None) -> str | None: """Return the language of this locale in the given locale. >>> Locale('zh', 'CN', script='Hans').get_language_name('de') u'Chinesisch' .. versionadded:: 1.0 :param locale: the locale to use """ if locale is None: locale = self locale = Locale.parse(locale) return locale.languages.get(self.language) language_name = property(get_language_name, doc="""\ The localized language name of the locale. >>> Locale('en', 'US').language_name u'English' """) def get_territory_name(self, locale: Locale | str | None = None) -> str | None: """Return the territory name in the given locale.""" if locale is None: locale = self locale = Locale.parse(locale) return locale.territories.get(self.territory or '') territory_name = property(get_territory_name, doc="""\ The localized territory name of the locale if available. >>> Locale('de', 'DE').territory_name u'Deutschland' """) def get_script_name(self, locale: Locale | str | None = None) -> str | None: """Return the script name in the given locale.""" if locale is None: locale = self locale = Locale.parse(locale) return locale.scripts.get(self.script or '') script_name = property(get_script_name, doc="""\ The localized script name of the locale if available. >>> Locale('sr', 'ME', script='Latn').script_name u'latinica' """) @property def english_name(self) -> str | None: """The english display name of the locale. >>> Locale('de').english_name u'German' >>> Locale('de', 'DE').english_name u'German (Germany)' :type: `unicode`""" return self.get_display_name(Locale('en')) # { General Locale Display Names @property def languages(self) -> localedata.LocaleDataDict: """Mapping of language codes to translated language names. >>> Locale('de', 'DE').languages['ja'] u'Japanisch' See `ISO 639 `_ for more information. """ return self._data['languages'] @property def scripts(self) -> localedata.LocaleDataDict: """Mapping of script codes to translated script names. >>> Locale('en', 'US').scripts['Hira'] u'Hiragana' See `ISO 15924 `_ for more information. """ return self._data['scripts'] @property def territories(self) -> localedata.LocaleDataDict: """Mapping of script codes to translated script names. >>> Locale('es', 'CO').territories['DE'] u'Alemania' See `ISO 3166 `_ for more information. 
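# The display-name helpers and code-to-name mappings above can be combined to
# build locale pickers; a sketch, with exact strings depending on the CLDR version:
from babel import Locale

print(Locale('de', 'DE').get_display_name('en'))  # e.g. 'German (Germany)'
print(Locale('de', 'DE').languages['ja'])         # e.g. 'Japanisch'
print(Locale('es', 'CO').territories['DE'])       # e.g. 'Alemania'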
""" return self._data['territories'] @property def variants(self) -> localedata.LocaleDataDict: """Mapping of script codes to translated script names. >>> Locale('de', 'DE').variants['1901'] u'Alte deutsche Rechtschreibung' """ return self._data['variants'] # { Number Formatting @property def currencies(self) -> localedata.LocaleDataDict: """Mapping of currency codes to translated currency names. This only returns the generic form of the currency name, not the count specific one. If an actual number is requested use the :func:`babel.numbers.get_currency_name` function. >>> Locale('en').currencies['COP'] u'Colombian Peso' >>> Locale('de', 'DE').currencies['COP'] u'Kolumbianischer Peso' """ return self._data['currency_names'] @property def currency_symbols(self) -> localedata.LocaleDataDict: """Mapping of currency codes to symbols. >>> Locale('en', 'US').currency_symbols['USD'] u'$' >>> Locale('es', 'CO').currency_symbols['USD'] u'US$' """ return self._data['currency_symbols'] @property def number_symbols(self) -> localedata.LocaleDataDict: """Symbols used in number formatting by number system. .. note:: The format of the value returned may change between Babel versions. >>> Locale('fr', 'FR').number_symbols["latn"]['decimal'] u',' >>> Locale('fa', 'IR').number_symbols["arabext"]['decimal'] u'٫' >>> Locale('fa', 'IR').number_symbols["latn"]['decimal'] u'.' """ return self._data['number_symbols'] @property def other_numbering_systems(self) -> localedata.LocaleDataDict: """ Mapping of other numbering systems available for the locale. See: https://www.unicode.org/reports/tr35/tr35-numbers.html#otherNumberingSystems >>> Locale('el', 'GR').other_numbering_systems['traditional'] u'grek' .. note:: The format of the value returned may change between Babel versions. """ return self._data['numbering_systems'] @property def default_numbering_system(self) -> str: """The default numbering system used by the locale. >>> Locale('el', 'GR').default_numbering_system u'latn' """ return self._data['default_numbering_system'] @property def decimal_formats(self) -> localedata.LocaleDataDict: """Locale patterns for decimal number formatting. .. note:: The format of the value returned may change between Babel versions. >>> Locale('en', 'US').decimal_formats[None] """ return self._data['decimal_formats'] @property def compact_decimal_formats(self) -> localedata.LocaleDataDict: """Locale patterns for compact decimal number formatting. .. note:: The format of the value returned may change between Babel versions. >>> Locale('en', 'US').compact_decimal_formats["short"]["one"]["1000"] """ return self._data['compact_decimal_formats'] @property def currency_formats(self) -> localedata.LocaleDataDict: """Locale patterns for currency number formatting. .. note:: The format of the value returned may change between Babel versions. >>> Locale('en', 'US').currency_formats['standard'] >>> Locale('en', 'US').currency_formats['accounting'] """ return self._data['currency_formats'] @property def compact_currency_formats(self) -> localedata.LocaleDataDict: """Locale patterns for compact currency number formatting. .. note:: The format of the value returned may change between Babel versions. >>> Locale('en', 'US').compact_currency_formats["short"]["one"]["1000"] """ return self._data['compact_currency_formats'] @property def percent_formats(self) -> localedata.LocaleDataDict: """Locale patterns for percent number formatting. .. note:: The format of the value returned may change between Babel versions. 
>>> Locale('en', 'US').percent_formats[None] """ return self._data['percent_formats'] @property def scientific_formats(self) -> localedata.LocaleDataDict: """Locale patterns for scientific number formatting. .. note:: The format of the value returned may change between Babel versions. >>> Locale('en', 'US').scientific_formats[None] """ return self._data['scientific_formats'] # { Calendar Information and Date Formatting @property def periods(self) -> localedata.LocaleDataDict: """Locale display names for day periods (AM/PM). >>> Locale('en', 'US').periods['am'] u'AM' """ try: return self._data['day_periods']['stand-alone']['wide'] except KeyError: return localedata.LocaleDataDict({}) # pragma: no cover @property def day_periods(self) -> localedata.LocaleDataDict: """Locale display names for various day periods (not necessarily only AM/PM). These are not meant to be used without the relevant `day_period_rules`. """ return self._data['day_periods'] @property def day_period_rules(self) -> localedata.LocaleDataDict: """Day period rules for the locale. Used by `get_period_id`. """ return self._data.get('day_period_rules', localedata.LocaleDataDict({})) @property def days(self) -> localedata.LocaleDataDict: """Locale display names for weekdays. >>> Locale('de', 'DE').days['format']['wide'][3] u'Donnerstag' """ return self._data['days'] @property def months(self) -> localedata.LocaleDataDict: """Locale display names for months. >>> Locale('de', 'DE').months['format']['wide'][10] u'Oktober' """ return self._data['months'] @property def quarters(self) -> localedata.LocaleDataDict: """Locale display names for quarters. >>> Locale('de', 'DE').quarters['format']['wide'][1] u'1. Quartal' """ return self._data['quarters'] @property def eras(self) -> localedata.LocaleDataDict: """Locale display names for eras. .. note:: The format of the value returned may change between Babel versions. >>> Locale('en', 'US').eras['wide'][1] u'Anno Domini' >>> Locale('en', 'US').eras['abbreviated'][0] u'BC' """ return self._data['eras'] @property def time_zones(self) -> localedata.LocaleDataDict: """Locale display names for time zones. .. note:: The format of the value returned may change between Babel versions. >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight'] u'British Summer Time' >>> Locale('en', 'US').time_zones['America/St_Johns']['city'] u'St. John\u2019s' """ return self._data['time_zones'] @property def meta_zones(self) -> localedata.LocaleDataDict: """Locale display names for meta time zones. Meta time zones are basically groups of different Olson time zones that have the same GMT offset and daylight savings time. .. note:: The format of the value returned may change between Babel versions. >>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight'] u'Central European Summer Time' .. versionadded:: 0.9 """ return self._data['meta_zones'] @property def zone_formats(self) -> localedata.LocaleDataDict: """Patterns related to the formatting of time zones. .. note:: The format of the value returned may change between Babel versions. >>> Locale('en', 'US').zone_formats['fallback'] u'%(1)s (%(0)s)' >>> Locale('pt', 'BR').zone_formats['region'] u'Hor\\xe1rio %s' .. versionadded:: 0.9 """ return self._data['zone_formats'] @property def first_week_day(self) -> int: """The first day of a week, with 0 being Monday. 
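# The calendar display names above are plain mappings keyed by numeric day,
# month, or quarter (0 is Monday for days, months and quarters start at 1);
# a small sketch, with strings depending on the CLDR version:
from babel import Locale

de = Locale('de', 'DE')
print(de.days['format']['wide'][3])      # e.g. 'Donnerstag'
print(de.months['format']['wide'][10])   # e.g. 'Oktober'
print(de.quarters['format']['wide'][1])  # e.g. '1. Quartal'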
>>> Locale('de', 'DE').first_week_day 0 >>> Locale('en', 'US').first_week_day 6 """ return self._data['week_data']['first_day'] @property def weekend_start(self) -> int: """The day the weekend starts, with 0 being Monday. >>> Locale('de', 'DE').weekend_start 5 """ return self._data['week_data']['weekend_start'] @property def weekend_end(self) -> int: """The day the weekend ends, with 0 being Monday. >>> Locale('de', 'DE').weekend_end 6 """ return self._data['week_data']['weekend_end'] @property def min_week_days(self) -> int: """The minimum number of days in a week so that the week is counted as the first week of a year or month. >>> Locale('de', 'DE').min_week_days 4 """ return self._data['week_data']['min_days'] @property def date_formats(self) -> localedata.LocaleDataDict: """Locale patterns for date formatting. .. note:: The format of the value returned may change between Babel versions. >>> Locale('en', 'US').date_formats['short'] >>> Locale('fr', 'FR').date_formats['long'] """ return self._data['date_formats'] @property def time_formats(self) -> localedata.LocaleDataDict: """Locale patterns for time formatting. .. note:: The format of the value returned may change between Babel versions. >>> Locale('en', 'US').time_formats['short'] >>> Locale('fr', 'FR').time_formats['long'] """ return self._data['time_formats'] @property def datetime_formats(self) -> localedata.LocaleDataDict: """Locale patterns for datetime formatting. .. note:: The format of the value returned may change between Babel versions. >>> Locale('en').datetime_formats['full'] u'{1}, {0}' >>> Locale('th').datetime_formats['medium'] u'{1} {0}' """ return self._data['datetime_formats'] @property def datetime_skeletons(self) -> localedata.LocaleDataDict: """Locale patterns for formatting parts of a datetime. >>> Locale('en').datetime_skeletons['MEd'] >>> Locale('fr').datetime_skeletons['MEd'] >>> Locale('fr').datetime_skeletons['H'] """ return self._data['datetime_skeletons'] @property def interval_formats(self) -> localedata.LocaleDataDict: """Locale patterns for interval formatting. .. note:: The format of the value returned may change between Babel versions. How to format date intervals in Finnish when the day is the smallest changing component: >>> Locale('fi_FI').interval_formats['MEd']['d'] [u'E d.\u2009\u2013\u2009', u'E d.M.'] .. seealso:: The primary API to use this data is :py:func:`babel.dates.format_interval`. :rtype: dict[str, dict[str, list[str]]] """ return self._data['interval_formats'] @property def plural_form(self) -> PluralRule: """Plural rules for the locale. >>> Locale('en').plural_form(1) 'one' >>> Locale('en').plural_form(0) 'other' >>> Locale('fr').plural_form(0) 'one' >>> Locale('ru').plural_form(100) 'many' """ return self._data.get('plural_form', _default_plural_rule) @property def list_patterns(self) -> localedata.LocaleDataDict: """Patterns for generating lists .. note:: The format of the value returned may change between Babel versions. >>> Locale('en').list_patterns['standard']['start'] u'{0}, {1}' >>> Locale('en').list_patterns['standard']['end'] u'{0}, and {1}' >>> Locale('en_GB').list_patterns['standard']['end'] u'{0} and {1}' """ return self._data['list_patterns'] @property def ordinal_form(self) -> PluralRule: """Plural rules for the locale. 
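# Week data and plural rules come straight from the locale data; a quick sketch:
from babel import Locale

print(Locale('de', 'DE').first_week_day)  # 0, i.e. Monday
print(Locale('de', 'DE').min_week_days)   # 4
print(Locale('en').plural_form(1))        # 'one'
print(Locale('ru').plural_form(5))        # 'many'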
>>> Locale('en').ordinal_form(1) 'one' >>> Locale('en').ordinal_form(2) 'two' >>> Locale('en').ordinal_form(3) 'few' >>> Locale('fr').ordinal_form(2) 'other' >>> Locale('ru').ordinal_form(100) 'other' """ return self._data.get('ordinal_form', _default_plural_rule) @property def measurement_systems(self) -> localedata.LocaleDataDict: """Localized names for various measurement systems. >>> Locale('fr', 'FR').measurement_systems['US'] u'am\\xe9ricain' >>> Locale('en', 'US').measurement_systems['US'] u'US' """ return self._data['measurement_systems'] @property def character_order(self) -> str: """The text direction for the language. >>> Locale('de', 'DE').character_order 'left-to-right' >>> Locale('ar', 'SA').character_order 'right-to-left' """ return self._data['character_order'] @property def text_direction(self) -> str: """The text direction for the language in CSS short-hand form. >>> Locale('de', 'DE').text_direction 'ltr' >>> Locale('ar', 'SA').text_direction 'rtl' """ return ''.join(word[0] for word in self.character_order.split('-')) @property def unit_display_names(self) -> localedata.LocaleDataDict: """Display names for units of measurement. .. seealso:: You may want to use :py:func:`babel.units.get_unit_name` instead. .. note:: The format of the value returned may change between Babel versions. """ return self._data['unit_display_names'] def default_locale( category: str | tuple[str, ...] | list[str] | None = None, aliases: Mapping[str, str] = LOCALE_ALIASES, ) -> str | None: """Returns the system default locale for a given category, based on environment variables. >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']: ... os.environ[name] = '' >>> os.environ['LANG'] = 'fr_FR.UTF-8' >>> default_locale('LC_MESSAGES') 'fr_FR' The "C" or "POSIX" pseudo-locales are treated as aliases for the "en_US_POSIX" locale: >>> os.environ['LC_MESSAGES'] = 'POSIX' >>> default_locale('LC_MESSAGES') 'en_US_POSIX' The following fallbacks to the variable are always considered: - ``LANGUAGE`` - ``LC_ALL`` - ``LC_CTYPE`` - ``LANG`` :param category: one or more of the ``LC_XXX`` environment variable names :param aliases: a dictionary of aliases for locale identifiers """ varnames = ('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG') if category: if isinstance(category, str): varnames = (category, *varnames) elif isinstance(category, (list, tuple)): varnames = (*category, *varnames) else: raise TypeError(f"Invalid type for category: {category!r}") for name in varnames: if not name: continue locale = os.getenv(name) if locale: if name == 'LANGUAGE' and ':' in locale: # the LANGUAGE variable may contain a colon-separated list of # language codes; we just pick the language on the list locale = locale.split(':')[0] if locale.split('.')[0] in ('C', 'POSIX'): locale = 'en_US_POSIX' elif aliases and locale in aliases: locale = aliases[locale] try: return get_locale_identifier(parse_locale(locale)) except ValueError: pass return None def negotiate_locale(preferred: Iterable[str], available: Iterable[str], sep: str = '_', aliases: Mapping[str, str] = LOCALE_ALIASES) -> str | None: """Find the best match between available and requested locale strings. 
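# default_locale() inspects the usual POSIX environment variables; a sketch that
# sets LC_MESSAGES explicitly so the result does not depend on the host system:
import os

from babel import default_locale

os.environ['LC_MESSAGES'] = 'pt_BR.UTF-8'
print(default_locale('LC_MESSAGES'))  # 'pt_BR' (the encoding suffix is stripped)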
>>> negotiate_locale(['de_DE', 'en_US'], ['de_DE', 'de_AT']) 'de_DE' >>> negotiate_locale(['de_DE', 'en_US'], ['en', 'de']) 'de' Case is ignored by the algorithm, the result uses the case of the preferred locale identifier: >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at']) 'de_DE' >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at']) 'de_DE' By default, some web browsers unfortunately do not include the territory in the locale identifier for many locales, and some don't even allow the user to easily add the territory. So while you may prefer using qualified locale identifiers in your web-application, they would not normally match the language-only locale sent by such browsers. To workaround that, this function uses a default mapping of commonly used language-only locale identifiers to identifiers including the territory: >>> negotiate_locale(['ja', 'en_US'], ['ja_JP', 'en_US']) 'ja_JP' Some browsers even use an incorrect or outdated language code, such as "no" for Norwegian, where the correct locale identifier would actually be "nb_NO" (Bokmål) or "nn_NO" (Nynorsk). The aliases are intended to take care of such cases, too: >>> negotiate_locale(['no', 'sv'], ['nb_NO', 'sv_SE']) 'nb_NO' You can override this default mapping by passing a different `aliases` dictionary to this function, or you can bypass the behavior althogher by setting the `aliases` parameter to `None`. :param preferred: the list of locale strings preferred by the user :param available: the list of locale strings available :param sep: character that separates the different parts of the locale strings :param aliases: a dictionary of aliases for locale identifiers """ available = [a.lower() for a in available if a] for locale in preferred: ll = locale.lower() if ll in available: return locale if aliases: alias = aliases.get(ll) if alias: alias = alias.replace('_', sep) if alias.lower() in available: return alias parts = locale.split(sep) if len(parts) > 1 and parts[0].lower() in available: return parts[0] return None def parse_locale( identifier: str, sep: str = '_', ) -> tuple[str, str | None, str | None, str | None] | tuple[str, str | None, str | None, str | None, str | None]: """Parse a locale identifier into a tuple of the form ``(language, territory, script, variant, modifier)``. >>> parse_locale('zh_CN') ('zh', 'CN', None, None) >>> parse_locale('zh_Hans_CN') ('zh', 'CN', 'Hans', None) >>> parse_locale('ca_es_valencia') ('ca', 'ES', None, 'VALENCIA') >>> parse_locale('en_150') ('en', '150', None, None) >>> parse_locale('en_us_posix') ('en', 'US', None, 'POSIX') >>> parse_locale('it_IT@euro') ('it', 'IT', None, None, 'euro') >>> parse_locale('it_IT@custom') ('it', 'IT', None, None, 'custom') >>> parse_locale('it_IT@') ('it', 'IT', None, None) The default component separator is "_", but a different separator can be specified using the `sep` parameter. The optional modifier is always separated with "@" and at the end: >>> parse_locale('zh-CN', sep='-') ('zh', 'CN', None, None) >>> parse_locale('zh-CN@custom', sep='-') ('zh', 'CN', None, None, 'custom') If the identifier cannot be parsed into a locale, a `ValueError` exception is raised: >>> parse_locale('not_a_LOCALE_String') Traceback (most recent call last): ... 
ValueError: 'not_a_LOCALE_String' is not a valid locale identifier Encoding information is removed from the identifier, while modifiers are kept: >>> parse_locale('en_US.UTF-8') ('en', 'US', None, None) >>> parse_locale('de_DE.iso885915@euro') ('de', 'DE', None, None, 'euro') See :rfc:`4646` for more information. :param identifier: the locale identifier string :param sep: character that separates the different components of the locale identifier :raise `ValueError`: if the string does not appear to be a valid locale identifier """ if not identifier: raise ValueError("empty locale identifier") identifier, _, modifier = identifier.partition('@') if '.' in identifier: # this is probably the charset/encoding, which we don't care about identifier = identifier.split('.', 1)[0] parts = identifier.split(sep) lang = parts.pop(0).lower() if not lang.isalpha(): raise ValueError(f"expected only letters, got {lang!r}") script = territory = variant = None if parts and len(parts[0]) == 4 and parts[0].isalpha(): script = parts.pop(0).title() if parts: if len(parts[0]) == 2 and parts[0].isalpha(): territory = parts.pop(0).upper() elif len(parts[0]) == 3 and parts[0].isdigit(): territory = parts.pop(0) if parts and ( len(parts[0]) == 4 and parts[0][0].isdigit() or len(parts[0]) >= 5 and parts[0][0].isalpha() ): variant = parts.pop().upper() if parts: raise ValueError(f"{identifier!r} is not a valid locale identifier") # TODO(3.0): always return a 5-tuple if modifier: return lang, territory, script, variant, modifier else: return lang, territory, script, variant def get_locale_identifier( tup: tuple[str] | tuple[str, str | None] | tuple[str, str | None, str | None] | tuple[str, str | None, str | None, str | None] | tuple[str, str | None, str | None, str | None, str | None], sep: str = "_", ) -> str: """The reverse of :func:`parse_locale`. It creates a locale identifier out of a ``(language, territory, script, variant, modifier)`` tuple. Items can be set to ``None`` and trailing ``None``\\s can also be left out of the tuple. >>> get_locale_identifier(('de', 'DE', None, '1999', 'custom')) 'de_DE_1999@custom' >>> get_locale_identifier(('fi', None, None, None, 'custom')) 'fi@custom' .. versionadded:: 1.0 :param tup: the tuple as returned by :func:`parse_locale`. :param sep: the separator for the identifier. """ tup = tuple(tup[:5]) # type: ignore # length should be no more than 5 lang, territory, script, variant, modifier = tup + (None,) * (5 - len(tup)) ret = sep.join(filter(None, (lang, script, territory, variant))) return f'{ret}@{modifier}' if modifier else ret babel-2.17.0/babel/dates.py000066400000000000000000002251321474743505200154050ustar00rootroot00000000000000""" babel.dates ~~~~~~~~~~~ Locale dependent formatting and parsing of dates and times. The default locale for the functions in this module is determined by the following environment variables, in that order: * ``LC_TIME``, * ``LC_ALL``, and * ``LANG`` :copyright: (c) 2013-2025 by the Babel Team. :license: BSD, see LICENSE for more details. 
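# parse_locale() and get_locale_identifier() round-trip identifiers, and
# negotiate_locale() applies the alias table described above; a minimal sketch:
from babel import get_locale_identifier, negotiate_locale, parse_locale

parts = parse_locale('de_DE.iso885915@euro')
print(parts)                         # ('de', 'DE', None, None, 'euro')
print(get_locale_identifier(parts))  # 'de_DE@euro'
print(negotiate_locale(['no', 'en_US'], ['nb_NO', 'en_US']))  # 'nb_NO'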
""" from __future__ import annotations import math import re import warnings from functools import lru_cache from typing import TYPE_CHECKING, Literal, SupportsInt try: import pytz except ModuleNotFoundError: pytz = None import zoneinfo import datetime from collections.abc import Iterable from babel import localtime from babel.core import Locale, default_locale, get_global from babel.localedata import LocaleDataDict if TYPE_CHECKING: from typing_extensions import TypeAlias _Instant: TypeAlias = datetime.date | datetime.time | float | None _PredefinedTimeFormat: TypeAlias = Literal['full', 'long', 'medium', 'short'] _Context: TypeAlias = Literal['format', 'stand-alone'] _DtOrTzinfo: TypeAlias = datetime.datetime | datetime.tzinfo | str | int | datetime.time | None # "If a given short metazone form is known NOT to be understood in a given # locale and the parent locale has this value such that it would normally # be inherited, the inheritance of this value can be explicitly disabled by # use of the 'no inheritance marker' as the value, which is 3 simultaneous [sic] # empty set characters ( U+2205 )." # - https://www.unicode.org/reports/tr35/tr35-dates.html#Metazone_Names NO_INHERITANCE_MARKER = '\u2205\u2205\u2205' UTC = datetime.timezone.utc LOCALTZ = localtime.LOCALTZ LC_TIME = default_locale('LC_TIME') def _localize(tz: datetime.tzinfo, dt: datetime.datetime) -> datetime.datetime: # Support localizing with both pytz and zoneinfo tzinfos # nothing to do if dt.tzinfo is tz: return dt if hasattr(tz, 'localize'): # pytz return tz.localize(dt) if dt.tzinfo is None: # convert naive to localized return dt.replace(tzinfo=tz) # convert timezones return dt.astimezone(tz) def _get_dt_and_tzinfo(dt_or_tzinfo: _DtOrTzinfo) -> tuple[datetime.datetime | None, datetime.tzinfo]: """ Parse a `dt_or_tzinfo` value into a datetime and a tzinfo. See the docs for this function's callers for semantics. :rtype: tuple[datetime, tzinfo] """ if dt_or_tzinfo is None: dt = datetime.datetime.now() tzinfo = LOCALTZ elif isinstance(dt_or_tzinfo, str): dt = None tzinfo = get_timezone(dt_or_tzinfo) elif isinstance(dt_or_tzinfo, int): dt = None tzinfo = UTC elif isinstance(dt_or_tzinfo, (datetime.datetime, datetime.time)): dt = _get_datetime(dt_or_tzinfo) tzinfo = dt.tzinfo if dt.tzinfo is not None else UTC else: dt = None tzinfo = dt_or_tzinfo return dt, tzinfo def _get_tz_name(dt_or_tzinfo: _DtOrTzinfo) -> str: """ Get the timezone name out of a time, datetime, or tzinfo object. :rtype: str """ dt, tzinfo = _get_dt_and_tzinfo(dt_or_tzinfo) if hasattr(tzinfo, 'zone'): # pytz object return tzinfo.zone elif hasattr(tzinfo, 'key') and tzinfo.key is not None: # ZoneInfo object return tzinfo.key else: return tzinfo.tzname(dt or datetime.datetime.now(UTC)) def _get_datetime(instant: _Instant) -> datetime.datetime: """ Get a datetime out of an "instant" (date, time, datetime, number). .. warning:: The return values of this function may depend on the system clock. If the instant is None, the current moment is used. If the instant is a time, it's augmented with today's date. Dates are converted to naive datetimes with midnight as the time component. >>> from datetime import date, datetime >>> _get_datetime(date(2015, 1, 1)) datetime.datetime(2015, 1, 1, 0, 0) UNIX timestamps are converted to datetimes. >>> _get_datetime(1400000000) datetime.datetime(2014, 5, 13, 16, 53, 20) Other values are passed through as-is. 
>>> x = datetime(2015, 1, 1) >>> _get_datetime(x) is x True :param instant: date, time, datetime, integer, float or None :type instant: date|time|datetime|int|float|None :return: a datetime :rtype: datetime """ if instant is None: return datetime.datetime.now(UTC).replace(tzinfo=None) elif isinstance(instant, (int, float)): return datetime.datetime.fromtimestamp(instant, UTC).replace(tzinfo=None) elif isinstance(instant, datetime.time): return datetime.datetime.combine(datetime.date.today(), instant) elif isinstance(instant, datetime.date) and not isinstance(instant, datetime.datetime): return datetime.datetime.combine(instant, datetime.time()) # TODO (3.x): Add an assertion/type check for this fallthrough branch: return instant def _ensure_datetime_tzinfo(dt: datetime.datetime, tzinfo: datetime.tzinfo | None = None) -> datetime.datetime: """ Ensure the datetime passed has an attached tzinfo. If the datetime is tz-naive to begin with, UTC is attached. If a tzinfo is passed in, the datetime is normalized to that timezone. >>> from datetime import datetime >>> _get_tz_name(_ensure_datetime_tzinfo(datetime(2015, 1, 1))) 'UTC' >>> tz = get_timezone("Europe/Stockholm") >>> _ensure_datetime_tzinfo(datetime(2015, 1, 1, 13, 15, tzinfo=UTC), tzinfo=tz).hour 14 :param datetime: Datetime to augment. :param tzinfo: optional tzinfo :return: datetime with tzinfo :rtype: datetime """ if dt.tzinfo is None: dt = dt.replace(tzinfo=UTC) if tzinfo is not None: dt = dt.astimezone(get_timezone(tzinfo)) if hasattr(tzinfo, 'normalize'): # pytz dt = tzinfo.normalize(dt) return dt def _get_time( time: datetime.time | datetime.datetime | None, tzinfo: datetime.tzinfo | None = None, ) -> datetime.time: """ Get a timezoned time from a given instant. .. warning:: The return values of this function may depend on the system clock. :param time: time, datetime or None :rtype: time """ if time is None: time = datetime.datetime.now(UTC) elif isinstance(time, (int, float)): time = datetime.datetime.fromtimestamp(time, UTC) if time.tzinfo is None: time = time.replace(tzinfo=UTC) if isinstance(time, datetime.datetime): if tzinfo is not None: time = time.astimezone(tzinfo) if hasattr(tzinfo, 'normalize'): # pytz time = tzinfo.normalize(time) time = time.timetz() elif tzinfo is not None: time = time.replace(tzinfo=tzinfo) return time def get_timezone(zone: str | datetime.tzinfo | None = None) -> datetime.tzinfo: """Looks up a timezone by name and returns it. The timezone object returned comes from ``pytz`` or ``zoneinfo``, whichever is available. It corresponds to the `tzinfo` interface and can be used with all of the functions of Babel that operate with dates. If a timezone is not known a :exc:`LookupError` is raised. If `zone` is ``None`` a local zone object is returned. :param zone: the name of the timezone to look up. If a timezone object itself is passed in, it's returned unchanged. """ if zone is None: return LOCALTZ if not isinstance(zone, str): return zone if pytz: try: return pytz.timezone(zone) except pytz.UnknownTimeZoneError as e: exc = e else: assert zoneinfo try: return zoneinfo.ZoneInfo(zone) except zoneinfo.ZoneInfoNotFoundError as e: exc = e raise LookupError(f"Unknown timezone {zone}") from exc def get_period_names( width: Literal['abbreviated', 'narrow', 'wide'] = 'wide', context: _Context = 'stand-alone', locale: Locale | str | None = None, ) -> LocaleDataDict: """Return the names for day periods (AM/PM) used by the locale. 
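# get_timezone() accepts Olson names and returns a pytz or zoneinfo object,
# whichever backend is available; unknown names raise LookupError. A sketch
# (the second name is deliberately invalid):
from babel.dates import get_timezone

paris = get_timezone('Europe/Paris')
try:
    get_timezone('Mars/Olympus_Mons')
except LookupError as exc:
    print(exc)  # 'Unknown timezone Mars/Olympus_Mons'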
>>> get_period_names(locale='en_US')['am'] u'AM' :param width: the width to use, one of "abbreviated", "narrow", or "wide" :param context: the context, either "format" or "stand-alone" :param locale: the `Locale` object, or a locale string. Defaults to the system time locale. """ return Locale.parse(locale or LC_TIME).day_periods[context][width] def get_day_names( width: Literal['abbreviated', 'narrow', 'short', 'wide'] = 'wide', context: _Context = 'format', locale: Locale | str | None = None, ) -> LocaleDataDict: """Return the day names used by the locale for the specified format. >>> get_day_names('wide', locale='en_US')[1] u'Tuesday' >>> get_day_names('short', locale='en_US')[1] u'Tu' >>> get_day_names('abbreviated', locale='es')[1] u'mar' >>> get_day_names('narrow', context='stand-alone', locale='de_DE')[1] u'D' :param width: the width to use, one of "wide", "abbreviated", "short" or "narrow" :param context: the context, either "format" or "stand-alone" :param locale: the `Locale` object, or a locale string. Defaults to the system time locale. """ return Locale.parse(locale or LC_TIME).days[context][width] def get_month_names( width: Literal['abbreviated', 'narrow', 'wide'] = 'wide', context: _Context = 'format', locale: Locale | str | None = None, ) -> LocaleDataDict: """Return the month names used by the locale for the specified format. >>> get_month_names('wide', locale='en_US')[1] u'January' >>> get_month_names('abbreviated', locale='es')[1] u'ene' >>> get_month_names('narrow', context='stand-alone', locale='de_DE')[1] u'J' :param width: the width to use, one of "wide", "abbreviated", or "narrow" :param context: the context, either "format" or "stand-alone" :param locale: the `Locale` object, or a locale string. Defaults to the system time locale. """ return Locale.parse(locale or LC_TIME).months[context][width] def get_quarter_names( width: Literal['abbreviated', 'narrow', 'wide'] = 'wide', context: _Context = 'format', locale: Locale | str | None = None, ) -> LocaleDataDict: """Return the quarter names used by the locale for the specified format. >>> get_quarter_names('wide', locale='en_US')[1] u'1st quarter' >>> get_quarter_names('abbreviated', locale='de_DE')[1] u'Q1' >>> get_quarter_names('narrow', locale='de_DE')[1] u'1' :param width: the width to use, one of "wide", "abbreviated", or "narrow" :param context: the context, either "format" or "stand-alone" :param locale: the `Locale` object, or a locale string. Defaults to the system time locale. """ return Locale.parse(locale or LC_TIME).quarters[context][width] def get_era_names( width: Literal['abbreviated', 'narrow', 'wide'] = 'wide', locale: Locale | str | None = None, ) -> LocaleDataDict: """Return the era names used by the locale for the specified format. >>> get_era_names('wide', locale='en_US')[1] u'Anno Domini' >>> get_era_names('abbreviated', locale='de_DE')[1] u'n. Chr.' :param width: the width to use, either "wide", "abbreviated", or "narrow" :param locale: the `Locale` object, or a locale string. Defaults to the system time locale. """ return Locale.parse(locale or LC_TIME).eras[width] def get_date_format( format: _PredefinedTimeFormat = 'medium', locale: Locale | str | None = None, ) -> DateTimePattern: """Return the date formatting patterns used by the locale for the specified format. >>> get_date_format(locale='en_US') >>> get_date_format('full', locale='de_DE') :param format: the format to use, one of "full", "long", "medium", or "short" :param locale: the `Locale` object, or a locale string. 
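# The name lookups above return plain mappings keyed by day, month, or quarter
# number; a small sketch (exact strings depend on the CLDR version):
from babel.dates import get_day_names, get_month_names, get_quarter_names

print(get_day_names('abbreviated', locale='es')[1])         # e.g. 'mar'
print(get_month_names('wide', locale='fr')[3])              # e.g. 'mars'
print(get_quarter_names('abbreviated', locale='de_DE')[1])  # e.g. 'Q1'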
Defaults to the system time locale. """ return Locale.parse(locale or LC_TIME).date_formats[format] def get_datetime_format( format: _PredefinedTimeFormat = 'medium', locale: Locale | str | None = None, ) -> DateTimePattern: """Return the datetime formatting patterns used by the locale for the specified format. >>> get_datetime_format(locale='en_US') u'{1}, {0}' :param format: the format to use, one of "full", "long", "medium", or "short" :param locale: the `Locale` object, or a locale string. Defaults to the system time locale. """ patterns = Locale.parse(locale or LC_TIME).datetime_formats if format not in patterns: format = None return patterns[format] def get_time_format( format: _PredefinedTimeFormat = 'medium', locale: Locale | str | None = None, ) -> DateTimePattern: """Return the time formatting patterns used by the locale for the specified format. >>> get_time_format(locale='en_US') >>> get_time_format('full', locale='de_DE') :param format: the format to use, one of "full", "long", "medium", or "short" :param locale: the `Locale` object, or a locale string. Defaults to the system time locale. """ return Locale.parse(locale or LC_TIME).time_formats[format] def get_timezone_gmt( datetime: _Instant = None, width: Literal['long', 'short', 'iso8601', 'iso8601_short'] = 'long', locale: Locale | str | None = None, return_z: bool = False, ) -> str: """Return the timezone associated with the given `datetime` object formatted as string indicating the offset from GMT. >>> from datetime import datetime >>> dt = datetime(2007, 4, 1, 15, 30) >>> get_timezone_gmt(dt, locale='en') u'GMT+00:00' >>> get_timezone_gmt(dt, locale='en', return_z=True) 'Z' >>> get_timezone_gmt(dt, locale='en', width='iso8601_short') u'+00' >>> tz = get_timezone('America/Los_Angeles') >>> dt = _localize(tz, datetime(2007, 4, 1, 15, 30)) >>> get_timezone_gmt(dt, locale='en') u'GMT-07:00' >>> get_timezone_gmt(dt, 'short', locale='en') u'-0700' >>> get_timezone_gmt(dt, locale='en', width='iso8601_short') u'-07' The long format depends on the locale, for example in France the acronym UTC string is used instead of GMT: >>> get_timezone_gmt(dt, 'long', locale='fr_FR') u'UTC-07:00' .. versionadded:: 0.9 :param datetime: the ``datetime`` object; if `None`, the current date and time in UTC is used :param width: either "long" or "short" or "iso8601" or "iso8601_short" :param locale: the `Locale` object, or a locale string. Defaults to the system time locale. :param return_z: True or False; Function returns indicator "Z" when local time offset is 0 """ datetime = _ensure_datetime_tzinfo(_get_datetime(datetime)) locale = Locale.parse(locale or LC_TIME) offset = datetime.tzinfo.utcoffset(datetime) seconds = offset.days * 24 * 60 * 60 + offset.seconds hours, seconds = divmod(seconds, 3600) if return_z and hours == 0 and seconds == 0: return 'Z' elif seconds == 0 and width == 'iso8601_short': return '%+03d' % hours elif width == 'short' or width == 'iso8601_short': pattern = '%+03d%02d' elif width == 'iso8601': pattern = '%+03d:%02d' else: pattern = locale.zone_formats['gmt'] % '%+03d:%02d' return pattern % (hours, seconds // 60) def get_timezone_location( dt_or_tzinfo: _DtOrTzinfo = None, locale: Locale | str | None = None, return_city: bool = False, ) -> str: """Return a representation of the given timezone using "location format". 
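# get_timezone_gmt() formats a fixed offset; with an aware datetime the result
# is deterministic, so this sketch uses UTC explicitly:
import datetime

from babel.dates import get_timezone_gmt

dt = datetime.datetime(2007, 4, 1, 15, 30, tzinfo=datetime.timezone.utc)
print(get_timezone_gmt(dt, locale='en'))                         # 'GMT+00:00'
print(get_timezone_gmt(dt, width='iso8601_short', locale='en'))  # '+00'
print(get_timezone_gmt(dt, locale='en', return_z=True))          # 'Z'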
The result depends on both the local display name of the country and the city associated with the time zone: >>> tz = get_timezone('America/St_Johns') >>> print(get_timezone_location(tz, locale='de_DE')) Kanada (St. John’s) (Ortszeit) >>> print(get_timezone_location(tz, locale='en')) Canada (St. John’s) Time >>> print(get_timezone_location(tz, locale='en', return_city=True)) St. John’s >>> tz = get_timezone('America/Mexico_City') >>> get_timezone_location(tz, locale='de_DE') u'Mexiko (Mexiko-Stadt) (Ortszeit)' If the timezone is associated with a country that uses only a single timezone, just the localized country name is returned: >>> tz = get_timezone('Europe/Berlin') >>> get_timezone_name(tz, locale='de_DE') u'Mitteleurop\\xe4ische Zeit' .. versionadded:: 0.9 :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines the timezone; if `None`, the current date and time in UTC is assumed :param locale: the `Locale` object, or a locale string. Defaults to the system time locale. :param return_city: True or False, if True then return exemplar city (location) for the time zone :return: the localized timezone name using location format """ locale = Locale.parse(locale or LC_TIME) zone = _get_tz_name(dt_or_tzinfo) # Get the canonical time-zone code zone = get_global('zone_aliases').get(zone, zone) info = locale.time_zones.get(zone, {}) # Otherwise, if there is only one timezone for the country, return the # localized country name region_format = locale.zone_formats['region'] territory = get_global('zone_territories').get(zone) if territory not in locale.territories: territory = 'ZZ' # invalid/unknown territory_name = locale.territories[territory] if not return_city and territory and len(get_global('territory_zones').get(territory, [])) == 1: return region_format % territory_name # Otherwise, include the city in the output fallback_format = locale.zone_formats['fallback'] if 'city' in info: city_name = info['city'] else: metazone = get_global('meta_zones').get(zone) metazone_info = locale.meta_zones.get(metazone, {}) if 'city' in metazone_info: city_name = metazone_info['city'] elif '/' in zone: city_name = zone.split('/', 1)[1].replace('_', ' ') else: city_name = zone.replace('_', ' ') if return_city: return city_name return region_format % (fallback_format % { '0': city_name, '1': territory_name, }) def get_timezone_name( dt_or_tzinfo: _DtOrTzinfo = None, width: Literal['long', 'short'] = 'long', uncommon: bool = False, locale: Locale | str | None = None, zone_variant: Literal['generic', 'daylight', 'standard'] | None = None, return_zone: bool = False, ) -> str: r"""Return the localized display name for the given timezone. The timezone may be specified using a ``datetime`` or `tzinfo` object. >>> from datetime import time >>> dt = time(15, 30, tzinfo=get_timezone('America/Los_Angeles')) >>> get_timezone_name(dt, locale='en_US') # doctest: +SKIP u'Pacific Standard Time' >>> get_timezone_name(dt, locale='en_US', return_zone=True) 'America/Los_Angeles' >>> get_timezone_name(dt, width='short', locale='en_US') # doctest: +SKIP u'PST' If this function gets passed only a `tzinfo` object and no concrete `datetime`, the returned display name is independent of daylight savings time. 
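# A quick sketch of the location format described above; the rendered country
# and city names depend on the CLDR version:
from babel.dates import get_timezone, get_timezone_location

tz = get_timezone('America/St_Johns')
print(get_timezone_location(tz, locale='en'))                    # e.g. 'Canada (St. John’s) Time'
print(get_timezone_location(tz, locale='en', return_city=True))  # e.g. 'St. John’s'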
This can be used for example for selecting timezones, or to set the time of events that recur across DST changes: >>> tz = get_timezone('America/Los_Angeles') >>> get_timezone_name(tz, locale='en_US') u'Pacific Time' >>> get_timezone_name(tz, 'short', locale='en_US') u'PT' If no localized display name for the timezone is available, and the timezone is associated with a country that uses only a single timezone, the name of that country is returned, formatted according to the locale: >>> tz = get_timezone('Europe/Berlin') >>> get_timezone_name(tz, locale='de_DE') u'Mitteleurop\xe4ische Zeit' >>> get_timezone_name(tz, locale='pt_BR') u'Hor\xe1rio da Europa Central' On the other hand, if the country uses multiple timezones, the city is also included in the representation: >>> tz = get_timezone('America/St_Johns') >>> get_timezone_name(tz, locale='de_DE') u'Neufundland-Zeit' Note that short format is currently not supported for all timezones and all locales. This is partially because not every timezone has a short code in every locale. In that case it currently falls back to the long format. For more information see `LDML Appendix J: Time Zone Display Names `_ .. versionadded:: 0.9 .. versionchanged:: 1.0 Added `zone_variant` support. :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines the timezone; if a ``tzinfo`` object is used, the resulting display name will be generic, i.e. independent of daylight savings time; if `None`, the current date in UTC is assumed :param width: either "long" or "short" :param uncommon: deprecated and ignored :param zone_variant: defines the zone variation to return. By default the variation is defined from the datetime object passed in. If no datetime object is passed in, the ``'generic'`` variation is assumed. The following values are valid: ``'generic'``, ``'daylight'`` and ``'standard'``. :param locale: the `Locale` object, or a locale string. Defaults to the system time locale. :param return_zone: True or False. If true then function returns long time zone ID """ dt, tzinfo = _get_dt_and_tzinfo(dt_or_tzinfo) locale = Locale.parse(locale or LC_TIME) zone = _get_tz_name(dt_or_tzinfo) if zone_variant is None: if dt is None: zone_variant = 'generic' else: dst = tzinfo.dst(dt) zone_variant = "daylight" if dst else "standard" else: if zone_variant not in ('generic', 'standard', 'daylight'): raise ValueError('Invalid zone variation') # Get the canonical time-zone code zone = get_global('zone_aliases').get(zone, zone) if return_zone: return zone info = locale.time_zones.get(zone, {}) # Try explicitly translated zone names first if width in info and zone_variant in info[width]: return info[width][zone_variant] metazone = get_global('meta_zones').get(zone) if metazone: metazone_info = locale.meta_zones.get(metazone, {}) if width in metazone_info: name = metazone_info[width].get(zone_variant) if width == 'short' and name == NO_INHERITANCE_MARKER: # If the short form is marked no-inheritance, # try to fall back to the long name instead. 
name = metazone_info.get('long', {}).get(zone_variant) if name: return name # If we have a concrete datetime, we assume that the result can't be # independent of daylight savings time, so we return the GMT offset if dt is not None: return get_timezone_gmt(dt, width=width, locale=locale) return get_timezone_location(dt_or_tzinfo, locale=locale) def format_date( date: datetime.date | None = None, format: _PredefinedTimeFormat | str = 'medium', locale: Locale | str | None = None, ) -> str: """Return a date formatted according to the given pattern. >>> from datetime import date >>> d = date(2007, 4, 1) >>> format_date(d, locale='en_US') u'Apr 1, 2007' >>> format_date(d, format='full', locale='de_DE') u'Sonntag, 1. April 2007' If you don't want to use the locale default formats, you can specify a custom date pattern: >>> format_date(d, "EEE, MMM d, ''yy", locale='en') u"Sun, Apr 1, '07" :param date: the ``date`` or ``datetime`` object; if `None`, the current date is used :param format: one of "full", "long", "medium", or "short", or a custom date/time pattern :param locale: a `Locale` object or a locale identifier. Defaults to the system time locale. """ if date is None: date = datetime.date.today() elif isinstance(date, datetime.datetime): date = date.date() locale = Locale.parse(locale or LC_TIME) if format in ('full', 'long', 'medium', 'short'): format = get_date_format(format, locale=locale) pattern = parse_pattern(format) return pattern.apply(date, locale) def format_datetime( datetime: _Instant = None, format: _PredefinedTimeFormat | str = 'medium', tzinfo: datetime.tzinfo | None = None, locale: Locale | str | None = None, ) -> str: r"""Return a date formatted according to the given pattern. >>> from datetime import datetime >>> dt = datetime(2007, 4, 1, 15, 30) >>> format_datetime(dt, locale='en_US') u'Apr 1, 2007, 3:30:00\u202fPM' For any pattern requiring the display of the timezone: >>> format_datetime(dt, 'full', tzinfo=get_timezone('Europe/Paris'), ... locale='fr_FR') 'dimanche 1 avril 2007, 17:30:00 heure d’été d’Europe centrale' >>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz", ... tzinfo=get_timezone('US/Eastern'), locale='en') u'2007.04.01 AD at 11:30:00 EDT' :param datetime: the `datetime` object; if `None`, the current date and time is used :param format: one of "full", "long", "medium", or "short", or a custom date/time pattern :param tzinfo: the timezone to apply to the time for display :param locale: a `Locale` object or a locale identifier. Defaults to the system time locale. """ datetime = _ensure_datetime_tzinfo(_get_datetime(datetime), tzinfo) locale = Locale.parse(locale or LC_TIME) if format in ('full', 'long', 'medium', 'short'): return get_datetime_format(format, locale=locale) \ .replace("'", "") \ .replace('{0}', format_time(datetime, format, tzinfo=None, locale=locale)) \ .replace('{1}', format_date(datetime, format, locale=locale)) else: return parse_pattern(format).apply(datetime, locale) def format_time( time: datetime.time | datetime.datetime | float | None = None, format: _PredefinedTimeFormat | str = 'medium', tzinfo: datetime.tzinfo | None = None, locale: Locale | str | None = None, ) -> str: r"""Return a time formatted according to the given pattern. 
>>> from datetime import datetime, time >>> t = time(15, 30) >>> format_time(t, locale='en_US') u'3:30:00\u202fPM' >>> format_time(t, format='short', locale='de_DE') u'15:30' If you don't want to use the locale default formats, you can specify a custom time pattern: >>> format_time(t, "hh 'o''clock' a", locale='en') u"03 o'clock PM" For any pattern requiring the display of the time-zone a timezone has to be specified explicitly: >>> t = datetime(2007, 4, 1, 15, 30) >>> tzinfo = get_timezone('Europe/Paris') >>> t = _localize(tzinfo, t) >>> format_time(t, format='full', tzinfo=tzinfo, locale='fr_FR') '15:30:00 heure d’été d’Europe centrale' >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=get_timezone('US/Eastern'), ... locale='en') u"09 o'clock AM, Eastern Daylight Time" As that example shows, when this function gets passed a ``datetime.datetime`` value, the actual time in the formatted string is adjusted to the timezone specified by the `tzinfo` parameter. If the ``datetime`` is "naive" (i.e. it has no associated timezone information), it is assumed to be in UTC. These timezone calculations are **not** performed if the value is of type ``datetime.time``, as without date information there's no way to determine what a given time would translate to in a different timezone without information about whether daylight savings time is in effect or not. This means that time values are left as-is, and the value of the `tzinfo` parameter is only used to display the timezone name if needed: >>> t = time(15, 30) >>> format_time(t, format='full', tzinfo=get_timezone('Europe/Paris'), ... locale='fr_FR') # doctest: +SKIP u'15:30:00 heure normale d\u2019Europe centrale' >>> format_time(t, format='full', tzinfo=get_timezone('US/Eastern'), ... locale='en_US') # doctest: +SKIP u'3:30:00\u202fPM Eastern Standard Time' :param time: the ``time`` or ``datetime`` object; if `None`, the current time in UTC is used :param format: one of "full", "long", "medium", or "short", or a custom date/time pattern :param tzinfo: the time-zone to apply to the time for display :param locale: a `Locale` object or a locale identifier. Defaults to the system time locale. """ # get reference date for if we need to find the right timezone variant # in the pattern ref_date = time.date() if isinstance(time, datetime.datetime) else None time = _get_time(time, tzinfo) locale = Locale.parse(locale or LC_TIME) if format in ('full', 'long', 'medium', 'short'): format = get_time_format(format, locale=locale) return parse_pattern(format).apply(time, locale, reference_date=ref_date) def format_skeleton( skeleton: str, datetime: _Instant = None, tzinfo: datetime.tzinfo | None = None, fuzzy: bool = True, locale: Locale | str | None = None, ) -> str: r"""Return a time and/or date formatted according to the given pattern. The skeletons are defined in the CLDR data and provide more flexibility than the simple short/long/medium formats, but are a bit harder to use. The are defined using the date/time symbols without order or punctuation and map to a suitable format for the given locale. >>> from datetime import datetime >>> t = datetime(2007, 4, 1, 15, 30) >>> format_skeleton('MMMEd', t, locale='fr') u'dim. 1 avr.' >>> format_skeleton('MMMEd', t, locale='en') u'Sun, Apr 1' >>> format_skeleton('yMMd', t, locale='fi') # yMMd is not in the Finnish locale; yMd gets used u'1.4.2007' >>> format_skeleton('yMMd', t, fuzzy=False, locale='fi') # yMMd is not in the Finnish locale, an error is thrown Traceback (most recent call last): ... 
KeyError: yMMd >>> format_skeleton('GH', t, fuzzy=True, locale='fi_FI') # GH is not in the Finnish locale and there is no close match, an error is thrown Traceback (most recent call last): ... KeyError: None After the skeleton is resolved to a pattern `format_datetime` is called so all timezone processing etc is the same as for that. :param skeleton: A date time skeleton as defined in the cldr data. :param datetime: the ``time`` or ``datetime`` object; if `None`, the current time in UTC is used :param tzinfo: the time-zone to apply to the time for display :param fuzzy: If the skeleton is not found, allow choosing a skeleton that's close enough to it. If there is no close match, a `KeyError` is thrown. :param locale: a `Locale` object or a locale identifier. Defaults to the system time locale. """ locale = Locale.parse(locale or LC_TIME) if fuzzy and skeleton not in locale.datetime_skeletons: skeleton = match_skeleton(skeleton, locale.datetime_skeletons) format = locale.datetime_skeletons[skeleton] return format_datetime(datetime, format, tzinfo, locale) TIMEDELTA_UNITS: tuple[tuple[str, int], ...] = ( ('year', 3600 * 24 * 365), ('month', 3600 * 24 * 30), ('week', 3600 * 24 * 7), ('day', 3600 * 24), ('hour', 3600), ('minute', 60), ('second', 1), ) def format_timedelta( delta: datetime.timedelta | int, granularity: Literal['year', 'month', 'week', 'day', 'hour', 'minute', 'second'] = 'second', threshold: float = .85, add_direction: bool = False, format: Literal['narrow', 'short', 'medium', 'long'] = 'long', locale: Locale | str | None = None, ) -> str: """Return a time delta according to the rules of the given locale. >>> from datetime import timedelta >>> format_timedelta(timedelta(weeks=12), locale='en_US') u'3 months' >>> format_timedelta(timedelta(seconds=1), locale='es') u'1 segundo' The granularity parameter can be provided to alter the lowest unit presented, which defaults to a second. >>> format_timedelta(timedelta(hours=3), granularity='day', locale='en_US') u'1 day' The threshold parameter can be used to determine at which value the presentation switches to the next higher unit. A higher threshold factor means the presentation will switch later. For example: >>> format_timedelta(timedelta(hours=23), threshold=0.9, locale='en_US') u'1 day' >>> format_timedelta(timedelta(hours=23), threshold=1.1, locale='en_US') u'23 hours' In addition directional information can be provided that informs the user if the date is in the past or in the future: >>> format_timedelta(timedelta(hours=1), add_direction=True, locale='en') u'in 1 hour' >>> format_timedelta(timedelta(hours=-1), add_direction=True, locale='en') u'1 hour ago' The format parameter controls how compact or wide the presentation is: >>> format_timedelta(timedelta(hours=3), format='short', locale='en') u'3 hr' >>> format_timedelta(timedelta(hours=3), format='narrow', locale='en') u'3h' :param delta: a ``timedelta`` object representing the time difference to format, or the delta in seconds as an `int` value :param granularity: determines the smallest unit that should be displayed, the value can be one of "year", "month", "week", "day", "hour", "minute" or "second" :param threshold: factor that determines at which point the presentation switches to the next higher unit :param add_direction: if this flag is set to `True` the return value will include directional information. For instance a positive timedelta will include the information about it being in the future, a negative will be information about the value being in the past. 
:param format: the format, can be "narrow", "short" or "long". ( "medium" is deprecated, currently converted to "long" to maintain compatibility) :param locale: a `Locale` object or a locale identifier. Defaults to the system time locale. """ if format not in ('narrow', 'short', 'medium', 'long'): raise TypeError('Format must be one of "narrow", "short" or "long"') if format == 'medium': warnings.warn( '"medium" value for format param of format_timedelta' ' is deprecated. Use "long" instead', category=DeprecationWarning, stacklevel=2, ) format = 'long' if isinstance(delta, datetime.timedelta): seconds = int((delta.days * 86400) + delta.seconds) else: seconds = delta locale = Locale.parse(locale or LC_TIME) date_fields = locale._data["date_fields"] unit_patterns = locale._data["unit_patterns"] def _iter_patterns(a_unit): if add_direction: # Try to find the length variant version first ("year-narrow") # before falling back to the default. unit_rel_patterns = (date_fields.get(f"{a_unit}-{format}") or date_fields[a_unit]) if seconds >= 0: yield unit_rel_patterns['future'] else: yield unit_rel_patterns['past'] a_unit = f"duration-{a_unit}" unit_pats = unit_patterns.get(a_unit, {}) yield unit_pats.get(format) # We do not support `` tags at all while ingesting CLDR data, # so these aliases specified in `root.xml` are hard-coded here: # # if format in ("long", "narrow"): yield unit_pats.get("short") for unit, secs_per_unit in TIMEDELTA_UNITS: value = abs(seconds) / secs_per_unit if value >= threshold or unit == granularity: if unit == granularity and value > 0: value = max(1, value) value = int(round(value)) plural_form = locale.plural_form(value) pattern = None for patterns in _iter_patterns(unit): if patterns is not None: pattern = patterns.get(plural_form) or patterns.get('other') if pattern: break # This really should not happen if pattern is None: return '' return pattern.replace('{0}', str(value)) return '' def _format_fallback_interval( start: _Instant, end: _Instant, skeleton: str | None, tzinfo: datetime.tzinfo | None, locale: Locale, ) -> str: if skeleton in locale.datetime_skeletons: # Use the given skeleton format = lambda dt: format_skeleton(skeleton, dt, tzinfo, locale=locale) elif all((isinstance(d, datetime.date) and not isinstance(d, datetime.datetime)) for d in (start, end)): # Both are just dates format = lambda dt: format_date(dt, locale=locale) elif all((isinstance(d, datetime.time) and not isinstance(d, datetime.date)) for d in (start, end)): # Both are times format = lambda dt: format_time(dt, tzinfo=tzinfo, locale=locale) else: format = lambda dt: format_datetime(dt, tzinfo=tzinfo, locale=locale) formatted_start = format(start) formatted_end = format(end) if formatted_start == formatted_end: return format(start) return ( locale.interval_formats.get(None, "{0}-{1}"). replace("{0}", formatted_start). replace("{1}", formatted_end) ) def format_interval( start: _Instant, end: _Instant, skeleton: str | None = None, tzinfo: datetime.tzinfo | None = None, fuzzy: bool = True, locale: Locale | str | None = None, ) -> str: """ Format an interval between two instants according to the locale's rules. 
>>> from datetime import date, time >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "yMd", locale="fi") u'15.\u201317.1.2016' >>> format_interval(time(12, 12), time(16, 16), "Hm", locale="en_GB") '12:12\u201316:16' >>> format_interval(time(5, 12), time(16, 16), "hm", locale="en_US") '5:12\u202fAM\u2009–\u20094:16\u202fPM' >>> format_interval(time(16, 18), time(16, 24), "Hm", locale="it") '16:18\u201316:24' If the start instant equals the end instant, the interval is formatted like the instant. >>> format_interval(time(16, 18), time(16, 18), "Hm", locale="it") '16:18' Unknown skeletons fall back to "default" formatting. >>> format_interval(date(2015, 1, 1), date(2017, 1, 1), "wzq", locale="ja") '2015/01/01\uff5e2017/01/01' >>> format_interval(time(16, 18), time(16, 24), "xxx", locale="ja") '16:18:00\uff5e16:24:00' >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "xxx", locale="de") '15.01.2016\u2009–\u200917.01.2016' :param start: First instant (datetime/date/time) :param end: Second instant (datetime/date/time) :param skeleton: The "skeleton format" to use for formatting. :param tzinfo: tzinfo to use (if none is already attached) :param fuzzy: If the skeleton is not found, allow choosing a skeleton that's close enough to it. :param locale: A locale object or identifier. Defaults to the system time locale. :return: Formatted interval """ locale = Locale.parse(locale or LC_TIME) # NB: The quote comments below are from the algorithm description in # https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats # > Look for the intervalFormatItem element that matches the "skeleton", # > starting in the current locale and then following the locale fallback # > chain up to, but not including root. interval_formats = locale.interval_formats if skeleton not in interval_formats or not skeleton: # > If no match was found from the previous step, check what the closest # > match is in the fallback locale chain, as in availableFormats. That # > is, this allows for adjusting the string value field's width, # > including adjusting between "MMM" and "MMMM", and using different # > variants of the same field, such as 'v' and 'z'. if skeleton and fuzzy: skeleton = match_skeleton(skeleton, interval_formats) else: skeleton = None if not skeleton: # Still no match whatsoever? # > Otherwise, format the start and end datetime using the fallback pattern. return _format_fallback_interval(start, end, skeleton, tzinfo, locale) skel_formats = interval_formats[skeleton] if start == end: return format_skeleton(skeleton, start, tzinfo, fuzzy=fuzzy, locale=locale) start = _ensure_datetime_tzinfo(_get_datetime(start), tzinfo=tzinfo) end = _ensure_datetime_tzinfo(_get_datetime(end), tzinfo=tzinfo) start_fmt = DateTimeFormat(start, locale=locale) end_fmt = DateTimeFormat(end, locale=locale) # > If a match is found from previous steps, compute the calendar field # > with the greatest difference between start and end datetime. If there # > is no difference among any of the fields in the pattern, format as a # > single date using availableFormats, and return. for field in PATTERN_CHAR_ORDER: # These are in largest-to-smallest order if field in skel_formats and start_fmt.extract(field) != end_fmt.extract(field): # > If there is a match, use the pieces of the corresponding pattern to # > format the start and end datetime, as above. 
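            # Illustrative only (hypothetical data): if skel_formats[field] were
            # ['E d.M. \u2013 ', 'E d.M.'] -- the kind of two-piece list produced by
            # split_interval_pattern() further below -- the first piece would format
            # `start` and the second `end` before the results are joined.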
return "".join( parse_pattern(pattern).apply(instant, locale) for pattern, instant in zip(skel_formats[field], (start, end)) ) # > Otherwise, format the start and end datetime using the fallback pattern. return _format_fallback_interval(start, end, skeleton, tzinfo, locale) def get_period_id( time: _Instant, tzinfo: datetime.tzinfo | None = None, type: Literal['selection'] | None = None, locale: Locale | str | None = None, ) -> str: """ Get the day period ID for a given time. This ID can be used as a key for the period name dictionary. >>> from datetime import time >>> get_period_names(locale="de")[get_period_id(time(7, 42), locale="de")] u'Morgen' >>> get_period_id(time(0), locale="en_US") u'midnight' >>> get_period_id(time(0), type="selection", locale="en_US") u'night1' :param time: The time to inspect. :param tzinfo: The timezone for the time. See ``format_time``. :param type: The period type to use. Either "selection" or None. The selection type is used for selecting among phrases such as “Your email arrived yesterday evening” or “Your email arrived last night”. :param locale: the `Locale` object, or a locale string. Defaults to the system time locale. :return: period ID. Something is always returned -- even if it's just "am" or "pm". """ time = _get_time(time, tzinfo) seconds_past_midnight = int(time.hour * 60 * 60 + time.minute * 60 + time.second) locale = Locale.parse(locale or LC_TIME) # The LDML rules state that the rules may not overlap, so iterating in arbitrary # order should be alright, though `at` periods should be preferred. rulesets = locale.day_period_rules.get(type, {}).items() for rule_id, rules in rulesets: for rule in rules: if "at" in rule and rule["at"] == seconds_past_midnight: return rule_id for rule_id, rules in rulesets: for rule in rules: if "from" in rule and "before" in rule: if rule["from"] < rule["before"]: if rule["from"] <= seconds_past_midnight < rule["before"]: return rule_id else: # e.g. from="21:00" before="06:00" if rule["from"] <= seconds_past_midnight < 86400 or \ 0 <= seconds_past_midnight < rule["before"]: return rule_id start_ok = end_ok = False if "from" in rule and seconds_past_midnight >= rule["from"]: start_ok = True if "to" in rule and seconds_past_midnight <= rule["to"]: # This rule type does not exist in the present CLDR data; # excuse the lack of test coverage. end_ok = True if "before" in rule and seconds_past_midnight < rule["before"]: end_ok = True if "after" in rule: raise NotImplementedError("'after' is deprecated as of CLDR 29.") if start_ok and end_ok: return rule_id if seconds_past_midnight < 43200: return "am" else: return "pm" class ParseError(ValueError): pass def parse_date( string: str, locale: Locale | str | None = None, format: _PredefinedTimeFormat | str = 'medium', ) -> datetime.date: """Parse a date from a string. If an explicit format is provided, it is used to parse the date. >>> parse_date('01.04.2004', format='dd.MM.yyyy') datetime.date(2004, 4, 1) If no format is given, or if it is one of "full", "long", "medium", or "short", the function first tries to interpret the string as ISO-8601 date format and then uses the date format for the locale as a hint to determine the order in which the date fields appear in the string. 
>>> parse_date('4/1/04', locale='en_US') datetime.date(2004, 4, 1) >>> parse_date('01.04.2004', locale='de_DE') datetime.date(2004, 4, 1) >>> parse_date('2004-04-01', locale='en_US') datetime.date(2004, 4, 1) >>> parse_date('2004-04-01', locale='de_DE') datetime.date(2004, 4, 1) >>> parse_date('01.04.04', locale='de_DE', format='short') datetime.date(2004, 4, 1) :param string: the string containing the date :param locale: a `Locale` object or a locale identifier :param locale: a `Locale` object or a locale identifier. Defaults to the system time locale. :param format: the format to use, either an explicit date format, or one of "full", "long", "medium", or "short" (see ``get_time_format``) """ numbers = re.findall(r'(\d+)', string) if not numbers: raise ParseError("No numbers were found in input") use_predefined_format = format in ('full', 'long', 'medium', 'short') # we try ISO-8601 format first, meaning similar to formats # extended YYYY-MM-DD or basic YYYYMMDD iso_alike = re.match(r'^(\d{4})-?([01]\d)-?([0-3]\d)$', string, flags=re.ASCII) # allow only ASCII digits if iso_alike and use_predefined_format: try: return datetime.date(*map(int, iso_alike.groups())) except ValueError: pass # a locale format might fit better, so let's continue if use_predefined_format: fmt = get_date_format(format=format, locale=locale) else: fmt = parse_pattern(format) format_str = fmt.pattern.lower() year_idx = format_str.index('y') month_idx = format_str.find('m') if month_idx < 0: month_idx = format_str.index('l') day_idx = format_str.index('d') indexes = sorted([(year_idx, 'Y'), (month_idx, 'M'), (day_idx, 'D')]) indexes = {item[1]: idx for idx, item in enumerate(indexes)} # FIXME: this currently only supports numbers, but should also support month # names, both in the requested locale, and english year = numbers[indexes['Y']] year = 2000 + int(year) if len(year) == 2 else int(year) month = int(numbers[indexes['M']]) day = int(numbers[indexes['D']]) if month > 12: month, day = day, month return datetime.date(year, month, day) def parse_time( string: str, locale: Locale | str | None = None, format: _PredefinedTimeFormat | str = 'medium', ) -> datetime.time: """Parse a time from a string. This function uses the time format for the locale as a hint to determine the order in which the time fields appear in the string. If an explicit format is provided, the function will use it to parse the time instead. >>> parse_time('15:30:00', locale='en_US') datetime.time(15, 30) >>> parse_time('15:30:00', format='H:mm:ss') datetime.time(15, 30) :param string: the string containing the time :param locale: a `Locale` object or a locale identifier. Defaults to the system time locale. :param format: the format to use, either an explicit time format, or one of "full", "long", "medium", or "short" (see ``get_time_format``) :return: the parsed time :rtype: `time` """ numbers = re.findall(r'(\d+)', string) if not numbers: raise ParseError("No numbers were found in input") # TODO: try ISO format first? 
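    # Worked sketch of the index mapping computed below (comments only): for the
    # explicit pattern 'H:mm:ss' used in the doctest above, the lowercased pattern
    # yields hour_idx=0, min_idx=2 and sec_idx=5, so `indexes` becomes
    # {'H': 0, 'M': 1, 'S': 2} and the numbers parsed from '15:30:00' are taken as
    # hour, minute and second in that order.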
if format in ('full', 'long', 'medium', 'short'): fmt = get_time_format(format=format, locale=locale) else: fmt = parse_pattern(format) format_str = fmt.pattern.lower() hour_idx = format_str.find('h') if hour_idx < 0: hour_idx = format_str.index('k') min_idx = format_str.index('m') # format might not contain seconds if (sec_idx := format_str.find('s')) < 0: sec_idx = math.inf indexes = sorted([(hour_idx, 'H'), (min_idx, 'M'), (sec_idx, 'S')]) indexes = {item[1]: idx for idx, item in enumerate(indexes)} # TODO: support time zones # Check if the format specifies a period to be used; # if it does, look for 'pm' to figure out an offset. hour_offset = 0 if 'a' in format_str and 'pm' in string.lower(): hour_offset = 12 # Parse up to three numbers from the string. minute = second = 0 hour = int(numbers[indexes['H']]) + hour_offset if len(numbers) > 1: minute = int(numbers[indexes['M']]) if len(numbers) > 2: second = int(numbers[indexes['S']]) return datetime.time(hour, minute, second) class DateTimePattern: def __init__(self, pattern: str, format: DateTimeFormat): self.pattern = pattern self.format = format def __repr__(self) -> str: return f"<{type(self).__name__} {self.pattern!r}>" def __str__(self) -> str: pat = self.pattern return pat def __mod__(self, other: DateTimeFormat) -> str: if not isinstance(other, DateTimeFormat): return NotImplemented return self.format % other def apply( self, datetime: datetime.date | datetime.time, locale: Locale | str | None, reference_date: datetime.date | None = None, ) -> str: return self % DateTimeFormat(datetime, locale, reference_date) class DateTimeFormat: def __init__( self, value: datetime.date | datetime.time, locale: Locale | str, reference_date: datetime.date | None = None, ) -> None: assert isinstance(value, (datetime.date, datetime.datetime, datetime.time)) if isinstance(value, (datetime.datetime, datetime.time)) and value.tzinfo is None: value = value.replace(tzinfo=UTC) self.value = value self.locale = Locale.parse(locale) self.reference_date = reference_date def __getitem__(self, name: str) -> str: char = name[0] num = len(name) if char == 'G': return self.format_era(char, num) elif char in ('y', 'Y', 'u'): return self.format_year(char, num) elif char in ('Q', 'q'): return self.format_quarter(char, num) elif char in ('M', 'L'): return self.format_month(char, num) elif char in ('w', 'W'): return self.format_week(char, num) elif char == 'd': return self.format(self.value.day, num) elif char == 'D': return self.format_day_of_year(num) elif char == 'F': return self.format_day_of_week_in_month() elif char in ('E', 'e', 'c'): return self.format_weekday(char, num) elif char in ('a', 'b', 'B'): return self.format_period(char, num) elif char == 'h': if self.value.hour % 12 == 0: return self.format(12, num) else: return self.format(self.value.hour % 12, num) elif char == 'H': return self.format(self.value.hour, num) elif char == 'K': return self.format(self.value.hour % 12, num) elif char == 'k': if self.value.hour == 0: return self.format(24, num) else: return self.format(self.value.hour, num) elif char == 'm': return self.format(self.value.minute, num) elif char == 's': return self.format(self.value.second, num) elif char == 'S': return self.format_frac_seconds(num) elif char == 'A': return self.format_milliseconds_in_day(num) elif char in ('z', 'Z', 'v', 'V', 'x', 'X', 'O'): return self.format_timezone(char, num) else: raise KeyError(f"Unsupported date/time field {char!r}") def extract(self, char: str) -> int: char = str(char)[0] if char == 'y': 
return self.value.year elif char == 'M': return self.value.month elif char == 'd': return self.value.day elif char == 'H': return self.value.hour elif char == 'h': return self.value.hour % 12 or 12 elif char == 'm': return self.value.minute elif char == 'a': return int(self.value.hour >= 12) # 0 for am, 1 for pm else: raise NotImplementedError(f"Not implemented: extracting {char!r} from {self.value!r}") def format_era(self, char: str, num: int) -> str: width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)] era = int(self.value.year >= 0) return get_era_names(width, self.locale)[era] def format_year(self, char: str, num: int) -> str: value = self.value.year if char.isupper(): month = self.value.month if month == 1 and self.value.day < 7 and self.get_week_of_year() >= 52: value -= 1 elif month == 12 and self.value.day > 25 and self.get_week_of_year() <= 2: value += 1 year = self.format(value, num) if num == 2: year = year[-2:] return year def format_quarter(self, char: str, num: int) -> str: quarter = (self.value.month - 1) // 3 + 1 if num <= 2: return '%0*d' % (num, quarter) width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num] context = {'Q': 'format', 'q': 'stand-alone'}[char] return get_quarter_names(width, context, self.locale)[quarter] def format_month(self, char: str, num: int) -> str: if num <= 2: return '%0*d' % (num, self.value.month) width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num] context = {'M': 'format', 'L': 'stand-alone'}[char] return get_month_names(width, context, self.locale)[self.value.month] def format_week(self, char: str, num: int) -> str: if char.islower(): # week of year week = self.get_week_of_year() return self.format(week, num) else: # week of month week = self.get_week_of_month() return str(week) def format_weekday(self, char: str = 'E', num: int = 4) -> str: """ Return weekday from parsed datetime according to format pattern. >>> from datetime import date >>> format = DateTimeFormat(date(2016, 2, 28), Locale.parse('en_US')) >>> format.format_weekday() u'Sunday' 'E': Day of week - Use one through three letters for the abbreviated day name, four for the full (wide) name, five for the narrow name, or six for the short name. >>> format.format_weekday('E',2) u'Sun' 'e': Local day of week. Same as E except adds a numeric value that will depend on the local starting day of the week, using one or two letters. For this example, Monday is the first day of the week. >>> format.format_weekday('e',2) '01' 'c': Stand-Alone local day of week - Use one letter for the local numeric value (same as 'e'), three for the abbreviated day name, four for the full (wide) name, five for the narrow name, or six for the short name. >>> format.format_weekday('c',1) '1' :param char: pattern format character ('e','E','c') :param num: count of format character """ if num < 3: if char.islower(): value = 7 - self.locale.first_week_day + self.value.weekday() return self.format(value % 7 + 1, num) num = 3 weekday = self.value.weekday() width = {3: 'abbreviated', 4: 'wide', 5: 'narrow', 6: 'short'}[num] context = "stand-alone" if char == "c" else "format" return get_day_names(width, context, self.locale)[weekday] def format_day_of_year(self, num: int) -> str: return self.format(self.get_day_of_year(), num) def format_day_of_week_in_month(self) -> str: return str((self.value.day - 1) // 7 + 1) def format_period(self, char: str, num: int) -> str: """ Return period from parsed datetime according to format pattern. 
>>> from datetime import datetime, time >>> format = DateTimeFormat(time(13, 42), 'fi_FI') >>> format.format_period('a', 1) u'ip.' >>> format.format_period('b', 1) u'iltap.' >>> format.format_period('b', 4) u'iltapäivä' >>> format.format_period('B', 4) u'iltapäivällä' >>> format.format_period('B', 5) u'ip.' >>> format = DateTimeFormat(datetime(2022, 4, 28, 6, 27), 'zh_Hant') >>> format.format_period('a', 1) u'上午' >>> format.format_period('B', 1) u'清晨' :param char: pattern format character ('a', 'b', 'B') :param num: count of format character """ widths = [{3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)], 'wide', 'narrow', 'abbreviated'] if char == 'a': period = 'pm' if self.value.hour >= 12 else 'am' context = 'format' else: period = get_period_id(self.value, locale=self.locale) context = 'format' if char == 'B' else 'stand-alone' for width in widths: period_names = get_period_names(context=context, width=width, locale=self.locale) if period in period_names: return period_names[period] raise ValueError(f"Could not format period {period} in {self.locale}") def format_frac_seconds(self, num: int) -> str: """ Return fractional seconds. Rounds the time's microseconds to the precision given by the number \ of digits passed in. """ value = self.value.microsecond / 1000000 return self.format(round(value, num) * 10**num, num) def format_milliseconds_in_day(self, num): msecs = self.value.microsecond // 1000 + self.value.second * 1000 + \ self.value.minute * 60000 + self.value.hour * 3600000 return self.format(msecs, num) def format_timezone(self, char: str, num: int) -> str: width = {3: 'short', 4: 'long', 5: 'iso8601'}[max(3, num)] # It could be that we only receive a time to format, but also have a # reference date which is important to distinguish between timezone # variants (summer/standard time) value = self.value if self.reference_date: value = datetime.datetime.combine(self.reference_date, self.value) if char == 'z': return get_timezone_name(value, width, locale=self.locale) elif char == 'Z': if num == 5: return get_timezone_gmt(value, width, locale=self.locale, return_z=True) return get_timezone_gmt(value, width, locale=self.locale) elif char == 'O': if num == 4: return get_timezone_gmt(value, width, locale=self.locale) # TODO: To add support for O:1 elif char == 'v': return get_timezone_name(value.tzinfo, width, locale=self.locale) elif char == 'V': if num == 1: return get_timezone_name(value.tzinfo, width, uncommon=True, locale=self.locale) elif num == 2: return get_timezone_name(value.tzinfo, locale=self.locale, return_zone=True) elif num == 3: return get_timezone_location(value.tzinfo, locale=self.locale, return_city=True) return get_timezone_location(value.tzinfo, locale=self.locale) # Included additional elif condition to add support for 'Xx' in timezone format elif char == 'X': if num == 1: return get_timezone_gmt(value, width='iso8601_short', locale=self.locale, return_z=True) elif num in (2, 4): return get_timezone_gmt(value, width='short', locale=self.locale, return_z=True) elif num in (3, 5): return get_timezone_gmt(value, width='iso8601', locale=self.locale, return_z=True) elif char == 'x': if num == 1: return get_timezone_gmt(value, width='iso8601_short', locale=self.locale) elif num in (2, 4): return get_timezone_gmt(value, width='short', locale=self.locale) elif num in (3, 5): return get_timezone_gmt(value, width='iso8601', locale=self.locale) def format(self, value: SupportsInt, length: int) -> str: return '%0*d' % (length, value) def get_day_of_year(self, date: 
datetime.date | None = None) -> int: if date is None: date = self.value return (date - date.replace(month=1, day=1)).days + 1 def get_week_of_year(self) -> int: """Return the week of the year.""" day_of_year = self.get_day_of_year(self.value) week = self.get_week_number(day_of_year) if week == 0: date = datetime.date(self.value.year - 1, 12, 31) week = self.get_week_number(self.get_day_of_year(date), date.weekday()) elif week > 52: weekday = datetime.date(self.value.year + 1, 1, 1).weekday() if self.get_week_number(1, weekday) == 1 and \ 32 - (weekday - self.locale.first_week_day) % 7 <= self.value.day: week = 1 return week def get_week_of_month(self) -> int: """Return the week of the month.""" return self.get_week_number(self.value.day) def get_week_number(self, day_of_period: int, day_of_week: int | None = None) -> int: """Return the number of the week of a day within a period. This may be the week number in a year or the week number in a month. Usually this will return a value equal to or greater than 1, but if the first week of the period is so short that it actually counts as the last week of the previous period, this function will return 0. >>> date = datetime.date(2006, 1, 8) >>> DateTimeFormat(date, 'de_DE').get_week_number(6) 1 >>> DateTimeFormat(date, 'en_US').get_week_number(6) 2 :param day_of_period: the number of the day in the period (usually either the day of month or the day of year) :param day_of_week: the week day; if omitted, the week day of the current date is assumed """ if day_of_week is None: day_of_week = self.value.weekday() first_day = (day_of_week - self.locale.first_week_day - day_of_period + 1) % 7 if first_day < 0: first_day += 7 week_number = (day_of_period + first_day - 1) // 7 if 7 - first_day >= self.locale.min_week_days: week_number += 1 return week_number PATTERN_CHARS: dict[str, list[int] | None] = { 'G': [1, 2, 3, 4, 5], # era 'y': None, 'Y': None, 'u': None, # year 'Q': [1, 2, 3, 4, 5], 'q': [1, 2, 3, 4, 5], # quarter 'M': [1, 2, 3, 4, 5], 'L': [1, 2, 3, 4, 5], # month 'w': [1, 2], 'W': [1], # week 'd': [1, 2], 'D': [1, 2, 3], 'F': [1], 'g': None, # day 'E': [1, 2, 3, 4, 5, 6], 'e': [1, 2, 3, 4, 5, 6], 'c': [1, 3, 4, 5, 6], # week day 'a': [1, 2, 3, 4, 5], 'b': [1, 2, 3, 4, 5], 'B': [1, 2, 3, 4, 5], # period 'h': [1, 2], 'H': [1, 2], 'K': [1, 2], 'k': [1, 2], # hour 'm': [1, 2], # minute 's': [1, 2], 'S': None, 'A': None, # second 'z': [1, 2, 3, 4], 'Z': [1, 2, 3, 4, 5], 'O': [1, 4], 'v': [1, 4], # zone 'V': [1, 2, 3, 4], 'x': [1, 2, 3, 4, 5], 'X': [1, 2, 3, 4, 5], # zone } #: The pattern characters declared in the Date Field Symbol Table #: (https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table) #: in order of decreasing magnitude. PATTERN_CHAR_ORDER = "GyYuUQqMLlwWdDFgEecabBChHKkjJmsSAzZOvVXx" def parse_pattern(pattern: str | DateTimePattern) -> DateTimePattern: """Parse date, time, and datetime format patterns. 
>>> parse_pattern("MMMMd").format u'%(MMMM)s%(d)s' >>> parse_pattern("MMM d, yyyy").format u'%(MMM)s %(d)s, %(yyyy)s' Pattern can contain literal strings in single quotes: >>> parse_pattern("H:mm' Uhr 'z").format u'%(H)s:%(mm)s Uhr %(z)s' An actual single quote can be used by using two adjacent single quote characters: >>> parse_pattern("hh' o''clock'").format u"%(hh)s o'clock" :param pattern: the formatting pattern to parse """ if isinstance(pattern, DateTimePattern): return pattern return _cached_parse_pattern(pattern) @lru_cache(maxsize=1024) def _cached_parse_pattern(pattern: str) -> DateTimePattern: result = [] for tok_type, tok_value in tokenize_pattern(pattern): if tok_type == "chars": result.append(tok_value.replace('%', '%%')) elif tok_type == "field": fieldchar, fieldnum = tok_value limit = PATTERN_CHARS[fieldchar] if limit and fieldnum not in limit: raise ValueError(f"Invalid length for field: {fieldchar * fieldnum!r}") result.append('%%(%s)s' % (fieldchar * fieldnum)) else: raise NotImplementedError(f"Unknown token type: {tok_type}") return DateTimePattern(pattern, ''.join(result)) def tokenize_pattern(pattern: str) -> list[tuple[str, str | tuple[str, int]]]: """ Tokenize date format patterns. Returns a list of (token_type, token_value) tuples. ``token_type`` may be either "chars" or "field". For "chars" tokens, the value is the literal value. For "field" tokens, the value is a tuple of (field character, repetition count). :param pattern: Pattern string :type pattern: str :rtype: list[tuple] """ result = [] quotebuf = None charbuf = [] fieldchar = [''] fieldnum = [0] def append_chars(): result.append(('chars', ''.join(charbuf).replace('\0', "'"))) del charbuf[:] def append_field(): result.append(('field', (fieldchar[0], fieldnum[0]))) fieldchar[0] = '' fieldnum[0] = 0 for char in pattern.replace("''", '\0'): if quotebuf is None: if char == "'": # quote started if fieldchar[0]: append_field() elif charbuf: append_chars() quotebuf = [] elif char in PATTERN_CHARS: if charbuf: append_chars() if char == fieldchar[0]: fieldnum[0] += 1 else: if fieldchar[0]: append_field() fieldchar[0] = char fieldnum[0] = 1 else: if fieldchar[0]: append_field() charbuf.append(char) elif quotebuf is not None: if char == "'": # end of quote charbuf.extend(quotebuf) quotebuf = None else: # inside quote quotebuf.append(char) if fieldchar[0]: append_field() elif charbuf: append_chars() return result def untokenize_pattern(tokens: Iterable[tuple[str, str | tuple[str, int]]]) -> str: """ Turn a date format pattern token stream back into a string. This is the reverse operation of ``tokenize_pattern``. :type tokens: Iterable[tuple] :rtype: str """ output = [] for tok_type, tok_value in tokens: if tok_type == "field": output.append(tok_value[0] * tok_value[1]) elif tok_type == "chars": if not any(ch in PATTERN_CHARS for ch in tok_value): # No need to quote output.append(tok_value) else: output.append("'%s'" % tok_value.replace("'", "''")) return "".join(output) def split_interval_pattern(pattern: str) -> list[str]: """ Split an interval-describing datetime pattern into multiple pieces. > The pattern is then designed to be broken up into two pieces by determining the first repeating field. - https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats >>> split_interval_pattern(u'E d.M. \u2013 E d.M.') [u'E d.M. 
\u2013 ', 'E d.M.'] >>> split_interval_pattern("Y 'text' Y 'more text'") ["Y 'text '", "Y 'more text'"] >>> split_interval_pattern(u"E, MMM d \u2013 E") [u'E, MMM d \u2013 ', u'E'] >>> split_interval_pattern("MMM d") ['MMM d'] >>> split_interval_pattern("y G") ['y G'] >>> split_interval_pattern(u"MMM d \u2013 d") [u'MMM d \u2013 ', u'd'] :param pattern: Interval pattern string :return: list of "subpatterns" """ seen_fields = set() parts = [[]] for tok_type, tok_value in tokenize_pattern(pattern): if tok_type == "field": if tok_value[0] in seen_fields: # Repeated field parts.append([]) seen_fields.clear() seen_fields.add(tok_value[0]) parts[-1].append((tok_type, tok_value)) return [untokenize_pattern(tokens) for tokens in parts] def match_skeleton(skeleton: str, options: Iterable[str], allow_different_fields: bool = False) -> str | None: """ Find the closest match for the given datetime skeleton among the options given. This uses the rules outlined in the TR35 document. >>> match_skeleton('yMMd', ('yMd', 'yMMMd')) 'yMd' >>> match_skeleton('yMMd', ('jyMMd',), allow_different_fields=True) 'jyMMd' >>> match_skeleton('yMMd', ('qyMMd',), allow_different_fields=False) >>> match_skeleton('hmz', ('hmv',)) 'hmv' :param skeleton: The skeleton to match :type skeleton: str :param options: An iterable of other skeletons to match against :type options: Iterable[str] :param allow_different_fields: Whether to allow a match that uses different fields than the skeleton requested. :type allow_different_fields: bool :return: The closest skeleton match, or if no match was found, None. :rtype: str|None """ # TODO: maybe implement pattern expansion? # Based on the implementation in # https://github.com/unicode-org/icu/blob/main/icu4j/main/core/src/main/java/com/ibm/icu/text/DateIntervalInfo.java # Filter out falsy values and sort for stability; when `interval_formats` is passed in, there may be a None key. 
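    # Worked example of the field-width weighting applied in the loop below
    # (comments only): for match_skeleton('yMMd', ('yMd', 'yMMMd')), 'yMd' differs
    # from the input only in the numeric width of 'M' (2 vs. 1, distance 1), while
    # 'yMMMd' crosses the numeric/text boundary for 'M' (2 vs. 3, distance 0x100),
    # so 'yMd' is the better match -- matching the doctest above.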
options = sorted(option for option in options if option) if 'z' in skeleton and not any('z' in option for option in options): skeleton = skeleton.replace('z', 'v') if 'k' in skeleton and not any('k' in option for option in options): skeleton = skeleton.replace('k', 'H') if 'K' in skeleton and not any('K' in option for option in options): skeleton = skeleton.replace('K', 'h') if 'a' in skeleton and not any('a' in option for option in options): skeleton = skeleton.replace('a', '') if 'b' in skeleton and not any('b' in option for option in options): skeleton = skeleton.replace('b', '') get_input_field_width = dict(t[1] for t in tokenize_pattern(skeleton) if t[0] == "field").get best_skeleton = None best_distance = None for option in options: get_opt_field_width = dict(t[1] for t in tokenize_pattern(option) if t[0] == "field").get distance = 0 for field in PATTERN_CHARS: input_width = get_input_field_width(field, 0) opt_width = get_opt_field_width(field, 0) if input_width == opt_width: continue if opt_width == 0 or input_width == 0: if not allow_different_fields: # This one is not okay option = None break distance += 0x1000 # Magic weight constant for "entirely different fields" elif field == 'M' and ((input_width > 2 and opt_width <= 2) or (input_width <= 2 and opt_width > 2)): distance += 0x100 # Magic weight for "text turns into a number" else: distance += abs(input_width - opt_width) if not option: # We lost the option along the way (probably due to "allow_different_fields") continue if not best_skeleton or distance < best_distance: best_skeleton = option best_distance = distance if distance == 0: # Found a perfect match! break return best_skeleton babel-2.17.0/babel/languages.py000066400000000000000000000054341474743505200162540ustar00rootroot00000000000000from __future__ import annotations from babel.core import get_global def get_official_languages(territory: str, regional: bool = False, de_facto: bool = False) -> tuple[str, ...]: """ Get the official language(s) for the given territory. The language codes, if any are known, are returned in order of descending popularity. If the `regional` flag is set, then languages which are regionally official are also returned. If the `de_facto` flag is set, then languages which are "de facto" official are also returned. .. warning:: Note that the data is as up to date as the current version of the CLDR used by Babel. If you need scientifically accurate information, use another source! :param territory: Territory code :type territory: str :param regional: Whether to return regionally official languages too :type regional: bool :param de_facto: Whether to return de-facto official languages too :type de_facto: bool :return: Tuple of language codes :rtype: tuple[str] """ territory = str(territory).upper() allowed_stati = {"official"} if regional: allowed_stati.add("official_regional") if de_facto: allowed_stati.add("de_facto_official") languages = get_global("territory_languages").get(territory, {}) pairs = [ (info['population_percent'], language) for language, info in languages.items() if info.get('official_status') in allowed_stati ] pairs.sort(reverse=True) return tuple(lang for _, lang in pairs) def get_territory_language_info(territory: str) -> dict[str, dict[str, float | str | None]]: """ Get a dictionary of language information for a territory. The dictionary is keyed by language code; the values are dicts with more information. 
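    A return value might look like this (hypothetical codes and numbers, purely
    for illustration; the real data comes from the bundled CLDR):

    >>> get_territory_language_info('XX')  # doctest: +SKIP
    {'aa': {'population_percent': 90.0, 'official_status': 'official'},
     'bb': {'population_percent': 5.0}}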
The following keys are currently known for the values: * `population_percent`: The percentage of the territory's population speaking the language. * `official_status`: An optional string describing the officiality status of the language. Known values are "official", "official_regional" and "de_facto_official". .. warning:: Note that the data is as up to date as the current version of the CLDR used by Babel. If you need scientifically accurate information, use another source! .. note:: Note that the format of the dict returned may change between Babel versions. See https://www.unicode.org/cldr/charts/latest/supplemental/territory_language_information.html :param territory: Territory code :type territory: str :return: Language information dictionary :rtype: dict[str, dict] """ territory = str(territory).upper() return get_global("territory_languages").get(territory, {}).copy() babel-2.17.0/babel/lists.py000066400000000000000000000104111474743505200154330ustar00rootroot00000000000000""" babel.lists ~~~~~~~~~~~ Locale dependent formatting of lists. The default locale for the functions in this module is determined by the following environment variables, in that order: * ``LC_ALL``, and * ``LANG`` :copyright: (c) 2015-2025 by the Babel Team. :license: BSD, see LICENSE for more details. """ from __future__ import annotations import warnings from collections.abc import Sequence from typing import Literal from babel.core import Locale, default_locale _DEFAULT_LOCALE = default_locale() # TODO(3.0): Remove this. def __getattr__(name): if name == "DEFAULT_LOCALE": warnings.warn( "The babel.lists.DEFAULT_LOCALE constant is deprecated and will be removed.", DeprecationWarning, stacklevel=2, ) return _DEFAULT_LOCALE raise AttributeError(f"module {__name__!r} has no attribute {name!r}") def format_list( lst: Sequence[str], style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard', locale: Locale | str | None = None, ) -> str: """ Format the items in `lst` as a list. >>> format_list(['apples', 'oranges', 'pears'], locale='en') u'apples, oranges, and pears' >>> format_list(['apples', 'oranges', 'pears'], locale='zh') u'apples\u3001oranges\u548cpears' >>> format_list(['omena', 'peruna', 'aplari'], style='or', locale='fi') u'omena, peruna tai aplari' Not all styles are necessarily available in all locales. The function will attempt to fall back to replacement styles according to the rules set forth in the CLDR root XML file, and raise a ValueError if no suitable replacement can be found. The following text is verbatim from the Unicode TR35-49 spec [1]. * standard: A typical 'and' list for arbitrary placeholders. eg. "January, February, and March" * standard-short: A short version of an 'and' list, suitable for use with short or abbreviated placeholder values. eg. "Jan., Feb., and Mar." * or: A typical 'or' list for arbitrary placeholders. eg. "January, February, or March" * or-short: A short version of an 'or' list. eg. "Jan., Feb., or Mar." * unit: A list suitable for wide units. eg. "3 feet, 7 inches" * unit-short: A list suitable for short units eg. "3 ft, 7 in" * unit-narrow: A list suitable for narrow units, where space on the screen is very limited. eg. "3′ 7″" [1]: https://www.unicode.org/reports/tr35/tr35-49/tr35-general.html#ListPatterns :param lst: a sequence of items to format in to a list :param style: the style to format the list with. See above for description. :param locale: the locale. Defaults to the system locale. 
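    Two-element lists use the locale's dedicated "2" pattern when one is
    defined (illustrative example; the exact output depends on the CLDR data):

    >>> format_list(['apples', 'oranges'], locale='en')  # doctest: +SKIP
    u'apples and oranges'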
""" locale = Locale.parse(locale or _DEFAULT_LOCALE) if not lst: return '' if len(lst) == 1: return lst[0] patterns = _resolve_list_style(locale, style) if len(lst) == 2 and '2' in patterns: return patterns['2'].format(*lst) result = patterns['start'].format(lst[0], lst[1]) for elem in lst[2:-1]: result = patterns['middle'].format(result, elem) result = patterns['end'].format(result, lst[-1]) return result # Based on CLDR 45's root.xml file's ``es. # The root file defines both `standard` and `or`, # so they're always available. # TODO: It would likely be better to use the # babel.localedata.Alias mechanism for this, # but I'm not quite sure how it's supposed to # work with inheritance and data in the root. _style_fallbacks = { "or-narrow": ["or-short", "or"], "or-short": ["or"], "standard-narrow": ["standard-short", "standard"], "standard-short": ["standard"], "unit": ["unit-short", "standard"], "unit-narrow": ["unit-short", "unit", "standard"], "unit-short": ["standard"], } def _resolve_list_style(locale: Locale, style: str): for style in (style, *(_style_fallbacks.get(style, []))): # noqa: B020 if style in locale.list_patterns: return locale.list_patterns[style] raise ValueError( f"Locale {locale} does not support list formatting style {style!r} " f"(supported are {sorted(locale.list_patterns)})", ) babel-2.17.0/babel/locale-data/000077500000000000000000000000001474743505200160745ustar00rootroot00000000000000babel-2.17.0/babel/locale-data/.gitignore000066400000000000000000000000021474743505200200540ustar00rootroot00000000000000* babel-2.17.0/babel/locale-data/LICENSE.unicode000066400000000000000000000037611474743505200205350ustar00rootroot00000000000000UNICODE LICENSE V3 COPYRIGHT AND PERMISSION NOTICE Copyright © 2004-2025 Unicode, Inc. NOTICE TO USER: Carefully read the following legal agreement. BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE. Permission is hereby granted, free of charge, to any person obtaining a copy of data files and any associated documentation (the "Data Files") or software and any associated documentation (the "Software") to deal in the Data Files or Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, and/or sell copies of the Data Files or Software, and to permit persons to whom the Data Files or Software are furnished to do so, provided that either (a) this copyright and permission notice appear with all copies of the Data Files or Software, or (b) this copyright and permission notice appear in associated Documentation. THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA FILES OR SOFTWARE. 
Except as contained in this notice, the name of a copyright holder shall not be used in advertising or otherwise to promote the sale, use or other dealings in these Data Files or Software without prior written authorization of the copyright holder. SPDX-License-Identifier: Unicode-3.0 babel-2.17.0/babel/localedata.py000066400000000000000000000216341474743505200163770ustar00rootroot00000000000000""" babel.localedata ~~~~~~~~~~~~~~~~ Low-level locale data access. :note: The `Locale` class, which uses this module under the hood, provides a more convenient interface for accessing the locale data. :copyright: (c) 2013-2025 by the Babel Team. :license: BSD, see LICENSE for more details. """ from __future__ import annotations import os import pickle import re import sys import threading from collections import abc from collections.abc import Iterator, Mapping, MutableMapping from functools import lru_cache from itertools import chain from typing import Any _cache: dict[str, Any] = {} _cache_lock = threading.RLock() _dirname = os.path.join(os.path.dirname(__file__), 'locale-data') _windows_reserved_name_re = re.compile("^(con|prn|aux|nul|com[0-9]|lpt[0-9])$", re.I) def normalize_locale(name: str) -> str | None: """Normalize a locale ID by stripping spaces and apply proper casing. Returns the normalized locale ID string or `None` if the ID is not recognized. """ if not name or not isinstance(name, str): return None name = name.strip().lower() for locale_id in chain.from_iterable([_cache, locale_identifiers()]): if name == locale_id.lower(): return locale_id def resolve_locale_filename(name: os.PathLike[str] | str) -> str: """ Resolve a locale identifier to a `.dat` path on disk. """ # Clean up any possible relative paths. name = os.path.basename(name) # Ensure we're not left with one of the Windows reserved names. if sys.platform == "win32" and _windows_reserved_name_re.match(os.path.splitext(name)[0]): raise ValueError(f"Name {name} is invalid on Windows") # Build the path. return os.path.join(_dirname, f"{name}.dat") def exists(name: str) -> bool: """Check whether locale data is available for the given locale. Returns `True` if it exists, `False` otherwise. :param name: the locale identifier string """ if not name or not isinstance(name, str): return False if name in _cache: return True file_found = os.path.exists(resolve_locale_filename(name)) return True if file_found else bool(normalize_locale(name)) @lru_cache(maxsize=None) def locale_identifiers() -> list[str]: """Return a list of all locale identifiers for which locale data is available. This data is cached after the first invocation. You can clear the cache by calling `locale_identifiers.cache_clear()`. .. versionadded:: 0.8.1 :return: a list of locale identifiers (strings) """ return [ stem for stem, extension in (os.path.splitext(filename) for filename in os.listdir(_dirname)) if extension == '.dat' and stem != 'root' ] def _is_non_likely_script(name: str) -> bool: """Return whether the locale is of the form ``lang_Script``, and the script is not the likely script for the language. This implements the behavior of the ``nonlikelyScript`` value of the ``localRules`` attribute for parent locales added in CLDR 45. 
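    For example (illustrative; CLDR's likely script for ``zh`` is ``Hans``):

    >>> _is_non_likely_script('zh_Hant')  # doctest: +SKIP
    True
    >>> _is_non_likely_script('zh_Hans')  # doctest: +SKIP
    False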
""" from babel.core import get_global, parse_locale try: lang, territory, script, variant, *rest = parse_locale(name) except ValueError: return False if lang and script and not territory and not variant and not rest: likely_subtag = get_global('likely_subtags').get(lang) _, _, likely_script, *_ = parse_locale(likely_subtag) return script != likely_script return False def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str, Any]: """Load the locale data for the given locale. The locale data is a dictionary that contains much of the data defined by the Common Locale Data Repository (CLDR). This data is stored as a collection of pickle files inside the ``babel`` package. >>> d = load('en_US') >>> d['languages']['sv'] u'Swedish' Note that the results are cached, and subsequent requests for the same locale return the same dictionary: >>> d1 = load('en_US') >>> d2 = load('en_US') >>> d1 is d2 True :param name: the locale identifier string (or "root") :param merge_inherited: whether the inherited data should be merged into the data of the requested locale :raise `IOError`: if no locale data file is found for the given locale identifier, or one of the locales it inherits from """ name = os.path.basename(name) _cache_lock.acquire() try: data = _cache.get(name) if not data: # Load inherited data if name == 'root' or not merge_inherited: data = {} else: from babel.core import get_global parent = get_global('parent_exceptions').get(name) if not parent: if _is_non_likely_script(name): parent = 'root' else: parts = name.split('_') parent = "root" if len(parts) == 1 else "_".join(parts[:-1]) data = load(parent).copy() filename = resolve_locale_filename(name) with open(filename, 'rb') as fileobj: if name != 'root' and merge_inherited: merge(data, pickle.load(fileobj)) else: data = pickle.load(fileobj) _cache[name] = data return data finally: _cache_lock.release() def merge(dict1: MutableMapping[Any, Any], dict2: Mapping[Any, Any]) -> None: """Merge the data from `dict2` into the `dict1` dictionary, making copies of nested dictionaries. >>> d = {1: 'foo', 3: 'baz'} >>> merge(d, {1: 'Foo', 2: 'Bar'}) >>> sorted(d.items()) [(1, 'Foo'), (2, 'Bar'), (3, 'baz')] :param dict1: the dictionary to merge into :param dict2: the dictionary containing the data that should be merged """ for key, val2 in dict2.items(): if val2 is not None: val1 = dict1.get(key) if isinstance(val2, dict): if val1 is None: val1 = {} if isinstance(val1, Alias): val1 = (val1, val2) elif isinstance(val1, tuple): alias, others = val1 others = others.copy() merge(others, val2) val1 = (alias, others) else: val1 = val1.copy() merge(val1, val2) else: val1 = val2 dict1[key] = val1 class Alias: """Representation of an alias in the locale data. An alias is a value that refers to some other part of the locale data, as specified by the `keys`. """ def __init__(self, keys: tuple[str, ...]) -> None: self.keys = tuple(keys) def __repr__(self) -> str: return f"<{type(self).__name__} {self.keys!r}>" def resolve(self, data: Mapping[str | int | None, Any]) -> Mapping[str | int | None, Any]: """Resolve the alias based on the given data. This is done recursively, so if one alias resolves to a second alias, that second alias will also be resolved. 
:param data: the locale data :type data: `dict` """ base = data for key in self.keys: data = data[key] if isinstance(data, Alias): data = data.resolve(base) elif isinstance(data, tuple): alias, others = data data = alias.resolve(base) return data class LocaleDataDict(abc.MutableMapping): """Dictionary wrapper that automatically resolves aliases to the actual values. """ def __init__(self, data: MutableMapping[str | int | None, Any], base: Mapping[str | int | None, Any] | None = None): self._data = data if base is None: base = data self.base = base def __len__(self) -> int: return len(self._data) def __iter__(self) -> Iterator[str | int | None]: return iter(self._data) def __getitem__(self, key: str | int | None) -> Any: orig = val = self._data[key] if isinstance(val, Alias): # resolve an alias val = val.resolve(self.base) if isinstance(val, tuple): # Merge a partial dict with an alias alias, others = val val = alias.resolve(self.base).copy() merge(val, others) if isinstance(val, dict): # Return a nested alias-resolving dict val = LocaleDataDict(val, base=self.base) if val is not orig: self._data[key] = val return val def __setitem__(self, key: str | int | None, value: Any) -> None: self._data[key] = value def __delitem__(self, key: str | int | None) -> None: del self._data[key] def copy(self) -> LocaleDataDict: return LocaleDataDict(self._data.copy(), base=self.base) babel-2.17.0/babel/localtime/000077500000000000000000000000001474743505200156775ustar00rootroot00000000000000babel-2.17.0/babel/localtime/__init__.py000066400000000000000000000020231474743505200200050ustar00rootroot00000000000000""" babel.localtime ~~~~~~~~~~~~~~~ Babel specific fork of tzlocal to determine the local timezone of the system. :copyright: (c) 2013-2025 by the Babel Team. :license: BSD, see LICENSE for more details. """ import datetime import sys if sys.platform == 'win32': from babel.localtime._win32 import _get_localzone else: from babel.localtime._unix import _get_localzone # TODO(3.0): the offset constants are not part of the public API # and should be removed from babel.localtime._fallback import ( DSTDIFF, # noqa: F401 DSTOFFSET, # noqa: F401 STDOFFSET, # noqa: F401 ZERO, # noqa: F401 _FallbackLocalTimezone, ) def get_localzone() -> datetime.tzinfo: """Returns the current underlying local timezone object. Generally this function does not need to be used, it's a better idea to use the :data:`LOCALTZ` singleton instead. """ return _get_localzone() try: LOCALTZ = get_localzone() except LookupError: LOCALTZ = _FallbackLocalTimezone() babel-2.17.0/babel/localtime/_fallback.py000066400000000000000000000022671474743505200201560ustar00rootroot00000000000000""" babel.localtime._fallback ~~~~~~~~~~~~~~~~~~~~~~~~~ Emulated fallback local timezone when all else fails. :copyright: (c) 2013-2025 by the Babel Team. :license: BSD, see LICENSE for more details. 
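    Usage sketch (illustrative; the actual offsets depend on the host's
    ``time`` module settings)::

        from datetime import datetime

        tz = _FallbackLocalTimezone()
        aware = datetime.now(tz)        # aware datetime in the emulated local zone
        offset = tz.utcoffset(aware)    # STDOFFSET or DSTOFFSET, depending on _isdst()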
""" import datetime import time STDOFFSET = datetime.timedelta(seconds=-time.timezone) DSTOFFSET = datetime.timedelta(seconds=-time.altzone) if time.daylight else STDOFFSET DSTDIFF = DSTOFFSET - STDOFFSET ZERO = datetime.timedelta(0) class _FallbackLocalTimezone(datetime.tzinfo): def utcoffset(self, dt: datetime.datetime) -> datetime.timedelta: if self._isdst(dt): return DSTOFFSET else: return STDOFFSET def dst(self, dt: datetime.datetime) -> datetime.timedelta: if self._isdst(dt): return DSTDIFF else: return ZERO def tzname(self, dt: datetime.datetime) -> str: return time.tzname[self._isdst(dt)] def _isdst(self, dt: datetime.datetime) -> bool: tt = (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.weekday(), 0, -1) stamp = time.mktime(tt) tt = time.localtime(stamp) return tt.tm_isdst > 0 babel-2.17.0/babel/localtime/_helpers.py000066400000000000000000000032501474743505200200520ustar00rootroot00000000000000try: import pytz except ModuleNotFoundError: pytz = None try: import zoneinfo except ModuleNotFoundError: zoneinfo = None def _get_tzinfo(tzenv: str): """Get the tzinfo from `zoneinfo` or `pytz` :param tzenv: timezone in the form of Continent/City :return: tzinfo object or None if not found """ if pytz: try: return pytz.timezone(tzenv) except pytz.UnknownTimeZoneError: pass else: try: return zoneinfo.ZoneInfo(tzenv) except ValueError as ve: # This is somewhat hacky, but since _validate_tzfile_path() doesn't # raise a specific error type, we'll need to check the message to be # one we know to be from that function. # If so, we pretend it meant that the TZ didn't exist, for the benefit # of `babel.localtime` catching the `LookupError` raised by # `_get_tzinfo_or_raise()`. # See https://github.com/python-babel/babel/issues/1092 if str(ve).startswith("ZoneInfo keys "): return None except zoneinfo.ZoneInfoNotFoundError: pass return None def _get_tzinfo_or_raise(tzenv: str): tzinfo = _get_tzinfo(tzenv) if tzinfo is None: raise LookupError( f"Can not find timezone {tzenv}. \n" "Timezone names are generally in the form `Continent/City`.", ) return tzinfo def _get_tzinfo_from_file(tzfilename: str): with open(tzfilename, 'rb') as tzfile: if pytz: return pytz.tzfile.build_tzinfo('local', tzfile) else: return zoneinfo.ZoneInfo.from_file(tzfile) babel-2.17.0/babel/localtime/_unix.py000066400000000000000000000074761474743505200174110ustar00rootroot00000000000000import datetime import os import re from babel.localtime._helpers import ( _get_tzinfo, _get_tzinfo_from_file, _get_tzinfo_or_raise, ) def _tz_from_env(tzenv: str) -> datetime.tzinfo: if tzenv[0] == ':': tzenv = tzenv[1:] # TZ specifies a file if os.path.exists(tzenv): return _get_tzinfo_from_file(tzenv) # TZ specifies a zoneinfo zone. return _get_tzinfo_or_raise(tzenv) def _get_localzone(_root: str = '/') -> datetime.tzinfo: """Tries to find the local timezone configuration. This method prefers finding the timezone name and passing that to zoneinfo or pytz, over passing in the localtime file, as in the later case the zoneinfo name is unknown. The parameter _root makes the function look for files like /etc/localtime beneath the _root directory. This is primarily used by the tests. In normal usage you call the function without parameters. """ tzenv = os.environ.get('TZ') if tzenv: return _tz_from_env(tzenv) # This is actually a pretty reliable way to test for the local time # zone on operating systems like OS X. On OS X especially this is the # only one that actually works. 
try: link_dst = os.readlink('/etc/localtime') except OSError: pass else: pos = link_dst.find('/zoneinfo/') if pos >= 0: # On occasion, the `/etc/localtime` symlink has a double slash, e.g. # "/usr/share/zoneinfo//UTC", which would make `zoneinfo.ZoneInfo` # complain (no absolute paths allowed), and we'd end up returning # `None` (as a fix for #1092). # Instead, let's just "fix" the double slash symlink by stripping # leading slashes before passing the assumed zone name forward. zone_name = link_dst[pos + 10:].lstrip("/") tzinfo = _get_tzinfo(zone_name) if tzinfo is not None: return tzinfo # Now look for distribution specific configuration files # that contain the timezone name. tzpath = os.path.join(_root, 'etc/timezone') if os.path.exists(tzpath): with open(tzpath, 'rb') as tzfile: data = tzfile.read() # Issue #3 in tzlocal was that /etc/timezone was a zoneinfo file. # That's a misconfiguration, but we need to handle it gracefully: if data[:5] != b'TZif2': etctz = data.strip().decode() # Get rid of host definitions and comments: if ' ' in etctz: etctz, dummy = etctz.split(' ', 1) if '#' in etctz: etctz, dummy = etctz.split('#', 1) return _get_tzinfo_or_raise(etctz.replace(' ', '_')) # CentOS has a ZONE setting in /etc/sysconfig/clock, # OpenSUSE has a TIMEZONE setting in /etc/sysconfig/clock and # Gentoo has a TIMEZONE setting in /etc/conf.d/clock # We look through these files for a timezone: timezone_re = re.compile(r'\s*(TIME)?ZONE\s*=\s*"(?P.+)"') for filename in ('etc/sysconfig/clock', 'etc/conf.d/clock'): tzpath = os.path.join(_root, filename) if not os.path.exists(tzpath): continue with open(tzpath) as tzfile: for line in tzfile: match = timezone_re.match(line) if match is not None: # We found a timezone etctz = match.group("etctz") return _get_tzinfo_or_raise(etctz.replace(' ', '_')) # No explicit setting existed. Use localtime for filename in ('etc/localtime', 'usr/local/etc/localtime'): tzpath = os.path.join(_root, filename) if not os.path.exists(tzpath): continue return _get_tzinfo_from_file(tzpath) raise LookupError('Can not find any timezone configuration') babel-2.17.0/babel/localtime/_win32.py000066400000000000000000000062131474743505200173540ustar00rootroot00000000000000from __future__ import annotations try: import winreg except ImportError: winreg = None import datetime from typing import Any, Dict, cast from babel.core import get_global from babel.localtime._helpers import _get_tzinfo_or_raise # When building the cldr data on windows this module gets imported. # Because at that point there is no global.dat yet this call will # fail. We want to catch it down in that case then and just assume # the mapping was empty. try: tz_names: dict[str, str] = cast(Dict[str, str], get_global('windows_zone_mapping')) except RuntimeError: tz_names = {} def valuestodict(key) -> dict[str, Any]: """Convert a registry key's values to a dictionary.""" dict = {} size = winreg.QueryInfoKey(key)[1] for i in range(size): data = winreg.EnumValue(key, i) dict[data[0]] = data[1] return dict def get_localzone_name() -> str: # Windows is special. It has unique time zone names (in several # meanings of the word) available, but unfortunately, they can be # translated to the language of the operating system, so we need to # do a backwards lookup, by going through all time zones and see which # one matches. 
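    # Illustrative outcome of the registry lookup below (the values are
    # examples only and assume an English-language Windows installation):
    #
    #     keyvalues = {'TimeZoneKeyName': 'W. Europe Standard Time', ...}
    #     tz_names['W. Europe Standard Time']   # -> 'Europe/Berlin'
    #
    # i.e. the Windows key name is mapped through CLDR's windows_zone_mapping
    # to an IANA zone name that zoneinfo/pytz can load.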
handle = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) TZLOCALKEYNAME = r'SYSTEM\CurrentControlSet\Control\TimeZoneInformation' localtz = winreg.OpenKey(handle, TZLOCALKEYNAME) keyvalues = valuestodict(localtz) localtz.Close() if 'TimeZoneKeyName' in keyvalues: # Windows 7 (and Vista?) # For some reason this returns a string with loads of NUL bytes at # least on some systems. I don't know if this is a bug somewhere, I # just work around it. tzkeyname = keyvalues['TimeZoneKeyName'].split('\x00', 1)[0] else: # Windows 2000 or XP # This is the localized name: tzwin = keyvalues['StandardName'] # Open the list of timezones to look up the real name: TZKEYNAME = r'SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones' tzkey = winreg.OpenKey(handle, TZKEYNAME) # Now, match this value to Time Zone information tzkeyname = None for i in range(winreg.QueryInfoKey(tzkey)[0]): subkey = winreg.EnumKey(tzkey, i) sub = winreg.OpenKey(tzkey, subkey) data = valuestodict(sub) sub.Close() if data.get('Std', None) == tzwin: tzkeyname = subkey break tzkey.Close() handle.Close() if tzkeyname is None: raise LookupError('Can not find Windows timezone configuration') timezone = tz_names.get(tzkeyname) if timezone is None: # Nope, that didn't work. Try adding 'Standard Time', # it seems to work a lot of times: timezone = tz_names.get(f"{tzkeyname} Standard Time") # Return what we have. if timezone is None: raise LookupError(f"Can not find timezone {tzkeyname}") return timezone def _get_localzone() -> datetime.tzinfo: if winreg is None: raise LookupError( 'Runtime support not available') return _get_tzinfo_or_raise(get_localzone_name()) babel-2.17.0/babel/messages/000077500000000000000000000000001474743505200155355ustar00rootroot00000000000000babel-2.17.0/babel/messages/__init__.py000066400000000000000000000005351474743505200176510ustar00rootroot00000000000000""" babel.messages ~~~~~~~~~~~~~~ Support for ``gettext`` message catalogs. :copyright: (c) 2013-2025 by the Babel Team. :license: BSD, see LICENSE for more details. """ from babel.messages.catalog import ( Catalog, Message, TranslationError, ) __all__ = [ "Catalog", "Message", "TranslationError", ] babel-2.17.0/babel/messages/_compat.py000066400000000000000000000022131474743505200175270ustar00rootroot00000000000000import sys from functools import partial def find_entrypoints(group_name: str): """ Find entrypoints of a given group using either `importlib.metadata` or the older `pkg_resources` mechanism. Yields tuples of the entrypoint name and a callable function that will load the actual entrypoint. """ if sys.version_info >= (3, 10): # "Changed in version 3.10: importlib.metadata is no longer provisional." try: from importlib.metadata import entry_points except ImportError: pass else: eps = entry_points(group=group_name) # Only do this if this implementation of `importlib.metadata` is # modern enough to not return a dict. if not isinstance(eps, dict): for entry_point in eps: yield (entry_point.name, entry_point.load) return try: from pkg_resources import working_set except ImportError: pass else: for entry_point in working_set.iter_entry_points(group_name): yield (entry_point.name, partial(entry_point.load, require=True)) babel-2.17.0/babel/messages/catalog.py000066400000000000000000001116521474743505200175270ustar00rootroot00000000000000""" babel.messages.catalog ~~~~~~~~~~~~~~~~~~~~~~ Data structures for message catalogs. :copyright: (c) 2013-2025 by the Babel Team. :license: BSD, see LICENSE for more details. 
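    A quick usage sketch (illustrative; the project and file names are
    placeholders)::

        from babel.messages.catalog import Catalog

        catalog = Catalog(locale='de_DE', project='Example', version='1.0')
        catalog.add('Hello, world!', locations=[('main.py', 1)])
        assert 'Hello, world!' in catalog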
""" from __future__ import annotations import datetime import re from collections.abc import Iterable, Iterator from copy import copy from difflib import SequenceMatcher from email import message_from_string from heapq import nlargest from string import Formatter from typing import TYPE_CHECKING from babel import __version__ as VERSION from babel.core import Locale, UnknownLocaleError from babel.dates import format_datetime from babel.messages.plurals import get_plural from babel.util import LOCALTZ, FixedOffsetTimezone, _cmp, distinct if TYPE_CHECKING: from typing_extensions import TypeAlias _MessageID: TypeAlias = str | tuple[str, ...] | list[str] __all__ = [ 'DEFAULT_HEADER', 'PYTHON_FORMAT', 'Catalog', 'Message', 'TranslationError', ] def get_close_matches(word, possibilities, n=3, cutoff=0.6): """A modified version of ``difflib.get_close_matches``. It just passes ``autojunk=False`` to the ``SequenceMatcher``, to work around https://github.com/python/cpython/issues/90825. """ if not n > 0: # pragma: no cover raise ValueError(f"n must be > 0: {n!r}") if not 0.0 <= cutoff <= 1.0: # pragma: no cover raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}") result = [] s = SequenceMatcher(autojunk=False) # only line changed from difflib.py s.set_seq2(word) for x in possibilities: s.set_seq1(x) if s.real_quick_ratio() >= cutoff and \ s.quick_ratio() >= cutoff and \ s.ratio() >= cutoff: result.append((s.ratio(), x)) # Move the best scorers to head of list result = nlargest(n, result) # Strip scores for the best n matches return [x for score, x in result] PYTHON_FORMAT = re.compile(r''' \% (?:\(([\w]*)\))? ( [-#0\ +]?(?:\*|[\d]+)? (?:\.(?:\*|[\d]+))? [hlL]? ) ([diouxXeEfFgGcrs%]) ''', re.VERBOSE) def _has_python_brace_format(string: str) -> bool: if "{" not in string: return False fmt = Formatter() try: # `fmt.parse` returns 3-or-4-tuples of the form # `(literal_text, field_name, format_spec, conversion)`; # if `field_name` is set, this smells like brace format field_name_seen = False for t in fmt.parse(string): if t[1] is not None: field_name_seen = True # We cannot break here, as we need to consume the whole string # to ensure that it is a valid format string. except ValueError: return False return field_name_seen def _parse_datetime_header(value: str) -> datetime.datetime: match = re.match(r'^(?P.*?)(?P[+-]\d{4})?$', value) dt = datetime.datetime.strptime(match.group('datetime'), '%Y-%m-%d %H:%M') # Separate the offset into a sign component, hours, and # minutes tzoffset = match.group('tzoffset') if tzoffset is not None: plus_minus_s, rest = tzoffset[0], tzoffset[1:] hours_offset_s, mins_offset_s = rest[:2], rest[2:] # Make them all integers plus_minus = int(f"{plus_minus_s}1") hours_offset = int(hours_offset_s) mins_offset = int(mins_offset_s) # Calculate net offset net_mins_offset = hours_offset * 60 net_mins_offset += mins_offset net_mins_offset *= plus_minus # Create an offset object tzoffset = FixedOffsetTimezone(net_mins_offset) # Store the offset in a datetime object dt = dt.replace(tzinfo=tzoffset) return dt class Message: """Representation of a single message in a catalog.""" def __init__( self, id: _MessageID, string: _MessageID | None = '', locations: Iterable[tuple[str, int]] = (), flags: Iterable[str] = (), auto_comments: Iterable[str] = (), user_comments: Iterable[str] = (), previous_id: _MessageID = (), lineno: int | None = None, context: str | None = None, ) -> None: """Create the message object. 
:param id: the message ID, or a ``(singular, plural)`` tuple for pluralizable messages :param string: the translated message string, or a ``(singular, plural)`` tuple for pluralizable messages :param locations: a sequence of ``(filename, lineno)`` tuples :param flags: a set or sequence of flags :param auto_comments: a sequence of automatic comments for the message :param user_comments: a sequence of user comments for the message :param previous_id: the previous message ID, or a ``(singular, plural)`` tuple for pluralizable messages :param lineno: the line number on which the msgid line was found in the PO file, if any :param context: the message context """ self.id = id if not string and self.pluralizable: string = ('', '') self.string = string self.locations = list(distinct(locations)) self.flags = set(flags) if id and self.python_format: self.flags.add('python-format') else: self.flags.discard('python-format') if id and self.python_brace_format: self.flags.add('python-brace-format') else: self.flags.discard('python-brace-format') self.auto_comments = list(distinct(auto_comments)) self.user_comments = list(distinct(user_comments)) if isinstance(previous_id, str): self.previous_id = [previous_id] else: self.previous_id = list(previous_id) self.lineno = lineno self.context = context def __repr__(self) -> str: return f"<{type(self).__name__} {self.id!r} (flags: {list(self.flags)!r})>" def __cmp__(self, other: object) -> int: """Compare Messages, taking into account plural ids""" def values_to_compare(obj): if isinstance(obj, Message) and obj.pluralizable: return obj.id[0], obj.context or '' return obj.id, obj.context or '' return _cmp(values_to_compare(self), values_to_compare(other)) def __gt__(self, other: object) -> bool: return self.__cmp__(other) > 0 def __lt__(self, other: object) -> bool: return self.__cmp__(other) < 0 def __ge__(self, other: object) -> bool: return self.__cmp__(other) >= 0 def __le__(self, other: object) -> bool: return self.__cmp__(other) <= 0 def __eq__(self, other: object) -> bool: return self.__cmp__(other) == 0 def __ne__(self, other: object) -> bool: return self.__cmp__(other) != 0 def is_identical(self, other: Message) -> bool: """Checks whether messages are identical, taking into account all properties. """ assert isinstance(other, Message) return self.__dict__ == other.__dict__ def clone(self) -> Message: return Message(*map(copy, (self.id, self.string, self.locations, self.flags, self.auto_comments, self.user_comments, self.previous_id, self.lineno, self.context))) def check(self, catalog: Catalog | None = None) -> list[TranslationError]: """Run various validation checks on the message. Some validations are only performed if the catalog is provided. This method returns a sequence of `TranslationError` objects. :rtype: ``iterator`` :param catalog: A catalog instance that is passed to the checkers :see: `Catalog.check` for a way to perform checks for all messages in a catalog. """ from babel.messages.checkers import checkers errors: list[TranslationError] = [] for checker in checkers: try: checker(catalog, self) except TranslationError as e: errors.append(e) return errors @property def fuzzy(self) -> bool: """Whether the translation is fuzzy. >>> Message('foo').fuzzy False >>> msg = Message('foo', 'foo', flags=['fuzzy']) >>> msg.fuzzy True >>> msg :type: `bool`""" return 'fuzzy' in self.flags @property def pluralizable(self) -> bool: """Whether the message is plurizable. 
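        Note that a pluralizable message created without a translation gets an
        empty string pair as its ``string`` attribute:

        >>> Message(('foo', 'foos')).string
        ('', '')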
>>> Message('foo').pluralizable False >>> Message(('foo', 'bar')).pluralizable True :type: `bool`""" return isinstance(self.id, (list, tuple)) @property def python_format(self) -> bool: """Whether the message contains Python-style parameters. >>> Message('foo %(name)s bar').python_format True >>> Message(('foo %(name)s', 'foo %(name)s')).python_format True :type: `bool`""" ids = self.id if not isinstance(ids, (list, tuple)): ids = [ids] return any(PYTHON_FORMAT.search(id) for id in ids) @property def python_brace_format(self) -> bool: """Whether the message contains Python f-string parameters. >>> Message('Hello, {name}!').python_brace_format True >>> Message(('One apple', '{count} apples')).python_brace_format True :type: `bool`""" ids = self.id if not isinstance(ids, (list, tuple)): ids = [ids] return any(_has_python_brace_format(id) for id in ids) class TranslationError(Exception): """Exception thrown by translation checkers when invalid message translations are encountered.""" DEFAULT_HEADER = """\ # Translations template for PROJECT. # Copyright (C) YEAR ORGANIZATION # This file is distributed under the same license as the PROJECT project. # FIRST AUTHOR , YEAR. #""" def parse_separated_header(value: str) -> dict[str, str]: # Adapted from https://peps.python.org/pep-0594/#cgi from email.message import Message m = Message() m['content-type'] = value return dict(m.get_params()) def _force_text(s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') -> str: if isinstance(s, str): return s if isinstance(s, bytes): return s.decode(encoding, errors) return str(s) class Catalog: """Representation of a message catalog.""" def __init__( self, locale: Locale | str | None = None, domain: str | None = None, header_comment: str | None = DEFAULT_HEADER, project: str | None = None, version: str | None = None, copyright_holder: str | None = None, msgid_bugs_address: str | None = None, creation_date: datetime.datetime | str | None = None, revision_date: datetime.datetime | datetime.time | float | str | None = None, last_translator: str | None = None, language_team: str | None = None, charset: str | None = None, fuzzy: bool = True, ) -> None: """Initialize the catalog object. 
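        For example (illustrative)::

            template = Catalog(project='Example', version='1.0')  # no locale: a POT template
            catalog = Catalog(locale='fi_FI', project='Example', version='1.0')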
:param locale: the locale identifier or `Locale` object, or `None` if the catalog is not bound to a locale (which basically means it's a template) :param domain: the message domain :param header_comment: the header comment as string, or `None` for the default header :param project: the project's name :param version: the project's version :param copyright_holder: the copyright holder of the catalog :param msgid_bugs_address: the email address or URL to submit bug reports to :param creation_date: the date the catalog was created :param revision_date: the date the catalog was revised :param last_translator: the name and email of the last translator :param language_team: the name and email of the language team :param charset: the encoding to use in the output (defaults to utf-8) :param fuzzy: the fuzzy bit on the catalog header """ self.domain = domain self.locale = locale self._header_comment = header_comment self._messages: dict[str | tuple[str, str], Message] = {} self.project = project or 'PROJECT' self.version = version or 'VERSION' self.copyright_holder = copyright_holder or 'ORGANIZATION' self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS' self.last_translator = last_translator or 'FULL NAME ' """Name and email address of the last translator.""" self.language_team = language_team or 'LANGUAGE ' """Name and email address of the language team.""" self.charset = charset or 'utf-8' if creation_date is None: creation_date = datetime.datetime.now(LOCALTZ) elif isinstance(creation_date, datetime.datetime) and not creation_date.tzinfo: creation_date = creation_date.replace(tzinfo=LOCALTZ) self.creation_date = creation_date if revision_date is None: revision_date = 'YEAR-MO-DA HO:MI+ZONE' elif isinstance(revision_date, datetime.datetime) and not revision_date.tzinfo: revision_date = revision_date.replace(tzinfo=LOCALTZ) self.revision_date = revision_date self.fuzzy = fuzzy # Dictionary of obsolete messages self.obsolete: dict[str | tuple[str, str], Message] = {} self._num_plurals = None self._plural_expr = None def _set_locale(self, locale: Locale | str | None) -> None: if locale is None: self._locale_identifier = None self._locale = None return if isinstance(locale, Locale): self._locale_identifier = str(locale) self._locale = locale return if isinstance(locale, str): self._locale_identifier = str(locale) try: self._locale = Locale.parse(locale) except UnknownLocaleError: self._locale = None return raise TypeError(f"`locale` must be a Locale, a locale identifier string, or None; got {locale!r}") def _get_locale(self) -> Locale | None: return self._locale def _get_locale_identifier(self) -> str | None: return self._locale_identifier locale = property(_get_locale, _set_locale) locale_identifier = property(_get_locale_identifier) def _get_header_comment(self) -> str: comment = self._header_comment year = datetime.datetime.now(LOCALTZ).strftime('%Y') if hasattr(self.revision_date, 'strftime'): year = self.revision_date.strftime('%Y') comment = comment.replace('PROJECT', self.project) \ .replace('VERSION', self.version) \ .replace('YEAR', year) \ .replace('ORGANIZATION', self.copyright_holder) locale_name = (self.locale.english_name if self.locale else self.locale_identifier) if locale_name: comment = comment.replace("Translations template", f"{locale_name} translations") return comment def _set_header_comment(self, string: str | None) -> None: self._header_comment = string header_comment = property(_get_header_comment, _set_header_comment, doc="""\ The header comment for the catalog. 
>>> catalog = Catalog(project='Foobar', version='1.0', ... copyright_holder='Foo Company') >>> print(catalog.header_comment) #doctest: +ELLIPSIS # Translations template for Foobar. # Copyright (C) ... Foo Company # This file is distributed under the same license as the Foobar project. # FIRST AUTHOR , .... # The header can also be set from a string. Any known upper-case variables will be replaced when the header is retrieved again: >>> catalog = Catalog(project='Foobar', version='1.0', ... copyright_holder='Foo Company') >>> catalog.header_comment = '''\\ ... # The POT for my really cool PROJECT project. ... # Copyright (C) 1990-2003 ORGANIZATION ... # This file is distributed under the same license as the PROJECT ... # project. ... #''' >>> print(catalog.header_comment) # The POT for my really cool Foobar project. # Copyright (C) 1990-2003 Foo Company # This file is distributed under the same license as the Foobar # project. # :type: `unicode` """) def _get_mime_headers(self) -> list[tuple[str, str]]: if isinstance(self.revision_date, (datetime.datetime, datetime.time, int, float)): revision_date = format_datetime(self.revision_date, 'yyyy-MM-dd HH:mmZ', locale='en') else: revision_date = self.revision_date language_team = self.language_team if self.locale_identifier and 'LANGUAGE' in language_team: language_team = language_team.replace('LANGUAGE', str(self.locale_identifier)) headers: list[tuple[str, str]] = [ ("Project-Id-Version", f"{self.project} {self.version}"), ('Report-Msgid-Bugs-To', self.msgid_bugs_address), ('POT-Creation-Date', format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ', locale='en')), ('PO-Revision-Date', revision_date), ('Last-Translator', self.last_translator), ] if self.locale_identifier: headers.append(('Language', str(self.locale_identifier))) headers.append(('Language-Team', language_team)) if self.locale is not None: headers.append(('Plural-Forms', self.plural_forms)) headers += [ ('MIME-Version', '1.0'), ("Content-Type", f"text/plain; charset={self.charset}"), ('Content-Transfer-Encoding', '8bit'), ("Generated-By", f"Babel {VERSION}\n"), ] return headers def _set_mime_headers(self, headers: Iterable[tuple[str, str]]) -> None: for name, value in headers: name = _force_text(name.lower(), encoding=self.charset) value = _force_text(value, encoding=self.charset) if name == 'project-id-version': parts = value.split(' ') self.project = ' '.join(parts[:-1]) self.version = parts[-1] elif name == 'report-msgid-bugs-to': self.msgid_bugs_address = value elif name == 'last-translator': self.last_translator = value elif name == 'language': value = value.replace('-', '_') # The `or None` makes sure that the locale is set to None # if the header's value is an empty string, which is what # some tools generate (instead of eliding the empty Language # header altogether). 
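                # e.g. a "Language: pt-BR" header will have been normalized to
                # "pt_BR" above and is parsed into a `Locale` here; an empty
                # "Language:" header leaves the catalog locale set to None.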
self._set_locale(value or None) elif name == 'language-team': self.language_team = value elif name == 'content-type': params = parse_separated_header(value) if 'charset' in params: self.charset = params['charset'].lower() elif name == 'plural-forms': params = parse_separated_header(f" ;{value}") self._num_plurals = int(params.get('nplurals', 2)) self._plural_expr = params.get('plural', '(n != 1)') elif name == 'pot-creation-date': self.creation_date = _parse_datetime_header(value) elif name == 'po-revision-date': # Keep the value if it's not the default one if 'YEAR' not in value: self.revision_date = _parse_datetime_header(value) mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\ The MIME headers of the catalog, used for the special ``msgid ""`` entry. The behavior of this property changes slightly depending on whether a locale is set or not, the latter indicating that the catalog is actually a template for actual translations. Here's an example of the output for such a catalog template: >>> from babel.dates import UTC >>> from datetime import datetime >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC) >>> catalog = Catalog(project='Foobar', version='1.0', ... creation_date=created) >>> for name, value in catalog.mime_headers: ... print('%s: %s' % (name, value)) Project-Id-Version: Foobar 1.0 Report-Msgid-Bugs-To: EMAIL@ADDRESS POT-Creation-Date: 1990-04-01 15:30+0000 PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE Last-Translator: FULL NAME Language-Team: LANGUAGE MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Generated-By: Babel ... And here's an example of the output when the locale is set: >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC) >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0', ... creation_date=created, revision_date=revised, ... last_translator='John Doe ', ... language_team='de_DE ') >>> for name, value in catalog.mime_headers: ... print('%s: %s' % (name, value)) Project-Id-Version: Foobar 1.0 Report-Msgid-Bugs-To: EMAIL@ADDRESS POT-Creation-Date: 1990-04-01 15:30+0000 PO-Revision-Date: 1990-08-03 12:00+0000 Last-Translator: John Doe Language: de_DE Language-Team: de_DE Plural-Forms: nplurals=2; plural=(n != 1); MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Generated-By: Babel ... :type: `list` """) @property def num_plurals(self) -> int: """The number of plurals used by the catalog or locale. >>> Catalog(locale='en').num_plurals 2 >>> Catalog(locale='ga').num_plurals 5 :type: `int`""" if self._num_plurals is None: num = 2 if self.locale: num = get_plural(self.locale)[0] self._num_plurals = num return self._num_plurals @property def plural_expr(self) -> str: """The plural expression used by the catalog or locale. >>> Catalog(locale='en').plural_expr '(n != 1)' >>> Catalog(locale='ga').plural_expr '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)' >>> Catalog(locale='ding').plural_expr # unknown locale '(n != 1)' :type: `str`""" if self._plural_expr is None: expr = '(n != 1)' if self.locale: expr = get_plural(self.locale)[1] self._plural_expr = expr return self._plural_expr @property def plural_forms(self) -> str: """Return the plural forms declaration for the locale. 
>>> Catalog(locale='en').plural_forms 'nplurals=2; plural=(n != 1);' >>> Catalog(locale='pt_BR').plural_forms 'nplurals=2; plural=(n > 1);' :type: `str`""" return f"nplurals={self.num_plurals}; plural={self.plural_expr};" def __contains__(self, id: _MessageID) -> bool: """Return whether the catalog has a message with the specified ID.""" return self._key_for(id) in self._messages def __len__(self) -> int: """The number of messages in the catalog. This does not include the special ``msgid ""`` entry.""" return len(self._messages) def __iter__(self) -> Iterator[Message]: """Iterates through all the entries in the catalog, in the order they were added, yielding a `Message` object for every entry. :rtype: ``iterator``""" buf = [] for name, value in self.mime_headers: buf.append(f"{name}: {value}") flags = set() if self.fuzzy: flags |= {'fuzzy'} yield Message('', '\n'.join(buf), flags=flags) for key in self._messages: yield self._messages[key] def __repr__(self) -> str: locale = '' if self.locale: locale = f" {self.locale}" return f"<{type(self).__name__} {self.domain!r}{locale}>" def __delitem__(self, id: _MessageID) -> None: """Delete the message with the specified ID.""" self.delete(id) def __getitem__(self, id: _MessageID) -> Message: """Return the message with the specified ID. :param id: the message ID """ return self.get(id) def __setitem__(self, id: _MessageID, message: Message) -> None: """Add or update the message with the specified ID. >>> catalog = Catalog() >>> catalog[u'foo'] = Message(u'foo') >>> catalog[u'foo'] If a message with that ID is already in the catalog, it is updated to include the locations and flags of the new message. >>> catalog = Catalog() >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)]) >>> catalog[u'foo'].locations [('main.py', 1)] >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)]) >>> catalog[u'foo'].locations [('main.py', 1), ('utils.py', 5)] :param id: the message ID :param message: the `Message` object """ assert isinstance(message, Message), 'expected a Message object' key = self._key_for(id, message.context) current = self._messages.get(key) if current: if message.pluralizable and not current.pluralizable: # The new message adds pluralization current.id = message.id current.string = message.string current.locations = list(distinct(current.locations + message.locations)) current.auto_comments = list(distinct(current.auto_comments + message.auto_comments)) current.user_comments = list(distinct(current.user_comments + message.user_comments)) current.flags |= message.flags elif id == '': # special treatment for the header message self.mime_headers = message_from_string(message.string).items() self.header_comment = "\n".join([f"# {c}".rstrip() for c in message.user_comments]) self.fuzzy = message.fuzzy else: if isinstance(id, (list, tuple)): assert isinstance(message.string, (list, tuple)), \ f"Expected sequence but got {type(message.string)}" self._messages[key] = message def add( self, id: _MessageID, string: _MessageID | None = None, locations: Iterable[tuple[str, int]] = (), flags: Iterable[str] = (), auto_comments: Iterable[str] = (), user_comments: Iterable[str] = (), previous_id: _MessageID = (), lineno: int | None = None, context: str | None = None, ) -> Message: """Add or update the message with the specified ID. >>> catalog = Catalog() >>> catalog.add(u'foo') >>> catalog[u'foo'] This method simply constructs a `Message` object with the given arguments and invokes `__setitem__` with that object. 
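        A slightly fuller sketch (illustrative) showing a plural and a
        context-specific message::

            catalog = Catalog(locale='de_DE')
            catalog.add(('apple', 'apples'), locations=[('shop.py', 7)])
            catalog.add('Open', context='menu')
            catalog.get('Open', context='menu')   # looked up with the same context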
:param id: the message ID, or a ``(singular, plural)`` tuple for pluralizable messages :param string: the translated message string, or a ``(singular, plural)`` tuple for pluralizable messages :param locations: a sequence of ``(filename, lineno)`` tuples :param flags: a set or sequence of flags :param auto_comments: a sequence of automatic comments :param user_comments: a sequence of user comments :param previous_id: the previous message ID, or a ``(singular, plural)`` tuple for pluralizable messages :param lineno: the line number on which the msgid line was found in the PO file, if any :param context: the message context """ message = Message(id, string, list(locations), flags, auto_comments, user_comments, previous_id, lineno=lineno, context=context) self[id] = message return message def check(self) -> Iterable[tuple[Message, list[TranslationError]]]: """Run various validation checks on the translations in the catalog. For every message which fails validation, this method yield a ``(message, errors)`` tuple, where ``message`` is the `Message` object and ``errors`` is a sequence of `TranslationError` objects. :rtype: ``generator`` of ``(message, errors)`` """ for message in self._messages.values(): errors = message.check(catalog=self) if errors: yield message, errors def get(self, id: _MessageID, context: str | None = None) -> Message | None: """Return the message with the specified ID and context. :param id: the message ID :param context: the message context, or ``None`` for no context """ return self._messages.get(self._key_for(id, context)) def delete(self, id: _MessageID, context: str | None = None) -> None: """Delete the message with the specified ID and context. :param id: the message ID :param context: the message context, or ``None`` for no context """ key = self._key_for(id, context) if key in self._messages: del self._messages[key] def update( self, template: Catalog, no_fuzzy_matching: bool = False, update_header_comment: bool = False, keep_user_comments: bool = True, update_creation_date: bool = True, ) -> None: """Update the catalog based on the given template catalog. >>> from babel.messages import Catalog >>> template = Catalog() >>> template.add('green', locations=[('main.py', 99)]) >>> template.add('blue', locations=[('main.py', 100)]) >>> template.add(('salad', 'salads'), locations=[('util.py', 42)]) >>> catalog = Catalog(locale='de_DE') >>> catalog.add('blue', u'blau', locations=[('main.py', 98)]) >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)]) >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'), ... 
locations=[('util.py', 38)]) >>> catalog.update(template) >>> len(catalog) 3 >>> msg1 = catalog['green'] >>> msg1.string >>> msg1.locations [('main.py', 99)] >>> msg2 = catalog['blue'] >>> msg2.string u'blau' >>> msg2.locations [('main.py', 100)] >>> msg3 = catalog['salad'] >>> msg3.string (u'Salat', u'Salate') >>> msg3.locations [('util.py', 42)] Messages that are in the catalog but not in the template are removed from the main collection, but can still be accessed via the `obsolete` member: >>> 'head' in catalog False >>> list(catalog.obsolete.values()) [] :param template: the reference catalog, usually read from a POT file :param no_fuzzy_matching: whether to use fuzzy matching of message IDs :param update_header_comment: whether to copy the header comment from the template :param keep_user_comments: whether to keep user comments from the old catalog :param update_creation_date: whether to copy the creation date from the template """ messages = self._messages remaining = messages.copy() self._messages = {} # Prepare for fuzzy matching fuzzy_candidates = {} if not no_fuzzy_matching: for msgid in messages: if msgid and messages[msgid].string: key = self._key_for(msgid) ctxt = messages[msgid].context fuzzy_candidates[self._to_fuzzy_match_key(key)] = (key, ctxt) fuzzy_matches = set() def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None: message = message.clone() fuzzy = False if oldkey != newkey: fuzzy = True fuzzy_matches.add(oldkey) oldmsg = messages.get(oldkey) assert oldmsg is not None if isinstance(oldmsg.id, str): message.previous_id = [oldmsg.id] else: message.previous_id = list(oldmsg.id) else: oldmsg = remaining.pop(oldkey, None) assert oldmsg is not None message.string = oldmsg.string if keep_user_comments: message.user_comments = list(distinct(oldmsg.user_comments)) if isinstance(message.id, (list, tuple)): if not isinstance(message.string, (list, tuple)): fuzzy = True message.string = tuple( [message.string] + ([''] * (len(message.id) - 1)), ) elif len(message.string) != self.num_plurals: fuzzy = True message.string = tuple(message.string[:len(oldmsg.string)]) elif isinstance(message.string, (list, tuple)): fuzzy = True message.string = message.string[0] message.flags |= oldmsg.flags if fuzzy: message.flags |= {'fuzzy'} self[message.id] = message for message in template: if message.id: key = self._key_for(message.id, message.context) if key in messages: _merge(message, key, key) else: if not no_fuzzy_matching: # do some fuzzy matching with difflib matches = get_close_matches( self._to_fuzzy_match_key(key), fuzzy_candidates.keys(), 1, ) if matches: modified_key = matches[0] newkey, newctxt = fuzzy_candidates[modified_key] if newctxt is not None: newkey = newkey, newctxt _merge(message, newkey, key) continue self[message.id] = message for msgid in remaining: if no_fuzzy_matching or msgid not in fuzzy_matches: self.obsolete[msgid] = remaining[msgid] if update_header_comment: # Allow the updated catalog's header to be rewritten based on the # template's header self.header_comment = template.header_comment # Make updated catalog's POT-Creation-Date equal to the template # used to update the catalog if update_creation_date: self.creation_date = template.creation_date def _to_fuzzy_match_key(self, key: tuple[str, str] | str) -> str: """Converts a message key to a string suitable for fuzzy matching.""" if isinstance(key, tuple): matchkey = key[0] # just the msgid, no context else: matchkey = key return matchkey.lower().strip() def 
_key_for(self, id: _MessageID, context: str | None = None) -> tuple[str, str] | str: """The key for a message is just the singular ID even for pluralizable messages, but is a ``(msgid, msgctxt)`` tuple for context-specific messages. """ key = id if isinstance(key, (list, tuple)): key = id[0] if context is not None: key = (key, context) return key def is_identical(self, other: Catalog) -> bool: """Checks if catalogs are identical, taking into account messages and headers. """ assert isinstance(other, Catalog) for key in self._messages.keys() | other._messages.keys(): message_1 = self.get(key) message_2 = other.get(key) if ( message_1 is None or message_2 is None or not message_1.is_identical(message_2) ): return False return dict(self.mime_headers) == dict(other.mime_headers) babel-2.17.0/babel/messages/checkers.py000066400000000000000000000141131474743505200176760ustar00rootroot00000000000000""" babel.messages.checkers ~~~~~~~~~~~~~~~~~~~~~~~ Various routines that help with validation of translations. :since: version 0.9 :copyright: (c) 2013-2025 by the Babel Team. :license: BSD, see LICENSE for more details. """ from __future__ import annotations from collections.abc import Callable from babel.messages.catalog import PYTHON_FORMAT, Catalog, Message, TranslationError #: list of format chars that are compatible to each other _string_format_compatibilities = [ {'i', 'd', 'u'}, {'x', 'X'}, {'f', 'F', 'g', 'G'}, ] def num_plurals(catalog: Catalog | None, message: Message) -> None: """Verify the number of plurals in the translation.""" if not message.pluralizable: if not isinstance(message.string, str): raise TranslationError("Found plural forms for non-pluralizable " "message") return # skip further tests if no catalog is provided. elif catalog is None: return msgstrs = message.string if not isinstance(msgstrs, (list, tuple)): msgstrs = (msgstrs,) if len(msgstrs) != catalog.num_plurals: raise TranslationError("Wrong number of plural forms (expected %d)" % catalog.num_plurals) def python_format(catalog: Catalog | None, message: Message) -> None: """Verify the format string placeholders in the translation.""" if 'python-format' not in message.flags: return msgids = message.id if not isinstance(msgids, (list, tuple)): msgids = (msgids,) msgstrs = message.string if not isinstance(msgstrs, (list, tuple)): msgstrs = (msgstrs,) for msgid, msgstr in zip(msgids, msgstrs): if msgstr: _validate_format(msgid, msgstr) def _validate_format(format: str, alternative: str) -> None: """Test format string `alternative` against `format`. `format` can be the msgid of a message and `alternative` one of the `msgstr`\\s. The two arguments are not interchangeable as `alternative` may contain less placeholders if `format` uses named placeholders. If the string formatting of `alternative` is compatible to `format` the function returns `None`, otherwise a `TranslationError` is raised. Examples for compatible format strings: >>> _validate_format('Hello %s!', 'Hallo %s!') >>> _validate_format('Hello %i!', 'Hallo %d!') Example for an incompatible format strings: >>> _validate_format('Hello %(name)s!', 'Hallo %s!') Traceback (most recent call last): ... TranslationError: the format strings are of different kinds This function is used by the `python_format` checker. 
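    When `format` uses named placeholders, the alternative may omit some of
    them, as noted above:

    >>> _validate_format('%(name)s has %(count)d new messages', 'Hallo %(name)s!')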
:param format: The original format string :param alternative: The alternative format string that should be checked against format :raises TranslationError: on formatting errors """ def _parse(string: str) -> list[tuple[str, str]]: result: list[tuple[str, str]] = [] for match in PYTHON_FORMAT.finditer(string): name, format, typechar = match.groups() if typechar == '%' and name is None: continue result.append((name, str(typechar))) return result def _compatible(a: str, b: str) -> bool: if a == b: return True for set in _string_format_compatibilities: if a in set and b in set: return True return False def _check_positional(results: list[tuple[str, str]]) -> bool: positional = None for name, _char in results: if positional is None: positional = name is None else: if (name is None) != positional: raise TranslationError('format string mixes positional ' 'and named placeholders') return bool(positional) a, b = map(_parse, (format, alternative)) if not a: return # now check if both strings are positional or named a_positional, b_positional = map(_check_positional, (a, b)) if a_positional and not b_positional and not b: raise TranslationError('placeholders are incompatible') elif a_positional != b_positional: raise TranslationError('the format strings are of different kinds') # if we are operating on positional strings both must have the # same number of format chars and those must be compatible if a_positional: if len(a) != len(b): raise TranslationError('positional format placeholders are ' 'unbalanced') for idx, ((_, first), (_, second)) in enumerate(zip(a, b)): if not _compatible(first, second): raise TranslationError('incompatible format for placeholder ' '%d: %r and %r are not compatible' % (idx + 1, first, second)) # otherwise the second string must not have names the first one # doesn't have and the types of those included must be compatible else: type_map = dict(a) for name, typechar in b: if name not in type_map: raise TranslationError(f'unknown named placeholder {name!r}') elif not _compatible(typechar, type_map[name]): raise TranslationError( f'incompatible format for placeholder {name!r}: ' f'{typechar!r} and {type_map[name]!r} are not compatible', ) def _find_checkers() -> list[Callable[[Catalog | None, Message], object]]: from babel.messages._compat import find_entrypoints checkers: list[Callable[[Catalog | None, Message], object]] = [] checkers.extend(load() for (name, load) in find_entrypoints('babel.checkers')) if len(checkers) == 0: # if entrypoints are not available or no usable egg-info was found # (see #230), just resort to hard-coded checkers return [num_plurals, python_format] return checkers checkers: list[Callable[[Catalog | None, Message], object]] = _find_checkers() babel-2.17.0/babel/messages/extract.py000066400000000000000000001027421474743505200175670ustar00rootroot00000000000000""" babel.messages.extract ~~~~~~~~~~~~~~~~~~~~~~ Basic infrastructure for extracting localizable messages from source files. This module defines an extensible system for collecting localizable message strings from a variety of sources. A native extractor for Python source files is builtin, extractors for other sources can be added using very simple plugins. The main entry points into the extraction functionality are the functions `extract_from_dir` and `extract_from_file`. :copyright: (c) 2013-2025 by the Babel Team. :license: BSD, see LICENSE for more details. 
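    A typical invocation (illustrative; ``'src'`` is a placeholder for the
    source directory)::

        from babel.messages.extract import extract_from_dir

        for filename, lineno, message, comments, context in extract_from_dir('src'):
            print(f'{filename}:{lineno}: {message}')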
""" from __future__ import annotations import ast import io import os import sys import tokenize from collections.abc import ( Callable, Collection, Generator, Iterable, Mapping, MutableSequence, ) from functools import lru_cache from os.path import relpath from textwrap import dedent from tokenize import COMMENT, NAME, NL, OP, STRING, generate_tokens from typing import TYPE_CHECKING, Any, TypedDict from babel.messages._compat import find_entrypoints from babel.util import parse_encoding, parse_future_flags, pathmatch if TYPE_CHECKING: from typing import IO, Final, Protocol from _typeshed import SupportsItems, SupportsRead, SupportsReadline from typing_extensions import TypeAlias class _PyOptions(TypedDict, total=False): encoding: str class _JSOptions(TypedDict, total=False): encoding: str jsx: bool template_string: bool parse_template_string: bool class _FileObj(SupportsRead[bytes], SupportsReadline[bytes], Protocol): def seek(self, __offset: int, __whence: int = ...) -> int: ... def tell(self) -> int: ... _SimpleKeyword: TypeAlias = tuple[int | tuple[int, int] | tuple[int, str], ...] | None _Keyword: TypeAlias = dict[int | None, _SimpleKeyword] | _SimpleKeyword # 5-tuple of (filename, lineno, messages, comments, context) _FileExtractionResult: TypeAlias = tuple[str, int, str | tuple[str, ...], list[str], str | None] # 4-tuple of (lineno, message, comments, context) _ExtractionResult: TypeAlias = tuple[int, str | tuple[str, ...], list[str], str | None] # Required arguments: fileobj, keywords, comment_tags, options # Return value: Iterable of (lineno, message, comments, context) _CallableExtractionMethod: TypeAlias = Callable[ [_FileObj | IO[bytes], Mapping[str, _Keyword], Collection[str], Mapping[str, Any]], Iterable[_ExtractionResult], ] _ExtractionMethod: TypeAlias = _CallableExtractionMethod | str GROUP_NAME: Final[str] = 'babel.extractors' DEFAULT_KEYWORDS: dict[str, _Keyword] = { '_': None, 'gettext': None, 'ngettext': (1, 2), 'ugettext': None, 'ungettext': (1, 2), 'dgettext': (2,), 'dngettext': (2, 3), 'N_': None, 'pgettext': ((1, 'c'), 2), 'npgettext': ((1, 'c'), 2, 3), } DEFAULT_MAPPING: list[tuple[str, str]] = [('**.py', 'python')] # New tokens in Python 3.12, or None on older versions FSTRING_START = getattr(tokenize, "FSTRING_START", None) FSTRING_MIDDLE = getattr(tokenize, "FSTRING_MIDDLE", None) FSTRING_END = getattr(tokenize, "FSTRING_END", None) def _strip_comment_tags(comments: MutableSequence[str], tags: Iterable[str]): """Helper function for `extract` that strips comment tags from strings in a list of comment lines. This functions operates in-place. 
""" def _strip(line: str): for tag in tags: if line.startswith(tag): return line[len(tag):].strip() return line comments[:] = map(_strip, comments) def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool: subdir = os.path.basename(dirpath) # Legacy default behavior: ignore dot and underscore directories return not (subdir.startswith('.') or subdir.startswith('_')) def extract_from_dir( dirname: str | os.PathLike[str] | None = None, method_map: Iterable[tuple[str, str]] = DEFAULT_MAPPING, options_map: SupportsItems[str, dict[str, Any]] | None = None, keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS, comment_tags: Collection[str] = (), callback: Callable[[str, str, dict[str, Any]], object] | None = None, strip_comment_tags: bool = False, directory_filter: Callable[[str], bool] | None = None, ) -> Generator[_FileExtractionResult, None, None]: """Extract messages from any source files found in the given directory. This function generates tuples of the form ``(filename, lineno, message, comments, context)``. Which extraction method is used per file is determined by the `method_map` parameter, which maps extended glob patterns to extraction method names. For example, the following is the default mapping: >>> method_map = [ ... ('**.py', 'python') ... ] This basically says that files with the filename extension ".py" at any level inside the directory should be processed by the "python" extraction method. Files that don't match any of the mapping patterns are ignored. See the documentation of the `pathmatch` function for details on the pattern syntax. The following extended mapping would also use the "genshi" extraction method on any file in "templates" subdirectory: >>> method_map = [ ... ('**/templates/**.*', 'genshi'), ... ('**.py', 'python') ... ] The dictionary provided by the optional `options_map` parameter augments these mappings. It uses extended glob patterns as keys, and the values are dictionaries mapping options names to option values (both strings). The glob patterns of the `options_map` do not necessarily need to be the same as those used in the method mapping. For example, while all files in the ``templates`` folders in an application may be Genshi applications, the options for those files may differ based on extension: >>> options_map = { ... '**/templates/**.txt': { ... 'template_class': 'genshi.template:TextTemplate', ... 'encoding': 'latin-1' ... }, ... '**/templates/**.html': { ... 'include_attrs': '' ... } ... } :param dirname: the path to the directory to extract messages from. If not given the current working directory is used. :param method_map: a list of ``(pattern, method)`` tuples that maps of extraction method names to extended glob patterns :param options_map: a dictionary of additional options (optional) :param keywords: a dictionary mapping keywords (i.e. names of functions that should be recognized as translation functions) to tuples that specify which of their arguments contain localizable strings :param comment_tags: a list of tags of translator comments to search for and include in the results :param callback: a function that is called for every file that message are extracted from, just before the extraction itself is performed; the function is passed the filename, the name of the extraction method and and the options dictionary as positional arguments, in that order :param strip_comment_tags: a flag that if set to `True` causes all comment tags to be removed from the collected comments. 
:param directory_filter: a callback to determine whether a directory should be recursed into. Receives the full directory path; should return True if the directory is valid. :see: `pathmatch` """ if dirname is None: dirname = os.getcwd() if options_map is None: options_map = {} if directory_filter is None: directory_filter = default_directory_filter absname = os.path.abspath(dirname) for root, dirnames, filenames in os.walk(absname): dirnames[:] = [ subdir for subdir in dirnames if directory_filter(os.path.join(root, subdir)) ] dirnames.sort() filenames.sort() for filename in filenames: filepath = os.path.join(root, filename).replace(os.sep, '/') yield from check_and_call_extract_file( filepath, method_map, options_map, callback, keywords, comment_tags, strip_comment_tags, dirpath=absname, ) def check_and_call_extract_file( filepath: str | os.PathLike[str], method_map: Iterable[tuple[str, str]], options_map: SupportsItems[str, dict[str, Any]], callback: Callable[[str, str, dict[str, Any]], object] | None, keywords: Mapping[str, _Keyword], comment_tags: Collection[str], strip_comment_tags: bool, dirpath: str | os.PathLike[str] | None = None, ) -> Generator[_FileExtractionResult, None, None]: """Checks if the given file matches an extraction method mapping, and if so, calls extract_from_file. Note that the extraction method mappings are based relative to dirpath. So, given an absolute path to a file `filepath`, we want to check using just the relative path from `dirpath` to `filepath`. Yields 5-tuples (filename, lineno, messages, comments, context). :param filepath: An absolute path to a file that exists. :param method_map: a list of ``(pattern, method)`` tuples that maps of extraction method names to extended glob patterns :param options_map: a dictionary of additional options (optional) :param callback: a function that is called for every file that message are extracted from, just before the extraction itself is performed; the function is passed the filename, the name of the extraction method and and the options dictionary as positional arguments, in that order :param keywords: a dictionary mapping keywords (i.e. names of functions that should be recognized as translation functions) to tuples that specify which of their arguments contain localizable strings :param comment_tags: a list of tags of translator comments to search for and include in the results :param strip_comment_tags: a flag that if set to `True` causes all comment tags to be removed from the collected comments. :param dirpath: the path to the directory to extract messages from. 
:return: iterable of 5-tuples (filename, lineno, messages, comments, context) :rtype: Iterable[tuple[str, int, str|tuple[str], list[str], str|None] """ # filename is the relative path from dirpath to the actual file filename = relpath(filepath, dirpath) for pattern, method in method_map: if not pathmatch(pattern, filename): continue options = {} for opattern, odict in options_map.items(): if pathmatch(opattern, filename): options = odict break if callback: callback(filename, method, options) for message_tuple in extract_from_file( method, filepath, keywords=keywords, comment_tags=comment_tags, options=options, strip_comment_tags=strip_comment_tags, ): yield (filename, *message_tuple) break def extract_from_file( method: _ExtractionMethod, filename: str | os.PathLike[str], keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS, comment_tags: Collection[str] = (), options: Mapping[str, Any] | None = None, strip_comment_tags: bool = False, ) -> list[_ExtractionResult]: """Extract messages from a specific file. This function returns a list of tuples of the form ``(lineno, message, comments, context)``. :param filename: the path to the file to extract messages from :param method: a string specifying the extraction method (.e.g. "python") :param keywords: a dictionary mapping keywords (i.e. names of functions that should be recognized as translation functions) to tuples that specify which of their arguments contain localizable strings :param comment_tags: a list of translator tags to search for and include in the results :param strip_comment_tags: a flag that if set to `True` causes all comment tags to be removed from the collected comments. :param options: a dictionary of additional options (optional) :returns: list of tuples of the form ``(lineno, message, comments, context)`` :rtype: list[tuple[int, str|tuple[str], list[str], str|None] """ if method == 'ignore': return [] with open(filename, 'rb') as fileobj: return list(extract(method, fileobj, keywords, comment_tags, options, strip_comment_tags)) def _match_messages_against_spec( lineno: int, messages: list[str | None], comments: list[str], fileobj: _FileObj, spec: tuple[int | tuple[int, str], ...], ): translatable = [] context = None # last_index is 1 based like the keyword spec last_index = len(messages) for index in spec: if isinstance(index, tuple): # (n, 'c') context = messages[index[0] - 1] continue if last_index < index: # Not enough arguments return message = messages[index - 1] if message is None: return translatable.append(message) # keyword spec indexes are 1 based, therefore '-1' if isinstance(spec[0], tuple): # context-aware *gettext method first_msg_index = spec[1] - 1 else: first_msg_index = spec[0] - 1 # An empty string msgid isn't valid, emit a warning if not messages[first_msg_index]: filename = (getattr(fileobj, "name", None) or "(unknown)") sys.stderr.write( f"{filename}:{lineno}: warning: Empty msgid. 
It is reserved by GNU gettext: gettext(\"\") " f"returns the header entry with meta information, not the empty string.\n", ) return translatable = tuple(translatable) if len(translatable) == 1: translatable = translatable[0] return lineno, translatable, comments, context @lru_cache(maxsize=None) def _find_extractor(name: str): for ep_name, load in find_entrypoints(GROUP_NAME): if ep_name == name: return load() return None def extract( method: _ExtractionMethod, fileobj: _FileObj, keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS, comment_tags: Collection[str] = (), options: Mapping[str, Any] | None = None, strip_comment_tags: bool = False, ) -> Generator[_ExtractionResult, None, None]: """Extract messages from the given file-like object using the specified extraction method. This function returns tuples of the form ``(lineno, message, comments, context)``. The implementation dispatches the actual extraction to plugins, based on the value of the ``method`` parameter. >>> source = b'''# foo module ... def run(argv): ... print(_('Hello, world!')) ... ''' >>> from io import BytesIO >>> for message in extract('python', BytesIO(source)): ... print(message) (3, u'Hello, world!', [], None) :param method: an extraction method (a callable), or a string specifying the extraction method (.e.g. "python"); if this is a simple name, the extraction function will be looked up by entry point; if it is an explicit reference to a function (of the form ``package.module:funcname`` or ``package.module.funcname``), the corresponding function will be imported and used :param fileobj: the file-like object the messages should be extracted from :param keywords: a dictionary mapping keywords (i.e. names of functions that should be recognized as translation functions) to tuples that specify which of their arguments contain localizable strings :param comment_tags: a list of translator tags to search for and include in the results :param options: a dictionary of additional options (optional) :param strip_comment_tags: a flag that if set to `True` causes all comment tags to be removed from the collected comments. :raise ValueError: if the extraction method is not registered :returns: iterable of tuples of the form ``(lineno, message, comments, context)`` :rtype: Iterable[tuple[int, str|tuple[str], list[str], str|None] """ if callable(method): func = method elif ':' in method or '.' in method: if ':' not in method: lastdot = method.rfind('.') module, attrname = method[:lastdot], method[lastdot + 1:] else: module, attrname = method.split(':', 1) func = getattr(__import__(module, {}, {}, [attrname]), attrname) else: func = _find_extractor(method) if func is None: # if no named entry point was found, # we resort to looking up a builtin extractor func = _BUILTIN_EXTRACTORS.get(method) if func is None: raise ValueError(f"Unknown extraction method {method!r}") results = func(fileobj, keywords.keys(), comment_tags, options=options or {}) for lineno, funcname, messages, comments in results: if not isinstance(messages, (list, tuple)): messages = [messages] if not messages: continue specs = keywords[funcname] or None if funcname else None # {None: x} may be collapsed into x for backwards compatibility. if not isinstance(specs, dict): specs = {None: specs} if strip_comment_tags: _strip_comment_tags(comments, comment_tags) # None matches all arities. 
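        # Illustrative note, not part of the original source: with the default
        # keyword specs, ``gettext`` maps to ``None`` (treated as ``(1,)``
        # below), so only the first argument is extracted; ``ngettext`` maps to
        # ``(1, 2)``, so a call such as ``ngettext("%d file", "%d files", n)``
        # yields the message tuple ``("%d file", "%d files")``; and
        # ``pgettext`` maps to ``((1, 'c'), 2)``, making the first argument the
        # message context.  When a spec dict is keyed by arity, e.g.
        # ``{None: (1,), 2: (2,)}``, both the any-arity (``None``) entry and
        # the entry for the exact argument count are tried, and every match is
        # yielded.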
for arity in (None, len(messages)): try: spec = specs[arity] except KeyError: continue if spec is None: spec = (1,) result = _match_messages_against_spec(lineno, messages, comments, fileobj, spec) if result is not None: yield result def extract_nothing( fileobj: _FileObj, keywords: Mapping[str, _Keyword], comment_tags: Collection[str], options: Mapping[str, Any], ) -> list[_ExtractionResult]: """Pseudo extractor that does not actually extract anything, but simply returns an empty list. """ return [] def extract_python( fileobj: IO[bytes], keywords: Mapping[str, _Keyword], comment_tags: Collection[str], options: _PyOptions, ) -> Generator[_ExtractionResult, None, None]: """Extract messages from Python source code. It returns an iterator yielding tuples in the following form ``(lineno, funcname, message, comments)``. :param fileobj: the seekable, file-like object the messages should be extracted from :param keywords: a list of keywords (i.e. function names) that should be recognized as translation functions :param comment_tags: a list of translator tags to search for and include in the results :param options: a dictionary of additional options (optional) :rtype: ``iterator`` """ funcname = lineno = message_lineno = None call_stack = -1 buf = [] messages = [] translator_comments = [] in_def = in_translator_comments = False comment_tag = None encoding = parse_encoding(fileobj) or options.get('encoding', 'UTF-8') future_flags = parse_future_flags(fileobj, encoding) next_line = lambda: fileobj.readline().decode(encoding) tokens = generate_tokens(next_line) # Current prefix of a Python 3.12 (PEP 701) f-string, or None if we're not # currently parsing one. current_fstring_start = None for tok, value, (lineno, _), _, _ in tokens: if call_stack == -1 and tok == NAME and value in ('def', 'class'): in_def = True elif tok == OP and value == '(': if in_def: # Avoid false positives for declarations such as: # def gettext(arg='message'): in_def = False continue if funcname: call_stack += 1 elif in_def and tok == OP and value == ':': # End of a class definition without parens in_def = False continue elif call_stack == -1 and tok == COMMENT: # Strip the comment token from the line value = value[1:].strip() if in_translator_comments and \ translator_comments[-1][0] == lineno - 1: # We're already inside a translator comment, continue appending translator_comments.append((lineno, value)) continue # If execution reaches this point, let's see if comment line # starts with one of the comment tags for comment_tag in comment_tags: if value.startswith(comment_tag): in_translator_comments = True translator_comments.append((lineno, value)) break elif funcname and call_stack == 0: nested = (tok == NAME and value in keywords) if (tok == OP and value == ')') or nested: if buf: messages.append(''.join(buf)) del buf[:] else: messages.append(None) messages = tuple(messages) if len(messages) > 1 else messages[0] # Comments don't apply unless they immediately # precede the message if translator_comments and \ translator_comments[-1][0] < message_lineno - 1: translator_comments = [] yield (message_lineno, funcname, messages, [comment[1] for comment in translator_comments]) funcname = lineno = message_lineno = None call_stack = -1 messages = [] translator_comments = [] in_translator_comments = False if nested: funcname = value elif tok == STRING: val = _parse_python_string(value, encoding, future_flags) if val is not None: if not message_lineno: message_lineno = lineno buf.append(val) # Python 3.12+, see 
https://peps.python.org/pep-0701/#new-tokens elif tok == FSTRING_START: current_fstring_start = value if not message_lineno: message_lineno = lineno elif tok == FSTRING_MIDDLE: if current_fstring_start is not None: current_fstring_start += value elif tok == FSTRING_END: if current_fstring_start is not None: fstring = current_fstring_start + value val = _parse_python_string(fstring, encoding, future_flags) if val is not None: buf.append(val) elif tok == OP and value == ',': if buf: messages.append(''.join(buf)) del buf[:] else: messages.append(None) if translator_comments: # We have translator comments, and since we're on a # comma(,) user is allowed to break into a new line # Let's increase the last comment's lineno in order # for the comment to still be a valid one old_lineno, old_comment = translator_comments.pop() translator_comments.append((old_lineno + 1, old_comment)) elif tok != NL and not message_lineno: message_lineno = lineno elif call_stack > 0 and tok == OP and value == ')': call_stack -= 1 elif funcname and call_stack == -1: funcname = None elif tok == NAME and value in keywords: funcname = value if current_fstring_start is not None and tok not in {FSTRING_START, FSTRING_MIDDLE}: # In Python 3.12, tokens other than FSTRING_* mean the # f-string is dynamic, so we don't wan't to extract it. # And if it's FSTRING_END, we've already handled it above. # Let's forget that we're in an f-string. current_fstring_start = None def _parse_python_string(value: str, encoding: str, future_flags: int) -> str | None: # Unwrap quotes in a safe manner, maintaining the string's encoding # https://sourceforge.net/tracker/?func=detail&atid=355470&aid=617979&group_id=5470 code = compile( f'# coding={str(encoding)}\n{value}', '', 'eval', ast.PyCF_ONLY_AST | future_flags, ) if isinstance(code, ast.Expression): body = code.body if isinstance(body, ast.Constant): return body.value if isinstance(body, ast.JoinedStr): # f-string if all(isinstance(node, ast.Constant) for node in body.values): return ''.join(node.value for node in body.values) # TODO: we could raise an error or warning when not all nodes are constants return None def extract_javascript( fileobj: _FileObj, keywords: Mapping[str, _Keyword], comment_tags: Collection[str], options: _JSOptions, lineno: int = 1, ) -> Generator[_ExtractionResult, None, None]: """Extract messages from JavaScript source code. :param fileobj: the seekable, file-like object the messages should be extracted from :param keywords: a list of keywords (i.e. function names) that should be recognized as translation functions :param comment_tags: a list of translator tags to search for and include in the results :param options: a dictionary of additional options (optional) Supported options are: * `jsx` -- set to false to disable JSX/E4X support. * `template_string` -- if `True`, supports gettext(`key`) * `parse_template_string` -- if `True` will parse the contents of javascript template strings. :param lineno: line number offset (for parsing embedded fragments) """ from babel.messages.jslexer import Token, tokenize, unquote_string funcname = message_lineno = None messages = [] last_argument = None translator_comments = [] concatenate_next = False encoding = options.get('encoding', 'utf-8') last_token = None call_stack = -1 dotted = any('.' 
in kw for kw in keywords) for token in tokenize( fileobj.read().decode(encoding), jsx=options.get("jsx", True), template_string=options.get("template_string", True), dotted=dotted, lineno=lineno, ): if ( # Turn keyword`foo` expressions into keyword("foo") calls: funcname and # have a keyword... (last_token and last_token.type == 'name') and # we've seen nothing after the keyword... token.type == 'template_string' # this is a template string ): message_lineno = token.lineno messages = [unquote_string(token.value)] call_stack = 0 token = Token('operator', ')', token.lineno) if options.get('parse_template_string') and not funcname and token.type == 'template_string': yield from parse_template_string(token.value, keywords, comment_tags, options, token.lineno) elif token.type == 'operator' and token.value == '(': if funcname: message_lineno = token.lineno call_stack += 1 elif call_stack == -1 and token.type == 'linecomment': value = token.value[2:].strip() if translator_comments and \ translator_comments[-1][0] == token.lineno - 1: translator_comments.append((token.lineno, value)) continue for comment_tag in comment_tags: if value.startswith(comment_tag): translator_comments.append((token.lineno, value.strip())) break elif token.type == 'multilinecomment': # only one multi-line comment may precede a translation translator_comments = [] value = token.value[2:-2].strip() for comment_tag in comment_tags: if value.startswith(comment_tag): lines = value.splitlines() if lines: lines[0] = lines[0].strip() lines[1:] = dedent('\n'.join(lines[1:])).splitlines() for offset, line in enumerate(lines): translator_comments.append((token.lineno + offset, line)) break elif funcname and call_stack == 0: if token.type == 'operator' and token.value == ')': if last_argument is not None: messages.append(last_argument) if len(messages) > 1: messages = tuple(messages) elif messages: messages = messages[0] else: messages = None # Comments don't apply unless they immediately precede the # message if translator_comments and \ translator_comments[-1][0] < message_lineno - 1: translator_comments = [] if messages is not None: yield (message_lineno, funcname, messages, [comment[1] for comment in translator_comments]) funcname = message_lineno = last_argument = None concatenate_next = False translator_comments = [] messages = [] call_stack = -1 elif token.type in ('string', 'template_string'): new_value = unquote_string(token.value) if concatenate_next: last_argument = (last_argument or '') + new_value concatenate_next = False else: last_argument = new_value elif token.type == 'operator': if token.value == ',': if last_argument is not None: messages.append(last_argument) last_argument = None else: messages.append(None) concatenate_next = False elif token.value == '+': concatenate_next = True elif call_stack > 0 and token.type == 'operator' \ and token.value == ')': call_stack -= 1 elif funcname and call_stack == -1: funcname = None elif call_stack == -1 and token.type == 'name' and \ token.value in keywords and \ (last_token is None or last_token.type != 'name' or last_token.value != 'function'): funcname = token.value last_token = token def parse_template_string( template_string: str, keywords: Mapping[str, _Keyword], comment_tags: Collection[str], options: _JSOptions, lineno: int = 1, ) -> Generator[_ExtractionResult, None, None]: """Parse JavaScript template string. :param template_string: the template string to be parsed :param keywords: a list of keywords (i.e. 
function names) that should be recognized as translation functions :param comment_tags: a list of translator tags to search for and include in the results :param options: a dictionary of additional options (optional) :param lineno: starting line number (optional) """ from babel.messages.jslexer import line_re prev_character = None level = 0 inside_str = False expression_contents = '' for character in template_string[1:-1]: if not inside_str and character in ('"', "'", '`'): inside_str = character elif inside_str == character and prev_character != r'\\': inside_str = False if level: expression_contents += character if not inside_str: if character == '{' and prev_character == '$': level += 1 elif level and character == '}': level -= 1 if level == 0 and expression_contents: expression_contents = expression_contents[0:-1] fake_file_obj = io.BytesIO(expression_contents.encode()) yield from extract_javascript(fake_file_obj, keywords, comment_tags, options, lineno) lineno += len(line_re.findall(expression_contents)) expression_contents = '' prev_character = character _BUILTIN_EXTRACTORS = { 'ignore': extract_nothing, 'python': extract_python, 'javascript': extract_javascript, } babel-2.17.0/babel/messages/frontend.py000066400000000000000000001305401474743505200177310ustar00rootroot00000000000000""" babel.messages.frontend ~~~~~~~~~~~~~~~~~~~~~~~ Frontends for the message extraction functionality. :copyright: (c) 2013-2025 by the Babel Team. :license: BSD, see LICENSE for more details. """ from __future__ import annotations import datetime import fnmatch import logging import optparse import os import re import shutil import sys import tempfile import warnings from configparser import RawConfigParser from io import StringIO from typing import BinaryIO, Iterable, Literal from babel import Locale, localedata from babel import __version__ as VERSION from babel.core import UnknownLocaleError from babel.messages.catalog import DEFAULT_HEADER, Catalog from babel.messages.extract import ( DEFAULT_KEYWORDS, DEFAULT_MAPPING, check_and_call_extract_file, extract_from_dir, ) from babel.messages.mofile import write_mo from babel.messages.pofile import read_po, write_po from babel.util import LOCALTZ log = logging.getLogger('babel') class BaseError(Exception): pass class OptionError(BaseError): pass class SetupError(BaseError): pass class ConfigurationError(BaseError): """ Raised for errors in configuration files. """ def listify_value(arg, split=None): """ Make a list out of an argument. Values from `distutils` argument parsing are always single strings; values from `optparse` parsing may be lists of strings that may need to be further split. No matter the input, this function returns a flat list of whitespace-trimmed strings, with `None` values filtered out. >>> listify_value("foo bar") ['foo', 'bar'] >>> listify_value(["foo bar"]) ['foo', 'bar'] >>> listify_value([["foo"], "bar"]) ['foo', 'bar'] >>> listify_value([["foo"], ["bar", None, "foo"]]) ['foo', 'bar', 'foo'] >>> listify_value("foo, bar, quux", ",") ['foo', 'bar', 'quux'] :param arg: A string or a list of strings :param split: The argument to pass to `str.split()`. 
:return: """ out = [] if not isinstance(arg, (list, tuple)): arg = [arg] for val in arg: if val is None: continue if isinstance(val, (list, tuple)): out.extend(listify_value(val, split=split)) continue out.extend(s.strip() for s in str(val).split(split)) assert all(isinstance(val, str) for val in out) return out class CommandMixin: # This class is a small shim between Distutils commands and # optparse option parsing in the frontend command line. #: Option name to be input as `args` on the script command line. as_args = None #: Options which allow multiple values. #: This is used by the `optparse` transmogrification code. multiple_value_options = () #: Options which are booleans. #: This is used by the `optparse` transmogrification code. # (This is actually used by distutils code too, but is never # declared in the base class.) boolean_options = () #: Option aliases, to retain standalone command compatibility. #: Distutils does not support option aliases, but optparse does. #: This maps the distutils argument name to an iterable of aliases #: that are usable with optparse. option_aliases = {} #: Choices for options that needed to be restricted to specific #: list of choices. option_choices = {} #: Log object. To allow replacement in the script command line runner. log = log def __init__(self, dist=None): # A less strict version of distutils' `__init__`. self.distribution = dist self.initialize_options() self._dry_run = None self.verbose = False self.force = None self.help = 0 self.finalized = 0 def initialize_options(self): pass def ensure_finalized(self): if not self.finalized: self.finalize_options() self.finalized = 1 def finalize_options(self): raise RuntimeError( f"abstract method -- subclass {self.__class__} must override", ) class CompileCatalog(CommandMixin): description = 'compile message catalogs to binary MO files' user_options = [ ('domain=', 'D', "domains of PO files (space separated list, default 'messages')"), ('directory=', 'd', 'path to base directory containing the catalogs'), ('input-file=', 'i', 'name of the input file'), ('output-file=', 'o', "name of the output file (default " "'//LC_MESSAGES/.mo')"), ('locale=', 'l', 'locale of the catalog to compile'), ('use-fuzzy', 'f', 'also include fuzzy translations'), ('statistics', None, 'print statistics about translations'), ] boolean_options = ['use-fuzzy', 'statistics'] def initialize_options(self): self.domain = 'messages' self.directory = None self.input_file = None self.output_file = None self.locale = None self.use_fuzzy = False self.statistics = False def finalize_options(self): self.domain = listify_value(self.domain) if not self.input_file and not self.directory: raise OptionError('you must specify either the input file or the base directory') if not self.output_file and not self.directory: raise OptionError('you must specify either the output file or the base directory') def run(self): n_errors = 0 for domain in self.domain: for errors in self._run_domain(domain).values(): n_errors += len(errors) if n_errors: self.log.error('%d errors encountered.', n_errors) return (1 if n_errors else 0) def _run_domain(self, domain): po_files = [] mo_files = [] if not self.input_file: if self.locale: po_files.append((self.locale, os.path.join(self.directory, self.locale, 'LC_MESSAGES', f"{domain}.po"))) mo_files.append(os.path.join(self.directory, self.locale, 'LC_MESSAGES', f"{domain}.mo")) else: for locale in os.listdir(self.directory): po_file = os.path.join(self.directory, locale, 'LC_MESSAGES', f"{domain}.po") if 
os.path.exists(po_file): po_files.append((locale, po_file)) mo_files.append(os.path.join(self.directory, locale, 'LC_MESSAGES', f"{domain}.mo")) else: po_files.append((self.locale, self.input_file)) if self.output_file: mo_files.append(self.output_file) else: mo_files.append(os.path.join(self.directory, self.locale, 'LC_MESSAGES', f"{domain}.mo")) if not po_files: raise OptionError('no message catalogs found') catalogs_and_errors = {} for idx, (locale, po_file) in enumerate(po_files): mo_file = mo_files[idx] with open(po_file, 'rb') as infile: catalog = read_po(infile, locale) if self.statistics: translated = 0 for message in list(catalog)[1:]: if message.string: translated += 1 percentage = 0 if len(catalog): percentage = translated * 100 // len(catalog) self.log.info( '%d of %d messages (%d%%) translated in %s', translated, len(catalog), percentage, po_file, ) if catalog.fuzzy and not self.use_fuzzy: self.log.info('catalog %s is marked as fuzzy, skipping', po_file) continue catalogs_and_errors[catalog] = catalog_errors = list(catalog.check()) for message, errors in catalog_errors: for error in errors: self.log.error( 'error: %s:%d: %s', po_file, message.lineno, error, ) self.log.info('compiling catalog %s to %s', po_file, mo_file) with open(mo_file, 'wb') as outfile: write_mo(outfile, catalog, use_fuzzy=self.use_fuzzy) return catalogs_and_errors def _make_directory_filter(ignore_patterns): """ Build a directory_filter function based on a list of ignore patterns. """ def cli_directory_filter(dirname): basename = os.path.basename(dirname) return not any( fnmatch.fnmatch(basename, ignore_pattern) for ignore_pattern in ignore_patterns ) return cli_directory_filter class ExtractMessages(CommandMixin): description = 'extract localizable strings from the project code' user_options = [ ('charset=', None, 'charset to use in the output file (default "utf-8")'), ('keywords=', 'k', 'space-separated list of keywords to look for in addition to the ' 'defaults (may be repeated multiple times)'), ('no-default-keywords', None, 'do not include the default keywords'), ('mapping-file=', 'F', 'path to the mapping configuration file'), ('no-location', None, 'do not include location comments with filename and line number'), ('add-location=', None, 'location lines format. If it is not given or "full", it generates ' 'the lines with both file name and line number. If it is "file", ' 'the line number part is omitted. If it is "never", it completely ' 'suppresses the lines (same as --no-location).'), ('omit-header', None, 'do not include msgid "" entry in header'), ('output-file=', 'o', 'name of the output file'), ('width=', 'w', 'set output line width (default 76)'), ('no-wrap', None, 'do not break long message lines, longer than the output line width, ' 'into several lines'), ('sort-output', None, 'generate sorted output (default False)'), ('sort-by-file', None, 'sort output by file location (default False)'), ('msgid-bugs-address=', None, 'set report address for msgid'), ('copyright-holder=', None, 'set copyright holder in output'), ('project=', None, 'set project name in output'), ('version=', None, 'set project version in output'), ('add-comments=', 'c', 'place comment block with TAG (or those preceding keyword lines) in ' 'output file. Separate multiple TAGs with commas(,)'), # TODO: Support repetition of this argument ('strip-comments', 's', 'strip the comment TAGs from the comments.'), ('input-paths=', None, 'files or directories that should be scanned for messages. 
Separate multiple ' 'files or directories with commas(,)'), # TODO: Support repetition of this argument ('input-dirs=', None, # TODO (3.x): Remove me. 'alias for input-paths (does allow files as well as directories).'), ('ignore-dirs=', None, 'Patterns for directories to ignore when scanning for messages. ' 'Separate multiple patterns with spaces (default ".* ._")'), ('header-comment=', None, 'header comment for the catalog'), ('last-translator=', None, 'set the name and email of the last translator in output'), ] boolean_options = [ 'no-default-keywords', 'no-location', 'omit-header', 'no-wrap', 'sort-output', 'sort-by-file', 'strip-comments', ] as_args = 'input-paths' multiple_value_options = ( 'add-comments', 'keywords', 'ignore-dirs', ) option_aliases = { 'keywords': ('--keyword',), 'mapping-file': ('--mapping',), 'output-file': ('--output',), 'strip-comments': ('--strip-comment-tags',), 'last-translator': ('--last-translator',), } option_choices = { 'add-location': ('full', 'file', 'never'), } def initialize_options(self): self.charset = 'utf-8' self.keywords = None self.no_default_keywords = False self.mapping_file = None self.no_location = False self.add_location = None self.omit_header = False self.output_file = None self.input_dirs = None self.input_paths = None self.width = None self.no_wrap = False self.sort_output = False self.sort_by_file = False self.msgid_bugs_address = None self.copyright_holder = None self.project = None self.version = None self.add_comments = None self.strip_comments = False self.include_lineno = True self.ignore_dirs = None self.header_comment = None self.last_translator = None def finalize_options(self): if self.input_dirs: if not self.input_paths: self.input_paths = self.input_dirs else: raise OptionError( 'input-dirs and input-paths are mutually exclusive', ) keywords = {} if self.no_default_keywords else DEFAULT_KEYWORDS.copy() keywords.update(parse_keywords(listify_value(self.keywords))) self.keywords = keywords if not self.keywords: raise OptionError( 'you must specify new keywords if you disable the default ones', ) if not self.output_file: raise OptionError('no output file specified') if self.no_wrap and self.width: raise OptionError( "'--no-wrap' and '--width' are mutually exclusive", ) if not self.no_wrap and not self.width: self.width = 76 elif self.width is not None: self.width = int(self.width) if self.sort_output and self.sort_by_file: raise OptionError( "'--sort-output' and '--sort-by-file' are mutually exclusive", ) if self.input_paths: if isinstance(self.input_paths, str): self.input_paths = re.split(r',\s*', self.input_paths) elif self.distribution is not None: self.input_paths = dict.fromkeys([ k.split('.', 1)[0] for k in (self.distribution.packages or ()) ]).keys() else: self.input_paths = [] if not self.input_paths: raise OptionError("no input files or directories specified") for path in self.input_paths: if not os.path.exists(path): raise OptionError(f"Input path: {path} does not exist") self.add_comments = listify_value(self.add_comments or (), ",") if self.distribution: if not self.project: self.project = self.distribution.get_name() if not self.version: self.version = self.distribution.get_version() if self.add_location == 'never': self.no_location = True elif self.add_location == 'file': self.include_lineno = False ignore_dirs = listify_value(self.ignore_dirs) if ignore_dirs: self.directory_filter = _make_directory_filter(ignore_dirs) else: self.directory_filter = None def _build_callback(self, path: str): def callback(filename: 
str, method: str, options: dict): if method == 'ignore': return # If we explicitly provide a full filepath, just use that. # Otherwise, path will be the directory path and filename # is the relative path from that dir to the file. # So we can join those to get the full filepath. if os.path.isfile(path): filepath = path else: filepath = os.path.normpath(os.path.join(path, filename)) optstr = '' if options: opt_values = ", ".join(f'{k}="{v}"' for k, v in options.items()) optstr = f" ({opt_values})" self.log.info('extracting messages from %s%s', filepath, optstr) return callback def run(self): mappings = self._get_mappings() with open(self.output_file, 'wb') as outfile: catalog = Catalog(project=self.project, version=self.version, msgid_bugs_address=self.msgid_bugs_address, copyright_holder=self.copyright_holder, charset=self.charset, header_comment=(self.header_comment or DEFAULT_HEADER), last_translator=self.last_translator) for path, method_map, options_map in mappings: callback = self._build_callback(path) if os.path.isfile(path): current_dir = os.getcwd() extracted = check_and_call_extract_file( path, method_map, options_map, callback, self.keywords, self.add_comments, self.strip_comments, current_dir, ) else: extracted = extract_from_dir( path, method_map, options_map, keywords=self.keywords, comment_tags=self.add_comments, callback=callback, strip_comment_tags=self.strip_comments, directory_filter=self.directory_filter, ) for filename, lineno, message, comments, context in extracted: if os.path.isfile(path): filepath = filename # already normalized else: filepath = os.path.normpath(os.path.join(path, filename)) catalog.add(message, None, [(filepath, lineno)], auto_comments=comments, context=context) self.log.info('writing PO template file to %s', self.output_file) write_po(outfile, catalog, width=self.width, no_location=self.no_location, omit_header=self.omit_header, sort_output=self.sort_output, sort_by_file=self.sort_by_file, include_lineno=self.include_lineno) def _get_mappings(self): mappings = [] if self.mapping_file: if self.mapping_file.endswith(".toml"): with open(self.mapping_file, "rb") as fileobj: file_style = ( "pyproject.toml" if os.path.basename(self.mapping_file) == "pyproject.toml" else "standalone" ) method_map, options_map = _parse_mapping_toml( fileobj, filename=self.mapping_file, style=file_style, ) else: with open(self.mapping_file) as fileobj: method_map, options_map = parse_mapping_cfg(fileobj, filename=self.mapping_file) for path in self.input_paths: mappings.append((path, method_map, options_map)) elif getattr(self.distribution, 'message_extractors', None): message_extractors = self.distribution.message_extractors for path, mapping in message_extractors.items(): if isinstance(mapping, str): method_map, options_map = parse_mapping_cfg(StringIO(mapping)) else: method_map, options_map = [], {} for pattern, method, options in mapping: method_map.append((pattern, method)) options_map[pattern] = options or {} mappings.append((path, method_map, options_map)) else: for path in self.input_paths: mappings.append((path, DEFAULT_MAPPING, {})) return mappings class InitCatalog(CommandMixin): description = 'create a new catalog based on a POT file' user_options = [ ('domain=', 'D', "domain of PO file (default 'messages')"), ('input-file=', 'i', 'name of the input file'), ('output-dir=', 'd', 'path to output directory'), ('output-file=', 'o', "name of the output file (default " "'//LC_MESSAGES/.po')"), ('locale=', 'l', 'locale for the new localized catalog'), ('width=', 'w', 
'set output line width (default 76)'), ('no-wrap', None, 'do not break long message lines, longer than the output line width, ' 'into several lines'), ] boolean_options = ['no-wrap'] def initialize_options(self): self.output_dir = None self.output_file = None self.input_file = None self.locale = None self.domain = 'messages' self.no_wrap = False self.width = None def finalize_options(self): if not self.input_file: raise OptionError('you must specify the input file') if not self.locale: raise OptionError('you must provide a locale for the new catalog') try: self._locale = Locale.parse(self.locale) except UnknownLocaleError as e: raise OptionError(e) from e if not self.output_file and not self.output_dir: raise OptionError('you must specify the output directory') if not self.output_file: self.output_file = os.path.join(self.output_dir, self.locale, 'LC_MESSAGES', f"{self.domain}.po") if not os.path.exists(os.path.dirname(self.output_file)): os.makedirs(os.path.dirname(self.output_file)) if self.no_wrap and self.width: raise OptionError("'--no-wrap' and '--width' are mutually exclusive") if not self.no_wrap and not self.width: self.width = 76 elif self.width is not None: self.width = int(self.width) def run(self): self.log.info( 'creating catalog %s based on %s', self.output_file, self.input_file, ) with open(self.input_file, 'rb') as infile: # Although reading from the catalog template, read_po must be fed # the locale in order to correctly calculate plurals catalog = read_po(infile, locale=self.locale) catalog.locale = self._locale catalog.revision_date = datetime.datetime.now(LOCALTZ) catalog.fuzzy = False with open(self.output_file, 'wb') as outfile: write_po(outfile, catalog, width=self.width) class UpdateCatalog(CommandMixin): description = 'update message catalogs from a POT file' user_options = [ ('domain=', 'D', "domain of PO file (default 'messages')"), ('input-file=', 'i', 'name of the input file'), ('output-dir=', 'd', 'path to base directory containing the catalogs'), ('output-file=', 'o', "name of the output file (default " "'//LC_MESSAGES/.po')"), ('omit-header', None, "do not include msgid "" entry in header"), ('locale=', 'l', 'locale of the catalog to compile'), ('width=', 'w', 'set output line width (default 76)'), ('no-wrap', None, 'do not break long message lines, longer than the output line width, ' 'into several lines'), ('ignore-obsolete=', None, 'whether to omit obsolete messages from the output'), ('init-missing=', None, 'if any output files are missing, initialize them first'), ('no-fuzzy-matching', 'N', 'do not use fuzzy matching'), ('update-header-comment', None, 'update target header comment'), ('previous', None, 'keep previous msgids of translated messages'), ('check=', None, 'don\'t update the catalog, just return the status. Return code 0 ' 'means nothing would change. 
Return code 1 means that the catalog ' 'would be updated'), ('ignore-pot-creation-date=', None, 'ignore changes to POT-Creation-Date when updating or checking'), ] boolean_options = [ 'omit-header', 'no-wrap', 'ignore-obsolete', 'init-missing', 'no-fuzzy-matching', 'previous', 'update-header-comment', 'check', 'ignore-pot-creation-date', ] def initialize_options(self): self.domain = 'messages' self.input_file = None self.output_dir = None self.output_file = None self.omit_header = False self.locale = None self.width = None self.no_wrap = False self.ignore_obsolete = False self.init_missing = False self.no_fuzzy_matching = False self.update_header_comment = False self.previous = False self.check = False self.ignore_pot_creation_date = False def finalize_options(self): if not self.input_file: raise OptionError('you must specify the input file') if not self.output_file and not self.output_dir: raise OptionError('you must specify the output file or directory') if self.output_file and not self.locale: raise OptionError('you must specify the locale') if self.init_missing: if not self.locale: raise OptionError( 'you must specify the locale for ' 'the init-missing option to work', ) try: self._locale = Locale.parse(self.locale) except UnknownLocaleError as e: raise OptionError(e) from e else: self._locale = None if self.no_wrap and self.width: raise OptionError("'--no-wrap' and '--width' are mutually exclusive") if not self.no_wrap and not self.width: self.width = 76 elif self.width is not None: self.width = int(self.width) if self.no_fuzzy_matching and self.previous: self.previous = False def run(self): check_status = {} po_files = [] if not self.output_file: if self.locale: po_files.append((self.locale, os.path.join(self.output_dir, self.locale, 'LC_MESSAGES', f"{self.domain}.po"))) else: for locale in os.listdir(self.output_dir): po_file = os.path.join(self.output_dir, locale, 'LC_MESSAGES', f"{self.domain}.po") if os.path.exists(po_file): po_files.append((locale, po_file)) else: po_files.append((self.locale, self.output_file)) if not po_files: raise OptionError('no message catalogs found') domain = self.domain if not domain: domain = os.path.splitext(os.path.basename(self.input_file))[0] with open(self.input_file, 'rb') as infile: template = read_po(infile) for locale, filename in po_files: if self.init_missing and not os.path.exists(filename): if self.check: check_status[filename] = False continue self.log.info( 'creating catalog %s based on %s', filename, self.input_file, ) with open(self.input_file, 'rb') as infile: # Although reading from the catalog template, read_po must # be fed the locale in order to correctly calculate plurals catalog = read_po(infile, locale=self.locale) catalog.locale = self._locale catalog.revision_date = datetime.datetime.now(LOCALTZ) catalog.fuzzy = False with open(filename, 'wb') as outfile: write_po(outfile, catalog) self.log.info('updating catalog %s based on %s', filename, self.input_file) with open(filename, 'rb') as infile: catalog = read_po(infile, locale=locale, domain=domain) catalog.update( template, self.no_fuzzy_matching, update_header_comment=self.update_header_comment, update_creation_date=not self.ignore_pot_creation_date, ) tmpname = os.path.join(os.path.dirname(filename), tempfile.gettempprefix() + os.path.basename(filename)) try: with open(tmpname, 'wb') as tmpfile: write_po(tmpfile, catalog, omit_header=self.omit_header, ignore_obsolete=self.ignore_obsolete, include_previous=self.previous, width=self.width) except Exception: os.remove(tmpname) 
raise if self.check: with open(filename, "rb") as origfile: original_catalog = read_po(origfile) with open(tmpname, "rb") as newfile: updated_catalog = read_po(newfile) updated_catalog.revision_date = original_catalog.revision_date check_status[filename] = updated_catalog.is_identical(original_catalog) os.remove(tmpname) continue try: os.rename(tmpname, filename) except OSError: # We're probably on Windows, which doesn't support atomic # renames, at least not through Python # If the error is in fact due to a permissions problem, that # same error is going to be raised from one of the following # operations os.remove(filename) shutil.copy(tmpname, filename) os.remove(tmpname) if self.check: for filename, up_to_date in check_status.items(): if up_to_date: self.log.info('Catalog %s is up to date.', filename) else: self.log.warning('Catalog %s is out of date.', filename) if not all(check_status.values()): raise BaseError("Some catalogs are out of date.") else: self.log.info("All the catalogs are up-to-date.") return class CommandLineInterface: """Command-line interface. This class provides a simple command-line interface to the message extraction and PO file generation functionality. """ usage = '%%prog %s [options] %s' version = f'%prog {VERSION}' commands = { 'compile': 'compile message catalogs to MO files', 'extract': 'extract messages from source files and generate a POT file', 'init': 'create new message catalogs from a POT file', 'update': 'update existing message catalogs from a POT file', } command_classes = { 'compile': CompileCatalog, 'extract': ExtractMessages, 'init': InitCatalog, 'update': UpdateCatalog, } log = None # Replaced on instance level def run(self, argv=None): """Main entry point of the command-line interface. :param argv: list of arguments passed on the command-line """ if argv is None: argv = sys.argv self.parser = optparse.OptionParser(usage=self.usage % ('command', '[args]'), version=self.version) self.parser.disable_interspersed_args() self.parser.print_help = self._help self.parser.add_option('--list-locales', dest='list_locales', action='store_true', help="print all known locales and exit") self.parser.add_option('-v', '--verbose', action='store_const', dest='loglevel', const=logging.DEBUG, help='print as much as possible') self.parser.add_option('-q', '--quiet', action='store_const', dest='loglevel', const=logging.ERROR, help='print as little as possible') self.parser.set_defaults(list_locales=False, loglevel=logging.INFO) options, args = self.parser.parse_args(argv[1:]) self._configure_logging(options.loglevel) if options.list_locales: identifiers = localedata.locale_identifiers() id_width = max(len(identifier) for identifier in identifiers) + 1 for identifier in sorted(identifiers): locale = Locale.parse(identifier) print(f"{identifier:<{id_width}} {locale.english_name}") return 0 if not args: self.parser.error('no valid command or option passed. ' 'Try the -h/--help option for more information.') cmdname = args[0] if cmdname not in self.commands: self.parser.error(f'unknown command "{cmdname}"') cmdinst = self._configure_command(cmdname, args[1:]) return cmdinst.run() def _configure_logging(self, loglevel): self.log = log self.log.setLevel(loglevel) # Don't add a new handler for every instance initialization (#227), this # would cause duplicated output when the CommandLineInterface as an # normal Python class. 
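        # A hedged sketch of that programmatic use (the paths given here are
        # hypothetical, purely for illustration):
        #
        #     from babel.messages.frontend import CommandLineInterface
        #     CommandLineInterface().run(
        #         ['pybabel', 'extract', '-o', 'messages.pot', 'src/'])
        #
        # Because the module-level 'babel' logger is shared, the check below
        # reuses an existing handler instead of stacking a new one on every
        # run.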
if self.log.handlers: handler = self.log.handlers[0] else: handler = logging.StreamHandler() self.log.addHandler(handler) handler.setLevel(loglevel) formatter = logging.Formatter('%(message)s') handler.setFormatter(formatter) def _help(self): print(self.parser.format_help()) print("commands:") cmd_width = max(8, max(len(command) for command in self.commands) + 1) for name, description in sorted(self.commands.items()): print(f" {name:<{cmd_width}} {description}") def _configure_command(self, cmdname, argv): """ :type cmdname: str :type argv: list[str] """ cmdclass = self.command_classes[cmdname] cmdinst = cmdclass() if self.log: cmdinst.log = self.log # Use our logger, not distutils'. assert isinstance(cmdinst, CommandMixin) cmdinst.initialize_options() parser = optparse.OptionParser( usage=self.usage % (cmdname, ''), description=self.commands[cmdname], ) as_args: str | None = getattr(cmdclass, "as_args", None) for long, short, help in cmdclass.user_options: name = long.strip("=") default = getattr(cmdinst, name.replace("-", "_")) strs = [f"--{name}"] if short: strs.append(f"-{short}") strs.extend(cmdclass.option_aliases.get(name, ())) choices = cmdclass.option_choices.get(name, None) if name == as_args: parser.usage += f"<{name}>" elif name in cmdclass.boolean_options: parser.add_option(*strs, action="store_true", help=help) elif name in cmdclass.multiple_value_options: parser.add_option(*strs, action="append", help=help, choices=choices) else: parser.add_option(*strs, help=help, default=default, choices=choices) options, args = parser.parse_args(argv) if as_args: setattr(options, as_args.replace('-', '_'), args) for key, value in vars(options).items(): setattr(cmdinst, key, value) try: cmdinst.ensure_finalized() except OptionError as err: parser.error(str(err)) return cmdinst def main(): return CommandLineInterface().run(sys.argv) def parse_mapping(fileobj, filename=None): warnings.warn( "parse_mapping is deprecated, use parse_mapping_cfg instead", DeprecationWarning, stacklevel=2, ) return parse_mapping_cfg(fileobj, filename) def parse_mapping_cfg(fileobj, filename=None): """Parse an extraction method mapping from a file-like object. :param fileobj: a readable file-like object containing the configuration text to parse :param filename: the name of the file being parsed, for error messages """ extractors = {} method_map = [] options_map = {} parser = RawConfigParser() parser.read_file(fileobj, filename) for section in parser.sections(): if section == 'extractors': extractors = dict(parser.items(section)) else: method, pattern = (part.strip() for part in section.split(':', 1)) method_map.append((pattern, method)) options_map[pattern] = dict(parser.items(section)) if extractors: for idx, (pattern, method) in enumerate(method_map): if method in extractors: method = extractors[method] method_map[idx] = (pattern, method) return method_map, options_map def _parse_config_object(config: dict, *, filename="(unknown)"): extractors = {} method_map = [] options_map = {} extractors_read = config.get("extractors", {}) if not isinstance(extractors_read, dict): raise ConfigurationError(f"{filename}: extractors: Expected a dictionary, got {type(extractors_read)!r}") for method, callable_spec in extractors_read.items(): if not isinstance(method, str): # Impossible via TOML, but could happen with a custom object. 
raise ConfigurationError(f"{filename}: extractors: Extraction method must be a string, got {method!r}") if not isinstance(callable_spec, str): raise ConfigurationError(f"{filename}: extractors: Callable specification must be a string, got {callable_spec!r}") extractors[method] = callable_spec if "mapping" in config: raise ConfigurationError(f"{filename}: 'mapping' is not a valid key, did you mean 'mappings'?") mappings_read = config.get("mappings", []) if not isinstance(mappings_read, list): raise ConfigurationError(f"{filename}: mappings: Expected a list, got {type(mappings_read)!r}") for idx, entry in enumerate(mappings_read): if not isinstance(entry, dict): raise ConfigurationError(f"{filename}: mappings[{idx}]: Expected a dictionary, got {type(entry)!r}") entry = entry.copy() method = entry.pop("method", None) if not isinstance(method, str): raise ConfigurationError(f"{filename}: mappings[{idx}]: 'method' must be a string, got {method!r}") method = extractors.get(method, method) # Map the extractor name to the callable now pattern = entry.pop("pattern", None) if not isinstance(pattern, (list, str)): raise ConfigurationError(f"{filename}: mappings[{idx}]: 'pattern' must be a list or a string, got {pattern!r}") if not isinstance(pattern, list): pattern = [pattern] for pat in pattern: if not isinstance(pat, str): raise ConfigurationError(f"{filename}: mappings[{idx}]: 'pattern' elements must be strings, got {pat!r}") method_map.append((pat, method)) options_map[pat] = entry return method_map, options_map def _parse_mapping_toml( fileobj: BinaryIO, filename: str = "(unknown)", style: Literal["standalone", "pyproject.toml"] = "standalone", ): """Parse an extraction method mapping from a binary file-like object. .. warning: As of this version of Babel, this is a private API subject to changes. :param fileobj: a readable binary file-like object containing the configuration TOML to parse :param filename: the name of the file being parsed, for error messages :param style: whether the file is in the style of a `pyproject.toml` file, i.e. whether to look for `tool.babel`. """ try: import tomllib except ImportError: try: import tomli as tomllib except ImportError as ie: # pragma: no cover raise ImportError("tomli or tomllib is required to parse TOML files") from ie try: parsed_data = tomllib.load(fileobj) except tomllib.TOMLDecodeError as e: raise ConfigurationError(f"{filename}: Error parsing TOML file: {e}") from e if style == "pyproject.toml": try: babel_data = parsed_data["tool"]["babel"] except (TypeError, KeyError) as e: raise ConfigurationError(f"{filename}: No 'tool.babel' section found in file") from e elif style == "standalone": babel_data = parsed_data if "babel" in babel_data: raise ConfigurationError(f"{filename}: 'babel' should not be present in a stand-alone configuration file") else: # pragma: no cover raise ValueError(f"Unknown TOML style {style!r}") return _parse_config_object(babel_data, filename=filename) def _parse_spec(s: str) -> tuple[int | None, tuple[int | tuple[int, str], ...]]: inds = [] number = None for x in s.split(','): if x[-1] == 't': number = int(x[:-1]) elif x[-1] == 'c': inds.append((int(x[:-1]), 'c')) else: inds.append(int(x)) return number, tuple(inds) def parse_keywords(strings: Iterable[str] = ()): """Parse keywords specifications from the given list of strings. >>> import pprint >>> keywords = ['_', 'dgettext:2', 'dngettext:2,3', 'pgettext:1c,2', ... 
'polymorphic:1', 'polymorphic:2,2t', 'polymorphic:3c,3t'] >>> pprint.pprint(parse_keywords(keywords)) {'_': None, 'dgettext': (2,), 'dngettext': (2, 3), 'pgettext': ((1, 'c'), 2), 'polymorphic': {None: (1,), 2: (2,), 3: ((3, 'c'),)}} The input keywords are in GNU Gettext style; see :doc:`cmdline` for details. The output is a dictionary mapping keyword names to a dictionary of specifications. Keys in this dictionary are numbers of arguments, where ``None`` means that all numbers of arguments are matched, and a number means only calls with that number of arguments are matched (which happens when using the "t" specifier). However, as a special case for backwards compatibility, if the dictionary of specifications would be ``{None: x}``, i.e., there is only one specification and it matches all argument counts, then it is collapsed into just ``x``. A specification is either a tuple or None. If a tuple, each element can be either a number ``n``, meaning that the nth argument should be extracted as a message, or the tuple ``(n, 'c')``, meaning that the nth argument should be extracted as context for the messages. A ``None`` specification is equivalent to ``(1,)``, extracting the first argument. """ keywords = {} for string in strings: if ':' in string: funcname, spec_str = string.split(':') number, spec = _parse_spec(spec_str) else: funcname = string number = None spec = None keywords.setdefault(funcname, {})[number] = spec # For best backwards compatibility, collapse {None: x} into x. for k, v in keywords.items(): if set(v) == {None}: keywords[k] = v[None] return keywords def __getattr__(name: str): # Re-exports for backwards compatibility; # `setuptools_frontend` is the canonical import location. if name in {'check_message_extractors', 'compile_catalog', 'extract_messages', 'init_catalog', 'update_catalog'}: from babel.messages import setuptools_frontend return getattr(setuptools_frontend, name) raise AttributeError(f"module {__name__!r} has no attribute {name!r}") if __name__ == '__main__': main() babel-2.17.0/babel/messages/jslexer.py000066400000000000000000000160471474743505200175730ustar00rootroot00000000000000""" babel.messages.jslexer ~~~~~~~~~~~~~~~~~~~~~~ A simple JavaScript 1.5 lexer which is used for the JavaScript extractor. :copyright: (c) 2013-2025 by the Babel Team. :license: BSD, see LICENSE for more details. """ from __future__ import annotations import re from collections.abc import Generator from typing import NamedTuple operators: list[str] = sorted([ '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=', '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=', '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')', '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':', ], key=len, reverse=True) escapes: dict[str, str] = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'} name_re = re.compile(r'[\w$_][\w\d$_]*', re.UNICODE) dotted_name_re = re.compile(r'[\w$_][\w\d$_.]*[\w\d$_.]', re.UNICODE) division_re = re.compile(r'/=?') regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*', re.DOTALL) line_re = re.compile(r'(\r\n|\n|\r)') line_join_re = re.compile(r'\\' + line_re.pattern) uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}') hex_escape_re = re.compile(r'[a-fA-F0-9]{1,2}') class Token(NamedTuple): type: str value: str lineno: int _rules: list[tuple[str | None, re.Pattern[str]]] = [ (None, re.compile(r'\s+', re.UNICODE)), (None, re.compile(r'