pax_global_header00006660000000000000000000000064142302016020014500gustar00rootroot0000000000000052 comment=2948d927ba43be6098743dc6ae258393c91af358 pudo-prefixdate-9766ae8/000077500000000000000000000000001423020160200151535ustar00rootroot00000000000000pudo-prefixdate-9766ae8/.bumpversion.cfg000066400000000000000000000003161423020160200202630ustar00rootroot00000000000000[bumpversion] current_version = 0.4.1 commit = True tag = True [bumpversion:file:setup.py] search = version="{current_version}" replace = version="{new_version}" [bumpversion:file:prefixdate/__init__.py] pudo-prefixdate-9766ae8/.github/000077500000000000000000000000001423020160200165135ustar00rootroot00000000000000pudo-prefixdate-9766ae8/.github/FUNDING.yml000066400000000000000000000000741423020160200203310ustar00rootroot00000000000000# These are supported funding model platforms github: pudo pudo-prefixdate-9766ae8/.github/workflows/000077500000000000000000000000001423020160200205505ustar00rootroot00000000000000pudo-prefixdate-9766ae8/.github/workflows/ci.yml000066400000000000000000000016601423020160200216710ustar00rootroot00000000000000name: Build and publish on: [push] jobs: deploy: runs-on: ubuntu-latest environment: deploy steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: python-version: "3.9" - name: Install dependencies run: | python -m pip install --upgrade pip setuptools pip install -e ".[dev]" - name: Validate type annotations run: | mypy --strict prefixdate - name: Run tests run: | pytest --cov=prefixdate tests/ - name: Build a distribution run: | python setup.py sdist bdist_wheel - name: Publish to PyPI if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@master with: user: __token__ password: ${{ secrets.pypi_password }} packages_dir: dist/ pudo-prefixdate-9766ae8/.gitignore000066400000000000000000000001761423020160200171470ustar00rootroot00000000000000out.png dump.json __pycache__ *.pyc .idea/ .vscode/ *.egg-info dist/ build/ .mypy_cache/ .tox .pytest_cache .coverage htmlcov/pudo-prefixdate-9766ae8/LICENSE000066400000000000000000000021121423020160200161540ustar00rootroot00000000000000MIT License Copyright (c) 2017-2019 Journalism Development Network, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. pudo-prefixdate-9766ae8/MANIFEST.in000066400000000000000000000000761423020160200167140ustar00rootroot00000000000000include README.md include LICENSE include prefixdate/py.typed pudo-prefixdate-9766ae8/Makefile000066400000000000000000000001441423020160200166120ustar00rootroot00000000000000 test: mypy --strict prefixdate pytest --cov-report html --cov-report term --cov=prefixdate tests/pudo-prefixdate-9766ae8/README.md000066400000000000000000000052061423020160200164350ustar00rootroot00000000000000# Prefix date parser This is a helper class to parse dates with varied degrees of precision. For example, a data source might state a date as `2001`, `2001-4` or `2001-04-02`, with the implication that only the year, month or day is known. This library will process such partial dates into a structured format and allow their validation and re-formatting (e.g. turning `2001-4` into `2001-04` above). The library does not support the complexities of the ISO 8601 and RFC 3339 standards including date ranges and calendar-week/day-of-year notations. ## Installation Install `prefixdate` using PyPI: ```bash $ pip install prefixdate ``` ## Usage The library provides a variety of helper functions to parse and format partial dates: ```python from prefixdate import parse, normalize_date, Precision # Parse returns a `DatePrefix` object: date = parse('2001-3') assert date.text == '2001-03' date = parse(2001) assert date.text == '2001' assert date.precision == Precision.YEAR date = parse(None) assert date.text is None assert date.precision == Precision.EMPTY # This will also be the outcome for invalid dates! # Normalize to a standard string: assert normalize_date('2001-1') == '2001-01' assert normalize_date('2001-00-00') == '2001' assert normalize_date('Boo!') is None # This also works for datetimes: from datetime import datetime now = datetime.utcnow().isoformat() minute = normalize_date(now, precision=Precision.MINUTE) # You can also feed in None, date and datetime: normalize_date(datetime.utcnow()) normalize_date(datetime.date()) normalize_date(None) ``` You can also use the `parse_parts` helper, which is similar to the constructor for a `datetime`: ```python from prefixdate import parse_parts, Precision date = parse_parts(2001, '3', None) assert date.precision == Precision.MONTH assert date.text == '2001-03' ``` ### Format strings For dates which are not already stored in an ISO 8601-like string format, you can supply one or many format strings for `datetime.strptime`. The format strings will be analysed to determine how precise the resulting dates are expected to be. ```python from prefixdate import parse_format, parse_formats, Precision date = parse_format('YEAR 2021', 'YEAR %Y') assert date.precision == Precision.YEAR assert date.text == '2021' # You can try out multiple formats in sequence. The first non-empty prefix # will be returned: date = parse_formats('2021', ['%Y-%m-%d', '%Y-%m', '%Y']) assert date.precision == Precision.YEAR assert date.text == '2021' ``` ## Caveats * Datetimes are always converted to UTC and made naive (tzinfo stripped) * Does not process milliseconds yet. * Does not process invalid dates, like Feb 31st. pudo-prefixdate-9766ae8/prefixdate/000077500000000000000000000000001423020160200173065ustar00rootroot00000000000000pudo-prefixdate-9766ae8/prefixdate/__init__.py000066400000000000000000000025451423020160200214250ustar00rootroot00000000000000from typing import Optional, Union from prefixdate.parse import DatePrefix, Raw from prefixdate.precision import Precision from prefixdate.formats import parse_format, parse_formats, format_precision Part = Union[None, str, int] def parse(raw: Raw, precision: Precision = Precision.FULL) -> DatePrefix: """Parse the given input date string and return a `DatePrefix` object that holds a datetime, text version and the precision of the date.""" return DatePrefix(raw, precision=precision) def normalize_date(raw: Raw, precision: Precision = Precision.FULL) -> Optional[str]: """Take the given input date string and parse it into the normalised format to the precision given as an argument.""" return parse(raw, precision=precision).text def parse_parts( year: Part = None, month: Part = None, day: Part = None, hour: Part = None, minute: Part = None, second: Part = None, precision: Precision = Precision.FULL, ) -> DatePrefix: """Try to build a date prefix from the date components as given until one of them is null.""" raw = f"{year}-{month}-{day}T{hour}:{minute}:{second}" return parse(raw, precision=precision) __all__ = [ "DatePrefix", "Precision", "parse", "parse_parts", "normalize_date", "parse_format", "parse_formats", "format_precision", ] __version__ = "0.4.1" pudo-prefixdate-9766ae8/prefixdate/formats.py000066400000000000000000000045621423020160200213420ustar00rootroot00000000000000import re import logging from typing import Iterable from functools import lru_cache from datetime import datetime, date, timezone from prefixdate.precision import Precision from prefixdate.parse import DatePrefix, Raw log = logging.getLogger(__name__) MONTH_FORMATS = re.compile(r"(%b|%B|%m|%c|%x)") DAY_FORMATS = re.compile(r"(%d|%w|%c|%x)") HOUR_FORMATS = re.compile(r"(%H|%I|%c|%X)") MINUTE_FORMATS = re.compile(r"(%M|%c|%X)") SECOND_FORMATS = re.compile(r"(%S|%c|%X)") @lru_cache(maxsize=1000) def format_precision(format: str) -> Precision: """Determine the precision of a `datetime.strptime` format string so that it can be used in constructing a `DatePrefix`. This will check if the format string mentions directives with increasing precision. A format string that defines no date but only time directives will be considered `Precision.EMPTY`. """ if MONTH_FORMATS.search(format) is None: return Precision.YEAR if DAY_FORMATS.search(format) is None: return Precision.MONTH if HOUR_FORMATS.search(format) is None: return Precision.DAY if MINUTE_FORMATS.search(format) is None: return Precision.HOUR if SECOND_FORMATS.search(format) is None: return Precision.MINUTE return Precision.SECOND def parse_format(raw: Raw, format: str) -> DatePrefix: """Parse the given raw input using the supplied format string. The precision of the result is inferred from the format string.""" if isinstance(raw, int): raw = str(raw) elif isinstance(raw, (datetime, date, DatePrefix)): return DatePrefix(raw) elif raw is None: return DatePrefix(None, precision=Precision.EMPTY) try: dt = datetime.strptime(raw, format) precision = format_precision(format) return DatePrefix(dt, precision=precision) except (ValueError, TypeError): log.warning("Date %r does not match format %s", raw, format) return DatePrefix(None, precision=Precision.EMPTY) def parse_formats(raw: Raw, formats: Iterable[str]) -> DatePrefix: """Run `parse_format` using an iterable of format strings, returning the first non-empty result from parsing.""" prefix = DatePrefix(None, precision=Precision.EMPTY) for format in formats: prefix = parse_format(raw, format) if prefix.precision != Precision.EMPTY: return prefix return prefix pudo-prefixdate-9766ae8/prefixdate/parse.py000066400000000000000000000107341423020160200207770ustar00rootroot00000000000000import re import logging from functools import total_ordering from typing import cast, Union, Optional, Match, Tuple from datetime import datetime, date, timedelta, timezone from prefixdate.precision import Precision log = logging.getLogger(__name__) Raw = Union[None, str, date, datetime, int, "DatePrefix"] REGEX = re.compile( r"^\s*((?P[12]\d{3})" r"(-(?P\d{1,2})" r"(-(?P\d{1,2})" r"([T ]" r"((?P\d{1,2})" r"(:(?P\d{1,2})" r"(:(?P\d{1,2})" r"(\.\d{4,6})?" r"(Z|(?P[-+])(?P\d{2})(:?(?P\d{2}))" r"?)?)?)?)?)?)?)?)?.*" ) @total_ordering class DatePrefix(object): """A date that is specified in terms of a value and an additional precision, which defines how well specified the date is. A datetime representation is provided, but it is not aware of the precision aspect.""" __slots__ = ["precision", "dt", "text"] def __init__(self, raw: Raw, precision: Precision = Precision.FULL): self.precision, self.dt = self._parse(raw, precision) self.text: Optional[str] = None if self.dt is not None and self.precision != Precision.EMPTY: self.dt = self.dt if self.dt.tzinfo is not None and self.dt.tzinfo != timezone.utc: self.dt = self.dt.astimezone(timezone.utc) self.text = self.dt.isoformat()[: self.precision.value] def _parse(self, raw: Raw, pcn: Precision) -> Tuple[Precision, Optional[datetime]]: try: match = cast(Match[str], REGEX.match(raw)) # type: ignore except TypeError: if isinstance(raw, datetime): return (pcn, raw) if isinstance(raw, date): return self._parse(raw.isoformat(), pcn) if isinstance(raw, int): if 1000 < raw < 9999: return self._parse(str(raw), Precision.YEAR) if isinstance(raw, DatePrefix): return (raw.precision, raw.dt) if raw is not None: log.warning("Date value is invalid: %s", raw) return (Precision.EMPTY, None) pcn, year = self._extract(match, "year", 1000, pcn, Precision.EMPTY) pcn, month = self._extract(match, "month", 1, pcn, Precision.YEAR) pcn, day = self._extract(match, "day", 1, pcn, Precision.MONTH) pcn, hour = self._extract(match, "hour", 0, pcn, Precision.DAY) pcn, minute = self._extract(match, "minute", 0, pcn, Precision.HOUR) pcn, second = self._extract(match, "second", 0, pcn, Precision.MINUTE) try: tz = self._tzinfo(match) dt = datetime(year, month, day, hour, minute, second, tzinfo=tz) return (pcn, dt) except ValueError: log.warning("Date string is invalid: %s", raw) return (Precision.EMPTY, None) def _extract( self, match: Match[str], group: str, lowest: int, pcn: Precision, fail: Precision, ) -> Tuple[Precision, int]: try: value = int(match.group(group)) if value >= lowest: return (pcn, value) except (ValueError, TypeError, AttributeError): pass precision = Precision(min(pcn.value, fail.value)) return (precision, lowest) def _tzinfo(self, match: Match[str]) -> Optional[timezone]: """Parse the time zone information from a datetime string.""" # This is probably a bit rough-and-ready, there are good libraries # for this. Do we want to depend on one of them? try: sign = -1 if match.group("tzsign") == "-" else 1 hours = sign * int(match.group("tzhour")) minutes = sign * int(match.group("tzminute")) delta = timedelta(hours=hours, minutes=minutes) return timezone(delta) except (ValueError, TypeError, AttributeError): pass return None def __eq__(self, other: object) -> bool: return str(self) == str(other) def __lt__(self, other: object) -> bool: # cf. https://docs.python.org/3/library/functools.html#functools.total_ordering if isinstance(other, DatePrefix): return str(self) < str(other) return NotImplemented def __str__(self) -> str: return self.text or "" def __repr__(self) -> str: return "" % (self.text, self.precision) def __hash__(self) -> int: return hash(repr(self)) pudo-prefixdate-9766ae8/prefixdate/precision.py000066400000000000000000000004231423020160200216520ustar00rootroot00000000000000from enum import Enum class Precision(Enum): """A date precision, defined by the offset of relevant date parts in an ISO 8601 datetime string.""" EMPTY = 0 YEAR = 4 MONTH = 7 DAY = 10 HOUR = 13 MINUTE = 16 SECOND = 19 FULL = SECOND pudo-prefixdate-9766ae8/prefixdate/py.typed000066400000000000000000000000001423020160200207730ustar00rootroot00000000000000pudo-prefixdate-9766ae8/setup.cfg000066400000000000000000000001331423020160200167710ustar00rootroot00000000000000[bdist_wheel] universal = 1 [metadata] description-file = README.md license_file = LICENSEpudo-prefixdate-9766ae8/setup.py000066400000000000000000000023141423020160200166650ustar00rootroot00000000000000from setuptools import setup with open("README.md") as f: long_description = f.read() setup( name="prefixdate", version="0.4.1", description="Formatting utility for international postal addresses", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/pudo/prefixdate", author="Friedrich Lindenberg", author_email="friedrich@pudo.org", license="MIT", classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", "Operating System :: OS Independent", ], keywords="date, partial date, iso8601, rfc3339", packages=["prefixdate"], package_data={"prefixdate": ["py.typed"]}, include_package_data=True, scripts=[], install_requires=[], zip_safe=False, extras_require={ "dev": [ "pytest", "pytest-cov", "mypy", "bump2version", "wheel>=0.29.0", "twine", ], }, ) pudo-prefixdate-9766ae8/tests/000077500000000000000000000000001423020160200163155ustar00rootroot00000000000000pudo-prefixdate-9766ae8/tests/__init__.py000066400000000000000000000000001423020160200204140ustar00rootroot00000000000000pudo-prefixdate-9766ae8/tests/test_formats.py000066400000000000000000000024541423020160200214060ustar00rootroot00000000000000from prefixdate.formats import parse_format, parse_formats, format_precision, Precision def test_format_precision(): assert format_precision("la la %c bla") == Precision.SECOND assert format_precision("%Y bla") == Precision.YEAR assert format_precision("%m %Y") == Precision.MONTH assert format_precision("%d %b %Y") == Precision.DAY assert format_precision("%Y-%m-%dXX%H") == Precision.HOUR assert format_precision("%Y%m%d%H%M") == Precision.MINUTE def test_parse_format(): prefix = parse_format("2021 bla", "%Y bla") assert prefix.text == "2021" assert prefix.precision == Precision.YEAR second = parse_format(prefix, "%Y bla") assert second == prefix prefix = parse_format("2021 blubb", "%Y bla") assert prefix.text is None prefix = parse_format(None, "%Y bla") assert prefix.text is None prefix = parse_format(20210110, "%Y%m%d") assert prefix.text == "2021-01-10" assert prefix.precision == Precision.DAY def test_parse_formats(): prefix = parse_formats(None, ["%Y bla"]) assert prefix.text is None prefix = parse_formats("2021", []) assert prefix.text is None prefix = parse_formats("2021", ["%Y"]) assert prefix.text == "2021" prefix = parse_formats("2021", ["%Y-%m", "%Y"]) assert prefix.text == "2021" pudo-prefixdate-9766ae8/tests/test_parse.py000066400000000000000000000041631423020160200210440ustar00rootroot00000000000000import pytest from datetime import datetime from prefixdate import parse, normalize_date, parse_parts, Precision def test_normalize(): assert normalize_date(None) is None assert normalize_date("2001") == "2001" assert normalize_date(2001) == "2001" assert normalize_date(201) is None assert normalize_date("2001-01-") == "2001-01" assert normalize_date("2001-1") == "2001-01" assert normalize_date("2001-W19") == "2001" assert normalize_date("2001-05-18") == "2001-05-18" assert normalize_date("2001-02-31") is None assert normalize_date("2001-05-18", Precision.YEAR) == "2001" text = "2021-07-01T13:43:22.175889+00:00" assert normalize_date(text, Precision.MINUTE) == "2021-07-01T13:43" text = "2021-07-01T13:43:22.175889+06:00" assert normalize_date(text, Precision.MINUTE) == "2021-07-01T07:43" text = "2021-07-01T13:43:22.175889-08:45" assert normalize_date(text, Precision.MINUTE) == "2021-07-01T22:28" prefix = parse("2017-5-2T10:00:00") assert prefix.text == "2017-05-02T10:00:00" assert prefix == "2017-05-02T10:00:00" assert prefix.precision == Precision.SECOND prefix = parse("2017-5-2T10:00") assert prefix.precision == Precision.MINUTE text = "2017-04-04T10:30:29" prefix = parse(text) assert prefix.text == text assert prefix.precision == Precision.SECOND now = datetime.utcnow() assert parse(now).dt == now assert parse(now.date()).text == now.date().isoformat() assert str(parse(2001)) == "2001" assert repr(parse(2001)) == "" % Precision.YEAR # feed a prefix to parse: prefix = parse(now) out = parse(prefix) assert out == prefix early = parse("2017-04-04T10:30:29") late = parse("2017-04-09T10:30:29") assert early < late assert late > early with pytest.raises(TypeError): assert late > "banana" assert hash(late) is not None def test_parse_parts(): assert parse_parts(year=None).text is None assert parse_parts(year=2001, month=3, day=0).text == "2001-03" assert parse_parts(year=2001, month="03", day="0").text == "2001-03"