pax_global_header00006660000000000000000000000064142761660110014516gustar00rootroot0000000000000052 comment=596bf1c8752de45fa576a52c315d6d8cc5bb1a4e mdurl-0.1.2/000077500000000000000000000000001427616601100126415ustar00rootroot00000000000000mdurl-0.1.2/.bumpversion.cfg000066400000000000000000000007071427616601100157550ustar00rootroot00000000000000[bumpversion] commit = True tag = True tag_name = {new_version} current_version = 0.1.2 [bumpversion:file:pyproject.toml] search = version = "{current_version}" # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT replace = version = "{new_version}" # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT [bumpversion:file:src/mdurl/__init__.py] search = __version__ = "{current_version}" replace = __version__ = "{new_version}" mdurl-0.1.2/.flake8000066400000000000000000000002071427616601100140130ustar00rootroot00000000000000[flake8] max-line-length = 88 # These checks violate PEP8 so let's ignore them extend-ignore = E203 extend-exclude = */site-packages/* mdurl-0.1.2/.github/000077500000000000000000000000001427616601100142015ustar00rootroot00000000000000mdurl-0.1.2/.github/workflows/000077500000000000000000000000001427616601100162365ustar00rootroot00000000000000mdurl-0.1.2/.github/workflows/tests.yaml000066400000000000000000000042411427616601100202650ustar00rootroot00000000000000name: Tests on: push: branches: [ master ] tags: [ '[0-9]+.[0-9]+.[0-9]+*' ] pull_request: branches: [ master ] jobs: linters: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 with: python-version: '3.8' - name: Install pre-commit run: | pip install pre-commit - name: run linters # pre-commit also runs in pre-commit.ci, but let's have it here too # to block `pypi-publish` job from triggering if pre-commit fails run: | pre-commit run -a tests: runs-on: ${{ matrix.os }} strategy: matrix: python-version: ['pypy-3.7', '3.7', '3.8', '3.9', '3.10', '3.11-dev'] os: [ubuntu-latest, macos-latest, windows-latest] continue-on-error: ${{ matrix.python-version == '3.11-dev' }} steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install test deps run: | pip install . -r tests/requirements.txt - name: Test with pytest run: | # TODO: bump coverage % up to 100 pytest --cov --cov-fail-under=75 - name: Report coverage if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10' uses: codecov/codecov-action@v1 allgood: runs-on: ubuntu-latest needs: - tests - linters steps: - run: echo "Great success!" pypi-publish: # Only publish if all other jobs succeed needs: [ allgood ] if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 with: python-version: '3.7' - name: Install build and publish tools run: | pip install build twine - name: Build and check run: | rm -rf dist/ && python -m build twine check --strict dist/* - name: Publish run: | twine upload dist/* env: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} mdurl-0.1.2/.gitignore000066400000000000000000000034561427616601100146410ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # IntelliJ .idea/ # VS Code .vscode/ mdurl-0.1.2/.pre-commit-config.yaml000066400000000000000000000031701427616601100171230ustar00rootroot00000000000000repos: - repo: https://github.com/executablebooks/mdformat rev: b8c05ae822d53326e967da45367d0408afc56a81 # frozen: 0.7.14 hooks: - id: mdformat additional_dependencies: - mdformat-gfm - repo: https://github.com/asottile/yesqa rev: 265e9ff7c83add4949f81bb5fe14f4a743ffb04c # frozen: v1.4.0 hooks: - id: yesqa additional_dependencies: - flake8-bugbear - flake8-builtins - flake8-comprehensions - repo: https://github.com/PyCQA/isort rev: c5e8fa75dda5f764d20f66a215d71c21cfa198e1 # frozen: 5.10.1 hooks: - id: isort - repo: https://github.com/psf/black rev: f6c139c5215ce04fd3e73a900f1372942d58eca0 # frozen: 22.6.0 hooks: - id: black - repo: https://github.com/myint/docformatter rev: 67919ee01837761f2d954d7fbb08c12cdd38ec5a # frozen: v1.4 hooks: - id: docformatter - repo: https://github.com/pre-commit/pre-commit-hooks rev: 3298ddab3c13dd77d6ce1fc0baf97691430d84b0 # frozen: v4.3.0 hooks: - id: check-yaml - repo: https://github.com/pre-commit/pygrep-hooks rev: 6f51a66bba59954917140ec2eeeaa4d5e630e6ce # frozen: v1.9.0 hooks: - id: python-use-type-annotations - id: python-check-blanket-noqa - id: python-check-blanket-type-ignore - repo: https://github.com/PyCQA/flake8 rev: f8e1b317742036ff11ff86356fd2b68147e169f7 # frozen: 5.0.4 hooks: - id: flake8 additional_dependencies: - flake8-bugbear - flake8-builtins - flake8-comprehensions - repo: https://github.com/pre-commit/mirrors-mypy rev: fde4bb992b03943ecb94207a52739ba07957bd06 # frozen: v0.971 hooks: - id: mypy args: ["--scripts-are-modules"] additional_dependencies: - pytest mdurl-0.1.2/LICENSE000066400000000000000000000044421427616601100136520ustar00rootroot00000000000000Copyright (c) 2015 Vitaly Puzrin, Alex Kocharin. Copyright (c) 2021 Taneli Hukkinen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- .parse() is based on Joyent's node.js `url` code: Copyright Joyent, Inc. and other Node contributors. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. mdurl-0.1.2/README.md000066400000000000000000000012061427616601100141170ustar00rootroot00000000000000# mdurl [![Build Status](https://github.com/executablebooks/mdurl/workflows/Tests/badge.svg?branch=master)](https://github.com/executablebooks/mdurl/actions?query=workflow%3ATests+branch%3Amaster+event%3Apush) [![codecov.io](https://codecov.io/gh/executablebooks/mdurl/branch/master/graph/badge.svg)](https://codecov.io/gh/executablebooks/mdurl) [![PyPI version](https://img.shields.io/pypi/v/mdurl)](https://pypi.org/project/mdurl) This is a Python port of the JavaScript [mdurl](https://www.npmjs.com/package/mdurl) package. See the [upstream README.md file](https://github.com/markdown-it/mdurl/blob/master/README.md) for API documentation. mdurl-0.1.2/pyproject.toml000066400000000000000000000051051427616601100155560ustar00rootroot00000000000000[build-system] requires = ["flit_core>=3.2.0,<4"] build-backend = "flit_core.buildapi" [project] name = "mdurl" version = "0.1.2" # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT description = "Markdown URL utilities" authors = [ { name = "Taneli Hukkinen", email = "hukkin@users.noreply.github.com" }, ] license = { file = "LICENSE" } requires-python = ">=3.7" readme = "README.md" classifiers = [ "License :: OSI Approved :: MIT License", "Operating System :: MacOS", "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: Libraries :: Python Modules", "Typing :: Typed", ] keywords = ["markdown", "commonmark"] [project.urls] "Homepage" = "https://github.com/executablebooks/mdurl" [tool.isort] # Force imports to be sorted by module, independent of import type force_sort_within_sections = true # Group first party and local folder imports together no_lines_before = ["LOCALFOLDER"] # Configure isort to work without access to site-packages known_first_party = ["mdurl", "tests"] # Settings for Black compatibility profile = "black" [tool.pytest.ini_options] addopts = "--strict-markers --strict-config" xfail_strict = true [tool.tox] legacy_tox_ini = ''' [tox] # Only run pytest envs when no args given to tox envlist = py{37,38,39,310} isolated_build = True [testenv:py{37,38,39,310}] description = run tests deps = -r tests/requirements.txt commands = pytest {posargs} ''' [tool.coverage.run] source = ["mdurl"] [tool.coverage.report] # Regexes for lines to exclude from consideration exclude_lines = [ # Have to re-enable the standard pragma "pragma: no cover", # Code for static type checkers "if TYPE_CHECKING:", # Scripts 'if __name__ == .__main__.:', ] [tool.mypy] show_error_codes = true warn_unreachable = true warn_unused_ignores = true warn_redundant_casts = true warn_unused_configs = true # Disabling incremental mode is required for `warn_unused_configs = true` to work incremental = false disallow_untyped_defs = true check_untyped_defs = true strict_equality = true implicit_reexport = false no_implicit_optional = true overrides = [ { module = "tests.*", disallow_untyped_defs = false }, ] mdurl-0.1.2/src/000077500000000000000000000000001427616601100134305ustar00rootroot00000000000000mdurl-0.1.2/src/mdurl/000077500000000000000000000000001427616601100145535ustar00rootroot00000000000000mdurl-0.1.2/src/mdurl/__init__.py000066400000000000000000000010431427616601100166620ustar00rootroot00000000000000__all__ = ( "decode", "DECODE_DEFAULT_CHARS", "DECODE_COMPONENT_CHARS", "encode", "ENCODE_DEFAULT_CHARS", "ENCODE_COMPONENT_CHARS", "format", "parse", "URL", ) __version__ = "0.1.2" # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT from mdurl._decode import DECODE_COMPONENT_CHARS, DECODE_DEFAULT_CHARS, decode from mdurl._encode import ENCODE_COMPONENT_CHARS, ENCODE_DEFAULT_CHARS, encode from mdurl._format import format from mdurl._parse import url_parse as parse from mdurl._url import URL mdurl-0.1.2/src/mdurl/_decode.py000066400000000000000000000056741427616601100165230ustar00rootroot00000000000000from __future__ import annotations from collections.abc import Sequence import functools import re DECODE_DEFAULT_CHARS = ";/?:@&=+$,#" DECODE_COMPONENT_CHARS = "" decode_cache: dict[str, list[str]] = {} def get_decode_cache(exclude: str) -> Sequence[str]: if exclude in decode_cache: return decode_cache[exclude] cache: list[str] = [] decode_cache[exclude] = cache for i in range(128): ch = chr(i) cache.append(ch) for i in range(len(exclude)): ch_code = ord(exclude[i]) cache[ch_code] = "%" + ("0" + hex(ch_code)[2:].upper())[-2:] return cache # Decode percent-encoded string. # def decode(string: str, exclude: str = DECODE_DEFAULT_CHARS) -> str: cache = get_decode_cache(exclude) repl_func = functools.partial(repl_func_with_cache, cache=cache) return re.sub(r"(%[a-f0-9]{2})+", repl_func, string, flags=re.IGNORECASE) def repl_func_with_cache(match: re.Match, cache: Sequence[str]) -> str: seq = match.group() result = "" i = 0 l = len(seq) # noqa: E741 while i < l: b1 = int(seq[i + 1 : i + 3], 16) if b1 < 0x80: result += cache[b1] i += 3 # emulate JS for loop statement3 continue if (b1 & 0xE0) == 0xC0 and (i + 3 < l): # 110xxxxx 10xxxxxx b2 = int(seq[i + 4 : i + 6], 16) if (b2 & 0xC0) == 0x80: all_bytes = bytes((b1, b2)) try: result += all_bytes.decode() except UnicodeDecodeError: result += "\ufffd" * 2 i += 3 i += 3 # emulate JS for loop statement3 continue if (b1 & 0xF0) == 0xE0 and (i + 6 < l): # 1110xxxx 10xxxxxx 10xxxxxx b2 = int(seq[i + 4 : i + 6], 16) b3 = int(seq[i + 7 : i + 9], 16) if (b2 & 0xC0) == 0x80 and (b3 & 0xC0) == 0x80: all_bytes = bytes((b1, b2, b3)) try: result += all_bytes.decode() except UnicodeDecodeError: result += "\ufffd" * 3 i += 6 i += 3 # emulate JS for loop statement3 continue if (b1 & 0xF8) == 0xF0 and (i + 9 < l): # 111110xx 10xxxxxx 10xxxxxx 10xxxxxx b2 = int(seq[i + 4 : i + 6], 16) b3 = int(seq[i + 7 : i + 9], 16) b4 = int(seq[i + 10 : i + 12], 16) if (b2 & 0xC0) == 0x80 and (b3 & 0xC0) == 0x80 and (b4 & 0xC0) == 0x80: all_bytes = bytes((b1, b2, b3, b4)) try: result += all_bytes.decode() except UnicodeDecodeError: result += "\ufffd" * 4 i += 9 i += 3 # emulate JS for loop statement3 continue result += "\ufffd" i += 3 # emulate JS for loop statement3 return result mdurl-0.1.2/src/mdurl/_encode.py000066400000000000000000000050521427616601100165230ustar00rootroot00000000000000from __future__ import annotations from collections.abc import Sequence from string import ascii_letters, digits, hexdigits from urllib.parse import quote as encode_uri_component ASCII_LETTERS_AND_DIGITS = ascii_letters + digits ENCODE_DEFAULT_CHARS = ";/?:@&=+$,-_.!~*'()#" ENCODE_COMPONENT_CHARS = "-_.!~*'()" encode_cache: dict[str, list[str]] = {} # Create a lookup array where anything but characters in `chars` string # and alphanumeric chars is percent-encoded. def get_encode_cache(exclude: str) -> Sequence[str]: if exclude in encode_cache: return encode_cache[exclude] cache: list[str] = [] encode_cache[exclude] = cache for i in range(128): ch = chr(i) if ch in ASCII_LETTERS_AND_DIGITS: # always allow unencoded alphanumeric characters cache.append(ch) else: cache.append("%" + ("0" + hex(i)[2:].upper())[-2:]) for i in range(len(exclude)): cache[ord(exclude[i])] = exclude[i] return cache # Encode unsafe characters with percent-encoding, skipping already # encoded sequences. # # - string - string to encode # - exclude - list of characters to ignore (in addition to a-zA-Z0-9) # - keepEscaped - don't encode '%' in a correct escape sequence (default: true) def encode( string: str, exclude: str = ENCODE_DEFAULT_CHARS, *, keep_escaped: bool = True ) -> str: result = "" cache = get_encode_cache(exclude) l = len(string) # noqa: E741 i = 0 while i < l: code = ord(string[i]) # % if keep_escaped and code == 0x25 and i + 2 < l: if all(c in hexdigits for c in string[i + 1 : i + 3]): result += string[i : i + 3] i += 2 i += 1 # JS for loop statement3 continue if code < 128: result += cache[code] i += 1 # JS for loop statement3 continue if code >= 0xD800 and code <= 0xDFFF: if code >= 0xD800 and code <= 0xDBFF and i + 1 < l: next_code = ord(string[i + 1]) if next_code >= 0xDC00 and next_code <= 0xDFFF: result += encode_uri_component(string[i] + string[i + 1]) i += 1 i += 1 # JS for loop statement3 continue result += "%EF%BF%BD" i += 1 # JS for loop statement3 continue result += encode_uri_component(string[i]) i += 1 # JS for loop statement3 return result mdurl-0.1.2/src/mdurl/_format.py000066400000000000000000000011621427616601100165540ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from mdurl._url import URL def format(url: URL) -> str: # noqa: A001 result = "" result += url.protocol or "" result += "//" if url.slashes else "" result += url.auth + "@" if url.auth else "" if url.hostname and ":" in url.hostname: # ipv6 address result += "[" + url.hostname + "]" else: result += url.hostname or "" result += ":" + url.port if url.port else "" result += url.pathname or "" result += url.search or "" result += url.hash or "" return result mdurl-0.1.2/src/mdurl/_parse.py000066400000000000000000000261561427616601100164100ustar00rootroot00000000000000# Copyright Joyent, Inc. and other Node contributors. # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to permit # persons to whom the Software is furnished to do so, subject to the # following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE # USE OR OTHER DEALINGS IN THE SOFTWARE. # Changes from joyent/node: # # 1. No leading slash in paths, # e.g. in `url.parse('http://foo?bar')` pathname is ``, not `/` # # 2. Backslashes are not replaced with slashes, # so `http:\\example.org\` is treated like a relative path # # 3. Trailing colon is treated like a part of the path, # i.e. in `http://example.org:foo` pathname is `:foo` # # 4. Nothing is URL-encoded in the resulting object, # (in joyent/node some chars in auth and paths are encoded) # # 5. `url.parse()` does not have `parseQueryString` argument # # 6. Removed extraneous result properties: `host`, `path`, `query`, etc., # which can be constructed using other parts of the url. from __future__ import annotations from collections import defaultdict import re from mdurl._url import URL # Reference: RFC 3986, RFC 1808, RFC 2396 # define these here so at least they only have to be # compiled once on the first module load. PROTOCOL_PATTERN = re.compile(r"^([a-z0-9.+-]+:)", flags=re.IGNORECASE) PORT_PATTERN = re.compile(r":[0-9]*$") # Special case for a simple path URL SIMPLE_PATH_PATTERN = re.compile(r"^(//?(?!/)[^?\s]*)(\?[^\s]*)?$") # RFC 2396: characters reserved for delimiting URLs. # We actually just auto-escape these. DELIMS = ("<", ">", '"', "`", " ", "\r", "\n", "\t") # RFC 2396: characters not allowed for various reasons. UNWISE = ("{", "}", "|", "\\", "^", "`") + DELIMS # Allowed by RFCs, but cause of XSS attacks. Always escape these. AUTO_ESCAPE = ("'",) + UNWISE # Characters that are never ever allowed in a hostname. # Note that any invalid chars are also handled, but these # are the ones that are *expected* to be seen, so we fast-path # them. NON_HOST_CHARS = ("%", "/", "?", ";", "#") + AUTO_ESCAPE HOST_ENDING_CHARS = ("/", "?", "#") HOSTNAME_MAX_LEN = 255 HOSTNAME_PART_PATTERN = re.compile(r"^[+a-z0-9A-Z_-]{0,63}$") HOSTNAME_PART_START = re.compile(r"^([+a-z0-9A-Z_-]{0,63})(.*)$") # protocols that can allow "unsafe" and "unwise" chars. # protocols that never have a hostname. HOSTLESS_PROTOCOL = defaultdict( bool, { "javascript": True, "javascript:": True, }, ) # protocols that always contain a // bit. SLASHED_PROTOCOL = defaultdict( bool, { "http": True, "https": True, "ftp": True, "gopher": True, "file": True, "http:": True, "https:": True, "ftp:": True, "gopher:": True, "file:": True, }, ) class MutableURL: def __init__(self) -> None: self.protocol: str | None = None self.slashes: bool = False self.auth: str | None = None self.port: str | None = None self.hostname: str | None = None self.hash: str | None = None self.search: str | None = None self.pathname: str | None = None def parse(self, url: str, slashes_denote_host: bool) -> "MutableURL": lower_proto = "" slashes = False rest = url # trim before proceeding. # This is to support parse stuff like " http://foo.com \n" rest = rest.strip() if not slashes_denote_host and len(url.split("#")) == 1: # Try fast path regexp simple_path = SIMPLE_PATH_PATTERN.match(rest) if simple_path: self.pathname = simple_path.group(1) if simple_path.group(2): self.search = simple_path.group(2) return self proto = "" proto_match = PROTOCOL_PATTERN.match(rest) if proto_match: proto = proto_match.group() lower_proto = proto.lower() self.protocol = proto rest = rest[len(proto) :] # figure out if it's got a host # user@server is *always* interpreted as a hostname, and url # resolution will treat //foo/bar as host=foo,path=bar because that's # how the browser resolves relative URLs. if slashes_denote_host or proto or re.search(r"^//[^@/]+@[^@/]+", rest): slashes = rest.startswith("//") if slashes and not (proto and HOSTLESS_PROTOCOL[proto]): rest = rest[2:] self.slashes = True if not HOSTLESS_PROTOCOL[proto] and ( slashes or (proto and not SLASHED_PROTOCOL[proto]) ): # there's a hostname. # the first instance of /, ?, ;, or # ends the host. # # If there is an @ in the hostname, then non-host chars *are* allowed # to the left of the last @ sign, unless some host-ending character # comes *before* the @-sign. # URLs are obnoxious. # # ex: # http://a@b@c/ => user:a@b host:c # http://a@b?@c => user:a host:c path:/?@c # v0.12 TODO(isaacs): This is not quite how Chrome does things. # Review our test case against browsers more comprehensively. # find the first instance of any hostEndingChars host_end = -1 for i in range(len(HOST_ENDING_CHARS)): hec = rest.find(HOST_ENDING_CHARS[i]) if hec != -1 and (host_end == -1 or hec < host_end): host_end = hec # at this point, either we have an explicit point where the # auth portion cannot go past, or the last @ char is the decider. if host_end == -1: # atSign can be anywhere. at_sign = rest.rfind("@") else: # atSign must be in auth portion. # http://a@b/c@d => host:b auth:a path:/c@d at_sign = rest.rfind("@", 0, host_end + 1) # Now we have a portion which is definitely the auth. # Pull that off. if at_sign != -1: auth = rest[:at_sign] rest = rest[at_sign + 1 :] self.auth = auth # the host is the remaining to the left of the first non-host char host_end = -1 for i in range(len(NON_HOST_CHARS)): hec = rest.find(NON_HOST_CHARS[i]) if hec != -1 and (host_end == -1 or hec < host_end): host_end = hec # if we still have not hit it, then the entire thing is a host. if host_end == -1: host_end = len(rest) if host_end > 0 and rest[host_end - 1] == ":": host_end -= 1 host = rest[:host_end] rest = rest[host_end:] # pull out port. self.parse_host(host) # we've indicated that there is a hostname, # so even if it's empty, it has to be present. self.hostname = self.hostname or "" # if hostname begins with [ and ends with ] # assume that it's an IPv6 address. ipv6_hostname = self.hostname.startswith("[") and self.hostname.endswith( "]" ) # validate a little. if not ipv6_hostname: hostparts = self.hostname.split(".") l = len(hostparts) # noqa: E741 i = 0 while i < l: part = hostparts[i] if not part: i += 1 # emulate statement3 in JS for loop continue if not HOSTNAME_PART_PATTERN.search(part): newpart = "" k = len(part) j = 0 while j < k: if ord(part[j]) > 127: # we replace non-ASCII char with a temporary placeholder # we need this to make sure size of hostname is not # broken by replacing non-ASCII by nothing newpart += "x" else: newpart += part[j] j += 1 # emulate statement3 in JS for loop # we test again with ASCII char only if not HOSTNAME_PART_PATTERN.search(newpart): valid_parts = hostparts[:i] not_host = hostparts[i + 1 :] bit = HOSTNAME_PART_START.search(part) if bit: valid_parts.append(bit.group(1)) not_host.insert(0, bit.group(2)) if not_host: rest = ".".join(not_host) + rest self.hostname = ".".join(valid_parts) break i += 1 # emulate statement3 in JS for loop if len(self.hostname) > HOSTNAME_MAX_LEN: self.hostname = "" # strip [ and ] from the hostname # the host field still retains them, though if ipv6_hostname: self.hostname = self.hostname[1:-1] # chop off from the tail first. hash = rest.find("#") # noqa: A001 if hash != -1: # got a fragment string. self.hash = rest[hash:] rest = rest[:hash] qm = rest.find("?") if qm != -1: self.search = rest[qm:] rest = rest[:qm] if rest: self.pathname = rest if SLASHED_PROTOCOL[lower_proto] and self.hostname and not self.pathname: self.pathname = "" return self def parse_host(self, host: str) -> None: port_match = PORT_PATTERN.search(host) if port_match: port = port_match.group() if port != ":": self.port = port[1:] host = host[: -len(port)] if host: self.hostname = host def url_parse(url: URL | str, *, slashes_denote_host: bool = False) -> URL: if isinstance(url, URL): return url u = MutableURL() u.parse(url, slashes_denote_host) return URL( u.protocol, u.slashes, u.auth, u.port, u.hostname, u.hash, u.search, u.pathname ) mdurl-0.1.2/src/mdurl/_url.py000066400000000000000000000004341427616601100160670ustar00rootroot00000000000000from __future__ import annotations from typing import NamedTuple class URL(NamedTuple): protocol: str | None slashes: bool auth: str | None port: str | None hostname: str | None hash: str | None # noqa: A003 search: str | None pathname: str | None mdurl-0.1.2/src/mdurl/py.typed000066400000000000000000000000321427616601100162450ustar00rootroot00000000000000# Marker file for PEP 561 mdurl-0.1.2/tests/000077500000000000000000000000001427616601100140035ustar00rootroot00000000000000mdurl-0.1.2/tests/__init__.py000066400000000000000000000000001427616601100161020ustar00rootroot00000000000000mdurl-0.1.2/tests/decode.js000066400000000000000000000057061427616601100155740ustar00rootroot00000000000000// TODO: port to Python 'use strict'; var assert = require('assert'); var decode = require('../decode'); function encodeBinary(str) { var result = ''; str = str.replace(/\s+/g, ''); while (str.length) { result = '%' + ('0' + parseInt(str.slice(-8), 2).toString(16)).slice(-2) + result; str = str.slice(0, -8); } return result; } var samples = { '00000000': true, '01010101': true, '01111111': true, // invalid as 1st byte '10000000': true, '10111111': true, // invalid sequences, 2nd byte should be >= 0x80 '11000111 01010101': false, '11100011 01010101': false, '11110001 01010101': false, // invalid sequences, 2nd byte should be < 0xc0 '11000111 11000000': false, '11100011 11000000': false, '11110001 11000000': false, // invalid 3rd byte '11100011 10010101 01010101': false, '11110001 10010101 01010101': false, // invalid 4th byte '11110001 10010101 10010101 01010101': false, // valid sequences '11000111 10101010': true, '11100011 10101010 10101010': true, '11110001 10101010 10101010 10101010': true, // minimal chars with given length '11000010 10000000': true, '11100000 10100000 10000000': true, // impossible sequences '11000001 10111111': false, '11100000 10011111 10111111': false, '11000001 10000000': false, '11100000 10010000 10000000': false, // maximum chars with given length '11011111 10111111': true, '11101111 10111111 10111111': true, '11110000 10010000 10000000 10000000': true, '11110000 10010000 10001111 10001111': true, '11110100 10001111 10110000 10000000': true, '11110100 10001111 10111111 10111111': true, // too low '11110000 10001111 10111111 10111111': false, // too high '11110100 10010000 10000000 10000000': false, '11110100 10011111 10111111 10111111': false, // surrogate range '11101101 10011111 10111111': true, '11101101 10100000 10000000': false, '11101101 10111111 10111111': false, '11101110 10000000 10000000': true }; describe('decode', function() { it('should decode %xx', function() { assert.equal(decode('x%20xx%20%2520'), 'x xx %20'); }); it('should not decode invalid sequences', function() { assert.equal(decode('%2g%z1%%'), '%2g%z1%%'); }); it('should not decode reservedSet', function() { assert.equal(decode('%20%25%20', '%'), ' %25 '); assert.equal(decode('%20%25%20', ' '), '%20%%20'); assert.equal(decode('%20%25%20', ' %'), '%20%25%20'); }); describe('utf8', function() { Object.keys(samples).forEach(function(k) { it(k, function() { var res1, res2, er = null, str = encodeBinary(k); try { res1 = decodeURIComponent(str); } catch(e) { er = e; } res2 = decode(str); if (er) { assert.notEqual(res2.indexOf('\ufffd'), -1); } else { assert.equal(res1, res2); assert.equal(res2.indexOf('\ufffd'), -1); } }); }); }); }); mdurl-0.1.2/tests/fixtures/000077500000000000000000000000001427616601100156545ustar00rootroot00000000000000mdurl-0.1.2/tests/fixtures/__init__.py000066400000000000000000000000001427616601100177530ustar00rootroot00000000000000mdurl-0.1.2/tests/fixtures/url.py000066400000000000000000000434451427616601100170420ustar00rootroot00000000000000# Copyright Joyent, Inc. and other Node contributors. # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to permit # persons to whom the Software is furnished to do so, subject to the # following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE # USE OR OTHER DEALINGS IN THE SOFTWARE. # URLs to parse, and expected data # { url : parsed } PARSED = { "//some_path": {"pathname": "//some_path"}, "HTTP://www.example.com/": { "protocol": "HTTP:", "slashes": True, "hostname": "www.example.com", "pathname": "/", }, "HTTP://www.example.com": { "protocol": "HTTP:", "slashes": True, "hostname": "www.example.com", "pathname": "", }, "http://www.ExAmPlE.com/": { "protocol": "http:", "slashes": True, "hostname": "www.ExAmPlE.com", "pathname": "/", }, "http://user:pw@www.ExAmPlE.com/": { "protocol": "http:", "slashes": True, "auth": "user:pw", "hostname": "www.ExAmPlE.com", "pathname": "/", }, "http://USER:PW@www.ExAmPlE.com/": { "protocol": "http:", "slashes": True, "auth": "USER:PW", "hostname": "www.ExAmPlE.com", "pathname": "/", }, "http://user@www.example.com/": { "protocol": "http:", "slashes": True, "auth": "user", "hostname": "www.example.com", "pathname": "/", }, "http://user%3Apw@www.example.com/": { "protocol": "http:", "slashes": True, "auth": "user%3Apw", "hostname": "www.example.com", "pathname": "/", }, "http://x.com/path?that's#all, folks": { "protocol": "http:", "hostname": "x.com", "slashes": True, "search": "?that's", "pathname": "/path", "hash": "#all, folks", }, "HTTP://X.COM/Y": { "protocol": "HTTP:", "slashes": True, "hostname": "X.COM", "pathname": "/Y", }, # + not an invalid host character # per https://url.spec.whatwg.org/#host-parsing "http://x.y.com+a/b/c": { "protocol": "http:", "slashes": True, "hostname": "x.y.com+a", "pathname": "/b/c", }, # an unexpected invalid char in the hostname. "HtTp://x.y.cOm;a/b/c?d=e#f gi": { "protocol": "HtTp:", "slashes": True, "hostname": "x.y.cOm", "pathname": ";a/b/c", "search": "?d=e", "hash": "#f gi", }, # make sure that we don't accidentally lcast the path parts. "HtTp://x.y.cOm;A/b/c?d=e#f gi": { "protocol": "HtTp:", "slashes": True, "hostname": "x.y.cOm", "pathname": ";A/b/c", "search": "?d=e", "hash": "#f gi", }, "http://x...y...#p": { "protocol": "http:", "slashes": True, "hostname": "x...y...", "hash": "#p", "pathname": "", }, 'http://x/p/"quoted"': { "protocol": "http:", "slashes": True, "hostname": "x", "pathname": '/p/"quoted"', }, " Is a URL!": { "pathname": " Is a URL!" }, "http://www.narwhaljs.org/blog/categories?id=news": { "protocol": "http:", "slashes": True, "hostname": "www.narwhaljs.org", "search": "?id=news", "pathname": "/blog/categories", }, "http://mt0.google.com/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=": { "protocol": "http:", "slashes": True, "hostname": "mt0.google.com", "pathname": "/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=", }, "http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=": { "protocol": "http:", "slashes": True, "hostname": "mt0.google.com", "search": "???&hl=en&src=api&x=2&y=2&z=3&s=", "pathname": "/vt/lyrs=m@114", }, "http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=": { "protocol": "http:", "slashes": True, "auth": "user:pass", "hostname": "mt0.google.com", "search": "???&hl=en&src=api&x=2&y=2&z=3&s=", "pathname": "/vt/lyrs=m@114", }, "file:///etc/passwd": { "slashes": True, "protocol": "file:", "pathname": "/etc/passwd", "hostname": "", }, "file://localhost/etc/passwd": { "protocol": "file:", "slashes": True, "pathname": "/etc/passwd", "hostname": "localhost", }, "file://foo/etc/passwd": { "protocol": "file:", "slashes": True, "pathname": "/etc/passwd", "hostname": "foo", }, "file:///etc/node/": { "slashes": True, "protocol": "file:", "pathname": "/etc/node/", "hostname": "", }, "file://localhost/etc/node/": { "protocol": "file:", "slashes": True, "pathname": "/etc/node/", "hostname": "localhost", }, "file://foo/etc/node/": { "protocol": "file:", "slashes": True, "pathname": "/etc/node/", "hostname": "foo", }, "http:/baz/../foo/bar": {"protocol": "http:", "pathname": "/baz/../foo/bar"}, "http://user:pass@example.com:8000/foo/bar?baz=quux#frag": { "protocol": "http:", "slashes": True, "auth": "user:pass", "port": "8000", "hostname": "example.com", "hash": "#frag", "search": "?baz=quux", "pathname": "/foo/bar", }, "//user:pass@example.com:8000/foo/bar?baz=quux#frag": { "slashes": True, "auth": "user:pass", "port": "8000", "hostname": "example.com", "hash": "#frag", "search": "?baz=quux", "pathname": "/foo/bar", }, "/foo/bar?baz=quux#frag": { "hash": "#frag", "search": "?baz=quux", "pathname": "/foo/bar", }, "http:/foo/bar?baz=quux#frag": { "protocol": "http:", "hash": "#frag", "search": "?baz=quux", "pathname": "/foo/bar", }, "mailto:foo@bar.com?subject=hello": { "protocol": "mailto:", "auth": "foo", "hostname": "bar.com", "search": "?subject=hello", }, "javascript:alert('hello');": { "protocol": "javascript:", "pathname": "alert('hello');", }, "xmpp:isaacschlueter@jabber.org": { "protocol": "xmpp:", "auth": "isaacschlueter", "hostname": "jabber.org", }, "http://atpass:foo%40bar@127.0.0.1:8080/path?search=foo#bar": { "protocol": "http:", "slashes": True, "auth": "atpass:foo%40bar", "hostname": "127.0.0.1", "port": "8080", "pathname": "/path", "search": "?search=foo", "hash": "#bar", }, "svn+ssh://foo/bar": { "hostname": "foo", "protocol": "svn+ssh:", "pathname": "/bar", "slashes": True, }, "dash-test://foo/bar": { "hostname": "foo", "protocol": "dash-test:", "pathname": "/bar", "slashes": True, }, "dash-test:foo/bar": { "hostname": "foo", "protocol": "dash-test:", "pathname": "/bar", }, "dot.test://foo/bar": { "hostname": "foo", "protocol": "dot.test:", "pathname": "/bar", "slashes": True, }, "dot.test:foo/bar": { "hostname": "foo", "protocol": "dot.test:", "pathname": "/bar", }, # IDNA tests "http://www.日本語.com/": { "protocol": "http:", "slashes": True, "hostname": "www.日本語.com", "pathname": "/", }, "http://example.Bücher.com/": { "protocol": "http:", "slashes": True, "hostname": "example.Bücher.com", "pathname": "/", }, "http://www.Äffchen.com/": { "protocol": "http:", "slashes": True, "hostname": "www.Äffchen.com", "pathname": "/", }, "http://www.Äffchen.cOm;A/b/c?d=e#f gi": { "protocol": "http:", "slashes": True, "hostname": "www.Äffchen.cOm", "pathname": ";A/b/c", "search": "?d=e", "hash": "#f gi", }, "http://SÉLIER.COM/": { "protocol": "http:", "slashes": True, "hostname": "SÉLIER.COM", "pathname": "/", }, "http://ليهمابتكلموشعربي؟.ي؟/": { "protocol": "http:", "slashes": True, "hostname": "ليهمابتكلموشعربي؟.ي؟", "pathname": "/", }, "http://➡.ws/➡": { "protocol": "http:", "slashes": True, "hostname": "➡.ws", "pathname": "/➡", }, "http://bucket_name.s3.amazonaws.com/image.jpg": { "protocol": "http:", "slashes": True, "hostname": "bucket_name.s3.amazonaws.com", "pathname": "/image.jpg", }, "git+http://github.com/joyent/node.git": { "protocol": "git+http:", "slashes": True, "hostname": "github.com", "pathname": "/joyent/node.git", }, # if local1@domain1 is uses as a relative URL it may # be parse into auth@hostname, but here there is no # way to make it work in url.parse, I add the test to be explicit "local1@domain1": {"pathname": "local1@domain1"}, # While this may seem counter-intuitive, a browser will parse # as a path. "www.example.com": {"pathname": "www.example.com"}, # ipv6 support "[fe80::1]": {"pathname": "[fe80::1]"}, "coap://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]": { "protocol": "coap:", "slashes": True, "hostname": "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210", }, "coap://[1080:0:0:0:8:800:200C:417A]:61616/": { "protocol": "coap:", "slashes": True, "port": "61616", "hostname": "1080:0:0:0:8:800:200C:417A", "pathname": "/", }, "http://user:password@[3ffe:2a00:100:7031::1]:8080": { "protocol": "http:", "slashes": True, "auth": "user:password", "port": "8080", "hostname": "3ffe:2a00:100:7031::1", "pathname": "", }, "coap://u:p@[::192.9.5.5]:61616/.well-known/r?n=Temperature": { "protocol": "coap:", "slashes": True, "auth": "u:p", "port": "61616", "hostname": "::192.9.5.5", "search": "?n=Temperature", "pathname": "/.well-known/r", }, # empty port "http://example.com:": { "protocol": "http:", "slashes": True, "hostname": "example.com", "pathname": ":", }, "http://example.com:/a/b.html": { "protocol": "http:", "slashes": True, "hostname": "example.com", "pathname": ":/a/b.html", }, "http://example.com:?a=b": { "protocol": "http:", "slashes": True, "hostname": "example.com", "search": "?a=b", "pathname": ":", }, "http://example.com:#abc": { "protocol": "http:", "slashes": True, "hostname": "example.com", "hash": "#abc", "pathname": ":", }, "http://[fe80::1]:/a/b?a=b#abc": { "protocol": "http:", "slashes": True, "hostname": "fe80::1", "search": "?a=b", "hash": "#abc", "pathname": ":/a/b", }, "http://-lovemonsterz.tumblr.com/rss": { "protocol": "http:", "slashes": True, "hostname": "-lovemonsterz.tumblr.com", "pathname": "/rss", }, "http://-lovemonsterz.tumblr.com:80/rss": { "protocol": "http:", "slashes": True, "port": "80", "hostname": "-lovemonsterz.tumblr.com", "pathname": "/rss", }, "http://user:pass@-lovemonsterz.tumblr.com/rss": { "protocol": "http:", "slashes": True, "auth": "user:pass", "hostname": "-lovemonsterz.tumblr.com", "pathname": "/rss", }, "http://user:pass@-lovemonsterz.tumblr.com:80/rss": { "protocol": "http:", "slashes": True, "auth": "user:pass", "port": "80", "hostname": "-lovemonsterz.tumblr.com", "pathname": "/rss", }, "http://_jabber._tcp.google.com/test": { "protocol": "http:", "slashes": True, "hostname": "_jabber._tcp.google.com", "pathname": "/test", }, "http://user:pass@_jabber._tcp.google.com/test": { "protocol": "http:", "slashes": True, "auth": "user:pass", "hostname": "_jabber._tcp.google.com", "pathname": "/test", }, "http://_jabber._tcp.google.com:80/test": { "protocol": "http:", "slashes": True, "port": "80", "hostname": "_jabber._tcp.google.com", "pathname": "/test", }, "http://user:pass@_jabber._tcp.google.com:80/test": { "protocol": "http:", "slashes": True, "auth": "user:pass", "port": "80", "hostname": "_jabber._tcp.google.com", "pathname": "/test", }, "http://x:1/' <>\"`/{}|\\^~`/": { "protocol": "http:", "slashes": True, "port": "1", "hostname": "x", "pathname": "/' <>\"`/{}|\\^~`/", }, "http://a@b@c/": { "protocol": "http:", "slashes": True, "auth": "a@b", "hostname": "c", "pathname": "/", }, "http://a@b?@c": { "protocol": "http:", "slashes": True, "auth": "a", "hostname": "b", "pathname": "", "search": "?@c", }, "http://a\r\" \t\n<'b:b@c\r\nd/e?f": { "protocol": "http:", "slashes": True, "auth": "a\r\" \t\n<'b:b", "hostname": "c", "search": "?f", "pathname": "\r\nd/e", }, # git urls used by npm "git+ssh://git@github.com:npm/npm": { "protocol": "git+ssh:", "slashes": True, "auth": "git", "hostname": "github.com", "pathname": ":npm/npm", }, "http://example.com?foo=bar#frag": { "protocol": "http:", "slashes": True, "hostname": "example.com", "hash": "#frag", "search": "?foo=bar", "pathname": "", }, "http://example.com?foo=@bar#frag": { "protocol": "http:", "slashes": True, "hostname": "example.com", "hash": "#frag", "search": "?foo=@bar", "pathname": "", }, "http://example.com?foo=/bar/#frag": { "protocol": "http:", "slashes": True, "hostname": "example.com", "hash": "#frag", "search": "?foo=/bar/", "pathname": "", }, "http://example.com?foo=?bar/#frag": { "protocol": "http:", "slashes": True, "hostname": "example.com", "hash": "#frag", "search": "?foo=?bar/", "pathname": "", }, "http://example.com#frag=?bar/#frag": { "protocol": "http:", "slashes": True, "hostname": "example.com", "hash": "#frag=?bar/#frag", "pathname": "", }, 'http://google.com" onload="alert(42)/': { "hostname": "google.com", "protocol": "http:", "slashes": True, "pathname": '" onload="alert(42)/', }, "http://a.com/a/b/c?s#h": { "protocol": "http:", "slashes": True, "pathname": "/a/b/c", "hostname": "a.com", "hash": "#h", "search": "?s", }, "http://atpass:foo%40bar@127.0.0.1/": { "auth": "atpass:foo%40bar", "slashes": True, "hostname": "127.0.0.1", "protocol": "http:", "pathname": "/", }, "http://atslash%2F%40:%2F%40@foo/": { "auth": "atslash%2F%40:%2F%40", "hostname": "foo", "protocol": "http:", "pathname": "/", "slashes": True, }, # ipv6 support "coap:u:p@[::1]:61616/.well-known/r?n=Temperature": { "protocol": "coap:", "auth": "u:p", "hostname": "::1", "port": "61616", "pathname": "/.well-known/r", "search": "?n=Temperature", }, "coap:[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:61616/s/stopButton": { "hostname": "fedc:ba98:7654:3210:fedc:ba98:7654:3210", "port": "61616", "protocol": "coap:", "pathname": "/s/stopButton", }, # encode context-specific delimiters in path and query, but do not touch # other non-delimiter chars like `%`. # # `?` and `#` in path and search "http://ex.com/foo%3F100%m%23r?abc=the%231?&foo=bar#frag": { "protocol": "http:", "hostname": "ex.com", "hash": "#frag", "search": "?abc=the%231?&foo=bar", "pathname": "/foo%3F100%m%23r", "slashes": True, }, # `?` and `#` in search only "http://ex.com/fooA100%mBr?abc=the%231?&foo=bar#frag": { "protocol": "http:", "hostname": "ex.com", "hash": "#frag", "search": "?abc=the%231?&foo=bar", "pathname": "/fooA100%mBr", "slashes": True, }, # "http://": { "protocol": "http:", "hostname": "", "slashes": True, }, } mdurl-0.1.2/tests/requirements.txt000066400000000000000000000000421427616601100172630ustar00rootroot00000000000000pytest pytest-randomly pytest-cov mdurl-0.1.2/tests/test_decode.py000066400000000000000000000002171427616601100166370ustar00rootroot00000000000000from mdurl import decode def test_decode_multi_byte(): assert decode("https://host.invalid/%F0%9F%91%A9") == "https://host.invalid/👩" mdurl-0.1.2/tests/test_encode.py000066400000000000000000000033411427616601100166520ustar00rootroot00000000000000import pytest from mdurl import encode @pytest.mark.parametrize( "input_,expected", [ pytest.param("%%%", "%25%25%25", id="should encode percent"), pytest.param("\r\n", "%0D%0A", id="should encode control chars"), pytest.param("?#", "?#", id="should not encode parts of an url"), pytest.param("[]^", "%5B%5D%5E", id="should not encode []^ - commonmark tests"), pytest.param("my url", "my%20url", id="should encode spaces"), pytest.param("φου", "%CF%86%CE%BF%CF%85", id="should encode unicode"), pytest.param( "%FG", "%25FG", id="should encode % if it doesn't start a valid escape seq" ), pytest.param( "%00%FF", "%00%FF", id="should preserve non-utf8 encoded characters" ), pytest.param( "\x00\x7F\x80", "%00%7F%C2%80", id="should encode characters on the cache borders", ), # protects against off-by-one in cache implementation ], ) def test_encode(input_, expected): assert encode(input_) == expected def test_encode_arguments(): assert encode("!@#$", exclude="@$") == "%21@%23$" assert encode("%20%2G", keep_escaped=True) == "%20%252G" assert encode("%20%2G", keep_escaped=False) == "%2520%252G" assert encode("!@%25", exclude="@", keep_escaped=False) == "%21@%2525" def test_encode_surrogates(): # bad surrogates (high) assert encode("\uD800foo") == "%EF%BF%BDfoo" assert encode("foo\uD800") == "foo%EF%BF%BD" # bad surrogates (low) assert encode("\uDD00foo") == "%EF%BF%BDfoo" assert encode("foo\uDD00") == "foo%EF%BF%BD" # valid one # (the codepoint is "D800 DD00" in UTF-16BE) assert encode("𐄀") == "%F0%90%84%80" mdurl-0.1.2/tests/test_format.py000066400000000000000000000003431427616601100167040ustar00rootroot00000000000000import pytest from mdurl import format, parse from tests.fixtures.url import PARSED as FIXTURES @pytest.mark.parametrize("url", FIXTURES.keys()) def test_format(url): parsed = parse(url) assert format(parsed) == url mdurl-0.1.2/tests/test_parse.py000066400000000000000000000013461427616601100165320ustar00rootroot00000000000000import pytest from mdurl import parse from tests.fixtures.url import PARSED as FIXTURES def is_url_and_dict_equal(url, url_dict): return ( url.protocol == url_dict.get("protocol") and url.slashes == url_dict.get("slashes", False) and url.auth == url_dict.get("auth") and url.port == url_dict.get("port") and url.hostname == url_dict.get("hostname") and url.hash == url_dict.get("hash") and url.search == url_dict.get("search") and url.pathname == url_dict.get("pathname") ) @pytest.mark.parametrize( "url,expected_dict", FIXTURES.items(), ) def test_parse(url, expected_dict): parsed = parse(url) assert is_url_and_dict_equal(parsed, expected_dict)