pax_global_header00006660000000000000000000000064125475516710014527gustar00rootroot0000000000000052 comment=69f4adf4a3a083822f023b41651194596ed1e3cf python-uritools-1.0.1/000077500000000000000000000000001254755167100147255ustar00rootroot00000000000000python-uritools-1.0.1/.gitignore000066400000000000000000000000671254755167100167200ustar00rootroot00000000000000*.egg-info *.pyc *.swp .coverage MANIFEST build/ dist/ python-uritools-1.0.1/.travis.yml000066400000000000000000000002341254755167100170350ustar00rootroot00000000000000language: python python: - 2.7 - 3.2 - 3.3 - 3.4 install: - pip install . coverage coveralls script: - python setup.py nosetests after_success: - coveralls python-uritools-1.0.1/CHANGES.rst000066400000000000000000000061771254755167100165420ustar00rootroot000000000000001.0.1 2015-07-09 ---------------- - Encode semicolon in query values passed to ``uricompose()``. 1.0.0 2015-06-12 ---------------- - Fix use of URI references as base URIs in ``urijoin()`` and ``SplitResult.transform()``. - Remove ``SplitResult.getaddrinfo()``. - Remove ``SplitResult.getauthority()``. - Remove ``SplitResult.gethostip()``; return ``ipaddress`` address objects from ``SplitResult.gethost()`` instead. - Remove ``SplitResult.gethost()`` `encoding` parameter. - Remove query delimiter parameters. - Return normalized paths from ``SplitResult.getpath()``. - Convert character constants to strings. 0.12.0 2015-04-03 ----------------- - Deprecate ``SplitResult.getaddrinfo()``. - Deprecate ``SplitResult.getauthority()``. - Deprecate ``SplitResult.gethost()`` and ``SplitResult.gethostip()`` `encoding` parameter; always use `utf-8` instead. - Drop support for "bytes-like objects". - Remove ``DefragResult.base``. 0.11.1 2015-03-25 ----------------- - Fix ``uricompose()`` for relative-path references with colons in the first path segment. 0.11.0 2014-12-16 ----------------- - Support `encoding=None` for ``uriencode()`` and ``uridecode()``. - Add optional `errors` parameter to decoding methods. 0.10.1 2014-11-30 ----------------- - Make ``uricompose()`` return ``str`` on all Python versions. 0.10.0 2014-11-30 ----------------- - Use ``ipaddress`` module for handling IPv4/IPv6 host addresses. - Add `userinfo`, `host` and `port` keyword arguments to ``uricompose()``. - Deprecate ``DefragResult.base``. - Feature freeze for `v1.0`. 0.9.0 2014-11-21 ---------------- - Improve Python 3 support. 0.8.0 2014-11-04 ---------------- - Fix ``uriencode()`` and ``uridecode()``. - Deprecate ``RE``, ``urinormpath()``, ``DefragResult.getbase()``. - Support non-string query values in ``uricompose()``. 0.7.0 2014-10-12 ---------------- - Add optional port parameter to ``SplitResult.getaddrinfo()``. - Cache ``SplitResult.authority`` subcomponents. 0.6.0 2014-09-17 ---------------- - Add basic IPv6 support. - Change ``SplitResult.port`` back to string, to distinguish between empty and absent port components. - Remove ``querysep`` and ``sep`` parameters. - Do not raise ``ValueError`` if scheme is not well-formed. - Improve Python 3 support. 0.5.2 2014-08-06 ---------------- - Fix empty port handling. 0.5.1 2014-06-22 ---------------- - Add basic Python 3 support. 0.5.0 2014-06-21 ---------------- - Add ``SplitResult.getaddrinfo()``. - Support query mappings and sequences in ``uricompose()``. 0.4.0 2014-03-20 ---------------- - Fix ``SplitResult.port`` to return int (matching urlparse). - Add ``SplitResult.getquerylist(), SplitResult.getquerydict()``. 0.3.0 2014-03-02 ---------------- - Add result object accessor methods. - Update documentation. 0.2.1 2014-02-24 ---------------- - Fix IndexError in ``urinormpath()``. - Integrate Python 2.7.6 ``urlparse`` unit tests. 0.2.0 2014-02-18 ---------------- - Add authority subcomponent attributes. - Return ``DefragResult`` from ``uridefrag()``. - Improve edge case behavior. 0.1.0 2014-02-14 ---------------- - Initial beta release. python-uritools-1.0.1/LICENSE000066400000000000000000000020701254755167100157310ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2014 Thomas Kemmer Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. python-uritools-1.0.1/MANIFEST.in000066400000000000000000000001511254755167100164600ustar00rootroot00000000000000include CHANGES.rst include LICENSE include MANIFEST.in include README.rst recursive-include tests *.py python-uritools-1.0.1/README.rst000066400000000000000000000071571254755167100164260ustar00rootroot00000000000000uritools ======================================================================== This module defines RFC 3986 compliant replacements for the most commonly used functions of the Python 2.7 Standard Library urlparse_ and Python 3 `urllib.parse`_ modules. .. code-block:: pycon >>> from uritools import urisplit, uriunsplit, urijoin, uridefrag >>> parts = urisplit('foo://user@example.com:8042/over/there?name=ferret#nose') >>> parts SplitResult(scheme='foo', authority='user@example.com:8042', path='/over/there', query='name=ferret', fragment='nose') >>> parts.scheme 'foo' >>> parts.authority 'user@example.com:8042' >>> parts.userinfo 'user' >>> parts.host 'example.com' >>> parts.port '8042' >>> uriunsplit(parts[:3] + ('name=swallow&type=African', 'beak')) 'foo://user@example.com:8042/over/there?name=swallow&type=African#beak' >>> urijoin('http://www.cwi.nl/~guido/Python.html', 'FAQ.html') 'http://www.cwi.nl/~guido/FAQ.html' >>> uridefrag('http://pythonhosted.org/uritools/index.html#constants') DefragResult(uri='http://pythonhosted.org/uritools/index.html', fragment='constants') For various reasons, the Python 2 urlparse_ module is not compliant with current Internet standards, does not include Unicode support, and is generally unusable with proprietary URI schemes. Python 3's `urllib.parse`_ improves on Unicode support, but the other issues still remain. As stated in `Lib/urllib/parse.py`_:: RFC 3986 is considered the current standard and any future changes to urlparse module should conform with it. The urlparse module is currently not entirely compliant with this RFC due to defacto scenarios for parsing, and for backward compatibility purposes, some parsing quirks from older RFCs are retained. This module aims to provide fully RFC 3986 compliant replacements for some commonly used functions found in urlparse_ and `urllib.parse`_, plus additional functions for conveniently composing URIs from their individual components. Installation ------------------------------------------------------------------------ Install uritools using pip:: pip install uritools Project Resources ------------------------------------------------------------------------ .. image:: http://img.shields.io/pypi/v/uritools.svg?style=flat :target: https://pypi.python.org/pypi/uritools/ :alt: Latest PyPI version .. image:: http://img.shields.io/pypi/dm/uritools.svg?style=flat :target: https://pypi.python.org/pypi/uritools/ :alt: Number of PyPI downloads .. image:: http://img.shields.io/travis/tkem/uritools/master.svg?style=flat :target: https://travis-ci.org/tkem/uritools/ :alt: Travis CI build status .. image:: http://img.shields.io/coveralls/tkem/uritools/master.svg?style=flat :target: https://coveralls.io/r/tkem/uritools :alt: Test coverage - `Documentation`_ - `Issue Tracker`_ - `Source Code`_ - `Change Log`_ License ------------------------------------------------------------------------ Copyright (c) 2014, 2015 Thomas Kemmer. Licensed under the `MIT License`_. .. _urlparse: http://docs.python.org/2/library/urlparse.html .. _urllib.parse: http://docs.python.org/3/library/urllib.parse.html .. _Lib/urllib/parse.py: https://hg.python.org/cpython/file/3.4/Lib/urllib/parse.py .. _Documentation: http://pythonhosted.org/uritools/ .. _Issue Tracker: https://github.com/tkem/uritools/issues/ .. _Source Code: https://github.com/tkem/uritools/ .. _Change Log: https://github.com/tkem/uritools/blob/master/CHANGES.rst .. _MIT License: http://raw.github.com/tkem/uritools/master/LICENSE python-uritools-1.0.1/docs/000077500000000000000000000000001254755167100156555ustar00rootroot00000000000000python-uritools-1.0.1/docs/.gitignore000066400000000000000000000000071254755167100176420ustar00rootroot00000000000000_build python-uritools-1.0.1/docs/Makefile000066400000000000000000000127041254755167100173210ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/uritools.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/uritools.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/uritools" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/uritools" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." python-uritools-1.0.1/docs/conf.py000066400000000000000000000174101254755167100171570ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # uritools documentation build configuration file, created by # sphinx-quickstart on Mon Feb 10 09:15:34 2014. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys import os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. #sys.path.insert(0, os.path.abspath('.')) sys.path.insert(0, os.path.abspath('..')) from uritools import __version__ # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. #needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.todo', 'sphinx.ext.coverage'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = u'uritools' copyright = u'2014, Thomas Kemmer' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = __version__ # The full version, including alpha/beta/rc tags. release = version # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. #language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". #html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_domain_indices = True # If false, no index is generated. #html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. #html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. #html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'uritoolsdoc' # -- Options for LaTeX output -------------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'uritools.tex', u'uritools Documentation', u'Thomas Kemmer', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # If true, show page references after internal links. #latex_show_pagerefs = False # If true, show URL addresses after external links. #latex_show_urls = False # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ('index', 'uritools', u'uritools Documentation', [u'Thomas Kemmer'], 1) ] # If true, show URL addresses after external links. #man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ('index', 'uritools', u'uritools Documentation', u'Thomas Kemmer', 'uritools', 'One line description of project.', 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. #texinfo_appendices = [] # If false, no module index is generated. #texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. #texinfo_show_urls = 'footnote' python-uritools-1.0.1/docs/index.rst000066400000000000000000000206021254755167100175160ustar00rootroot00000000000000:mod:`uritools` --- RFC 3986 compliant replacement for :mod:`urlparse` ======================================================================= .. module:: uritools This module defines RFC 3986 compliant replacements for the most commonly used functions of the Python 2.7 Standard Library :mod:`urlparse` and Python 3 :mod:`urllib.parse` modules. .. code-block:: pycon >>> from uritools import urisplit, uriunsplit, urijoin, uridefrag >>> parts = urisplit('foo://user@example.com:8042/over/there?name=ferret#nose') >>> parts SplitResult(scheme='foo', authority='user@example.com:8042', path='/over/there', query='name=ferret', fragment='nose') >>> parts.scheme 'foo' >>> parts.authority 'user@example.com:8042' >>> parts.userinfo 'user' >>> parts.host 'example.com' >>> parts.port '8042' >>> uriunsplit(parts[:3] + ('name=swallow&type=African', 'beak')) 'foo://user@example.com:8042/over/there?name=swallow&type=African#beak' >>> urijoin('http://www.cwi.nl/~guido/Python.html', 'FAQ.html') 'http://www.cwi.nl/~guido/FAQ.html' >>> uridefrag('http://pythonhosted.org/uritools/index.html#constants') DefragResult(uri='http://pythonhosted.org/uritools/index.html', fragment='constants') For various reasons, the Python 2 :mod:`urlparse` module is not compliant with current Internet standards, does not include Unicode support, and is generally unusable with proprietary URI schemes. Python 3's :mod:`urllib.parse` improves on Unicode support, but the other issues still remain. As stated in `Lib/urllib/parse.py`_:: FC 3986 is considered the current standard and any future changes to urlparse module should conform with it. The urlparse module is currently not entirely compliant with this RFC due to defacto scenarios for parsing, and for backward compatibility purposes, some parsing quirks from older RFCs are retained. This module aims to provide fully RFC 3986 compliant replacements for the most commonly used functions found in :mod:`urlparse` and :mod:`urllib.parse`, plus additional functions for conveniently composing URIs from their individual components. .. seealso:: :rfc:`3986` - Uniform Resource Identifier (URI): Generic Syntax The current Internet standard (STD66) defining URI syntax, to which any changes to :mod:`uritools` should conform. If deviations are observed, the module's implementation should be changed, even if this means breaking backward compatiblity. URI Decomposition ------------------------------------------------------------------------ .. autofunction:: uridefrag The return value is an instance of a subclass of :class:`collections.namedtuple` with the following read-only attributes: +-------------------+-------+---------------------------------------------+ | Attribute | Index | Value | +===================+=======+=============================================+ | :attr:`uri` | 0 | Absolute URI or relative URI reference | | | | without the fragment identifier | +-------------------+-------+---------------------------------------------+ | :attr:`fragment` | 1 | Fragment identifier, | | | | or :const:`None` if not present | +-------------------+-------+---------------------------------------------+ .. autofunction:: urisplit The return value is an instance of a subclass of :class:`collections.namedtuple` with the following read-only attributes: +-------------------+-------+---------------------------------------------+ | Attribute | Index | Value | +===================+=======+=============================================+ | :attr:`scheme` | 0 | URI scheme, or :const:`None` if not present | +-------------------+-------+---------------------------------------------+ | :attr:`authority` | 1 | Authority component, | | | | or :const:`None` if not present | +-------------------+-------+---------------------------------------------+ | :attr:`path` | 2 | Path component, always present but may be | | | | empty | +-------------------+-------+---------------------------------------------+ | :attr:`query` | 3 | Query component, | | | | or :const:`None` if not present | +-------------------+-------+---------------------------------------------+ | :attr:`fragment` | 4 | Fragment identifier, | | | | or :const:`None` if not present | +-------------------+-------+---------------------------------------------+ | :attr:`userinfo` | | Userinfo subcomponent of authority, | | | | or :const:`None` if not present | +-------------------+-------+---------------------------------------------+ | :attr:`host` | | Host subcomponent of authority, | | | | or :const:`None` if not present | +-------------------+-------+---------------------------------------------+ | :attr:`port` | | Port subcomponent of authority as a | | | | (possibly empty) string, | | | | or :const:`None` if not present | +-------------------+-------+---------------------------------------------+ URI Composition ------------------------------------------------------------------------ .. autofunction:: uricompose `authority` may be a Unicode string, :class:`bytes` object, or a three-item iterable specifying userinfo, host and port subcomponents. If both `authority` and any of the `userinfo`, `host` or `port` keyword arguments are given, the keyword argument will override the corresponding `authority` subcomponent. If `query` is a mapping object or a sequence of two-element tuples, it will be converted to a string of `name=value` pairs seperated by `&`. The returned value is of type :class:`str`. .. autofunction:: urijoin If `strict` is :const:`False`, a scheme in the reference is ignored if it is identical to the base URI's scheme. .. autofunction:: uriunsplit URI Encoding ------------------------------------------------------------------------ .. autofunction:: uridecode If `encoding` is set to :const:`None`, return the percent-decoded `uristring` as a :class:`bytes` object. Otherwise, replace any percent-encodings and decode `uristring` using the codec registered for `encoding`, returning a Unicode string. .. autofunction:: uriencode If `uristring` is a :class:`bytes` object, replace any characters not in :const:`UNRESERVED` or `safe` with their corresponding percent-encodings and return the result as a :class:`bytes` object. Otherwise, encode `uristring` using the codec registered for `encoding` before replacing any percent encodings. Note that `uristring` may be either a Unicode string or a :class:`bytes` object, while `safe` must be a :class:`bytes` object containg ASCII characters only. Character Constants ------------------------------------------------------------------------ .. data:: GEN_DELIMS A string containing all general delimiting characters specified in RFC 3986. .. data:: RESERVED A string containing all reserved characters specified in RFC 3986. .. data:: SUB_DELIMS A string containing all subcomponent delimiting characters specified in RFC 3986. .. data:: UNRESERVED A string containing all unreserved characters specified in RFC 3986. Structured Parse Results ------------------------------------------------------------------------ The result objects from the :func:`uridefrag` and :func:`urisplit` functions are instances of subclasses of :class:`collections.namedtuple`. These objects contain the attributes described in the function documentation, as well as some additional convenience methods. .. autoclass:: DefragResult :members: .. autoclass:: SplitResult :members: .. _Lib/urllib/parse.py: https://hg.python.org/cpython/file/3.4/Lib/urllib/parse.py python-uritools-1.0.1/setup.cfg000066400000000000000000000003731254755167100165510ustar00rootroot00000000000000[bdist_wheel] universal = 1 [flake8] exclude = .git,build,docs,setup.py [nosetests] with-coverage = 1 cover-package = uritools [build_sphinx] source-dir = docs/ build-dir = docs/_build all_files = 1 [upload_sphinx] upload-dir = docs/_build/html python-uritools-1.0.1/setup.py000066400000000000000000000034771254755167100164520ustar00rootroot00000000000000import codecs, os.path, re, sys from setuptools import setup # environment markers require a recent setuptools and/or pip version if sys.version_info >= (3, 3) or 'bdist_wheel' in sys.argv: install_requires = [] elif sys.version_info >= (3, 0): install_requires = ['ipaddress>=1.0.7'] else: install_requires = ['ipaddress>=1.0.6'] with codecs.open(os.path.join(os.path.dirname(__file__), 'uritools', '__init__.py'), encoding='utf8') as f: metadata = dict(re.findall(r"__([a-z]+)__ = '([^']+)", f.read())) setup( name='uritools', version=metadata['version'], author='Thomas Kemmer', author_email='tkemmer@computer.org', url='https://github.com/tkem/uritools/', license='MIT', description='RFC 3986 compliant, Unicode-aware, scheme-agnostic replacement for urlparse', long_description=open('README.rst').read(), keywords='uri url urlparse urlsplit urljoin urldefrag', classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: Other Environment', 'Intended Audience :: Developers', 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Topic :: Internet', 'Topic :: Software Development :: Libraries :: Python Modules' ], packages=['uritools'], install_requires=install_requires, extras_require={ ':python_version == "2.7"': ['ipaddress>=1.0.6'], ':python_version == "3.2"': ['ipaddress>=1.0.7'] }, test_suite='tests' ) python-uritools-1.0.1/tests/000077500000000000000000000000001254755167100160675ustar00rootroot00000000000000python-uritools-1.0.1/tests/__init__.py000066400000000000000000000000001254755167100201660ustar00rootroot00000000000000python-uritools-1.0.1/tests/test_compose.py000066400000000000000000000222721254755167100211520ustar00rootroot00000000000000from __future__ import unicode_literals import ipaddress import unittest from uritools import uricompose class ComposeTest(unittest.TestCase): def check(self, uri, **kwargs): result = uricompose(**kwargs) self.assertEqual(uri, result, msg='%r != %r (kwargs=%r)' % ( uri, result, kwargs) ) def test_rfc3986(self): """uricompose test cases from [RFC3986] 3. Syntax Components""" self.check( 'foo://example.com:42/over/there?name=ferret#nose', scheme='foo', authority='example.com:42', path='/over/there', query='name=ferret', fragment='nose' ) self.check( 'urn:example:animal:ferret:nose', scheme='urn', path='example:animal:ferret:nose' ) def test_scheme(self): cases = [ ('foo+bar:', 'foo+bar'), ('foo+bar:', b'foo+bar'), ('foo+bar:', 'FOO+BAR'), ('foo+bar:', b'FOO+BAR'), ] for uri, scheme in cases: self.check(uri, scheme=scheme) # invalid scheme for scheme in ('', 'foo:', '\xf6lk\xfcrbis'): with self.assertRaises(ValueError, msg='scheme=%r' % scheme): uricompose(scheme=scheme) def test_authority(self): cases = [ ('', None), ('//', ''), ('//', b''), ('//example.com', 'example.com'), ('//example.com', b'example.com'), ('//example.com', 'example.com:'), ('//example.com', b'example.com:'), ('//user@example.com', 'user@example.com'), ('//user@example.com', b'user@example.com'), ('//example.com:42', 'example.com:42'), ('//example.com:42', b'example.com:42'), ('//user@example.com:42', 'user@example.com:42'), ('//user@example.com:42', b'user@example.com:42'), ('//user@127.0.0.1:42', 'user@127.0.0.1:42'), ('//user@127.0.0.1:42', b'user@127.0.0.1:42'), ('//user@[::1]:42', 'user@[::1]:42'), ('//user@[::1]:42', b'user@[::1]:42'), ('//user:c2VjcmV0@example.com', 'user:c2VjcmV0@example.com'), ('//user:c2VjcmV0@example.com', b'user:c2VjcmV0@example.com'), ] for uri, authority in cases: self.check(uri, authority=authority) # invalid authority type for authority in (True, 42, 3.14, ipaddress.IPv6Address('::1')): with self.assertRaises(TypeError, msg='authority=%r' % authority): uricompose(authority=authority) def test_authority_kwargs(self): from ipaddress import IPv4Address, IPv6Address cases = [ ('', [None, None, None]), ('//', [None, '', None]), ('//', [None, b'', None]), ('//example.com', [None, 'example.com', None]), ('//example.com', [None, b'example.com', None]), ('//example.com', [None, 'example.com', '']), ('//example.com', [None, 'example.com', b'']), ('//user@example.com', ['user', 'example.com', None]), ('//user@example.com', [b'user', 'example.com', None]), ('//user@example.com', [b'user', b'example.com', None]), ('//example.com:42', [None, 'example.com', '42']), ('//example.com:42', [None, b'example.com', '42']), ('//example.com:42', [None, b'example.com', b'42']), ('//example.com:42', [None, 'example.com', 42]), ('//example.com:42', [None, b'example.com', 42]), ('//user@example.com:42', ['user', 'example.com', '42']), ('//user@example.com:42', [b'user', 'example.com', '42']), ('//user@example.com:42', [b'user', b'example.com', '42']), ('//user@example.com:42', [b'user', b'example.com', b'42']), ('//user@example.com:42', ['user', 'example.com', 42]), ('//user@example.com:42', [b'user', 'example.com', 42]), ('//user@example.com:42', [b'user', b'example.com', 42]), ('//user@127.0.0.1:42', ['user', '127.0.0.1', 42]), ('//user@127.0.0.1:42', ['user', b'127.0.0.1', 42]), ('//user@127.0.0.1:42', ['user', IPv4Address('127.0.0.1'), 42]), ('//user@[::1]:42', ['user', '::1', 42]), ('//user@[::1]:42', ['user', b'::1', 42]), ('//user@[::1]:42', ['user', '[::1]', 42]), ('//user@[::1]:42', ['user', b'[::1]', 42]), ('//user@[::1]:42', ['user', IPv6Address('::1'), 42]), ] for uri, authority in cases: self.check(uri, authority=authority) userinfo, host, port = authority self.check(uri, userinfo=userinfo, host=host, port=port) # invalid authority value for authority in ([], ['foo'], ['foo', 'bar'], range(4)): with self.assertRaises(ValueError, msg='authority=%r' % authority): uricompose(authority=authority) # invalid host type for host in (True, 42, 3.14, ipaddress.IPv6Network('2001:db00::0/24')): with self.assertRaises(AttributeError, msg='host=%r' % host): uricompose(authority=[None, host, None]) with self.assertRaises(AttributeError, msg='host=%r' % host): uricompose(host=host) # invalid host ip-literal for host in ('[foo]', '[v1.x]'): with self.assertRaises(ValueError, msg='host=%r' % host): uricompose(authority=[None, host, None]) with self.assertRaises(ValueError, msg='host=%r' % host): uricompose(host=host) # invalid port value for port in (-1, 'foo', 3.14): with self.assertRaises(ValueError, msg='port=%r' % port): uricompose(authority=[None, '', port]) with self.assertRaises(ValueError, msg='port=%r' % port): uricompose(port=port) def test_authority_override(self): cases = [ ('//user@example.com:42', None, 'user', 'example.com', 42), ('//user@example.com:42', '', 'user', 'example.com', 42), ('//user@example.com:42', 'example.com', 'user', None, 42), ('//user@example.com:42', 'user@:42', None, 'example.com', None), ] for uri, authority, userinfo, host, port in cases: self.check(uri, authority=authority, userinfo=userinfo, host=host, port=port) def test_path(self): cases = [ ('foo', 'foo'), ('foo', b'foo'), ('foo+bar', 'foo+bar'), ('foo+bar', b'foo+bar'), ('foo%20bar', 'foo bar'), ('foo%20bar', b'foo bar'), ('./this:that', 'this:that'), ('./this:that', b'this:that'), ('./this:that/', 'this:that/'), ('./this:that/', b'this:that/'), ] for uri, path in cases: self.check(uri, path=path) # invalid path with authority for path in ('foo', b'foo'): with self.assertRaises(ValueError, msg='path=%r' % path): uricompose(authority='auth', path=path) # invalid path without authority for path in ('//', b'//', '//foo', b'//foo'): with self.assertRaises(ValueError, msg='path=%r' % path): uricompose(path=path) def test_query(self): from collections import OrderedDict as od cases = [ ('?', ''), ('?', b''), ('?', []), ('?', {}), ('?name', 'name'), ('?name', b'name'), ('?name', [('name', None)]), ('?name', [(b'name', None)]), ('?name', {'name': None}), ('?name', {b'name': None}), ('?name=foo', 'name=foo'), ('?name=foo', b'name=foo'), ('?name=foo', [('name', 'foo')]), ('?name=foo', [('name', b'foo')]), ('?name=foo', [(b'name', b'foo')]), ('?name=foo', {'name': 'foo'}), ('?name=foo', {'name': b'foo'}), ('?name=foo', {'name': ['foo']}), ('?name=foo', {'name': [b'foo']}), ('?name=foo', {b'name': b'foo'}), ('?name=foo', {b'name': [b'foo']}), ('?name=42', [('name', 42)]), ('?name=42', {'name': 42}), ('?name=42', {'name': [42]}), ('?name=foo&type=bar', [('name', 'foo'), ('type', 'bar')]), ('?name=foo&type=bar', od([('name', 'foo'), ('type', 'bar')])), ('?name=foo&name=bar', [('name', 'foo'), ('name', 'bar')]), ('?name=foo&name=bar', {'name': ['foo', 'bar']}), ('?name=a/b/c', dict(name='a/b/c')), ('?name=a:b:c', dict(name='a:b:c')), ('?name=a?b?c', dict(name='a?b?c')), ('?name=a@b@c', dict(name='a@b@c')), ('?name=a%23b%23c', dict(name='a#b#c')), ('?name=a%26b%26c', dict(name='a&b&c')), ('?name=a%3Bb%3Bc', dict(name='a;b;c')), ] for uri, query in cases: self.check(uri, query=query) # invalid query type for query in (0, [1]): with self.assertRaises(TypeError, msg='query=%r' % query): uricompose(query=query) python-uritools-1.0.1/tests/test_defrag.py000066400000000000000000000036761254755167100207440ustar00rootroot00000000000000from __future__ import unicode_literals import unittest from uritools import uridefrag class DefragTest(unittest.TestCase): def test_uridefrag(self): cases = [ ('http://python.org#frag', 'http://python.org', 'frag'), ('http://python.org', 'http://python.org', None), ('http://python.org/#frag', 'http://python.org/', 'frag'), ('http://python.org/', 'http://python.org/', None), ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'), ('http://python.org/?q', 'http://python.org/?q', None), ('http://python.org/p#frag', 'http://python.org/p', 'frag'), ('http://python.org/p?q', 'http://python.org/p?q', None), ('http://python.org#', 'http://python.org', ''), ('http://python.org/#', 'http://python.org/', ''), ('http://python.org/?q#', 'http://python.org/?q', ''), ('http://python.org/p?q#', 'http://python.org/p?q', ''), ] def encode(s): return s.encode() if s is not None else None cases += list(map(encode, case) for case in cases) for uri, base, fragment in cases: defrag = uridefrag(uri) self.assertEqual(defrag, (base, fragment)) self.assertEqual(defrag.uri, base) self.assertEqual(defrag.fragment, fragment) self.assertEqual(uri, defrag.geturi()) def test_getfragment(self): self.assertEqual(uridefrag('').getfragment(), None) self.assertEqual(uridefrag(b'').getfragment(), None) self.assertEqual(uridefrag('#').getfragment(), '') self.assertEqual(uridefrag(b'#').getfragment(), '') self.assertEqual(uridefrag('#foo').getfragment(), 'foo') self.assertEqual(uridefrag(b'#foo').getfragment(), 'foo') self.assertEqual(uridefrag('#foo%20bar').getfragment(), 'foo bar') self.assertEqual(uridefrag(b'#foo%20bar').getfragment(), 'foo bar') python-uritools-1.0.1/tests/test_encoding.py000066400000000000000000000053151254755167100212720ustar00rootroot00000000000000from __future__ import unicode_literals import unittest from uritools import RESERVED, UNRESERVED, uridecode, uriencode class EncodingTest(unittest.TestCase): def check(self, decoded, encoded, safe=b'', encoding='utf-8'): self.assertEqual(uriencode(decoded, safe, encoding), encoded) self.assertEqual(uridecode(encoded, encoding), decoded) # swap bytes/string types self.assertEqual(uriencode(decoded.encode(encoding), safe, encoding), encoded) # noqa self.assertEqual(uridecode(encoded.decode('ascii'), encoding), decoded) def test_encoding(self): cases = [ ('', b''), (' ', b'%20'), ('%', b'%25'), ('~', b'~'), (UNRESERVED, UNRESERVED.encode('ascii')), ] for decoded, encoded in cases: self.check(decoded, encoded) def test_safe_encoding(self): cases = [ ('', b'', ''), (' ', b' ', ' '), ('%', b'%', '%'), (RESERVED, RESERVED.encode('ascii'), RESERVED) ] for decoded, encoded, safe in cases: self.check(decoded, encoded, safe) def test_utf8_encoding(self): cases = [ ('\xf6lk\xfcrbis', b'%C3%B6lk%C3%BCrbis') ] for decoded, encoded in cases: self.check(decoded, encoded, encoding='utf-8') def test_latin1_encoding(self): cases = [ ('\xf6lk\xfcrbis', b'%F6lk%FCrbis') ] for decoded, encoded in cases: self.check(decoded, encoded, encoding='latin-1') def test_idna_encoding(self): cases = [ ('\xf6lk\xfcrbis', b'xn--lkrbis-vxa4c') ] for decoded, encoded in cases: self.check(decoded, encoded, encoding='idna') def test_decode_bytes(self): cases = [ ('%F6lk%FCrbis', b'\xf6lk\xfcrbis'), (b'%F6lk%FCrbis', b'\xf6lk\xfcrbis') ] for input, output in cases: self.assertEqual(uridecode(input, encoding=None), output) def test_encode_bytes(self): cases = [ (b'\xf6lk\xfcrbis', b'%F6lk%FCrbis') ] for input, output in cases: self.assertEqual(uriencode(input, encoding=None), output) def test_decode_errors(self): cases = [ (UnicodeError, b'%FF', 'utf-8'), ] for exception, string, encoding in cases: self.assertRaises(exception, uridecode, string, encoding) def test_encode_errors(self): cases = [ (UnicodeError, '\xff', b'', 'ascii'), ] for exception, string, safe, encoding in cases: self.assertRaises(exception, uriencode, string, safe, encoding) python-uritools-1.0.1/tests/test_join.py000066400000000000000000000113421254755167100204400ustar00rootroot00000000000000from __future__ import unicode_literals import unittest from uritools import urijoin class JoinTest(unittest.TestCase): RFC3986_BASE = "http://a/b/c/d;p?q" def check(self, base, ref, expected, strict=False): self.assertEqual(expected, urijoin(base, ref, strict)) # base as bytes, ref as str self.assertEqual(expected, urijoin(base.encode(), ref, strict)) # base as str, ref as bytes self.assertEqual(expected, urijoin(base, ref.encode(), strict)) # both base and ref as bytes self.assertEqual( expected.encode(), urijoin(base.encode(), ref.encode(), strict) ) def test_rfc3986_normal(self): """urijoin test cases from RFC 3986 5.4.1. Normal Examples""" self.check(self.RFC3986_BASE, "g:h", "g:h") self.check(self.RFC3986_BASE, "g", "http://a/b/c/g") self.check(self.RFC3986_BASE, "./g", "http://a/b/c/g") self.check(self.RFC3986_BASE, "g/", "http://a/b/c/g/") self.check(self.RFC3986_BASE, "/g", "http://a/g") self.check(self.RFC3986_BASE, "//g", "http://g") self.check(self.RFC3986_BASE, "?y", "http://a/b/c/d;p?y") self.check(self.RFC3986_BASE, "g?y", "http://a/b/c/g?y") self.check(self.RFC3986_BASE, "#s", "http://a/b/c/d;p?q#s") self.check(self.RFC3986_BASE, "g#s", "http://a/b/c/g#s") self.check(self.RFC3986_BASE, "g?y#s", "http://a/b/c/g?y#s") self.check(self.RFC3986_BASE, ";x", "http://a/b/c/;x") self.check(self.RFC3986_BASE, "g;x", "http://a/b/c/g;x") self.check(self.RFC3986_BASE, "g;x?y#s", "http://a/b/c/g;x?y#s") self.check(self.RFC3986_BASE, "", "http://a/b/c/d;p?q") self.check(self.RFC3986_BASE, ".", "http://a/b/c/") self.check(self.RFC3986_BASE, "./", "http://a/b/c/") self.check(self.RFC3986_BASE, "..", "http://a/b/") self.check(self.RFC3986_BASE, "../", "http://a/b/") self.check(self.RFC3986_BASE, "../g", "http://a/b/g") self.check(self.RFC3986_BASE, "../..", "http://a/") self.check(self.RFC3986_BASE, "../../", "http://a/") self.check(self.RFC3986_BASE, "../../g", "http://a/g") def test_rfc3986_abnormal(self): """urijoin test cases from RFC 3986 5.4.2. Abnormal Examples""" self.check(self.RFC3986_BASE, "../../../g", "http://a/g") self.check(self.RFC3986_BASE, "../../../../g", "http://a/g") self.check(self.RFC3986_BASE, "/./g", "http://a/g") self.check(self.RFC3986_BASE, "/../g", "http://a/g") self.check(self.RFC3986_BASE, "g.", "http://a/b/c/g.") self.check(self.RFC3986_BASE, ".g", "http://a/b/c/.g") self.check(self.RFC3986_BASE, "g..", "http://a/b/c/g..") self.check(self.RFC3986_BASE, "..g", "http://a/b/c/..g") self.check(self.RFC3986_BASE, "./../g", "http://a/b/g") self.check(self.RFC3986_BASE, "./g/.", "http://a/b/c/g/") self.check(self.RFC3986_BASE, "g/./h", "http://a/b/c/g/h") self.check(self.RFC3986_BASE, "g/../h", "http://a/b/c/h") self.check(self.RFC3986_BASE, "g;x=1/./y", "http://a/b/c/g;x=1/y") self.check(self.RFC3986_BASE, "g;x=1/../y", "http://a/b/c/y") self.check(self.RFC3986_BASE, "g?y/./x", "http://a/b/c/g?y/./x") self.check(self.RFC3986_BASE, "g?y/../x", "http://a/b/c/g?y/../x") self.check(self.RFC3986_BASE, "g#s/./x", "http://a/b/c/g#s/./x") self.check(self.RFC3986_BASE, "g#s/../x", "http://a/b/c/g#s/../x") self.check(self.RFC3986_BASE, "http:g", "http:g", True) self.check(self.RFC3986_BASE, "http:g", "http://a/b/c/g", False) def test_rfc3986_merge(self): """urijoin test cases for RFC 3986 5.2.3. Merge Paths""" self.check('http://a', 'b', 'http://a/b') def test_relative_base(self): self.check('', "bar", "bar") self.check('foo', "bar", "bar") self.check('foo/', "bar", "foo/bar") self.check('.', "bar", "bar") self.check('./', "bar", "bar") self.check('./foo', "bar", "bar") self.check('./foo/', "bar", "foo/bar") self.check('..', "bar", "bar") self.check('../', "bar", "../bar") self.check('../foo', "bar", "../bar") self.check('../foo/', "bar", "../foo/bar") self.check('', "../bar", "../bar") self.check('foo', "../bar", "../bar") self.check('foo/', "../bar", "bar") self.check('.', "../bar", "../bar") self.check('./', "../bar", "../bar") self.check('./foo', "../bar", "../bar") self.check('./foo/', "../bar", "bar") self.check('..', "../bar", "../bar") self.check('../', "../bar", "../../bar") self.check('../foo', "../bar", "../../bar") self.check('../foo/', "../bar", "../bar") python-uritools-1.0.1/tests/test_split.py000066400000000000000000000374201254755167100206410ustar00rootroot00000000000000from __future__ import unicode_literals import unittest from uritools import urisplit class SplitTest(unittest.TestCase): def check(self, uri, parts, decoded=None): result = urisplit(uri) self.assertEqual(result, parts, 'Error parsing %r' % uri) self.assertEqual(result.geturi(), uri, 'Error recomposing %r' % uri) def test_rfc3986(self): """urisplit test cases from [RFC3986] 3. Syntax Components""" cases = [ ('foo://example.com:8042/over/there?name=ferret#nose', ('foo', 'example.com:8042', '/over/there', 'name=ferret', 'nose')), ('urn:example:animal:ferret:nose', ('urn', None, 'example:animal:ferret:nose', None, None)), (b'foo://example.com:8042/over/there?name=ferret#nose', (b'foo', b'example.com:8042', b'/over/there', b'name=ferret', b'nose')), (b'urn:example:animal:ferret:nose', (b'urn', None, b'example:animal:ferret:nose', None, None)), ] for uri, parts in cases: self.check(uri, parts) def test_abnormal(self): cases = [ ('', (None, None, '', None, None)), (':', (None, None, ':', None, None)), (':/', (None, None, ':/', None, None)), ('://', (None, None, '://', None, None)), ('://?', (None, None, '://', '', None)), ('://#', (None, None, '://', None, '')), ('://?#', (None, None, '://', '', '')), ('//', (None, '', '', None, None)), ('///', (None, '', '/', None, None)), ('//?', (None, '', '', '', None)), ('//#', (None, '', '', None, '')), ('//?#', (None, '', '', '', '')), ('?', (None, None, '', '', None)), ('??', (None, None, '', '?', None)), ('?#', (None, None, '', '', '')), ('#', (None, None, '', None, '')), ('##', (None, None, '', None, '#')), (b'', (None, None, b'', None, None)), (b':', (None, None, b':', None, None)), (b':/', (None, None, b':/', None, None)), (b'://', (None, None, b'://', None, None)), (b'://?', (None, None, b'://', b'', None)), (b'://#', (None, None, b'://', None, b'')), (b'://?#', (None, None, b'://', b'', b'')), (b'//', (None, b'', b'', None, None)), (b'///', (None, b'', b'/', None, None)), (b'//?', (None, b'', b'', b'', None)), (b'//#', (None, b'', b'', None, b'')), (b'//?#', (None, b'', b'', b'', b'')), (b'?', (None, None, b'', b'', None)), (b'??', (None, None, b'', b'?', None)), (b'?#', (None, None, b'', b'', b'')), (b'#', (None, None, b'', None, b'')), (b'##', (None, None, b'', None, b'#')), ] for uri, parts in cases: self.check(uri, parts) def test_members(self): uri = 'foo://user@example.com:8042/over/there?name=ferret#nose' result = urisplit(uri) self.assertEqual(result.scheme, 'foo') self.assertEqual(result.authority, 'user@example.com:8042') self.assertEqual(result.path, '/over/there') self.assertEqual(result.query, 'name=ferret') self.assertEqual(result.fragment, 'nose') self.assertEqual(result.userinfo, 'user') self.assertEqual(result.host, 'example.com') self.assertEqual(result.port, '8042') self.assertEqual(result.geturi(), uri) self.assertEqual(result.getscheme(), 'foo') self.assertEqual(result.getuserinfo(), 'user') self.assertEqual(result.gethost(), 'example.com') self.assertEqual(result.getport(), 8042) self.assertEqual(result.getpath(), '/over/there') self.assertEqual(result.getquery(), 'name=ferret') self.assertEqual(dict(result.getquerydict()), {'name': ['ferret']}) self.assertEqual(list(result.getquerylist()), [('name', 'ferret')]) self.assertEqual(result.getfragment(), 'nose') uri = 'urn:example:animal:ferret:nose' result = urisplit(uri) self.assertEqual(result.scheme, 'urn') self.assertEqual(result.authority, None) self.assertEqual(result.path, 'example:animal:ferret:nose') self.assertEqual(result.query, None) self.assertEqual(result.fragment, None) self.assertEqual(result.userinfo, None) self.assertEqual(result.host, None) self.assertEqual(result.port, None) self.assertEqual(result.geturi(), uri) self.assertEqual(result.getscheme(), 'urn') self.assertEqual(result.getuserinfo(), None) self.assertEqual(result.gethost(), None) self.assertEqual(result.getport(), None) self.assertEqual(result.getpath(), 'example:animal:ferret:nose') self.assertEqual(result.getquery(), None) self.assertEqual(dict(result.getquerydict()), {}) self.assertEqual(list(result.getquerylist()), []) self.assertEqual(result.getfragment(), None) uri = 'file:///' result = urisplit(uri) self.assertEqual(result.scheme, 'file') self.assertEqual(result.authority, '') self.assertEqual(result.path, '/') self.assertEqual(result.query, None) self.assertEqual(result.fragment, None) self.assertEqual(result.userinfo, None) self.assertEqual(result.host, '') self.assertEqual(result.port, None) self.assertEqual(result.geturi(), uri) self.assertEqual(result.getscheme(), 'file') self.assertEqual(result.getuserinfo(), None) self.assertEqual(result.gethost(), '') self.assertEqual(result.getport(), None) self.assertEqual(result.getpath(), '/') self.assertEqual(result.getquery(), None) self.assertEqual(dict(result.getquerydict()), {}) self.assertEqual(list(result.getquerylist()), []) self.assertEqual(result.getfragment(), None) uri = b'foo://user@example.com:8042/over/there?name=ferret#nose' result = urisplit(uri) self.assertEqual(result.scheme, b'foo') self.assertEqual(result.authority, b'user@example.com:8042') self.assertEqual(result.path, b'/over/there') self.assertEqual(result.query, b'name=ferret') self.assertEqual(result.fragment, b'nose') self.assertEqual(result.userinfo, b'user') self.assertEqual(result.host, b'example.com') self.assertEqual(result.port, b'8042') self.assertEqual(result.geturi(), uri) self.assertEqual(result.getscheme(), 'foo') self.assertEqual(result.getuserinfo(), 'user') self.assertEqual(result.gethost(), 'example.com') self.assertEqual(result.getport(), 8042) self.assertEqual(result.getpath(), '/over/there') self.assertEqual(result.getquery(), 'name=ferret') self.assertEqual(dict(result.getquerydict()), {'name': ['ferret']}) self.assertEqual(list(result.getquerylist()), [('name', 'ferret')]) self.assertEqual(result.getfragment(), 'nose') uri = b'urn:example:animal:ferret:nose' result = urisplit(uri) self.assertEqual(result.scheme, b'urn') self.assertEqual(result.authority, None) self.assertEqual(result.path, b'example:animal:ferret:nose') self.assertEqual(result.query, None) self.assertEqual(result.fragment, None) self.assertEqual(result.userinfo, None) self.assertEqual(result.host, None) self.assertEqual(result.port, None) self.assertEqual(result.geturi(), uri) self.assertEqual(result.getscheme(), 'urn') self.assertEqual(result.getuserinfo(), None) self.assertEqual(result.gethost(), None) self.assertEqual(result.getport(), None) self.assertEqual(result.getpath(), 'example:animal:ferret:nose') self.assertEqual(result.getquery(), None) self.assertEqual(dict(result.getquerydict()), {}) self.assertEqual(list(result.getquerylist()), []) self.assertEqual(result.getfragment(), None) uri = b'file:///' result = urisplit(uri) self.assertEqual(result.scheme, b'file') self.assertEqual(result.authority, b'') self.assertEqual(result.path, b'/') self.assertEqual(result.query, None) self.assertEqual(result.fragment, None) self.assertEqual(result.userinfo, None) self.assertEqual(result.host, b'') self.assertEqual(result.port, None) self.assertEqual(result.geturi(), uri) self.assertEqual(result.getscheme(), 'file') self.assertEqual(result.getuserinfo(), None) self.assertEqual(result.gethost(), '') self.assertEqual(result.getport(), None) self.assertEqual(result.getpath(), '/') self.assertEqual(result.getquery(), None) self.assertEqual(dict(result.getquerydict()), {}) self.assertEqual(list(result.getquerylist()), []) self.assertEqual(result.getfragment(), None) def test_getscheme(self): self.assertEqual(urisplit('foo').getscheme(default='bar'), 'bar') self.assertEqual(urisplit('FOO_BAR:/').getscheme(), 'foo_bar') self.assertEqual(urisplit(b'foo').getscheme(default='bar'), 'bar') self.assertEqual(urisplit(b'FOO_BAR:/').getscheme(), 'foo_bar') def test_gethost(self): from ipaddress import IPv4Address, IPv6Address cases = [ ('http://Test.python.org:5432/foo/', 'test.python.org'), ('http://12.34.56.78:5432/foo/', IPv4Address('12.34.56.78')), ('http://[::1]:5432/foo/', IPv6Address('::1')), ] for uri, host in cases: self.assertEqual(urisplit(uri).gethost(), host) self.assertEqual(urisplit(uri.encode()).gethost(), host) for uri in ['http://[::1/', 'http://::1]/']: with self.assertRaises(ValueError, msg='%r' % uri): urisplit(uri).gethost() with self.assertRaises(ValueError, msg='%r' % uri): urisplit(uri.encode()).gethost() def test_getport(self): for uri in ['foo://bar', 'foo://bar:', 'foo://bar/', 'foo://bar:/']: result = urisplit(uri) if result.authority.endswith(':'): self.assertEqual(result.port, '') else: self.assertEqual(result.port, None) self.assertEqual(result.gethost(), 'bar') self.assertEqual(result.getport(8000), 8000) def test_getpath(self): cases = [ ('', '', '/'), ('.', './', '/'), ('./', './', '/'), ('./.', './', '/'), ('./..', '../', '/'), ('./foo', 'foo', '/foo'), ('./foo/', 'foo/', '/foo/'), ('./foo/.', 'foo/', '/foo/'), ('./foo/..', './', '/'), ('..', '../', '/'), ('../', '../', '/'), ('../.', '../', '/'), ('../..', '../../', '/'), ('../foo', '../foo', '/foo'), ('../foo/', '../foo/', '/foo/'), ('../foo/.', '../foo/', '/foo/'), ('../foo/..', '../', '/'), ('../../foo', '../../foo', '/foo'), ('../../foo/', '../../foo/', '/foo/'), ('../../foo/.', '../../foo/', '/foo/'), ('../../foo/..', '../../', '/'), ('../../foo/../bar', '../../bar', '/bar'), ('../../foo/../bar/', '../../bar/', '/bar/'), ('../../foo/../bar/.', '../../bar/', '/bar/'), ('../../foo/../bar/..', '../../', '/'), ('../../foo/../..', '../../../', '/') ] for uri, relpath, abspath in cases: parts = urisplit(uri) self.assertEqual(relpath, parts.getpath()) parts = urisplit(uri.encode('ascii')) self.assertEqual(relpath, parts.getpath()) parts = urisplit('/' + uri) self.assertEqual(abspath, parts.getpath()) parts = urisplit(('/' + uri).encode('ascii')) self.assertEqual(abspath, parts.getpath()) def test_getquery(self): cases = [ ("?", [], {}), ("?&", [], {}), ("?&&", [], {}), ("?=", [('', '')], {'': ['']}), ("?=a", [('', 'a')], {'': ['a']}), ("?a", [('a', None)], {'a': [None]}), ("?a=", [('a', '')], {'a': ['']}), ("?&a=b", [('a', 'b')], {'a': ['b']}), ("?a=a+b&b=b+c", [('a', 'a+b'), ('b', 'b+c')], {'a': ['a+b'], 'b': ['b+c']}), ("?a=a%20b&b=b%20c", [('a', 'a b'), ('b', 'b c')], {'a': ['a b'], 'b': ['b c']}), ("?a=1&a=2", [('a', '1'), ('a', '2')], {'a': ['1', '2']}), ] for query, querylist, querydict in cases: self.assertEqual(urisplit(query).getquerylist(), querylist, 'Error parsing query dict for %r' % query) self.assertEqual(urisplit(query).getquerydict(), querydict, 'Error parsing query list for %r' % query) def test_ip_literal(self): cases = [ ('http://Test.python.org:5432/foo/', 'test.python.org', 5432), ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432), ('http://[::1]:5432/foo/', '::1', 5432), ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432), ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432), ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/', 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432), ('http://[::12.34.56.78]:5432/foo/', '::c22:384e', 5432), ('http://[::ffff:12.34.56.78]:5432/foo/', '::ffff:c22:384e', 5432), ('http://Test.python.org/foo/', 'test.python.org', None), ('http://12.34.56.78/foo/', '12.34.56.78', None), ('http://[::1]/foo/', '::1', None), ('http://[dead:beef::1]/foo/', 'dead:beef::1', None), ('http://[dead:beef::]/foo/', 'dead:beef::', None), ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/', 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None), ('http://[::12.34.56.78]/foo/', '::c22:384e', None), ('http://[::ffff:12.34.56.78]/foo/', '::ffff:c22:384e', None), ('http://Test.python.org:/foo/', 'test.python.org', None), ('http://12.34.56.78:/foo/', '12.34.56.78', None), ('http://[::1]:/foo/', '::1', None), ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None), ('http://[dead:beef::]:/foo/', 'dead:beef::', None), ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/', 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None), ('http://[::12.34.56.78]:/foo/', '::c22:384e', None), ('http://[::ffff:12.34.56.78]:/foo/', '::ffff:c22:384e', None), ] for uri, host, port in cases: parts = urisplit(uri) self.assertEqual(host, str(parts.gethost())) self.assertEqual(port, parts.getport()) parts = urisplit(uri.encode('ascii')) self.assertEqual(host, str(parts.gethost())) self.assertEqual(port, parts.getport()) def test_invalid_ip_literal(self): uris = [ 'http://::12.34.56.78]/', 'http://[::1/foo/', 'ftp://[::1/foo/bad]/bad', 'http://[::1/foo/bad]/bad', 'http://[foo]/', 'http://[v7.future]' ] for uri in uris: with self.assertRaises(ValueError, msg='%r' % uri): urisplit(uri).gethost() with self.assertRaises(ValueError, msg='%r' % uri.encode('ascii')): urisplit(uri.encode('ascii')).gethost() python-uritools-1.0.1/tests/test_unsplit.py000066400000000000000000000017551254755167100212060ustar00rootroot00000000000000from __future__ import unicode_literals import unittest from uritools import uriunsplit class UnsplitTest(unittest.TestCase): def check(self, split, uri): result = uriunsplit(split) self.assertEqual(result, uri) def test_rfc3986_3(self): """uriunsplit test cases from [RFC3986] 3. Syntax Components""" cases = [ (('foo', 'example.com:8042', '/over/there', 'name=ferret', 'nose'), 'foo://example.com:8042/over/there?name=ferret#nose'), (('urn', None, 'example:animal:ferret:nose', None, None), 'urn:example:animal:ferret:nose'), ((b'foo', b'example.com:8042', b'/over/there', b'name=ferret', b'nose'), b'foo://example.com:8042/over/there?name=ferret#nose'), ((b'urn', None, b'example:animal:ferret:nose', None, None), b'urn:example:animal:ferret:nose'), ] for uri, parts in cases: self.check(uri, parts) python-uritools-1.0.1/uritools/000077500000000000000000000000001254755167100166055ustar00rootroot00000000000000python-uritools-1.0.1/uritools/__init__.py000066400000000000000000000013711254755167100207200ustar00rootroot00000000000000"""RFC 3986 compliant, Unicode-aware, scheme-agnostic replacement for urlparse. This module defines RFC 3986 compliant replacements for the most commonly used functions of the Python 2.7 Standard Library :mod:`urlparse` module. """ from .chars import GEN_DELIMS, RESERVED, SUB_DELIMS, UNRESERVED from .compose import uricompose from .defrag import DefragResult, uridefrag from .encoding import uridecode, uriencode from .join import urijoin from .split import SplitResult, urisplit, uriunsplit __all__ = ( 'GEN_DELIMS', 'RESERVED', 'SUB_DELIMS', 'UNRESERVED', 'DefragResult', 'SplitResult', 'uricompose', 'uridecode', 'uridefrag', 'uriencode', 'urijoin', 'urisplit', 'uriunsplit' ) __version__ = '1.0.1' python-uritools-1.0.1/uritools/chars.py000066400000000000000000000010331254755167100202540ustar00rootroot00000000000000# RFC 3986 2.2. Reserved Characters # # reserved = gen-delims / sub-delims # # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" # # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" # / "*" / "+" / "," / ";" / "=" # GEN_DELIMS = ':/?#[]@' SUB_DELIMS = "!$&'()*+,;=" RESERVED = GEN_DELIMS + SUB_DELIMS # RFC 3986 2.3. Unreserved Characters # # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" # UNRESERVED = ( 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' '0123456789' '-._~' ) python-uritools-1.0.1/uritools/compose.py000066400000000000000000000201361254755167100206260ustar00rootroot00000000000000from __future__ import unicode_literals import ipaddress import numbers import re from collections import Iterable, Mapping from .chars import SUB_DELIMS from .encoding import uriencode from .split import uriunsplit # RFC 3986 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) _SCHEME_RE = re.compile(br"\A[A-Za-z][A-Za-z0-9+.-]*\Z") # RFC 3986 3.2: authority = [ userinfo "@" ] host [ ":" port ] _AUTHORITY_RE_BYTES = re.compile(br"\A(?:(.*)@)?(.*?)(?::([0-9]*))?\Z") _AUTHORITY_RE_STRING = re.compile(r"\A(?:(.*)@)?(.*?)(?::([0-9]*))?\Z") # safe component characters (bytes) _SUB_DELIMS_BYTES = SUB_DELIMS.encode('ascii') _SAFE_USERINFO = _SUB_DELIMS_BYTES + b':' _SAFE_HOST = _SUB_DELIMS_BYTES _SAFE_PATH = _SUB_DELIMS_BYTES + b':@/' _SAFE_QUERY = _SUB_DELIMS_BYTES + b':@/?' _SAFE_FRAGMENT = _SUB_DELIMS_BYTES + b':@/?' def _scheme(scheme): if _SCHEME_RE.match(scheme): return scheme.lower() else: raise ValueError('Invalid scheme component') def _authority(userinfo, host, port, encoding): authority = [] if userinfo is not None: authority.append(uriencode(userinfo, _SAFE_USERINFO, encoding)) authority.append(b'@') if isinstance(host, ipaddress.IPv6Address): authority.append(b'[' + host.compressed.encode() + b']') elif isinstance(host, ipaddress.IPv4Address): authority.append(host.compressed.encode()) elif isinstance(host, bytes): authority.append(_host(host)) elif host is not None: authority.append(_host(host.encode('utf-8'))) if isinstance(port, numbers.Number): authority.append(_port(str(port).encode())) elif isinstance(port, bytes): authority.append(_port(port)) elif port is not None: authority.append(_port(port.encode())) return b''.join(authority) if authority else None def _ip_literal(address): if address.startswith('v'): raise ValueError('Address mechanism not supported') else: return b'[' + ipaddress.IPv6Address(address).compressed.encode() + b']' def _host(host): # RFC 3986 3.2.3: Although host is case-insensitive, producers and # normalizers should use lowercase for registered names and # hexadecimal addresses for the sake of uniformity, while only # using uppercase letters for percent-encodings. if host.startswith(b'[') and host.endswith(b']'): return _ip_literal(host[1:-1].decode()) # check for IPv6 addresses as returned by SplitResult.gethost() try: return _ip_literal(host.decode('utf-8')) except ValueError: return uriencode(host, _SAFE_HOST, 'utf-8').lower() def _port(port): # RFC 3986 3.2.3: URI producers and normalizers should omit the # port component and its ":" delimiter if port is empty or if its # value would be the same as that of the scheme's default. if port.lstrip(b'0123456789'): raise ValueError('Invalid port subcomponent') elif port: return b':' + port else: return b'' def _querylist(items, encoding, safe=re.sub(b'[;&]', b'', _SAFE_QUERY)): terms = [] append = terms.append for key, value in items: name = uriencode(key, safe, encoding) if value is None: append(name) elif isinstance(value, (bytes, type(''))): append(name + b'=' + uriencode(value, safe, encoding)) else: append(name + b'=' + uriencode(str(value), safe, encoding)) return b'&'.join(terms) def _querydict(mapping, encoding, safe=re.sub(b'[;&]', b'', _SAFE_QUERY)): items = [] for key, value in mapping.items(): if isinstance(value, (bytes, type(''))): items.append((key, value)) elif isinstance(value, Iterable): items.extend([(key, v) for v in value]) else: items.append((key, value)) return _querylist(items, encoding, safe) def uricompose(scheme=None, authority=None, path='', query=None, fragment=None, userinfo=None, host=None, port=None, encoding='utf-8'): """Compose a URI string from its individual components.""" # RFC 3986 3.1: Scheme names consist of a sequence of characters # beginning with a letter and followed by any combination of # letters, digits, plus ("+"), period ("."), or hyphen ("-"). # Although schemes are case-insensitive, the canonical form is # lowercase and documents that specify schemes must do so with # lowercase letters. An implementation should accept uppercase # letters as equivalent to lowercase in scheme names (e.g., allow # "HTTP" as well as "http") for the sake of robustness but should # only produce lowercase scheme names for consistency. if isinstance(scheme, bytes): scheme = _scheme(scheme) elif scheme is not None: scheme = _scheme(scheme.encode()) # authority must be string type or three-item iterable if authority is None: authority = (None, None, None) elif isinstance(authority, bytes): authority = _AUTHORITY_RE_BYTES.match(authority).groups() elif isinstance(authority, type('')): authority = _AUTHORITY_RE_STRING.match(authority).groups() elif not isinstance(authority, Iterable): raise TypeError('Invalid authority type') elif len(authority) != 3: raise ValueError('Invalid authority length') authority = _authority( userinfo if userinfo is not None else authority[0], host if host is not None else authority[1], port if port is not None else authority[2], encoding ) # RFC 3986 3.3: If a URI contains an authority component, then the # path component must either be empty or begin with a slash ("/") # character. If a URI does not contain an authority component, # then the path cannot begin with two slash characters ("//"). path = uriencode(path, _SAFE_PATH, encoding) if authority is not None and path and not path.startswith(b'/'): raise ValueError('Invalid path with authority component') if authority is None and path.startswith(b'//'): raise ValueError('Invalid path without authority component') # RFC 3986 4.2: A path segment that contains a colon character # (e.g., "this:that") cannot be used as the first segment of a # relative-path reference, as it would be mistaken for a scheme # name. Such a segment must be preceded by a dot-segment (e.g., # "./this:that") to make a relative-path reference. if scheme is None and authority is None and not path.startswith(b'/'): if b':' in path.partition(b'/')[0]: path = b'./' + path # RFC 3986 3.4: The characters slash ("/") and question mark ("?") # may represent data within the query component. Beware that some # older, erroneous implementations may not handle such data # correctly when it is used as the base URI for relative # references (Section 5.1), apparently because they fail to # distinguish query data from path data when looking for # hierarchical separators. However, as query components are often # used to carry identifying information in the form of "key=value" # pairs and one frequently used value is a reference to another # URI, it is sometimes better for usability to avoid percent- # encoding those characters. if isinstance(query, (bytes, type(''))): query = uriencode(query, _SAFE_QUERY, encoding) elif isinstance(query, Mapping): query = _querydict(query, encoding) elif isinstance(query, Iterable): query = _querylist(query, encoding) elif query is not None: raise TypeError('Invalid query type') # RFC 3986 3.5: The characters slash ("/") and question mark ("?") # are allowed to represent data within the fragment identifier. # Beware that some older, erroneous implementations may not handle # this data correctly when it is used as the base URI for relative # references. if fragment is not None: fragment = uriencode(fragment, _SAFE_FRAGMENT, encoding) result = uriunsplit((scheme, authority, path, query, fragment)) # always return platform `str` type return result if isinstance(result, str) else result.decode() python-uritools-1.0.1/uritools/defrag.py000066400000000000000000000024211254755167100204060ustar00rootroot00000000000000from __future__ import unicode_literals import collections from .encoding import uridecode class DefragResult(collections.namedtuple('DefragResult', 'uri fragment')): """Class to hold :func:`uridefrag` results.""" __slots__ = () # prevent creation of instance dictionary def geturi(self): """Return the recombined version of the original URI as a string.""" fragment = self.fragment if fragment is None: return self.uri elif isinstance(fragment, bytes): return self.uri + b'#' + fragment else: return self.uri + '#' + fragment def getfragment(self, default=None, encoding='utf-8', errors='strict'): """Return the decoded fragment identifier, or `default` if the original URI did not contain a fragment component. """ fragment = self.fragment if fragment is not None: return uridecode(fragment, encoding, errors) else: return default def uridefrag(uristring): """Remove an existing fragment component from a URI string.""" if isinstance(uristring, bytes): parts = uristring.partition(b'#') else: parts = uristring.partition('#') return DefragResult(parts[0], parts[2] if parts[1] else None) python-uritools-1.0.1/uritools/encoding.py000066400000000000000000000036171254755167100207540ustar00rootroot00000000000000from string import hexdigits from .chars import UNRESERVED try: _fromhex = bytes.fromhex except AttributeError: _fromhex = lambda x: chr(int(x, 16)) if isinstance(chr(0), bytes): _fromint = chr else: _fromint = lambda i: bytes([i]) # RFC 3986 2.1: For consistency, URI producers and normalizers should # use uppercase hexadecimal digits for all percent-encodings. def _pctenc(byte): return ('%%%02X' % byte).encode() _unreserved = frozenset(memoryview(UNRESERVED.encode('ascii')).tolist()) _encoded = { b'': [_fromint(i) if i in _unreserved else _pctenc(i) for i in range(256)] } _decoded = { (a + b).encode(): _fromhex(a + b) for a in hexdigits for b in hexdigits } def uriencode(uristring, safe='', encoding='utf-8', errors='strict'): """Encode a URI string or string component.""" if isinstance(uristring, bytes): values = memoryview(uristring).tolist() else: values = memoryview(uristring.encode(encoding, errors)).tolist() if not isinstance(safe, bytes): safe = safe.encode('ascii') try: encode = _encoded[safe].__getitem__ except KeyError: enclist = _encoded[b''][:] for i in memoryview(safe).tolist(): enclist[i] = _fromint(i) _encoded[safe] = enclist encode = enclist.__getitem__ return b''.join(map(encode, values)) def uridecode(uristring, encoding='utf-8', errors='strict'): """Decode a URI string or string component.""" if isinstance(uristring, bytes): parts = uristring.split(b'%') else: parts = uristring.encode(encoding or 'ascii', errors).split(b'%') result = [parts[0]] append = result.append decode = _decoded.get for s in parts[1:]: append(decode(s[:2], b'%' + s[:2])) append(s[2:]) if encoding is not None: return b''.join(result).decode(encoding, errors) else: return b''.join(result) python-uritools-1.0.1/uritools/join.py000066400000000000000000000007461254755167100201250ustar00rootroot00000000000000from __future__ import unicode_literals from .split import urisplit def urijoin(base, ref, strict=False): """Convert a URI reference relative to a base URI to its target URI string. """ if isinstance(base, type(ref)): return urisplit(base).transform(ref, strict).geturi() elif isinstance(base, bytes): return urisplit(base.decode()).transform(ref, strict).geturi() else: return urisplit(base).transform(ref.decode(), strict).geturi() python-uritools-1.0.1/uritools/split.py000066400000000000000000000260771254755167100203260ustar00rootroot00000000000000from __future__ import unicode_literals import collections import ipaddress import re from .encoding import uridecode _URI_COMPONENTS = ('scheme', 'authority', 'path', 'query', 'fragment') def _ip_literal(address): # RFC 3986 3.2.2: In anticipation of future, as-yet-undefined IP # literal address formats, an implementation may use an optional # version flag to indicate such a format explicitly rather than # rely on heuristic determination. # # IP-literal = "[" ( IPv6address / IPvFuture ) "]" # # IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) # # If a URI containing an IP-literal that starts with "v" # (case-insensitive), indicating that the version flag is present, # is dereferenced by an application that does not know the meaning # of that version flag, then the application should return an # appropriate error for "address mechanism not supported". if isinstance(address, bytes): address = address.decode('ascii') if address.startswith('v'): raise ValueError('address mechanism not supported') return ipaddress.IPv6Address(address) def _ipv4_address(address): try: if isinstance(address, bytes): return ipaddress.IPv4Address(address.decode('ascii')) else: return ipaddress.IPv4Address(address) except ValueError: return None class SplitResult(collections.namedtuple('SplitResult', _URI_COMPONENTS)): """Base class to hold :func:`urisplit` results.""" __slots__ = () # prevent creation of instance dictionary @property def userinfo(self): authority = self.authority if authority is None: return None userinfo, present, _ = authority.rpartition(self.AT) if present: return userinfo else: return None @property def host(self): authority = self.authority if authority is None: return None _, _, hostinfo = authority.rpartition(self.AT) host, _, port = hostinfo.rpartition(self.COLON) if port.lstrip(self.DIGITS): return hostinfo else: return host @property def port(self): authority = self.authority if authority is None: return None _, present, port = authority.rpartition(self.COLON) if present and not port.lstrip(self.DIGITS): return port else: return None def geturi(self): """Return the re-combined version of the original URI as a string.""" scheme, authority, path, query, fragment = self # RFC 3986 5.3. Component Recomposition result = [] if scheme is not None: result.extend([scheme, self.COLON]) if authority is not None: result.extend([self.SLASH, self.SLASH, authority]) result.append(path) if query is not None: result.extend([self.QUEST, query]) if fragment is not None: result.extend([self.HASH, fragment]) return self.EMPTY.join(result) def getscheme(self, default=None): """Return the URI scheme in canonical (lowercase) form, or `default` if the original URI did not contain a scheme component. """ scheme = self.scheme if scheme is None: return default elif isinstance(scheme, bytes): return scheme.decode('ascii').lower() else: return scheme.lower() def getuserinfo(self, default=None, encoding='utf-8', errors='strict'): """Return the decoded userinfo subcomponent of the URI authority, or `default` if the original URI did not contain a userinfo field. """ userinfo = self.userinfo if userinfo is None: return default else: return uridecode(userinfo, encoding, errors) def gethost(self, default=None): """Return the decoded host subcomponent of the URI authority as a string or an :mod:`ipaddress` address object, or `default` if the original URI did not contain a host. """ host = self.host if host is None or (not host and default is not None): return default elif host.startswith(self.LBRACKET) and host.endswith(self.RBRACKET): return _ip_literal(host[1:-1]) elif host.startswith(self.LBRACKET) or host.endswith(self.RBRACKET): raise ValueError('Invalid host %r' % host) # FIXME: remove? else: return _ipv4_address(host) or uridecode(host, 'utf-8').lower() def getport(self, default=None): """Return the port subcomponent of the URI authority as an :class:`int`, or `default` if the original URI did not contain a port or if the port was empty. """ port = self.port if port: return int(port) else: return default def getpath(self, encoding='utf-8', errors='strict'): """Return the normalized decoded URI path.""" path = self.__remove_dot_segments(self.path) return uridecode(path, encoding, errors) def getquery(self, default=None, encoding='utf-8', errors='strict'): """Return the decoded query string, or `default` if the original URI did not contain a query component. """ query = self.query if query is None: return default else: return uridecode(query, encoding, errors) def getquerydict(self, encoding='utf-8', errors='strict'): """Split the query component into individual `name=value` pairs and return a dictionary of query variables. The dictionary keys are the unique query variable names and the values are lists of values for each name. """ dict = collections.defaultdict(list) for name, value in self.getquerylist(encoding, errors): dict[name].append(value) return dict def getquerylist(self, encoding='utf-8', errors='strict'): """Split the query component into individual `name=value` pairs and return a list of `(name, value)` tuples. """ if self.query: qsl = [self.query] else: return [] for sep in self.QUERYSEP: qsl = [s for qs in qsl for s in qs.split(sep) if s] items = [] for qs in qsl: parts = qs.partition(self.EQ) name = uridecode(parts[0], encoding, errors) if parts[1]: value = uridecode(parts[2], encoding, errors) else: value = None items.append((name, value)) return items def getfragment(self, default=None, encoding='utf-8', errors='strict'): """Return the decoded fragment identifier, or `default` if the original URI did not contain a fragment component. """ fragment = self.fragment if fragment is None: return default else: return uridecode(fragment, encoding, errors) def transform(self, ref, strict=False): """Transform a URI reference relative to `self` into a :class:`SplitResult` representing its target URI. """ scheme, authority, path, query, fragment = self.RE.match(ref).groups() # RFC 3986 5.2.2. Transform References if scheme is not None and (strict or scheme != self.scheme): path = self.__remove_dot_segments(path) elif authority is not None: scheme = self.scheme path = self.__remove_dot_segments(path) elif not path: scheme = self.scheme authority = self.authority path = self.path query = self.query if query is None else query elif path.startswith(self.SLASH): scheme = self.scheme authority = self.authority path = self.__remove_dot_segments(path) else: scheme = self.scheme authority = self.authority path = self.__remove_dot_segments(self.__merge(path)) return type(self)(scheme, authority, path, query, fragment) def __merge(self, path): # RFC 3986 5.2.3. Merge Paths if self.authority is not None and not self.path: return self.SLASH + path else: parts = self.path.rpartition(self.SLASH) return parts[1].join((parts[0], path)) @classmethod def __remove_dot_segments(cls, path): # RFC 3986 5.2.4. Remove Dot Segments pseg = [] for s in path.split(cls.SLASH): if s == cls.DOT: continue elif s != cls.DOTDOT: pseg.append(s) elif len(pseg) == 1 and not pseg[0]: continue elif pseg and pseg[-1] != cls.DOTDOT: pseg.pop() else: pseg.append(s) # adjust for trailing '/.' or '/..' if path.rpartition(cls.SLASH)[2] in (cls.DOT, cls.DOTDOT): pseg.append(cls.EMPTY) if path and len(pseg) == 1 and pseg[0] == cls.EMPTY: pseg.insert(0, cls.DOT) return cls.SLASH.join(pseg) class SplitResultBytes(SplitResult): __slots__ = () # prevent creation of instance dictionary # RFC 3986 Appendix B RE = re.compile(br""" (?:([^:/?#]+):)? # scheme (?://([^/?#]*))? # authority ([^?#]*) # path (?:\?([^#]*))? # query (?:\#(.*))? # fragment """, flags=re.VERBOSE) # RFC 3986 2.2 gen-delims COLON, SLASH, QUEST, HASH, LBRACKET, RBRACKET, AT = ( b':', b'/', b'?', b'#', b'[', b']', b'@' ) # RFC 3986 3.3 dot-segments DOT, DOTDOT = b'.', b'..' EMPTY, EQ = b'', b'=' DIGITS = b'0123456789' QUERYSEP = (b';', b'&') class SplitResultString(SplitResult): __slots__ = () # prevent creation of instance dictionary # RFC 3986 Appendix B RE = re.compile(r""" (?:([^:/?#]+):)? # scheme (?://([^/?#]*))? # authority ([^?#]*) # path (?:\?([^#]*))? # query (?:\#(.*))? # fragment """, flags=re.VERBOSE) # RFC 3986 2.2 gen-delims COLON, SLASH, QUEST, HASH, LBRACKET, RBRACKET, AT = ':/?#[]@' # RFC 3986 3.3 dot-segments DOT, DOTDOT = '.', '..' EMPTY, EQ = '', '=' DIGITS = '0123456789' QUERYSEP = ';&' def urisplit(uristring): """Split a well-formed URI string into a tuple with five components corresponding to a URI's general structure:: :///?# """ if isinstance(uristring, bytes): result = SplitResultBytes else: result = SplitResultString return result(*result.RE.match(uristring).groups()) def uriunsplit(parts): """Combine the elements of a five-item iterable into a URI string.""" scheme, authority, path, query, fragment = parts if isinstance(path, bytes): result = SplitResultBytes else: result = SplitResultString return result(scheme, authority, path, query, fragment).geturi()