arcp-0.2.1/0000755000175000017500000000000013620764740012726 5ustar stainstain00000000000000arcp-0.2.1/.gitignore0000644000175000017500000000222313333312162014701 0ustar stainstain00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # dotenv .env # virtualenv .venv venv/ ENV/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .pytest_cache arcp-0.2.1/.travis.yml0000644000175000017500000000041113620763011015021 0ustar stainstain00000000000000language: python python: - "2.7" - "3.5" - "3.6" - "3.7" - "3.8" - "pypy3" install: - pip install -r requirements.txt - pip install codecov python-coveralls pytest pytest-cov script: - pytest --cov=./ after_success: - codecov - coveralls arcp-0.2.1/CONTRIBUTORS.txt0000644000175000017500000000016413620764671015430 0ustar stainstain00000000000000Stian Soiland-Reyes John Vandenberg arcp-0.2.1/LICENSE.txt0000644000175000017500000002613613620764671014564 0ustar stainstain00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. arcp-0.2.1/MANIFEST.in0000644000175000017500000000011013620764671014457 0ustar stainstain00000000000000include *.txt recursive-include docs *.rst recursive-include tests *.py arcp-0.2.1/PKG-INFO0000644000175000017500000002147113620764740014030 0ustar stainstain00000000000000Metadata-Version: 1.1 Name: arcp Version: 0.2.1 Summary: arcp (Archive and Package) URI parser and generator Home-page: https://arcp.readthedocs.io/ Author: Stian Soiland-Reyes Author-email: stain@apache.org License: Apache License, Version 2.0 Download-URL: https://github.com/stain/arcp-py/archive/0.1.0.tar.gz Description: arcp-py ======= Create/parse arcp_ (Archive and Package) URIs. .. image:: https://readthedocs.org/projects/arcp/badge/?version=latest :target: https://arcp.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.1162749.svg :target: https://doi.org/10.5281/zenodo.1162749 .. image:: https://travis-ci.org/stain/arcp-py.svg?branch=master :target: https://travis-ci.org/stain/arcp-py .. image:: https://img.shields.io/pypi/v/arcp.svg?maxAge=86400 :target: https://pypi.org/project/arcp/ .. image:: https://coveralls.io/repos/github/stain/arcp-py/badge.svg?branch=master :target: https://coveralls.io/github/stain/arcp-py?branch=master .. image:: https://codecov.io/gh/stain/arcp-py/branch/master/graph/badge.svg :target: https://codecov.io/gh/stain/arcp-py Introduction ------------ ``arcp`` provides functions for creating arcp_ URIs, which can be used for identifying or parsing hypermedia files packaged in an archive or package, like a ZIP file. arcp URIs can be used to consume or reference hypermedia resources bundled inside a file archive or an application package, as well as to resolve URIs for archive resources within a programmatic framework. This URI scheme provides mechanisms to generate a unique base URI to represent the root of the archive, so that relative URI references in a bundled resource can be resolved within the archive without having to extract the archive content on the local file system. An arcp URI can be used for purposes of isolation (e.g. when consuming multiple archives), security constraints (avoiding “climb out” from the archive), or for externally identiyfing sub-resources referenced by hypermedia formats. Examples: - ``arcp://uuid,32a423d6-52ab-47e3-a9cd-54f418a48571/doc.html`` - ``arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/pics/`` - ``arcp://ni,sha-256;F-34D4TUeOfG0selz7REKRDo4XePkewPeQYtjL3vQs0/`` - ``arcp://name,gallery.example.org/`` The different forms of URI authority_ in arcp URIs can be used depending on which uniqueness constraints to apply when addressing an archive. See the arcp_ specification (*draft-soilandreyes-arcp*) for details. Note that this library only provides mechanisms to *generate* and *parse* arcp URIs, and do *not* integrate with any particular archive or URL handling modules like ``zipfile`` or ``urllib.request``. License ------- © 2018-2020 Stian Soiland-Reyes , The University of Manchester, UK Licensed under the Apache License, version 2.0 , see the file LICENSE.txt for details. Contribute ---------- Source code: Feel free to raise a pull request at or an issue at . Submitted contributions are assumed to be covered by section 5 of the Apache License 2.0. Installing ---------- You will need Python 2.7, Python 3.4 or later (Recommended: 3.6). If you have pip_, then the easiest is normally to install from using:: pip install arcp If you want to install manually from this code base, then try:: python setup.py install Usage ------ For full documentation, see or use ``help(arcp)`` This module provides functions for creating arcp_ URIs, which can be used for identifying or parsing hypermedia files packaged in an archive or package, like a ZIP file:: python >>> from arcp import * >>> arcp_random() 'arcp://uuid,dcd6b1e8-b3a2-43c9-930b-0119cf0dc538/' >>> arcp_random("/foaf.ttl", fragment="me") 'arcp://uuid,dcd6b1e8-b3a2-43c9-930b-0119cf0dc538/foaf.ttl#me' >>> arcp_hash(b"Hello World!", "/folder/") 'arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/folder/' >>> arcp_location("http://example.com/data.zip", "/file.txt") 'arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/file.txt' arcp URLs can be used with ``urllib.parse``, for instance using ``urljoin`` to resolve relative references:: >>> css = arcp.arcp_name("app.example.com", "css/style.css") >>> urllib.parse.urljoin(css, "../fonts/foo.woff") 'arcp://name,app.example.com/fonts/foo.woff' In addition this module provides functions that can be used to parse arcp URIs into its constituent fields:: python >>> is_arcp_uri("arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/file.txt") True >>> is_arcp_uri("http://example.com/t") False >>> u = parse_arcp("arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/file.txt") ARCPSplitResult(scheme='arcp',prefix='uuid',name='b7749d0b-0e47-5fc4-999d-f154abe68065', uuid='b7749d0b-0e47-5fc4-999d-f154abe68065',path='/file.txt',query='',fragment='') >>> u.path '/file.txt' >>> u.prefix 'uuid' >>> u.uuid UUID('b7749d0b-0e47-5fc4-999d-f154abe68065') >>> u.uuid.version 5 >>> parse_arcp("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/folder/").hash ('sha-256', '7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069') The object returned from ``parse_arcp`` is similar to ``ParseResult`` from ``urlparse``, but contains additional properties ``prefix``, ``uuid``, ``ni``, ``hash`` and ``name``, some of which will be ``None`` depending on the arcp prefix. The function ``arcp.parse.urlparse`` can be imported as an alternative to ``urllib.parse.urlparse``. If the scheme is ``arcp`` then the extra arcp fields like `prefix`, `uuid`, `hash` and `name` are available as from `parse_arcp`, otherwise the output is the same as from regular `urlparse`:: python >>> from arcp.parse import urlparse >>> urlparse("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/folder/soup;sads") ARCPParseResult(scheme='arcp',prefix='ni', name='sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk', ni='sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk', hash=('sha-256', '7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069', path='/folder/soup;sads',query='',fragment='') >>> urlparse("http://example.com/help?q=a") ParseResult(scheme='http', netloc='example.com', path='/help', params='', query='q=a', fragment='') .. _arcp: https://tools.ietf.org/html/draft-soilandreyes-arcp-03 .. _pip: https://docs.python.org/3/installing/ .. _authority: https://tools.ietf.org/id/draft-soilandreyes-arcp-03.html#rfc.section.4.1 Keywords: arcp uri url iri archive package Platform: UNKNOWN Classifier: Development Status :: 3 - Alpha Classifier: Intended Audience :: Developers Classifier: Topic :: Software Development :: Build Tools Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 Classifier: Topic :: Internet Classifier: Topic :: Internet :: WWW/HTTP Classifier: Topic :: System :: Archiving Classifier: Topic :: System :: Archiving :: Packaging arcp-0.2.1/README.rst0000644000175000017500000001464213620764671014427 0ustar stainstain00000000000000arcp-py ======= Create/parse arcp_ (Archive and Package) URIs. .. image:: https://readthedocs.org/projects/arcp/badge/?version=latest :target: https://arcp.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.1162749.svg :target: https://doi.org/10.5281/zenodo.1162749 .. image:: https://travis-ci.org/stain/arcp-py.svg?branch=master :target: https://travis-ci.org/stain/arcp-py .. image:: https://img.shields.io/pypi/v/arcp.svg?maxAge=86400 :target: https://pypi.org/project/arcp/ .. image:: https://coveralls.io/repos/github/stain/arcp-py/badge.svg?branch=master :target: https://coveralls.io/github/stain/arcp-py?branch=master .. image:: https://codecov.io/gh/stain/arcp-py/branch/master/graph/badge.svg :target: https://codecov.io/gh/stain/arcp-py Introduction ------------ ``arcp`` provides functions for creating arcp_ URIs, which can be used for identifying or parsing hypermedia files packaged in an archive or package, like a ZIP file. arcp URIs can be used to consume or reference hypermedia resources bundled inside a file archive or an application package, as well as to resolve URIs for archive resources within a programmatic framework. This URI scheme provides mechanisms to generate a unique base URI to represent the root of the archive, so that relative URI references in a bundled resource can be resolved within the archive without having to extract the archive content on the local file system. An arcp URI can be used for purposes of isolation (e.g. when consuming multiple archives), security constraints (avoiding “climb out” from the archive), or for externally identiyfing sub-resources referenced by hypermedia formats. Examples: - ``arcp://uuid,32a423d6-52ab-47e3-a9cd-54f418a48571/doc.html`` - ``arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/pics/`` - ``arcp://ni,sha-256;F-34D4TUeOfG0selz7REKRDo4XePkewPeQYtjL3vQs0/`` - ``arcp://name,gallery.example.org/`` The different forms of URI authority_ in arcp URIs can be used depending on which uniqueness constraints to apply when addressing an archive. See the arcp_ specification (*draft-soilandreyes-arcp*) for details. Note that this library only provides mechanisms to *generate* and *parse* arcp URIs, and do *not* integrate with any particular archive or URL handling modules like ``zipfile`` or ``urllib.request``. License ------- © 2018-2020 Stian Soiland-Reyes , The University of Manchester, UK Licensed under the Apache License, version 2.0 , see the file LICENSE.txt for details. Contribute ---------- Source code: Feel free to raise a pull request at or an issue at . Submitted contributions are assumed to be covered by section 5 of the Apache License 2.0. Installing ---------- You will need Python 2.7, Python 3.4 or later (Recommended: 3.6). If you have pip_, then the easiest is normally to install from using:: pip install arcp If you want to install manually from this code base, then try:: python setup.py install Usage ------ For full documentation, see or use ``help(arcp)`` This module provides functions for creating arcp_ URIs, which can be used for identifying or parsing hypermedia files packaged in an archive or package, like a ZIP file:: python >>> from arcp import * >>> arcp_random() 'arcp://uuid,dcd6b1e8-b3a2-43c9-930b-0119cf0dc538/' >>> arcp_random("/foaf.ttl", fragment="me") 'arcp://uuid,dcd6b1e8-b3a2-43c9-930b-0119cf0dc538/foaf.ttl#me' >>> arcp_hash(b"Hello World!", "/folder/") 'arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/folder/' >>> arcp_location("http://example.com/data.zip", "/file.txt") 'arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/file.txt' arcp URLs can be used with ``urllib.parse``, for instance using ``urljoin`` to resolve relative references:: >>> css = arcp.arcp_name("app.example.com", "css/style.css") >>> urllib.parse.urljoin(css, "../fonts/foo.woff") 'arcp://name,app.example.com/fonts/foo.woff' In addition this module provides functions that can be used to parse arcp URIs into its constituent fields:: python >>> is_arcp_uri("arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/file.txt") True >>> is_arcp_uri("http://example.com/t") False >>> u = parse_arcp("arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/file.txt") ARCPSplitResult(scheme='arcp',prefix='uuid',name='b7749d0b-0e47-5fc4-999d-f154abe68065', uuid='b7749d0b-0e47-5fc4-999d-f154abe68065',path='/file.txt',query='',fragment='') >>> u.path '/file.txt' >>> u.prefix 'uuid' >>> u.uuid UUID('b7749d0b-0e47-5fc4-999d-f154abe68065') >>> u.uuid.version 5 >>> parse_arcp("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/folder/").hash ('sha-256', '7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069') The object returned from ``parse_arcp`` is similar to ``ParseResult`` from ``urlparse``, but contains additional properties ``prefix``, ``uuid``, ``ni``, ``hash`` and ``name``, some of which will be ``None`` depending on the arcp prefix. The function ``arcp.parse.urlparse`` can be imported as an alternative to ``urllib.parse.urlparse``. If the scheme is ``arcp`` then the extra arcp fields like `prefix`, `uuid`, `hash` and `name` are available as from `parse_arcp`, otherwise the output is the same as from regular `urlparse`:: python >>> from arcp.parse import urlparse >>> urlparse("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/folder/soup;sads") ARCPParseResult(scheme='arcp',prefix='ni', name='sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk', ni='sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk', hash=('sha-256', '7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069', path='/folder/soup;sads',query='',fragment='') >>> urlparse("http://example.com/help?q=a") ParseResult(scheme='http', netloc='example.com', path='/help', params='', query='q=a', fragment='') .. _arcp: https://tools.ietf.org/html/draft-soilandreyes-arcp-03 .. _pip: https://docs.python.org/3/installing/ .. _authority: https://tools.ietf.org/id/draft-soilandreyes-arcp-03.html#rfc.section.4.1 arcp-0.2.1/arcp/0000755000175000017500000000000013620764740013653 5ustar stainstain00000000000000arcp-0.2.1/arcp/__init__.py0000644000175000017500000001124313620764671015770 0ustar stainstain00000000000000#!/usr/bin/env python ## Copyright 2018-2020 Stian Soiland-Reyes, The University of Manchester, UK ## ## Licensed under the Apache License, Version 2.0 (the "License"); ## you may not use this file except in compliance with the License. ## You may obtain a copy of the License at ## ## http://www.apache.org/licenses/LICENSE-2.0 ## ## Unless required by applicable law or agreed to in writing, software ## distributed under the License is distributed on an "AS IS" BASIS, ## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ## See the License for the specific language governing permissions and ## limitations under the License. """ Create/parse arcp (Archive and Package) URIs. This module provides functions for creating arcp_ URIs, which can be used for identifying or parsing hypermedia files packaged in an archive like a ZIP file:: >>> from arcp import * >>> arcp_random() 'arcp://uuid,dcd6b1e8-b3a2-43c9-930b-0119cf0dc538/' >>> arcp_random("/foaf.ttl", fragment="me") 'arcp://uuid,dcd6b1e8-b3a2-43c9-930b-0119cf0dc538/foaf.ttl#me' >>> arcp_hash(b"Hello World!", "/folder/") 'arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/folder/' >>> arcp_location("http://example.com/data.zip", "/file.txt") 'arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/file.txt' arcp URLs can be used with :mod:`urllib.parse`, for instance using :func:`urllib.parse.urljoin` to resolve relative references:: >>> css = arcp.arcp_name("app.example.com", "css/style.css") >>> urllib.parse.urljoin(css, "../fonts/foo.woff") 'arcp://name,app.example.com/fonts/foo.woff' In addition this module provides functions that can be used to parse arcp URIs into its constituent fields:: >>> is_arcp_uri("arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/file.txt") True >>> is_arcp_uri("http://example.com/t") False >>> u = parse_arcp("arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/file.txt") ARCPSplitResult(scheme='arcp',prefix='uuid',name='b7749d0b-0e47-5fc4-999d-f154abe68065', uuid='b7749d0b-0e47-5fc4-999d-f154abe68065',path='/file.txt',query='',fragment='') >>> u.path '/file.txt' >>> u.prefix 'uuid' >>> u.uuid UUID('b7749d0b-0e47-5fc4-999d-f154abe68065') >>> u.uuid.version 5 >>> parse_arcp("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/folder/").hash ('sha-256', '7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069') The object returned from :func:`parse_arcp()` is similar to :class:`urllib.parse.ParseResult`, but contains additional properties :attr:`prefix`, :attr:`uuid`, :attr:`ni`, :attr:`hash` and :attr:`name`, some of which will be ``None`` depending on the arcp prefix. The function :func:`arcp.parse.urlparse()` can be imported as an alternative to :func:`urllib.parse.urlparse`. If the scheme is ``arcp`` then the extra arcp fields like :attr:`prefix`, :attr:`uuid`, :attr:`hash` and :attr:`name` are available as from :func:`parse_arcp`, otherwise the output is the same as from :func:`urllib.parse.urlparse`:: >>> from arcp.parse import urlparse >>> urlparse("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/folder/soup;sads") ARCPParseResult(scheme='arcp',prefix='ni', name='sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk', ni='sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk', hash=('sha-256', '7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069', path='/folder/soup;sads',query='',fragment='') >>> urlparse("http://example.com/help?q=a") ParseResult(scheme='http', netloc='example.com', path='/help', params='', query='q=a', fragment='') .. _arcp: https://tools.ietf.org/html/draft-soilandreyes-arcp-03 """ __author__ = "Stian Soiland-Reyes " __copyright__ = "Copyright 2018-2020 The University of Manchester" __license__ = "Apache License, version 2.0 " ARCP="arcp" NI="ni" NIH="ni" try: import urllib.parse as _urlp except: import urlparse as _urlp def _register_scheme(scheme, *uses): """Ensure app scheme works with :func:`urllib.parse.urljoin` and friends""" for u in uses: if not scheme in u: u.append(scheme) _register_scheme(ARCP, _urlp.uses_relative, _urlp.uses_netloc, _urlp.uses_params, _urlp.uses_fragment) # arcp://a/b;c?d#e _register_scheme(NI, _urlp.uses_netloc) # ni://a/b _register_scheme(NIH) # nih:a;b # Convenience export of public functions from .parse import is_arcp_uri, parse_arcp from .generate import arcp_uuid, arcp_random, arcp_location, arcp_name, arcp_hash arcp-0.2.1/arcp/generate.py0000644000175000017500000001427013620764671016026 0ustar stainstain00000000000000#!/usr/bin/env python ## Copyright 2018-2020 Stian Soiland-Reyes, The University of Manchester, UK ## ## Licensed under the Apache License, Version 2.0 (the "License"); ## you may not use this file except in compliance with the License. ## You may obtain a copy of the License at ## ## http://www.apache.org/licenses/LICENSE-2.0 ## ## Unless required by applicable law or agreed to in writing, software ## distributed under the License is distributed on an "AS IS" BASIS, ## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ## See the License for the specific language governing permissions and ## limitations under the License. """ Generate arcp URIs with various prefixes. As detailed in draft-soilandreyes-arcp_, the choice of arcp _prefix_ depends on the uniqueness constraints required to identify the archive. :func:`arcp_random()` can be used for a fresh arcp URI based on a pseudo-random generator. Use :func:`urllib.parse.urljoin()` to resolve paths within the same archive. :func:`arcp_uuid()` can be used with a pre-made UUID instance, for instance loaded from an archive's manifest or generated with :func:`uuid.uuid4()` :func:`arcp_location()` can be used to identify an archive based on its location URL, facilitating a UUID v5 authority. :func:`arcp_name()` can be used to identify an archive based on its absolute DNS name or package name within an installation. .. _draft-soilandreyes-arcp: https://tools.ietf.org/id/draft-soilandreyes-arcp-03.html """ __author__ = "Stian Soiland-Reyes " __copyright__ = "Copyright 2018 The University of Manchester" __license__ = "Apache License, version 2.0 (https://www.apache.org/licenses/LICENSE-2.0)" from uuid import uuid4, uuid5, UUID, NAMESPACE_URL try: from urllib.parse import urlunsplit except: from urlparse import urlunsplit import re from hashlib import sha256 from base64 import urlsafe_b64encode, urlsafe_b64decode SCHEME="arcp" def _reg_name_regex(): """Compile regular expression for RFC3986_ reg-name production _RFC3986: https://www.ietf.org/rfc/rfc3986 """ # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" unreserved = r"[A-Za-z0-9-._~]" # pct-encoded = "%" HEXDIG HEXDIG pct_encoded = r"%[0-9A-Fa-f][0-9A-Fa-f]" # "!" / "$" / "&" / "'" / "(" / ")" # / "*" / "+" / "," / ";" / "=" sub_delims = r"[!$&'()*+,;=]" # reg-name = *( unreserved / pct-encoded / sub-delims ) reg_name = r"^(" + unreserved + r"|" + pct_encoded + sub_delims + r")*$" return re.compile(reg_name) _REG_NAME = _reg_name_regex() def arcp_uuid(uuid, path="/", query=None, fragment=None): """Generate an arcp URI for the given uuid. Parameters: - uuid -- a uuid string or UUID instance identifying the archive, e.g. ``58ca7fa6-be2f-48e4-8b69-e63fb0d929fe`` - path -- Optional path within archive. - query -- Optional query component. - fragment -- Optional fragment component. """ if not isinstance(uuid, UUID): # ensure valid UUID uuid = UUID(uuid) # TODO: Ensure valid path? path = path or "" authority = "uuid,%s" % uuid s = (SCHEME, authority, path, query, fragment) return urlunsplit(s) def arcp_random(path="/", query=None, fragment=None, uuid=None): """Generate an arcp URI using a random uuid. Parameters: - path -- Optional path within archive. - query -- Optional query component. - fragment -- Optional fragment component. - uuid -- optional UUID v4 string or UUID instance """ if uuid is None: uuid = uuid4() elif not isinstance(uuid, UUID): # ensure valid UUID uuid = UUID(uuid) if not uuid.version == 4: raise Exception("UUID is not v4" % uuid) return arcp_uuid(uuid, path=path, query=query, fragment=fragment) def arcp_location(location, path="/", query=None, fragment=None, namespace=NAMESPACE_URL): """Generate an arcp URI for a given archive location. Parameters: - location: URL or location of archive, e.g. ``http://example.com/data.zip`` - path -- Optional path within archive. - query -- Optional query component. - fragment -- Optional fragment component. - namespace -- optional namespace UUID for non-URL location. """ # TODO: Ensure location is valid url if NAMESPACE_URL? uuid = uuid5(namespace, location) return arcp_uuid(uuid, path=path, query=query, fragment=fragment) def arcp_name(name, path="/", query=None, fragment=None): """Generate an arcp URI for a given archive name. Parameters: - name -- Absolute DNS or package name, e.g. ``app.example.com`` - path -- Optional path within archive. - query -- Optional query component. - fragment -- Optional fragment component. - namespace -- optional namespace UUID for non-URL location. """ if not _REG_NAME.match(name): raise Exception("Invalid name: %s" % name) authority = "name," + name s = (SCHEME, authority, path, query, fragment) return urlunsplit(s) def arcp_hash(bytes=b"", path="/", query=None, fragment=None, hash=None): """Generate an arcp URI for a given archive hash checksum. Parameters: - bytes -- Optional bytes of archive to checksum - path -- Optional path within archive. - query -- Optional query component. - fragment -- Optional fragment component. - hash -- Optional hash instance from :func:`hashlib.sha256()` Either ``bytes`` or ``hash`` must be provided. The ``hash`` parameter can be provided to avoid representing the whole archive bytes in memory. """ if hash is None: hash = sha256() elif hash.name != "sha256": # TODO: Map Python's hash-names to RFC6920 raise Exception("hash method %s unsupported, try sha256" % hash.name) hashmethod = "sha-256" # Tip: if bytes == b"" then provided hash param is unchanged hash.update(bytes) # RFC6920-style hash encoding digestB64 = urlsafe_b64encode(hash.digest()) digestB64 = digestB64.decode("ascii").strip("=") authority = "ni,%s;%s" % (hashmethod, digestB64) s = (SCHEME, authority, path, query, fragment) return urlunsplit(s) arcp-0.2.1/arcp/parse.py0000644000175000017500000002312613620764671015346 0ustar stainstain00000000000000#!/usr/bin/env python ## Copyright 2018-2020 Stian Soiland-Reyes, The University of Manchester, UK ## ## Licensed under the Apache License, Version 2.0 (the "License"); ## you may not use this file except in compliance with the License. ## You may obtain a copy of the License at ## ## http://www.apache.org/licenses/LICENSE-2.0 ## ## Unless required by applicable law or agreed to in writing, software ## distributed under the License is distributed on an "AS IS" BASIS, ## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ## See the License for the specific language governing permissions and ## limitations under the License. """ Parse arcp URIs. Use is_arcp_uri() to detect of an URI string is using the arcp: URI scheme, in which case parse_arcp() can be used to split it into its components. The urlparse() function can be used as a replacement for urllib.parse.urlparse() - supporting any URIs. If the URI is using the arcp: URI scheme, additional components are available as from parse_arcp(). """ __author__ = "Stian Soiland-Reyes " __copyright__ = "Copyright 2018-2020 The University of Manchester" __license__ = "Apache License, version 2.0 (https://www.apache.org/licenses/LICENSE-2.0)" from uuid import UUID, NAMESPACE_URL try: import urllib.parse as urlp except: import urlparse as urlp from base64 import urlsafe_b64decode from binascii import hexlify import re SCHEME="arcp" def is_arcp_uri(uri): """Return True if the uri string uses the arcp scheme, otherwise False. """ # tip: urllib will do lowercase for us return urlp.urlparse(uri).scheme == SCHEME def parse_arcp(uri): """Parse an arcp URI string into its constituent parts. The returned object is similar to ``urllib.parse.urlparse()`` in that it is a tuple of ``(scheme,netloc,path,params,query,fragment)`` with equally named properties, but it also adds properties for arcp fields: - prefix -- arcp authority prefix, e.g. "uuid", "ni" or "name", or None if prefix is missing - name -- arcp authority without prefix, e.g. "a4889890-a50a-4f14-b4e7-5fd83683a2b5" or "example.com" - uuid -- a ``uuid.UUID`` object if prefix is "uuid", otherwise None - ni -- the arcp alg-val value according to RFC6920 if prefix is "ni", otherwise None - hash -- the hash method and hash as a hexstring if prefix is "ni", otherwise None """ return ARCPParseResult(*urlp.urlparse(uri)) def urlparse(uri): """Parse any URI string into constituent parts. The returned object is similar to :func:`urllib.parse.urlparse()` in that it is a tuple of ``(scheme,netloc,path,params,query,fragment)`` with equally named properties, but if the URI scheme is "arcp" this also adds arcp properties as in :func:`parse_arcp()`. """ u = urlp.urlparse(uri) if (u.scheme == SCHEME): return ARCPParseResult(*u) else: return u class ARCPParseResult(urlp.ParseResult): """Result of parsing an arcp URI. This class does not detect if the arcp URI was valid according to the specification. This class extends :class:`urlllib.parse.ParseResult` adding arcp properties, some of which may be `None`. """ __slots__ = () def __init__(self, *args): if self.scheme != SCHEME: raise Exception("uri has scheme %s, expected %s" % (self.scheme, SCHEME)) def _host_split(self): """Return (prefix,name) if authority has "," - otherwise (None, authority). """ if self.netloc and "," in self.netloc: return self.netloc.split(",", 1) else: return (None, self.netloc) @property def prefix(self): """The arcp prefix, e.g. "uuid", "ni", "name" or None if no prefix was present. """ (prefix,name) = self._host_split() return prefix @property def name(self): """The URI's authority without arcp prefix. """ (prefix,name) = self._host_split() return name @property def uuid(self): """The arcp UUID if the prefix is "uuid", otherwise None.""" if self.prefix != "uuid": return None return UUID(self.name) @property def ni(self): """The arcp ni string if the prefix is "ni", otherwise None.""" if self.prefix != "ni": return None if not _ALG_VAL.match(self.name): raise Exception("Invalid alg-val for ni, prefix: %s" % self.netloc) return self.name def ni_uri(self, authority=""): """The ni URI (RFC6920_) if the prefix is "ni", otherwise None. If the ``authority`` parameter is provided, it will be used in the returned URI. .. _RFC6920: https://tools.ietf.org/search/rfc6920 """ ni = self.ni if ni is None: return None s = ("ni", authority, ni, None, None) return urlp.urlunsplit(s) def nih_uri(self): """The nih URI (RFC6920_) if the prefix is "ni", otherwise None. .. _RFC6920: https://tools.ietf.org/search/rfc6920 """ h = self.hash if h is None: return None (hash_method, hash_hex) = h segmented = _nih_segmented(hash_hex) checkdigit = _nih_checkdigit(hash_hex) path = "%s;%s;%s" % (hash_method, segmented, checkdigit) s = ("nih", None, path, None, None) return urlp.urlunsplit(s) def ni_well_known(self, base=""): """The ni .well-known URI (RFC5785_) if the prefix is "ni", otherwise None. The parameter ``base``, if provided, should be an absolute URI like ``"http://example.com/"`` - a relative URI is returned otherwise. .. _RFC5785: https://tools.ietf.org/html/rfc5785 """ (method, hash_b64) = self._ni_split() if method is None: return None # .well-known is always at / (RFC5785) path = "/.well-known/ni/%s/%s" % (method, hash_b64) return urlp.urljoin(base, path) def _ni_split(self): """Split self.ni: """ ni = self.ni if ni is None: return (None,None) # Already checked by self.ni regex #if not ";" in ni: # raise Exception("invalid ni hash: %s" % ni) (method, hash_b64) = ni.split(";", 1) return (method, hash_b64) @property def hash(self): """A tuple (hash_method,hash_hex) if the prefix is "ni", otherwise None. """ (method, hash_b64) = self._ni_split() if method is None: return None # re-instate padding as urlsafe_base64decode is strict missing_padding = 4 - (len(hash_b64) % 4) hash_b64 += "=" * missing_padding hash_bytes = urlsafe_b64decode(hash_b64) hash_hex = hexlify(hash_bytes).decode("ascii") return (method.lower(), hash_hex) def __repr__(self): props = ["scheme='arcp'"] props += ["prefix='%s'" % self.prefix or ""] props += ["name='%s'" % self.name or ""] if self.uuid is not None: props += ["uuid=%s" % self.uuid] if self.ni is not None: props += ["ni='%s'" % self.ni] # Avoid Exception in __repr__ if ";" in self.ni: props += ["hash=('%s', '%s'" % self.hash] # Traditional URI properties props += ["path='%s'" % self.path or ""] props += ["query='%s'" % self.query or ""] props += ["fragment='%s'" % self.fragment or ""] return "ARCPParseResult(%s)" % ",".join(props) def __str__(self): return self.geturl() def _alg_val_regex(): """Compile regular expression for RFC6920_ alg-val production .. _RFC6920: https://www.ietf.org/rfc/rfc6920 """ # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" unreserved = r"[A-Za-z0-9-._~]" # alg = 1*unreserved alg = r"(" + unreserved + r"+)" # val = 1*unreserved val = r"(" + unreserved + r"+)" # alg-val = alg ";" val alg_val = r"^" + alg + ";" + val + r"$" return re.compile(alg_val) _ALG_VAL = _alg_val_regex() def _nih_segmented(h, grouping=6): """Segment hex-hash with dashes in nih style RFC6920_ >>> _nih_segmented("0123456789abcdef") "012345-6789ab-cdef" .. _RFC6920: https://www.ietf.org/rfc/rfc6920 """ segmented = [] while h: segmented.append(h[:grouping]) h = h[grouping:] return "-".join(segmented) def _nih_checkdigit(h): """Luhn mod N algorithm in base 16 (hex) according to RFC6920_ .. _RFC6920: https://www.ietf.org/rfc/rfc6920 """ ## Adopted from https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm ## pseudocode factor = 2 total = 0 base = 16 digits = len(h) # 0 if digits has even length, 1 if odd # (as we start doubling with the very last digit) parity = digits % 2 for x in range(digits): digit = int(h[x], 16) if x % 2 != parity: # double every second digit digit *= 2 # slight less efficient, but more verbose: # if > 16: # total += digit - 16 + 1 # else: # total + digit total += sum(divmod(digit, 16)) else: # Not doubled, must be <16 total += digit # checkdigit that needs to be added to total # to get 0 after modulus remainder = (16-total) % 16 # Return as hex digit return "%x" % remainder arcp-0.2.1/arcp.egg-info/0000755000175000017500000000000013620764740015345 5ustar stainstain00000000000000arcp-0.2.1/arcp.egg-info/PKG-INFO0000644000175000017500000002147113620764740016447 0ustar stainstain00000000000000Metadata-Version: 1.1 Name: arcp Version: 0.2.1 Summary: arcp (Archive and Package) URI parser and generator Home-page: https://arcp.readthedocs.io/ Author: Stian Soiland-Reyes Author-email: stain@apache.org License: Apache License, Version 2.0 Download-URL: https://github.com/stain/arcp-py/archive/0.1.0.tar.gz Description: arcp-py ======= Create/parse arcp_ (Archive and Package) URIs. .. image:: https://readthedocs.org/projects/arcp/badge/?version=latest :target: https://arcp.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.1162749.svg :target: https://doi.org/10.5281/zenodo.1162749 .. image:: https://travis-ci.org/stain/arcp-py.svg?branch=master :target: https://travis-ci.org/stain/arcp-py .. image:: https://img.shields.io/pypi/v/arcp.svg?maxAge=86400 :target: https://pypi.org/project/arcp/ .. image:: https://coveralls.io/repos/github/stain/arcp-py/badge.svg?branch=master :target: https://coveralls.io/github/stain/arcp-py?branch=master .. image:: https://codecov.io/gh/stain/arcp-py/branch/master/graph/badge.svg :target: https://codecov.io/gh/stain/arcp-py Introduction ------------ ``arcp`` provides functions for creating arcp_ URIs, which can be used for identifying or parsing hypermedia files packaged in an archive or package, like a ZIP file. arcp URIs can be used to consume or reference hypermedia resources bundled inside a file archive or an application package, as well as to resolve URIs for archive resources within a programmatic framework. This URI scheme provides mechanisms to generate a unique base URI to represent the root of the archive, so that relative URI references in a bundled resource can be resolved within the archive without having to extract the archive content on the local file system. An arcp URI can be used for purposes of isolation (e.g. when consuming multiple archives), security constraints (avoiding “climb out” from the archive), or for externally identiyfing sub-resources referenced by hypermedia formats. Examples: - ``arcp://uuid,32a423d6-52ab-47e3-a9cd-54f418a48571/doc.html`` - ``arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/pics/`` - ``arcp://ni,sha-256;F-34D4TUeOfG0selz7REKRDo4XePkewPeQYtjL3vQs0/`` - ``arcp://name,gallery.example.org/`` The different forms of URI authority_ in arcp URIs can be used depending on which uniqueness constraints to apply when addressing an archive. See the arcp_ specification (*draft-soilandreyes-arcp*) for details. Note that this library only provides mechanisms to *generate* and *parse* arcp URIs, and do *not* integrate with any particular archive or URL handling modules like ``zipfile`` or ``urllib.request``. License ------- © 2018-2020 Stian Soiland-Reyes , The University of Manchester, UK Licensed under the Apache License, version 2.0 , see the file LICENSE.txt for details. Contribute ---------- Source code: Feel free to raise a pull request at or an issue at . Submitted contributions are assumed to be covered by section 5 of the Apache License 2.0. Installing ---------- You will need Python 2.7, Python 3.4 or later (Recommended: 3.6). If you have pip_, then the easiest is normally to install from using:: pip install arcp If you want to install manually from this code base, then try:: python setup.py install Usage ------ For full documentation, see or use ``help(arcp)`` This module provides functions for creating arcp_ URIs, which can be used for identifying or parsing hypermedia files packaged in an archive or package, like a ZIP file:: python >>> from arcp import * >>> arcp_random() 'arcp://uuid,dcd6b1e8-b3a2-43c9-930b-0119cf0dc538/' >>> arcp_random("/foaf.ttl", fragment="me") 'arcp://uuid,dcd6b1e8-b3a2-43c9-930b-0119cf0dc538/foaf.ttl#me' >>> arcp_hash(b"Hello World!", "/folder/") 'arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/folder/' >>> arcp_location("http://example.com/data.zip", "/file.txt") 'arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/file.txt' arcp URLs can be used with ``urllib.parse``, for instance using ``urljoin`` to resolve relative references:: >>> css = arcp.arcp_name("app.example.com", "css/style.css") >>> urllib.parse.urljoin(css, "../fonts/foo.woff") 'arcp://name,app.example.com/fonts/foo.woff' In addition this module provides functions that can be used to parse arcp URIs into its constituent fields:: python >>> is_arcp_uri("arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/file.txt") True >>> is_arcp_uri("http://example.com/t") False >>> u = parse_arcp("arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/file.txt") ARCPSplitResult(scheme='arcp',prefix='uuid',name='b7749d0b-0e47-5fc4-999d-f154abe68065', uuid='b7749d0b-0e47-5fc4-999d-f154abe68065',path='/file.txt',query='',fragment='') >>> u.path '/file.txt' >>> u.prefix 'uuid' >>> u.uuid UUID('b7749d0b-0e47-5fc4-999d-f154abe68065') >>> u.uuid.version 5 >>> parse_arcp("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/folder/").hash ('sha-256', '7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069') The object returned from ``parse_arcp`` is similar to ``ParseResult`` from ``urlparse``, but contains additional properties ``prefix``, ``uuid``, ``ni``, ``hash`` and ``name``, some of which will be ``None`` depending on the arcp prefix. The function ``arcp.parse.urlparse`` can be imported as an alternative to ``urllib.parse.urlparse``. If the scheme is ``arcp`` then the extra arcp fields like `prefix`, `uuid`, `hash` and `name` are available as from `parse_arcp`, otherwise the output is the same as from regular `urlparse`:: python >>> from arcp.parse import urlparse >>> urlparse("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/folder/soup;sads") ARCPParseResult(scheme='arcp',prefix='ni', name='sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk', ni='sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk', hash=('sha-256', '7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069', path='/folder/soup;sads',query='',fragment='') >>> urlparse("http://example.com/help?q=a") ParseResult(scheme='http', netloc='example.com', path='/help', params='', query='q=a', fragment='') .. _arcp: https://tools.ietf.org/html/draft-soilandreyes-arcp-03 .. _pip: https://docs.python.org/3/installing/ .. _authority: https://tools.ietf.org/id/draft-soilandreyes-arcp-03.html#rfc.section.4.1 Keywords: arcp uri url iri archive package Platform: UNKNOWN Classifier: Development Status :: 3 - Alpha Classifier: Intended Audience :: Developers Classifier: Topic :: Software Development :: Build Tools Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 Classifier: Topic :: Internet Classifier: Topic :: Internet :: WWW/HTTP Classifier: Topic :: System :: Archiving Classifier: Topic :: System :: Archiving :: Packaging arcp-0.2.1/arcp.egg-info/SOURCES.txt0000644000175000017500000000064413620764740017235 0ustar stainstain00000000000000.gitignore .travis.yml CONTRIBUTORS.txt LICENSE.txt MANIFEST.in README.rst requirements.txt setup.cfg setup.py arcp/__init__.py arcp/generate.py arcp/parse.py arcp.egg-info/PKG-INFO arcp.egg-info/SOURCES.txt arcp.egg-info/dependency_links.txt arcp.egg-info/top_level.txt docs/Makefile docs/arcp.rst docs/conf.py docs/generate.rst docs/index.rst docs/parse.rst tests/__init__.py tests/test_generate.py tests/test_parse.pyarcp-0.2.1/arcp.egg-info/dependency_links.txt0000644000175000017500000000000113620764740021413 0ustar stainstain00000000000000 arcp-0.2.1/arcp.egg-info/top_level.txt0000644000175000017500000000000513620764740020072 0ustar stainstain00000000000000arcp arcp-0.2.1/docs/0000755000175000017500000000000013620764740013656 5ustar stainstain00000000000000arcp-0.2.1/docs/Makefile0000644000175000017500000000113113620764671015315 0ustar stainstain00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXPROJ = arcp SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)arcp-0.2.1/docs/arcp.rst0000644000175000017500000000020313620764671015333 0ustar stainstain00000000000000arcp ==== .. automodule:: arcp :members: is_arcp_uri, parse_arcp, arcp_uuid, arcp_random, arcp_location, arcp_name, arcp_hash arcp-0.2.1/docs/conf.py0000644000175000017500000001221613620764671015162 0ustar stainstain00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # arcp documentation build configuration file, created by # sphinx-quickstart on Tue Jan 30 13:12:39 2018. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys sys.path.insert(0, os.path.abspath('..')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.coverage', 'sphinx.ext.viewcode'] # External References intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' # General information about the project. project = 'arcp' copyright = '2018, Stian Soiland-Reyes' author = 'Stian Soiland-Reyes' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = '0.1.0' # The full version, including alpha/beta/rc tags. release = '0.1.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'alabaster' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # This is required for the alabaster theme # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars html_sidebars = { '**': [ 'relations.html', # needs 'show_related': True theme option to display 'searchbox.html', ] } # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. htmlhelp_basename = 'arcpdoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'arcp.tex', 'arcp Documentation', 'Stian Soiland-Reyes', 'manual'), ] # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'arcp', 'arcp Documentation', [author], 1) ] # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'arcp', 'arcp Documentation', author, 'arcp', 'One line description of project.', 'Miscellaneous'), ] arcp-0.2.1/docs/generate.rst0000644000175000017500000000011013620764671016175 0ustar stainstain00000000000000arcp.generate ------------- .. automodule:: arcp.generate :members: arcp-0.2.1/docs/index.rst0000644000175000017500000000536713620764671015535 0ustar stainstain00000000000000arcp (Archive and Package) URI Python library ============================================= .. toctree:: :maxdepth: 2 :caption: Contents: arcp generate parse Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ``arcp`` provides functions for creating arcp_ URIs, which can be used for identifying or parsing hypermedia files packaged in an archive or package, like a ZIP file. arcp URIs can be used to consume or reference hypermedia resources bundled inside a file archive or an application package, as well as to resolve URIs for archive resources within a programmatic framework. This URI scheme provides mechanisms to generate a unique base URI to represent the root of the archive, so that relative URI references in a bundled resource can be resolved within the archive without having to extract the archive content on the local file system. An arcp URI can be used for purposes of isolation (e.g. when consuming multiple archives), security constraints (avoiding “climb out” from the archive), or for externally identiyfing sub-resources referenced by hypermedia formats. Examples: - ``arcp://uuid,32a423d6-52ab-47e3-a9cd-54f418a48571/doc.html`` - ``arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/pics/`` - ``arcp://ni,sha-256;F-34D4TUeOfG0selz7REKRDo4XePkewPeQYtjL3vQs0/`` - ``arcp://name,gallery.example.org/`` The different forms of URI authority_ in arcp URIs can be used depending on which uniqueness constraints to apply when addressing an archive. See the arcp_ specification (draft-soilandreyes-arcp) for details. Note that this library only provides mechanisms to *generate* and *parse* arcp URIs, and do *not* integrate with any particular archive or URL handling modules like ``zipfile`` or ``urllib.request``. License ------- © 2018-2020 Stian Soiland-Reyes , The University of Manchester, UK Licensed under the Apache License, version 2.0 . Source code and contributing ---------------------------- Source code: Feel free to raise a pull request at or an issue at . Installing ---------- You will need Python 2.7, Python 3.4 or later (Recommended: 3.6). If you have pip_, then the easiest is normally to install from using:: pip install arcp If you want to install manually from this code base, then try:: python setup.py install .. _arcp: https://tools.ietf.org/id/draft-soilandreyes-arcp-03.html .. _pip: https://docs.python.org/3/installing/ .. _authority: https://tools.ietf.org/id/draft-soilandreyes-arcp-03.html#rfc.section.4.1 arcp-0.2.1/docs/parse.rst0000644000175000017500000000007713620764671015531 0ustar stainstain00000000000000arcp.parse ---------- .. automodule:: arcp.parse :members: arcp-0.2.1/requirements.txt0000644000175000017500000000002413620764671016211 0ustar stainstain00000000000000wheel sphinx pytest arcp-0.2.1/setup.cfg0000644000175000017500000000015513620764740014550 0ustar stainstain00000000000000[metadata] description-file = README.rst [bdist_wheel] universal = 1 [egg_info] tag_build = tag_date = 0 arcp-0.2.1/setup.py0000755000175000017500000000512413620764677014456 0ustar stainstain00000000000000#!/usr/bin/env python ## Copyright 2018-2020 Stian Soiland-Reyes, The University of Manchester, UK ## ## Licensed under the Apache License, Version 2.0 (the "License"); ## you may not use this file except in compliance with the License. ## You may obtain a copy of the License at ## ## http://www.apache.org/licenses/LICENSE-2.0 ## ## Unless required by applicable law or agreed to in writing, software ## distributed under the License is distributed on an "AS IS" BASIS, ## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ## See the License for the specific language governing permissions and ## limitations under the License. __author__ = "Stian Soiland-Reyes " __copyright__ = "Copyright 2018-2020 The University of Manchester" __license__ = "Apache License, version 2.0 (https://www.apache.org/licenses/LICENSE-2.0)" from setuptools import setup, find_packages from codecs import open from os import path here = path.abspath(path.dirname(__file__)) with open(path.join(here, 'README.rst'), encoding='utf-8') as f: long_description = f.read() setup( name = 'arcp', packages = find_packages(exclude=['contrib', 'docs', 'tests']), # Required version = '0.2.1', description = 'arcp (Archive and Package) URI parser and generator', long_description=long_description, author = 'Stian Soiland-Reyes', author_email = 'stain@apache.org', # https://www.apache.org/licenses/LICENSE-2.0 license = "Apache License, Version 2.0", url = 'https://arcp.readthedocs.io/', download_url = 'https://github.com/stain/arcp-py/archive/0.1.0.tar.gz', keywords = "arcp uri url iri archive package", install_requires=[], classifiers=[ # https://pypi.python.org/pypi?%3Aaction=list_classifiers 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', 'Topic :: Software Development :: Build Tools', 'Topic :: Software Development :: Libraries :: Python Modules', # 'License :: OSI Approved :: Apache Software License', # https://github.com/pypa/pypi-legacy/issues/564 #'License :: OSI Approved', # 'License :: OSI Approved :: Apache License, Version 2.0 (Apache-2.0)', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Topic :: Internet', 'Topic :: Internet :: WWW/HTTP', 'Topic :: System :: Archiving', 'Topic :: System :: Archiving :: Packaging', ], ) arcp-0.2.1/tests/0000755000175000017500000000000013620764740014070 5ustar stainstain00000000000000arcp-0.2.1/tests/__init__.py0000644000175000017500000000000013620764671016172 0ustar stainstain00000000000000arcp-0.2.1/tests/test_generate.py0000644000175000017500000002743313620764671017307 0ustar stainstain00000000000000#!/usr/bin/env python ## Copyright 2018 Stian Soiland-Reyes, The University of Manchester, UK ## ## Licensed under the Apache License, Version 2.0 (the "License"); ## you may not use this file except in compliance with the License. ## You may obtain a copy of the License at ## ## http://www.apache.org/licenses/LICENSE-2.0 ## ## Unless required by applicable law or agreed to in writing, software ## distributed under the License is distributed on an "AS IS" BASIS, ## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ## See the License for the specific language governing permissions and ## limitations under the License. import unittest from uuid import UUID, RFC_4122, NAMESPACE_OID import re from arcp import generate from hashlib import sha256, md5 # Some test data TEST_UUID_v1 = UUID("dbc0802a-0682-11e8-9895-b8ca3ad10ac0") TEST_UUID_v4 = UUID("8c36d39a-18be-4aa8-b1ce-fef330b00a28") OID = "1.3.6.1.4.1.13661" uuid_re = re.compile(r"^[0-9a-fA-F]{8}-([0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}$") class UUIDTest(unittest.TestCase): """Test arcp_uuid()""" def testUUID(self): self.assertEqual("arcp://uuid,dbc0802a-0682-11e8-9895-b8ca3ad10ac0/", generate.arcp_uuid(TEST_UUID_v1)) # UUID version should not matter self.assertEqual("arcp://uuid,8c36d39a-18be-4aa8-b1ce-fef330b00a28/", generate.arcp_uuid(TEST_UUID_v4)) def testUUIDPath(self): self.assertEqual("arcp://uuid,dbc0802a-0682-11e8-9895-b8ca3ad10ac0/folder/file.txt", generate.arcp_uuid(TEST_UUID_v1, "/folder/file.txt")) def testUUIDPathQuery(self): self.assertEqual("arcp://uuid,dbc0802a-0682-11e8-9895-b8ca3ad10ac0/folder/file.txt?q=s", generate.arcp_uuid(TEST_UUID_v1, "/folder/file.txt", "q=s")) def testUUIDPathQueryFrag(self): self.assertEqual("arcp://uuid,dbc0802a-0682-11e8-9895-b8ca3ad10ac0/folder/file.txt?q=s#frag", generate.arcp_uuid(TEST_UUID_v1, "/folder/file.txt", "q=s", "frag")) def testUUIDQuery(self): self.assertEqual("arcp://uuid,dbc0802a-0682-11e8-9895-b8ca3ad10ac0/?a=b&c=d", generate.arcp_uuid(TEST_UUID_v1, query="a=b&c=d")) def testUUIDFrag(self): self.assertEqual("arcp://uuid,dbc0802a-0682-11e8-9895-b8ca3ad10ac0/#frag", generate.arcp_uuid(TEST_UUID_v1, fragment="frag")) def testUUIDstr(self): self.assertEqual("arcp://uuid,dbc0802a-0682-11e8-9895-b8ca3ad10ac0/", generate.arcp_uuid("dbc0802a-0682-11e8-9895-b8ca3ad10ac0")) def testUUIDstrFromUpperCase(self): self.assertEqual("arcp://uuid,dbc0802a-0682-11e8-9895-b8ca3ad10ac0/", generate.arcp_uuid("dbc0802a-0682-11e8-9895-b8ca3ad10ac0")) def testUUIDstrPathQueryFrag(self): self.assertEqual("arcp://uuid,dbc0802a-0682-11e8-9895-b8ca3ad10ac0/folder/file.txt?q=s#frag", generate.arcp_uuid("dbc0802a-0682-11e8-9895-b8ca3ad10ac0", "/folder/file.txt", "q=s", "frag")) def testUUIDstrInvalidUUID(self): with self.assertRaises(Exception): # Too short generate.arcp_uuid("5da78af6") with self.assertRaises(Exception): # empty generate.arcp_uuid("") class RandomTest(unittest.TestCase): """Test arcp_random(), with implicit or explicit UUID""" def testRandom(self): u = generate.arcp_random() u2 = generate.arcp_random() # always fresh self.assertNotEqual(u, u2) self.assertTrue(u.startswith("arcp://uuid,")) self.assertTrue(u.endswith("/")) def testRandomValidUUID(self): u = generate.arcp_random() # Extract UUID to ensure it is v4 uuidStr = u.replace("arcp://uuid,", "").strip("/") # Ensure RFC4122 compliance self.assertIsNotNone(uuid_re.match(uuidStr)) # ensure lower-case in output self.assertEqual(uuidStr, uuidStr.lower()) # must be valid UUID uuid = UUID(uuidStr) # must be RFC_4122 variant, version 4 (random) self.assertEqual(RFC_4122, uuid.variant) self.assertEqual(4, uuid.version) def testRandomPath(self): u = generate.arcp_random("/folder/file.txt") self.assertTrue(u.startswith("arcp://uuid,")) self.assertTrue(u.endswith("/folder/file.txt")) self.assertEqual( # initial / in path is optional len(generate.arcp_random("file.txt")), len(generate.arcp_random("/file.txt"))) def testRandomQuery(self): u = generate.arcp_random(query="q=a") self.assertTrue(u.startswith("arcp://uuid,")) self.assertTrue(u.endswith("/?q=a")) def testRandomFrag(self): u = generate.arcp_random(fragment="frag") self.assertTrue(u.startswith("arcp://uuid,")) self.assertTrue(u.endswith("/#frag")) def testRandomPathQueryFrag(self): u = generate.arcp_random("/folder/file.txt", "a=b&c=d", "frag") self.assertTrue(u.startswith("arcp://uuid,")) self.assertTrue(u.endswith("/folder/file.txt?a=b&c=d#frag")) # Now test providing a fixed UUID def testUUID(self): self.assertEqual("arcp://uuid,8c36d39a-18be-4aa8-b1ce-fef330b00a28/", generate.arcp_random(uuid=TEST_UUID_v4)) # UUID version must be 4 with self.assertRaises(Exception): generate.arcp_random(uuid=TEST_UUID_v1) def testUUIDPath(self): self.assertEqual("arcp://uuid,8c36d39a-18be-4aa8-b1ce-fef330b00a28/folder/file.txt", generate.arcp_random("/folder/file.txt", uuid=TEST_UUID_v4)) def testUUIDstr(self): self.assertEqual("arcp://uuid,8c36d39a-18be-4aa8-b1ce-fef330b00a28/", generate.arcp_random(uuid="8c36d39a-18be-4aa8-b1ce-fef330b00a28")) with self.assertRaises(Exception): # UUID version must be 4 generate.arcp_random(uuid="dbc0802a-0682-11e8-9895-b8ca3ad10ac0") def testUUIDstrPathQueryFrag(self): self.assertEqual("arcp://uuid,8c36d39a-18be-4aa8-b1ce-fef330b00a28/folder/file.txt?q=s#frag", generate.arcp_random("/folder/file.txt", "q=s", "frag", uuid="8c36d39a-18be-4aa8-b1ce-fef330b00a28")) def testUUIDstrInvalidUUID(self): with self.assertRaises(Exception): # Too short generate.arcp_random(uuid="5da78af6") with self.assertRaises(Exception): # empty generate.arcp_random(uuid="") class LocationTest(unittest.TestCase): """Test arcp_location()""" def testExampleZip(self): # URL and expected UUID as in # https://tools.ietf.org/id/draft-soilandreyes-arcp-03.html#rfc.appendix.A.2 self.assertEqual("arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/", generate.arcp_location("http://example.com/data.zip")) self.assertEqual("arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/pics/", generate.arcp_location("http://example.com/data.zip", "/pics/")) self.assertEqual("arcp://uuid,b7749d0b-0e47-5fc4-999d-f154abe68065/pics/flower.jpeg", generate.arcp_location("http://example.com/data.zip", "/pics/flower.jpeg")) def testExampleZipUUIDValid(self): u = generate.arcp_location("http://example.com/") # Extract UUID to ensure it is v4 uuidStr = u.replace("arcp://uuid,", "").strip("/") # Ensure RFC4122 compliance self.assertIsNotNone(re.match(uuid_re, uuidStr)) # ensure lower-case in output self.assertEqual(uuidStr, uuidStr.lower()) # must be valid UUID uuid = UUID(uuidStr) # must be RFC_4122 variant, version 5 (name sha1) self.assertEqual(RFC_4122, uuid.variant) self.assertEqual(5, uuid.version) def testLocationOtherNamespace(self): u = generate.arcp_location(OID, namespace=NAMESPACE_OID, path="/example") self.assertEqual("arcp://uuid,215aa48f-233f-507f-8484-3eb5d6e23e9d/example", u) def testLocationNamespaceUUIDstrPathQueryFrag(self): self.assertEqual("arcp://uuid,215aa48f-233f-507f-8484-3eb5d6e23e9d/folder/file.txt?q=s#frag", generate.arcp_location(OID, "/folder/file.txt", "q=s", "frag", namespace=NAMESPACE_OID)) class NameTest(unittest.TestCase): """Test arcp_name()""" def testExampleName(self): # Adapted from # https://tools.ietf.org/id/draft-soilandreyes-arcp-02.html#rfc.appendix.A.7 self.assertEqual("arcp://name,gallery.example.org/", generate.arcp_name("gallery.example.org")) new_photos = generate.arcp_name("gallery.example.org", "/photos/", "New") self.assertEqual("arcp://name,gallery.example.org/photos/?New", new_photos) photo = generate.arcp_name("gallery.example.org", "/photos/137") self.assertEqual("arcp://name,gallery.example.org/photos/137", photo) template = "arcp://name,messaging.example.com/share;{*uri};{*redirect}" u = template.replace("{*uri}", photo).replace("{*redirect}", new_photos) # TODO: Should arcp_name support path = "/share;%s;%s" % (photo, new_photos) self.assertEqual(u, generate.arcp_name("messaging.example.com", path)) def testNamePath(self): self.assertEqual("arcp://name,app.example.org/msgs/1", generate.arcp_name("app.example.org", "/msgs/1")) def testNameQuery(self): self.assertEqual("arcp://name,app.example.org/?q=a", generate.arcp_name("app.example.org", query="q=a")) def testNameHash(self): self.assertEqual("arcp://name,app.example.org/#frag", generate.arcp_name("app.example.org", fragment="frag")) def testNamePathQueryHash(self): self.assertEqual("arcp://name,app.example.org/msgs/1?a=b&c=d#frag", generate.arcp_name("app.example.org", "/msgs/1", "a=b&c=d", "frag")) def testInvalidName(self): with self.assertRaises(Exception): generate.arcp_name("example com") def testEmptyName(self): # empty name is valid by the spec (TODO: should it be?) self.assertEqual("arcp://name,/", generate.arcp_name("")) # Example from https://tools.ietf.org/html/rfc6920#section-8.1 BYTES = "Hello World!".encode("ASCII") # echo -n "Hello World!" | sha256sum HASH = "7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069" NI = "ni:///sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk" ARCP = "arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/" class HashTest(unittest.TestCase): """Test arcp_hash()""" def testHash(self): # Using "Hello World!" example # instead of a real archive file self.assertEqual(ARCP, generate.arcp_hash(BYTES)) def testHashPath(self): self.assertEqual(ARCP + "bin/evil", generate.arcp_hash(BYTES, "/bin/evil")) def testHashQuery(self): self.assertEqual(ARCP + "?q=a", generate.arcp_hash(BYTES, query="q=a")) def testHashFragment(self): self.assertEqual(ARCP + "#frag", generate.arcp_hash(BYTES, fragment="frag")) def testHashPathQueryFragment(self): self.assertEqual(ARCP + "bin/evil?a=b&c=d#frag", generate.arcp_hash(BYTES, "/bin/evil", "a=b&c=d", "frag")) def testHashHash(self): h = sha256(BYTES) self.assertEqual(ARCP, generate.arcp_hash(hash=h)) def testHashPathEmptyHash(self): h = sha256() self.assertEqual(ARCP + "bin/evil", generate.arcp_hash(BYTES, "bin/evil", hash=h)) def testHashPathPremadeHash(self): h = sha256(BYTES) self.assertEqual(ARCP + "bin/evil", generate.arcp_hash(path="bin/evil", hash=h)) def testHashWrongHash(self): h = md5(BYTES) with self.assertRaises(Exception): generate.arcp_hash(hash=h) arcp-0.2.1/tests/test_parse.py0000644000175000017500000003065413620764671016626 0ustar stainstain00000000000000#!/usr/bin/env python ## Copyright 2018 Stian Soiland-Reyes, The University of Manchester, UK ## ## Licensed under the Apache License, Version 2.0 (the "License"); ## you may not use this file except in compliance with the License. ## You may obtain a copy of the License at ## ## http://www.apache.org/licenses/LICENSE-2.0 ## ## Unless required by applicable law or agreed to in writing, software ## distributed under the License is distributed on an "AS IS" BASIS, ## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ## See the License for the specific language governing permissions and ## limitations under the License. import unittest from arcp import parse class TestIsArcpURI(unittest.TestCase): """Test is_arcp_uri()""" def test_arcp_uri(self): self.assertTrue(parse.is_arcp_uri( "arcp://uuid,ecba06ed-472e-46d4-8ab8-9570e40e0b8c/")) def test_not_arcp_uri(self): self.assertFalse(parse.is_arcp_uri( "http://example.com/")) def test_arcp_uri_authority_fallback(self): self.assertTrue(parse.is_arcp_uri( "arcp://example.com")) self.assertTrue(parse.is_arcp_uri( "arcp://x-unknown,abc")) class TestParse(unittest.TestCase): """Test parse_arcp()""" def _test_tuple(self, t): (scheme,netloc,path,params,query,fragment) = t self.assertEqual(t.scheme, scheme) self.assertEqual(t.netloc, netloc) self.assertEqual(t.path, path) self.assertEqual(t.params, params) self.assertEqual(t.query, query) self.assertEqual(t.fragment, fragment) def test_parse(self): t = parse.parse_arcp("arcp://uuid,ecba06ed-472e-46d4-8ab8-9570e40e0b8c/") self._test_tuple(t) self.assertEqual("arcp", t.scheme) self.assertEqual("uuid,ecba06ed-472e-46d4-8ab8-9570e40e0b8c", t.netloc) self.assertEqual("/", t.path) self.assertEqual("", t.params) self.assertEqual("", t.query) self.assertEqual("", t.fragment) def test_parse_path_query_fragment(self): t = parse.parse_arcp("arcp://uuid,ecba06ed-472e-46d4-8ab8-9570e40e0b8c/file;p=1?q=a#frag") self._test_tuple(t) self.assertEqual("arcp", t.scheme) self.assertEqual("uuid,ecba06ed-472e-46d4-8ab8-9570e40e0b8c", t.netloc) self.assertEqual("/file", t.path) self.assertEqual("p=1", t.params) self.assertEqual("q=a", t.query) self.assertEqual("frag", t.fragment) def test_parse_authority_fallback(self): t = parse.parse_arcp("arcp://example.com/") self._test_tuple(t) self.assertEqual("arcp", t.scheme) self.assertEqual("example.com", t.netloc) self.assertEqual("/", t.path) self.assertEqual("", t.params) self.assertEqual("", t.query) self.assertEqual("", t.fragment) def test_parse_empty_authority(self): t = parse.parse_arcp("arcp:///") self._test_tuple(t) self.assertEqual("arcp", t.scheme) self.assertEqual("", t.netloc) self.assertEqual("/", t.path) self.assertEqual("", t.params) self.assertEqual("", t.query) self.assertEqual("", t.fragment) def test_parseFails(self): with self.assertRaises(Exception): parse.parse_arcp("http://example.com/") def test_parse_prefix(self): self.assertEqual("uuid", parse.parse_arcp("arcp://uuid,ecba06ed-472e-46d4-8ab8-9570e40e0b8c/").prefix) self.assertEqual("ni", parse.parse_arcp("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/").prefix) self.assertEqual("name", parse.parse_arcp("arcp://name,example.com/").prefix) # authority fall-back self.assertIsNone( parse.parse_arcp("arcp://example.com/").prefix) # silly..but valid by specification self.assertEqual("", parse.parse_arcp("arcp://,example.com/").prefix) # Unknown prefixes are also picked up (should they?) self.assertEqual("x-unknown", parse.parse_arcp("arcp://x-unknown,abc").prefix) def test_parse_name(self): self.assertEqual("example.com", parse.parse_arcp("arcp://name,example.com/").name) # but other "names" are also supported: self.assertEqual("ecba06ed-472e-46d4-8ab8-9570e40e0b8c", parse.parse_arcp("arcp://uuid,ecba06ed-472e-46d4-8ab8-9570e40e0b8c/").name) self.assertEqual("sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk", parse.parse_arcp("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/").name) # authority fall-back self.assertEqual("example.com", parse.parse_arcp("arcp://example.com/").name) # unlikely, but valid by specification self.assertEqual("", parse.parse_arcp("arcp:///").name) # name from an unknown prefix self.assertEqual("abc", parse.parse_arcp("arcp://x-unknown,abc").name) def test_parse_uuid(self): self.assertEqual("ecba06ed472e46d48ab89570e40e0b8c", parse.parse_arcp("arcp://uuid,ecba06ed-472e-46d4-8ab8-9570e40e0b8c/").uuid.hex) self.assertIsNone( parse.parse_arcp("arcp://name,ecba06ed-472e-46d4-8ab8-9570e40e0b8c/").uuid) self.assertIsNone( parse.parse_arcp("arcp://ecba06ed-472e-46d4-8ab8-9570e40e0b8c/").uuid) def test_parse_uuid_fails(self): with self.assertRaises(Exception): parse.parse_arcp("arcp://uuid,ecba06ed-WRONG/").uuid def test_parse_ni(self): self.assertEqual("sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk", parse.parse_arcp("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/").ni) self.assertIsNone( parse.parse_arcp("arcp://name,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/").ni) self.assertIsNone( parse.parse_arcp("arcp://sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/").ni) def test_parse_ni_invalid_fails(self): # Below example is invalid as alg-val string # does not contain ; with self.assertRaises(Exception): parse.parse_arcp("arcp://ni,sha-256/").ni_uri() with self.assertRaises(Exception): parse.parse_arcp("arcp://ni,sha-256/").nih_uri() with self.assertRaises(Exception): parse.parse_arcp("arcp://ni,sha-256/").hash() with self.assertRaises(Exception): parse.parse_arcp("arcp://ni,sha-256/").ni_well_known() with self.assertRaises(Exception): parse.parse_arcp("arcp://ni,sha-256/").ni def test_parse_ni_uri(self): self.assertEqual("ni:///sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk", parse.parse_arcp("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/") .ni_uri()) self.assertIsNone( parse.parse_arcp("arcp://name,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/") .ni_uri()) self.assertIsNone( parse.parse_arcp("arcp://sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/") .ni_uri()) self.assertEqual("ni://example.com/sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk", parse.parse_arcp("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/") .ni_uri("example.com")) def test_parse_nih_uri(self): self.assertEqual("nih:sha-256-120;532690-57e12f-e2b74b-a07c89-2560a2;f", parse.parse_arcp("arcp://ni,sha-256-120;UyaQV-Ev4rdLoHyJJWCi/") .nih_uri()) self.assertIsNone( parse.parse_arcp("arcp://name,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/") .nih_uri()) self.assertIsNone( parse.parse_arcp("arcp://sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/") .nih_uri()) def test_parse_ni_well_known(self): self.assertEqual("/.well-known/ni/sha-256/f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk", parse.parse_arcp("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/") .ni_well_known()) self.assertIsNone( parse.parse_arcp("arcp://name,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/") .ni_well_known()) self.assertIsNone( parse.parse_arcp("arcp://sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/") .ni_well_known()) self.assertEqual("http://example.com/.well-known/ni/sha-256/f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk", parse.parse_arcp("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/") .ni_well_known("http://example.com/")) def test_parse_hash(self): # sha256 of "Hello World!" in ascii self.assertEqual(("sha-256", "7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069"), parse.parse_arcp("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/") .hash) self.assertIsNone( parse.parse_arcp("arcp://name,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/") .hash) self.assertIsNone( parse.parse_arcp("arcp://sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/") .hash) # md5 of "Hello World!" in ascii self.assertEqual(("md5", "ed076287532e86365e841e92bfc50d8c"), parse.parse_arcp("arcp://ni,md5;7Qdih1MuhjZehB6Sv8UNjA/") .hash) def test_parse_repr_ni(self): u = parse.parse_arcp("arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/file?q=a#frag") r = repr(u) self.assertIn("scheme='arcp'", r) self.assertIn("prefix='ni'", r) self.assertIn("name='sha-256;f4OxZX", r)#.. self.assertIn("path='/file'", r) self.assertIn("query='q=a'", r) self.assertIn("fragment='frag'", r) self.assertIn("ni='sha-256;f4OxZX", r)#.. self.assertIn("hash=('sha-256', '7f83b16", r)#.. self.assertNotIn("uuid=", r) def test_parse_repr_uuid(self): u = parse.parse_arcp("arcp://uuid,32a423d6-52ab-47e3-a9cd-54f418a48571/") r = repr(u) self.assertIn("scheme='arcp'", r) self.assertIn("prefix='uuid'", r) self.assertIn("name='32a423d6-", r)#.. self.assertIn("path='/'", r) self.assertIn("uuid=32a423d6-", r)# .. self.assertIn("query=''", r) self.assertIn("fragment=''", r) self.assertNotIn("ni=", r) self.assertNotIn("hash=", r)#.. def test_parse_str_ni(self): uri = "arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/file?q=a#frag" u = parse.parse_arcp(uri) self.assertEqual(uri, str(u)) class URLParse(unittest.TestCase): """Test urlparse()""" def test_urlparse(self): self.assertEqual("name", parse.urlparse("arcp://name,example.com/").prefix) self.assertEqual("http", parse.urlparse("http://example.com/").scheme) class NIH_CheckDigit(unittest.TestCase): """Test _nih_checkdigit() using RFC6920 examples""" def test_checkdigit(self): self.assertEqual("f", parse._nih_checkdigit( "5326-9057-e12f-e2b7-4ba0-7c89-2560-a2".replace("-", ""))) self.assertEqual("b", parse._nih_checkdigit("53269057")) self.assertEqual("0", parse._nih_checkdigit("b053269057")) self.assertEqual("d", parse._nih_checkdigit("acefeed")) self.assertEqual("0", parse._nih_checkdigit("dacefeed")) self.assertEqual("4", parse._nih_checkdigit("123456789abcdef")) self.assertEqual("0", parse._nih_checkdigit("4123456789abcdef")) # Consistency check -- if we add $digit (or $digit0 for even-length) # in front, the new sum should be 0 class NIH_Segmented(unittest.TestCase): """Test _nih_segmented()""" def test_segment(self): self.assertEqual("532690-57e12f-e2b74b-a07c89-2560a2", parse._nih_segmented("53269057e12fe2b74ba07c892560a2")) self.assertEqual("5326-9057-e12f-e2b7-4ba0-7c89-2560-a2", parse._nih_segmented("53269057e12fe2b74ba07c892560a2", 4)) self.assertEqual("532690-5", parse._nih_segmented("5326905")) self.assertEqual("532690", parse._nih_segmented("532690")) self.assertEqual("53269", parse._nih_segmented("53269")) self.assertEqual("", parse._nih_segmented(""))