././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1641382261.3710868 parfive-1.5.1/0000755000175100001710000000000000000000000012200 5ustar00vstsdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/.codecov.yaml0000644000175100001710000000011000000000000014554 0ustar00vstsdockercoverage: status: project: default: threshold: 0.2% ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1641382261.3710868 parfive-1.5.1/.github/0000755000175100001710000000000000000000000013540 5ustar00vstsdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/.github/release-drafter.yml0000644000175100001710000000074400000000000017335 0ustar00vstsdockername-template: 'v$NEXT_MINOR_VERSION' tag-template: 'v$NEXT_MINOR_VERSION' categories: - title: 'Breaking Changes' labels: - 'breaking' - title: 'Enhancements' labels: - 'enhancement' - title: 'Bug Fixes' labels: - 'bug' - title: 'Documentation and code quality' labels: - 'documentation' - title: 'Misc/Internal Changes' labels: - 'misc' change-template: '- $TITLE @$AUTHOR (#$NUMBER)' template: | ## Changes $CHANGES ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1641382261.3710868 parfive-1.5.1/.github/workflows/0000755000175100001710000000000000000000000015575 5ustar00vstsdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/.github/workflows/release-drafter.yml0000644000175100001710000000036500000000000021371 0ustar00vstsdockername: Release Drafter on: push: branches: - main jobs: update_release_draft: runs-on: ubuntu-latest steps: - uses: release-drafter/release-drafter@v5 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/.gitignore0000644000175100001710000000100000000000000014157 0ustar00vstsdocker*.py[cod] .eggs/** # C extensions *.so # Packages *.egg *.egg-info dist build eggs parts bin var sdist develop-eggs .installed.cfg lib lib64 __pycache__ # Installer logs pip-log.txt # Unit test / coverage reports .coverage .tox nosetests.xml # Translations *.mo # Mr Developer .mr.developer.cfg .project .pydevproject docs/_build/ docs/api/ htmlcov/ .vscode/ .history pip-wheel-metadata/ parfive/tests/.ipynb_checkpoints/ parfive/tests/predicted-sunspot-radio-flux.txt parfive/_version.py coverage.xml ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/.pre-commit-config.yaml0000644000175100001710000001056100000000000016464 0ustar00vstsdockerrepos: # The warnings/errors we check for here are: # E101 - mix of tabs and spaces # E11 - Fix indentation. # E111 - 4 spaces per indentation level # E112 - 4 spaces per indentation level # E113 - 4 spaces per indentation level # E121 - Fix indentation to be a multiple of four. # E122 - Add absent indentation for hanging indentation. # E123 - Align closing bracket to match opening bracket. # E124 - Align closing bracket to match visual indentation. # E125 - Indent to distinguish line from next logical line. # E126 - Fix over-indented hanging indentation. # E127 - Fix visual indentation. # E128 - Fix visual indentation. # E129 - Fix visual indentation. # E131 - Fix hanging indent for unaligned continuation line. # E133 - Fix missing indentation for closing bracket. # E20 - Remove extraneous whitespace. 
# E211 - Remove extraneous whitespace.
# E231 - Add missing whitespace.
# E241 - Fix extraneous whitespace around keywords.
# E242 - Remove extraneous whitespace around operator.
# E251 - Remove whitespace around parameter '=' sign.
# E252 - Missing whitespace around parameter equals.
# E26 - Fix spacing after comment hash for inline comments.
# E265 - Fix spacing after comment hash for block comments.
# E266 - Fix too many leading '#' for block comments.
# E27 - Fix extraneous whitespace around keywords.
# E301 - Add missing blank line.
# E302 - Add missing 2 blank lines.
# E303 - Remove extra blank lines.
# E304 - Remove blank line following function decorator.
# E305 - Expected 2 blank lines after end of function or class.
# E306 - Expected 1 blank line before a nested definition.
# E401 - Put imports on separate lines.
# E402 - Fix module level import not at top of file
# E502 - Remove extraneous escape of newline.
# E701 - Put colon-separated compound statement on separate lines.
# E711 - Fix comparison with None.
# E712 - Fix comparison with boolean.
# E713 - Use 'not in' for test for membership.
# E714 - Use 'is not' test for object identity.
# E722 - Fix bare except.
# E731 - Use a def when you do not assign a lambda expression.
# E901 - SyntaxError or IndentationError
# E902 - IOError
# F822 - undefined name in __all__
# F823 - local variable name referenced before assignment
# W291 - Remove trailing whitespace.
# W292 - Add a single newline at the end of the file.
# W293 - Remove trailing whitespace on blank line.
# W391 - Remove trailing blank lines.
# W601 - Use "in" rather than "has_key()".
# W602 - Fix deprecated form of raising exception.
# W603 - Use "!=" instead of "<>"
# W604 - Use "repr()" instead of backticks.
# W605 - Fix invalid escape sequence 'x'.
# W690 - Fix various deprecated code (via lib2to3).
- repo: https://gitlab.com/pycqa/flake8 rev: 3.9.0 hooks: - id: flake8 args: ['--count', '--select', 'E101,E11,E111,E112,E113,E121,E122,E123,E124,E125,E126,E127,E128,E129,E131,E133,E20,E211,E231,E241,E242,E251,E252,E26,E265,E266,E27,E301,E302,E303,E304,E305,E306,E401,E402,E502,E701,E711,E712,E713,E714,E722,E731,E901,E902,F822,F823,W191,W291,W292,W293,W391,W601,W602,W603,W604,W605,W690'] exclude: ".*(.fits|.fts|.fit|.txt|tca.*|extern.*|.rst|.md|cm/__init__.py|sunpy/extern|sunpy/visualization/colormaps/color_tables.py)$" - repo: https://github.com/myint/autoflake rev: v1.4 hooks: - id: autoflake args: ['--in-place', '--remove-all-unused-imports', '--remove-unused-variable'] exclude: ".*(.fits|.fts|.fit|.txt|tca.*|extern.*|.rst|.md|__init__.py|sunpy/extern|docs/conf.py)$" - repo: https://github.com/timothycrosley/isort rev: 5.8.0 hooks: - id: isort args: ['--sp','setup.cfg'] exclude: ".*(.fits|.fts|.fit|.txt|tca.*|extern.*|.rst|.md|cm/__init__.py|sunpy/extern|docs/conf.py)$" - repo: https://github.com/pre-commit/pre-commit-hooks rev: v3.4.0 hooks: - id: check-ast - id: check-case-conflict - id: trailing-whitespace exclude: ".*(.fits|.fts|.fit|.txt)$" - id: check-yaml - id: debug-statements - id: check-added-large-files - id: end-of-file-fixer exclude: ".*(.fits|.fts|.fit|.txt|tca.*)$" - id: mixed-line-ending exclude: ".*(.fits|.fts|.fit|.txt|tca.*)$" ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/.readthedocs.yml0000644000175100001710000000072600000000000015273 0ustar00vstsdocker# .readthedocs.yml # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Build documentation in the docs/ directory with Sphinx sphinx: builder: html configuration: docs/conf.py fail_on_warning: true # Set the version of Python and requirements required to build your docs python: version: 3.8 install: - method: pip path: . extra_requirements: - docs ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/LICENSE0000644000175100001710000000204700000000000013210 0ustar00vstsdockerCopyright (c) 2017-2020 Stuart Mumford Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1641382261.3710868 parfive-1.5.1/PKG-INFO0000644000175100001710000000714400000000000013303 0ustar00vstsdockerMetadata-Version: 2.1 Name: parfive Version: 1.5.1 Summary: A HTTP and FTP parallel file downloader. 
Home-page: https://parfive.readthedocs.io/
Author: "Stuart Mumford"
Author-email: "stuart@cadair.com"
License: MIT
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Requires-Python: >=3.7
Provides-Extra: ftp
Provides-Extra: tests
Provides-Extra: docs
License-File: LICENSE

Parfive
=======

.. image:: https://img.shields.io/pypi/v/parfive.svg
   :target: https://pypi.python.org/pypi/parfive
   :alt: Latest PyPI version

.. image:: https://dev.azure.com/DrCadair/parfive/_apis/build/status/Cadair.parfive?repoName=Cadair%2Fparfive&branchName=master
   :target: https://dev.azure.com/DrCadair/parfive/_build/latest?definitionId=1&repoName=Cadair%2Fparfive&branchName=master
   :alt: Azure Pipelines Build Status

A parallel file downloader using asyncio. parfive can handle downloading multiple files in parallel as well as downloading each file in a number of chunks.

Usage
-----

parfive works by creating a downloader object, appending files to it and then running the download. parfive has a synchronous API, but uses asyncio to parallelise downloading the files.

A simple example is::

    from parfive import Downloader

    dl = Downloader()

    dl.enqueue_file("http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt", path="./")

    files = dl.download()

Parfive also bundles a CLI. The following example will download the two files concurrently::

    $ parfive 'http://212.183.159.230/5MB.zip' 'http://212.183.159.230/10MB.zip'
    $ parfive --help
    usage: parfive [-h] [--max-conn MAX_CONN] [--overwrite] [--no-file-progress]
                   [--directory DIRECTORY] [--print-filenames]
                   URLS [URLS ...]

    Parfive, the python asyncio based downloader

    positional arguments:
      URLS                  URLs of files to be downloaded.

    optional arguments:
      -h, --help            show this help message and exit
      --max-conn MAX_CONN   Number of maximum connections.
      --overwrite           Overwrite if the file exists.
      --no-file-progress    Hide the progress bar for each file.
      --directory DIRECTORY
                            Directory to which downloaded files are saved.
      --print-filenames     Print successfully downloaded files' names to stdout.

Results
^^^^^^^

``parfive.Downloader.download`` returns a ``parfive.Results`` object, which is a list of the filenames that have been downloaded. It also tracks any files which failed to download.

Handling Errors
^^^^^^^^^^^^^^^

If files fail to download, the urls and the response from the server are stored in the ``Results`` object returned by ``parfive.Downloader``. These can be used to inform users about the errors. (Note, the progress bar will finish in an incomplete state if a download fails, i.e. it will show ``4/5 Files Downloaded``.)

The ``Results`` object is a list with an extra attribute ``errors``; this property returns a list of named tuples, where each named tuple contains the ``.url`` and the ``.response``, which is an ``aiohttp.ClientResponse`` or an ``aiohttp.ClientError`` object.

Installation
------------

parfive is available on PyPI, you can install it with pip::

    pip install parfive

or if you want to use FTP downloads::

    pip install parfive[ftp]

Requirements
^^^^^^^^^^^^

- Python 3.7 or above
- aiohttp
- tqdm
- aioftp (for downloads over FTP)

Licence
-------

MIT Licensed

Authors
-------

`parfive` was written by `Stuart Mumford `__.
---- parfive-1.5.1/README.rst ----

Parfive
=======

.. image:: https://img.shields.io/pypi/v/parfive.svg
   :target: https://pypi.python.org/pypi/parfive
   :alt: Latest PyPI version

.. image:: https://dev.azure.com/DrCadair/parfive/_apis/build/status/Cadair.parfive?repoName=Cadair%2Fparfive&branchName=master
   :target: https://dev.azure.com/DrCadair/parfive/_build/latest?definitionId=1&repoName=Cadair%2Fparfive&branchName=master
   :alt: Azure Pipelines Build Status

A parallel file downloader using asyncio. parfive can handle downloading multiple files in parallel as well as downloading each file in a number of chunks.

Usage
-----

parfive works by creating a downloader object, appending files to it and then running the download. parfive has a synchronous API, but uses asyncio to parallelise downloading the files.

A simple example is::

    from parfive import Downloader

    dl = Downloader()

    dl.enqueue_file("http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt", path="./")

    files = dl.download()

Parfive also bundles a CLI. The following example will download the two files concurrently::

    $ parfive 'http://212.183.159.230/5MB.zip' 'http://212.183.159.230/10MB.zip'
    $ parfive --help
    usage: parfive [-h] [--max-conn MAX_CONN] [--overwrite] [--no-file-progress]
                   [--directory DIRECTORY] [--print-filenames]
                   URLS [URLS ...]

    Parfive, the python asyncio based downloader

    positional arguments:
      URLS                  URLs of files to be downloaded.

    optional arguments:
      -h, --help            show this help message and exit
      --max-conn MAX_CONN   Number of maximum connections.
      --overwrite           Overwrite if the file exists.
      --no-file-progress    Hide the progress bar for each file.
      --directory DIRECTORY
                            Directory to which downloaded files are saved.
      --print-filenames     Print successfully downloaded files' names to stdout.

Results
^^^^^^^

``parfive.Downloader.download`` returns a ``parfive.Results`` object, which is a list of the filenames that have been downloaded. It also tracks any files which failed to download.

Handling Errors
^^^^^^^^^^^^^^^

If files fail to download, the urls and the response from the server are stored in the ``Results`` object returned by ``parfive.Downloader``. These can be used to inform users about the errors. (Note, the progress bar will finish in an incomplete state if a download fails, i.e. it will show ``4/5 Files Downloaded``.)

The ``Results`` object is a list with an extra attribute ``errors``; this property returns a list of named tuples, where each named tuple contains the ``.url`` and the ``.response``, which is an ``aiohttp.ClientResponse`` or an ``aiohttp.ClientError`` object.

Installation
------------

parfive is available on PyPI, you can install it with pip::

    pip install parfive

or if you want to use FTP downloads::

    pip install parfive[ftp]

Requirements
^^^^^^^^^^^^

- Python 3.7 or above
- aiohttp
- tqdm
- aioftp (for downloads over FTP)

Licence
-------

MIT Licensed

Authors
-------

`parfive` was written by `Stuart Mumford `__.
././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/azure-pipelines.yml0000644000175100001710000000264200000000000016043 0ustar00vstsdockername: $(BuildDefinitionName)_$(Date:yyyyMMdd)$(Rev:.rr) variables: CI_NAME: Azure Pipelines CI_BUILD_ID: $(Build.BuildId) CI_BUILD_URL: "https://dev.azure.com/Cadair/parfive/_build/results?buildId=$(Build.BuildId)" resources: repositories: - repository: OpenAstronomy type: github endpoint: Cadair name: OpenAstronomy/azure-pipelines-templates ref: master trigger: branches: include: - '*' exclude: - '*backport*' tags: include: - 'v*' exclude: - '*dev*' stages: - stage: tests displayName: Tests jobs: - template: run-tox-env.yml@OpenAstronomy parameters: libraries: apt: - graphviz coverage: codecov envs: - linux: codestyle pytest: false - linux: build_docs pytest: false - linux: py38-conda - linux: py39 - macos: py37 - windows: py38 - ${{ if ne(variables['Build.Reason'], 'PullRequest') }}: - stage: deploy displayName: Build and Release jobs: - template: publish.yml@OpenAstronomy parameters: ${{ if startsWith(variables['Build.SourceBranch'], 'refs/tags/') }}: pypi_connection_name: 'PyPI' test_extras: ftp,tests test_command: pytest --pyargs parfive targets: - sdist - wheels_universal ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1641382261.3710868 parfive-1.5.1/docs/0000755000175100001710000000000000000000000013130 5ustar00vstsdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/docs/Makefile0000644000175100001710000000110500000000000014565 0ustar00vstsdocker# Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/docs/conf.py0000644000175100001710000000706400000000000014436 0ustar00vstsdocker""" Configuration file for the Sphinx documentation builder. isort:skip_file """ # flake8: NOQA: E402 # -- stdlib imports ------------------------------------------------------------ import datetime from packaging.version import Version # -- Project information ------------------------------------------------------- project = 'Parfive' author = 'Stuart Mumford and Contributors' copyright = '{}, {}'.format(datetime.datetime.now().year, author) # The full version, including alpha/beta/rc tags from parfive import __version__ release = __version__ parfive_version = Version(__version__) is_release = not(parfive_version.is_prerelease or parfive_version.is_devrelease) # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.coverage',
    'sphinx.ext.doctest',
    'sphinx.ext.inheritance_diagram',
    'sphinx.ext.intersphinx',
    'sphinx.ext.mathjax',
    'sphinx.ext.napoleon',
    'sphinx.ext.todo',
    'sphinx.ext.viewcode',
    'sphinx_automodapi.automodapi',
    'sphinx_automodapi.smart_resolver',
]

# Add any paths that contain templates here, relative to this directory.
# templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
html_extra_path = ['robots.txt']

exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The reST default role (used for this markup: `text`) to use for all
# documents. Set to the "smart" one.
default_role = 'obj'

# Disable having a separate return type row
napoleon_use_rtype = False

# Disable google style docstrings
napoleon_google_docstring = False

# -- Options for intersphinx extension -----------------------------------------

# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'https://docs.python.org/': None,
                       'http://aiohttp.readthedocs.io/en/stable': None,
                       'https://aioftp.readthedocs.io/': None}

# -- Options for HTML output ---------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.

try:
    from sunpy_sphinx_theme.conf import *  # NOQA
    html_theme_options = {
        'logo_url': 'https://parfive.readthedocs.io/en/latest/',
        "page_toctree_depths": {}
    }
except ImportError:
    pass

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']

# Render inheritance diagrams in SVG
graphviz_output_format = "svg"

graphviz_dot_args = [
    '-Nfontsize=10',
    '-Nfontname=Helvetica Neue, Helvetica, Arial, sans-serif',
    '-Efontsize=10',
    '-Efontname=Helvetica Neue, Helvetica, Arial, sans-serif',
    '-Gfontsize=10',
    '-Gfontname=Helvetica Neue, Helvetica, Arial, sans-serif'
]

---- parfive-1.5.1/docs/index.rst ----

.. currentmodule:: parfive

=======
Parfive
=======

Parfive is a small library for downloading files; its objective is to provide a simple API for queuing files for download and then providing excellent feedback to the user about the in-progress downloads. It also aims to provide a clear interface for inspecting any failed downloads.

The parfive package was motivated by the needs of `SunPy's <https://sunpy.org>`__ ``net`` submodule, but should be generally applicable to anyone who wants a user friendly way of downloading multiple files in parallel.

Parfive supports downloading files over either HTTP or FTP using `aiohttp <http://aiohttp.readthedocs.io/en/stable>`__ and `aioftp <https://aioftp.readthedocs.io/>`__. ``aioftp`` is an optional dependency, which does not need to be installed to download files over HTTP.
Installation
------------

parfive can be installed via pip::

    pip install parfive

or with FTP support::

    pip install parfive[ftp]

or with conda from conda-forge::

    conda install -c conda-forge parfive

or from `GitHub <https://github.com/Cadair/parfive>`__.

Usage
-----

parfive works by creating a downloader object, queuing downloads with it and then running the download. parfive has a synchronous API, but uses `asyncio` to parallelise downloading the files.

A simple example is::

    from parfive import Downloader

    dl = Downloader()

    dl.enqueue_file("http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt", path="./")

    files = dl.download()

It's also possible to download a list of URLs to a single destination using the `parfive.Downloader.simple_download` method::

    from parfive import Downloader

    files = Downloader.simple_download(['http://212.183.159.230/5MB.zip',
                                        'http://212.183.159.230/10MB.zip'], path="./")

Parfive also bundles a CLI. The following example will download the two files concurrently::

    $ parfive 'http://212.183.159.230/5MB.zip' 'http://212.183.159.230/10MB.zip'
    $ parfive --help
    usage: parfive [-h] [--max-conn MAX_CONN] [--overwrite] [--no-file-progress]
                   [--directory DIRECTORY] [--print-filenames]
                   URLS [URLS ...]

    Parfive, the python asyncio based downloader

    positional arguments:
      URLS                  URLs of files to be downloaded.

    optional arguments:
      -h, --help            show this help message and exit
      --max-conn MAX_CONN   Number of maximum connections.
      --overwrite           Overwrite if the file exists.
      --no-file-progress    Hide the progress bar for each file.
      --directory DIRECTORY
                            Directory to which downloaded files are saved.
      --print-filenames     Print successfully downloaded files' names to stdout.

.. automodapi:: parfive
    :no-heading:
    :no-main-docstr:

Environment Variables
---------------------

To facilitate debugging parfive reads the following environment variables:

* ``PARFIVE_SINGLE_DOWNLOAD`` - If set (to any value) this variable sets ``max_conn`` and ``max_splits`` to one; meaning that no parallelisation of the downloads will occur.
* ``PARFIVE_DISABLE_RANGE`` - If set (to any value) this variable will set ``max_splits`` to one; meaning that each file downloaded will only have one concurrent connection, although multiple files may be downloaded simultaneously.
* ``PARFIVE_OVERWRITE_ENABLE_AIOFILES`` - If set (to any value) and aiofiles is installed in the system, aiofiles will be used to write files to disk.
* ``PARFIVE_DEBUG`` - If set (to any value) will configure the built-in Python logger to log to stderr and set parfive, aiohttp and aioftp to debug levels.

Changelog
---------

See `GitHub Releases <https://github.com/Cadair/parfive/releases>`__ for the release history and changelog.

---- parfive-1.5.1/docs/robots.txt ----

User-agent: *
Allow: /*/latest/
Allow: /en/latest/ # Fallback for bots that don't understand wildcards
Allow: /*/stable/
Allow: /en/stable/ # Fallback for bots that don't understand wildcards
Disallow: /

---- parfive-1.5.1/parfive/__init__.py ----

"""
*******
parfive
*******

A parallel file downloader using asyncio.
* Documentation: https://parfive.readthedocs.io/en/stable/
* Source code: https://github.com/Cadair/parfive
"""
import logging as _logging

from .downloader import Downloader
from .results import Results

__all__ = ['Downloader', 'Results', 'log', "__version__"]

try:
    from ._version import version as __version__
except ImportError:
    print("Version not found, please reinstall parfive.")
    __version__ = "unknown"

log = _logging.getLogger('parfive')

---- parfive-1.5.1/parfive/_version.py ----

# coding: utf-8
# file generated by setuptools_scm
# don't change, don't track in version control
version = '1.5.1'
version_tuple = (1, 5, 1)

---- parfive-1.5.1/parfive/downloader.py ----

import os
import sys
import asyncio
import logging
import pathlib
import warnings
import contextlib
import urllib.parse
from functools import partial, lru_cache
from concurrent.futures import ThreadPoolExecutor

import aiohttp
from tqdm import tqdm, tqdm_notebook

import parfive

from .results import Results
from .utils import (
    FailedDownload,
    Token,
    _QueueList,
    default_name,
    get_filepath,
    get_ftp_size,
    get_http_size,
    in_notebook,
    run_in_thread,
)

try:
    import aioftp
except ImportError:  # pragma: nocover
    aioftp = None

try:
    import aiofiles  # pragma: nocover
except ImportError:
    aiofiles = None

SERIAL_MODE = "PARFIVE_SINGLE_DOWNLOAD" in os.environ
DISABLE_RANGE = "PARFIVE_DISABLE_RANGE" in os.environ or SERIAL_MODE

__all__ = ['Downloader']


class Downloader:
    """
    Download files in parallel.

    Parameters
    ----------
    max_conn : `int`, optional
        The number of parallel download slots.
    progress : `bool`, optional
        If `True` show a main progress bar showing how many of the total
        files have been downloaded. If `False`, no progress bars will be
        shown at all.
    file_progress : `bool`, optional
        If `True` and ``progress`` is true, show ``max_conn`` progress bars
        detailing the progress of each individual file being downloaded.
    loop : `asyncio.AbstractEventLoop`, optional
        No longer used, and will be removed in a future release.
    notebook : `bool`, optional
        If `True` tqdm will be used in notebook mode. If `None` an attempt
        will be made to detect the notebook and guess which progress bar to
        use.
    overwrite : `bool` or `str`, optional
        Determine how to handle downloading if a file already exists with the
        same name. If `False` the file download will be skipped and the path
        returned to the existing file, if `True` the file will be downloaded
        and the existing file will be overwritten, if `'unique'` the filename
        will be modified to be unique.
    headers : `dict`
        Request headers to be passed to the server. Adds `User-Agent`
        information about `parfive`, `aiohttp` and `python` if not passed
        explicitly.
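
    Examples
    --------
    A minimal sketch of typical usage, mirroring the README example::

        from parfive import Downloader

        dl = Downloader()
        dl.enqueue_file("http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt",
                        path="./")
        files = dl.download()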
""" def __init__(self, max_conn=5, progress=True, file_progress=True, loop=None, notebook=None, overwrite=False, headers=None, use_aiofiles=False): if loop: warnings.warn('The loop argument is no longer used, and will be ' 'removed in a future release.') self.max_conn = max_conn if not SERIAL_MODE else 1 self._init_queues() # Configure progress bars if notebook is None: notebook = in_notebook() self.progress = progress self.file_progress = file_progress if self.progress else False self.tqdm = tqdm if not notebook else tqdm_notebook self.overwrite = overwrite self.headers = headers if headers is None or 'User-Agent' not in headers: self.headers = { 'User-Agent': f"parfive/{parfive.__version__} aiohttp/{aiohttp.__version__} python/{sys.version[:5]}"} self._use_aiofiles = use_aiofiles def _init_queues(self): # Setup queues self.http_queue = _QueueList() self.ftp_queue = _QueueList() def _generate_tokens(self): # Create a Queue with max_conn tokens queue = asyncio.Queue(maxsize=self.max_conn) for i in range(self.max_conn): queue.put_nowait(Token(i + 1)) return queue @property @lru_cache() def use_aiofiles(self): """ aiofiles will be used if installed and must be explicitly enabled PARFIVE_OVERWRITE_ENABLE_AIOFILES takes precedence if present, aiofiles will not be used finally the Downloader's constructor argument is considered. """ if aiofiles is None: return False if "PARFIVE_OVERWRITE_ENABLE_AIOFILES" in os.environ: return True return self._use_aiofiles @property @lru_cache() def default_chunk_size(self): """ aiofiles requires a different default chunk size """ return 1024 if self.use_aiofiles else 100 @property def queued_downloads(self): """ The total number of files already queued for download. """ return len(self.http_queue) + len(self.ftp_queue) def enqueue_file(self, url, path=None, filename=None, overwrite=None, **kwargs): """ Add a file to the download queue. Parameters ---------- url : `str` The URL to retrieve. path : `str`, optional The directory to retrieve the file into, if `None` defaults to the current directory. filename : `str` or `callable`, optional The filename to save the file as. Can also be a callable which takes two arguments the url and the response object from opening that URL, and returns the filename. (Note, for FTP downloads the response will be ``None``.) If `None` the HTTP headers will be read for the filename, or the last segment of the URL will be used. overwrite : `bool` or `str`, optional Determine how to handle downloading if a file already exists with the same name. If `False` the file download will be skipped and the path returned to the existing file, if `True` the file will be downloaded and the existing file will be overwritten, if `'unique'` the filename will be modified to be unique. If `None` the value set when constructing the `~parfive.Downloader` object will be used. kwargs : `dict` Extra keyword arguments are passed to `aiohttp.ClientSession.get` or `aioftp.Client.context` depending on the protocol. Notes ----- Proxy URL is read from the environment variables `HTTP_PROXY` or `HTTPS_PROXY`, depending on the protocol of the `url` passed. Proxy Authentication `proxy_auth` should be passed as a `aiohttp.BasicAuth` object. Proxy Headers `proxy_headers` should be passed as `dict` object. 
""" overwrite = overwrite or self.overwrite if path is None and filename is None: raise ValueError("Either path or filename must be specified.") elif path is None: path = './' path = pathlib.Path(path) if not filename: filepath = partial(default_name, path) elif callable(filename): filepath = filename else: # Define a function because get_file expects a callback def filepath(*args): return path / filename scheme = urllib.parse.urlparse(url).scheme if scheme in ('http', 'https'): get_file = partial(self._get_http, url=url, filepath_partial=filepath, overwrite=overwrite, **kwargs) self.http_queue.append(get_file) elif scheme == 'ftp': if aioftp is None: raise ValueError("The aioftp package must be installed to download over FTP.") get_file = partial(self._get_ftp, url=url, filepath_partial=filepath, overwrite=overwrite, **kwargs) self.ftp_queue.append(get_file) else: raise ValueError("URL must start with either 'http' or 'ftp'.") @staticmethod def _run_in_loop(coro): """ Detect an existing, running loop and run in a separate loop if needed. If no loop is running, use asyncio.run to run the coroutine instead. """ try: loop = asyncio.get_running_loop() except RuntimeError: loop = None if loop and loop.is_running(): aio_pool = ThreadPoolExecutor(1) new_loop = asyncio.new_event_loop() return run_in_thread(aio_pool, new_loop, coro) return asyncio.run(coro) @staticmethod def _configure_debug(): # pragma: no cover sh = logging.StreamHandler() sh.setLevel(logging.DEBUG) formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s') sh.setFormatter(formatter) parfive.log.addHandler(sh) parfive.log.setLevel(logging.DEBUG) aiohttp_logger = logging.getLogger('aiohttp.client') aioftp_logger = logging.getLogger('aioftp.client') aioftp_logger.addHandler(sh) aioftp_logger.setLevel(logging.DEBUG) aiohttp_logger.addHandler(sh) aiohttp_logger.setLevel(logging.DEBUG) parfive.log.debug("Configured parfive to run with debug logging...") async def run_download(self, timeouts=None): """ Download all files in the queue. Parameters ---------- timeouts : `dict`, optional Overrides for the default timeouts for http downloads. Supported keys are any accepted by the `aiohttp.ClientTimeout` class. Defaults to no timeout for total session timeout (overriding the aiohttp 5 minute default) and 90 seconds for socket read timeout. Returns ------- `parfive.Results` A list of files downloaded. Notes ----- The defaults for the `'total'` and `'sock_read'` timeouts can be overridden by two environment variables ``PARFIVE_TOTAL_TIMEOUT`` and ``PARFIVE_SOCK_READ_TIMEOUT``. """ # Setup debug logging before starting a download if "PARFIVE_DEBUG" in os.environ: self._configure_debug() timeouts = timeouts or {"total": float(os.environ.get("PARFIVE_TOTAL_TIMEOUT", 0)), "sock_read": float(os.environ.get("PARFIVE_SOCK_READ_TIMEOUT", 90))} total_files = self.queued_downloads done = set() with self._get_main_pb(total_files) as main_pb: if len(self.http_queue): done.update(await self._run_http_download(main_pb, timeouts)) if len(self.ftp_queue): done.update(await self._run_ftp_download(main_pb, timeouts)) dl_results = await asyncio.gather(*done, return_exceptions=True) results = Results() # Iterate through the results and store any failed download errors in # the errors list of the results object. 
        for res in dl_results:
            if isinstance(res, FailedDownload):
                results.add_error(res.filepath_partial, res.url, res.exception)
                parfive.log.info(f'{res.url} failed to download with exception\n'
                                 f'{res.exception}')
            elif isinstance(res, Exception):
                raise res
            else:
                results.append(res)

        return results

    def download(self, timeouts=None):
        """
        Download all files in the queue.

        Parameters
        ----------
        timeouts : `dict`, optional
            Overrides for the default timeouts for http downloads. Supported
            keys are any accepted by the `aiohttp.ClientTimeout` class.
            Defaults to no timeout for total session timeout (overriding the
            aiohttp 5 minute default) and 90 seconds for socket read timeout.

        Returns
        -------
        `parfive.Results`
            A list of files downloaded.

        Notes
        -----
        This is a synchronous version of `~parfive.Downloader.run_download`,
        an `asyncio` event loop will be created to run the download (in its
        own thread if a loop is already running).

        The defaults for the `'total'` and `'sock_read'` timeouts can be
        overridden by two environment variables ``PARFIVE_TOTAL_TIMEOUT`` and
        ``PARFIVE_SOCK_READ_TIMEOUT``.
        """
        return self._run_in_loop(self.run_download(timeouts))

    def retry(self, results):
        """
        Retry any failed downloads in a results object.

        .. note::
            This will start a new event loop.

        Parameters
        ----------
        results : `parfive.Results`
            A previous results object, the ``.errors`` property will be read
            and the downloads retried.

        Returns
        -------
        `parfive.Results`
            A modified version of the input ``results`` with all the errors
            from this download attempt and any new files appended to the list
            of file paths.
        """
        # Reset the queues
        self._init_queues()

        for err in results.errors:
            self.enqueue_file(err.url, filename=err.filepath_partial)

        new_res = self.download()

        results += new_res
        results._errors = new_res._errors

        return results

    @classmethod
    def simple_download(cls, urls, *, path="./", overwrite=None):
        """
        Download a series of URLs to a single destination.

        Parameters
        ----------
        urls : iterable
            A sequence of URLs to download.
        path : `pathlib.Path`, optional
            The destination directory for the downloaded files. Defaults to
            the current directory.
        overwrite : `bool`, optional
            Overwrite the files at the destination directory. If `False` the
            URL will not be downloaded if a file with the corresponding
            filename already exists.

        Returns
        -------
        `parfive.Results`
            A list of files downloaded.
        """
        dl = cls()
        for url in urls:
            dl.enqueue_file(url, path=path, overwrite=overwrite)
        return dl.download()

    def _get_main_pb(self, total):
        """
        Return the tqdm instance if we want it, else return a contextmanager
        that just returns None.
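
        When progress is disabled this builds a throwaway null context
        manager from ``contextlib.contextmanager(lambda: iter([None]))()``,
        so ``run_download`` can always use a ``with`` block and simply
        receive `None` as the progress bar.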
""" if self.progress: return self.tqdm(total=total, unit='file', desc="Files Downloaded", position=0) else: return contextlib.contextmanager(lambda: iter([None]))() async def _run_http_download(self, main_pb, timeouts): async with aiohttp.ClientSession(headers=self.headers) as session: self._generate_tokens() futures = await self._run_from_queue( self.http_queue.generate_queue(), self._generate_tokens(), main_pb, session=session, timeouts=timeouts) # Wait for all the coroutines to finish done, _ = await asyncio.wait(futures) return done async def _run_ftp_download(self, main_pb, timeouts): futures = await self._run_from_queue( self.ftp_queue.generate_queue(), self._generate_tokens(), main_pb, timeouts=timeouts) # Wait for all the coroutines to finish done, _ = await asyncio.wait(futures) return done async def _run_from_queue(self, queue, tokens, main_pb, *, session=None, timeouts): futures = [] while not queue.empty(): get_file = await queue.get() token = await tokens.get() file_pb = self.tqdm if self.file_progress else False future = asyncio.create_task(get_file(session, token=token, file_pb=file_pb, timeouts=timeouts)) def callback(token, future, main_pb): tokens.put_nowait(token) # Update the main progressbar if main_pb and not future.exception(): main_pb.update(1) future.add_done_callback(partial(callback, token, main_pb=main_pb)) futures.append(future) return futures async def _get_http(self, session, *, url, filepath_partial, chunksize=None, file_pb=None, token, overwrite, timeouts, max_splits=5, **kwargs): """ Read the file from the given url into the filename given by ``filepath_partial``. Parameters ---------- session : `aiohttp.ClientSession` The `aiohttp.ClientSession` to use to retrieve the files. url : `str` The url to retrieve. filepath_partial : `callable` A function to call which returns the filepath to save the url to. Takes two arguments ``resp, url``. chunksize : `int` The number of bytes to read into the file at a time. file_pb : `tqdm.tqdm` or `False` Should progress bars be displayed for each file downloaded. token : `parfive.downloader.Token` A token for this download slot. max_splits: `int` Number of maximum concurrent connections per file. kwargs : `dict` Extra keyword arguments are passed to `aiohttp.ClientSession.get`. Returns ------- `str` The name of the file saved. 
""" if chunksize is None: chunksize = self.default_chunk_size timeout = aiohttp.ClientTimeout(**timeouts) try: scheme = urllib.parse.urlparse(url).scheme if 'HTTP_PROXY' in os.environ and scheme == 'http': kwargs['proxy'] = os.environ['HTTP_PROXY'] elif 'HTTPS_PROXY' in os.environ and scheme == 'https': kwargs['proxy'] = os.environ['HTTPS_PROXY'] async with session.get(url, timeout=timeout, **kwargs) as resp: parfive.log.debug("%s request made to %s with headers=%s", resp.request_info.method, resp.request_info.url, resp.request_info.headers) parfive.log.debug("Response received from %s with headers=%s", resp.request_info.url, resp.headers) if resp.status != 200: raise FailedDownload(filepath_partial, url, resp) else: filepath, skip = get_filepath(filepath_partial(resp, url), overwrite) if skip: parfive.log.debug("File %s already exists and overwrite is False; skipping download.", filepath) return str(filepath) if callable(file_pb): file_pb = file_pb(position=token.n, unit='B', unit_scale=True, desc=filepath.name, leave=False, total=get_http_size(resp)) else: file_pb = None # This queue will contain the downloaded chunks and their offsets # as tuples: (offset, chunk) downloaded_chunk_queue = asyncio.Queue() download_workers = [] writer = asyncio.create_task( self._write_worker(downloaded_chunk_queue, file_pb, filepath)) if not DISABLE_RANGE and max_splits and resp.headers.get('Accept-Ranges', None) == "bytes": content_length = int(resp.headers['Content-length']) split_length = max(1, content_length // max_splits) ranges = [ [start, start + split_length] for start in range(0, content_length, split_length) ] # let the last part download everything ranges[-1][1] = '' for _range in ranges: download_workers.append( asyncio.create_task(self._http_download_worker( session, url, chunksize, _range, timeout, downloaded_chunk_queue, **kwargs )) ) else: download_workers.append( asyncio.create_task(self._http_download_worker( session, url, chunksize, None, timeout, downloaded_chunk_queue, **kwargs )) ) # Close the initial request here before we start transferring data. # run all the download workers await asyncio.gather(*download_workers) # join() waits till all the items in the queue have been processed await downloaded_chunk_queue.join() writer.cancel() return str(filepath) except Exception as e: raise FailedDownload(filepath_partial, url, e) async def _write_worker(self, queue, file_pb, filepath): """ Worker for writing the downloaded chunk to the file. The downloaded chunk is put into a asyncio Queue by a download worker. This worker gets the chunk from the queue and write it to the file using the specified offset of the chunk. Parameters ---------- queue: `asyncio.Queue` Queue for chunks file_pb : `tqdm.tqdm` or `False` Should progress bars be displayed for each file downloaded. filepath: `pathlib.Path` Path to the which the file should be downloaded. 
""" if self.use_aiofiles: await self._async_write_worker(queue, file_pb, filepath) else: await self._blocking_write_worker(queue, file_pb, filepath) async def _async_write_worker(self, queue, file_pb, filepath): async with aiofiles.open(filepath, mode="wb") as f: while True: offset, chunk = await queue.get() await f.seek(offset) await f.write(chunk) await f.flush() # Update the progressbar for file if file_pb is not None: file_pb.update(len(chunk)) queue.task_done() async def _blocking_write_worker(self, queue, file_pb, filepath): with open(filepath, "wb") as f: while True: offset, chunk = await queue.get() f.seek(offset) f.write(chunk) f.flush() # Update the progressbar for file if file_pb is not None: file_pb.update(len(chunk)) queue.task_done() async def _http_download_worker(self, session, url, chunksize, http_range, timeout, queue, **kwargs): """ Worker for downloading chunks from http urls. This function downloads the chunk from the specified http range and puts the chunk in the asyncio Queue. If no range is specified, then the whole file is downloaded via chunks and put in the queue. Parameters ---------- session : `aiohttp.ClientSession` The `aiohttp.ClientSession` to use to retrieve the files. url : `str` The url to retrieve. chunksize : `int` The number of bytes to read into the file at a time. http_range: (`int`, `int`) or `None` Start and end bytes of the file. In None, then no `Range` header is specified in request and the whole file will be downloaded. queue: `asyncio.Queue` Queue to put the download chunks. kwargs : `dict` Extra keyword arguments are passed to `aiohttp.ClientSession.get`. """ headers = kwargs.pop('headers', {}) if http_range: headers['Range'] = 'bytes={}-{}'.format(*http_range) # init offset to start of range offset, _ = http_range else: offset = 0 async with session.get(url, timeout=timeout, headers=headers, **kwargs) as resp: parfive.log.debug("%s request made for download to %s with headers=%s", resp.request_info.method, resp.request_info.url, resp.request_info.headers) parfive.log.debug("Response received from %s with headers=%s", resp.request_info.url, resp.headers) while True: chunk = await resp.content.read(chunksize) if not chunk: break await queue.put((offset, chunk)) offset += len(chunk) async def _get_ftp(self, session=None, *, url, filepath_partial, file_pb=None, token, overwrite, timeouts, **kwargs): """ Read the file from the given url into the filename given by ``filepath_partial``. Parameters ---------- session : `None` A placeholder for API compatibility with ``_get_http`` url : `str` The url to retrieve. filepath_partial : `callable` A function to call which returns the filepath to save the url to. Takes two arguments ``resp, url``. file_pb : `tqdm.tqdm` or `False` Should progress bars be displayed for each file downloaded. token : `parfive.downloader.Token` A token for this download slot. kwargs : `dict` Extra keyword arguments are passed to `~aioftp.Client.context`. Returns ------- `str` The name of the file saved. 
""" parse = urllib.parse.urlparse(url) try: async with aioftp.Client.context(parse.hostname, **kwargs) as client: parfive.log.debug("Connected to ftp server %s", parse.hostname) if parse.username and parse.password: parfive.log.debug("Explicitly Logging in with %s:%s", parse.username, parse.password) await client.login(parse.username, parse.password) # This has to be done before we start streaming the file: filepath, skip = get_filepath(filepath_partial(None, url), overwrite) if skip: parfive.log.debug("File %s already exists and overwrite is False; skipping download.", filepath) return str(filepath) if callable(file_pb): total_size = await get_ftp_size(client, parse.path) file_pb = file_pb(position=token.n, unit='B', unit_scale=True, desc=filepath.name, leave=False, total=total_size) else: file_pb = None parfive.log.debug("Downloading file %s from %s", parse.path, parse.hostname) async with client.download_stream(parse.path) as stream: downloaded_chunks_queue = asyncio.Queue() download_workers = [] writer = asyncio.create_task( self._write_worker(downloaded_chunks_queue, file_pb, filepath)) download_workers.append( asyncio.create_task(self._ftp_download_worker( stream, downloaded_chunks_queue)) ) await asyncio.gather(*download_workers) await downloaded_chunks_queue.join() writer.cancel() return str(filepath) except Exception as e: raise FailedDownload(filepath_partial, url, e) async def _ftp_download_worker(self, stream, queue): """ Similar to `Downloader._http_download_worker`. See that function's documentation for more info. Parameters ---------- stream: `aioftp.StreamIO` Stream of the file to be downloaded. queue: `asyncio.Queue` Queue to put the download chunks. """ offset = 0 async for chunk in stream.iter_by_block(): # Write this chunk to the output file. 
            await queue.put((offset, chunk))
            offset += len(chunk)

---- parfive-1.5.1/parfive/main.py ----

import sys
import argparse

from parfive import Downloader


def main():
    args = parse_args(sys.argv[1:])
    downloader = Downloader(max_conn=args.max_conn,
                            file_progress=not args.no_file_progress,
                            overwrite=args.overwrite)
    for url in args.urls:
        downloader.enqueue_file(url, path=args.directory)
    results = downloader.download()
    # Only echo the downloaded filenames when explicitly requested; the
    # --print-filenames flag was previously parsed but never checked.
    if args.print_filenames:
        for i in results:
            print(i)
    err_str = ''
    for err in results.errors:
        err_str += f'{err.url} \t {err.exception}\n'
    if err_str:
        sys.exit(err_str)


def parse_args(args):
    parser = argparse.ArgumentParser(description='Parfive, the python asyncio based downloader')
    parser.add_argument('urls', metavar='URLS', type=str, nargs='+',
                        help='URLs of files to be downloaded.')
    parser.add_argument('--max-conn', type=int, default=5,
                        help='Number of maximum connections.')
    parser.add_argument('--overwrite', action='store_const', const=True, default=False,
                        help='Overwrite if the file exists.')
    parser.add_argument('--no-file-progress', action='store_const', const=True, default=False,
                        dest='no_file_progress',
                        help='Hide the progress bar for each file.')
    parser.add_argument('--directory', type=str, default='./',
                        help='Directory to which downloaded files are saved.')
    parser.add_argument('--print-filenames', action='store_const', const=True, default=False,
                        dest='print_filenames',
                        help='Print successfully downloaded files\' names to stdout.')
    args = parser.parse_args(args)
    return args

---- parfive-1.5.1/parfive/results.py ----

from collections import UserList, namedtuple

import aiohttp

from .utils import FailedDownload

__all__ = ['Results']


class Results(UserList):
    """
    The results of a download from `parfive.Downloader.download`.

    This object contains the filenames of successful downloads as well as a
    list of any errors encountered in the `~parfive.Results.errors` property.
    """

    def __init__(self, *args, errors=None):
        super().__init__(*args)
        self._errors = errors or list()
        self._error = namedtuple("error", ("filepath_partial", "url", "exception"))

    def _get_nice_resp_repr(self, response):
        # This is a modified version of aiohttp.ClientResponse.__repr__
        if isinstance(response, aiohttp.ClientResponse):
            ascii_encodable_url = str(response.url)
            if response.reason:
                ascii_encodable_reason = response.reason.encode('ascii',
                                                                'backslashreplace').decode('ascii')
            else:
                ascii_encodable_reason = response.reason
            return '<ClientResponse({}) [{} {}]>'.format(
                ascii_encodable_url, response.status, ascii_encodable_reason)
        else:
            return repr(response)

    def __str__(self):
        out = super().__repr__()
        if self.errors:
            out += '\nErrors:\n'
            for error in self.errors:
                if isinstance(error, FailedDownload):
                    resp = self._get_nice_resp_repr(error.exception)
                    out += f"(url={error.url}, response={resp})\n"
                else:
                    out += "({})".format(repr(error))
        return out

    def __repr__(self):
        out = object.__repr__(self)
        out += '\n'
        out += str(self)
        return out

    def add_error(self, filename, url, exception):
        """
        Add an error to the results.
        """
        if isinstance(exception, aiohttp.ClientResponse):
            exception._headers = None
        self._errors.append(self._error(filename, url, exception))

    @property
    def errors(self):
        """
        A list of errors encountered during the download.
The errors are represented as a tuple containing ``(filepath, url, exception)`` where ``filepath`` is a function for generating a filepath, ``url`` is the url to be downloaded and ``exception`` is the error raised during download. """ return self._errors ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1641382261.3710868 parfive-1.5.1/parfive/tests/0000755000175100001710000000000000000000000014776 5ustar00vstsdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/parfive/tests/__init__.py0000644000175100001710000000000000000000000017075 0ustar00vstsdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/parfive/tests/simple_download_test.ipynb0000644000175100001710000002402700000000000022265 0ustar00vstsdocker{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import parfive" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "dl = parfive.Downloader()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "dl.enqueue_file(\"http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt\", path=\"./\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "nbreg": { "diff_ignore": [ "/outputs/0/data/" ] } }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "db630dac73074c248fb6ecc163e1fd75", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Files Downloaded', max=1.0, style=ProgressStyle(descripti…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "files = dl.download()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\n", "['predicted-sunspot-radio-flux.txt']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files" ] } ], "metadata": { "celltoolbar": "Edit Metadata", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" }, "nbreg": { "diff_ignore": [ "/metadata/widgets" ] }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": { "0cc46587e5314f8a95e5373eed4af735": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "Files Downloaded: 100%", "description_tooltip": null, "layout": "IPY_MODEL_d3f4f43178564251aa135d08e87f47db", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_ef5d56b1823d442c829e821516cdbff0", "value": 1 } }, "4a1f3a7b8acd4d1daeebd13033a5ba05": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": 
"1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "658c594e4c7f44e986c5e8d346e290ef": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "720f662a1e20406ba71816e33cbdbe30": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c5a8b53e793f4b328543dd9fedd64b6f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_658c594e4c7f44e986c5e8d346e290ef", "placeholder": "​", "style": "IPY_MODEL_4a1f3a7b8acd4d1daeebd13033a5ba05", "value": " 1/1 [00:00<00:00, 10.57file/s]" } }, "d3f4f43178564251aa135d08e87f47db": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": 
null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "db630dac73074c248fb6ecc163e1fd75": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_0cc46587e5314f8a95e5373eed4af735", "IPY_MODEL_c5a8b53e793f4b328543dd9fedd64b6f" ], "layout": "IPY_MODEL_720f662a1e20406ba71816e33cbdbe30" } }, "ef5d56b1823d442c829e821516cdbff0": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "initial" } } }, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 4 } ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/parfive/tests/test_downloader.py0000644000175100001710000003331700000000000020554 0ustar00vstsdockerimport os import sys import platform from pathlib import Path from unittest import mock from importlib import reload from unittest.mock import patch import aiohttp import pytest from aiohttp import ClientTimeout from pytest_localserver.http import WSGIServer import parfive from parfive.downloader import Downloader, FailedDownload, Results, Token from parfive.utils import sha256sum skip_windows = pytest.mark.skipif(platform.system() == 'Windows', reason="Windows.") def validate_test_file(f): assert len(f) == 1 assert Path(f[0]).name == "testfile.fits" assert sha256sum(f[0]) == "a1c58cd340e3bd33f94524076f1fa5cf9a7f13c59d5272a9d4bc0b5bc436d9b3" def test_setup(): dl = Downloader() assert isinstance(dl, Downloader) assert len(dl.http_queue) == 0 assert len(dl.ftp_queue) == 0 assert dl._generate_tokens().qsize() == 5 def test_download(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content('SIMPLE = T', headers={'Content-Disposition': "attachment; filename=testfile.fits"}) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None) assert dl.queued_downloads == 1 f = dl.download() validate_test_file(f) def test_simple_download(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content('SIMPLE = T', headers={'Content-Disposition': "attachment; filename=testfile.fits"}) f = Downloader.simple_download([httpserver.url], path=Path(tmpdir)) validate_test_file(f) def test_changed_max_conn(httpserver, tmpdir): # Check that changing max_conn works after 
creating Downloader tmpdir = str(tmpdir) httpserver.serve_content('SIMPLE = T', headers={'Content-Disposition': "attachment; filename=testfile.fits"}) dl = Downloader(max_conn=4) dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None) dl.max_conn = 3 f = dl.download() validate_test_file(f) @pytest.mark.asyncio @pytest.mark.parametrize("use_aiofiles", [True, False]) async def test_async_download(httpserver, tmpdir, use_aiofiles): httpserver.serve_content('SIMPLE = T', headers={'Content-Disposition': "attachment; filename=testfile.fits"}) dl = Downloader(use_aiofiles=use_aiofiles) dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None) assert dl.queued_downloads == 1 f = await dl.run_download() validate_test_file(f) def test_download_ranged_http(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content('SIMPLE = T', headers={'Content-Disposition': "attachment; filename=testfile.fits"}) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir)) assert dl.queued_downloads == 1 f = dl.download() validate_test_file(f) def test_regression_download_ranged_http(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content('S', headers={'Content-Disposition': "attachment; filename=testfile.fits", 'Accept-Ranges': "bytes"}) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir)) assert dl.queued_downloads == 1 f = dl.download() assert len(f.errors) == 0, f.errors def test_download_partial(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content('SIMPLE = T') dl = Downloader() dl.enqueue_file(httpserver.url, filename=lambda resp, url: Path(tmpdir) / "filename") f = dl.download() assert len(f) == 1 # Check that the filename function we passed in was used to name the file assert "filename" in f[0] def test_empty_download(tmpdir): dl = Downloader() f = dl.download() assert len(f) == 0 def test_download_filename(httpserver, tmpdir): httpserver.serve_content('SIMPLE = T') fname = "testing123" filename = str(tmpdir.join(fname)) with open(filename, "w") as fh: fh.write("SIMPLE = T") dl = Downloader() dl.enqueue_file(httpserver.url, filename=filename, chunksize=200) f = dl.download() assert isinstance(f, Results) assert len(f) == 1 assert f[0] == filename def test_download_no_overwrite(httpserver, tmpdir): httpserver.serve_content('SIMPLE = T') fname = "testing123" filename = str(tmpdir.join(fname)) with open(filename, "w") as fh: fh.write("Hello world") dl = Downloader() dl.enqueue_file(httpserver.url, filename=filename, chunksize=200) f = dl.download() assert isinstance(f, Results) assert len(f) == 1 assert f[0] == filename with open(filename) as fh: # If the contents are the same as when we wrote it, it hasn't been # overwritten assert fh.read() == "Hello world" def test_download_overwrite(httpserver, tmpdir): httpserver.serve_content('SIMPLE = T') fname = "testing123" filename = str(tmpdir.join(fname)) with open(filename, "w") as fh: fh.write("Hello world") dl = Downloader(overwrite=True) dl.enqueue_file(httpserver.url, filename=filename, chunksize=200) f = dl.download() assert isinstance(f, Results) assert len(f) == 1 assert f[0] == filename with open(filename) as fh: assert fh.read() == "SIMPLE = T" def test_download_unique(httpserver, tmpdir): httpserver.serve_content('SIMPLE = T') fname = "testing123" filename = str(tmpdir.join(fname)) filenames = [filename, filename + '.fits', filename + '.fits.gz'] dl = Downloader(overwrite='unique') # Write files to all three of the target filenames.
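# With overwrite='unique', parfive should leave each existing file in place
# and write the download to a new, uniquely numbered name instead;
# replacement_filename() in parfive/utils.py inserts a counter before the
# suffix, e.g. "testing123.fits" becomes "testing123.1.fits".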
for fn in filenames: with open(fn, "w") as fh: fh.write("Hello world") dl.enqueue_file(httpserver.url, filename=fn, chunksize=200) f = dl.download() assert isinstance(f, Results) assert len(f) == len(filenames) for fn in f: assert fn not in filenames assert f"{fname}.1" in fn @pytest.fixture def testserver(request): """ A server that throws a 404 for the second request. """ counter = 0 def simple_app(environ, start_response): """ Simplest possible WSGI application. """ nonlocal counter counter += 1 if counter != 2: status = '200 OK' response_headers = [('Content-type', 'text/plain'), ('Content-Disposition', (f'testfile_{counter}'))] start_response(status, response_headers) return [b'Hello world!\n'] else: status = '404' response_headers = [('Content-type', 'text/plain')] start_response(status, response_headers) return "" server = WSGIServer(application=simple_app) server.start() request.addfinalizer(server.stop) return server def test_retrieve_some_content(testserver, tmpdir): """ Test that the downloader handles errors properly. """ tmpdir = str(tmpdir) dl = Downloader() nn = 5 for i in range(nn): dl.enqueue_file(testserver.url, path=tmpdir) f = dl.download() assert len(f) == nn - 1 assert len(f.errors) == 1 def test_no_progress(httpserver, tmpdir, capsys): tmpdir = str(tmpdir) httpserver.serve_content('SIMPLE = T') dl = Downloader(progress=False) dl.enqueue_file(httpserver.url, path=tmpdir) dl.download() # Check that nothing was written to stdout captured = capsys.readouterr().out assert not captured def throwerror(*args, **kwargs): raise ValueError("Out of Cheese.") @patch("parfive.downloader.default_name", throwerror) def test_raises_other_exception(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content('SIMPLE = T') dl = Downloader() dl.enqueue_file(httpserver.url, path=tmpdir) res = dl.download() assert isinstance(res.errors[0].exception, ValueError) def test_token(): t = Token(5) assert "5" in repr(t) assert "5" in str(t) def test_failed_download(): err = FailedDownload("wibble", "bbc.co.uk", "running away") assert "bbc.co.uk" in repr(err) assert "running away" in str(err) def test_results(): res = Results() res.append("hello") res.add_error("wibble", "notaurl", "out of cheese") assert "notaurl" in repr(res) assert "hello" in repr(res) assert "out of cheese" in repr(res) def test_notaurl(tmpdir): tmpdir = str(tmpdir) dl = Downloader(progress=False) dl.enqueue_file("http://notaurl.wibble/file", path=tmpdir) f = dl.download() assert len(f.errors) == 1 assert isinstance(f.errors[0].exception, aiohttp.ClientConnectionError) def test_retry(tmpdir, testserver): tmpdir = str(tmpdir) dl = Downloader() nn = 5 for i in range(nn): dl.enqueue_file(testserver.url, path=tmpdir) f = dl.download() assert len(f) == nn - 1 assert len(f.errors) == 1 f2 = dl.retry(f) assert len(f2) == nn assert len(f2.errors) == 0 def test_empty_retry(): f = Results() dl = Downloader() dl.retry(f) @skip_windows @pytest.mark.allow_hosts(True) def test_ftp(tmpdir): tmpdir = str(tmpdir) dl = Downloader() dl.enqueue_file("ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2011_SRS.tar.gz", path=tmpdir) dl.enqueue_file("ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2013_SRS.tar.gz", path=tmpdir) dl.enqueue_file("ftp://ftp.swpc.noaa.gov/pub/_SRS.tar.gz", path=tmpdir) dl.enqueue_file("ftp://notaserver/notafile.fileL", path=tmpdir) f = dl.download() assert len(f) == 1 assert len(f.errors) == 3 @skip_windows @pytest.mark.allow_hosts(True) def
test_ftp_pasv_command(tmpdir): tmpdir = str(tmpdir) dl = Downloader() dl.enqueue_file( "ftp://ftp.ngdc.noaa.gov/STP/swpc_products/daily_reports/solar_region_summaries/2002/04/20020414SRS.txt", path=tmpdir, passive_commands=["pasv"]) assert dl.queued_downloads == 1 f = dl.download() assert len(f) == 1 assert len(f.errors) == 0 @skip_windows @pytest.mark.allow_hosts(True) def test_ftp_http(tmpdir, httpserver): tmpdir = str(tmpdir) httpserver.serve_content('SIMPLE = T') dl = Downloader() dl.enqueue_file("ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2011_SRS.tar.gz", path=tmpdir) dl.enqueue_file("ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2013_SRS.tar.gz", path=tmpdir) dl.enqueue_file("ftp://ftp.swpc.noaa.gov/pub/_SRS.tar.gz", path=tmpdir) dl.enqueue_file("ftp://notaserver/notafile.fileL", path=tmpdir) dl.enqueue_file(httpserver.url, path=tmpdir) dl.enqueue_file("http://noaurl.notadomain/noafile", path=tmpdir) assert dl.queued_downloads == 6 f = dl.download() assert len(f) == 2 assert len(f.errors) == 4 def test_default_user_agent(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content('SIMPLE = T', headers={'Content-Disposition': "attachment; filename=testfile.fits"}) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None) assert dl.queued_downloads == 1 dl.download() assert 'User-Agent' in httpserver.requests[0].headers assert httpserver.requests[0].headers[ 'User-Agent'] == f"parfive/{parfive.__version__} aiohttp/{aiohttp.__version__} python/{sys.version[:5]}" def test_custom_user_agent(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content('SIMPLE = T', headers={'Content-Disposition': "attachment; filename=testfile.fits"}) dl = Downloader(headers={'User-Agent': 'test value 299792458'}) dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None) assert dl.queued_downloads == 1 dl.download() assert 'User-Agent' in httpserver.requests[0].headers assert httpserver.requests[0].headers['User-Agent'] == "test value 299792458" @patch.dict(os.environ, {'HTTP_PROXY': "http_proxy_url", 'HTTPS_PROXY': "https_proxy_url"}) @pytest.mark.parametrize("url,proxy", [('http://test.example.com', 'http_proxy_url'), ('https://test.example.com', 'https_proxy_url')]) def test_proxy_passed_as_kwargs_to_get(tmpdir, url, proxy): with mock.patch( "aiohttp.client.ClientSession._request", new_callable=mock.MagicMock ) as patched: dl = Downloader() dl.enqueue_file(url, path=Path(tmpdir), max_splits=None) assert dl.queued_downloads == 1 dl.download() assert patched.called, "`ClientSession._request` not called" assert list(patched.call_args) == [('GET', url), {'allow_redirects': True, 'timeout': ClientTimeout(total=0, connect=None, sock_read=90, sock_connect=None), 'proxy': proxy }] @pytest.mark.parametrize("use_aiofiles", [True, False]) def test_enable_aiofiles_constructor(use_aiofiles): dl = Downloader(use_aiofiles=use_aiofiles) assert dl.use_aiofiles == use_aiofiles, f"expected={use_aiofiles}, got={dl.use_aiofiles}" @patch.dict(os.environ, {'PARFIVE_OVERWRITE_ENABLE_AIOFILES': "some_value_to_enable_it"}) @pytest.mark.parametrize("use_aiofiles", [True, False]) def test_enable_aiofiles_env_overwrite_always_enabled(use_aiofiles): dl = Downloader(use_aiofiles=use_aiofiles) assert dl.use_aiofiles is True @pytest.fixture def remove_aiofiles(): parfive.downloader.aiofiles = None yield reload(parfive.downloader) @pytest.mark.parametrize("use_aiofiles", [True, False]) def test_enable_no_aiofiles(remove_aiofiles, use_aiofiles): Downloader.use_aiofiles.fget.cache_clear()
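# The use_aiofiles property getter is cached, so the cache_clear() call above
# forces it to be re-evaluated now that the remove_aiofiles fixture has set
# parfive.downloader.aiofiles to None.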
dl = Downloader(use_aiofiles=use_aiofiles) assert dl.use_aiofiles is False ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/parfive/tests/test_main.py0000644000175100001710000000157600000000000017344 0ustar00vstsdockerimport pytest from parfive.main import parse_args REQUIRED_ARGUMENTS = ['test_url'] def test_no_url(): with pytest.raises(SystemExit): parse_args(['--overwrite']) def helper(args, name, expected): args = parse_args(REQUIRED_ARGUMENTS + args) assert getattr(args, name) == expected def test_overwrite(): helper(['--overwrite'], 'overwrite', True) helper([], 'overwrite', False) def test_max_conn(): helper(['--max-conn', '10'], 'max_conn', 10) helper([], 'max_conn', 5) def test_no_file_progress(): helper(['--no-file-progress'], 'no_file_progress', True) helper([], 'no_file_progress', False) def test_print_filenames(): helper(['--print-filenames'], 'print_filenames', True) helper([], 'print_filenames', False) def test_directory(): helper(['--directory', '/tmp'], 'directory', '/tmp') helper([], 'directory', './') ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/parfive/tests/test_utils.py0000644000175100001710000000045700000000000017555 0ustar00vstsdockerimport tempfile from parfive.utils import sha256sum def test_sha256sum(): tempfilename = tempfile.mktemp() filehash = "559aead08264d5795d3909718cdd05abd49572e84fe55590eef31a88a08fdffd" with open(tempfilename, 'w') as f: f.write('A') assert sha256sum(tempfilename) == filehash ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/parfive/utils.py0000644000175100001710000001075000000000000015351 0ustar00vstsdockerimport cgi import asyncio import hashlib import pathlib from itertools import count import parfive __all__ = ['run_in_thread', 'Token', 'FailedDownload', 'default_name', 'in_notebook'] def in_notebook(): try: import ipykernel.zmqshell shell = get_ipython() # noqa if isinstance(shell, ipykernel.zmqshell.ZMQInteractiveShell): try: # Newer tqdm import tqdm.notebook return tqdm.notebook.IPY > 0 except ImportError: # Older tqdm try: # Check that we can import the right widget from tqdm import _tqdm_notebook _tqdm_notebook.IntProgress except Exception: return False except Exception: return False return True return False except Exception: return False def default_name(path, resp, url): url_filename = url.split('/')[-1] if resp: cdheader = resp.headers.get("Content-Disposition", None) if cdheader: value, params = cgi.parse_header(cdheader) name = params.get('filename', url_filename) else: name = url_filename else: name = url_filename return pathlib.Path(path) / name def run_in_thread(aio_pool, loop, coro): """ Run ``coro`` on ``loop`` in a thread from ``aio_pool`` and block until it completes. This makes the return value of this function the same as the return of ``loop.run_until_complete``. """ return aio_pool.submit(loop.run_until_complete, coro).result() async def get_ftp_size(client, filepath): """ Given an `aioftp.ClientSession` object, get the expected size of the file, returning ``None`` if the size cannot be determined.
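Parameters
----------
client : `aioftp.ClientSession`
    The client connected to the remote FTP server.
filepath
    The path to the file on the remote server.

Returns
-------
`int` or `None`
    The size of the file in bytes, or `None` if the server does not report it.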
""" try: size = await client.stat(filepath) size = size.get("size", None) except Exception: parfive.log.exception("Failed to get size of FTP file") size = None return int(size) if size else size def get_http_size(resp): size = resp.headers.get("content-length", None) return int(size) if size else size def replacement_filename(path): """ Given a path generate a unique filename. """ path = pathlib.Path(path) if not path.exists: return path suffix = ''.join(path.suffixes) for c in count(1): if suffix: name, _ = path.name.split(suffix) else: name = path.name new_name = f"{name}.{c}{suffix}" new_path = path.parent / new_name if not new_path.exists(): return new_path def get_filepath(filepath, overwrite): """ Get the filepath to download to and ensure dir exists. Returns ------- `pathlib.Path`, `bool` """ filepath = pathlib.Path(filepath) if filepath.exists(): if not overwrite: return str(filepath), True if overwrite == 'unique': filepath = replacement_filename(filepath) if not filepath.parent.exists(): filepath.parent.mkdir(parents=True) return filepath, False def sha256sum(filename): """ https://stackoverflow.com/a/44873382 """ h = hashlib.sha256() b = bytearray(128 * 1024) mv = memoryview(b) with open(filename, 'rb', buffering=0) as f: for n in iter(lambda: f.readinto(mv), 0): h.update(mv[:n]) return h.hexdigest() class FailedDownload(Exception): def __init__(self, filepath_partial, url, exception): self.filepath_partial = filepath_partial self.url = url self.exception = exception super().__init__() def __repr__(self): out = super().__repr__() out += f'\n {self.url} {self.exception}' return out def __str__(self): return "Download Failed: {} with error {}".format(self.url, str(self.exception)) class Token: def __init__(self, n): self.n = n def __repr__(self): return super().__repr__() + f"n = {self.n}" def __str__(self): return f"Token {self.n}" class _QueueList(list): """ A list, with an extra method that empties the list and puts it into a `asyncio.Queue`. Creating the queue can only be done inside a running asyncio loop. """ def generate_queue(self, maxsize=0): queue = asyncio.Queue(maxsize=maxsize) for item in self: queue.put_nowait(item) self.clear() return queue ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1641382261.3710868 parfive-1.5.1/parfive.egg-info/0000755000175100001710000000000000000000000015326 5ustar00vstsdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382261.0 parfive-1.5.1/parfive.egg-info/PKG-INFO0000644000175100001710000000714400000000000016431 0ustar00vstsdockerMetadata-Version: 2.1 Name: parfive Version: 1.5.1 Summary: A HTTP and FTP parallel file downloader. Home-page: https://parfive.readthedocs.io/ Author: "Stuart Mumford" Author-email: "stuart@cadair.com" License: MIT Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Requires-Python: >=3.7 Provides-Extra: ftp Provides-Extra: tests Provides-Extra: docs License-File: LICENSE Parfive ======= .. image:: https://img.shields.io/pypi/v/parfive.svg :target: https://pypi.python.org/pypi/parfive :alt: Latest PyPI version .. 
image:: https://dev.azure.com/DrCadair/parfive/_apis/build/status/Cadair.parfive?repoName=Cadair%2Fparfive&branchName=master :target: https://dev.azure.com/DrCadair/parfive/_build/latest?definitionId=1&repoName=Cadair%2Fparfive&branchName=master :alt: Azure Pipelines Build Status A parallel file downloader using asyncio. parfive can handle downloading multiple files in parallel as well as downloading each file in a number of chunks. Usage ----- parfive works by creating a downloader object, appending files to it and then running the download. parfive has a synchronous API, but uses asyncio to parallelise downloading the files. A simple example is:: from parfive import Downloader dl = Downloader() dl.enqueue_file("http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt", path="./") files = dl.download() Parfive also bundles a CLI. The following example will download the two files concurrently:: $ parfive 'http://212.183.159.230/5MB.zip' 'http://212.183.159.230/10MB.zip' $ parfive --help usage: parfive [-h] [--max-conn MAX_CONN] [--overwrite] [--no-file-progress] [--directory DIRECTORY] [--print-filenames] URLS [URLS ...] Parfive, the python asyncio based downloader positional arguments: URLS URLs of files to be downloaded. optional arguments: -h, --help show this help message and exit --max-conn MAX_CONN Number of maximum connections. --overwrite Overwrite if the file exists. --no-file-progress Don't show progress bar for each file. --directory DIRECTORY Directory to which downloaded files are saved. --print-filenames Print successfully downloaded files' names to stdout. Results ^^^^^^^ ``parfive.Downloader.download`` returns a ``parfive.Results`` object, which is a list of the filenames that have been downloaded. It also tracks any files which failed to download. Handling Errors ^^^^^^^^^^^^^^^ If files fail to download, the URLs and the responses from the server are stored in the ``Results`` object returned by ``parfive.Downloader``. These can be used to inform users about the errors. (Note: the progress bar will finish in an incomplete state if a download fails, i.e. it will show ``4/5 Files Downloaded``.) The ``Results`` object is a list with an extra attribute ``errors``; this property returns a list of named tuples, each of which contains the ``.url`` and the ``.exception``, where the latter is an ``aiohttp.ClientResponse`` or an ``aiohttp.ClientError`` object. A worked example is shown below. Installation ------------ parfive is available on PyPI; you can install it with pip:: pip install parfive or if you want to use FTP downloads:: pip install parfive[ftp] Requirements ^^^^^^^^^^^^ - Python 3.7 or above - aiohttp - tqdm - aioftp (for downloads over FTP) Licence ------- MIT Licensed Authors ------- `parfive` was written by `Stuart Mumford `__.
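Error Handling Example
----------------------

A minimal sketch of inspecting and retrying failed downloads. It only uses
``Results.errors`` and ``Downloader.retry``, both of which are exercised in
the parfive test suite; the URL is the same sample-data file used above::

    from parfive import Downloader

    dl = Downloader()
    dl.enqueue_file("http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt", path="./")
    results = dl.download()

    # Each error entry records the url that failed and the exception raised.
    for error in results.errors:
        print(f"{error.url} failed: {error.exception}")

    # Re-queue everything that failed and download it again, returning an
    # updated Results object.
    results = dl.retry(results)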
././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382261.0 parfive-1.5.1/parfive.egg-info/SOURCES.txt0000644000175100001710000000132700000000000017215 0ustar00vstsdocker.codecov.yaml .gitignore .pre-commit-config.yaml .readthedocs.yml LICENSE README.rst azure-pipelines.yml pyproject.toml setup.cfg setup.py tox.ini .github/release-drafter.yml .github/workflows/release-drafter.yml docs/Makefile docs/conf.py docs/index.rst docs/robots.txt parfive/__init__.py parfive/_version.py parfive/downloader.py parfive/main.py parfive/results.py parfive/utils.py parfive.egg-info/PKG-INFO parfive.egg-info/SOURCES.txt parfive.egg-info/dependency_links.txt parfive.egg-info/entry_points.txt parfive.egg-info/requires.txt parfive.egg-info/top_level.txt parfive/tests/__init__.py parfive/tests/simple_download_test.ipynb parfive/tests/test_downloader.py parfive/tests/test_main.py parfive/tests/test_utils.py././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382261.0 parfive-1.5.1/parfive.egg-info/dependency_links.txt0000644000175100001710000000000100000000000021374 0ustar00vstsdocker ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382261.0 parfive-1.5.1/parfive.egg-info/entry_points.txt0000644000175100001710000000005700000000000020626 0ustar00vstsdocker[console_scripts] parfive = parfive.main:main ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382261.0 parfive-1.5.1/parfive.egg-info/requires.txt0000644000175100001710000000025300000000000017726 0ustar00vstsdockertqdm aiohttp [docs] sphinx sphinx-automodapi sunpy-sphinx-theme [ftp] aioftp>=0.17.1 [tests] pytest pytest-localserver pytest-asyncio pytest-socket pytest-cov aiofiles ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382261.0 parfive-1.5.1/parfive.egg-info/top_level.txt0000644000175100001710000000001000000000000020047 0ustar00vstsdockerparfive ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/pyproject.toml0000644000175100001710000000015400000000000015114 0ustar00vstsdocker[build-system] requires = ["setuptools", "setuptools_scm", "wheel"] build-backend = 'setuptools.build_meta' ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1641382261.3710868 parfive-1.5.1/setup.cfg0000644000175100001710000000323600000000000014025 0ustar00vstsdocker[metadata] name = parfive description = An HTTP and FTP parallel file downloader.
long_description = file: README.rst url = https://parfive.readthedocs.io/ license = MIT author = "Stuart Mumford" author_email = "stuart@cadair.com" classifiers = Programming Language :: Python :: 3 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 [options] python_requires = >=3.7 install_requires = tqdm aiohttp setup_requires = setuptools_scm packages = find: [options.entry_points] console_scripts = parfive = parfive.main:main [options.extras_require] ftp = aioftp>=0.17.1 tests = pytest pytest-localserver pytest-asyncio pytest-socket pytest-cov aiofiles docs = sphinx sphinx-automodapi sunpy-sphinx-theme [flake8] max-line-length = 100 ignore = I100,I101,I102,I103,I104,I201 [tool:pytest] addopts = --allow-hosts=127.0.0.1,::1 [isort] balanced_wrapping = True skip = docs/conf.py default_section = THIRDPARTY include_trailing_comma = True known_first_party = parfive length_sort = False length_sort_sections = stdlib line_length = 110 multi_line_output = 3 no_lines_before = LOCALFOLDER sections = STDLIB, THIRDPARTY, FIRSTPARTY, LOCALFOLDER [coverage:run] omit = parfive/__init__* parfive/_dev/* parfive/*setup* parfive/conftest.py parfive/tests/* parfive/version.py */parfive/__init__* */parfive/_dev/* */parfive/*setup* */parfive/conftest.py */parfive/tests/* */parfive/version.py [coverage:report] exclude_lines = pragma: no cover except ImportError raise AssertionError raise NotImplementedError def main\(.*\): pragma: py{ignore_python_version} [egg_info] tag_build = tag_date = 0 ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/setup.py0000644000175100001710000000023700000000000013714 0ustar00vstsdocker#!/usr/bin/env python from setuptools import setup # isort:skip import os setup( use_scm_version={'write_to': os.path.join('parfive', '_version.py')}, ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1641382243.0 parfive-1.5.1/tox.ini0000644000175100001710000000254300000000000013517 0ustar00vstsdocker[tox] envlist = py{37,38,39}{,-conda} codestyle build_docs isolated_build = True [testenv] setenv = PYTEST_COMMAND = pytest -vvv -s -raR --pyargs parfive --cov-report=xml --cov={envsitepackagesdir}/parfive --cov-config={toxinidir}/setup.cfg {toxinidir}/docs extras = ftp tests commands = {env:PYTEST_COMMAND} {posargs} [testenv:build_docs] changedir = docs description = Invoke sphinx-build to build the HTML docs # Be verbose about the extras rather than using dev for clarity extras = ftp docs commands = sphinx-build -j auto --color -W --keep-going -b html -d _build/.doctrees . _build/html {posargs} python -c 'import pathlib; print("Documentation available under file://\{0\}".format(pathlib.Path(r"{toxinidir}") / "docs" / "_build" / "index.html"))' [testenv:codestyle] skip_install = true description = Run all style and file checks with pre-commit deps = pre-commit commands = pre-commit install-hooks pre-commit run --color always --all-files --show-diff-on-failure # This env requires tox-conda. [testenv:conda] extras = deps = conda_deps = aioftp aiohttp pytest-asyncio pytest-cov pytest-localserver pytest-socket pytest-sugar tqdm conda_channels = conda-forge install_command = pip install --no-deps {opts} {packages} commands = conda list {env:PYTEST_COMMAND} {posargs}