pax_global_header00006660000000000000000000000064145264445650014531gustar00rootroot0000000000000052 comment=d6d387a18e71d1c3ff30050ecd3da1e5f6b4c9ff asfsmd-1.4.1/000077500000000000000000000000001452644456500130115ustar00rootroot00000000000000asfsmd-1.4.1/.flake8000066400000000000000000000003121452644456500141600ustar00rootroot00000000000000[flake8] # max_line_length = 79 # max-complexity = 10 extend_ignore = E203,W503 extend_select = W504 per-file-ignores = */*: D105,D107 tests/*: D */tests/*: D statistics = True count = True asfsmd-1.4.1/.github/000077500000000000000000000000001452644456500143515ustar00rootroot00000000000000asfsmd-1.4.1/.github/workflows/000077500000000000000000000000001452644456500164065ustar00rootroot00000000000000asfsmd-1.4.1/.github/workflows/ci.yml000066400000000000000000000055551452644456500175360ustar00rootroot00000000000000name: CI on: push: pull_request: branches: [ "main" ] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: permissions: contents: read jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: submodules: 'true' - name: Set up Python uses: actions/setup-python@v4 with: python-version: "3.x" - name: Install build dependencies run: | python -m pip install --upgrade pip python -m pip install build twine - name: Build package run: | python -m build python -m twine check dist/* - name: Upload build artifacts uses: actions/upload-artifact@v3 with: name: dist path: | dist/*.tar.gz dist/*.whl test: runs-on: ${{ matrix.os }} name: ${{ matrix.os }} ${{ matrix.python-version }} needs: build strategy: matrix: python-version: ["3.7", "3.11"] # aiohttp does not have wheels for 3.12 os: [ubuntu-latest, macos-latest, windows-latest] steps: # Only necessary if the test code is located outside the package - uses: actions/checkout@v4 with: submodules: 'true' - name: Get distribution uses: actions/download-artifact@v3 with: name: dist path: dist - name: Set up Python ${{ matrix.python-version 
}} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install shell: bash run: | python -m pip install --upgrade pip python -m pip install dist/*.whl - name: Install test dependencies shell: bash run: | python -m pip install pytest pytest-cov if [ -f requirements-test.txt ]; then pip install -r requirements-test.txt; fi - name: Test run: | python -m pytest --cov=asfsmd --cov-report=html --cov-report=term lint: runs-on: ubuntu-latest env: PACKAGE: asfsmd steps: - uses: actions/checkout@v4 with: submodules: 'true' - name: Set up Python uses: actions/setup-python@v4 with: python-version: "3.x" - name: Install lint dependencies run: | python -m pip install flake8 pydocstyle isort black if [ -f requirements-lint.txt ]; then pip install -r requirements-lint.txt; fi - name: Lint (flake8) run: | python -m flake8 --count --statistics ${PACKAGE} - name: Lint (pydocstyle) run: | python -m pydocstyle --count ${PACKAGE} - name: Lint (isort) run: | python -m isort --check ${PACKAGE} - name: Lint (black) run: | python -m black --check ${PACKAGE} asfsmd-1.4.1/.gitignore000066400000000000000000000034371452644456500150100ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ _venv # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # VSCode .vscode/asfsmd-1.4.1/CHANGELOG.rst000066400000000000000000000036651452644456500150440ustar00rootroot00000000000000asfsmd release history ====================== asfsmd v1.4.1 (19/11/2023) -------------------------- * Disable progress in quiet mode. * Do not try to download products already on disk. * Add masic unit testing. * Enable GHA CI. * Fix dependencies in `pyproject.toml` * Improve typing annotations. * Linting and formatting. * Improved loading of product lists from file (refactoring). * New `--noprogress` CLI option. asfsmd v1.4.0 (05/02/2023) -------------------------- * Support for non SLC products (including RAW). * Move setup configuration to `pyproject.toml`. 
* Improved formatting to be compatible with the `black` tool. asfsmd v1.3.0 (18/12/2022) -------------------------- * New client based on smart_open_. .. _smart_open: https://github.com/RaRe-Technologies/smart_open asfsmd v1.2.0 (04/12/2022) -------------------------- * Refactoring to convert the `asfsmd.py` module into a package. * Support multiple backends for remote file access: httpio_, fsspec_ and remotezip_. The httpio based implementation seems to be slightly faster w.r.t. the other ones. * Fix issue with the management of default values for the `make_patterns` function. * Improve the management of the download of large files (chunking and progress). .. _httpio: https://github.com/barneygale/httpio .. _fsspec: https://github.com/fsspec/filesystem_spec .. _remotezip: https://github.com/gtsystem/python-remotezip asfsmd v1.1.0 (03/12/2022) -------------------------- * Now it is possible to customize the selection of files to be downloaded. Beyond the manifest and the annotation files, now it is also possible to download: * calibration annotations * noise annotations * rfi annotations * measurement files Moreover now it is possible to select a specific beams or polarizations. Patch developed by @scottstanie and @avalentino. * Restore compatibility with Python 3.6. asfsmd v1.0.0 (09/01/2022) -------------------------- Initial release. 
asfsmd-1.4.1/LICENSE000066400000000000000000000020671452644456500140230ustar00rootroot00000000000000MIT License Copyright (c) 2021-2023 Antonio Valentino Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. asfsmd-1.4.1/MANIFEST.in000066400000000000000000000000251452644456500145440ustar00rootroot00000000000000include CHANGELOG.rstasfsmd-1.4.1/README.rst000066400000000000000000000124671452644456500145120ustar00rootroot00000000000000ASF Sentinel-1 Metadata Download tool ===================================== Small Python tool (`asfsmd`) that allows to download XML files containing Sentinel-1 products metadata from the ASF archive. Sentinel-1 products are stored in the ASF arcive as ZIP files that are quite large because they contain both the products annotations and the binary image data. The `asfsmd` tool is able to retrieve only the relatively samll annotation files (in XML format) without downloading the entire ZIP archive. 
`asfsmd` exploits Python packages like `fsspec` or `httpio` for reading HTTP resources as random-access file-like objects. In order to do it the remote server must support the `Range` header. This approach allows to open the ZIP archive remotely, inspects contents, and download only the pieces of data that are actually necessary to the user. Performnces of this approach are quite poor but, in the specific case of Sentinel-1 products, the entire process results to be faster than downloading the entire ZIP archive and extracting only annotation files. Command Line Interface ---------------------- :: $ python3 -m asfsmd --help usage: asfsmd [-h] [--version] [--loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL}] [-q] [-v] [-d] [-f] [--urls] [-o OUTDIR] [-u USERNAME] [-p PASSWORD] [--block-size BLOCK_SIZE] [-b {s1,s2,s3,s4,s5,s6,iw1,iw2,iw3,ew1,ew2,ew3,ew4,ew5,wv1,wv2}] [--pol {vv,vh,hv,hh}] [-c] [-n] [-r] [--data] INPUT [INPUT ...] ASF Sentinel-1 Metadata Download tool. Small Python tool (`asfsmd`) that allows to download XML files containing Sentinel-1 products metadata from the ASF archive. Sentinel-1 products are stored in the ASF arcive as ZIP files that are quite large because they comntain both the products annotations and the binary image data. The `asfsmd` tool is able to retrieve only the relatively small annotation files (in XML format) without downloading the entire ZIP archive. positional arguments: INPUT Sentinel-1 product name(s). If the '-f' flag is set then the argument is interpreted as the filename containing the list of products. If the '--urls' flag is set then the arguments are interpreted as URLs pointing to product on the ASF server. See '--file--list' and the '--urls' options for more details. 
options: -h, --help show this help message and exit --version show program's version number and exit --loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL} logging level (default: WARNING) -q, --quiet suppress standard output messages, only errors are printed to screen -v, --verbose print verbose output messages -d, --debug print debug messages -f, --file-list read the list of products from a file. The file can be a JSON file (with '.json' extension) or a text file.The text file is expected to contain one product name per line.The json file can contain a list of products or a dictionary containing a list of products for each key.In this case the key is used as sub-folder name to store the corresponding products.Example: // --urls Indicate the inputs are a list of URLs from ASF. -o OUTDIR, --outdir OUTDIR path of the output directory (default='.') -u USERNAME, --username USERNAME username for ASF authentication. If not provided the tool attempts to retrieve the authentication parameters for the user's '.netrc' file looking for the host 'urs.earthdata.nasa.gov' -p PASSWORD, --password PASSWORD password for ASF authentication. If not provided the tool attempts to retrieve the authentication parameters for the user's '.netrc' file looking for the host 'urs.earthdata.nasa.gov' --block-size BLOCK_SIZE httpio block size in bytes (default: 1024) -b {s1,s2,s3,s4,s5,s6,iw1,iw2,iw3,ew1,ew2,ew3,ew4,ew5,wv1,wv2}, --beam {s1,s2,s3,s4,s5,s6,iw1,iw2,iw3,ew1,ew2,ew3,ew4,ew5,wv1,wv2} Choose only one beam to download. If not provided all beams are downloaded. --pol {vv,vh,hv,hh} Choose only one polarization to download. If not provided both polarizations are downloaded. -c, --cal Download calibration files. -n, --noise Download noise calibration files. -r, --rfi Download RFI files. --data Download measurement files. License ------- Copyright (c) 2021-2023 Antonio Valentino The `asfsmd` package is distributed under the MIT License. 
asfsmd-1.4.1/asfsmd/000077500000000000000000000000001452644456500142665ustar00rootroot00000000000000asfsmd-1.4.1/asfsmd/__init__.py000066400000000000000000000011511452644456500163750ustar00rootroot00000000000000"""ASF Sentinel-1 Metadata Download tool. Small Python tool (`asfsmd`) that allows to download XML files containing Sentinel-1 products metadata from the ASF archive. Sentinel-1 products are stored in the ASF archive as ZIP files that are quite large because they contain both the products annotations and the binary image data. The `asfsmd` tool is able to retrieve only the relatively small annotation files (in XML format) without downloading the entire ZIP archive. """ from .core import ( # noqa: F401 download_annotations, download_components_from_urls, make_patterns, ) __version__ = "1.4.1" asfsmd-1.4.1/asfsmd/__main__.py000066400000000000000000000001711452644456500163570ustar00rootroot00000000000000"""Main ently point for the asfsmd CLI.""" # PYTHON_ARGCOMPLETE_OK import sys from .cli import main sys.exit(main()) asfsmd-1.4.1/asfsmd/_utils.py000066400000000000000000000024241452644456500161410ustar00rootroot00000000000000"""Utility functions for asfsmd.""" import json import pathlib import collections from typing import Any, Dict, Iterable, List from .common import PathType def unique(data: Iterable[Any]) -> List[Any]: """Return a list of unique items preserving the input ordering.""" unique_items = [] unique_items_set = set() for item in data: if item not in unique_items_set: unique_items.append(item) unique_items_set.add(item) return unique_items def load_product_lists(*filenames: PathType) -> Dict[str, List[str]]: """Load product list form files.""" data: Dict[str, List[str]] = collections.defaultdict(list) for filename in filenames: filename = pathlib.Path(filename) if filename.suffix == ".json": data.update(json.loads(filename.read_text())) else: with filename.open() as fd: for line in fd: line = line.strip() if not line or line.startswith("#"): 
continue data[""].append(line) # Strip .zip or .SAFE extensions return { key: unique( item.replace(".zip", "").replace(".SAFE", "") for item in values ) for key, values in data.items() } asfsmd-1.4.1/asfsmd/cli.py000066400000000000000000000223761452644456500154210ustar00rootroot00000000000000"""Command Line Interface (CLI) for the ASF S1 Metadata Download tool.""" # PYTHON_ARGCOMPLETE_OK import logging import pathlib import argparse import collections from typing import Dict, Iterable, List, Optional import tqdm from . import __version__ from . import __doc__ as _pkg_doc from .core import ( download_annotations, download_components_from_urls, make_patterns, _get_auth, ) from ._utils import unique, load_product_lists from .common import BLOCKSIZE, MB try: from os import EX_OK except ImportError: EX_OK = 0 EX_FAILURE = 1 EX_INTERRUPT = 130 LOGFMT = "%(asctime)s %(levelname)-8s -- %(message)s" def asfsmd_cli( inputs: Iterable[str], beam: Optional[str] = "*", pol: Optional[str] = "??", cal: bool = False, noise: bool = False, rfi: bool = False, data: bool = False, outdir: str = ".", urls: bool = False, file_list: bool = False, block_size: int = BLOCKSIZE, noprogress: bool = False, username: Optional[str] = None, password: Optional[str] = None, ): """High level function for ASF S1 Metadata Download.""" auth = _get_auth(user=username, pwd=password) outroot = pathlib.Path(outdir) patterns = make_patterns( beam=beam, pol=pol, cal=cal, noise=noise, rfi=rfi, data=data, ) if urls: download_components_from_urls( inputs, patterns=patterns, outdir=outroot, auth=auth, block_size=block_size, noprogress=noprogress, ) else: products_tree: Dict[str, List[str]] = collections.defaultdict(list) if file_list: products_tree = load_product_lists(*inputs) else: # Ignore if user passed files with .zip or .SAFE extensions products_tree[""].extend( unique( p.replace(".zip", "").replace(".SAFE", "") for p in inputs ) ) items = pbar = tqdm.tqdm(products_tree.items(), disable=noprogress) for 
folder, products in items: pbar.set_description(folder if folder else "DOWNLOAD") outpath = outroot / folder download_annotations( products, outdir=outpath, auth=auth, patterns=patterns, block_size=block_size, noprogress=noprogress, ) return EX_OK def _autocomplete(parser): try: import argcomplete except ImportError: pass else: argcomplete.autocomplete(parser) def _set_logging_control_args(parser, default_loglevel="WARNING"): """Set up command line options for logging control.""" loglevels = [logging.getLevelName(level) for level in range(10, 60, 10)] parser.add_argument( "--loglevel", default=default_loglevel, choices=loglevels, help="logging level (default: %(default)s)", ) parser.add_argument( "-q", "--quiet", dest="loglevel", action="store_const", const="ERROR", help="suppress standard output messages, " "only errors are printed to screen", ) parser.add_argument( "-v", "--verbose", dest="loglevel", action="store_const", const="INFO", help="print verbose output messages", ) parser.add_argument( "-d", "--debug", dest="loglevel", action="store_const", const="DEBUG", help="print debug messages", ) return parser def _get_parser(subparsers=None): """Instantiate the command line argument (sub-)parser.""" name = __package__ synopsis = __doc__.splitlines()[0] doc = _pkg_doc if subparsers is None: parser = argparse.ArgumentParser(prog=name, description=doc) parser.add_argument( "--version", action="version", version="%(prog)s v" + __version__ ) else: parser = subparsers.add_parser(name, description=doc, help=synopsis) # parser.set_defaults(func=info) parser = _set_logging_control_args(parser) # Command line options parser.add_argument( "-f", "--file-list", action="store_true", help="read the list of products from a file. " "The file can be a JSON file (with '.json' extension) or a text file." "The text file is expected to contain one product name per line." "The json file can contain a list of products or a dictionary " "containing a list of products for each key." 
"In this case the key is used as sub-folder name to store the " "corresponding products." "Example: //", ) parser.add_argument( "--urls", action="store_true", help="Indicate the inputs are a list of URLs from ASF.", ) parser.add_argument( "-o", "--outdir", default=".", help="path of the output directory (default='%(default)s')", ) parser.add_argument( "-u", "--username", help="username for ASF authentication. " "If not provided the tool attempts to retrieve the " "authentication parameters for the user's '.netrc' file looking " "for the host 'urs.earthdata.nasa.gov'", ) parser.add_argument( "-p", "--password", help="password for ASF authentication. " "If not provided the tool attempts to retrieve the " "authentication parameters for the user's '.netrc' file looking " "for the host 'urs.earthdata.nasa.gov'", ) parser.add_argument( "--block-size", type=int, default=BLOCKSIZE // MB, help="httpio block size in MB (default: %(default)d)", ) # Optional filters parser.add_argument( "-b", "--beam", choices=[ "s1", "s2", "s3", "s4", "s5", "s6", "iw1", "iw2", "iw3", "ew1", "ew2", "ew3", "ew4", "ew5", "wv1", "wv2", ], type=str.lower, help="Choose only one beam to download. " "If not provided all beams are downloaded.", ) parser.add_argument( "--pol", choices=["vv", "vh", "hv", "hh"], type=str.lower, help="Choose only one polarization to download. " "If not provided both polarizations are downloaded.", ) # Additional file downloads parser.add_argument( "-c", "--cal", action="store_true", help="Download calibration files." ) parser.add_argument( "-n", "--noise", action="store_true", help="Download noise calibration files.", ) parser.add_argument( "-r", "--rfi", action="store_true", help="Download RFI files." ) parser.add_argument( "--data", action="store_true", help="Download measurement files." ) parser.add_argument( "--noprogress", action="store_true", help="Disable teh progress bar." 
) # Positional arguments parser.add_argument( "inputs", nargs="+", metavar="INPUT", help="Sentinel-1 product name(s). " "If the '-f' flag is set then the argument is interpreted as " "the filename containing the list of products. " "If the '--urls' flag is set then the arguments are interpreted as " "URLs pointing to product on the ASF server. " "See '--file-list' and the '--urls' options for more details.", ) if subparsers is None: _autocomplete(parser) return parser def _parse_args(args=None, namespace=None, parser=None): """Parse command line arguments.""" if parser is None: parser = _get_parser() args = parser.parse_args(args, namespace) # Common pre-processing of parsed arguments and consistency checks # ... return args def main(*argv): """Implement the main CLI interface.""" # setup logging logging.basicConfig(format=LOGFMT, level=logging.INFO) # stream=sys.stdout logging.captureWarnings(True) _log = logging.getLogger(__name__) # parse cmd line arguments args = _parse_args(argv if argv else None) # execute main tasks exit_code = EX_OK try: logging.getLogger().setLevel(args.loglevel) exit_code = asfsmd_cli( inputs=args.inputs, beam=args.beam, pol=args.pol, cal=args.cal, noise=args.noise, rfi=args.rfi, data=args.data, outdir=args.outdir, urls=args.urls, file_list=args.file_list, block_size=args.block_size * MB, noprogress=args.noprogress, username=args.username, password=args.password, ) except Exception as exc: _log.critical( "unexpected exception caught: {!r} {}".format( type(exc).__name__, exc ) ) _log.debug("stacktrace:", exc_info=True) exit_code = EX_FAILURE except KeyboardInterrupt: _log.warning("Keyboard interrupt received: exit the program") exit_code = EX_INTERRUPT return exit_code asfsmd-1.4.1/asfsmd/common.py000066400000000000000000000013621452644456500161320ustar00rootroot00000000000000"""Common constants and types.""" import os import abc from typing import NamedTuple, Union MB = 1024 * 1024 BLOCKSIZE = 16 * MB # 16MB (64MB is a better choice to 
download data) # @COMPATIBILITY: requires Python >= 3.9 # PathType = Union[str, os.PathLike[str]] PathType = Union[str, os.PathLike] Url = str class Auth(NamedTuple): """Authentication parameters.""" user: str pwd: str class AbstractClient(abc.ABC): """Base asfsmd client class.""" def __enter__(self): # noqa: D105 return self def __exit__(self, exc_type, exc_value, traceback): # noqa: D105 pass @abc.abstractmethod def open_zip_archive(self, url: Url): """Context manager for the remote zip archive.""" pass asfsmd-1.4.1/asfsmd/core.py000066400000000000000000000217571452644456500156040ustar00rootroot00000000000000"""Core functions for the ASF Sentinel-1 Metadata Download tool.""" import os import netrc import fnmatch import hashlib import logging import pathlib import zipfile import warnings import functools import importlib from typing import List, Optional from xml.etree import ElementTree as etree # noqa: N813 from urllib.parse import urlparse import tqdm import asf_search as asf from .common import Auth, BLOCKSIZE, PathType, Url __all__ = [ "download_annotations", "download_components_from_urls", "make_patterns", ] _log = logging.getLogger(__name__) def _get_client_type(): implementations = ["httpio", "fsspec", "remotezip"] if os.environ.get("ASFSMD_CLIENT") in implementations: name = os.environ.get("ASFSMD_CLIENT") name = f".{name}_client" mod = importlib.import_module(name, package=__package__) else: for name in implementations: name = f".{name}_client" try: mod = importlib.import_module(name, package=__package__) break except ImportError: _log.debug("exception caught:", exc_info=True) pass else: raise ImportError( f"Unable to import any of the asfsmd client implementations. 
" f"At least one of the following modules is required: " f"{','.join(map(repr, implementations))}" ) _log.debug(f"Client: {mod.Client}") return mod.Client _ClientType = _get_client_type() def query(products): """Query the specified Sentinel-1 products.""" if isinstance(products, str): products = [products] results = asf.granule_search(products) results = [ result for result in results if "METADATA" not in result.properties["processingLevel"] ] return results def make_patterns( beam: Optional[str] = "*", pol: Optional[str] = "??", cal: bool = False, noise: bool = False, rfi: bool = False, data: bool = False, ) -> List[str]: """Generate a list of patterns according to the specified options. Patterns are used to match components in the ZIP archive of the Sentinel-1 products. """ beam = "*" if beam is None else beam pol = "??" if pol is None else pol patterns = [ "S1*.SAFE/manifest.safe", ] head = "S1*.SAFE/annotation" tail = f"s1?-{beam}-???-{pol}-*.xml" patterns.append(f"{head}/{tail}") if cal: patterns.append(f"{head}/calibration/calibration-{tail}") if noise: patterns.append(f"{head}/calibration/noise-{tail}") if rfi: patterns.append(f"{head}/rfi/rfi-{tail}") if data: patterns.append(f"S1*.SAFE/measurement/s1?-{beam}-???-{pol}-*.tiff") patterns.append(f"S1*.SAFE/s1?-{beam}-???-?-{pol}-*.dat") return patterns def _is_product_complete( path: pathlib.Path, patterns: Optional[List[str]] = None, block_size: Optional[int] = BLOCKSIZE, ) -> bool: if not path.is_dir(): return False manifest_path = path / "manifest.safe" if not manifest_path.is_file(): return False xmldoc = etree.parse(os.fspath(manifest_path)) for elem in xmldoc.iterfind("./dataObjectSection/dataObject/byteStream"): relative_component_path = elem.find("fileLocation").attrib["href"] relative_component_path = pathlib.Path(relative_component_path) relative_component_path = relative_component_path.relative_to(".") patterns = make_patterns() if not patterns else patterns component_path = path.name / 
relative_component_path for pattern in patterns: if component_path.match(pattern): # pattern matches: exit the current loop and continue # in the current main iteration break else: # skip the rest and go to the next main iteration continue component_path = path / relative_component_path if not component_path.is_file(): return False size = int(elem.attrib["size"]) if component_path.stat().st_size != size: return False checksum_elem = elem.find("checksum") checksum_type = checksum_elem.attrib["checksumName"] if checksum_type.upper() != "MD5": _log.warning("unexpected checksum type: %s", checksum_type) return False # cannot check if the file is complete md5 = hashlib.md5() with path.joinpath(relative_component_path).open("rb") as fd: for data in iter(functools.partial(fd.read, block_size), b""): md5.update(data) if md5.hexdigest() != checksum_elem.text: return False return True def _filter_components( zf: zipfile.ZipFile, patterns: List[str], ) -> List[zipfile.ZipInfo]: components = [] for info in zf.filelist: for pattern in patterns: if fnmatch.fnmatch(info.filename, pattern): components.append(info) break return components def _download( zf: zipfile.ZipFile, info: zipfile.ZipInfo, outfile: PathType, block_size: int = BLOCKSIZE, noprogress: bool = False, ): size = info.file_size with tqdm.tqdm( total=size, leave=False, unit_scale=True, unit="B", disable=noprogress ) as pbar: with zf.open(info) as src, open(outfile, "wb") as dst: for data in iter(functools.partial(src.read, block_size), b""): dst.write(data) pbar.update(len(data)) def download_components_from_urls( urls, *, patterns: Optional[List[str]] = None, outdir: PathType = ".", auth: Optional[Auth] = None, block_size: int = BLOCKSIZE, noprogress: bool = False, ): """Download Sentinel-1 annotation for the specified product urls.""" outdir = pathlib.Path(outdir) if patterns is None: patterns = make_patterns() with _ClientType(auth=auth, block_size=block_size) as client: url_iter = tqdm.tqdm(urls, unit=" 
products", disable=noprogress) for url in url_iter: url_iter.set_description(url) product_out_path = outdir / pathlib.Path(urlparse(url).path).name product_out_path = product_out_path.with_suffix(".SAFE") product_name = product_out_path.stem if _is_product_complete(product_out_path, patterns, block_size): _log.debug("product already on disk: %r", product_name) continue else: _log.debug("download: %r", product_name) with client.open_zip_archive(url) as zf: _log.debug("%s open", url) components = _filter_components(zf, patterns) component_iter = tqdm.tqdm( components, unit="files", leave=False, disable=noprogress ) for info in component_iter: filename = pathlib.Path(info.filename) component_iter.set_description(filename.name) targetdir = outdir / filename.parent outfile = targetdir / filename.name _log.debug("targetdir = %r", targetdir) _log.debug("outfile = %r", outfile) targetdir.mkdir(exist_ok=True, parents=True) if outfile.exists(): _log.debug("outfile = %r exists", outfile) else: _download( zf, info, outfile, block_size=block_size, noprogress=noprogress, ) _log.debug("%r extracted", info.filename) def download_annotations( products: List[str], *, patterns: Optional[List[str]] = None, outdir: PathType = ".", auth: Optional[Auth] = None, block_size: Optional[int] = BLOCKSIZE, noprogress: bool = False, ): """Download annotations for the specified Sentinel-1 products.""" results = query(products) if len(results) != len(products): warnings.warn( f"only {len(results)} of the {len(products)} requested products " f"found on the remote server" ) urls = [item.properties["url"] for item in results] download_components_from_urls( urls, patterns=patterns, outdir=outdir, auth=auth, block_size=block_size if block_size is not None else BLOCKSIZE, noprogress=noprogress, ) def _get_auth( user: Optional[str] = None, pwd: Optional[str] = None, hostname: Url = "urs.earthdata.nasa.gov", ) -> Auth: if user is not None and pwd is not None: return Auth(user, pwd) elif user is None and 
pwd is None: db = netrc.netrc() user, _, pwd = db.authenticators(hostname) return Auth(user, pwd) else: raise ValueError( "Both username and password must be provided to authenticate." ) asfsmd-1.4.1/asfsmd/fsspec_client.py000066400000000000000000000017251452644456500174660ustar00rootroot00000000000000"""Asfsmd client based on fsspec.""" import zipfile import contextlib from typing import Iterator, Optional import fsspec import aiohttp from .common import AbstractClient, Auth, Url class FsspacClient(AbstractClient): """Fsspec based asfsmd client.""" def __init__(self, auth: Auth, block_size: Optional[int] = None): """Initialize the fsspec based client.""" client_kwargs = None if auth is not None: user, pwd = auth client_kwargs = {"auth": aiohttp.BasicAuth(user, pwd)} self._fs = fsspec.filesystem( "http", block_size=block_size, client_kwargs=client_kwargs, ) @contextlib.contextmanager def open_zip_archive(self, url: Url) -> Iterator[zipfile.ZipFile]: """Context manager for the remote zip archive.""" with self._fs.open(url, "rb") as fd: with zipfile.ZipFile(fd) as zf: yield zf Client = FsspacClient asfsmd-1.4.1/asfsmd/httpio_client.py000066400000000000000000000044631452644456500175140ustar00rootroot00000000000000"""Asfsmd client based on httpio and requests.""" import zipfile import contextlib from typing import IO, Iterator, Optional import httpio import requests from .common import AbstractClient, Auth, BLOCKSIZE, Url class HttpIOFile(httpio.SyncHTTPIOFile): """Class to represent an file-like object accessed via HTTP.""" def open(self, session: Optional[requests.Session] = None): # noqa: A003 """Open the remote file.""" self._assert_not_closed() if not self._closing and self._session is None: self._session: requests.Session self._session = requests.Session() if session is None else session response = self._session.get(self.url, stream=True, **self._kwargs) with response: response.raise_for_status() try: self.length = int(response.headers["Content-Length"]) except 
class HttpIOClient(AbstractClient):
    """HttpIO based asfsmd client."""

    def __init__(self, auth: Auth, block_size: int = BLOCKSIZE):
        """Initialize the httpio based client."""
        # a single session is shared by all the remote files opened by
        # this client
        self._session = requests.Session()
        self._session.auth = auth
        self._block_size = block_size

    def __exit__(self, exc_type, exc_value, traceback):  # noqa: D105
        self._session.close()

    def open(self, url: Url, mode: str = "rb") -> IO[bytes]:  # noqa: A003
        """Open a remote file.

        Only the binary read mode ("rb") is supported, a `ValueError` is
        raised for any other mode.
        """
        if mode != "rb":
            # NOTE: the "f" prefix is needed to actually report the mode
            raise ValueError(f"invalid mode: {mode!r}")
        remote_file = HttpIOFile(url, block_size=self._block_size)
        return remote_file.open(session=self._session)

    @contextlib.contextmanager
    def open_zip_archive(self, url: Url) -> Iterator[zipfile.ZipFile]:
        """Context manager for the remote zip archive."""
        with self.open(url) as fd:
            with zipfile.ZipFile(fd) as zf:
                yield zf
class SmartOpenClient(AbstractClient):
    """SmartOpen based asfsmd client."""

    def __init__(
        self, auth: Optional[Auth], block_size: Optional[int] = None
    ):
        """Initialize the smartopen based client.

        The `auth` credentials (if any) and the `block_size` (if any) are
        collected in `client_kwargs`, which is set to None if no option
        has been specified.
        """
        client_kwargs: Dict[str, Any] = {}
        if auth is not None:
            client_kwargs["user"] = auth.user
            client_kwargs["password"] = auth.pwd
        if block_size is not None:
            client_kwargs["buffer_size"] = block_size
        self.client_kwargs = client_kwargs if client_kwargs else None

    @contextlib.contextmanager
    def open_zip_archive(self, url: Url) -> Iterator[zipfile.ZipFile]:
        """Context manager for the remote zip archive."""
        # The options are forwarded via "transport_params" (None is a
        # valid value for it): `client_kwargs` may be None and hence it
        # cannot be unpacked with "**".
        # NOTE(review): assumes that the HTTP transport of the installed
        # smart_open accepts "user", "password" and "buffer_size".
        with smart_open.open(
            url, "rb", transport_params=self.client_kwargs
        ) as fd:
            with zipfile.ZipFile(fd) as zf:
                yield zf
@pytest.mark.parametrize(
    "in_,out",
    [
        pytest.param(["a", "b", "c"], ["a", "b", "c"], id="unique-list"),
        pytest.param(["a", "b", "c", "b"], ["a", "b", "c"], id="list"),
        pytest.param((1, 2, 2, 3, 1, 2), [1, 2, 3], id="tuple"),
        pytest.param(range(3), [0, 1, 2], id="generator"),
        pytest.param(
            itertools.chain(range(3, 0, -1), range(3)),
            [3, 2, 1, 0],
            id="reversed-generator",
        ),
    ],
)
def test_unique(in_, out):
    """Check that `unique` returns the expected list for each input."""
    result = unique(in_)
    assert result == out
"a02.txt" ], "a": [ "a01.txt", "a02.txt" ], "b/1": [ "b01.txt", "b02.txt" ] } """, { "": ["filelist01.txt", "filelist02.txt", "a02.txt"], "a": ["a01.txt", "a02.txt"], "b/1": ["b01.txt", "b02.txt"], }, id="unique-per-section", ), pytest.param( """\ { "": [ "filelist01.txt", "filelist02.txt", "filelist01.txt" ], "a": [ "a01.txt", "a02.txt", "a02.txt" ], "b/1": [ "b01.txt", "b02.txt" ] } """, { "": ["filelist01.txt", "filelist02.txt"], "a": ["a01.txt", "a02.txt"], "b/1": ["b01.txt", "b02.txt"], }, id="duplicate", ), ], ) def test__load_product_lists_json(idata, odata, tmp_path): jsonfile = tmp_path / "productlist.json" jsonfile.write_text(idata) data = load_product_lists(jsonfile) assert data == odata @pytest.mark.parametrize( ["idata", "odata"], [ pytest.param( """\ filelist01.txt filelist02.txt filelist03.txt """, { "": ["filelist01.txt", "filelist02.txt", "filelist03.txt"], }, id="unique", ), pytest.param( """\ # comment line filelist01.txt filelist02.txt # indented comment line filelist03.txt """, { "": ["filelist01.txt", "filelist02.txt", "filelist03.txt"], }, id="unique-with-comment", ), pytest.param( # NOTE: filename01.txt has trailing spaces ( "filelist01.txt \n" "\n" "filelist02.txt\n" " \n" " filelist03.txt \n" ), { "": ["filelist01.txt", "filelist02.txt", "filelist03.txt"], }, id="unique-with-emply-line", ), pytest.param( """\ filelist01.txt filelist02.txt filelist03.txt filelist03.txt """, { "": ["filelist01.txt", "filelist02.txt", "filelist03.txt"], }, id="duplicate", ), pytest.param( """\ # comment filelist01.txt filelist02.txt # duplicates filelist03.txt filelist03.txt """, { "": ["filelist01.txt", "filelist02.txt", "filelist03.txt"], }, id="duplicate-with-comments-and-empty-lines", ), ], ) def test__load_product_lists_text(idata, odata, tmp_path): textfile = tmp_path / "productlist.txt" textfile.write_text(idata) data = load_product_lists(textfile) assert data == odata @pytest.mark.parametrize( ["jsondata", "textdata", "odata"], [ pytest.param( """\ { 
@mock.patch("asfsmd.cli._get_auth", mock.Mock(return_value=dummy_auth))
@mock.patch("asfsmd.cli.download_components_from_urls", mock.Mock())
@mock.patch("asfsmd.cli.download_annotations")
def test_asfsmd_cli_productlist(download_annotations):
    """Product names given to the CLI are passed to download_annotations."""
    requested = ["product01", "product02"]

    asfsmd_cli(requested, noprogress=True)

    # keyword arguments expected when only "noprogress" is specified
    expected_kwargs = dict(
        outdir=pathlib.Path("."),
        auth=dummy_auth,
        patterns=asfsmd.core.make_patterns(),
        block_size=asfsmd.core.BLOCKSIZE,
        noprogress=True,
    )
    download_annotations.assert_called_once_with(requested, **expected_kwargs)
@mock.patch("asfsmd.cli._get_auth", mock.Mock(return_value=dummy_auth))
@mock.patch("asfsmd.cli.download_annotations", mock.Mock())
@mock.patch("asfsmd.cli.download_components_from_urls")
def test_asfsmd_cli_urls(download_components_from_urls):
    """With urls=True the inputs go to download_components_from_urls."""
    requested = ["url1", "url2"]

    asfsmd_cli(requested, urls=True, noprogress=True)

    # keyword arguments expected when only "noprogress" is specified
    expected_kwargs = dict(
        outdir=pathlib.Path("."),
        auth=dummy_auth,
        patterns=asfsmd.core.make_patterns(),
        block_size=asfsmd.core.BLOCKSIZE,
        noprogress=True,
    )
    download_components_from_urls.assert_called_once_with(
        requested, **expected_kwargs
    )
class DummyProductWriter:
    """Write a dummy product on disk, for testing purposes.

    The product consists of a "manifest.safe" XML file and of one data
    file for each of the `components`.  Each data file is described in the
    manifest by a "dataObject" element reporting its size, its location
    and its MD5 checksum.
    """

    DEFAULT_DATA_SIZE = 8 * 1024  # 8 KiB

    def __init__(self, components: Optional[List[str]] = None):
        """Initialize the writer.

        The `components` are the paths of the data files, relative to the
        product directory.  Three annotation files are used by default.
        """
        # product directory, only set while `write` is running
        self._path: Optional[pathlib.Path] = None
        if components is None:
            components = [
                "./annotation/s1a-iw1-slc-vv-20230222t051014-20230222t051042-047344-05aecf-001.xml",  # noqa: E501
                "./annotation/s1a-iw2-slc-vv-20230222t051014-20230222t051042-047344-05aecf-002.xml",  # noqa: E501
                "./annotation/s1a-iw3-slc-vv-20230222t051014-20230222t051042-047344-05aecf-003.xml",  # noqa: E501
            ]
        # map each component path to the size (in bytes) of its data file
        self.components = {path: self.DEFAULT_DATA_SIZE for path in components}

    def _create_data_object(self, path: pathlib.Path, size: int):
        """Write a data file of `size` bytes and return its XML descriptor."""
        path.parent.mkdir(parents=True, exist_ok=True)

        # the content is the file name padded with "x" (or truncated) so
        # that the file is exactly `size` bytes long
        data = path.name.encode("ascii").ljust(size, b"x")[:size]
        path.write_bytes(data)
        md5 = hashlib.md5(data)

        filename = str("." / path.relative_to(self._path))

        elem = etree.Element("dataObject")
        byte_stream_elem = etree.SubElement(
            elem, "byteStream", attrib=dict(size=str(size))
        )
        etree.SubElement(
            byte_stream_elem, "fileLocation", attrib=dict(href=filename)
        )
        checksum_elem = etree.SubElement(
            byte_stream_elem, "checksum", attrib=dict(checksumName="MD5")
        )
        checksum_elem.text = md5.hexdigest()

        return elem

    def _create_xmldoc(self):
        """Write all the data files and return the manifest XML document."""
        root = etree.Element("dummy_xfdu")
        data_object_section = etree.SubElement(root, "dataObjectSection")
        for path, size in self.components.items():
            assert self._path
            elem = self._create_data_object(self._path / path, size)
            data_object_section.append(elem)
        return etree.ElementTree(root)

    def write(self, path: pathlib.Path):
        """Write the dummy product into the `path` directory."""
        try:
            self._path = path
            # the product directory is not created by the data files if
            # there are no components
            path.mkdir(parents=True, exist_ok=True)
            manifest_path = path / "manifest.safe"
            xmldoc = self._create_xmldoc()
            xmldoc.write(manifest_path)
        finally:
            self._path = None
@mock.patch("netrc.netrc")
def test__get_auth(netrc_mock):
    """Explicit credentials are returned without looking at netrc."""
    auth = asfsmd.core._get_auth("user", "password")
    assert isinstance(auth, asfsmd.core.Auth)
    assert auth == asfsmd.core.Auth("user", "password")
    # the netrc file must not be used if the credentials are provided
    netrc_mock.assert_not_called()
def test__get_auth_noauth(tmp_path):
    """A missing netrc file results in a FileNotFoundError."""
    real_netrc = netrc.netrc
    # NOTE: this file is never created
    missing_netrc_path = tmp_path.joinpath("dummy_netrc")

    def fake_netrc(*args, **kwargs):
        return real_netrc(missing_netrc_path)

    with mock.patch("netrc.netrc", new=fake_netrc):
        with pytest.raises(FileNotFoundError):
            asfsmd.core._get_auth()