eumdac-3.0.0/.gitignore
*chain*.yaml
venv/
# byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# packaging
build/
develop-eggs/
dist/
downloads/
eggs/
*.egg
.eggs/
*.egg-info/
.installed.cfg
parts/
sdist/
wheels/
MANIFEST
# testing
.coverage*
.tox/
coverage.xml
htmlcov/
chain.yaml
data/
chain.yml
# jupyter notebook
*.ipynb
.ipynb_checkpoints/
# IDE config
.idea/
### VisualStudioCode ###
# Maybe .vscode/**/* instead - see comments
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
#### VisualStudioCode Patch ###
# Ignore all local history of files
**/.history
eumdac-3.0.0/.gitlab-ci.yml
stages:
- documentation
- unit_testing
- integration_testing
- distribution
- binary_testing
test-long:
stage: unit_testing
# use miniconda3 docker image, for simple access to various python versions with tox-conda
image: continuumio/miniconda3
script:
- source /opt/conda/bin/activate
- python -V
- pip install tox tox-conda
- tox -v --recreate
artifacts:
reports:
coverage_report:
coverage_format: cobertura
path: coverage.xml
rules:
- if: "$CI_MERGE_REQUEST_TITLE =~ /^Draft:/"
when: never
- if: "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME == $CI_DEFAULT_BRANCH"
- if: '$CI_PIPELINE_SOURCE == "schedule"'
- if: "$CI_PIPELINE_SOURCE == $CI_PIPELINE_SOURCE"
test-short:
stage: unit_testing
# use miniconda3 docker image, for simple access to various python versions with tox-conda
image: continuumio/miniconda3
script:
- source /opt/conda/bin/activate
- python -V
- pip install tox tox-conda
- tox -v -e linters,py39
rules:
- if: "$CI_MERGE_REQUEST_TITLE =~ /^Draft:/"
- if: "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME == $CI_DEFAULT_BRANCH"
when: never
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
when: never
test-integration:
stage: integration_testing
# use miniconda3 docker image, for simple access to various python versions with tox-conda
image: continuumio/miniconda3
script:
- source /opt/conda/bin/activate
- python -V
- pip install tox tox-conda
- tox -v -e integration
rules:
- if: "$INTEGRATION"
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
build-win-exec:
stage: distribution
# This job must be executed by a Windows GitLab Runner, hence the 'Windows' tag below.
tags:
- Windows
script:
- Get-Host
- conda --version
- conda info --envs
- conda activate EUMDAC-py39
- conda info --envs
- python --version
- pyinstaller --version
- pip install .
- pip install pillow
- cd win
- python update-version.py
- cd ..
- pyinstaller win\eumdac-win --add-data "eumdac\endpoints.ini;eumdac" --onefile --name eumdac --version-file=win\eumdac-win-version.py --icon win\eumdac-logo.png
- copy .\dist\eumdac.exe .
artifacts:
name: "eumdac-win"
paths: [eumdac.exe]
# when: manual # This setting turns a job into a manual one
rules:
- if: "$INTEGRATION"
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
- if: '$CI_PIPELINE_SOURCE == "schedule"'
build-linux-exec:
stage: distribution
tags:
- linux
image: python:3.9
script:
- echo "Manual Job to create a Linux executable of EUMDAC"
# Install the missing dependencies and verify the versions of all relevant dependencies
- conda activate eumdac.build
- python --version
- pip --version
- pip install pyinstaller
- pyinstaller --version
# Install EUMDAC
- pip install .
# Create the EUMDAC executable
- pyinstaller bin/eumdac --add-data "eumdac/endpoints.ini:eumdac" --onefile --name eumdac.cli
# Rename the executable from 'eumdac.cli' to 'eumdac'
- mv eumdac eumdac.folder
- mv dist/eumdac.cli eumdac-linux
- ls
artifacts:
name: "eumdac-linux"
paths: [eumdac-linux]
# when: manual # This setting turns a job into a manual one
rules:
- if: "$INTEGRATION"
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
- if: '$CI_PIPELINE_SOURCE == "schedule"'
build-mac-binary:
stage: distribution
tags:
- macos
script:
- echo "Manual Job to create a MacOS executable of EUMDAC"
# Create the EUMDAC x86_64 executable
- echo "Building Intel x86_64 binary"
- conda activate eumdac.build.x64
- pip install .
- pyinstaller bin/eumdac --add-data "eumdac/endpoints.ini:eumdac" --onefile --name eumdac.cli --target-architecture=x86_64
- mkdir dist/x86_64/
- mv dist/eumdac.cli dist/x86_64/eumdac
- rm -r build *.spec
# Create the EUMDAC arm64 executable
- echo "Building ARM binary"
- conda activate eumdac.build.arm64
- pip install .
- pyinstaller bin/eumdac --add-data "eumdac/endpoints.ini:eumdac" --onefile --name eumdac.cli --target-architecture=arm64
- mkdir dist/arm64/
- mv dist/eumdac.cli dist/arm64/eumdac
artifacts:
name: "eumdac-macos"
paths: [dist/x86_64/eumdac, dist/arm64/eumdac]
rules:
- if: "$INTEGRATION"
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
- if: '$CI_PIPELINE_SOURCE == "schedule"'
test-mac-binary:
stage: binary_testing
tags:
- macos
dependencies:
- "build-mac-binary"
variables:
DEBUG: "False"
script:
- ls
- conda info --envs
- conda init
- conda activate EUMDAC-bintest
- python --version
- pip --version
- pip install pytest
- pytest --version
- git clone https://$EUMDAC_TESTING_READ_ONLY_USERNAME:$EUMDAC_TESTING_READ_ONLY_TOKEN@gitlab.eumetsat.int/dso/dso_test_scripts/eumdac-testing.git
- cp dist/arm64/eumdac eumdac-testing/cli-testing/eumdac_OPE
- cd eumdac-testing/cli-testing/
# - git checkout $EUMDAC_TESTING_BRANCH
- export ENV_VARIABLE=OPE ; export consumer_key_OPE=$MAC_KEY ; export consumer_secret_OPE=$MAC_SECRET ; pytest eumdac_cli_tests.py -v -s -rA --junitxml=test-mac-binary-report.xml
rules:
- if: "$INTEGRATION"
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
- if: '$CI_PIPELINE_SOURCE == "schedule"'
artifacts:
when: always
paths: [eumdac-testing/cli-testing/test-mac-binary-report.xml]
reports:
junit: eumdac-testing/cli-testing/test-mac-binary-report.xml
test-linux-binary:
stage: binary_testing
tags:
- linux
dependencies:
- "build-linux-exec"
variables:
DEBUG: "False"
script:
- ls
- conda activate eumdac_binary_test
- python --version
- pip --version
- pip install pytest
- pytest --version
- git clone https://$EUMDAC_TESTING_READ_ONLY_USERNAME:$EUMDAC_TESTING_READ_ONLY_TOKEN@gitlab.eumetsat.int/dso/dso_test_scripts/eumdac-testing.git
- cp eumdac-linux eumdac-testing/cli-testing/eumdac_OPE
# TODO: what is this achieving? disabling for now
- echo "mv eumdac-linux eumdac"
# TODO: where are we?
- pwd
- cd eumdac-testing/cli-testing/
# pwd
# - git checkout $EUMDAC_TESTING_BRANCH
- export ENV_VARIABLE=OPE ; export consumer_key_OPE=$LINUX_KEY ; export consumer_secret_OPE=$LINUX_SECRET ; pytest eumdac_cli_tests.py -v --junitxml=test-linux-binary-report.xml
rules:
- if: "$INTEGRATION"
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
- if: '$CI_PIPELINE_SOURCE == "schedule"'
# when: manual
artifacts:
when: always
paths: [eumdac-testing/cli-testing/test-linux-binary-report.xml]
reports:
junit: eumdac-testing/cli-testing/test-linux-binary-report.xml
test-win10-binary:
stage: binary_testing
tags:
- Windows
dependencies:
- "build-win-exec"
variables:
DEBUG: "False"
script:
- ls
- conda info --envs
- conda activate EUMDAC-bintest
- python --version
- pip --version
- pip install pytest
- pytest --version
- git clone https://${EUMDAC_TESTING_READ_ONLY_USERNAME}:${EUMDAC_TESTING_READ_ONLY_TOKEN}@gitlab.eumetsat.int/dso/dso_test_scripts/eumdac-testing.git
- cp eumdac.exe eumdac-testing/cli-testing/eumdac.exe
- cd eumdac-testing/cli-testing/
# - git checkout $EUMDAC_TESTING_BRANCH
- git config --system core.longpaths true
- $Env:ENV_VARIABLE = "OPE"
- $Env:ENV_OS = "Windows"
- $Env:consumer_key_OPE = "$W10_KEY"
- $Env:consumer_secret_OPE = "$W10_SECRET"
- $Env:ENV_VARIABLE
- dir
- pytest eumdac_cli_tests.py -v -s -rA --junitxml=test-win10-binary-report.xml
rules:
- if: "$INTEGRATION"
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
- if: '$CI_PIPELINE_SOURCE == "schedule"'
artifacts:
when: always
paths: [eumdac-testing/cli-testing/test-win10-binary-report.xml]
reports:
junit: eumdac-testing/cli-testing/test-win10-binary-report.xml
test-win11-binary:
stage: binary_testing
tags:
- Windows11
dependencies:
- "build-win-exec"
variables:
DEBUG: "False"
script:
#- whoami
- ls
- conda info --envs
- conda activate EUMDAC-bintest
- python --version
- pip --version
- pip install pytest
- pytest --version
- git clone https://${EUMDAC_TESTING_READ_ONLY_USERNAME}:${EUMDAC_TESTING_READ_ONLY_TOKEN}@gitlab.eumetsat.int/dso/dso_test_scripts/eumdac-testing.git
- cp eumdac.exe eumdac-testing/cli-testing/eumdac.exe
- cd eumdac-testing/cli-testing/
# - git checkout $EUMDAC_TESTING_BRANCH
- git config --system core.longpaths true
- $Env:ENV_VARIABLE = "OPE"
- $Env:ENV_OS = "Windows"
- $Env:consumer_key_OPE = "$W11_KEY"
- $Env:consumer_secret_OPE = "$W11_SECRET"
- $Env:ENV_VARIABLE
- dir
- pytest eumdac_cli_tests.py -v -s -rA --junitxml=test-win11-binary-report.xml
rules:
- if: "$INTEGRATION"
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
- if: '$CI_PIPELINE_SOURCE == "schedule"'
artifacts:
when: always
paths: [eumdac-testing/cli-testing/test-win11-binary-report.xml]
reports:
junit: eumdac-testing/cli-testing/test-win11-binary-report.xml
generate-docs:
stage: documentation
# use miniconda3 docker image, for simple access to various python versions with tox-conda
image: continuumio/miniconda3
dependencies: []
script:
- source /opt/conda/bin/activate
- python -V
- pip install pdoc PyYAML
- pdoc --version
- mkdir template
- echo "Extracting EUMDAC version"
- version=$(cat eumdac/__version__.py | grep '__version__ = ' | cut -d'"' -f 2)
- echo "v$version"
- >
echo "{#
We want to extend the default template instead of defining everything ourselves.
#}
{% extends 'default/module.html.jinja2' %}
{#
We can access system environment variables in the template, for example to pass version information.
#}
{% block nav_title %}
{% if logo %}
{% if logo_link %}{% endif %}
{% if logo_link %}{% endif %}
{% endif %}
v$version
{% endblock %}
{% block nav_footer %}
{% endblock %}" > template/module.html.jinja2
- cat template/module.html.jinja2
- >
echo ".pdoc {
font-family: Roboto, Helvetica Neue, sans-serif;
}
nav.pdoc {
background-color: #00205B;
color: #FFF;
}
nav.pdoc a, nav.pdoc a:hover {
color: #FFF;
}
nav.pdoc .module-list-button {
display: inline-flex;
align-items: center;
color: #FFF;
border-color: #5B7F95;
margin-bottom: 1rem;
}
@media (max-width: 769px) {
#navtoggle {
cursor: pointer;
position: absolute;
width: 50px;
height: 40px;
top: 1rem;
right: 1rem;
border-color: #5B7F95;
color: #5B7F95;
display: flex;
opacity: 0.8;
z-index: 999;
}
#navtoggle:hover {
opacity: 1;
}
#togglestate + div {
display: none;
}
#togglestate:checked + div {
display: inherit;
}
main, header {
padding: 2rem 3vw;
}
header + main {
margin-top: -3rem;
}
.git-button {
display: none !important;
}
nav input[type="search"] {
/* don't overflow into menu button */
max-width: 77%;
}
nav input[type="search"]:first-child {
/* align vertically with the hamburger menu */
margin-top: -6px;
}
nav input[type="search"]:valid ~ * {
/* hide rest of the menu when search has contents */
display: none !important;
}
}" > template/custom.css
- cat template/custom.css
- mkdir -p "docs/v$version"
- pdoc -o "./docs/v$version" -t ./template/ --logo /docs/eumdac/eumdac-logo-text.png eumdac
artifacts:
paths:
- docs/
deploy-docs:
stage: documentation
tags:
- linux
needs:
- generate-docs
script:
- cat $EUMDAC_DOCS_DEPLOY
- echo
- bash $EUMDAC_DOCS_DEPLOY
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
eumdac-3.0.0/.gitlab/issue_templates/Bug.md
# Bug Report
## Description
## Expected Behaviour
## Steps to Reproduce
```python
import eumdac
# code goes here
```
```
# error message goes here
```
## Specifications
- Version / git Commit:
- Python Version:
- Operating System:
eumdac-3.0.0/.gitlab/issue_templates/Change.md
# Change Request
## Description
## Justification
## Impact on Existing User Workflows
## Additional Context
eumdac-3.0.0/.gitlab/issue_templates/Feature.md
# Feature Request
## Description
## User Story
- As a *role*, I want to *capability*, so that I *receive benefit*.
## Additional Context
eumdac-3.0.0/AUTHORS.txt
# Copyright holder
EUMETSAT
# List of development authors
Carlos Horn
Ben Loveday
Niklas Jordan
Paulo Carmo
Matthias Schwarz
Eoin O'Neill
Pramit Ghosh
Yigit Öner Altintas
Luisa Araujo
Christopher Saloman - EUMETSAT
Joaquin Rodriguez-Guerra - EUMETSAT
Rafa de la Hoz - EUMETSAT
# Support
For all queries on this software package, please contact ops@eumetsat.int
eumdac-3.0.0/LICENSE.txt
Unless otherwise specified, the contents of this repository are distributed
by EUMETSAT under an MIT License, with Copyright (c) 2021 EUMETSAT.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
eumdac-3.0.0/LICENSE_APACHE_v2.txt
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
eumdac-3.0.0/LICENSE_MIT.txt
Copyright Jason R. Coombs
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
eumdac-3.0.0/README.md
# EUMDAC - EUMETSAT Data Access Client
**EUMDAC** is the **EUM**ETSAT **D**ata **A**ccess **C**lient. It provides simple access to EUMETSAT data from all satellite missions. As a **Python library**, it comes with many methods and helpers for using EUMETSAT's APIs and services, such as Data Store and Data Tailor. As a **CLI**, it provides a variety of useful command-line utilities for data search, translation and processing.
Please consult the following documentation for more information:
- [EUMDAC User Guide](https://user.eumetsat.int/resources/user-guides/eumetsat-data-access-client-eumdac-guide) - Installing and using the CLI and library.
- [EUMDAC API Reference](https://usc.tools.eumetsat.int/docs/eumdac/) - Detailed information on classes, functions, and modules, including method descriptions and parameter usage.
## Prerequisites
You will need a Python environment to run the library implementation of this code; EUMDAC requires Python 3.7 or higher. We recommend installing the latest Anaconda Python distribution for your operating system (https://www.anaconda.com/). No prerequisites are required for running the CLI binaries.
## Installing the EUMDAC library and CLI
### Installing with PIP
The EUMDAC Python package is available through [PyPI](https://pypi.org/):
```bash
pip install eumdac
```
### Installing with Conda
To install EUMDAC on the Anaconda Python distribution, please visit the [EUMETSAT conda-forge page](https://anaconda.org/Eumetsat/repo) for install instructions.
```bash
conda install -c eumetsat-forge eumdac
```
### Installing from source
To install EUMDAC from the development source, clone the repository and install it locally.
```bash
git clone https://gitlab.eumetsat.int/eumetlab/data-services/eumdac.git
cd eumdac
pip install .
```
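After installing EUMDAC with any of the methods above, you can verify the installation and list the available Data Store collections from Python. The snippet below is a minimal sketch based on the package documentation; the consumer key and secret are placeholders for your personal EUMETSAT API credentials.
```python
import eumdac

# Insert your personal consumer key and secret from your EUMETSAT API profile
credentials = ("my-consumer-key", "my-consumer-secret")
token = eumdac.AccessToken(credentials)

# Browse the available Data Store collections
datastore = eumdac.DataStore(token)
for collection in datastore.collections:
    print(f"{collection} - {collection.title}")
```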
## Using the EUMDAC CLI binaries (no installation required)
If installing EUMDAC is not possible due to missing technical prerequisites, we recommend using our binaries. These executable applications give you access to all functions of the CLI without requiring an installation.
The binaries are available for Windows, Linux and Mac in the [Releases section](https://gitlab.eumetsat.int/eumetlab/data-services/eumdac/-/releases).
You can find more information in the [EUMDAC User Guide](https://user.eumetsat.int/resources/user-guides/eumetsat-data-access-client-eumdac-guide#ID-Command-Line-guide).
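For illustration, a typical CLI session first stores the API credentials and then searches a collection and downloads products. The commands below are only a sketch: the collection ID, dates and output directory are placeholders, and the exact set of flags (in particular the output-directory option) is documented in the EUMDAC User Guide.
```bash
# Store your personal consumer key and secret
eumdac set-credentials <ConsumerKey> <ConsumerSecret>

# Search a collection by sensing time
eumdac search -c EO:EUM:DAT:MSG:HRSEVIRI -s 2024-01-01 -e 2024-01-02 --limit 3

# Download the matching products
eumdac download -c EO:EUM:DAT:MSG:HRSEVIRI -s 2024-01-01 -e 2024-01-02 -o ./downloads
```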
## Contributing
If you feel that something is missing, should work differently, or you find a bug in EUMDAC, you are encouraged to provide feedback to the development team. Please contact us via the [EUMETSAT User Support Helpdesk](mailto:ops@eumetsat.int) if you have suggestions or questions.
## Authors
See AUTHORS.txt for the list of contributors.
## Dependencies
pyyaml, License: MIT (LICENSE_MIT.txt), Copyright 2019 Ingy döt Net, info: https://anaconda.org/conda-forge/pyyaml/ \
requests, License: Apache-2.0 (LICENSE_APACHE_v2.txt), Copyright 2014 Kenneth Reitz, info: https://anaconda.org/conda-forge/requests \
responses, License: Apache-2.0 (LICENSE_APACHE_v2.txt), Copyright 2015 David Cramer, info: https://anaconda.org/conda-forge/responses \
setuptools, License: MIT (LICENSE_MIT.txt), Copyright 2020 Jason R. Coombs, info: https://anaconda.org/conda-forge/setuptools
## License
This code is licensed under an MIT license. See file LICENSE.txt for details on the usage and distribution terms. No dependencies are distributed as part of this package.
All product names, logos, and brands are property of their respective owners. All company, product and service names used in this website are for identification purposes only.
eumdac-3.0.0/bin/eumdac
#!/usr/bin/env python3
"""EUMETSAT Data Access Client"""
from eumdac.cli import cli
cli()
eumdac-3.0.0/eumdac.ABOUT
# Component information
name: eumdac
description: EUMDAC is the EUMETSAT Data Access Client.
It provides simple access to the EUMETSAT data of all satellite
missions.
copyright: 2021 EUMETSAT
license_spdx: MIT
home_url:
date: 2021-11-29
# Package IPR documentation
license_text_file: LICENSE.txt
# File(s) associated to this component
about_resource: AUTHORS.txt
about_resource: LICENSE.txt
about_resource: README.md
about_resource: mypy.ini
about_resource: setup.py
about_resource: tox.ini
about_resource: ./bin/eumdac
about_resource: ./eumdac/__init__.py
about_resource: ./eumdac/__version__.py
about_resource: ./eumdac/cli.py
about_resource: ./eumdac/collection.py
about_resource: ./eumdac/customisation.py
about_resource: ./eumdac/datastore.py
about_resource: ./eumdac/datatailor.py
about_resource: ./eumdac/endpoints.ini
about_resource: ./eumdac/product.py
about_resource: ./eumdac/subscription.py
about_resource: ./eumdac/tailor_models.py
about_resource: ./eumdac/token.py
about_resource: ./tests/__init__.py
about_resource: ./tests/base.py
about_resource: ./tests/test_cli.py
about_resource: ./tests/test_collection.py
about_resource: ./tests/test_customisation.py
about_resource: ./tests/test_datastore.py
about_resource: ./tests/test_datatailor.py
about_resource: ./tests/test_product.py
about_resource: ./tests/test_subscription.py
about_resource: ./tests/test_token.py
about_resource: ./tests/data/test_collection.TestCollection.pickle.gz
about_resource: ./tests/data/test_collection.TestSearchResults.pickle.gz
about_resource: ./tests/data/test_customisation.TestCustomisation.pickle.gz
about_resource: ./tests/data/test_datastore.TestDataStore.pickle.gz
about_resource: ./tests/data/test_datatailor.TestDataTailor.pickle.gz
about_resource: ./tests/data/test_product.TestProduct.pickle.gz
about_resource: ./tests/data/test_subscription.TestSubscription.pickle.gz
about_resource: ./tests/data/test_token.TestAccessToken.pickle.gz
eumdac-3.0.0/eumdac/__init__.py
"""
## EUMDAC Library
EUMDAC is a Python library that simplifies access to the EUMETSAT Data Access Services.
## Classes
The main classes are:
- AccessToken - manages authentication, provides tokens to other classes
- DataStore - interfaces with EUMETSAT Data Store for accessing collections and performing searches
- Collection - a Data Store collection of products, providing its metadata and allowing searching for products
- Product - a Data Store product, providing its metadata and allowing downloading it (or some of its contents)
- DataTailor - interfaces with EUMETSAT Data Tailor Webservice for customising Data Store products
## Basic DataStore usage
>>> from eumdac.token import AccessToken
>>> from eumdac.datastore import DataStore
>>> consumer_key = 'my-consumer-key'
>>> consumer_secret = 'my-consumer-secret'
>>> credentials = (consumer_key, consumer_secret)
>>> token = AccessToken(credentials)
>>> datastore = DataStore(token)
>>> for collection in datastore.collections:
... print(f"{collection} - {collection.title}")
...
EO:EUM:DAT:MSG:HRSEVIRI - High Rate SEVIRI Level 1.5 Image Data - MSG - 0 degree
EO:EUM:DAT:MSG:MSG15-RSS - Rapid Scan High Rate SEVIRI Level 1.5 Image Data - MSG
EO:EUM:DAT:0080 - MVIRI Level 1.5 Climate Data Record - MFG - 0 degree
EO:EUM:DAT:MSG:RSS-CLM - Rapid Scan Cloud Mask - MSG
EO:EUM:DAT:0081 - MVIRI Level 1.5 Climate Data Record - MFG - 57 degree
...
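## Searching a collection
As a further sketch (the collection ID and dates below are only examples), a collection can be searched for products within a sensing time range:
>>> import datetime
>>> collection = datastore.get_collection('EO:EUM:DAT:MSG:HRSEVIRI')
>>> products = collection.search(
...     dtstart=datetime.datetime(2021, 11, 1),
...     dtend=datetime.datetime(2021, 11, 2))
>>> for product in products:
...     print(product)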
## Copyright & License
© EUMETSAT 2024, MIT License
## Support
For all queries on this software package, please contact [ops@eumetsat.int](mailto:ops@eumetsat.int)
"""
from .__version__ import (
__author__,
__author_email__, # noqa
__description__,
__license__,
__title__,
__url__,
__version__,
)
from .datastore import DataStore # noqa
from .datatailor import DataTailor # noqa
from .token import AccessToken # noqa
eumdac-3.0.0/eumdac/__version__.py
"""Module containing package information for setup.py and __init__.py"""
__title__ = "eumdac"
__description__ = "EUMETSAT Data Access Client"
__url__ = "https://gitlab.eumetsat.int/eumetlab/data-services/eumdac"
__version__ = "3.0.0"
__documentation__ = (
"https://user.eumetsat.int/resources/user-guides/eumetsat-data-access-client-eumdac-guide"
)
__api_documentation__ = "https://usc.tools.eumetsat.int/docs/eumdac/"
__author__ = "EUMETSAT"
__author_email__ = "ops@eumetsat.int"
__license__ = "MIT"
eumdac-3.0.0/eumdac/cli.py
"""EUMETSAT Data Access Client"""
from __future__ import annotations
import argparse
import fnmatch
import itertools
import os
import pathlib
import re
import shlex
import shutil
import signal
import stat
import sys
import tempfile
from datetime import datetime
from pathlib import Path
import time
from typing import TYPE_CHECKING
import requests
import yaml
from requests.exceptions import HTTPError
import eumdac
import eumdac.common
from eumdac import DataStore, DataTailor
from eumdac.cli_mtg_helpers import (
build_entries_from_coverage,
is_collection_valid_for_coverage,
pretty_print_entry,
)
from eumdac.collection import SearchResults
from eumdac.config import get_config_dir, get_credentials_path
from eumdac.download_app import DownloadApp
from eumdac.errors import EumdacError
from eumdac.fake import FakeDataStore, FakeDataTailor # type: ignore
from eumdac.local_tailor import (
all_url_filenames,
get_api_url,
get_local_tailor,
get_tailor_id,
get_tailor_path,
is_online,
new_local_tailor,
remove_local_tailor,
)
from eumdac.logging import gen_table_printer, init_logger, logger
from eumdac.order import Order, all_order_filenames, get_default_order_dir, resolve_order
from eumdac.product import Product, ProductError
from eumdac.tailor_app import TailorApp
from eumdac.tailor_models import Chain
from eumdac.token import AccessToken, AnonymousAccessToken
if TYPE_CHECKING: # pragma: no cover
from typing import Any, Callable, Collection, Dict, Iterator, Optional, Tuple, Union
if sys.version_info < (3, 9):
from typing import Iterable, Sequence
else:
from collections.abc import Iterable, Sequence
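# parse_size: convert a human-readable size such as "100 MB" or "2GB" into bytes,
# using binary multiples (1 KB = 1024 B); raises ValueError for unrecognised formats.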
def parse_size(size_str: str) -> int:
size_str = size_str.upper()
units = {"B": 1, "KB": 1024, "MB": 1024**2, "GB": 1024**3, "TB": 1024**4}
match = re.match(r"^(\d+(?:\.\d+)?)\s*([KMGT]?B)$", size_str)
if match:
number, unit = match.groups()
return int(float(number) * units[unit])
else:
raise ValueError("Invalid size format")
def set_credentials(values: Union[str, Sequence[Any], None]) -> None:
token = eumdac.AccessToken(values) # type: ignore[arg-type]
config_dir = get_config_dir()
config_dir.mkdir(exist_ok=True)
credentials_path = get_credentials_path()
credentials_path.touch(mode=(stat.S_IRUSR | stat.S_IWUSR))
try:
logger.info(f"Credentials are correct. Token was generated: {token}")
try:
with credentials_path.open(mode="w") as file:
file.write(",".join(values)) # type: ignore[arg-type]
logger.info(f"Credentials are written to file {credentials_path}")
except OSError:
logger.error(
f"Credentials could not be written to {credentials_path}. Please review your configuration."
)
except HTTPError as e:
if e.response.status_code == 401:
token_url = token.urls.get("token", "token")
logger.error(
"The provided credentials are not valid. "
f"Get your personal credentials at {token_url}",
)
else:
report_request_error(e.response)
class SetCredentialsAction(argparse.Action):
"""eumdac set-credentials entry point"""
def __call__(
self,
parser: argparse.ArgumentParser,
namespace: argparse.Namespace,
values: Union[str, Sequence[Any], None],
option_string: Optional[str] = None,
) -> None:
set_credentials(values)
parser.exit()
def credentials(args: argparse.Namespace) -> None:
set_credentials((args.ConsumerKey, args.ConsumerSecret))
def token(args: argparse.Namespace) -> None:
"""eumdac token entrypoint"""
try:
creds = load_credentials()
except CredentialsFileNotFoundError as exc:
raise EumdacError("No credentials found! Please set credentials!") from exc
try:
old_token = ""
validity = 86400 if not args.validity else args.validity
token = AccessToken(creds, validity=validity)
# Request the token value to fetch an actual token
str(token)
# Manage previously generated tokens: validity and expiration
expires_in = token._expiration - time.time()
logger.debug(f"Got token {token}, which expires in {expires_in:.2f} seconds")
got_new_token = not (
old_token == token._access_token
or abs(expires_in - token.validity_period) > token.request_margin
)
if args.force:
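# --force: revoke and re-request until the service returns a token that differs from
# the previous one and carries (close to) the full requested validity period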
while not got_new_token:
logger.debug(
f"Failed to get new token, got: {token._access_token}, which expires in: {expires_in} seconds"
)
old_token = token._access_token
token._revoke()
token._update_token_data()
expires_in = token._expiration - time.time()
logger.debug(f"Got token {token}, which expires in {expires_in} seconds.")
got_new_token = not (
old_token == token._access_token
or abs(expires_in - token.validity_period) > token.request_margin
)
logger.warning("Existing tokens have been revoked as per the --force parameter")
logger.warning(
"Note: this has invalidated any other token already in use, affecting other processes using the same credentials"
)
if not args.force and args.validity:
logger.warning(
f"The requested validity of {args.validity} seconds may not be applied if a valid token was already available"
)
logger.warning(
"Use --force to revoke any current token and get a token with the desired validity, but this will affect other processes using the same credentials"
)
# Report the validity
logger.warning(f"The following token is valid until {token.expiration}")
# Show the token to the user
print(token)
except HTTPError as e:
if e.response.status_code == 401:
token_url = token.urls.get("token", "token")
logger.error(
"A token could not be generated with your current credentials. "
f"Get your credentials from {token_url}",
)
report_request_error(e.response)
def describe(args: argparse.Namespace) -> None:
"""eumdac describe entrypoint"""
datastore = get_datastore(args, anonymous_allowed=True)
if args.filter and (args.collection or args.product):
raise ValueError("The -f/--filter flag can't be used together with -c or -p")
if args.collection is None and args.product is None:
filter = str(args.filter).lower() if args.filter else ""
for collection in datastore.collections:
collection_str = f"{collection} - {collection.title}"
if args.filter:
collection_str_lowercase = collection_str.lower()
if (filter in collection_str_lowercase) or (
fnmatch.fnmatch(collection_str_lowercase, filter)
):
logger.info(collection_str)
else:
logger.info(collection_str)
elif args.collection is not None and args.product is None:
collection = datastore.get_collection(args.collection)
date = collection.metadata["properties"].get("date", "/")
match = re.match(r"([^/]*)/([^/]*)", date)
start_date, end_date = match.groups() # type: ignore[union-attr]
start_date = start_date or "-"
end_date = end_date or "now"
logger.info(f"{collection} - {collection.title}")
logger.info(f"Date: {start_date} - {end_date}")
logger.info(collection.abstract)
logger.info(f'Licence: {"; ".join(collection.metadata["properties"].get("rights", "-"))}')
logger.info("Search options:")
for option in collection.search_options.items():
extra_pad = "\t" if len(option[0]) < 8 else ""
option_str = f"{option[0]}\t{extra_pad} - {option[1]['title']}"
if option[1]["options"] and option[1]["options"][0]:
option_str += f", accepts: {option[1]['options']}"
cli_param = get_cli_parameter(option[0])
if cli_param:
option_str += f", in CLI {cli_param}"
logger.info(option_str)
elif args.collection is None and args.product is not None:
raise ValueError("Please provide a collection id and a product id")
else:
noneLabel: str = "(Not available for product)"
product = datastore.get_product(args.collection, args.product)
attributes = {
"Platform": product.satellite,
"Instrument": product.instrument,
"Acronym": noneLabel if (not product.acronym) else f"{product.acronym}",
"Orbit": "GEO" if (not product.orbit_is_LEO) else "LEO",
"Sensing Start": (
noneLabel
if (not product.sensing_start)
else f"{product.sensing_start.isoformat(timespec='milliseconds')}Z"
),
"Sensing End": (
noneLabel
if (not product.sensing_end)
else f"{product.sensing_end.isoformat(timespec='milliseconds')}Z"
),
"Size": f"{product.size} KB",
"Published": (
noneLabel
if (not product.ingested)
else f"{product.ingested.isoformat(timespec='milliseconds')}Z"
),
"MD5": noneLabel if (not product.md5) else product.md5,
}
lines = [f"{product.collection} - {product}"] + [
f"{key}: {value}" for key, value in attributes.items()
]
logger.info("\n".join(lines))
## Add additional attributes for LEO products
if product.orbit_is_LEO:
LEO_attributes = {
"Timeliness": product.timeliness,
"Orbit Number": product.orbit_number,
"Orbit Direction": product.orbit_direction,
"Relative Orbit": product.relative_orbit,
"Cycle Number": product.cycle_number,
}
lines = [f"{key}: {value}" for key, value in LEO_attributes.items() if value]
logger.info("\n".join(lines))
## Add additional attributes for MTG products
if product.is_mtg:
MTG_attributes = {
"Coverage": (
noneLabel if (not product.region_coverage) else f"{product.region_coverage}"
),
"Sub-Region": (
noneLabel
if (not product.subregion_identifier)
else f"{product.subregion_identifier}"
),
"Repeat Cycle": (
noneLabel if (not product.repeat_cycle) else f"{product.repeat_cycle}"
),
}
lines = [f"{key}: {value}" for key, value in MTG_attributes.items() if value]
logger.info("\n".join(lines))
if args.verbose:
verbose_attributes = {
"Processing Time": (
noneLabel if (not product.processingTime) else f"{product.processingTime}"
),
"Processor Version": (
noneLabel if (not product.processorVersion) else f"{product.processorVersion}"
),
"Format": noneLabel if (not product.format) else f"{product.format}",
"Quality Status": (
noneLabel if (not product.qualityStatus) else f"{product.qualityStatus}"
),
}
lines = [f"{key}: {value}" for key, value in verbose_attributes.items() if value]
logger.info("\n".join(lines))
if product.entries:
entries: list[str] = []
if args.flat:
entries = sorted(product.entries)
else:
entries = get_product_entries_tree(product.entries)
lines = ["SIP Entries:"] + [f" {filenames}" for filenames in entries]
logger.info("\n".join(lines))
def get_product_entries_tree(entries: Iterable[str]) -> list[str]:
output: list[str] = []
groups: dict[str, list[str]] = {}
for entry in sorted(entries):
if entry.find("/") < 0:
groups[entry] = []
else:
members = entry.split("/", 1)
if members[0] not in groups:
groups[members[0]] = [members[1]]
else:
groups[members[0]].append(members[1])
for group in groups:
is_group: bool = bool(groups[group])
output.append(f"{'+' if is_group else '-'} {group}{('/' if is_group else '')}")
if is_group:
for child in sorted(groups[group]):
output.append(f" - {child}")
return output
def get_cli_parameter(option: str) -> str:
params = {
"bbox": "--bbox",
"geo": "--geometry",
"title": "--filename",
"sat": "--satellite",
"dtstart": "-s, --start",
"dtend": "-e, --end",
"publication": "--publication-after, --publication-before",
"sort": "--sort, --asc, --desc",
"type": "--product-type, --acronym",
"timeliness": "--timeliness",
"orbit": "--orbit",
"relorbit": "--relorbit",
"cycle": "--cycle",
}
if option in params:
return params[option]
else:
return ""
class ProductIterables:
"""Helper class to manage the length of one or more SearchResults which are iterators"""
def __init__(
self,
query_results: list[SearchResults],
limit: Optional[int],
search_query: Dict[str, str],
) -> None:
self.query_results = query_results
self.search_query = search_query
self.limit = limit
def __len__(self) -> int:
result_lengths = sum(len(pq) for pq in self.query_results)
if self.limit:
return min(self.limit, result_lengths)
return result_lengths
def __iter__(self) -> Iterator[Product]:
chained_it = itertools.chain(*self.query_results)
if self.limit:
return itertools.islice(chained_it, self.limit)
return chained_it
def __contains__(self, item: object) -> bool:
raise NotImplementedError()
def _get_args_search_params(args: argparse.Namespace) -> list[str]:
search_params_in_args = []
vargs = vars(args)
for param in [
"dtstart",
"dtend",
"time_range",
"publication_after",
"publication_before",
"sort",
"bbox",
"geo",
"sat",
"sort",
"cycle",
"orbit",
"relorbit",
"title",
"timeliness",
]:
if param in vargs and vargs[param]:
search_params_in_args.append(param)
return search_params_in_args
def _get_query_paging_params(query: str) -> list[str]:
return [
member
for member in query.split("&")
if member.split("=")[0] in ["format", "si", "c", "id", "pw"]
]
def _search(args: argparse.Namespace) -> Tuple[Collection[Product], int, str]:
"""given search query arguments will return the list of matching products"""
datastore = get_datastore(args, anonymous_allowed=True)
query_results = []
products: Collection[Product]
num_products: int
if args.query:
extra_search_params = _get_args_search_params(args)
if extra_search_params:
logger.warning(
f"The following search parameters have been ignored in favour of the opensearch query: {', '.join(extra_search_params)}"
)
paging_params = _get_query_paging_params(args.query[0])
if paging_params:
logger.warning(
f"The following opensearch terms have been ignored: {', '.join(paging_params)}"
)
search_results = datastore.opensearch(args.query[0])
collection_id = str(search_results.collection)
query_results.append(search_results)
products = ProductIterables(query_results, args.limit, search_results.query)
# Check the number of products to execute the search
num_products = len(products)
else:
# See https://docs.opengeospatial.org/is/13-026r9/13-026r9.html#20 for the mathematical notation expected by the publication filter
if args.publication_after and args.publication_before:
publication = f"[{args.publication_after.isoformat(timespec='milliseconds')}Z,{args.publication_before.isoformat(timespec='milliseconds')}Z]"
elif args.publication_after:
publication = f"[{args.publication_after.isoformat(timespec='milliseconds')}Z"
elif args.publication_before:
publication = f"{args.publication_before.isoformat(timespec='milliseconds')}Z]"
else:
publication = None
sort_query = None
if args.sort or args.asc or args.desc:
if args.sort == "ingestion":
sort_prefix = "publicationDate,,"
else: # default to sensing time sorting
sort_prefix = "start,time,"
if not args.sort:
logger.warn(
"Sorting by sensing time by default, use --sort {sensing, ingestion} to remove this warning."
)
direction = 1
if args.desc:
direction = 0
if args.asc:
direction = 1
sort_query = f"{sort_prefix}{direction}"
_query = {
"dtstart": args.dtstart,
"dtend": args.dtend,
"publication": publication,
"bbox": args.bbox,
"geo": args.geo,
"sat": args.sat,
"sort": sort_query,
"cycle": args.cycle,
"orbit": args.orbit,
"relorbit": args.relorbit,
"title": args.filename,
"timeliness": args.timeliness,
"type": args.product_type,
}
query = {key: value for key, value in _query.items() if value is not None}
bbox = query.pop("bbox", None)
if bbox is not None:
query["bbox"] = ",".join(map(str, bbox))
# Use the set=brief parameter to get results faster
query["set"] = "brief"
products = []
num_products = 0
for collection_id in args.collection:
try:
collection = datastore.get_collection(collection_id)
query_results.append(collection.search(**query))
products = ProductIterables(query_results, args.limit, query)
# Check the number of products to execute the search
num_products = len(products)
except Exception as err:
logger.debug(f"Search failed, checking if collection id {collection_id} is valid")
datastore.check_collection_id(collection_id)
raise
return products, num_products, collection_id
def _parse_timerange(args: argparse.Namespace) -> Tuple[datetime, datetime]:
"""
Parses the time range provided as arguments.
This function receives the parsed command-line arguments as an argparse.Namespace object.
The function checks if the `--time-range` argument is used, and if so, it parses the start
and end times from the provided time range. The start time defaults to the beginning of the day
and the end time defaults to the end of the day if specific times are not provided.
If the `--time-range` argument is not used, the function uses the `--start` (`dtstart`) and
`--end` (`dtend`) arguments instead. If `--time-range` is used in combination with
`--start` or `--end`, a ValueError is raised.
Parameters:
args (argparse.Namespace): The parsed command-line arguments.
Returns:
tuple: A tuple of two datetime objects representing the start and end of the time range.
Raises:
ValueError: If both --time-range and --start/--end are used.
"""
if args.time_range and (args.dtstart or args.dtend):
raise ValueError("You can't combine --time-range and --start/--end.")
if args.time_range:
start, end = args.time_range
start = parse_isoformat_beginning_of_day_default(start)
end = parse_isoformat_end_of_day_default(end)
else:
start = args.dtstart
end = args.dtend
return start, end
def search(args: argparse.Namespace) -> None:
"""eumdac search entrypoint"""
products_query, products_count, _ = _search(args)
limit = args.limit or 10000
products = itertools.islice(products_query, limit)
if products_count < 1:
logger.error(f"No products were found for the given search parameters")
return
if products_count > limit:
# show a warning through stderr only when more than 10000
# products would be shown and limit keyword is not used.
logger.warning(f"By default, only 10000 of {products_count} products are displayed.")
logger.warning("Please use --limit to increase the number of products if necessary.")
if products_count > 10000:
logger.error(
"Notice: EUMETSAT's Data Store APIs allow a maximum of 10,000 items in a single request. If more than 10,000 items are needed, please split your requests."
)
if args.daily_window:
daily_window_start: datetime = parse_time_str(args.daily_window[0])
daily_window_end: datetime = parse_time_str(args.daily_window[1])
if daily_window_start > daily_window_end:
raise ValueError(
f"The daily window start time must be earlier than the end time. Please review the provided window: {datetime.strftime(daily_window_start, '%H:%M:%S')} - {datetime.strftime(daily_window_end, '%H:%M:%S')}"
)
logger.warning(
f"The search found {products_count} products, but only those within the daily time window are returned: {datetime.strftime(daily_window_start, '%H:%M:%S')} - {datetime.strftime(daily_window_end, '%H:%M:%S')}"
)
CRLF = "\r\n"
for product in products:
if not args.daily_window or (
product.sensing_end.time() >= daily_window_start.time()
and product.sensing_start.time() <= daily_window_end.time()
):
logger.info(str(product).replace(CRLF, "-"))
class AngrySigIntHandler:
"""class that will block a SigInt `max_block` times before exiting the program"""
def __init__(self, max_block: int = 3) -> None:
self.max_block = max_block
self.ints_received = 0
def __call__(self, *args: Any) -> None:
self.ints_received += 1
if self.ints_received > self.max_block:
logger.warning("Forced shut down.")
sys.exit(1)
logger.warning(
"Currently shutting down. "
f"Interrupt {self.max_block - self.ints_received + 1} "
"more times to forcefully shutdown."
)
def safe_run(
app: Any,
collection: Optional[str] = None,
num_products: int = -1,
keep_order: bool = False,
) -> bool:
"""wrapper around app.run() for exception handling and logging"""
if num_products < 0:
num_products = len(list(app.order.iter_product_info()))
plural = "" if num_products == 1 else "s"
logger.info(f"Processing {num_products} product{plural}.")
(chain,) = app.order.get_dict_entries("chain")
if chain:
plural = "" if num_products == 1 else "s"
logger.info(f"Product{plural} will be customized with the following parameters:")
for line in yaml.dump(chain).splitlines():
logger.info(f" {line}")
logger.info(f"Using order: {app.order}")
try:
success = app.run()
if not keep_order and app.order.status() == "DONE":
logger.info(f"Removing successfully finished order {app.order}")
app.order.delete()
return success
except KeyboardInterrupt:
signal.signal(signal.SIGINT, AngrySigIntHandler())
logger.info("\nReceived request to shut down.")
logger.info("Finishing threads... (this may take a while)")
app.shutdown()
logger.info("Resume this order with the following command:")
logger.info(f"$ eumdac order resume {app.order}")
raise
except ProductError:
if collection:
app.datastore.check_collection_id(collection)
raise
else:
raise
except Exception as e:
logger.critical(f"Unexpected exception: {str(e)}")
raise
def download(args: argparse.Namespace) -> None:
"""eumdac download entrypoint"""
datastore = get_datastore(args)
products: Collection[Product]
collection: str
if args.query:
# Search using a query
products, products_count, collection = _search(args)
else:
# Search using CLI parameters or product
if not args.collection or len(args.collection) > 1:
raise ValueError("Please provide a (single) collection.")
if args.product:
if args.dtstart or args.dtend:
logger.warning(
"Parameter(s) for filtering using sensing time ignored as specific product ID was given."
)
if args.publication_after or args.publication_before:
logger.warning(
"Parameter(s) for filtering using publication time ignored as specific product ID was given."
)
if args.bbox or args.geo:
logger.warning(
"Parameter(s) for filtering using spatial geometry ignored as specific product ID was given."
)
if args.sat:
logger.warning(
"Parameter for filtering using satellite/platform ignored as specific product ID was given."
)
if args.product_type:
logger.warning(
"Parameter for filtering using product type/acronym ignored as specific product ID was given."
)
if args.cycle or args.orbit or args.relorbit:
logger.warning(
"Parameter(s) for filtering using acquisition parameters ignored as specific product ID was given."
)
if args.filename:
logger.warning(
"Parameter for filtering using filename/title ignored as specific product ID was given."
)
if args.timeliness:
logger.warning(
"Parameter for filtering using timeliness ignored as specific product ID was given."
)
collection = args.collection[0]
if args.product:
products = []
for pid in args.product:
pid = pid.strip()
if pid:
products.append(datastore.get_product(collection, pid))
products_count = len(products)
else:
products, products_count, _ = _search(args)
if args.integrity:
if args.download_coverage:
logger.warn("Ignoring --integrity flag as --download-coverage was provided.")
args.integrity = False
elif args.entry:
logger.warn("Ignoring --integrity flag as --entry was provided.")
args.integrity = False
if not args.product and products_count > 10000:
logger.info(f"Processing 10000 out of the total {products_count} products.")
products = itertools.islice(products, 10000) # type: ignore
products_count = 10000
logger.error(
"Notice: EUMETSAT's Data Store APIs allow a maximum of 10,000 items in a single request. If more than 10,000 items are needed, please split your requests."
)
else:
plural = "" if products_count == 1 else "s"
logger.info(f"Processing {products_count} product{plural}.")
if args.daily_window:
daily_window_start: datetime = parse_time_str(args.daily_window[0])
daily_window_end: datetime = parse_time_str(args.daily_window[1])
if daily_window_start > daily_window_end:
raise ValueError(
f"The daily window start time must be earlier than the end time. Please review the provided window: {datetime.strftime(daily_window_start, '%H:%M:%S')} - {datetime.strftime(daily_window_end, '%H:%M:%S')}"
)
logger.info(
f"Filtering products by daily search window: {datetime.strftime(daily_window_start, '%H:%M:%S')} - {datetime.strftime(daily_window_end, '%H:%M:%S')}"
)
filtered_products = []
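        # Keep products whose sensing period overlaps the daily window:
        # sensing end at or after the window start and sensing start at or before the window end.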
for product in products:
if (
product.sensing_end.time() >= daily_window_start.time()
and product.sensing_start.time() <= daily_window_end.time()
):
filtered_products.append(product)
products = filtered_products
total_count = products_count
products_count = len(products)
logger.info(
f"From the {total_count} products found, only {products_count} sensed within the daily time window will be downloaded."
)
if products_count >= 10 and not args.yes:
user_in = input("Do you want to continue (Y/n)? ")
if user_in.lower() == "n":
return
order = Order()
try:
query = products.search_query # type: ignore
except AttributeError:
query = None
if args.download_coverage:
# Check that a valid, pdu-based collection has been provided (MTG FCI 1C)
if not is_collection_valid_for_coverage(collection):
logger.error(f"Collection {collection} does not support coverage area downloads.")
logger.error(
f"Remove coverage: {args.download_coverage} parameter or provide a different collection."
)
return
# Complain about entry being provided with coverage
if args.entry:
            logger.warning(
f"The provided --entry values {args.entry} will be discarded in favour of the coverage parameter."
)
# Prepare multi-entry considering coverage
args.entry, expected = build_entries_from_coverage(args.download_coverage)
# Check first if all the chunks are in the product
if args.entry:
for product in products:
matches = []
for pattern in args.entry:
matches.extend(fnmatch.filter(product.entries, pattern))
logger.info(f"{len(matches)} entries will be downloaded for {product}")
if args.verbose:
logger.info(
"\n".join([f" - {pretty_print_entry(match)}" for match in sorted(matches)])
)
if len(matches) < expected:
                logger.warning(
f"Warning: not all the expected chunks could be found: found {len(matches)} out of {expected}"
)
if args.chain:
datatailor = get_datatailor(args, datastore.token)
chain = parse_arguments_chain(args.chain, datatailor)
order.initialize(
chain,
products,
Path(args.output_dir),
args.entry,
query,
args.dirs,
args.onedir,
args.no_warning_logs,
)
app: Any = TailorApp(order, datastore, datatailor)
else:
order.initialize(
None,
products,
Path(args.output_dir),
args.entry,
query,
args.dirs,
args.onedir,
args.no_warning_logs,
)
app = DownloadApp(
order,
datastore,
integrity=args.integrity,
download_threads=args.download_threads,
chunk_size=parse_size(args.chunk_size) if args.chunk_size else None,
)
if args.dirs:
logger.warn("A subdirectory per product will be created, as per the --dirs option")
if args.onedir:
logger.warn("Subdirectories per product will not be created, as per the --onedir option")
success = safe_run(
app,
collection=collection,
num_products=products_count,
keep_order=args.keep_order,
)
if not success:
raise EumdacError("Downloads didn't finish successfully")
def download_cart(args: argparse.Namespace) -> None:
cart_filename = args.file
datastore = get_datastore(args)
products = []
try:
from xml.dom.minidom import parse
cart_dom = parse(cart_filename)
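        # A Data Store cart metalink file lists one <url> element per product;
        # resolve each URL back into a Data Store product.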
urls = cart_dom.getElementsByTagName("url")
for u in urls:
product: Product = datastore.get_product_from_url(u.firstChild.data) # type: ignore
products.append(product)
except eumdac.datastore.DataStoreError:
raise
except Exception as e:
logger.error(f"Cart XML file could not be read due to {e}")
sys.exit(1)
products_count = len(products)
plural = "" if products_count == 1 else "s"
logger.info(f"Processing {products_count} product{plural}.")
if products_count >= 10 and not args.yes:
user_in = input("Do you want to continue (Y/n)? ")
if user_in.lower() == "n":
return
order = Order()
order.initialize(
None,
products,
Path(args.output_dir),
None,
None,
args.dirs,
False,
False,
)
app = DownloadApp(order, datastore, integrity=args.integrity)
if args.dirs:
logger.warn("A subdirectory per product will be created, as per the --dirs option")
success = safe_run(
app, collection=None, num_products=products_count, keep_order=args.keep_order
)
if not success:
raise EumdacError("Downloads didn't finish successfully")
def parse_arguments_chain(args_chain: str, datatailor: Any) -> Chain:
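    """Resolve the --chain argument into a Chain.

    Accepts a path to a YAML file (ending in .yml/.yaml), the name of a chain known
    to the given Data Tailor instance, or an inline YAML string of chain parameters.
    """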
chain_config = args_chain
if chain_config.endswith(".yml") or chain_config.endswith(".yaml"):
with open(chain_config, "r") as file:
try:
return Chain(**yaml.safe_load(file))
            except Exception:
                logger.error("YAML file is corrupted. Please check the YAML syntax.")
sys.exit()
else:
chain_config = chain_config.strip()
if chain_config.find(" ") < 0:
# Assume chain name is being provided
chain_name = chain_config
logger.info(f"Using chain name: {chain_name}")
return datatailor.chains.read(chain_name)
else:
if not chain_config.startswith("{"):
chain_config = "{" + chain_config + "}"
try:
return Chain(**yaml.safe_load(chain_config))
            except Exception:
                logger.error("YAML string is corrupted. Please check the YAML syntax.")
sys.exit()
def order(args: argparse.Namespace) -> None:
"""eumdac order entrypoint"""
if args.order_command == "list":
filenames = list(all_order_filenames(get_default_order_dir()))
logger.info(f"Found {len(filenames)} order(s):")
table_printer = gen_table_printer(
logger.info,
[
("Order ID", 15),
("Created on", 10),
("Products", 8),
("Tailor", 6),
("Status", 15),
("Collection", 28),
],
column_sep=" ",
)
for filename in filenames:
try:
order = Order(filename)
with order.dict_from_file() as order_d:
table_printer(
[
filename.stem, # order_id
filename.stem.split("#")[0], # created
str(len(order_d["products_to_process"])), # products
"Yes" if order_d["type"] == "tailor" else "No", # tailor
order.status(), # status
", ".join(order.collections()), # collection
]
)
except (EumdacError, KeyError, yaml.scanner.ScannerError):
logger.error(f"{filename.stem} is corrupted.")
return
order_name = args.order_id
order = resolve_order(get_default_order_dir(), order_name)
if args.order_command == "status":
logger.info(order.pretty_string(print_products=args.verbose))
if not args.verbose:
logger.info("")
logger.info("Use the -v flag to see more details")
return
if args.order_command == "restart":
order.reset_states()
if args.order_command == "delete":
if args.all:
filenames = list(all_order_filenames(get_default_order_dir()))
logger.info(f"Deleting {len(filenames)} order(s):")
for filename in filenames:
try:
order = Order(filename)
order.delete()
logger.info(f"Order {order} successfully deleted.")
except Exception as err:
logger.error(f"Unable to delete order {order} due to: {err}")
elif order._order_file.is_file():
delete = True
if not args.yes:
user_in = input(f"Are you sure to delete order {order_name} (Y/n)?")
delete = not (user_in.lower() == "n")
if delete:
try:
order.delete()
logger.info(f"Order {order_name} successfully deleted.")
                except Exception:
logger.warning(f"Order {order_name} can't be deleted.")
else:
logger.info(f"Order {order_name} wasn't deleted.")
else:
logger.info(f"Order {order_name} doesn't exist.")
sys.exit(1)
if not order._order_file.is_file():
logger.info(f"Order {order_name} doesn't exist.")
sys.exit(1)
(typ,) = order.get_dict_entries("type")
if typ == "download":
if args.integrity and order.get_dict_entries("file_patterns")[0]:
logger.warn("Ignoring --integrity flag as Order is configured to download entries.")
args.integrity = False
app: Any = DownloadApp(
order,
get_datastore(args),
integrity=args.integrity,
download_threads=args.download_threads,
chunk_size=parse_size(args.chunk_size) if args.chunk_size else None,
)
elif typ == "tailor":
if order.all_done():
logger.info("Order already completed")
return
datastore = get_datastore(args)
app = TailorApp(order, datastore, get_datatailor(args, datastore.token))
else:
raise Exception(f"Unknown Order Type: {typ}")
success = safe_run(app, keep_order=args.keep_order)
if not success:
raise EumdacError("Process didn't finish successfully")
def local_tailor(args: argparse.Namespace) -> None:
"""eumdac config entrypoint"""
if args.local_tailor_command == "set":
old_url = ""
try:
try:
old_url = get_api_url(get_tailor_path(args.localtailor_id[0]))
            except Exception:
pass
local_tailor_config_path = new_local_tailor(
args.localtailor_id[0], args.localtailor_url[0]
)
logger.info(
f"Local tailor instance {get_tailor_id(local_tailor_config_path)} is configured with the following address: {get_api_url(local_tailor_config_path)}"
)
if old_url:
logger.warning(
f"This replaces the previous address for {get_tailor_id(local_tailor_config_path)}: {old_url}"
)
if not is_online(local_tailor_config_path):
logger.warning(
"Note that the provided local-tailor instance address is unavailable at the moment"
)
except EumdacError as e:
logger.error(
f"The provided address {args.localtailor_url[0]} appears to be invalid: {e}"
)
# Don't remove existing instances
if not old_url:
remove_local_tailor(args.localtailor_id[0])
elif args.local_tailor_command == "remove":
try:
local_tailor_config_path = get_tailor_path(args.localtailor_id[0])
logger.info(
f"Local tailor instance {get_tailor_id(local_tailor_config_path)} is removed"
)
remove_local_tailor(args.localtailor_id[0])
except EumdacError as e:
logger.error(f"Could not remove local tailor instance: {e}")
elif args.local_tailor_command == "show":
table_printer = gen_table_printer(logger.info, [("Name", 10), ("URL", 40), ("Status", 8)])
local_tailor_config_path = get_tailor_path(args.localtailor_id[0])
table_printer(
[
get_tailor_id(local_tailor_config_path),
get_api_url(local_tailor_config_path),
"ONLINE" if is_online(local_tailor_config_path) else "OFFLINE",
]
)
elif args.local_tailor_command == "instances":
table_printer = gen_table_printer(logger.info, [("Name", 10), ("URL", 40), ("Status", 8)])
for filepath in all_url_filenames():
if filepath.exists():
line = [
get_tailor_id(filepath),
get_api_url(filepath),
"ONLINE" if is_online(filepath) else "OFFLINE",
]
table_printer(line)
else:
raise EumdacError(f"Unsupported clear command: {args.local_tailor_command}")
def get_datastore(args: argparse.Namespace, anonymous_allowed: bool = False) -> Any:
"""get an instance of DataStore"""
if args.test:
return FakeDataStore()
try:
creds = load_credentials()
except CredentialsFileNotFoundError as exc:
if anonymous_allowed:
creds = None
else:
raise EumdacError("No credentials found! Please set credentials!") from exc
if creds is None:
token: Any = AnonymousAccessToken()
else:
token = AccessToken(creds)
return DataStore(token)
def get_datatailor(args: argparse.Namespace, token: Optional[AccessToken] = None) -> Any:
"""get an instance of DataTailor"""
if args.test:
logger.info("Using Fake DataTailor instance")
return FakeDataTailor()
if args.local_tailor:
logger.info(f"Using Data Tailor Standalone instance: {args.local_tailor}")
return get_local_tailor(args.local_tailor)
if not token:
try:
creds = load_credentials()
except CredentialsFileNotFoundError as exc:
raise EumdacError("No credentials found! Please set credentials!") from exc
token = AccessToken(creds)
logger.info("Using Data Tailor Web Service")
return DataTailor(token)
def load_credentials() -> Iterable[str]:
"""load the credentials and do error handling"""
credentials_path = get_credentials_path()
try:
content = credentials_path.read_text()
except FileNotFoundError as exc:
raise CredentialsFileNotFoundError(str(credentials_path)) from exc
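    # The credentials file is expected to hold a single line of the form
    # "<consumer_key>,<consumer_secret>", as written by `eumdac set-credentials`.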
match = re.match(r"(\w+),(\w+)$", content)
if match is None:
raise EumdacError(f'Corrupted file "{credentials_path}"! Please reset credentials!')
return match.groups()
def tailor_post_job(args: argparse.Namespace) -> None:
"""eumdac tailor post entrypoint"""
from eumdac.tailor_models import Chain
datastore = get_datastore(args)
datatailor = get_datatailor(args, datastore.token)
collection_id = args.collection
product_ids = args.product
if not args.collection or not args.product or not args.chain:
raise ValueError("Please provide collection ID, product ID and a chain file!")
chain = parse_arguments_chain(args.chain, datatailor)
products = [datastore.get_product(collection_id, product_id) for product_id in product_ids]
try:
customisation = datatailor.new_customisations(products, chain=chain)
jobidsToStr = "\n".join([str(jobid) for jobid in customisation])
logger.info("Customisation(s) has been started.")
logger.info(jobidsToStr)
except requests.exceptions.HTTPError as exception:
messages = {
400: "Collection ID and/or Product ID does not seem to be a valid. See below:",
500: "There was an issue on server side. See below:",
0: "An error occurred. See below:",
-1: "An unexpected error has occurred.",
}
report_request_error(exception.response, None, messages=messages)
def tailor_list_customisations(args: argparse.Namespace) -> None:
"""eumdac tailor list entrypoint"""
datatailor = get_datatailor(args)
try:
customisations = datatailor.customisations
if not customisations:
logger.error("No customisations available")
else:
table_printer = gen_table_printer(
logger.info,
[("Job ID", 10), ("Status", 8), ("Product", 10), ("Creation Time", 20)],
)
for customisation in datatailor.customisations:
line = [
str(customisation),
customisation.status,
customisation.product_type,
str(customisation.creation_time),
]
table_printer(line)
except requests.exceptions.HTTPError as exception:
report_request_error(exception.response)
def tailor_show_status(args: argparse.Namespace) -> None:
"""eumdac tailor status entrypoint"""
datatailor = get_datatailor(args)
if args.verbose:
table_printer = gen_table_printer(
logger.info,
[("Job ID", 10), ("Status", 8), ("Product", 10), ("Creation Time", 20)],
)
for customisation_id in args.job_ids:
try:
customisation = datatailor.get_customisation(customisation_id)
line = [
str(customisation),
customisation.status,
customisation.product_type,
str(customisation.creation_time),
]
table_printer(line)
except requests.exceptions.HTTPError as exception:
report_request_error(exception.response, customisation_id)
else:
for customisation_id in args.job_ids:
try:
customisation = datatailor.get_customisation(customisation_id)
logger.info(customisation.status)
except requests.exceptions.HTTPError as exception:
report_request_error(exception.response, customisation_id)
def tailor_get_log(args: argparse.Namespace) -> None:
"""eumdac tailor log entrypoint"""
datatailor = get_datatailor(args)
try:
customisation = datatailor.get_customisation(args.job_id)
logger.info(customisation.logfile)
except requests.exceptions.HTTPError as exception:
report_request_error(exception.response, args.job_id)
def tailor_quota(args: argparse.Namespace) -> None:
"""eumdac tailor quota entrypoint"""
datatailor = get_datatailor(args)
user_name = datatailor.user_info["username"]
quota_info = datatailor.quota["data"][user_name]
is_quota_active = quota_info["disk_quota_active"]
logger.info(f"Usage: {round(quota_info['space_usage'] / 1024, 1)} Gb")
if is_quota_active:
logger.info(f"Percentage: {round(quota_info['space_usage_percentage'], 1)}%")
if args.verbose:
logger.info(f"Available: {round(quota_info['user_quota'] / 1024, 1)} Gb")
else:
logger.info("No quota limit set in the system")
if args.verbose:
logger.info(f"Workspace usage: {round(quota_info['workspace_dir_size'] / 1024, 1)} Gb")
logger.info(f"Logs space usage: {round(quota_info['log_dir_size'], 3)} Mb")
logger.info(f"Output usage: {round(quota_info['output_dir_size'], 1)} Mb")
logger.info(f"Jobs: {quota_info['nr_customisations']}")
def tailor_delete_jobs(args: argparse.Namespace) -> None:
"""eumdac tailor delete entrypoint"""
datatailor = get_datatailor(args)
for customisation_id in args.job_ids:
customisation = datatailor.get_customisation(customisation_id)
try:
customisation.delete()
logger.info(f"Customisation {customisation_id} has been deleted.")
except requests.exceptions.HTTPError as exception:
if exception.response.status_code >= 400:
report_request_error(exception.response, customisation_id)
def tailor_cancel_jobs(args: argparse.Namespace) -> None:
"""eumdac tailor cancel entrypoint"""
datatailor = get_datatailor(args)
for customisation_id in args.job_ids:
customisation = datatailor.get_customisation(customisation_id)
try:
customisation.kill()
logger.info(f"Customisation {customisation_id} has been cancelled.")
except requests.exceptions.HTTPError as exception:
messages = {
400: f"{customisation_id} is already cancelled or job id is invalid. See below:",
500: "There was an issue on server side. See below:",
0: "An error occurred. See below:",
-1: "An unexpected error has occurred.",
}
report_request_error(exception.response, None, messages=messages)
def tailor_clear_jobs(args: argparse.Namespace) -> None:
"""eumdac tailor clear entrypoint"""
datatailor = get_datatailor(args)
jobs_to_clean = args.job_ids
if args.all and len(args.job_ids) > 0:
logger.info(
"All flag provided. Ignoring the provided customization IDs and clearing all jobs"
)
if args.all:
# Fetch all job ids
jobs_to_clean = datatailor.customisations
for customisation in jobs_to_clean:
# If we are provided a job id, get the customisation
if isinstance(customisation, str):
customisation_id = customisation
customisation = datatailor.get_customisation(customisation)
else:
customisation_id = customisation._id
try:
            if customisation.status in ("QUEUED", "RUNNING", "INACTIVE"):
customisation.kill()
logger.info(f"Customisation {customisation_id} has been cancelled.")
except requests.exceptions.HTTPError as exception:
messages = {
400: f"{customisation_id} is already cancelled or job id is invalid. See below:",
500: "There was an issue on server side. See below:",
0: "An error occurred. See below:",
-1: "An unexpected error has occurred.",
}
report_request_error(exception.response, None, messages=messages)
try:
customisation.delete()
logger.info(f"Customisation {customisation_id} has been deleted.")
except requests.exceptions.HTTPError as exception:
report_request_error(exception.response, customisation_id)
def tailor_download(args: argparse.Namespace) -> None:
"""eumdac tailor download entrypoint"""
creds = load_credentials()
token = AccessToken(creds)
customisation = eumdac.datatailor.Customisation(args.job_id, datatailor=DataTailor(token))
results: Iterable[str] = customisation.outputs
logger.info(f"Output directory: {os.path.abspath(args.output_dir)}")
if not os.path.exists(args.output_dir):
logger.info(f"Output directory {args.output_dir} does not exist. It will be created.")
os.makedirs(args.output_dir)
# Download all the output files into the output path
logger.info(f"Downloading {len(results)} output products") # type: ignore
for result in results:
product_name = os.path.basename(result)
logger.info("Downloading " + product_name)
with tempfile.TemporaryDirectory(dir=args.output_dir, suffix=".tmp") as tempdir:
tmp_prod_p = Path(tempdir) / str(product_name)
with tmp_prod_p.open("wb") as tmp_prod:
with customisation.stream_output_iter_content(result) as chunks:
for chunk in chunks:
tmp_prod.write(chunk)
shutil.move(str(tmp_prod_p), str(args.output_dir) + "/" + product_name)
logger.info(f"{product_name} has been downloaded.")
def report_request_error(
response: requests.Response,
cust_id: Optional[str] = None,
messages: Optional[Dict[int, str]] = None,
) -> None:
"""helper function report requests errors to the user"""
if messages is not None:
_messages = messages
else:
_messages = {
400: "There was an issue on client side. See below:",
500: "There was an issue on server side. See below:",
0: "An error occurred. See below:",
-1: "An unexpected error has occurred.",
}
if cust_id is not None:
_messages[400] = f"{cust_id} does not seem to be a valid job id. See below:"
def _message_func(status_code: Optional[int] = None) -> str:
try:
if not status_code:
return _messages[-1]
if 400 <= status_code < 500:
return _messages[400]
elif status_code >= 500:
return _messages[500]
return _messages[0]
except KeyError:
return "Error description not found"
return "Unexpected error"
message = _message_func(response.status_code)
logger.error(message)
logger.error(f"{response.status_code} - {response.text}")
class HelpAction(argparse.Action):
"""eumdac tailor/search/download/order -h entrypoint"""
def __call__(self, parser: argparse.ArgumentParser, *args: Any, **kwargs: Any) -> None:
# Print the help if the command has 2 args,
# meaning it's just $ eumdac tailor
if len(sys.argv) == 2:
parser.print_help()
parser.exit()
def parse_isoformat(input_string: str, time_default: str = "start") -> datetime:
"""helper function to provide a user readable message when argparse encounters
a wrongly formatted date"""
time_defaults = {
"start": "00:00:00",
"end": "23:59:59",
}
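    # e.g. parse_isoformat("2020-03-01", time_default="end") yields 2020-03-01T23:59:59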
try:
_default_time = time_defaults[time_default]
except KeyError as exc:
raise ValueError(f"Unexpected time_default: '{time_default}'") from exc
if "T" not in input_string:
input_string += f"T{_default_time}"
if time_default == "end":
logger.warning(f"As no time was given for end date, it was set to {input_string}.")
try:
return datetime.fromisoformat(input_string)
except ValueError as exc:
raise argparse.ArgumentTypeError(
"The format of the provided date was not recognized."
"Expecting YYYY-MM-DD[THH[:MM[:SS]]]"
) from exc
def parse_isoformat_beginning_of_day_default(input_string: str) -> datetime:
"""helper function to provide to parse start dates"""
return parse_isoformat(input_string, time_default="start")
def parse_isoformat_end_of_day_default(input_string: str) -> datetime:
"""helper function to provide to parse end dates"""
return parse_isoformat(input_string, time_default="end")
def parse_time_str(input_string: str) -> datetime:
"""helper function to parse time with optional minutes and seconds: HH[:MM[:SS]]"""
if len(input_string) == 2:
input_string += ":00:00"
elif len(input_string) == 5:
input_string += ":00"
return datetime.strptime(input_string, "%H:%M:%S")
def get_piped_args() -> str:
"""
Attempt to read from standard input (stdin) and return the contents as a string.
This function is designed to handle being executed in a variety of environments,
including being called with 'nohup', in which case stdin may not be accessible.
In such a scenario, it will log a warning and return an empty string.
:return: A string containing the data read from stdin, or an empty string if stdin
is not accessible (for example, when the script is executed with 'nohup').
"""
try:
return sys.stdin.read()
except OSError:
logger.warning(
"Received OSError when trying to read stdin."
"This is expected when executed with nohup."
)
return ""
def cli(command_line: Optional[Sequence[str]] = None) -> None:
"""eumdac CLI entrypoint"""
init_logger("INFO")
# Change referer to mark CLI usage
eumdac.common.headers["referer"] = "EUMDAC.CLI"
# append piped args
if not sys.stdin.isatty():
pipe_args = get_piped_args()
if pipe_args:
sys.argv.extend(shlex.split(pipe_args))
if command_line is not None:
# when we are called directly (e.g. by tests) then mimic a call from
# commandline by setting sys.argv accordingly
sys.argv = ["eumdac"] + list(command_line)
# support type for argparse positive int
def positive_int(value: str) -> int:
if int(value) <= 0:
raise argparse.ArgumentTypeError(f"{value} is an invalid positive integer")
return int(value)
# main parser
parser = argparse.ArgumentParser(description=__doc__, fromfile_prefix_chars="@")
parser.add_argument(
"-v",
"--verbose",
action="count",
default=0,
help="increase output verbosity (can be provided multiple times)",
)
parser.add_argument("--version", action="version", version=f"%(prog)s {eumdac.__version__}")
parser.add_argument(
"--set-credentials",
nargs=2,
action=SetCredentialsAction,
help=argparse.SUPPRESS,
metavar=("ConsumerKey", "ConsumerSecret"),
dest="credentials",
)
parser.add_argument(
"-y",
"--yes",
help="set any confirmation value to 'yes' automatically",
action="store_true",
)
parser.add_argument(
"--debug",
help="show additional debugging info and traces for errors",
action="store_true",
)
common_parser = argparse.ArgumentParser(add_help=False)
common_parser.add_argument("--test", action="store_true", help=argparse.SUPPRESS)
common_parser.add_argument(
"-v",
"--verbose",
action="count",
default=0,
help="increase output verbosity (can be provided multiple times)",
)
common_parser.add_argument(
"-y",
"--yes",
help="set any confirmation value to 'yes' automatically",
action="store_true",
)
common_parser.add_argument(
"--debug",
help="show additional debugging info and traces for errors",
action="store_true",
)
subparsers = parser.add_subparsers(dest="command")
# credentials parser
parser_credentials = subparsers.add_parser(
"set-credentials",
description="Set authentication parameters for the EUMETSAT APIs, see https://api.eumetsat.int/api-key",
help=("permanently set consumer key and secret, " "see https://api.eumetsat.int/api-key"),
parents=[common_parser],
)
parser_credentials.add_argument("ConsumerKey", help="consumer key")
parser_credentials.add_argument("ConsumerSecret", help="consumer secret")
parser_credentials.set_defaults(func=credentials)
# token parser
parser_token = subparsers.add_parser(
"token",
description="Generate an access token and exit",
help="generate an access token",
epilog="example: %(prog)s",
parents=[common_parser],
)
parser_token.add_argument(
"--val",
"--validity",
help="duration of the token, in seconds, default: 86400 seconds (1 day)",
dest="validity",
type=int,
)
parser_token.add_argument(
"--force",
help="revokes current token and forces the generation of a new one. Warning: this will effect other processes using the same credentials",
action="store_true",
)
parser_token.set_defaults(func=token)
# describe parser
parser_describe = subparsers.add_parser(
"describe",
description="Describe a collection or product, provide no arguments to list all collections",
help="describe a collection or product",
epilog="example: %(prog)s -c EO:EUM:DAT:MSG:HRSEVIRI",
parents=[common_parser],
)
parser_describe.add_argument(
"-f",
"--filter",
help='wildcard filter for collection identifier and name, e.g. "*MSG*"',
dest="filter",
type=str,
)
parser_describe.add_argument(
"-c",
"--collection",
help="id of the collection to describe, e.g. EO:EUM:DAT:MSG:CLM",
metavar="COLLECTION",
)
parser_describe.add_argument(
"-p",
"--product",
help="id of the product to describe, e.g. MSG1-SEVI-MSGCLMK-0100-0100-20040129130000.000000000Z-NA",
metavar="PRODUCT",
)
parser_describe.add_argument(
"--flat",
help="avoid tree view when showing product package contents",
action="store_true",
)
parser_describe.set_defaults(func=describe)
# search parser
search_argument_parser = argparse.ArgumentParser(add_help=False)
query_group = search_argument_parser.add_mutually_exclusive_group(required=True)
query_group.add_argument(
"-q",
"--query",
nargs=1,
help='opensearch query string, e.g. "pi=EO:EUM:DAT:MSG:HRSEVIRI&dtstart=2023-06-21T12:27:42Z&dtend=2023-06-22T12:27:42Z"',
)
query_group.add_argument("-c", "--collection", nargs="+", help="collection id")
search_argument_parser.add_argument(
"-s",
"--start",
type=parse_isoformat_beginning_of_day_default,
help='sensing start date/time in UTC, e.g. "2002-12-21T12:30:15"',
metavar="YYYY-MM-DD[THH[:MM[:SS]]]",
dest="dtstart",
)
search_argument_parser.add_argument(
"-e",
"--end",
type=parse_isoformat_end_of_day_default,
help='sensing end date/time in UTC, e.g. "2002-12-21T12:30:15"',
metavar="YYYY-MM-DD[THH[:MM[:SS]]]",
dest="dtend",
)
search_argument_parser.add_argument(
"--time-range",
nargs=2,
type=str,
help="range of dates in UTC to search by sensing date/time",
metavar="YYYY-MM-DD[THH[:MM[:SS]]]",
)
search_argument_parser.add_argument(
"--publication-after",
type=parse_isoformat_beginning_of_day_default,
help='filter by publication date, products ingested after this UTC date e.g. "2002-12-21T12:30:15"',
metavar="YYYY-MM-DD[THH[:MM[:SS]]]",
)
search_argument_parser.add_argument(
"--publication-before",
type=parse_isoformat_beginning_of_day_default,
help='filter by publication date, products ingested before this UTC date e.g. "2002-12-21T12:30:15"',
metavar="YYYY-MM-DD[THH[:MM[:SS]]]",
)
search_argument_parser.add_argument(
"--daily-window",
nargs=2,
metavar=("HH[:MM[:SS]]", "HH[:MM[:SS]]"),
dest="daily_window",
help="filter by daily time window, e.g. 10:00:00 12:30:00",
default=None,
)
search_argument_parser.add_argument(
"--bbox",
nargs=4,
type=float,
metavar=("W", "S", "E", "N"),
help="filter by bounding box, defined in EPSG:4326 decimal degrees, e.g. 51.69 0.33 0.51 51.69",
)
search_argument_parser.add_argument(
"--geometry",
help='filter by geometry, custom geometry in a EPSG:4326 decimal degrees, e.g. "POLYGON ((10.09 56.09, 10.34 56.09, 10.34 56.19, 10.09 56.09))"',
dest="geo",
)
search_argument_parser.add_argument(
"--cycle",
help="filter by cycle number, must be a positive integer",
dest="cycle",
type=positive_int,
)
search_argument_parser.add_argument(
"--orbit",
help="filter by orbit number, must be a positive integer",
dest="orbit",
type=positive_int,
)
search_argument_parser.add_argument(
"--relorbit",
help="filter by relative orbit number, must be a positive integer",
dest="relorbit",
type=positive_int,
)
search_argument_parser.add_argument(
"--filename",
help='wildcard filter by product identifier, e.g. "*MSG*"',
dest="filename",
type=str,
)
search_argument_parser.add_argument(
"--timeliness",
help="filter by timeliness",
dest="timeliness",
choices=["NT", "NR", "ST"],
)
search_argument_parser.add_argument(
"--product-type",
"--acronym",
help="filter by product type/acronym, e.g. MSG15",
dest="product_type",
type=str,
)
search_argument_parser.add_argument(
"--satellite", help="filter by satellite, e.g. MSG4", dest="sat"
)
search_argument_parser.add_argument(
"--sort",
choices=("ingestion", "sensing"),
help="sort results by ingestion time or sensing time, default: sensing",
)
sorting_direction = search_argument_parser.add_mutually_exclusive_group(required=False)
sorting_direction.add_argument("--asc", action="store_true", help="sort ascending")
sorting_direction.add_argument("--desc", action="store_true", help="sort descending")
search_argument_parser.add_argument(
"--limit", type=positive_int, help="max number of products to return"
)
parser_search = subparsers.add_parser(
"search",
description="Search for products",
help="search for products",
epilog="example: %(prog)s -c EO:EUM:DAT:MSG:CLM -s 2010-03-01 -e 2010-03-15T12:15",
parents=[common_parser, search_argument_parser],
)
parser_search.add_argument(
dest="print_help", nargs=0, action=HelpAction, help=argparse.SUPPRESS
)
parser_search.set_defaults(func=search)
parser_download = subparsers.add_parser(
"download",
help="download products, with optional customisation",
parents=[
common_parser,
search_argument_parser,
], # this inherits collection lists
)
parser_download.add_argument(
"-p", "--product", nargs="*", help="id of the product(s) to download"
)
parser_download.add_argument(
"-o",
"--output-dir",
type=pathlib.Path,
help="path to output directory, default: current directory",
metavar="DIR",
default=pathlib.Path.cwd(),
)
parser_download.add_argument(
"-i",
"--integrity",
action="store_true",
help="verify integrity of downloaded files through their md5, if available",
)
parser_download.add_argument(
"--chunk-size",
help=argparse.SUPPRESS,
)
parser_download.add_argument(
"--entry",
nargs="+",
help="shell-style wildcard pattern(s) to filter product files",
)
parser_download.add_argument(
"--download-coverage",
choices=["FD", "H1", "H2", "T1", "T2", "T3", "Q1", "Q2", "Q3", "Q4"],
help="download only the area matching the provided coverage (only for specific missions)",
)
parser_download.add_argument(
"--chain",
"--tailor",
help="chain id, file, or YAML string for customising the data",
metavar="CHAIN",
)
parser_download.add_argument(
"--local-tailor",
help="id of the instance to use for customisating the data",
metavar="ID",
)
dir_group = parser_download.add_mutually_exclusive_group()
dir_group.add_argument(
"--onedir",
action="store_true",
help="avoid creating a subdirectory for each product",
)
dir_group.add_argument(
"--dirs",
help="download each product into its own individual directory",
action="store_true",
)
parser_download.add_argument(
"-k",
"--keep-order",
action="store_true",
help="keep order file after finishing successfully",
)
parser_download.add_argument(
"--no-warning-logs", help="don't show logs when jobs fail", action="store_true"
)
parser_download.add_argument(
"-t",
"--threads",
type=int,
help="set the number of parallel connections",
default=3,
dest="download_threads",
)
parser_download.add_argument(
"--no-progress-bars", help="don't show the download status bar", action="store_true"
)
parser_download.add_argument(
dest="print_help", nargs=0, action=HelpAction, help=argparse.SUPPRESS
)
parser_download.set_defaults(func=download)
parser_download_cart = subparsers.add_parser(
"download-metalink",
help="download Data Store cart metalink files",
parents=[
common_parser,
],
)
parser_download_cart.add_argument(
"file", help="Data Store cart metalink file to download, i.e. cart-user.xml"
)
parser_download_cart.add_argument(
"-o",
"--output-dir",
type=pathlib.Path,
help="path to output directory, default: current directory",
metavar="DIR",
default=pathlib.Path.cwd(),
)
parser_download_cart.add_argument(
"-i",
"--integrity",
action="store_true",
help="verify integrity of downloaded files through their md5, if available",
)
parser_download_cart.add_argument(
"--dirs",
help="download each product into its own individual directory",
action="store_true",
)
parser_download_cart.add_argument(
"-k",
"--keep-order",
action="store_true",
help="keep order file after finishing successfully",
)
parser_download_cart.add_argument(
"--no-progress-bars",
help="don't show download progress bars",
action="store_true",
)
parser_download_cart.set_defaults(func=download_cart)
# tailor parser
# tailor parser common arguments
tailor_common_parser = argparse.ArgumentParser(add_help=False)
tailor_common_parser.add_argument(
"--local-tailor",
help="id of the instance to use for customisating the data",
metavar="ID",
)
parser_tailor = subparsers.add_parser(
"tailor",
description="Manage Data Tailor customisations",
help="manage Data Tailor resources",
parents=[common_parser],
)
parser_tailor.add_argument(
dest="print_help", nargs=0, action=HelpAction, help=argparse.SUPPRESS
)
tailor_subparsers = parser_tailor.add_subparsers(dest="tailor-command")
tailor_post_parser = tailor_subparsers.add_parser(
"post",
description="Post individual customisation jobs",
help="post a new customisation job",
parents=[common_parser, tailor_common_parser],
)
tailor_post_parser.add_argument("-c", "--collection", help="collection id")
tailor_post_parser.add_argument(
"-p", "--product", nargs="+", help="id of the product(s) to customise"
)
tailor_post_parser.add_argument(
"--chain",
"--tailor",
help="chain id, file, or YAML string for customising the data",
metavar="CHAIN",
)
tailor_post_parser.set_defaults(func=tailor_post_job)
tailor_list_parser = tailor_subparsers.add_parser(
"list",
description="List customisation jobs",
help="list customisation jobs",
parents=[common_parser, tailor_common_parser],
)
tailor_list_parser.set_defaults(func=tailor_list_customisations)
tailor_status_parser = tailor_subparsers.add_parser(
"status",
description="Check the status of one (or more) customisations",
help="check the status of customisations",
parents=[common_parser, tailor_common_parser],
)
tailor_status_parser.add_argument("job_ids", metavar="Customisation ID", type=str, nargs="+")
tailor_status_parser.set_defaults(func=tailor_show_status)
tailor_log_parser = tailor_subparsers.add_parser(
"log",
description="Get the log of a customisation",
help="get the log of a customisation",
parents=[common_parser, tailor_common_parser],
)
tailor_log_parser.add_argument(
"job_id", metavar="Customisation ID", type=str, help="Customisation ID"
)
tailor_log_parser.set_defaults(func=tailor_get_log)
tailor_quota_parser = tailor_subparsers.add_parser(
"quota",
description="Show user workspace usage quota. Verbose mode (-v) shows more details",
help="show user workspace usage quota",
parents=[common_parser, tailor_common_parser],
)
tailor_quota_parser.set_defaults(func=tailor_quota)
tailor_delete_parser = tailor_subparsers.add_parser(
"delete",
description="Delete finished customisations",
help="delete customisations",
parents=[common_parser, tailor_common_parser],
)
tailor_delete_parser.add_argument("job_ids", metavar="Customisation ID", type=str, nargs="+")
tailor_delete_parser.set_defaults(func=tailor_delete_jobs)
tailor_cancel_parser = tailor_subparsers.add_parser(
"cancel",
description="Cancel QUEUED, RUNNING or INACTIVE customisations",
help="cancel running customisations",
parents=[common_parser, tailor_common_parser],
)
tailor_cancel_parser.add_argument("job_ids", metavar="Customisation ID", type=str, nargs="+")
tailor_cancel_parser.set_defaults(func=tailor_cancel_jobs)
tailor_clean_parser = tailor_subparsers.add_parser(
"clean",
description="Clean up customisations in any state (cancelling them if needed)",
help="clean up customisations in any state",
parents=[common_parser, tailor_common_parser],
)
tailor_clean_parser.add_argument("job_ids", metavar="Customisation ID", type=str, nargs="*")
tailor_clean_parser.add_argument("--all", help="Clean all customisations", action="store_true")
tailor_clean_parser.set_defaults(func=tailor_clear_jobs)
tailor_download_parser = tailor_subparsers.add_parser(
"download",
description="Download the output of finished customisations",
help="download the output of finished customisations",
parents=[common_parser, tailor_common_parser],
)
tailor_download_parser.add_argument(
"job_id", metavar="Customisation ID", type=str, help="Customisation ID"
)
tailor_download_parser.add_argument(
"-o",
"--output-dir",
type=pathlib.Path,
help="path to output directory, default: current directory",
metavar="DIR",
default=pathlib.Path.cwd(),
)
tailor_download_parser.set_defaults(func=tailor_download)
# Local Data Tailor instances parser
parser_local_tailor = subparsers.add_parser(
"local-tailor",
description="Manage local Data Tailor instances",
help="manage local Data Tailor instances",
parents=[common_parser],
)
parser_local_tailor.add_argument(
dest="print_help", nargs=0, action=HelpAction, help=argparse.SUPPRESS
)
local_tailor_subparsers = parser_local_tailor.add_subparsers(dest="local_tailor_command")
local_tailor_list_parser = local_tailor_subparsers.add_parser(
"instances",
help="list configured instances",
description="List configured local Data Tailor instances",
parents=[common_parser],
)
local_tailor_list_parser.set_defaults(func=local_tailor)
local_tailor_show_parser = local_tailor_subparsers.add_parser(
"show",
help="show details of an instance",
description="Show details of local Data Tailor instances",
parents=[common_parser],
)
local_tailor_show_parser.add_argument(
"localtailor_id",
help="id of the local instance, e.g. my-local-tailor",
metavar="ID",
nargs=1,
)
local_tailor_show_parser.set_defaults(func=local_tailor)
local_tailor_set_parser = local_tailor_subparsers.add_parser(
"set",
help="configure a local instance",
description="Configure a local Data Tailor instance",
parents=[common_parser],
)
local_tailor_set_parser.add_argument(
"localtailor_id",
help="id for the local instance, e.g. my-local-tailor",
metavar="ID",
nargs=1,
)
local_tailor_set_parser.add_argument(
"localtailor_url",
help="base URL of the local instance, e.g. http://localhost:40000/",
metavar="URL",
nargs=1,
)
local_tailor_set_parser.set_defaults(func=local_tailor)
local_tailor_remove_parser = local_tailor_subparsers.add_parser(
"remove",
help="remove a configured instance",
description="Remove a configured local instance",
parents=[common_parser],
)
local_tailor_remove_parser.add_argument(
"localtailor_id",
help="id of the local instance, e.g. my-local-tailor",
metavar="ID",
nargs=1,
)
local_tailor_remove_parser.set_defaults(func=local_tailor)
# Order parser
parser_order = subparsers.add_parser(
"order",
description="Manage eumdac orders",
help="manage orders",
parents=[common_parser],
)
parser_order.add_argument(dest="print_help", nargs=0, action=HelpAction, help=argparse.SUPPRESS)
order_subparsers = parser_order.add_subparsers(dest="order_command")
order_parsers = {}
order_parsers["list"] = order_subparsers.add_parser(
"list",
description="List eumdac orders",
help="list orders",
parents=[common_parser],
)
order_parsers["list"].set_defaults(func=order)
for action in ["status", "resume", "restart", "delete"]:
subparser = order_subparsers.add_parser(
action,
description=f"{action.capitalize()} eumdac orders",
help=f"{action} orders",
parents=[common_parser],
)
if action in ["resume", "restart"]:
subparser.add_argument(
"--chunk-size",
help=argparse.SUPPRESS,
)
subparser.add_argument(
"-t",
"--threads",
type=int,
help="set the number of parallel connections",
default=3,
dest="download_threads",
)
subparser.add_argument(
"-i",
"--integrity",
action="store_true",
help="verify integrity of downloaded files through their md5, if available",
)
subparser.add_argument(
"--local-tailor",
help="id of the instance to use for customisating the data",
metavar="ID",
)
subparser.add_argument(
"-k",
"--keep-order",
action="store_true",
help="keep order file after finishing successfully",
)
subparser.add_argument(
"order_id", help="order id", metavar="ID", nargs="?", default="latest"
)
if action == "delete":
subparser.add_argument("--all", help="delete all orders", action="store_true")
subparser.set_defaults(func=order)
order_parsers[action] = subparser
args = parser.parse_args(command_line)
if hasattr(args, "time_range"):
args.dtstart, args.dtend = _parse_timerange(args)
del args.time_range
# initialize logging
try:
progress_bars = not args.no_progress_bars
except AttributeError:
progress_bars = True
if args.debug:
init_logger("DEBUG", progress_bars)
elif args.verbose > 1:
init_logger("VERBOSE", progress_bars)
else:
init_logger("INFO", progress_bars)
if args.command:
if args.test:
return args.func(args)
try:
args.func(args)
except KeyboardInterrupt:
# Ignoring KeyboardInterrupts to allow for clean CTRL+C-ing
pass
except Exception as error:
log_error(error)
if args.debug:
raise
sys.exit(1)
else:
parser.print_help()
def log_error(error: Exception) -> None:
logger.error(str(error))
if isinstance(error, EumdacError) and error.extra_info: # type:ignore
extra_info: Dict[str, Any] = error.extra_info # type: ignore
extra_msg: str = ""
if "text" in extra_info:
extra_msg += f"{extra_info['text']}, "
if "title" in extra_info:
extra_msg += f"{extra_info['title']} "
if "description" in extra_info:
extra_msg += f"{extra_info['description']} "
if extra_msg:
# Add the status code only if there's more info
if "status" in extra_info:
extra_msg = f"{extra_info['status']} - {extra_msg}"
logger.error(extra_msg)
if "exceptions" in extra_info:
for problem in extra_info["exceptions"]:
detail_msg: str = f"{extra_info['status']} - {problem['exceptionText']}"
if not ("NoApplicableCode" in problem["exceptionCode"]):
detail_msg += f" - Type: {problem['exceptionCode']}"
logger.error(detail_msg)
class CredentialsFileNotFoundError(EumdacError):
"""Error that will be raised when no credentials file is found"""
eumdac-3.0.0/eumdac/cli_mtg_helpers.py 0000664 0000000 0000000 00000005157 14720105632 0017730 0 ustar 00root root 0000000 0000000 import re
from eumdac.logging import logger
from typing import Tuple, List
def is_collection_valid_for_coverage(collection: str) -> bool:
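    """Return True if `collection` matches one of the known chunked MTG FCI 1C
    collection ids (including the IVV/VAL and :COM variants) that support
    coverage-based downloads."""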
cs = [
"0662",
"0665",
"0672",
]
for c in cs:
if re.match(rf"EO\:EUM(IVV|VAL)?\:DAT\:{c}(:COM)?", collection):
return True
return False
def build_entries_from_coverage(coverage: str) -> Tuple[List[str], int]:
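    """Translate a coverage area id (FD, H1/H2, T1-T3, Q1-Q4) into shell-style entry
    patterns matching the corresponding chunk files, and return them together with
    the number of entries expected to match (including the TRAIL chunk 41)."""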
entries = []
expected = -1
# Using if-elif to be python < 3.10 compliant
if coverage == "FD":
logger.info("Downloading all chunks inside the full disk: 01-40")
entries.extend(["*_????_00[0-3][0-9].nc", "*_????_0040.nc"])
expected = 40
elif coverage == "H1":
logger.info("Downloading chunks inside H1: 01-21")
entries.extend(["*_????_000[1-9].nc", "*_????_001[0-9].nc", "*_????_002[0-1].nc"])
expected = 21
elif coverage == "H2":
logger.info("Downloading chunks inside H2: 20-40")
entries.extend(["*_????_002[0-9].nc", "*_????_003[0-9].nc", "*_????_0040.nc"])
expected = 21
elif coverage == "T1":
logger.info("Downloading chunks inside T1: 01-16")
entries.extend(["*_????_000[1-9].nc", "*_????_001[0-6].nc"])
expected = 16
elif coverage == "T2":
logger.info("Downloading chunks inside T2: 13-27")
entries.extend(["*_????_001[3-9].nc", "*_????_002[0-7].nc"])
expected = 15
elif coverage == "T3":
logger.info("Downloading chunks inside T3: 26-40")
entries.extend(["*_????_002[6-9].nc", "*_????_003[0-9].nc", "*_????_0040.nc"])
expected = 15
elif coverage == "Q1":
logger.info("Downloading chunks inside Q1: 01-13")
entries.extend(["*_????_000[0-9].nc", "*_????_001[0-3].nc"])
expected = 13
elif coverage == "Q2":
logger.info("Downloading chunks inside Q2: 10-21")
entries.extend(["*_????_001[0-9].nc", "*_????_002[0-1].nc"])
expected = 12
elif coverage == "Q3":
logger.info("Downloading chunks inside Q3: 20-30")
entries.extend(["*_????_002[0-9].nc", "*_????_0030.nc"])
expected = 11
elif coverage == "Q4":
logger.info("Downloading chunks inside Q4: 29-40")
entries.extend(["*_????_0029.nc", "*_????_003[0-9].nc", "*_????_0040.nc"])
expected = 12
# Include TRAIL file (chunk 41) in all areas
entries.append("*_????_0041.nc")
expected += 1
return (entries, expected)
# Removes subdirectories for printing entries
def pretty_print_entry(entry: str) -> str:
if entry.find("/") > -1:
return entry.split("/")[1]
else:
return entry
eumdac-3.0.0/eumdac/collection.py 0000664 0000000 0000000 00000032577 14720105632 0016731 0 ustar 00root root 0000000 0000000 """Module containing the Data Store Collection related classes."""
from __future__ import annotations
import json
import re
from functools import total_ordering
from typing import TYPE_CHECKING
from xml.etree import ElementTree
import requests
if TYPE_CHECKING: # pragma: no cover
import sys
from typing import Any, Optional
if sys.version_info < (3, 9):
from typing import Generator, Mapping, MutableMapping, Pattern
else:
from collections.abc import Mapping, MutableMapping, Generator
from re import Pattern
from eumdac.datastore import DataStore
from eumdac.product import Product
from eumdac.errors import EumdacError, eumdac_raise_for_status
from eumdac.request import get
import eumdac.common
class SearchResults:
"""Iterable results for a search of a given Data Store collection.
Usage:
>>> results = collection.search(parameters)
>>> number_found = results.total_results
>>> for product in results:
>>> print(product)
Attributes
----------
- `collection`: *Collection*
    Properties
----------
- `total_results`: *int*
Number of total results in the search.
- `query`: *dict*
Query parameters for the search.
Methods
-------
- `update_query(**query)`: *SearchResults*
Perform a new search updating the current params with `query` and return its results. Does not modify this instance.
- `first`: *Product*
Return the first product of the search.
"""
collection: Collection
_query: MutableMapping[str, Optional[str]]
_total_results: Optional[int] = None
_items_per_page: int = 100
def __init__(self, collection: Collection, query: Mapping[str, Any]) -> None:
"""Init the SearchResults for searching 'collection' based on 'query'.
Does not perform the search yet.
"""
self.collection = collection
self.query = query # type: ignore[assignment]
# Use bigger pages for brief searches
if self.query["set"] == "brief":
self._items_per_page = 500
def __contains__(self, product: Product) -> bool:
"""Return true if 'product' is among the search results.
Iterates over the whole result set in the worst case.
"""
# if this is used more often, maybe better implement a bisection
# on page loading to find the product
for item in self.__iter__():
if product == item:
return True
return False
def __iter__(self) -> Generator[Product, None, None]:
"""Iterate the found products, querying the next page if needed."""
params = self._get_request_params()
page_json = self._load_page(params)
self._total_results = int(page_json["totalResults"])
yield from self._yield_products(page_json)
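        # Data Store search results are capped at 10000 items; request the remaining
        # pages in blocks of _items_per_page after yielding the first page above.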
for start_index in range(
self._items_per_page, min(self._total_results, 10000), self._items_per_page
):
params["si"] = start_index
page_json = self._load_page(params)
yield from self._yield_products(page_json)
def __len__(self) -> int:
"""Return total results."""
return self.total_results
def __repr__(self) -> str:
"""Represent the search as `collection` and `query` performed."""
return f"{self.__class__}({self.collection}, {self.query})"
@property
def total_results(self) -> int:
"""Number of total results in the search."""
if self._total_results is None:
params = self._get_request_params()
params["c"] = 0
page_json = self._load_page(params)
self._total_results = int(page_json["totalResults"])
return self._total_results
@property
def query(self) -> MutableMapping[str, Optional[str]]:
"""Query performed to get the search results."""
return {**self._query}
@query.setter
def query(self, query: Mapping[str, Any]) -> None:
"""Set the query terms."""
valid_keys = set(self.collection.search_options)
new_keys = set(query)
diff = new_keys.difference(valid_keys)
if diff:
raise CollectionError(f"invalid search options {diff}, valid options are {valid_keys}")
self._query = {
key: None if query.get(key) is None else str(query.get(key)) for key in valid_keys
}
if hasattr(query.get("dtstart"), "isoformat"):
self._query["dtstart"] = query["dtstart"].isoformat()
if hasattr(query.get("dtend"), "isoformat"):
self._query["dtend"] = query["dtend"].isoformat()
def first(self) -> Optional[Product]:
"""Return the first product of the search."""
params = self._get_request_params()
params["c"] = 1
page_json = self._load_page(params)
        self._total_results = int(page_json["totalResults"])
if self._total_results == 0:
return None
return next(self._yield_products(page_json))
def update_query(self, **query: Any) -> SearchResults:
"""Perform a new search updating the current params with `query` and return its results. Does not modify this instance."""
new_query = {**self._query, **query}
return SearchResults(self.collection, new_query)
def _load_page(
self, params: Mapping[str, Any], session: Optional[requests.Session] = None
) -> MutableMapping[str, Any]:
"""Fetch the next page of the search."""
auth = self.collection.datastore.token.auth
url = self.collection.datastore.urls.get("datastore", "search")
if session is None:
response = get(
url,
params=params,
auth=auth,
headers=eumdac.common.headers,
)
else:
response = session.get(url, params=params, auth=auth, headers=eumdac.common.headers)
eumdac_raise_for_status(
f"Search query load page failed for {self.collection} with {self._query}",
response,
CollectionError,
)
return response.json()
def _yield_products(self, page_json: Mapping[str, Any]) -> Generator[Product, None, None]:
"""Return all products."""
collection_id = str(self.collection)
for feature in page_json["features"]:
product = self.collection.datastore.get_product_from_search_feature(
collection_id, feature
)
yield product
def _get_request_params(self) -> MutableMapping[str, Any]:
"""Build the search request parameters from 'query'."""
return {
"format": "json",
"pi": str(self.collection),
"si": 0,
"c": self._items_per_page,
**{key: value for key, value in self._query.items() if value is not None},
}
@total_ordering
class Collection:
"""Collection from Data Store.
Provides access to the collection metadata and allows performing searches of its products.
Attributes
----------
- `datastore`: *DataStore*
Properties
----------
- `abstract`: *str*
Detailed description of the collection products.
- `title`: *str*
    - `metadata`: *dict*
- `product_type`: *str*
- `search_options`: *dict*
Dictionary of available search options for the collection.
Methods
-------
- `search(**query)`: *SearchResults*
Perform a product search inside the collection
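    Usage (illustrative sketch; assumes `datastore` is an authenticated eumdac
    DataStore instance and that the collection id exists in Data Store):
    >>> collection = datastore.get_collection("EO:EUM:DAT:MSG:HRSEVIRI")
    >>> print(collection.title)
    >>> results = collection.search(dtstart="2021-01-01T00:00:00", dtend="2021-01-01T23:59:59")
    >>> print(results.total_results)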
"""
_id: str
_title: Optional[str]
datastore: DataStore
_geometry: Optional[Mapping[str, Any]] = None
_properties: Optional[Mapping[str, Any]] = None
_search_options: Optional[Mapping[str, Any]] = None
    # Title and abstract come with sequences of whitespace in the text.
    # We use this regex to substitute them with a normal space.
_whitespaces: Pattern[str] = re.compile(r"\s+")
def __init__(
self, collection_id: str, datastore: DataStore, title: Optional[str] = None
) -> None:
"""Init the collection.
Arguments
---------
- `collection_id`: *str*
Id of the collection in Data Store.
- `datastore`: *DataStore*
Reference to Data Store.
- `title`: *str, optional, internal*
Collection title, used by DataStore when listing collections
"""
self._id = collection_id
self.datastore = datastore
self._title = self._whitespaces.sub(" ", title) if title else None
def __str__(self) -> str:
return self._id
def __repr__(self) -> str:
return f"{self.__class__}({self._id})"
def __eq__(self, other: Any) -> bool:
return isinstance(other, self.__class__) and self._id == other._id
def __lt__(self, other: Collection) -> bool:
return self._id < other._id
def _ensure_properties(self) -> None:
"""Fetch properties from Data Store, unless they were already requested."""
if self._properties is not None:
return
url = self.datastore.urls.get(
"datastore", "browse collection", vars={"collection_id": self._id}
)
auth = self.datastore.token.auth
response = get(
url,
params={"format": "json"},
auth=auth,
headers=eumdac.common.headers,
)
eumdac_raise_for_status(
f"Could not get properties of {self._id}", response, CollectionError
)
geometry = response.json()["collection"]["geometry"]
properties = response.json()["collection"]["properties"]
properties.pop("links")
self._geometry = geometry
self._properties = properties
title = properties["title"]
abstract = properties["abstract"]
self._properties["title"] = self._whitespaces.sub(" ", title) # type: ignore[index]
self._properties["abstract"] = self._whitespaces.sub(" ", abstract) # type: ignore[index]
@property
def abstract(self) -> str:
"""Detailed description of the collection products."""
self._ensure_properties()
return str(self._properties["abstract"]) # type: ignore[index]
@property
def title(self) -> str:
"""Collection title."""
if self._title:
return self._title
else:
self._ensure_properties()
return str(self._properties["title"]) # type: ignore[index]
@property
def metadata(self) -> Mapping[str, Any]:
"""Collection metadata."""
self._ensure_properties()
return {
"geometry": self._geometry.copy(), # type: ignore[union-attr]
"properties": self._properties.copy(), # type: ignore[union-attr]
}
@property
def product_type(self) -> Optional[str]:
"""Product type."""
self._ensure_properties()
auth = self.datastore.token.auth
url = self.datastore.urls.get("tailor", "products")
response = get(
url,
auth=auth,
headers=eumdac.common.headers,
)
eumdac_raise_for_status(f"Could not get search product type", response, CollectionError)
api_response = json.loads(response.text)
collection_ids = [i["pn_id"] for i in api_response["data"]]
product_types = [i["id"] for i in api_response["data"]]
product_types_dict = dict(zip(product_types, collection_ids))
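        # Each Data Tailor product type maps to one or more Data Store collection ids
        # (pn_id); return the product type whose mapping contains this collection id.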
for key, value in product_types_dict.items():
            if isinstance(value, list):
if self._id in value:
return key
else:
if self._id == value:
return key
return None
def search(self, **query: Any) -> SearchResults:
"""Product search inside the collection.
Note: search parameters differ depending on the collection
they can be listed with the property search_options
"""
return SearchResults(self, query)
@property
def search_options(self) -> Mapping[str, Any]:
"""Dictionary of available search options for the collection."""
if self._search_options is None:
# load remote options
            # these lines may change when the new version of DT offers
# a way to load collection specific options
url_static = self.datastore.urls.get("datastore", "search options")
url = url_static + "?pi=" + self._id
auth = self.datastore.token.auth
response = get(
url,
auth=auth,
headers=eumdac.common.headers,
)
eumdac_raise_for_status(
f"Could not get search options for {self._id}", response, CollectionError
)
root = ElementTree.fromstring(response.text)
(element,) = [
ele
for ele in root
if ele.tag.endswith("Url") and ele.get("type") == "application/json"
]
self._search_options = {
str(e.get("name")): {
"title": e.get("title"),
"options": [o.get("value") for o in e],
}
for e in element
# remove options controlled by SearchResults
if e.get("name") not in ["format", "pi", "si", "c", "id", "pw"]
and e.get("name") is not None
}
return self._search_options
class CollectionError(EumdacError):
"""Errors related to collections"""
eumdac-3.0.0/eumdac/common.py 0000664 0000000 0000000 00000000355 14720105632 0016053 0 ustar 00root root 0000000 0000000 """Module containing common data to be reused accross modules"""
from eumdac.__version__ import __title__, __documentation__, __version__
headers = {
"referer": "EUMDAC.LIB",
"User-Agent": str(__title__ + "/" + __version__),
}
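# A minimal sketch of how these headers are intended to be passed along with
# requests made by other modules (the URL and the JobID suffix below are
# illustrative, not part of this module):
#
#   import requests
#   custom = {"referer": f"{headers['referer']} JobID: example"}
#   requests.get("https://api.eumetsat.int/data", headers={**headers, **custom})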
eumdac-3.0.0/eumdac/config.py 0000664 0000000 0000000 00000001161 14720105632 0016024 0 ustar 00root root 0000000 0000000 """Module containing helper functions related to eumdac configuration files"""
import os
from pathlib import Path
def get_config_dir() -> Path:
"""get the Path to the configuration directory of eumdac"""
return Path(os.getenv("EUMDAC_CONFIG_DIR", (Path.home() / ".eumdac")))
def get_credentials_path() -> Path:
"""get the Path to the credentials of eumdac"""
return get_config_dir() / "credentials"
def get_url_path() -> Path:
"""get the Path to the tailor configurations of eumdac"""
r = get_config_dir() / "url"
r.mkdir(parents=True, exist_ok=True)
return r
PERCENTAGE_WARNING = 90
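# A minimal usage sketch: the configuration directory can be redirected with
# the EUMDAC_CONFIG_DIR environment variable (the path below is illustrative):
#
#   import os
#   os.environ["EUMDAC_CONFIG_DIR"] = "/tmp/eumdac-test"
#   assert get_config_dir() == Path("/tmp/eumdac-test")
#   assert get_credentials_path() == get_config_dir() / "credentials"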
eumdac-3.0.0/eumdac/customisation.py 0000664 0000000 0000000 00000025320 14720105632 0017463 0 ustar 00root root 0000000 0000000 """Module containing the Customisation class and related errors."""
from __future__ import annotations
import sys
import time
from contextlib import contextmanager
from datetime import datetime
from typing import TYPE_CHECKING
import requests
from eumdac.errors import EumdacError, eumdac_raise_for_status
from eumdac.request import get, patch, put
if TYPE_CHECKING: # pragma: no cover
if sys.version_info < (3, 9):
from typing import Generator, Iterable, Mapping, MutableMapping
else:
from collections.abc import MutableMapping, Mapping, Iterable, Generator
from types import TracebackType
from typing import IO, Any, Optional, Type
from eumdac.datatailor import DataTailor
import eumdac.common
class Customisation:
"""Customisation job from Data Tailor, used for managing the job lifecycle.
The customisation job in Data Tailor needs to exist before instantiating this class.
Use the DataTailor class to create new Customisation instances or get existing ones, then use the received instance to check the job details and manage its life-cycle.
Attributes
----------
- `_id`: *str*
Customisation id in Data Tailor. The string representation of an instance will be its id.
- `datatailor`: *DataTailor*
DataTailor instance that owns this customisation.
- `update_margin`: *float, default 0.5*
Seconds to wait before requesting an update on the customisation details.
Properties
-----------
- `status`: *str*
Status of the job in Data Tailor (`QUEUED`, `RUNNING`, `DONE`, `KILLED`, `FAILED`).
- `progress`: *int*
Progress of the running job, in percentage from 0 to 100.
- `duration`: *int*
Time taken by the running job, in seconds.
- `logfile`: *str*
Log of the job execution.
- `outputs`: *Iterable[str]*
List of outputs generated by the finished job.
Methods
-------
- `stream_output(output : str)`: *IO[bytes]*
Return a stream of the bytes of `output` which can be used for downloading.
- `stream_output_iter_content(output : str)`: *Iterable[bytes]*
Return an iterable stream of the bytes of `output` which can be used for downloading.
- `kill()`
Cancel the QUEUED or RUNNING job, putting it in the KILLED status.
- `delete()`
Remove the DONE, KILLED, or FAILED job from Data Tailor.
"""
_id: str
datatailor: DataTailor
update_margin: float = 0.5 # seconds
_properties: Optional[MutableMapping[str, Any]] = None
_deleted: bool = False
_killed: bool = False
_last_update: float = 0
_creation_time_format: str = "%Y%m%dT%H%M%SZ"
def __init__(self, customisation_id: str, datatailor: DataTailor) -> None:
"""Prepare the Customisation instance for the job with `id` from `datatailor`.
Arguments
---------
- `customisation_id`: *str*
Id of the job that this Customisation instance represents.
- `datatailor`: *DataTailor*
Data Tailor instance that owns this customisation.
"""
self._id = customisation_id
self.datatailor = datatailor
@classmethod
def from_properties(
cls, properties: Mapping[str, Any], datatailor: DataTailor
) -> Customisation:
"""Prepare a Customisation from `properties`"""
_properties = {**properties}
instance = cls(_properties.pop("id"), datatailor)
instance._last_update = time.time()
instance._properties = _properties
return instance
def __str__(self) -> str:
"""Return the customisation job id."""
return self._id
def __repr__(self) -> str:
return f"{self.__class__}({self._id})"
def __enter__(self) -> Customisation:
return self
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_value: Optional[BaseException],
exc_traceback: Optional[TracebackType],
) -> None:
self.delete()
def _update_properties(self) -> None:
"""Refresh customisation details.
Raises
------
AlreadyDeletedCustomisationError, if the job is not found in Data Tailor.
"""
if self._deleted:
raise AlreadyDeletedCustomisationError("Customisation already deleted.")
now = time.time()
expired = now - self._last_update > self.update_margin
if expired or self._properties is None:
url = self.datatailor.urls.get(
"tailor", "customisation", vars={"customisation_id": self._id}
)
response = get(url, auth=self.datatailor.token.auth, headers=eumdac.common.headers)
eumdac_raise_for_status(
"Failed to get customisation", response, UnableToGetCustomisationError
)
self._properties = response.json()[self._id]
self._last_update = now
@property
def creation_time(self) -> datetime:
"""Customisation job creation time."""
self._update_properties()
return datetime.strptime(
self._properties["creation_time"], self._creation_time_format # type: ignore[index]
)
@property
def backend(self) -> str:
"""Data Tailor backend used for the customisation."""
self._update_properties()
return self._properties["backend_id"] # type: ignore[index]
@property
def product_type(self) -> str:
"""Product type of the customisation."""
self._update_properties()
return self._properties["product_id"] # type: ignore[index]
@property
def processing_steps(self) -> Iterable[str]:
"""List of processing steps involved in the customisation job."""
self._update_properties()
return self._properties["required_processing_steps"] # type: ignore[index]
@property
def status(self) -> str:
"""Customisation job status ('QUEUED', 'RUNNING', 'DONE', 'KILLED', 'FAILED')."""
self._update_properties()
return self._properties["status"] # type: ignore[index]
@property
def progress(self) -> int:
"""Customisation job progress."""
self._update_properties()
return self._properties["progress"] # type: ignore[index]
@property
def duration(self) -> int:
"""Elapsed time for the customisation job."""
self._update_properties()
return self._properties["processing_duration"] # type: ignore[index]
@property
def outputs(self) -> Iterable[str]:
"""List of outputs of the finished customisation job."""
self._update_properties()
return self._properties["output_products"] # type: ignore[index]
@property
def logfile(self) -> str:
"""Log of the job execution, in a multiline str.
Raises
------
AlreadyDeletedCustomisationError, if the job is not found in Data Tailor.
CustomisationError, if the log can't be retrieved.
"""
if self._deleted:
raise AlreadyDeletedCustomisationError("Customisation already deleted.")
url = self.datatailor.urls.get(
"tailor", "customisation log", vars={"customisation_id": self._id}
)
response = get(
url,
auth=self.datatailor.token.auth,
headers=eumdac.common.headers,
)
eumdac_raise_for_status("Failed to get customisation log", response, CustomisationError)
return response.json()["log_content"]
@contextmanager
def _download_response(self, output: str) -> Generator[requests.Response, None, None]:
"""Prepare the download response for `output`.
Raises
------
AlreadyDeletedCustomisationError, if the job is not found in Data Tailor.
CustomisationError, if the download response can't be retrieved.
"""
if self._deleted:
raise AlreadyDeletedCustomisationError("Customisation already deleted.")
if output not in self.outputs:
raise ValueError(f"{output} not in {self.outputs}")
url = self.datatailor.urls.get("tailor", "download")
auth = self.datatailor.token.auth
params = {"path": output}
with get(
url,
auth=auth,
params=params,
stream=True,
headers=eumdac.common.headers,
) as response:
eumdac_raise_for_status("Failed to get tailor download", response, CustomisationError)
response.raw.name = output.split("/")[-1]
response.raw.decode_content = True
yield response
@contextmanager
def stream_output_iter_content(
self, output: str, chunks: int = 1024 * 1024
) -> Generator[Iterable[bytes], None, None]:
"""Return an interable stream of the bytes of `output` which can be used for downloading."""
with self._download_response(output) as resp:
yield resp.iter_content(chunks)
@contextmanager
def stream_output(self, output: str) -> Generator[IO[bytes], None, None]:
"""Return a stream of the byts of `output` which can be used for downloading."""
with self._download_response(output) as resp:
yield resp.raw
def delete(self) -> None:
"""Remove the DONE, KILLED, or FAILED job from Data Tailor.
Raises
------
CustomisationError, if the deletion fails.
"""
if not self._deleted:
url = self.datatailor.urls.get("tailor", "delete")
payload = {"uuids": [self._id]}
auth = self.datatailor.token.auth
response = patch(url, auth=auth, json=payload, headers=eumdac.common.headers)
eumdac_raise_for_status("Failed to patch tailor delete", response, CustomisationError)
self._deleted = True
def kill(self) -> None:
"""Cancel the QUEUED or RUNNING job, putting it in the KILLED status.
Raises
------
CustomisationError, if the cancelling fails.
"""
if not self._killed:
url = self.datatailor.urls.get(
"tailor", "customisation", vars={"customisation_id": self._id}
)
auth = self.datatailor.token.auth
response = put(url, json={"status": "killed"}, auth=auth, headers=eumdac.common.headers)
eumdac_raise_for_status(
"Failed to cancel tailor customisation", response, CustomisationError
)
self._killed = True
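# A minimal lifecycle sketch, assuming `datatailor` is an authenticated DataTailor
# instance and `customisation_id` refers to an existing job (both illustrative):
#
#   import shutil
#   import time
#
#   with datatailor.get_customisation(customisation_id) as job:
#       while job.status in ("QUEUED", "RUNNING"):
#           time.sleep(job.update_margin)
#       for output in job.outputs:
#           with job.stream_output(output) as stream, open(stream.name, "wb") as dst:
#               shutil.copyfileobj(stream, dst)
#   # leaving the `with` block calls delete() on the job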
class CustomisationError(EumdacError):
"""Error related to customisations."""
class AlreadyDeletedCustomisationError(CustomisationError):
"""Error when dealing with customisations that are already deleted."""
class UnableToGetCustomisationError(CustomisationError):
"""Error when retrieval of customisation fails."""
eumdac-3.0.0/eumdac/datastore.py 0000664 0000000 0000000 00000013673 14720105632 0016560 0 ustar 00root root 0000000 0000000 """Module containing the Data Store class and related errors"""
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from eumdac.collection import Collection, SearchResults
from eumdac.errors import EumdacError, eumdac_raise_for_status
from eumdac.product import Product
from eumdac.request import get
from eumdac.token import BaseToken, URLs
from eumdac.logging import logger
import eumdac.common
if TYPE_CHECKING: # pragma: no cover
import sys
from typing import Optional
if sys.version_info < (3, 9):
from typing import Iterable, Mapping
else:
from collections.abc import Iterable, Mapping
class DataStore:
"""Interface with EUMETSAT Data Store.
Instantiate it by providing a token to access Data Store functions such as
listing available collections, fetching specific products, or performing
OpenSearch queries.
"""
token: BaseToken
urls: URLs
_collections: Mapping[str, Collection]
def __init__(self, token: BaseToken) -> None:
self.token = token
self.urls = token.urls
self._collections = {}
def _load_collections(self) -> None:
if self._collections:
return
url = self.urls.get("datastore", "browse collections")
response = get(
url,
params={"format": "json"},
auth=self.token.auth,
headers=eumdac.common.headers,
)
eumdac_raise_for_status("Load collections failed", response, DataStoreError)
collection_ids_titles = [
(item["title"], item["datasetTitle"]) for item in response.json()["links"]
]
self._collections = {
collection_id: Collection(collection_id, self, title)
for collection_id, title in collection_ids_titles
}
@property
def collections(self) -> Iterable[Collection]:
"""Collections available"""
self._load_collections()
return list(self._collections.values())
def get_collection(self, collection_id: str) -> Collection:
"""
Return the collection with the given collection id.
Parameters
----------
- `collection_id` : *str*
Collection id, e.g. 'EO:EUM:DAT:MSG:HRSEVIRI'
"""
return Collection(collection_id, self)
def check_collection_id(self, collection_id: str) -> None:
"""Validate a collection id, raising CollectionNotFoundError if it's not valid."""
url = self.urls.get("datastore", "browse collection", vars={"collection_id": collection_id})
response = None
try:
response = get(url, auth=self.token.auth, headers=eumdac.common.headers)
except Exception as err:
logger.error(f"Could not verify collection id due to {err}")
if response and (
response.status_code == 401
or response.status_code == 403
or response.status_code == 404
):
eumdac_raise_for_status(
"The collection you are searching for does not exist or you do not have authorisation to access it",
response,
CollectionNotFoundError,
)
def get_product(self, collection_id: str, product_id: str) -> Product:
"""Return the product with the given id, from the provided collection"""
return Product(collection_id, product_id, self)
def get_product_from_search_feature(
self, collection_id: str, feature: dict[str, Any]
) -> Product:
"""Return a product built from the provided Data Store search results feature object"""
product = Product(collection_id, feature["id"], self)
# Full searches yield a longer property page
# Brief searches yield a basic property page (sensing and publication time)
if len(feature["properties"]) > 4:
product._parse_browse_properties(feature["properties"], feature["geometry"])
return product
def get_product_from_url(self, product_url: str) -> Product:
"""Return the product matching the given URL, if any"""
try:
if product_url.find("api.eumetsat.int/data/download") < 0:
raise ValueError(f"{product_url} is not a valid Data Store URL")
url_members: list[str] = product_url.split("/")
products_index = url_members.index("products")
# Fetch collection id and de-sanitize it (%3A -> :)
collection_str = url_members[products_index - 1].replace("%3A", ":")
product_str = url_members[products_index + 1]
return self.get_product(collection_str, product_str)
except Exception as e:
raise DataStoreError(f"Could not parse product url {product_url}: {e}") from e
def opensearch(self, query: str) -> SearchResults:
"""Perform an OpenSearch query, returning a SearchResults object with the results.
Raises DataStoreError if no collection id is provided through the pi parameter.
Parameters
----------
- `query` : *str*
OpenSearch query, e.g. 'pi=EO:EUM:DAT:MSG:HRSEVIRI'
"""
q = self._parse_opensearchquery(query)
if not "pi" in q:
raise DataStoreError(
"Please provide a Collection ID via the pi query parameter (i.e. pi=EO:EUM:DAT:MSG:HRSEVIRI)"
)
c = Collection(q.pop("pi"), self)
return c.search(**q)
def _parse_opensearchquery(self, query: str) -> dict[str, str]:
query_dict = {}
for member in query.split("&"):
items = member.split("=")
if len(items) != 2:
raise DataStoreError(f"Invalid query member: {member}")
if items[0] not in ["format", "si", "c", "id", "pw"] and items[0] is not None:
query_dict[items[0]] = items[1]
return query_dict
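# A minimal usage sketch, assuming `token` is a valid AccessToken (the collection
# id and the query are illustrative):
#
#   datastore = DataStore(token)
#   for collection in datastore.collections:
#       print(collection)
#   hrseviri = datastore.get_collection("EO:EUM:DAT:MSG:HRSEVIRI")
#   results = datastore.opensearch("pi=EO:EUM:DAT:MSG:HRSEVIRI")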
class DataStoreError(EumdacError):
"Errors related to the DataStore"
class CollectionNotFoundError(EumdacError):
"""Collection not found error"""
eumdac-3.0.0/eumdac/datatailor.py 0000664 0000000 0000000 00000016000 14720105632 0016701 0 ustar 00root root 0000000 0000000 """Module containing the Data Tailor class and related errors"""
from __future__ import annotations
import json
from typing import TYPE_CHECKING
from eumdac.customisation import Customisation
from eumdac.errors import EumdacError, eumdac_raise_for_status
from eumdac.tailor_models import Chain, DataTailorCRUD, Filter, Quicklook, RegionOfInterest
from eumdac.request import get, post
import eumdac.common
from eumdac.token import AccessToken, AnonymousAccessToken
if TYPE_CHECKING: # pragma: no cover
import sys
from typing import Any, Optional
from eumdac.product import Product
from eumdac.token import BaseToken, URLs
if sys.version_info < (3, 9):
from typing import Iterable, Mapping, Sequence
else:
from collections.abc import Iterable, Mapping, Sequence
class DataTailor:
"""Interface with the EUMETSAT Data Tailor Webservice
Instance it by providing a token and access the Data Tailor functions like
posting new customisation jobs, listing the current jobs, cancelling running ones,
download job outputs, and delete finished jobs.
"""
token: BaseToken
urls: URLs
chains: DataTailorCRUD
filters: DataTailorCRUD
rois: DataTailorCRUD
quicklooks: DataTailorCRUD
_info: Optional[Mapping[str, Any]] = None
_user_info: Optional[Mapping[str, Any]] = None
def __init__(self, token: BaseToken) -> None:
self.token = token
self.urls = token.urls
self.chains = DataTailorCRUD(self, Chain)
self.filters = DataTailorCRUD(self, Filter)
self.rois = DataTailorCRUD(self, RegionOfInterest)
self.quicklooks = DataTailorCRUD(self, Quicklook)
@property
def customisations(self) -> Sequence[Customisation]:
"""Return the list of customisations"""
url = self.urls.get("tailor", "customisations")
response = get(
url,
auth=self.token.auth,
headers=eumdac.common.headers,
)
eumdac_raise_for_status("Could not get customisations", response, DataTailorError)
customisations = response.json()["data"]
return [Customisation.from_properties(properties, self) for properties in customisations]
@property
def info(self) -> Mapping[str, Any]:
"""Return information about Data Tailor Webservice in a Dict-like format."""
if self._info is None:
url = self.urls.get("tailor", "info")
auth = self.token.auth
response = get(
url,
auth=auth,
headers=eumdac.common.headers,
)
eumdac_raise_for_status("Could not get info", response, DataTailorError)
self._info = response.json()
return self._info
@property
def user_info(self) -> Mapping[str, Any]:
"""Return information about the current Data Tailor Webservice user in a Dict-like format."""
if self._user_info is None:
url = self.urls.get("tailor", "user info")
auth = self.token.auth
response = get(
url,
auth=auth,
headers=eumdac.common.headers,
)
eumdac_raise_for_status("Could not get user_info", response, DataTailorError)
self._user_info = response.json()
return self._user_info
@property
def quota(self) -> Mapping[str, Any]:
"""Return information about the user workspace quota on the Data Tailor Webservice in a Dict-like format."""
url = self.urls.get("tailor", "report quota")
auth = self.token.auth
response = get(
url,
auth=auth,
headers=eumdac.common.headers,
)
eumdac_raise_for_status("Could not get quota", response, DataTailorError)
return response.json()
@property
def is_local(self) -> bool:
"""Return if the configured Data Tailor is the Data Tailor Webservice or a local instance."""
# when no token for datatailor exists we assume this is a local tailor instance
return isinstance(self.token, AnonymousAccessToken)
def get_customisation(self, customisation_id: str) -> Customisation:
"""Return a customisation job given its id."""
return Customisation(customisation_id, self)
def new_customisation(self, product: Product, chain: Chain) -> Customisation:
"""Start a new customisation job for the given product.
Started customisations will run asynchronously and need to be monitored.
Once finished, their outputs can be downloaded, and then they need to be deleted.
Arguments
---------
- `product` : *Product*
Data Store product to customise
- `chain` : *Chain*
Chain configuration to use for the customisation
"""
(customisation,) = self.new_customisations([product], chain)
return customisation
def new_customisations(
self, products: Iterable[Product], chain: Chain
) -> Sequence[Customisation]:
"""Starts multiple customisation jobs for the given products
Started customisations will run asynchronously and need to be monitored.
Once finished, their outputs can be downloaded, and then they need to be deleted.
Arguments
---------
- `products` : *Iterable[Product]*
Data Store products to customise
- `chain` : *Chain*
Chain configuration to use for the customisation
"""
product_paths = "|||".join(
self.urls.get(
"datastore",
"download product",
vars={
"product_id": product._id,
"collection_id": product.collection._id,
},
)
for product in products
)
data = {"product_paths": product_paths}
params = {}
# instead of guessing the correct token, datatailor should use the token attached to each product
if isinstance(self.token, AccessToken):
# provide own token to the endpoint since we assume it is valid for datastore
params["access_token"] = str(self.token)
elif self.is_local and any(products):
# for local tailor instances we use the token attached to the first product
params["access_token"] = str(next(iter(products)).datastore.token)
if isinstance(chain, str):
data["chain_name"] = chain
else:
data["chain_config"] = json.dumps(chain.asdict())
response = post(
self.urls.get("tailor", "customisations"),
auth=self.token.auth,
params=params,
files=data,
headers=eumdac.common.headers,
)
eumdac_raise_for_status("Could not add customizations", response, DataTailorError)
customisation_ids = response.json()["data"]
return [self.get_customisation(customisation_id) for customisation_id in customisation_ids]
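# A minimal usage sketch, assuming `token` is a valid AccessToken, `product` is a
# Data Store Product and `chain` is a Chain configuration (all illustrative):
#
#   datatailor = DataTailor(token)
#   print(datatailor.quota)
#   customisation = datatailor.new_customisation(product, chain)
#   print(customisation.status)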
class DataTailorError(EumdacError):
"""Errors related to DataTailor operations"""
eumdac-3.0.0/eumdac/download_app.py 0000664 0000000 0000000 00000045015 14720105632 0017234 0 ustar 00root root 0000000 0000000 """module containing the DownloadApp which will be used when using
eumdac download **without** the --tailor argument."""
import concurrent
import datetime
import fnmatch
import logging
import shutil
import sys
import tempfile
import threading
import time
from hashlib import md5
from pathlib import Path
from typing import IO, Any, Generator, List, Mapping, Optional, Tuple
import eumdac.common
from eumdac.futures import EumdacFutureFunc, EumdacThreadPoolExecutor
from eumdac.job_id import JobIdentifier
from eumdac.logging import logger
from eumdac.order import Order
from eumdac.product import Product
import eumdac.product
def _divide_into_chunks(
basedir: str, content_length: int, chunk_size: int
) -> Mapping[Path, Tuple[int, int]]:
"""
Divide a product's content into chunks and return a mapping of file paths to chunk ranges.
Each chunk is represented by a tuple specifying the start and end byte positions within the file.
The function creates a new path for each chunk inside the base directory, appending the
chunk index to the file name (chunk.0, chunk.1, ...).
Args:
basedir (str): The base directory where chunks will be stored.
content_length (int): The total size of the content (in bytes) to be divided.
chunk_size (int): The size of each chunk in bytes.
Returns:
Mapping[Path, Tuple[int, int]]: A dictionary where each key is a `Path` object pointing to
a chunk file, and the value is a tuple indicating the start and end byte positions of the chunk.
This is called chunk_dict in the code below.
"""
ret = {
Path(basedir) / f"chunk.{i}": (a, b)
for i, (a, b) in enumerate(_chunk_ranges(content_length, chunk_size))
}
return ret
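# Worked example (sizes are illustrative): splitting a 250 MiB download into
# 100 MiB chunks yields three ranges, stored as chunk.0, chunk.1 and chunk.2:
#
#   mib = 1024 * 1024
#   chunks = _divide_into_chunks("/tmp/job", 250 * mib, 100 * mib)
#   # {Path('/tmp/job/chunk.0'): (0, 104857600),
#   #  Path('/tmp/job/chunk.1'): (104857600, 209715200),
#   #  Path('/tmp/job/chunk.2'): (209715200, 262144000)}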
def _chunk_ranges(content_length: int, chunk_size: int) -> Generator[Tuple[int, int], None, None]:
cur = 0
while True:
if cur + chunk_size > content_length:
break
yield (cur, cur + chunk_size)
cur += chunk_size
if cur != content_length:
yield (cur, content_length)
def _mb_per_s(bytes_downloaded: int, elapsed_time: float) -> float:
"""
Calculate the download speed in MB/s.
"""
if elapsed_time > 0:
return (
bytes_downloaded / 1024 / 1024
) / elapsed_time # Convert bytes to MB, then calculate MB/s
return 0.0
def log(level: int, message: str) -> None:
if (
sys.stdout.isatty()
and logger._progress_handler # type: ignore
and _download_speed_tracker.last_line_was_progress
):
_download_speed_tracker.last_line_was_progress = False
logger.log(level, "")
logger.log(level, message)
class DownloadSpeedTracker:
def __init__(self) -> None:
self.total_bytes_downloaded = 0
self.start_time = 0.0
self.last_measured_speed = -1.0
self.lock = threading.Lock()
self.last_line_was_progress = False
self.running = False
def start(self) -> None:
with self.lock:
self.running = True
self.start_time = time.time()
self.last_update_time = self.start_time
def stop(self) -> None:
with self.lock:
self.running = False
def update(self, bytes_downloaded: int) -> None:
if not self.running:
return
if self.start_time is None:
raise RuntimeError("DownloadSpeedTracker has not been started. Call 'start()' first.")
with self.lock: # Ensure only one thread can update at a time
self.total_bytes_downloaded += bytes_downloaded
elapsed_time = time.time() - self.start_time
mb_downloaded = self.total_bytes_downloaded / 1024 / 1024
if elapsed_time > 0:
self.last_measured_speed = mb_downloaded / elapsed_time
self.last_line_was_progress = True
logger.progress( # type:ignore
f"Elapsed time: {str(datetime.timedelta(seconds=round(elapsed_time)))}, {mb_downloaded} MB downloaded, current speed: {self.get_current_speed():.2f} MB/s"
)
def get_current_speed(self) -> float:
if self.start_time is None:
raise RuntimeError("DownloadSpeedTracker has not been started. Call 'start()' first.")
with self.lock: # Ensure consistent access to shared data
return self.last_measured_speed
_download_speed_tracker = DownloadSpeedTracker()
class DownloadApp:
def __init__(
self,
order: Order,
datastore: Any,
integrity: bool = False,
download_threads: int = 3,
chunk_size: Optional[int] = None,
) -> None:
self.download_executor = EumdacThreadPoolExecutor(max_workers=download_threads)
self.reassembling_executor = EumdacThreadPoolExecutor(max_workers=None)
self.order = order
self.datastore = datastore
self.check_integrity = integrity
num_jobs = len(list(self.order.iter_product_info()))
self.job_identificator = JobIdentifier(num_jobs)
self.num_download_threads = download_threads
self.chunk_size = chunk_size
def run(self) -> bool:
log(logging.DEBUG, "Starting download(s)")
return self._run_app()
def shutdown(self) -> None:
with self.order._lock:
_download_speed_tracker.stop()
self.reassembling_executor.pool_shutdown()
self.download_executor.pool_shutdown()
def _run_app(self) -> bool:
with self.order.dict_from_file() as order_d:
output_dir = order_d["output_dir"]
output_dir = Path(output_dir).resolve()
output_dir.mkdir(exist_ok=True, parents=True)
dirs = order_d["dirs"]
onedir = order_d["onedir"]
(file_patterns,) = self.order.get_dict_entries("file_patterns")
log(logging.INFO, f"Output directory: {output_dir}")
success = True
_download_speed_tracker.start()
reassembling_futures = []
for product in self.order.get_products(self.datastore):
download_futures = []
self.job_identificator.register(product)
with self.order.dict_from_file() as order_d:
state = order_d["products_to_process"][product._id]["server_state"]
if state == "DONE":
continue
if file_patterns:
entries = product.entries
filtered_entries = []
for pattern in file_patterns:
matches = fnmatch.filter(entries, pattern)
filtered_entries.extend(matches)
entries = filtered_entries
else:
entries = [None] # type: ignore
for entry in entries:
job_id = self.job_identificator.job_id_tuple(product)
try:
with product.open(entry=entry) as fsrc:
fsrc_name = fsrc.name
content_size = _get_content_size(fsrc)
except eumdac.product.ProductError as e:
logger.error(f"{_print_job_id_info(job_id)} Skipping download: {e}")
success = False
continue
output = _compute_output_path(
product,
fsrc_name,
dirs,
onedir,
entry,
output_dir,
)
if _already_present(
product,
output,
job_id,
self.check_integrity,
):
continue
funcs, chunk_dict = get_download_funcs(
product,
entry,
output,
content_size,
job_id,
self.num_download_threads,
self.chunk_size,
)
for func, args in funcs:
download_futures.append(self.download_executor.pool_submit(func, *args))
reassembling_futures.append(
self.reassembling_executor.pool_submit(
ReassembleChunkFunc(),
download_futures,
chunk_dict,
product,
output,
self.check_integrity,
job_id,
self.order,
)
)
for f in concurrent.futures.as_completed(reassembling_futures):
success = success and f.result()
return success
def get_download_funcs(
product: Product,
entry: Optional[str],
output: Path,
content_size: Optional[int],
job_id: Tuple[int, str],
num_threads: int,
chunk_size: Optional[int],
) -> Tuple[List[Any], Mapping[Path, Optional[Tuple[int, int]]]]:
# download chunks
with tempfile.TemporaryDirectory(dir=output.parent, suffix=".tmp") as tempdir:
chunk_dict: Mapping[Path, Optional[Tuple[int, int]]]
if content_size is None:
chunk_dict = {Path(tempdir) / "chunk.0": None}
else:
if chunk_size is None:
# At this point we know the content size and can do a chunk based download
min_chunk_size = 1024 * 1024 * 100 # 100 MB
chunk_size = max(content_size // (num_threads), min_chunk_size)
chunk_dict = _divide_into_chunks(tempdir, content_size, chunk_size)
log(
logging.INFO,
f"{_print_job_id_info(job_id)} Preparing download of {_print_product(product, output)}, splitting in {len(chunk_dict)} chunks.",
)
download_funcs = []
chunk_range: Optional[Tuple[int, int]]
for chunk_name, chunk_range in chunk_dict.items():
if len(chunk_dict) == 1:
chunk_range = None
download_fname = chunk_name
download_fname.parent.mkdir(parents=True, exist_ok=True)
log(
logging.DEBUG,
f"Scheduling DownloadChunkFunc fo {product}, with range {chunk_range} to {download_fname}",
)
download_funcs.append(
(
DownloadChunkFunc(),
(job_id, product, entry, download_fname, chunk_range),
)
)
return download_funcs, chunk_dict
class ReassembleChunkFunc(EumdacFutureFunc):
def __call__(
self,
download_futures: List[concurrent.futures.Future], # type: ignore
chunk_dict: Mapping[Path, Tuple[int, int]],
product: Product,
output: Path,
check_integrity: bool,
job_id: Tuple[int, str],
order: Order,
) -> bool:
success = False
# wait for all downloads to be completed
while True:
if self.aborted:
# delete temp dir
shutil.rmtree(list(chunk_dict)[0].parent)
raise KeyboardInterrupt()
if all(x.done() for x in download_futures):
break
time.sleep(0.1)
# check if all chunks are present and have the expected size
if not _check_chunks(chunk_dict, _print_job_id_info(job_id)):
# avoid reporting errors when the process is interrupted
if not self.aborted:
log(
logging.ERROR,
f"{_print_job_id_info(job_id)} Could not verify all chunks from {product}",
)
success = False
else:
# reassemble the chunks into the outputfile
_reassemble_from_chunks(chunk_dict, output)
# delete temp dir
shutil.rmtree(list(chunk_dict)[0].parent)
if not check_integrity:
success = True
else:
if product.md5 is None:
log(
logging.WARN,
f"{_print_job_id_info(job_id)} Skipping integrity check: no MD5 metadata found for {_print_product(product, output)}",
)
success = True
elif not _md5_check(output, product.md5):
log(
logging.WARN,
f"{_print_job_id_info(job_id)} Integrity check failed for {_print_product(product, output)} with MD5: {product.md5}",
)
success = False
else:
log(
logging.INFO,
f"{_print_job_id_info(job_id)} Integrity check successful for {_print_product(product, output)} with MD5: {product.md5}",
)
success = True
if success:
order.update(None, product._id, "DONE")
log(
logging.INFO,
f"{_print_job_id_info(job_id)} Download complete: {_print_product(product, output)}, current speed: {_download_speed_tracker.get_current_speed():.2f} MB/s",
)
else:
order.update(None, product._id, "FAILED")
log(logging.ERROR, f"{_print_job_id_info(job_id)} Download failure: {product}")
return success
class DownloadChunkFunc(EumdacFutureFunc):
def __call__(
self,
job_id: Tuple[int, str],
product: Product,
entry: Optional[str],
output: Path,
chunk_range: Optional[Tuple[int, int]],
) -> None:
output.parent.mkdir(exist_ok=True, parents=True)
with output.open("wb") as outf:
if chunk_range is not None:
bytes_to_read = chunk_range[1] - chunk_range[0]
log(
logging.DEBUG,
f"{job_id} Downloading {bytes_to_read} bytes of {output} [chunk-based]",
)
else:
log(logging.DEBUG, f"{job_id} Downloading {output} [full-file]")
if self.aborted:
raise KeyboardInterrupt()
modified_referer = f"{eumdac.common.headers['referer']} JobID: {job_id[1]}"
with product.open(
entry=entry,
chunk=chunk_range,
custom_headers={
"referer": modified_referer,
},
) as fsrc:
while True:
if self.aborted:
raise KeyboardInterrupt()
chunk = fsrc.read(1024 * 1024)  # type: ignore
if not chunk:
break
# report the number of bytes actually read, not the nominal chunk size
_download_speed_tracker.update(len(chunk))
outf.write(chunk)
log(logging.DEBUG, f"{job_id} Download {output} finished")
def _get_content_size(fsrc: IO[bytes]) -> Optional[int]:
if not hasattr(fsrc, "getheader"):
return None
content_size_header = fsrc.getheader("Content-Length")
if not content_size_header:
return None
return int(content_size_header)
def _check_chunks(chunks: Mapping[Path, Tuple[int, int]], job_id: Optional[str] = "") -> bool:
for fname, chunk_range in chunks.items():
if not fname.exists():
log(logging.ERROR, f"{job_id} Error checking chunk {fname}: file does not exist")
return False
if chunk_range is None:
# full-file downloads carry no byte range, so there is no expected size to verify
continue
expected_chunk_size = chunk_range[1] - chunk_range[0]
if fname.stat().st_size != expected_chunk_size:
log(
logging.ERROR,
f"{job_id} Error checking chunk {fname}: size mismatch, expected {expected_chunk_size}, got {fname.stat().st_size}",
)
return False
return True
def _reassemble_from_chunks(chunks: Mapping[Path, Tuple[int, int]], output_fname: Path) -> Path:
output_fname = Path(output_fname)
chunkdir = list(chunks)[0].parent
with output_fname.open("wb") as binfile:
for i, _ in enumerate(chunks):
chunk_file = chunkdir / f"chunk.{i}"
with chunk_file.open("rb") as chunkfile:
binfile.write(chunkfile.read())
return output_fname
def _md5_check(file_to_check: Path, expected_md5: str) -> bool:
md5sum = md5()
with file_to_check.open("rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
md5sum.update(chunk)
return expected_md5 == md5sum.hexdigest()
def _compute_output_path(
product: Product,
fsrc_name: str,
dirs: bool,
onedir: bool,
entry: Optional[str],
output_dir: Path,
) -> Path:
output = output_dir / fsrc_name
if dirs or (entry and not onedir):
# when the dirs or entry flags are used
# a subdirectory is created
# to avoid overwriting common files
# unless the onedir flag has been provided
output_subdir = output_dir / f"{product}"
output_subdir.mkdir(exist_ok=True)
output = output_subdir / fsrc_name
return output
def _already_present(
product: Product,
output: Path,
job_id: Tuple[int, str],
check_integrity: bool,
) -> bool:
if output.is_file():
if check_integrity and product.md5 is not None:
# md5 check
md5sum = md5()
with output.open("rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
md5sum.update(chunk)
if product.md5 == md5sum.hexdigest():
log(
logging.INFO,
f"{_print_job_id_info(job_id)} Skip {output.name}: file already exists and passes integrity check with MD5 (computed/expected): {md5sum.hexdigest()}/{product.md5}",
)
return True
else:
log(
logging.INFO,
f"{_print_job_id_info(job_id)} Found existing {output.name}, but failed integrity check with MD5 (computed/expected): {md5sum.hexdigest()}/{product.md5}",
)
return False
else:
if check_integrity:
log(
logging.WARN,
f"{_print_job_id_info(job_id)} Skipping integrity check: no MD5 metadata found for {output.name}",
)
log(logging.INFO, f"{_print_job_id_info(job_id)} Skip {output}, file already exists")
return True
return False
def _print_product(product: Product, output: Path) -> str:
return str(product) if output.name.find(str(product)) > -1 else str(f"{product}/{output.name}")
def _print_job_id_info(job_id: Tuple[int, str]) -> str:
return f"Job {job_id[0]}:"
eumdac-3.0.0/eumdac/endpoints.ini 0000664 0000000 0000000 00000002425 14720105632 0016715 0 ustar 00root root 0000000 0000000 [DEFAULT]
api = https://api.eumetsat.int
version_os = 1.0.0
version_browse = 1.0.0
version_download = 1.0.0
[token]
token = %(api)s/token
revoke = %(api)s/revoke
[datastore]
data = %(api)s/data
browse = %(data)s/browse/%(version_browse)s
browse collections = %(browse)s/collections
browse collection = %(browse collections)s/%(collection_id)s
browse product = %(browse collection)s/products/%(product_id)s
download = %(data)s/download/%(version_download)s
download product = %(download)s/collections/%(collection_id)s/products/%(product_id)s
download product metadata = %(download product)s/metadata
search = %(data)s/search-products/%(version_os)s/os
search options = %(data)s/search-products/%(version_os)s/osdd
subscriptions = %(data)s/subscribe/subscription
subscription = %(subscriptions)s/%(subscription_id)s
[tailor]
epcs = %(api)s/epcs
customisations = %(epcs)s/customisations
customisation = %(customisations)s/%(customisation_id)s
customisation log = %(customisation)s/log
delete = %(customisations)s/delete
download = %(epcs)s/download
products = %(epcs)s/products
info = %(epcs)s/info
user info = %(epcs)s/user_info
report quota = %(epcs)s/report_quota
formats = %(epcs)s/formats
chains = %(epcs)s/chains
filters = %(epcs)s/filters
rois = %(epcs)s/rois
quicklooks = %(epcs)s/quicklooks
eumdac-3.0.0/eumdac/errors.py 0000664 0000000 0000000 00000004225 14720105632 0016077 0 ustar 00root root 0000000 0000000 """
This module defines errors and error handling functions for eumdac.
"""
import json
from typing import *
from urllib.parse import urlparse
import requests
def eumdac_raise_for_status(
msg: str, response: requests.Response, exc_cls: Type[Exception]
) -> None:
"""Raises an EumdacError with the given message wrapping an HTTPError, if one occurred.
Raises
------
- `EumdacError`
If the provided response raises an HTTPError
"""
try:
response.raise_for_status()
except requests.HTTPError as exc:
url = urlparse(response.url)
response_text = response.text
if not response_text and response.raw:
response_text = response.raw.data
try:
extra_info = json.loads(response_text)
except json.decoder.JSONDecodeError:
extra_info = {"text": response_text}
extra_info.update({"url": url, "status": response.status_code})
if response.status_code == 401:
msg += " - Authentication error (401)"
elif response.status_code == 403:
msg += " - Unauthorised (403)"
elif response.status_code == 404:
msg += " - Not found (404)"
if response.status_code >= 500:
msg += f" (due to a server-side error ({response.status_code}))"
exception = exc_cls(msg, extra_info)
raise exception from exc
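# A minimal usage sketch, assuming `response` is a requests.Response obtained
# elsewhere; on a 404 the call raises the given error class with extra context:
#
#   eumdac_raise_for_status("Could not fetch resource", response, EumdacError)
#   # -> EumdacError("Could not fetch resource - Not found (404)"),
#   #    chained from the underlying requests.HTTPError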
class EumdacError(Exception):
"""Common base class for eumdac errors
Attributes
----------
- `msg` : *str*
exception text
- `extra_info` : *Optional[Dict[str, Any]]*
Dictionary containing additional information
The title and description entries are embedded into the msg attribute, if present
"""
def __init__(self, msg: str, extra_info: Optional[Dict[str, Any]] = None):
"""Init the error, putting common extra_info members into the message."""
self.extra_info = extra_info
if extra_info:
if "title" in extra_info:
msg = f"{msg} - {extra_info['title']}"
if "description" in extra_info:
msg = f"{msg}. {extra_info['description']}"
super().__init__(msg)
eumdac-3.0.0/eumdac/fake.py 0000664 0000000 0000000 00000005213 14720105632 0015467 0 ustar 00root root 0000000 0000000 # type: ignore
"""Fake DataStore, DataTailor and Product that will be used when adding the --test
option in favour of the real implementations. Only useful for unittests."""
import io
from contextlib import contextmanager
class FakeDataStore:
"""Fake DataStore for testing."""
def get_collection(self, collection_id):
"""Return a FakeCollection with `collection_id`."""
return FakeCollection(collection_id)
def get_product(self, collection_id, product_id):
"""Return a FakeProduct with `product_id` from `collection_id`."""
return FakeProduct(collection_id, product_id)
class FakeProduct:
"""FakeProduct for testing."""
def __init__(self, collection_id, product_id):
"""Init from `collection_id` and `product_id`."""
self._id = product_id
self.collection = FakeCollection(collection_id)
self.entries = ["entry1.nc", "entry2.nc"]
def __str__(self):
"""Return the id as str representation"""
return str(self._id)
def open(self, entry=None, chunk=None, custom_headers=None):
"""Return a fake stream as the contents of the product."""
if entry:
return FakeStream(f"{self._id}-{entry}")
return FakeStream(self._id)
@property
def md5(self):
"""Return the md5 of the fake stream returned on open."""
import hashlib
with self.open(None) as f:
return hashlib.md5(f.read()).hexdigest()
class FakeStream:
def __init__(self, name):
self.decode_content = True
self.name = name
self.content = io.BytesIO(b"Content")
def getheader(self, header):
if header == "Content-Length":
# Return a fixed length (7) for 'Content-Length' header.
return 7
return None
def read(self, num=None):
return self.content.read(num)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
pass
class FakeCollection:
"""Fake Collection for testing."""
def __init__(self, collection_id):
"""Init from `collection_id`."""
self._id = collection_id
def __str__(self):
"""Return id as the str representation."""
return str(self._id)
def search(self, **query):
"""Return fake search results."""
dtstart = query["dtstart"]
dtend = query["dtend"]
return [
FakeProduct(self._id, f"prod_{dtstart.isoformat().strip().replace(':', '-')}"),
FakeProduct(self._id, f"prod_{dtend.isoformat().strip().replace(':', '-')}"),
]
class FakeDataTailor:
"""Fake DataTailor for testing."""
pass
eumdac-3.0.0/eumdac/futures.py 0000664 0000000 0000000 00000005541 14720105632 0016262 0 ustar 00root root 0000000 0000000 """
Module: eumdac.futures
This module defines classes for managing a custom thread pool executor with cooperative function handling.
Classes:
- EumdacFutureFunc: Represents a callable function with cooperative handling.
- EumdacThreadPoolExecutor: Extends ThreadPoolExecutor to manage cooperative function execution.
Usage:
1. Create instances of EumdacFutureFunc to define callable functions with cooperative handling.
2. Use EumdacThreadPoolExecutor to submit functions to a thread pool with cooperative handling.
"""
import concurrent.futures
import sys
from typing import Any, List
from eumdac.logging import logger
class EumdacFutureFunc:
def __init__(self) -> None:
"""
Initialize the EumdacFutureFunc object.
"""
self.aborted = False
def __call__(self, *args: Any, **kwargs: Any) -> Any:
"""
Placeholder for the callable function. Must be implemented in subclasses.
Raises:
- NotImplementedError: If the method is called directly without being implemented in a subclass.
"""
raise NotImplementedError()
def abort(self) -> None:
"""
Set the 'aborted' flag to True, indicating that the function has been aborted.
This needs to be handled cooperatively in the Subclasses
"""
logger.debug(f"{self} abort request received")
self.aborted = True
class EumdacThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor):
def __init__(self, *args: Any, **kwargs: Any) -> None:
"""
Initialize the EumdacThreadPoolExecutor object.
Attributes:
- functors (List[EumdacFutureFunc]): List to store EumdacFutureFunc instances.
"""
self.functors: List[EumdacFutureFunc] = []
super().__init__(*args, **kwargs)
def pool_shutdown(self) -> None:
"""
Abort all functions in the 'functors' list and ask them to gracefully shut down.
"""
logger.debug(f"{self} pool_shutdown issued")
for f in self.functors:
logger.debug(f"{self} aborting {f}")
f.abort()
if sys.version_info >= (3, 9):
return super().shutdown(wait=True, cancel_futures=True)
else:
return super().shutdown(wait=True)
def pool_submit(
self, fn: EumdacFutureFunc, *args: Any, **kwargs: Any
) -> "concurrent.futures.Future[Any]":
"""
Submit a function to the thread pool executor and add it to the 'functors' list.
Args:
- fn (EumdacFutureFunc): The function to be submitted.
- *args: Variable length argument list.
- **kwargs: Arbitrary keyword arguments.
Returns:
- concurrent.futures.Future[Any]: A Future object representing the execution of the submitted function.
"""
self.functors.append(fn)
return super().submit(fn, *args, **kwargs)
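# A minimal cooperative-function sketch (the subclass and the sleep loop are
# illustrative, not part of eumdac):
#
#   import time
#
#   class SleepFunc(EumdacFutureFunc):
#       def __call__(self, seconds: float) -> bool:
#           for _ in range(int(seconds * 10)):
#               if self.aborted:
#                   return False
#               time.sleep(0.1)
#           return True
#
#   executor = EumdacThreadPoolExecutor(max_workers=2)
#   future = executor.pool_submit(SleepFunc(), 5.0)
#   executor.pool_shutdown()  # asks SleepFunc to stop cooperatively, then shuts down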
eumdac-3.0.0/eumdac/job_id.py 0000664 0000000 0000000 00000003441 14720105632 0016010 0 ustar 00root root 0000000 0000000 """Module providing the JobIdentifier, a helper class for consistent identification and logging of activities."""
import uuid
from threading import Lock
from typing import Any, Dict, Tuple
from eumdac.errors import EumdacError
class JobIdentifier:
"""Wraps an activity as an identified job."""
def __init__(self, total_jobs: int):
"""Init considering the expected `total_jobs`."""
self.current_count = 0
self.total_jobs = total_jobs
self._lock = Lock()
self.registered_objects: Dict[Any, Tuple[int, str]] = {}
def register(self, obj: Any) -> None:
"""Register a new job from `obj`."""
if obj in self.registered_objects:
raise JobIdError(f"Object '{obj}' already registered.")
self.registered_objects[obj] = (self._make_new_job_id(), str(uuid.uuid4()))
def job_id_tuple(self, obj: Any) -> Tuple[int, str]:
"""Return a tuple that identifies the job for `obj`, if any."""
try:
return self.registered_objects[obj]
except KeyError:
raise JobIdError(
f"No Job ID for '{obj}'. Available ones: {list(self.registered_objects.keys())}"
)
def job_id_str(self, obj: Any) -> str:
return f"Job {self.job_id_tuple(obj)[0]}"
def _make_new_job_id(self) -> int:
"""Reserve a new job id, if the total has not been reached."""
with self._lock:
self.current_count += 1
if self.current_count > self.total_jobs:
raise JobIdError(
"Too many Job IDs requested. "
f"Expected a maximum of {self.total_jobs} Job ID requests"
)
return self.current_count
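# A minimal usage sketch (the product ids are illustrative):
#
#   jobs = JobIdentifier(total_jobs=2)
#   jobs.register("product-a")
#   jobs.register("product-b")
#   jobs.job_id_str("product-a")    # -> "Job 1"
#   jobs.job_id_tuple("product-b")  # -> (2, '<random uuid4>')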
class JobIdError(EumdacError):
"""JobIdentifier related errors."""
pass
eumdac-3.0.0/eumdac/local_tailor.py 0000664 0000000 0000000 00000006532 14720105632 0017232 0 ustar 00root root 0000000 0000000 """Module for interfacing with local Data Tailor instances."""
from pathlib import Path
from urllib.parse import urlparse
from eumdac.config import get_url_path
from eumdac.datatailor import DataTailor
from eumdac.errors import EumdacError
from eumdac.token import AnonymousAccessToken, URLs
import sys
if sys.version_info < (3, 9):
from typing import Iterable
else:
from collections.abc import Iterable
def get_tailor_id(filepath: Path) -> str:
"""get a tailor id from a configuration file path"""
return filepath.stem
def get_tailor_path(tailor_id: str) -> Path:
"""get a configuration file path from a tailor id"""
for fn in all_url_filenames():
if tailor_id == get_tailor_id(fn):
return fn
raise EumdacError(f"local-tailor id not found: {tailor_id}")
def get_urls(filepath: Path) -> URLs:
"""retrieve a URLs instance from an INI file path"""
return URLs(str(filepath))
def all_url_filenames(prefix: str = "") -> Iterable[Path]:
"""retrieve file Paths of configured URL files"""
url_dir = get_url_path()
glob = "*.ini"
if len(prefix) > 0:
glob = f"{prefix}#*.ini"
return sorted(url_dir.glob(glob))
def new_url_filename(tailor_id: str) -> Path:
"""create a Path pointing to a URLs configuration INI file that can be created subsequently"""
return get_url_path() / Path(f"{tailor_id}.ini")
def remove_url(url_name: str) -> None:
"""remove a URLs configuration INI from configuration directory"""
p = get_url_path() / Path(f"{url_name}.ini")
if p.exists():
p.unlink()
def resolve_url(url_name: str) -> URLs:
"""retrieve a URLs instance from an identifier"""
p = get_url_path() / Path(f"{url_name}.ini")
if p.exists():
return URLs(str(p))
else:
raise EumdacError(f"{url_name} not found at {str(p)}.")
def get_local_tailor(tailor_id: str) -> DataTailor:
"""create a DataTailor instance using a URLs configuration identified via configuration identifier"""
url = resolve_url(tailor_id)
token = AnonymousAccessToken(urls=url)
return DataTailor(token)
def new_local_tailor(tailor_id: str, tailor_url: str) -> Path:
"""create a configuration for a local-tailor instance specifying an identifer and base url"""
parsed_url = urlparse(tailor_url)
if not parsed_url.scheme:
raise EumdacError("No scheme provided")
if not parsed_url.hostname:
raise EumdacError("No hostname provided")
if not parsed_url.port:
raise EumdacError("No port provided")
filepath = new_url_filename(tailor_id)
with filepath.open("w") as f:
new_url = URLs()
new_url.set("tailor", "epcs", f"{tailor_url}/api/v1")
new_url.write(f)
return filepath
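# A minimal usage sketch (the identifier and URL are illustrative; the URL must
# include scheme, hostname and port):
#
#   path = new_local_tailor("my-tailor", "http://localhost:40000")
#   datatailor = get_local_tailor("my-tailor")
#   print(is_online(path))
#   remove_local_tailor("my-tailor")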
def remove_local_tailor(tailor_id: str) -> None:
"""remove a local-tailor configuration by specifying its' identifier"""
remove_url(tailor_id)
def is_online(filepath: Path) -> bool:
"""determine the state of a local-tailor instance by specifying Path to configuration file"""
try:
dt = get_local_tailor(get_tailor_id(filepath))
_ = dt.info
return True
except Exception:
return False
def get_api_url(filepath: Path) -> str:
"""retrieve local-tailor base api url from a given local-tailor configuration file path"""
urls = get_urls(filepath)
return urls.get("tailor", "epcs")
eumdac-3.0.0/eumdac/lockfile.py 0000664 0000000 0000000 00000004547 14720105632 0016362 0 ustar 00root root 0000000 0000000 """Lockfile implementation mindful of OS specifics."""
import sys
from contextlib import contextmanager
from datetime import datetime, timedelta
from pathlib import Path
from threading import Lock
from time import sleep
from typing import Generator, IO, Optional
if sys.platform == "win32":
import msvcrt
else:
import fcntl
@contextmanager
def open_locked(
lockfile_path: Path,
timeout: Optional[timedelta] = None,
delete: Optional[bool] = False,
) -> Generator[Optional[IO[str]], None, None]:
"""Open a file, locking it."""
with open_locked.lock: # type: ignore
try:
open_locked.locks # type: ignore
except AttributeError:
open_locked.locks = {} # type: ignore
# create one lock object per file
if lockfile_path not in open_locked.locks: # type: ignore
open_locked.locks[lockfile_path] = Lock() # type: ignore
lock = open_locked.locks[lockfile_path] # type: ignore
start = datetime.now()
lockfile_path.parent.mkdir(exist_ok=True, parents=True)
if timeout:
r = lock.acquire(timeout=timeout.total_seconds())
else:
r = lock.acquire()
if not r:
yield None
else:
while True:
if timeout and datetime.now() - start >= timeout:
lockfile = None
break
lockfile = open(lockfile_path, "w")
if sys.platform == "win32":
try:
msvcrt.locking(lockfile.fileno(), msvcrt.LK_NBLCK, 1)
break
except OSError:
sleep(0.1)
continue
else:
try:
fcntl.flock(lockfile, fcntl.LOCK_EX | fcntl.LOCK_NB)
break
except IOError:
sleep(0.1)
continue
yield lockfile
if lockfile:
if sys.platform == "win32":
msvcrt.locking(lockfile.fileno(), msvcrt.LK_UNLCK, 1)
else:
fcntl.flock(lockfile, fcntl.LOCK_UN)
lockfile.close()
if delete:
if sys.version_info >= (3, 8):
lockfile_path.unlink(missing_ok=False)
else:
lockfile_path.unlink()
lock.release()
setattr(open_locked, "lock", Lock())
eumdac-3.0.0/eumdac/logging.py 0000664 0000000 0000000 00000012234 14720105632 0016210 0 ustar 00root root 0000000 0000000 """Module used for logging in eumdac CLI."""
import logging
import platform
import sys
from pathlib import Path
from typing import Callable, Iterable, Tuple, Any, Optional
def gen_table_printer(
print_func: Callable[[str], None],
columns: Iterable[Tuple[str, int]],
header_sep: str = "-",
column_sep: str = " ",
) -> Callable[[Iterable[str]], None]:
headings = [x[0] for x in columns]
colwidths = [x[1] for x in columns]
fmt_string = column_sep.join(["{:<" + str(x) + "}" for x in colwidths])
contentseps = [header_sep * x for x in colwidths]
print_func(fmt_string.format(*headings))
print_func(fmt_string.format(*contentseps))
return lambda c: print_func(fmt_string.format(*[str(x) for x in c]))
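# A minimal usage sketch (column names and widths are illustrative):
#
#   print_row = gen_table_printer(print, [("Product", 20), ("Status", 10)])
#   print_row(["PRODUCT_A", "DONE"])
#   print_row(["PRODUCT_B", "FAILED"])
#
# which prints a fixed-width header row, a separator row and one line per call.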
class CustomFormatter(logging.Formatter):
"""Logging colored formatter, adapted from https://stackoverflow.com/a/56944256/3638629"""
def __init__(self, fmt: str, color: bool):
super().__init__()
self.fmt = fmt
self.formats = {
logging.INFO: colorize(self.fmt, "grey", True),
logging.DEBUG: colorize(self.fmt, "blue", not color),
logging.WARNING: colorize(self.fmt, "yellow", not color),
logging.ERROR: colorize(self.fmt, "bold_red", not color),
logging.CRITICAL: colorize(self.fmt, "bold_red_underline", not color),
}
def format(self, record: logging.LogRecord) -> str:
log_fmt = self.formats.get(record.levelno)
formatter = logging.Formatter(log_fmt)
return formatter.format(record)
def colorize(txt: str, color: str, no_color: bool = False) -> str:
known_colors = {
"grey": "\x1b[37;1m",
"blue": "\x1b[94;1m",
"yellow": "\x1b[93;1m",
"bold_red": "\x1b[31;1m",
"bold_red_underline": "\x1b[31;1;4m",
}
reset = "\x1b[0m"
if no_color:
return txt
return known_colors[color] + txt + reset
class LevelFilter(logging.Filter):
def __init__(self, levels: Iterable[str]):
self.levels = levels
def filter(self, record: logging.LogRecord) -> bool:
return record.levelname in self.levels
class TraceFilter(logging.Filter):
def filter(self, record: logging.LogRecord) -> bool:
filename = Path(record.pathname).parts[-1]
record.func_trace = f"{filename}:{record.lineno} {record.funcName}()"
return True
class ProgressBarHandler(logging.StreamHandler): # type:ignore
def __init__(self) -> None:
super().__init__(sys.stdout)
def emit(self, record: logging.LogRecord) -> None:
message = f"{record.msg}\r"
self.stream.write(message)
self.stream.flush()
class EumdacLogger(logging.Logger):
LOGLEVEL_PROGRESS = logging.INFO + 1
def __init__(self, name: str, level: int = logging.NOTSET):
super().__init__(name, level)
self._progress_handler: Optional[ProgressBarHandler] = None
def set_progress_handler(self, handler: ProgressBarHandler) -> None:
self._progress_handler = handler
def progress(self, msg: str, *args: Any, **kwargs: Any) -> None:
self.log(EumdacLogger.LOGLEVEL_PROGRESS, msg, *args, **kwargs)
logging.setLoggerClass(EumdacLogger)
logger = logging.getLogger(__package__) # type:ignore
def init_logger(level: str = "INFO", progress_bars: bool = False) -> None:
loglevels = {
"VERBOSE": logging.DEBUG, # VERBOSE is DEBUG but less technical
"DEBUG": logging.DEBUG,
"INFO": logging.INFO,
"PROGRESS": EumdacLogger.LOGLEVEL_PROGRESS,
"WARNING": logging.WARNING,
"ERROR": logging.ERROR,
"CRITICAL": logging.CRITICAL,
}
global logger
logging.setLoggerClass(EumdacLogger)
logger = logging.getLogger(__package__) # type:ignore
logger.handlers.clear()
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.addFilter(LevelFilter(["INFO"]))
stdout_handler.addFilter(TraceFilter())
progress_handler: ProgressBarHandler = ProgressBarHandler()
progress_handler.addFilter(LevelFilter(["PROGRESS"]))
logging.addLevelName(EumdacLogger.LOGLEVEL_PROGRESS, "PROGRESS")
stderr_handler = logging.StreamHandler(sys.stderr)
stderr_handler.addFilter(
LevelFilter(
[
"WARNING",
"DEBUG",
"ERROR",
"CRITICAL",
]
)
)
stderr_handler.addFilter(TraceFilter())
colorize = sys.stderr.isatty() and not platform.system() == "Windows"
# Avoid having the custom format in VERBOSE
if level in ["DEBUG"]:
formatter = CustomFormatter(
"%(asctime)s | %(threadName)s | " "%(func_trace)-40s - %(levelname)-8s - %(message)s",
color=colorize,
)
else:
formatter = CustomFormatter("%(message)s", color=colorize)
stdout_handler.setFormatter(formatter)
progress_handler.setFormatter(formatter)
stderr_handler.setFormatter(formatter)
logger.addHandler(stdout_handler)
logger.addHandler(stderr_handler)
if progress_bars and sys.stdout.isatty():
logger.addHandler(progress_handler)
logger.set_progress_handler(progress_handler) # type:ignore
logger.setLevel(loglevels[level])
eumdac-3.0.0/eumdac/order.py 0000664 0000000 0000000 00000026572 14720105632 0015707 0 ustar 00root root 0000000 0000000 """Module that enables order management in eumdac CLI."""
import os
import re
import threading
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
from typing import *
import yaml
from eumdac.config import get_config_dir
from eumdac.datatailor import DataTailor
from eumdac.errors import EumdacError
from eumdac.lockfile import open_locked
from eumdac.logging import gen_table_printer
from eumdac.product import Product
from eumdac.tailor_models import Chain
@dataclass
class ProductInfo:
p_id: str
p_dict: Dict[str, Any]
class Order:
def __init__(self, order_file: Optional[Path] = None, order_dir: Optional[Path] = None):
if order_dir is None:
order_dir = get_default_order_dir()
self._order_file = order_file or new_order_filename(order_dir)
self._lock = threading.Lock()
self._update_lock = threading.Lock()
def initialize(
self,
chain: Optional[Chain],
products: Iterable[Product],
output_dir: Path,
file_pattern: Optional[Iterable[str]],
query: Optional[Dict[str, str]],
dirs: bool = False,
onedir: bool = False,
no_warning_logs: bool = True,
) -> None:
self._chain = chain
self._output_dir = output_dir
self._file_patterns = file_pattern
self._no_warning_logs = no_warning_logs
order_info: Dict[str, Any] = {
"file_patterns": file_pattern,
"output_dir": str(output_dir.resolve()),
"query": query,
"dirs": dirs,
"onedir": onedir,
}
if chain:
order_info["type"] = "tailor"
order_info["chain"] = chain.asdict()
order_info["products_to_process"] = {
p._id: {
"col_id": p.collection._id,
"server_state": "UNSUBMITTED",
"customisation": None,
}
for p in products
}
else:
order_info["type"] = "download"
order_info["products_to_process"] = {
p._id: {
"col_id": p.collection._id,
"server_state": "UNSUBMITTED",
}
for p in products
}
with self._lock:
with self._order_file.open("w") as orf:
yaml.dump(
order_info,
orf,
)
def __str__(self) -> str:
return Path(self._order_file).stem
def get_dict_entries(self, *args: str) -> Tuple[Optional[str], ...]:
ret: List[Optional[str]] = []
with self.dict_from_file() as order_d:
for name in args:
try:
ret.append(order_d[name])
except KeyError:
ret.append(None)
return tuple(ret)
def status(self) -> str:
if self.all_done():
return "DONE"
for p_info in self.iter_product_info():
if p_info.p_dict["server_state"] in ("FAILED", "INACTIVE"):
return "FAILED"
return "NOT COMPLETED"
def delete(self) -> None:
os.remove(self._order_file)
def collections(self) -> List[str]:
ret = []
for p_info in self.iter_product_info():
ret.append(p_info.p_dict["col_id"])
return list(set(ret))
def pretty_string(self, print_products: bool = False) -> str:
ret_lines: List[str] = []
(typ, query, chain, output_dir) = self.get_dict_entries(
"type", "query", "chain", "output_dir"
)
ret = [
f"Order {str(self)}",
f"Status: {self.status()}",
f"Collection: {self.collections()}",
"Query:",
]
query_dump = yaml.dump(query).strip()
for line in query_dump.split("\n"):
ret.append(f" {line.rstrip()}")
if chain:
ret.append("Chain:")
chain_dump = yaml.dump(chain).strip()
for line in chain_dump.split("\n"):
ret.append(f" {line.rstrip()}")
ret.append(f"Output directory: {output_dir}")
if print_products:
print_func = ret.append
if typ == "tailor":
printer = gen_table_printer(
print_func, [("Product", 60), ("Job Id", 10), ("Status", 12)]
)
for p_info in self.iter_product_info():
state = _compute_state(p_info.p_dict)
printer([p_info.p_id, p_info.p_dict["customisation"], state])
elif typ == "download":
printer = gen_table_printer(print_func, [("Product", 60), ("Status", 12)])
for p_info in self.iter_product_info():
printer([p_info.p_id, p_info.p_dict["server_state"]])
else:
raise NotImplementedError(typ)
return "\n".join(ret)
def _locked_serialize(self, order_dict: Dict[str, Any]) -> None:
with self._lock:
with self._order_file.open("w") as orf:
yaml.dump(order_dict, orf)
@contextmanager
def dict_from_file(self) -> Generator[Dict[str, Any], None, None]:
with self._lock:
ret_dict = self._deserialize()
yield ret_dict
def _deserialize(self) -> Dict[str, Any]:
with self._order_file.open("r") as orf:
ret_val = yaml.safe_load(orf)
if ret_val is None:
raise EumdacError(f"{self._order_file.resolve()} is corrupted.")
return ret_val
def remote_delete_failed(self, datatailor: DataTailor) -> None:
for p_info in self.iter_product_info():
if p_info.p_dict["server_state"] == "FAILED":
customisation_id = p_info.p_dict["customisation"]
if customisation_id:
try:
customisation = datatailor.get_customisation(customisation_id)
customisation.delete()
except EumdacError:
continue
def resolve_product_num(self, product_id: str) -> Tuple[int, int]:
num_products = len(list(self.iter_product_info()))
for num, p_info in enumerate(self.iter_product_info(), 1):
if p_info.p_id == product_id:
return num_products, num
raise KeyError(product_id)
def update(
self,
customisation_id: Optional[str],
product_id: str,
status: Optional[str] = None,
download_states: Optional[Dict[str, str]] = None,
) -> None:
with self._update_lock:
with self.dict_from_file() as order:
if status:
order["products_to_process"][product_id]["server_state"] = status
if download_states:
order["products_to_process"][product_id]["download_states"] = download_states
if customisation_id:
order["products_to_process"][product_id]["customisation"] = customisation_id
self._locked_serialize(order)
def reset_states(self) -> None:
with self.dict_from_file() as order:
products = order["products_to_process"]
for prod_id, prod_info in products.items():
if "download_states" in prod_info:
del prod_info["download_states"]
prod_info["server_state"] = "UNSUBMITTED"
if order["type"] == "tailor":
prod_info["customisation"] = None
self._locked_serialize(order)
def iter_product_info(self) -> Iterable[ProductInfo]:
with self.dict_from_file() as order:
orders = order["products_to_process"].items()
for p_id, p_dict in orders:
yield ProductInfo(p_id, p_dict)
def get_products(self, datastore: Any) -> Iterable[Product]:
for p_info in self.iter_product_info():
yield datastore.get_product(p_info.p_dict["col_id"], p_info.p_id)
def all_done(self) -> bool:
(typ,) = self.get_dict_entries("type")
if typ == "tailor":
return self._all_done_tailor()
elif typ == "download":
return self._all_done_download()
else:
raise NotImplementedError(typ)
def _all_done_tailor(self) -> bool:
for p_info in self.iter_product_info():
if not "download_states" in p_info.p_dict:
return False
for _fname, state in p_info.p_dict["download_states"].items():
if state != "DOWNLOADED":
return False
return True
def _all_done_download(self) -> bool:
return all([pi.p_dict["server_state"] == "DONE" for pi in self.iter_product_info()])
def _compute_state(p_dict: Dict[str, Any]) -> str:
server_state = p_dict["server_state"]
if server_state != "DONE":
return server_state
if not "download_states" in p_dict:
return "DONE (NOT DOWNLOADED)"
for _fname, state in p_dict["download_states"].items():
if state != "DOWNLOADED":
return "DONE (NOT DOWNLOADED)"
return "DONE"
def get_default_order_dir() -> Path:
order_dir = get_config_dir() / "orders"
order_dir.mkdir(exist_ok=True, parents=True)
return order_dir
def highest_number_in_order_filenames(file_paths: Iterable[Path]) -> int:
number_pattern = re.compile(r".*#([\d]+).yml")
order_numbers = [int(number_pattern.findall(fname.name)[0]) for fname in file_paths]
return max(order_numbers)
def highest_prefix_in_order_filenames(file_paths: Iterable[Path]) -> str:
fpaths = [f for f in file_paths if "#" in f.stem]
return max([fpath.stem.split("#")[0] for fpath in fpaths])
def all_order_filenames(
order_dir: Path,
prefix: str = "",
) -> Iterable[Path]:
glob = "*.yml"
if len(prefix) > 0:
glob = f"{prefix}#*.yml"
return sorted(order_dir.glob(glob), key=_dt_from_order_filename)
def _dt_from_order_filename(fn: Path) -> datetime:
try:
date, number = fn.stem.split("#")
return datetime.fromisoformat(date) + timedelta(milliseconds=int(number))
except:
return datetime.fromtimestamp(fn.stat().st_ctime)
def latest_order_file(order_dir: Path) -> Path:
filepaths = all_order_filenames(order_dir)
prefix = highest_prefix_in_order_filenames(filepaths)
filepaths = all_order_filenames(order_dir, prefix)
number = highest_number_in_order_filenames(filepaths)
return order_dir / Path(f"{prefix}#{number:04d}.yml")
def new_order_filename(order_dir: Path) -> Path:
with open_locked(order_dir / "lock") as lf:
order_prefix = f"{datetime.now().strftime('%Y-%m-%d')}"
all_filenames = list(all_order_filenames(order_dir, order_prefix))
order_fn = order_dir / Path(f"{order_prefix}#{1:04d}.yml")
if any(all_filenames):
highest_existing_number = highest_number_in_order_filenames(all_filenames)
order_fn = order_dir / Path(f"{order_prefix}#{int(highest_existing_number)+1:04d}.yml")
order_fn.touch(exist_ok=False)
return order_fn
def resolve_order(order_dir: Path, order_name: str) -> Order:
if order_name == "latest":
filenames = list(all_order_filenames(order_dir))
if len(filenames) == 0:
raise EumdacError("No order files found.")
return Order(latest_order_file(order_dir))
return Order(order_dir / Path(f"{order_name}.yml"))
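# Illustrative usage sketch (not part of the library): resolving the most recent
# order from the default order directory and printing its summary. This assumes at
# least one order file already exists; resolve_order raises EumdacError otherwise.
def _example_print_latest_order() -> None:
    order = resolve_order(get_default_order_dir(), "latest")
    print(order.pretty_string(print_products=True))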
eumdac-3.0.0/eumdac/product.py 0000664 0000000 0000000 00000044766 14720105632 0016261 0 ustar 00root root 0000000 0000000 """Module containing the Data Store Product class."""
from __future__ import annotations
import re
from contextlib import contextmanager
from datetime import datetime
from functools import total_ordering
from typing import TYPE_CHECKING, Tuple, Dict
from eumdac.request import get
from eumdac.logging import logger
if TYPE_CHECKING: # pragma: no cover
import sys
from typing import IO, Any, Optional
if sys.version_info < (3, 9):
from typing import Generator, Mapping, MutableMapping, Pattern, Sequence
else:
from collections.abc import Mapping, MutableMapping, Generator, Sequence
from re import Pattern
from eumdac.datastore import DataStore
from eumdac.collection import Collection
from eumdac.errors import EumdacError, eumdac_raise_for_status
import eumdac.common
@total_ordering
class Product:
"""Product of a Collection in the Data Store
Attributes:
-----------
- `datastore`:
Reference to the Data Store
- `download_url`:
URL to the download endpoint of the Data Store
Arguments:
----------
- `collection_id`:
Data Store ID of the collection
- `product_id`:
Data Store ID of the product
- `datastore`:
Reference to the Data Store
"""
_id: str
datastore: DataStore
collection: Collection
_browse_properties: Optional[Mapping[str, Any]] = None
_download_properties: Optional[Mapping[str, Any]] = None
_geometry: Optional[Mapping[str, Any]] = None
_entries: Optional[Sequence[str]] = None
_extract_filename: Pattern[str] = re.compile(r'filename="(.*?)"')
_extract_sensing_time: Pattern[str] = re.compile(
r"(?P<start>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d{1,6})?)?"
r"\d*Z?[\\/]+"
r"(?P<end>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d{1,6})?)?"
)
_extract_ingestion_time: Pattern[str] = re.compile(
r"(?P<updated>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d{1,6})?)?"
)
def __init__(self, collection_id: str, product_id: str, datastore: DataStore) -> None:
self._id = product_id
self.datastore = datastore
self.collection = self.datastore.get_collection(collection_id)
def __str__(self) -> str:
return str(self._id)
def __repr__(self) -> str:
return f"{self.__class__}({self.collection._id}, {self._id})"
def __eq__(self, other: Any) -> bool:
return isinstance(other, self.__class__) and (
self.collection._id,
self._id,
) == (
other.collection._id,
other._id,
)
def __lt__(self, other: Product) -> bool:
return (self.collection._id, self._id) < (other.collection._id, other._id)
def __hash__(self) -> int:
return hash(repr(self))
def _parse_browse_properties(
self, properties: dict[str, str | Any], geometry: Optional[dict[str, Any]]
) -> None:
# Remove collection ID
properties.pop("parentIdentifier")
# Bring MD5 to base properties
if "md5" in properties["extraInformation"]:
properties.update({"md5": properties["extraInformation"]["md5"]}) # type: ignore
else:
properties.update({"md5": None})
self._browse_properties = properties
# Handle geometry
if not geometry or len(geometry) == 0:
self._geometry = None
else:
self._geometry = geometry
# Handle entries
self._entries = [link["title"] for link in properties["links"]["sip-entries"]] # type: ignore
def _ensure_properties(self) -> None:
if self._browse_properties is not None:
return
url = self.datastore.urls.get(
"datastore",
"browse product",
vars={"collection_id": self.collection._id, "product_id": self._id},
)
auth = self.datastore.token.auth
response = get(
url,
params={"format": "json"},
auth=auth,
headers=eumdac.common.headers,
)
eumdac_raise_for_status(
f"Product {self._id} not found in {self.collection._id}",
response,
ProductError,
)
# DSSHD-658: Better reporting of parsing errors
try:
response_json = response.json()
except Exception as decode_error:
logger.debug(f"Failed to parse JSON response: {response.text}")
raise EumdacError(
"Could not parse response received from server",
{"response": response.text, "status_code": response.status_code},
) from decode_error
properties = response_json["properties"]
geometry: Optional[dict[str, Any]] = None
if "geometry" in response_json:
geometry = response_json["geometry"]
self._parse_browse_properties(properties, geometry)
def _ensure_download_properties(self) -> None:
if self._download_properties is not None:
return
url = self.datastore.urls.get(
"datastore",
"download product metadata",
vars={"collection_id": self.collection._id, "product_id": self._id},
)
auth = self.datastore.token.auth
response = get(
url,
params={"format": "json"},
auth=auth,
headers=eumdac.common.headers,
)
eumdac_raise_for_status(
f"Product {self._id} not found in {self.collection._id}",
response,
ProductError,
)
response_data = response.json()
download_properties = response_data["properties"]
self._download_properties = download_properties
@property
def sensing_start(self) -> datetime:
"""Sensing start date"""
self._ensure_properties()
try:
sensing_time = self._extract_sensing_time.search( # type: ignore[union-attr]
self._browse_properties["date"] # type: ignore[index]
).groupdict()
sensing_start = sensing_time["start"]
if len(sensing_start) > 19:
sensing_start = sensing_start.ljust(26, "0")
return datetime.fromisoformat(sensing_start)
except Exception as e:
raise ProductError(f"Couldn't prodcue sensing start time from received value: {self._browse_properties['date']}") from e # type: ignore[index]
@property
def sensing_end(self) -> datetime:
"""Sensing end date"""
self._ensure_properties()
try:
sensing_time = self._extract_sensing_time.search( # type: ignore[union-attr]
self._browse_properties["date"] # type: ignore[index]
).groupdict()
sensing_end = sensing_time["end"]
if len(sensing_end) > 19:
sensing_end = sensing_end.ljust(26, "0")
return datetime.fromisoformat(sensing_end)
except Exception as e:
raise ProductError(f"Couldn't produce sensing end time from received value: {self._browse_properties['date']}") from e # type: ignore[index]
@property
def satellite(self) -> str:
"""Platform or Mission related to the product"""
self._ensure_properties()
satellites = [
acquisition["platform"]["platformShortName"]
for acquisition in self._browse_properties["acquisitionInformation"] # type: ignore[index]
]
return ", ".join(satellites)
@property
def instrument(self) -> str:
"""Instrument related to the product"""
self._ensure_properties()
instruments = [
acquisition["instrument"]["instrumentShortName"]
for acquisition in self._browse_properties["acquisitionInformation"] # type: ignore[index]
]
return ", ".join(instruments)
@property
def size(self) -> int:
"""Size of the product"""
self._ensure_properties()
return self._browse_properties["productInformation"]["size"] # type: ignore[index]
@property
def acronym(self) -> str:
"""Acronym or Product Type of the product"""
self._ensure_properties()
return self._browse_properties["productInformation"]["productType"] # type: ignore[index]
@property
def product_type(self) -> str:
"""Product Type or Acronym of the product"""
return self.acronym
@property
def timeliness(self) -> Optional[int]:
"""Timeliness of the product"""
self._ensure_properties()
if "timeliness" in self._browse_properties["productInformation"]: # type: ignore[index]
return self._browse_properties["productInformation"]["timeliness"] # type: ignore[index]
return None
@property
def md5(self) -> Optional[str]:
"""MD5 checksum of the product SIP"""
self._ensure_properties()
return self._browse_properties["md5"] # type: ignore[index]
@property
def processingTime(self) -> Optional[str]:
"""Processing time"""
self._ensure_download_properties()
if "processingDate" in self._download_properties["productInformation"]["processingInformation"]: # type: ignore[index]
return self._download_properties["productInformation"]["processingInformation"]["processingDate"] # type: ignore[index]
return None
@property
def processorVersion(self) -> Optional[str]: # type: ignore[valid-type]
"""Processor version"""
self._ensure_download_properties()
if "processorVersion" in self._download_properties["productInformation"]["processingInformation"]: # type: ignore[index]
return self._download_properties["productInformation"]["processingInformation"]["processorVersion"] # type: ignore[index]
return None
@property
def format(self) -> Optional[str]:
"""Format"""
self._ensure_download_properties()
if "format" in self._download_properties["productInformation"]["processingInformation"]: # type: ignore[index]
return self._download_properties["productInformation"]["processingInformation"]["format"] # type: ignore[index]
return None
@property
def qualityStatus(self) -> Optional[str]:
"""Quality status"""
self._ensure_download_properties()
if "qualityStatus" in self._download_properties["productInformation"]["qualityInformation"]: # type: ignore[index]
return self._download_properties["productInformation"]["qualityInformation"]["qualityStatus"] # type: ignore[index]
return None
@property
def ingested(self) -> Optional[datetime]:
"""Date of publication"""
self._ensure_properties()
ingestion_time = self._extract_ingestion_time.search( # type: ignore[union-attr]
self._browse_properties["updated"] # type: ignore[index]
).groupdict()
ingestion_time = ingestion_time["updated"]
if len(ingestion_time) > 19:
ingestion_time = ingestion_time.ljust(26, "0")
return datetime.fromisoformat(ingestion_time)
@property
def orbit_type(self) -> str:
"""Orbit Type (GEO/LEO)"""
self._ensure_download_properties()
return self._download_properties["acquisitionInformation"][0]["platform"]["orbitType"] # type: ignore[index]
@property
def orbit_is_LEO(self) -> bool:
"""Is Orbit Type LEO?"""
self._ensure_download_properties()
return self.orbit_type == "LEO" # type: ignore[index]
@property
def orbit_number(self) -> Optional[int]:
"""Orbit Number of LEO product"""
self._ensure_properties()
if "orbitNumber" in self._browse_properties["acquisitionInformation"][0]["acquisitionParameters"]: # type: ignore[index]
return self._browse_properties["acquisitionInformation"][0]["acquisitionParameters"]["orbitNumber"] # type: ignore[index]
return None
@property
def orbit_direction(self) -> Optional[str]:
"""Orbit Direction of LEO product"""
self._ensure_properties()
if "orbitDirection" in self._browse_properties["acquisitionInformation"][0]["acquisitionParameters"]: # type: ignore[index]
return self._browse_properties["acquisitionInformation"][0]["acquisitionParameters"][ # type: ignore[index]
"orbitDirection"
]
return None
@property
def relative_orbit(self) -> Optional[int]:
"""Relative Orbit Number of LEO product"""
self._ensure_properties()
if (
"relOrbitStart"
in self._browse_properties["acquisitionInformation"][0]["acquisitionParameters"] # type: ignore[index]
):
return self._browse_properties["acquisitionInformation"][0]["acquisitionParameters"][ # type: ignore[index]
"relOrbitStart"
]
return None
@property
def cycle_number(self) -> Optional[int]:
"""Cycle Number of LEO product"""
self._ensure_properties()
if (
"cycleNumber"
in self._browse_properties["acquisitionInformation"][0]["acquisitionParameters"] # type: ignore[index]
):
return self._browse_properties["acquisitionInformation"][0]["acquisitionParameters"][ # type: ignore[index]
"cycleNumber"
]
return None
@property
def is_mtg(self) -> bool:
"""Is MTG collection?"""
self._ensure_download_properties()
return (
"mtgCoverage"
in self._download_properties["acquisitionInformation"][0]["acquisitionParameters"] # type: ignore[index]
)
@property
def repeat_cycle(self) -> Optional[str]:
"""Repeat cycle number (only for MTG products)"""
self._ensure_properties()
if (
self.is_mtg
and "repeatCycleIdentifier"
in self._download_properties["acquisitionInformation"][0]["acquisitionParameters"]["mtgCoverage"] # type: ignore[index]
):
return self._download_properties["acquisitionInformation"][0]["acquisitionParameters"]["mtgCoverage"][ # type: ignore[index]
"repeatCycleIdentifier"
]
return None
@property
def region_coverage(self) -> Optional[str]:
"""Region Coverage (only for MTG products)"""
self._ensure_properties()
if (
self.is_mtg
and "majorRegionCoverage"
in self._download_properties["acquisitionInformation"][0]["acquisitionParameters"]["mtgCoverage"] # type: ignore[index]
):
return self._download_properties["acquisitionInformation"][0]["acquisitionParameters"]["mtgCoverage"][ # type: ignore[index]
"majorRegionCoverage"
]
return None
@property
def subregion_identifier(self) -> Optional[str]:
"""Sub-Region (only for MTG products)"""
self._ensure_properties()
if (
self.is_mtg
and "subRegionIdentifier"
in self._download_properties["acquisitionInformation"][0]["acquisitionParameters"]["mtgCoverage"] # type: ignore[index]
):
return self._download_properties["acquisitionInformation"][0]["acquisitionParameters"]["mtgCoverage"][ # type: ignore[index]
"subRegionIdentifier"
]
return None
@property
def metadata(self) -> MutableMapping[str, Any]:
"""Product metadata"""
self._ensure_properties()
self._ensure_download_properties()
return {
"geometry": self._geometry.copy() if self._geometry else None, # type: ignore
"properties": self._browse_properties.copy(), # type: ignore[union-attr]
"download_properties": self._download_properties.copy(), # type: ignore[union-attr]
}
@property
def entries(self) -> Sequence[str]:
"""Files inside the product"""
self._ensure_properties()
if not self._entries:
return []
else:
return tuple(self._entries)
@property
def url(self) -> str:
"""URL of the product"""
access_token = None
if hasattr(self.datastore.token, "access_token"):
access_token = str(self.datastore.token.access_token)
url = self.datastore.urls.get(
"datastore",
"download product",
vars={"collection_id": self.collection._id, "product_id": self._id},
)
download_url = str(url)
if access_token is not None:
download_url += "?access_token=" + access_token
return download_url
@contextmanager
def open(
self,
entry: Optional[str] = None,
chunk: Optional[Tuple[int, int]] = None,
custom_headers: Optional[Dict[str, str]] = None,
) -> Generator[IO[bytes], None, None]:
"""Opens a stream to download the product content.
Note:
A Data Store product refers to a zip archive containing the data.
Arguments:
- `entry` (optional):
specific file inside the product
- `chunk` (optional):
Byte range to download as (start, end). If None, downloads the full content.
- `custom_headers` (optional):
Additional HTTP headers to include in the request.
Yields:
IO[bytes]: a file-like stream of the requested (and possibly partial) product content.
"""
url = self.datastore.urls.get(
"datastore",
"download product",
vars={"collection_id": self.collection._id, "product_id": self._id},
)
auth = self.datastore.token.auth
params = None
if entry is not None:
url += "/entry"
params = {"name": entry}
headers = eumdac.common.headers.copy()
if chunk:
headers = {**headers, "Range": f"bytes={chunk[0]}-{chunk[1]-1}"}
if custom_headers:
headers.update(custom_headers)
with get(
url,
auth=auth,
params=params,
stream=True,
headers=headers,
) as response:
eumdac_raise_for_status(
f"Could not download Product {self._id} of Collection {self.collection._id}",
response,
ProductError,
)
match = self._extract_filename.search(response.headers["Content-Disposition"])
filename = match.group(1) # type: ignore[union-attr]
response.raw.name = filename
response.raw.decode_content = True
yield response.raw
class ProductError(EumdacError):
"""Errors related to products"""
eumdac-3.0.0/eumdac/py.typed 0000664 0000000 0000000 00000000000 14720105632 0015673 0 ustar 00root root 0000000 0000000 eumdac-3.0.0/eumdac/request.py 0000664 0000000 0000000 00000023251 14720105632 0016253 0 ustar 00root root 0000000 0000000 """This module contains the eumdac requests wrapper that includes automatic retries and management of throttling."""
import json
import random
import time
from datetime import datetime
from typing import Any, Dict
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from eumdac.errors import EumdacError
from eumdac.logging import logger
class RequestError(EumdacError):
"""Error related to requests."""
pass
class RetryAndLog(Retry):
"""Retry configuration that will log retry attempts.
Extends urllib3.util.retry.Retry, decorating the 'increment' method.
"""
def increment( # type: ignore
self,
method: Any = None,
url: Any = None,
response: Any = None,
error: Any = None,
_pool: Any = None,
_stacktrace: Any = None,
) -> Retry:
"""Decorated urllib3.util.retry.Retry::increment to include logging."""
target_uri = ""
if _pool:
target_uri = f"{method} {_pool.scheme}://{_pool.host}:{_pool.port}{url}"
elif error:
target_uri = f"{error.conn.host}{url}"
cause = ""
if response and response.data:
cause = f'server response {response.status} - "{response.data}" '
if error:
cause = f'{cause}error: "{error}"'
logger.info(f"Trying again for {target_uri} due to {cause}")
return super().increment(method, url, response, error, _pool, _stacktrace)
def _get_adapter(max_retries: int, backoff_factor: float) -> HTTPAdapter:
"""Prepare an an HTTPAdapter that will retry failed requests up to 'max_retries' times.
Only requests that return a 50X error code will be retried.
Parameters
----------
- `max_retries` : *int*
Number of retries to perform.
- `backoff_factor` : *float*
Backoff factor to apply between attempts after the second try.
Returns
-------
- `HTTPAdapter`
Adapter prepared with the given 'max_retries' and 'backoff_factor'.
"""
retry = RetryAndLog(
total=max_retries,
backoff_factor=backoff_factor,
status_forcelist=[500, 502, 503, 504],
allowed_methods=["HEAD", "GET", "OPTIONS", "POST", "PUT", "PATCH"],
raise_on_status=False,
)
return HTTPAdapter(max_retries=retry)
def _should_retry(response: requests.Response, backoff: int = random.randint(1, 6) * 10) -> bool:
"""Decide whether the request should be retried considering the received response.
Handles the throttling of requests done by the Data Store API.
Parameters
---------
- `response` : *requests.Response*
Response received from the server.
- `backoff` : *int, optional*
Backoff, in seconds, to apply between attempts; defaults to a random multiple of 10 seconds between 10 and 60, drawn once at import time.
"""
if response.status_code == 429:
rd = json.loads(response.text)
# handle throttling
message = rd["message"]["reason"]
if "message" in rd and "retryAfter" in rd["message"]:
# Traffic limits exceeded
timestamp = int(rd["message"]["retryAfter"]) / 1000
utc_endtime = datetime.utcfromtimestamp(timestamp)
duration = utc_endtime - datetime.utcnow()
if duration.total_seconds() > 0:
logger.warning(f"{rd['message']}: operation will resume in {duration}")
time.sleep(duration.total_seconds())
return True
elif "message" in rd and "reason" in rd["message"]:
if rd["message"]["reason"] == "Maximum number of connections exceeded":
# Maximum number of connections exceeded
logger.warning(f"{message}: throttling for {backoff}s")
time.sleep(backoff)
return True
elif rd["message"]["reason"] == "Maximum number of requests exceeded":
# Maximum number of requests exceeded
logger.warning(f"{message}: throttling for 1s")
time.sleep(1)
return True
return False
def _request(
method: str,
url: str,
max_retries: int = 3,
backoff_factor: float = 0.3,
**kwargs: Any,
) -> requests.Response:
"""Perform a request with the given `method`, `url` and parameters with automatic retries and throttling management.
Parameters
----------
- `method`: *{'get', 'post', 'patch', 'put', 'delete'}*
HTTP request method to use in the request.
- `url`: *str*
URL to make the request to.
- `max_retries`: *int, optional*
Max number of retries to perform if the request fails, default: 3.
- `backoff_factor`: *float, optional*
Backoff factor to apply between attempts, default 0.3.
- `**kwargs`: *dict, optional*
Extra arguments to pass to the request, refer to the requests library documentation for a list of possible arguments.
Returns
-------
- `requests.Response`:
Response received from the server.
"""
adapter = _get_adapter(max_retries, backoff_factor)
session = requests.Session()
session.mount("http://", adapter)
session.mount("https://", adapter)
response = requests.Response()
try:
while True:
if hasattr(session, method):
logger.debug(_pretty_print(method, url, kwargs))
response = getattr(session, method.lower())(url, **kwargs)
if _should_retry(response):
continue
else:
raise RequestError(f"Operation not supported: {method}")
break
except (ValueError, KeyError, TypeError) as e:
logger.error(f"Received unexpected response: {e}")
except requests.exceptions.RetryError as e:
raise RequestError(
f"Maximum retries ({max_retries}) reached for {method.capitalize()} {url}"
)
return response
def get(url: str, **kwargs: Any) -> requests.Response:
"""Perform a GET HTTP request to the given `url` with the given parameters.
Retries and throttling will be managed in a transparent way when making the request.
Arguments
---------
- `url`: *str*
URL to make the request to.
- `**kwargs`: *dict, optional*
Extra arguments to pass to the request, refer to the requests library documentation for a list of possible arguments.
Returns
-------
- `request.Response`:
Response received from the server.
"""
return _request("get", url, **kwargs)
def post(url: str, **kwargs: Any) -> requests.Response:
"""Perform a POST HTTP request to the given `url` with the given parameters.
Retries and throttling will be managed in a transparent way when making the request.
Arguments
---------
- `url`: *str*
URL to make the request to.
- `**kwargs`: *dict, optional*
Extra arguments to pass to the request, refer to the requests library documentation for a list of possible arguments.
Returns
-------
- `request.Response`:
Response received from the server.
"""
return _request("post", url, **kwargs)
def patch(url: str, **kwargs: Any) -> requests.Response:
"""Perform a PATCH HTTP request to the given `url` with the given parameters.
Retries and throttling will be managed in a transparent way when making the request.
Arguments
---------
- `url`: *str*
URL to make the request to.
- `**kwargs`: *dict, optional*
Extra arguments to pass to the request, refer to the requests library documentation for a list of possible arguments.
Returns
-------
- `request.Response`:
Response received from the server.
"""
return _request("patch", url, **kwargs)
def put(url: str, **kwargs: Any) -> requests.Response:
"""Perform a PUT HTTP request to the given `url` with the given parameters.
Retries and throttling will be managed in a transparent way when making the request.
Arguments
---------
- `url`: *str*
URL to make the request to.
- `**kwargs`: *dict, optional*
Extra arguments to pass to the request, refer to the requests library documentation for a list of possible arguments.
Returns
-------
- `request.Response`:
Response received from the server.
"""
return _request("put", url, **kwargs)
def delete(url: str, **kwargs: Any) -> requests.Response:
"""Perform a DELETE HTTP request to the given `url` with the given parameters.
Retries and throttling will be managed in a transparent way when making the request.
Arguments
---------
- `url`: *str*
URL to make the request to.
- `**kwargs`: *dict, optional*
Extra arguments to pass to the request, refer to the requests library documentation for a list of possible arguments.
Returns
-------
- `request.Response`:
Response received from the server.
"""
return _request("delete", url, **kwargs)
def _pretty_print(method: str, url: str, kwargs: Dict[str, Any]) -> str:
"""Returns a readable str of the given request."""
pargs = {}
for key in kwargs.keys():
if key == "headers":
headers = {}
for header in kwargs[key]:
if header not in ["referer", "User-Agent"]:
headers[header] = kwargs[key][header]
if len(headers) > 0:
pargs[key] = headers
elif key == "auth":
if hasattr(kwargs[key], "token"):
pargs[key] = f"Bearer {str(kwargs[key].token)}" # type: ignore
else:
pargs[key] = f"{type(kwargs[key]).__name__}" # type: ignore
else:
pargs[key] = kwargs[key]
return f"Request: {method.upper()} {url}, payload: {pargs}"
eumdac-3.0.0/eumdac/tailor_app.py 0000664 0000000 0000000 00000064717 14720105632 0016731 0 ustar 00root root 0000000 0000000 """module containing the TailorApp which will be used when using
eumdac download **with** the --tailor argument."""
from __future__ import annotations
import concurrent
import fnmatch
import re
import shutil
import tempfile
import threading
import time
import typing
from collections import namedtuple
from datetime import timedelta
from pathlib import Path
from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Tuple
from eumdac.config import PERCENTAGE_WARNING
from eumdac.customisation import (
Customisation,
CustomisationError,
UnableToGetCustomisationError,
)
from eumdac.datastore import DataStore
from eumdac.datatailor import DataTailor
from eumdac.errors import EumdacError
from eumdac.futures import EumdacFutureFunc, EumdacThreadPoolExecutor
from eumdac.job_id import JobIdentifier
from eumdac.logging import logger
from eumdac.order import Order
from eumdac.product import Product
from eumdac.tailor_models import Chain
class FailedStateTransitionError(Exception):
def __init__(self, msg: str, faillog: str) -> None:
self.faillog = faillog
super().__init__(msg)
def find_lines(self, search_string: str) -> Generator[str, None, None]:
if self.faillog:
for line in self.faillog.splitlines():
if search_string in line:
result_text = line.split(" - ")[-1]
yield result_text
class TailorApp:
def __init__(
self,
order: Order,
datastore: Any,
datatailor: Any,
) -> None:
self.server_state_monitor_executor = EumdacThreadPoolExecutor(max_workers=3)
self.download_executor = EumdacThreadPoolExecutor(max_workers=2)
self.datastore = datastore
self.datatailor = datatailor
self.order = order
with self.order.dict_from_file() as order_d:
self.output_dir = Path(order_d["output_dir"])
self.output_dir.mkdir(exist_ok=True, parents=True)
self.chain = Chain(**order_d["chain"])
def run(self) -> bool:
try:
return self.resume()
except FatalEumdacError as fee:
logger.error(f"Fatal error during execution: {fee}")
self.order.remote_delete_failed(self.datatailor)
return False
def shutdown(self) -> None:
self.server_state_monitor_executor.pool_shutdown()
self.download_executor.pool_shutdown()
with self.order._lock:
return
def concurrent_download(
self,
products: Iterable[Product],
output_dir: Path,
customization_add_func: Callable[[Product], Customisation],
timeout: float = 600,
) -> None:
customisation_futures: List[concurrent.futures.Future[Any]] = []
download_futures = []
failed_customisations = []
done_customistations = []
num_jobs = len(list(self.order.iter_product_info()))
job_identificator = JobIdentifier(num_jobs)
for product in products:
customisation_futures.append(
self.server_state_monitor_executor.pool_submit(
WaitForDoneCustomisationFutureFunc(
customization_add_func,
self.order,
job_identificator,
),
product,
)
)
while customisation_futures:
done_concurrent_futures: List[concurrent.futures.Future[Any]] = []
not_done_concurrent_futures: List[
concurrent.futures.Future[Any]
] = customisation_futures
try:
(
done_concurrent_futures,
not_done_concurrent_futures,
) = [
list(x)
for x in concurrent.futures.wait(
customisation_futures,
return_when=concurrent.futures.FIRST_COMPLETED,
timeout=None,
)
]
for future in done_concurrent_futures:
customisation_futures.remove(future)
except concurrent.futures.TimeoutError:
pass
# at this point done_concurrent_futures contain all finished customisations
# now check if they failed and submit a task to download the result if success
for done_future in done_concurrent_futures:
try:
completed_customistation, product = done_future.result()
done_customistations.append(completed_customistation)
except GracefulAbortError as graceful_abort_error:
logger.debug(f"External abort for {done_future}: {graceful_abort_error}")
continue
except FatalEumdacError as fatal_eumdac_error:
for future in customisation_futures:
future.cancel()
self.server_state_monitor_executor.pool_shutdown()
if fatal_eumdac_error.extra_info:
logger.error(
f"Fatal error: {fatal_eumdac_error} - {fatal_eumdac_error.extra_info['title']}: {fatal_eumdac_error.extra_info['description']}"
)
else:
logger.error(f"Fatal error: {fatal_eumdac_error}")
raise
except CustomisationTimeoutError as te:
logger.error(f"{completed_customistation} timed out: {te}")
failed_customisations.append(completed_customistation)
continue
except CustomisationError as ce:
logger.error(f"Failed: {ce}")
continue
except Exception as exc:
logger.error(f"{done_future} failed: {exc}")
failed_customisations.append(done_future)
continue
logger.debug(f"{completed_customistation} processed and ready to download.")
download_futures.append(
self.download_executor.pool_submit(
DownloadRunFutureFunc(
self.order,
output_dir,
job_identificator.job_id_str(product),
),
completed_customistation,
product,
)
)
_ = concurrent.futures.wait(download_futures, return_when=concurrent.futures.ALL_COMPLETED)
def resume(self) -> bool:
# handle existing customisations
customisations = []
products_to_resume = []
products_to_repeat = []
# query all customisation states
success_customisation_product_futures = []
try:
user_name = self.datatailor.user_info["username"]
quota_info = self.datatailor.quota["data"][user_name]
if quota_info["space_usage_percentage"] > PERCENTAGE_WARNING:
logger.warning(f"Reaching maximum quota: {quota_info['space_usage_percentage']}%")
elif quota_info["space_usage_percentage"] > 100:
logger.warning(f"Over maximum quota: {quota_info['space_usage_percentage']}%")
except EumdacError as e:
# The quota call is unsupported by local-tailor, so we don't report
if not self.datatailor.is_local:
logger.warning(f"Could not determine current quota: {e}")
for p_info in self.order.iter_product_info():
success_customisation_product_futures.append(
self.server_state_monitor_executor.pool_submit(
StateQueryFutureFunc(),
p_info.p_id,
p_info.p_dict,
self.datatailor,
self.datastore,
)
)
done_success_customisation_product_futures, _ = concurrent.futures.wait(
success_customisation_product_futures,
return_when=concurrent.futures.ALL_COMPLETED,
)
for done_success_customisation_product_future in done_success_customisation_product_futures:
(
success,
customisation,
product,
) = done_success_customisation_product_future.result()
if success:
customisations.append(customisation)
products_to_resume.append(product)
else:
products_to_repeat.append(product)
if len(products_to_resume) > 0:
self.concurrent_download(
products_to_resume,
self.output_dir,
customization_add_func=GetCustomisation(customisations),
)
if len(products_to_repeat) > 0:
self.concurrent_download(
products_to_repeat,
self.output_dir,
customization_add_func=lambda x: self.datatailor.new_customisation(x, self.chain),
)
return True
class DownloadRunFutureFunc(EumdacFutureFunc):
def __init__(self, order: Order, output_dir: Path, job_id: str):
super().__init__()
self.order = order
self.output_dir = output_dir
self.job_id = job_id
@typing.no_type_check
def __call__(
self,
customisation: Customisation,
product: Product,
) -> None:
results = customisation.outputs
# compare with file_patterns from order_file
(file_patterns,) = self.order.get_dict_entries("file_patterns")
if file_patterns:
filtered_results = []
for pattern in file_patterns:
matches = fnmatch.filter(results, pattern)
filtered_results.extend(matches)
results = filtered_results
logger.debug(f"{self.job_id}: Starting download(s) for {results}")
download_states = {result: "PENDING" for result in results}
self.order.update(
customisation._id,
product._id,
status=None,
download_states=download_states,
)
with self.order.dict_from_file() as order_d:
dirs = order_d["dirs"]
num_total, num = self.order.resolve_product_num(product._id)
for result in results:
if self.aborted:
break
download_states[result] = "DOWNLOAD_ERROR"
try:
logger.info(
f"{self.job_id}: Downloading output of job {customisation._id} for {product._id}"
)
self.download_customisation_result(customisation, result, dirs)
logger.info(f"{self.job_id}: {Path(result).parts[-1]} has been downloaded.")
download_states[result] = "DOWNLOADED"
except DownloadExistsError as err:
logger.info(f"{self.job_id}: Skipping download. File exists: {err.product_path}")
download_states[result] = "DOWNLOADED"
except DownloadAbortedError as err:
logger.warning(f"{self.job_id}: Download of {err.product_path} aborted")
except Exception as exc:
logger.error(f"{self.job_id}: Error while downloading: {exc}")
self.order.update(
customisation._id,
product._id,
status=None,
download_states=download_states,
)
# delete serverside customisation on success
if "DOWNLOAD_ERROR" not in download_states.values():
logger.info(
f"{self.job_id}: Deleting customization {customisation._id} for {product._id}"
)
customisation.delete()
else:
logger.warning(
f"{self.job_id}: {customisation} download failed. Keeping customisation."
)
fatal_error_logs = {
"ERROR": ["invalid", "INTERNAL ERROR"],
}
def download_customisation_result(
self, customisation: Customisation, result: str, dirs: bool
) -> None:
product_path = self.output_dir / Path(result).parts[-1]
if dirs:
# when the dirs flag is used
# a subdirectory is created
# to avoid overwriting common files
output_subdir = self.output_dir / f"{Path(result).parts[0]}"
output_subdir.mkdir(exist_ok=True)
product_path = output_subdir / Path(result).parts[-1]
if product_path.is_file():
raise DownloadExistsError(product_path)
with tempfile.TemporaryDirectory(dir=self.output_dir, suffix=".tmp") as tempdir:
tmp_prod_p = Path(tempdir) / product_path.parts[-1]
with tmp_prod_p.open("wb") as tmp_prod:
with customisation.stream_output_iter_content(result) as chunks:
for chunk in chunks:
if self.aborted:
raise DownloadAbortedError(product_path)
tmp_prod.write(chunk)
shutil.move(str(tmp_prod_p), str(product_path))
def check_invalid_state_transition_changelog(error: FailedStateTransitionError) -> None:
fatal_error_logs = {
"ERROR": ["INTERNAL ERROR", "incompatible"],
}
# find messages containing errors
for severity, filters in fatal_error_logs.items():
for line in error.find_lines(severity):
if any(f in line for f in filters):
desc = {"status": 200, "title": severity, "description": line}
raise FatalEumdacError(EumdacError(line, desc))
class StateQueryFutureFunc(EumdacFutureFunc):
@typing.no_type_check
def __call__(
self,
p_id: str,
p_info: Dict[str, Any],
datatailor: DataTailor,
datastore: DataStore,
) -> Tuple[bool, Optional[Customisation], Optional[Product]]:
if self.aborted:
return False, None, None
product = datastore.get_product(p_info["col_id"], p_id)
customisation_id = p_info["customisation"]
if customisation_id is None:
return False, None, product
customisation = datatailor.get_customisation(customisation_id)
success = False
try:
_ = customisation.status
success = True
except (CustomisationError, CustomisationTimeoutError) as ce:
download_states = {}
if "download_states" in p_info:
download_states = p_info["download_states"]
if "DOWNLOADED" in download_states.values():
logger.warning(
f"Customisation {customisation_id} has already been finished and downloaded."
)
else:
logger.warning(f"Could not restore customisation for {customisation_id}: {ce}")
try:
if customisation:
customisation.delete()
except:
pass
return success, customisation, product
class GetCustomisation:
def __init__(self, customisations: List[Customisation]):
self.cnt = 0
self.customisations = customisations
self.lock = threading.Lock()
def __call__(self, _product: Product) -> Customisation:
with self.lock:
result = self.customisations[self.cnt]
if self.cnt < len(self.customisations):
self.cnt += 1
return result
State = namedtuple("State", "name log")
class WaitForDoneCustomisationFutureFunc(EumdacFutureFunc):
def __init__(
self,
customization_add_func: Callable[[Product], Customisation],
order: Order,
job_identificator: JobIdentifier,
timeout: float = 1800,
polling_interval: float = 1.0,
max_retries: int = 10,
max_timeouts: int = 3,
wait_timedelta: timedelta = timedelta(seconds=5.0),
) -> None:
super().__init__()
self.customization_add_func = customization_add_func
self.order = order
self.job_identificator = job_identificator
self.timeout = timeout
self.polling_interval = polling_interval
self.max_retries = max_retries
self.max_timeouts = max_timeouts
self.wait_timedelta = wait_timedelta
self.terminated = False
self.failed = False
self.state = State("UNSUBMITTED", "")
self.state_transitions: Dict[str, List[str]] = {
"UNSUBMITTED": ["QUEUED", "RUNNING", "DONE", "FAILED", "INACTIVE"],
"QUEUED": ["QUEUED", "RUNNING", "DONE", "FAILED", "INACTIVE"],
"RUNNING": ["RUNNING", "DONE", "FAILED", "INACTIVE"],
"DONE": [],
"FAILED": [],
"INACTIVE": [],
}
self.timed_out = False
self.job_identificator = job_identificator
self.job_id = "(? / ?)"
@typing.no_type_check
def __call__(
self,
product: Product,
) -> Optional[Customisation]:
if self.aborted:
return
# Set the job id only when the Future is actually called by the Scheduler to ensure
# (somewhat) correct ordering.
self.job_identificator.register(product)
self.job_id = self.job_identificator.job_id_str(product)
logger.info(f"Triggering {self.job_id.lower()} of {self.job_identificator.total_jobs}")
logger.debug(
f"{self.job_id} tracked as {self.job_identificator.registered_objects[product]}"
)
customisation = self.try_to_add_customisation(product)
return self.wait_for_success(customisation, product)
def try_to_add_customisation(self, product: Product) -> Optional[Customisation]:
retries = 0
while True:
if self.aborted:
raise GracefulAbortError("Abort requested.")
if retries >= self.max_retries:
raise CustomisationError(
f"{self.job_id}: Could not add customisation after {retries} retries"
)
try:
return self.customization_add_func(product)
except EumdacError as e:
retries += 1
try:
check_error_response(e)
except ExceedingNumberOfCustomisationsEumdacError as _exceeding_number_error:
pass
logger.warning(
f"{self.job_id}: {e}: Could not create customisation. Retry: {retries}"
)
time.sleep(self.wait_timedelta.total_seconds())
def wait_for_success(
self, customisation: Customisation, product: Product
) -> Tuple[Customisation, Product]:
retries = 0
timeouts = 0
while retries < self.max_retries:
if self.aborted:
break
self.state_timer = self.windup_timer()
try:
while not self.aborted and not self.terminated:
self.step(customisation, product._id)
if self.timed_out:
logger.error(f"{self.job_id}: {self} timed_out")
retries += 1
timeouts += 1
if timeouts >= self.max_timeouts:
raise FatalEumdacError(
EumdacError(f"{self.job_id}: {customisation._id} timed out")
)
break
except FailedStateTransitionError as fste:
logger.debug(f"{fste}: {fste.faillog}")
check_invalid_state_transition_changelog(fste)
if customisation:
if self.state.name == "INACTIVE":
customisation.kill()
customisation.delete()
retries += 1
logger.debug(
f"{self.job_id}: {self} failed with {fste} on try {retries}/{self.max_retries}"
)
if retries < self.max_retries:
# if we retry this reinitialize state
self.state = State("UNSUBMITTED", "")
while retries < self.max_retries:
ret_customisation = self.try_to_add_customisation(product)
if ret_customisation:
customisation = ret_customisation
break
retries += 1
finally:
self.state_timer.cancel()
if retries >= self.max_retries:
raise FatalEumdacError(
EumdacError(f"{self.job_id}: {customisation._id} is inactive")
)
return customisation, product
def step(self, customisation: Customisation, product_id: str) -> None:
self.transition(customisation, product_id)
self.terminated = self.state.name == "DONE"
self.failed = self.state.name in ["FAILED", "INACTIVE"]
if self.terminated:
return
if self.failed:
logs = self.get_latest_log(customisation)
if not self.order._no_warning_logs:
log_message = logs.split("\n\n")
logger.warning(f"{self.job_id}: {log_message[-1]}")
raise FailedStateTransitionError(
f"{self.job_id}: Server state is: {self.state.name}",
faillog=logs,
)
if self.state.name not in self.state_transitions.keys():
raise InvalidStateTransitionError(
"{self.job_id}: Unexpected State: {self.state.name})",
old_state=self.state,
new_state=State(self.state.name, self.get_latest_log(customisation)),
)
def transition(
self,
customisation: Customisation,
product_id: str,
) -> None:
new_state = customisation.status
if self.state.name != new_state:
logger.debug(
f"{self.job_id}: {customisation}: State change: {self.state.name} -> {new_state}"
)
num_total, num = self.order.resolve_product_num(product_id)
suffix = {
"QUEUED": "is now queued",
"RUNNING": "is now running",
"DONE": "has finished",
"FAILED": "has failed",
"INACTIVE": "is inactive",
}
logger.info(
f"{self.job_id}: Customisation {customisation._id} for {product_id} {suffix[new_state]}"
)
if new_state not in self.state_transitions[self.state.name]:
self.state_timer.cancel()
raise InvalidStateTransitionError(
f"{self.job_id}: Tried to transition from state {self.state} to {new_state}, "
"which is not expected",
old_state=self.state,
new_state=State(new_state, self.get_latest_log(customisation)),
)
elif self.state.name != new_state:
self.state_timer.cancel()
self.state_timer = self.windup_timer()
self.state = State(new_state, self.get_latest_log(customisation))
if self.order:
self.order.update(
customisation._id,
product_id,
self.state.name,
)
def windup_timer(self) -> threading.Timer:
logger.debug(f"{self.job_id}: {repr(self)} windup_timer")
timer = threading.Timer(self.timeout, self.on_timeout)
timer.start()
return timer
def on_timeout(self) -> None:
logger.debug(f"{self.job_id}: {repr(self)} on_timeout")
self.timed_out = True
def get_latest_log(self, customisation: Customisation) -> str:
try:
return customisation.logfile
except UnableToGetCustomisationError:
return "Unable to get logs"
class InvalidStateTransitionError(EumdacError):
def __init__(self, msg: str, old_state: State, new_state: State) -> None:
self.old_state = old_state
self.new_state = new_state
super().__init__(msg)
class CustomisationTimeoutError(EumdacError):
"""Error raised during downloads"""
class DownloadError(EumdacError):
"""Error raised during downloads"""
def __init__(self, product_path: Path, *args: Any) -> None:
super().__init__(msg=str(product_path))
self.product_path = product_path
class DownloadAbortedError(DownloadError):
"""Error raised when a download is aborted"""
class DownloadExistsError(DownloadError):
"""Error raised when a download file already exists"""
class GracefulAbortError(EumdacError):
"""Error related to abort conditions when creating customisations"""
class FatalEumdacError(EumdacError):
"""Unrecoverable Error"""
def __init__(self, eumdac_error: EumdacError):
super().__init__(eumdac_error.args[0], eumdac_error.extra_info)
class ExceedingNumberOfCustomisationsEumdacError(EumdacError):
"""Error for server responding that maximum number of customisations is reached"""
def __init__(self, eumdac_error: EumdacError, number_of_customisations: int):
super().__init__(eumdac_error.args[0], eumdac_error.extra_info)
self.number_of_customisations = number_of_customisations
def check_error_response(error: Exception) -> None:
"""helper function to check error responses from the server"""
fatal_error_responses = {
400: [""],
}
number_of_customisation_responses = {
500: ["You are exceeding your maximum number"],
}
if isinstance(error, EumdacError):
if (
error.extra_info
and error.extra_info["status"] in fatal_error_responses
and "description" in error.extra_info
):
if any(
fatal_response in error.extra_info["description"]
for fatal_response in fatal_error_responses[error.extra_info["status"]]
):
raise FatalEumdacError(error)
if (
error.extra_info
and error.extra_info["status"] in number_of_customisation_responses
and "description" in error.extra_info
):
if any(
number_response in error.extra_info["description"]
for number_response in number_of_customisation_responses[error.extra_info["status"]]
):
response_match = re.search(r"([\d]+)", error.extra_info["description"])
if response_match:
number = int(response_match[0])
else:
number = -1
raise ExceedingNumberOfCustomisationsEumdacError(error, number)
else:
return
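# Illustrative usage sketch (not part of the library): how the CLI wires up a
# tailoring run. The `datastore`, `datatailor` and `chain` arguments are placeholders
# that require valid EUMETSAT API credentials and a configured chain; `products` is
# any iterable of Product instances.
def _example_run_tailor_order(
    datastore: DataStore,
    datatailor: DataTailor,
    chain: Chain,
    products: Iterable[Product],
) -> bool:
    order = Order()
    order.initialize(chain, products, Path("./tailored"), None, None)
    app = TailorApp(order, datastore, datatailor)
    try:
        return app.run()
    finally:
        app.shutdown()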
eumdac-3.0.0/eumdac/tailor_models.py 0000664 0000000 0000000 00000015621 14720105632 0017422 0 ustar 00root root 0000000 0000000 """This module contains classes modeling Data Tailor resources."""
from __future__ import annotations
from collections.abc import Mapping
from dataclasses import asdict, dataclass
import requests
from typing import TYPE_CHECKING
from eumdac.request import _request
import eumdac.common
if TYPE_CHECKING: # pragma: no cover
import sys
from typing import Any, Optional, Type, Union
if sys.version_info < (3, 9):
from typing import MutableMapping, Sequence
else:
from collections.abc import MutableMapping, Sequence
from eumdac.datatailor import DataTailor
from eumdac.errors import EumdacError, eumdac_raise_for_status
def _none_filter(*args: Any, **kwargs: Any) -> MutableMapping[str, Any]:
"""Build a mapping of '*args' and '**kwargs' removing None values."""
return {k: v for k, v in dict(*args, **kwargs).items() if v is not None}
class AsDictMixin:
"""Base class adding an 'asdict' method that removes None values."""
def asdict(self) -> MutableMapping[str, Any]:
"""Return the fields of the instance as a new dictionary mapping field names to field values, removing None values."""
return asdict(self, dict_factory=_none_filter) # type: ignore
@dataclass
class Filter(AsDictMixin):
"""Layer filter, a list of `bands` or layers for a given `product`.
Attributes
----------
- `id`: *str*
- `name`: *str*
Human readable name.
- `product`: *str*
Product that the filter applies to.
- `bands`: *list[dict]*
List of bands part of the filter, as dicts of {id, number, name}.
"""
__endpoint = "filters"
id: Optional[str] = None
bands: Optional[list] = None # type: ignore[type-arg]
name: Optional[str] = None
product: Optional[str] = None
@dataclass
class RegionOfInterest(AsDictMixin):
"""Region of interest, a geographical area defined by its `NSWE` coordinates.
Attributes
----------
- `id`: *str*
- `name`: *str*
Human readable name.
- `description`: *str*
Human readable description.
- `NSWE`:
North, south, west, east coordinates, in decimal degrees.
"""
__endpoint = "rois"
id: Optional[str] = None
name: Optional[str] = None
NSWE: Optional[str] = None
description: Optional[str] = None
@dataclass
class Quicklook(AsDictMixin):
"""Configuration for generating quicklooks."""
__endpoint = "quicklooks"
id: Optional[str] = None
name: Optional[str] = None
resample_method: Optional[str] = None
stretch_method: Optional[str] = None
product: Optional[str] = None
format: Optional[str] = None
nodatacolor: Optional[str] = None
filter: Union[None, dict, Filter] = None # type: ignore[type-arg]
x_size: Optional[int] = None
y_size: Optional[int] = None
def __post_init__(self) -> None:
"""Prepare `filter` as a Filter instance if given as dict."""
if self.filter is not None and isinstance(self.filter, dict):
self.filter = Filter(**self.filter)
@dataclass
class Chain(AsDictMixin):
"""Chain configuration for Data Tailor customisation jobs."""
__endpoint = "chains"
__submodels = {"filter": Filter, "roi": RegionOfInterest, "quicklook": Quicklook}
id: Optional[str] = None
product: Optional[str] = None
format: Optional[str] = None
name: Optional[str] = None
description: Optional[str] = None
aggregation: Optional[str] = None
projection: Optional[str] = None
roi: Union[None, dict, RegionOfInterest] = None # type: ignore[type-arg]
filter: Union[None, dict, Filter] = None # type: ignore[type-arg]
quicklook: Union[None, dict, Quicklook] = None # type: ignore[type-arg]
resample_method: Optional[str] = None
resample_resolution: Optional[list] = None # type: ignore[type-arg]
compression: Optional[dict] = None # type: ignore[type-arg]
xrit_segments: Optional[list] = None # type: ignore[type-arg]
def __post_init__(self) -> None:
"""Prepare attributes as an instance of their class if given as dict."""
for name, Model in self.__submodels.items():
attr = getattr(self, name)
if attr is not None and isinstance(attr, Mapping):
setattr(self, name, Model(**attr))
if TYPE_CHECKING: # pragma: no cover
CrudModelClass = Union[Type[Filter], Type[RegionOfInterest], Type[Quicklook], Type[Chain]]
CrudModel = Union[Filter, RegionOfInterest, Quicklook, Chain]
class DataTailorCRUD:
"""Generic CRUD for Data Tailor models (Chain, ROI, Filter, Quicklook)."""
datatailor: DataTailor
Model: CrudModelClass
endpoint: str
url: str
def __init__(self, datatailor: DataTailor, Model: CrudModelClass) -> None:
"""Init the CRUD for `datatailor` and `Model`."""
self.datatailor = datatailor
self.Model = Model
endpoint = getattr(Model, f"_{Model.__name__}__endpoint")
self.url = datatailor.urls.get("tailor", endpoint)
def search(
self, product: Optional[str] = None, format: Optional[str] = None
) -> Sequence[CrudModel]:
"""Search resources by 'format' and 'product'."""
params = _none_filter(product=product, format=format)
auth = self.datatailor.token.auth
response = self._request("get", self.url, auth=auth, params=params)
return [self.Model(**data) for data in response.json()["data"]]
def create(self, model: CrudModel) -> None:
"""Create a new resource from 'model' on Data Tailor."""
auth = self.datatailor.token.auth
payload = model.asdict()
self._request("post", self.url, auth=auth, json=payload)
def read(self, model_id: str) -> CrudModel:
"""Retrieve the resource data with id 'model_id' from Data Tailor."""
url = f"{self.url}/{model_id}"
auth = self.datatailor.token.auth
response = self._request("get", url, auth=auth)
return self.Model(**response.json())
def update(self, model: CrudModel) -> None:
"""Update the resource based on 'model' in Data Tailor."""
data = model.asdict()
url = f"{self.url}/{data['id']}"
auth = self.datatailor.token.auth
self._request("put", url, auth=auth, json=data)
def delete(self, model: Union[str, CrudModel]) -> None:
"""Remove the resource 'model' from Data Tailor."""
if isinstance(model, str):
model_id = model
else:
model_id = model.id # type: ignore[assignment]
url = f"{self.url}/{model_id}"
auth = self.datatailor.token.auth
self._request("delete", url, auth=auth)
def _request(self, method: str, url: str, **options: Any) -> requests.Response:
"""Perform a 'method' request to 'url' with 'options'."""
response = _request(method, url, headers=eumdac.common.headers, **options)
eumdac_raise_for_status(f"Request for {self.Model} failed.", response, EumdacError)
return response
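# Illustrative sketch (not part of the original module): the CRUD wrapper is
# generic over the model class. `datatailor` stands for an already
# authenticated eumdac.DataTailor instance; the chain id/product/format
# values are placeholders.
def _example_chain_crud(datatailor: DataTailor) -> None:
    """Sketch: list, create, read and delete Chain resources."""
    chains = DataTailorCRUD(datatailor, Chain)
    chains.search(format="netcdf4")  # GET    .../chains?format=netcdf4
    chains.create(Chain(id="example_chain", product="EXAMPLE_PRODUCT", format="netcdf4"))
    chain = chains.read("example_chain")  # GET    .../chains/example_chain
    chains.delete(chain)                  # DELETE .../chains/example_chain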
eumdac-3.0.0/eumdac/token.py 0000664 0000000 0000000 00000020064 14720105632 0015702 0 ustar 00root root 0000000 0000000 """Module containing classes to handle the token authentication."""
from __future__ import annotations
import abc
import sys
import time
from configparser import ConfigParser
from datetime import datetime
from typing import TYPE_CHECKING, NamedTuple
from urllib.parse import quote as url_quote
import requests
from importlib import resources as importlib_resources
from requests.auth import AuthBase, HTTPBasicAuth
from eumdac.request import post
import eumdac.common
from eumdac.errors import EumdacError
from eumdac.logging import logger
if TYPE_CHECKING: # pragma: no cover
from typing import Optional
if sys.version_info < (3, 9):
from typing import Iterable, Mapping
else:
from collections.abc import Iterable, Mapping
class URLs(ConfigParser):
"""Dictionary-like ConfigParser based storage of EUMDAC related URLs"""
def __init__(self, inifile: Optional[str] = None) -> None:
super().__init__()
if inifile:
self.read(inifile)
else:
if sys.version_info >= (3, 9):
with importlib_resources.as_file(
importlib_resources.files("eumdac") / "endpoints.ini"
) as path:
self.read(path)
else: # python < 3.9
with importlib_resources.path("eumdac", "endpoints.ini") as path:
self.read(path)
def get( # type: ignore[override]
self,
section: str,
option: str,
raw: bool = False,
vars: Optional[Mapping[str, str]] = None,
fallback: str = "",
) -> str:
"""Get an option value for the given section"""
if vars is not None:
vars = {k: url_quote(str(v).encode()).replace("%", "%%") for k, v in vars.items()}
return super().get(section, option, raw=raw, vars=vars, fallback=fallback)
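# Illustrative sketch (not part of the original module): reading an endpoint
# URL. The section/option names and the interpolation variable are placeholders,
# not guaranteed entries of the bundled endpoints.ini; unknown keys fall back
# to an empty string.
def _example_urls_lookup() -> str:
    """Sketch: values in `vars` are URL-quoted before interpolation."""
    urls = URLs()  # loads the packaged endpoints.ini by default
    return urls.get("example_section", "example_option", vars={"product_id": "MSG4 HRIT"})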
class Credentials(NamedTuple):
"""Pair of Consumer Key and Secret authentication parameters.
Attributes
----------
- `consumer_key` : *str*
- `consumer_secret` : *str*
"""
consumer_key: str
consumer_secret: str
class HTTPBearerAuth(AuthBase):
"""Attaches HTTP Bearer Authentication to the given Request object.
Attributes
----------
- `token`: *str*
Bearer token
"""
def __init__(self, token: str) -> None:
"""
Parameters
----------
- `token` : *str*
Token to use for authentication
"""
self.token = token
def __call__(self, request: requests.PreparedRequest) -> requests.PreparedRequest:
"""Returns the given 'request' with the Bearer authentication parameter attached to the headers."""
request.headers["authorization"] = f"Bearer {self.token}"
return request
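# Illustrative sketch (not part of the original module): what HTTPBearerAuth
# does to a prepared request. The token value and URL are placeholders.
def _example_bearer_auth_header() -> None:
    """Sketch: the auth hook sets the 'authorization' header to 'Bearer <token>'."""
    prepared = requests.Request("GET", "https://api.example.org/protected").prepare()
    prepared = HTTPBearerAuth("example-token")(prepared)
    assert prepared.headers["authorization"] == "Bearer example-token"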
class BaseToken(metaclass=abc.ABCMeta):
"""Base class from which all eumdac authentication implementations derive"""
urls: URLs
@property
def auth(self) -> Optional[AuthBase]:
"""To be overloaded in subclasses, shall return a configured AuthBase instance."""
# overload in subclasses
pass
class AccessToken(BaseToken):
"""EUMETSAT API access token
Handles requesting of API tokens and their renewal after expiration.
The str representation of AccessToken instances will be the current token value.
Attributes
----------
- `request_margin` : *int*
seconds before expiration to start requesting a new token
"""
request_margin: int = 2 # seconds
_expiration: int = 0
_access_token: str = ""
credentials: Credentials
validity_period: int # seconds
urls: URLs
cache: bool # does nothing, cache is always on in the server
def __init__(
self,
credentials: Iterable[str],
validity: int = 86400,
cache: bool = True, # does nothing
urls: Optional[URLs] = None,
) -> None:
"""Initializes the AccessToken, but does not request a token.
Parameters
----------
- `credentials`: *(consumer_key, consumer_secret)*
Authentication credentials in the form of a pair of key and secret.
        - `validity`: *int*
            Validity period for tokens, in seconds
        - `cache`: *bool, default: True*
            Currently ignored; token caching is always enabled on the server side.
- `urls`: *URLs, optional*
URLs instance to be used, will be initialized to the default if not provided.
"""
self.credentials = Credentials(*credentials)
self.validity_period = validity
self.urls = urls or URLs()
self.cache = cache
def __str__(self) -> str:
"""Return the current token in str form."""
return self.access_token
@property
def expiration(self) -> datetime:
"""Expiration of the current token string"""
# Generate a token only when uninitialized
if self._expiration == 0:
self._update_token_data()
return datetime.fromtimestamp(self._expiration)
@property
def access_token(self) -> str:
"""Token string"""
expires_in = self._expiration - time.time()
if expires_in > 0:
logger.debug(f"Current token {self._access_token} expires in {expires_in} seconds.")
else:
# If we don't have a token, just get one
logger.debug(f"Requesting new token")
self._update_token_data()
expires_in = self._expiration - time.time()
        # Renew the token if there is less than request_margin seconds left before expiration
if expires_in < self.request_margin:
tries = 0
previous_token = self._access_token
logger.debug(
f"Token expires in {expires_in:.2f}, starting renewal of {self._access_token}."
)
# Loop until we are sure we got the new token
while (
tries < 20
and self._access_token == previous_token
or expires_in < self.request_margin
):
tries += 1
time.sleep(0.5)
logger.debug(f"Requesting new token...")
self._update_token_data()
logger.debug(f"Received/previous {self._access_token}/{previous_token})")
expires_in = self._expiration - time.time()
if tries >= 100:
raise EumdacError(
f"Could not get fresh token from server, got {self._access_token}, which expires {datetime.fromtimestamp(self._expiration)}"
)
return self._access_token
@property
def auth(self) -> AuthBase:
"""Authentication object using the current token"""
return HTTPBearerAuth(self.access_token)
def _update_token_data(self) -> None:
"""Request a new token and renew the expiration time"""
auth = HTTPBasicAuth(*self.credentials)
now = time.time()
response = post(
self.urls.get("token", "token"),
auth=auth,
data={"grant_type": "client_credentials", "validity_period": self.validity_period},
headers=eumdac.common.headers,
)
response.raise_for_status()
token_data = response.json()
self._expiration = now + token_data["expires_in"]
self._access_token = token_data["access_token"]
def _revoke(self) -> None:
"""Revoke the current token"""
auth = HTTPBasicAuth(*self.credentials)
response = post(
self.urls.get("token", "revoke"),
auth=auth,
data={"grant_type": "client_credentials", "token": self._access_token},
headers=eumdac.common.headers,
)
response.raise_for_status()
self._expiration = 0
self._access_token = ""
class AnonymousAccessToken(BaseToken):
"""Token class for anonymous access, provides no authentication parameters."""
def __init__(self, urls: Optional[URLs] = None):
"""Init the token."""
self.urls = urls or URLs()
@property
def auth(self) -> Optional[AuthBase]:
"""Return None"""
return None
eumdac-3.0.0/interrogate.cfg 0000664 0000000 0000000 00000000314 14720105632 0015752 0 ustar 00root root 0000000 0000000 [tool:interrogate]
ignore-semiprivate = true
ignore-private = true
exclude = setup.py,eumdac/cli.py,eumdac/cli_mtg_helpers.py,eumdac/download_app.py,eumdac/logging.py,eumdac/order.py,eumdac/tailor_app.py
eumdac-3.0.0/mypy.ini 0000664 0000000 0000000 00000000653 14720105632 0014453 0 ustar 00root root 0000000 0000000 [mypy]
disallow_untyped_calls = True
disallow_untyped_defs = True
disallow_incomplete_defs = True
check_untyped_defs = True
disallow_subclassing_any = True
warn_no_return = True
strict_optional = True
strict_equality = True
no_implicit_optional = True
disallow_any_generics = True
disallow_any_unimported = True
warn_redundant_casts = True
warn_unused_configs = True
show_traceback = True
show_error_codes = True
pretty = True
eumdac-3.0.0/setup.py 0000664 0000000 0000000 00000003220 14720105632 0014457 0 ustar 00root root 0000000 0000000 import os
from setuptools import setup
about = {}
with open(os.path.join("eumdac", "__version__.py")) as f:
exec(f.read(), about)
with open("README.md", mode="r") as file:
readme = file.read()
setup(
name=about["__title__"],
version=about["__version__"],
description=about["__description__"],
long_description=readme,
long_description_content_type="text/markdown",
author=about["__author__"],
author_email=about["__author_email__"],
url=about["__url__"],
project_urls={
"User guide": about["__documentation__"],
"API reference": about["__api_documentation__"],
},
license=about["__license__"],
classifiers=[
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"Operating System :: OS Independent",
],
packages=["eumdac"],
package_data={"eumdac": ["endpoints.ini", "py.typed"]},
python_requires=">=3.7",
install_requires=["requests>=2.5.0", "pyyaml", "urllib3"],
extras_require={
"test": [
"mypy",
"pytest",
"pytest-cov",
"responses",
"types-requests<2.32.0.20240905",
"types-setuptools",
]
},
entry_points={"console_scripts": ["eumdac=eumdac.cli:cli"]},
)
eumdac-3.0.0/tests/ 0000775 0000000 0000000 00000000000 14720105632 0014112 5 ustar 00root root 0000000 0000000 eumdac-3.0.0/tests/__init__.py 0000664 0000000 0000000 00000000000 14720105632 0016211 0 ustar 00root root 0000000 0000000 eumdac-3.0.0/tests/base.py 0000664 0000000 0000000 00000014314 14720105632 0015401 0 ustar 00root root 0000000 0000000 import unittest
import re
import pickle
import os
import io
import gzip
import zlib
import urllib
from base64 import b64encode
import responses
from responses import matchers
from contextlib import contextmanager
CONSUMER_KEY = os.getenv("CONSUMER_KEY")
CONSUMER_SECRET = os.getenv("CONSUMER_SECRET")
# different modes
# INTEGRATION_TESTING -> Targets real OPE endpoints + record all calls
INTEGRATION_TESTING = os.getenv("INTEGRATION_TESTING")
if INTEGRATION_TESTING and not (CONSUMER_KEY and CONSUMER_SECRET):
raise RuntimeError("Integration testing requires credentials!")
http_methods = ["GET", "HEAD", "POST", "PUT", "DELETE", "CONNECT", "OPTIONS", "TRACE", "PATCH"]
class ReplayResponse(responses.Response):
def __init__(self, request, response):
self._request = request
self._response = response
parsed_url = urllib.parse.urlparse(request.url)
super().__init__(
request.method,
request.url,
body=response.content,
status=response.status_code,
headers=response.headers,
match=[matchers.query_string_matcher(parsed_url.query)],
)
if "gzip" in response.headers.get("Content-Encoding", ""):
self.body = self._gzip_compress(self.body)
@staticmethod
def _gzip_compress(data):
if hasattr(data, "read"):
data = data.read()
if isinstance(data, str):
data = data.encode()
# window size flag, +25 to +31 include a basic gzip
# header and trailing checksum
wbits = 28
compressor = zlib.compressobj(wbits=wbits)
compressed_chunks = [compressor.compress(data), compressor.flush()]
return b"".join(compressed_chunks)
class ReplayResponsesTestCase(unittest.TestCase):
@classmethod
def setUpClass(cls):
if INTEGRATION_TESTING:
cls.prepare_integration_test()
cls.recordpath = "{0}/data/{1}.{2}.pickle.gz".format(
os.path.dirname(__file__), cls.__module__.split(".")[-1], cls.__name__
)
cls.requests_mock = responses.RequestsMock()
if INTEGRATION_TESTING:
cls._records = {}
else:
with gzip.open(cls.recordpath) as file:
cls._records = pickle.load(file)
def setUp(self):
if INTEGRATION_TESTING:
url = re.compile(".*")
for method in http_methods:
self.requests_mock.add(responses.PassthroughResponse(method, url))
self.requests_mock.assert_all_requests_are_fired = False
self.requests_mock.response_callback = self._record_streaming_response_body
else:
calls = self._records.get(self.record_key, responses.CallList())
for request, response in calls:
replay = ReplayResponse(request, response)
self.requests_mock.add(replay)
self.requests_mock.assert_all_requests_are_fired = True
self.addCleanup(self.requests_mock.reset)
self.requests_mock.start()
self.addCleanup(self.requests_mock.stop)
def tearDown(self):
if INTEGRATION_TESTING:
calls = list(self.requests_mock.calls)
if calls:
self._records[self.record_key] = calls
@classmethod
def tearDownClass(cls):
if INTEGRATION_TESTING:
with gzip.open(cls.recordpath, mode="wb") as file:
pickle.dump(cls._records, file)
@property
def record_key(self):
return self._testMethodName
@staticmethod
def _record_streaming_response_body(response):
if not response.raw.closed:
content = response.content
response.raw = io.BytesIO(content)
return response
@classmethod
def prepare_integration_test(cls):
"""This method can be used to check preconditions for integration tests"""
pass
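# Illustrative sketch (not part of the original test module): concrete test
# cases subclass ReplayResponsesTestCase and issue plain HTTP calls; recorded
# responses are replayed from the pickled fixture unless INTEGRATION_TESTING
# is set. The class name and URL below are placeholders and the class is not
# collected as a real test.
class _ExampleReplayTestCase(ReplayResponsesTestCase):
    def _example_request(self) -> None:
        """Sketch: any HTTP call made here is answered from the recorded fixture."""
        import requests  # local import; base.py itself does not import requests
        response = requests.get("https://api.example.org/endpoint")
        self.assertEqual(response.status_code, 200)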
class DataServiceTestCase(ReplayResponsesTestCase):
credentials = (
CONSUMER_KEY or "pewh0CiBQ5Gl8BX7K2i8vww9tsr0",
CONSUMER_SECRET or "cP58wJbenKRPaK9RKA8ODPxHmkw1",
)
@classmethod
def tearDownClass(cls):
if INTEGRATION_TESTING:
key, secret = map(str.encode, cls.credentials)
sensitive = b64encode(key + b":" + secret)
xs = len(key) * b"x"
crossed = b64encode(xs + b":" + xs)
pickled = pickle.dumps(cls._records)
sanitized = pickled.replace(sensitive, crossed)
with gzip.open(cls.recordpath, mode="wb") as file:
file.write(sanitized)
class FakeProduct:
def __init__(self, id=None, collection=None):
self._id = id or "product"
self.collection = collection or FakeCollection()
@property
def md5(self):
return None
class FakeCollection:
def __init__(self, id=None):
self._id = id or "collection"
class FakeCustomisation:
def __init__(self, states_to_return, cid=None):
self.states_to_return = iter(states_to_return)
self.logfile = f"Fake Log: {states_to_return}"
self._id = cid or "test"
self.deleted = False
@property
def status(self):
return next(self.states_to_return)
@property
def outputs(self):
return ["prod1.nc", "prod2.nc"]
def kill(self):
pass
def delete(self):
if not self.deleted:
self.deleted = True
else:
from eumdac.errors import CustomisationError
raise CustomisationError("double deletion")
@contextmanager
def stream_output_iter_content(self, output, chunks=0):
yield [bytes("test", "utf-8")]
class FakeTailor:
def __init__(self):
self.user_info = {
"username": "test",
}
self.quota = {
"data": {
"test": {
"space_usage_percentage": 95,
}
}
}
def get_customisation(self, id):
return FakeCustomisation(["QUEUED", "RUNNING", "DONE"], cid=id)
def new_customisation(self, product, chain):
return FakeCustomisation(["QUEUED", "RUNNING", "DONE"], cid=None)
class FakeStore:
def get_product(self, col_id, p_id):
return FakeProduct(id=p_id, collection=FakeCollection(id=col_id))
eumdac-3.0.0/tests/data/ 0000775 0000000 0000000 00000000000 14720105632 0015023 5 ustar 00root root 0000000 0000000 eumdac-3.0.0/tests/data/test_collection.TestCollection.pickle.gz 0000664 0000000 0000000 00000121110 14720105632 0024753 0 ustar 00root root 0000000 0000000 ڠgtest_collection.TestCollection.pickle ݓJv'6^fF#>Viwխa5@}rw.Vw_/)1]U T7>h~rO~#d&@,d˓'Os2w姃~=xpO?3}wdžm7>gBqh,L>gg.:W0`,z*\gk[( [sW0= 13? Xxlpm|+ck3=wm.= i.Síoz->1iG¼|nA̝_F>EsixO?Xߣ//l(T8Z~3
/ĉ['Hzs'Z[kF`0(omZޢ%1d``?Ag^Thߋ]zuZ:@ix6G쿧/ߚ#ժSnL9`o>@;[+*Gbc_^]Z VuEN \aWW=
_K\|tHRb? /am? )͝YK˸1~/ɗWɏƙaCۆ?~w
Lʜņ$p`~('h.CvoEtiqS]|q??~v{zO?>~Y[Pc7
t,~mc+ȴ?iBOb{v{`Ќ5)CPkm: J}Z?{ X$GD Z?F\5Ái7F'ϟ,7fݖf*aLP|&7zjglB1Bʾ0b{@OFu<.?}` Ly`8`PS<@j[뢗<ېm/|?T z߾?_m7o%tr賿|qd -㛭x)b|eFe`l$G(}-9@?喠3_@o埈FX%\ϸ]6f0``~s[IضXGyG%tv9QL?6EuE6c''Jv?Gfnol{sZ42ͬ~}pn,`x> Q
߀n'X8߮^jb5/oWgZůVZ]f70l> bl>}pW;9ӔxԵ[_WVAw?˓O}:vxv9L9jg'T|sKtvz2K*Om`Qڋ[OyX'*vluvV?^=$QTRJ\oa.[+Th0/V"Wۿ]ſwC=I z7+0)wk:CL5K[GA'J,ki'
y:JC/c邕'27)[kC)8toq8e:jV`SnB!Z
w.a"P?ze܀5!7
Hs3004P]ߐ#bV@}G cc)*]6p@sǣ8( ~0f