xarray-safe-rcm-2024.11.0/.flake8
[flake8]
ignore =
# E203: whitespace before ':' - doesn't work well with black
# E402: module level import not at top of file
# E501: line too long - let black worry about that
# E731: do not assign a lambda expression, use a def
# W503: line break before binary operator
E203,E402,E501,E731,W503
exclude=
.eggs
docs
xarray-safe-rcm-2024.11.0/.github/dependabot.yml
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
xarray-safe-rcm-2024.11.0/.github/release.yml
changelog:
exclude:
authors:
- dependabot
- pre-commit-ci
xarray-safe-rcm-2024.11.0/.github/workflows/ci.yaml
name: CI
on:
push:
branches: [main]
pull_request:
branches: [main]
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
detect-skip-ci-trigger:
name: "Detect CI Trigger: [skip-ci]"
if: |
github.repository == 'umr-lops/xarray-safe-rcm'
&& github.event_name == 'push'
|| github.event_name == 'pull_request'
runs-on: ubuntu-latest
outputs:
triggered: ${{ steps.detect-trigger.outputs.trigger-found }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 2
- uses: xarray-contrib/ci-trigger@v1
id: detect-trigger
with:
keyword: "[skip-ci]"
ci:
name: ${{ matrix.os }} py${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
needs: detect-skip-ci-trigger
if: needs.detect-skip-ci-trigger.outputs.triggered == 'false'
defaults:
run:
shell: bash -l {0}
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12"]
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
steps:
- name: Checkout the repository
uses: actions/checkout@v4
with:
# need to fetch all tags to get a correct version
fetch-depth: 0 # fetch all branches and tags
- name: Setup environment variables
run: |
echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
echo "CONDA_ENV_FILE=ci/requirements/environment.yaml" >> $GITHUB_ENV
- name: Setup micromamba
uses: mamba-org/setup-micromamba@v2
with:
environment-file: ${{ env.CONDA_ENV_FILE }}
environment-name: xarray-safe-rcm-tests
cache-environment: true
cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{matrix.python-version}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}"
create-args: >-
python=${{matrix.python-version}}
conda
- name: Install xarray-safe-rcm
run: |
python -m pip install --no-deps -e .
- name: Import xarray-safe-rcm
run: |
python -c "import safe_rcm"
- name: Run tests
run: |
python -m pytest --cov=safe_rcm
xarray-safe-rcm-2024.11.0/.github/workflows/pypi.yaml
name: Upload Package to PyPI
on:
release:
types: [created]
jobs:
build:
name: Build packages
runs-on: ubuntu-latest
if: github.repository == 'umr-lops/xarray-safe-rcm'
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.x"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install build twine
- name: Build
run: |
python -m build --sdist --wheel --outdir dist/ .
- name: Check the built archives
run: |
twine check dist/*
- name: Upload build artifacts
uses: actions/upload-artifact@v4
with:
name: packages
path: dist/*
pypi-publish:
name: Upload to PyPI
runs-on: ubuntu-latest
needs: build
environment:
name: pypi
url: https://pypi.org/p/xarray-safe-rcm
permissions:
id-token: write
steps:
- name: Download build artifacts
uses: actions/download-artifact@v4
with:
name: packages
path: dist/
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@15c56dba361d8335944d31a2ecd17d700fc7bcbc
xarray-safe-rcm-2024.11.0/.github/workflows/upstream-dev.yaml
name: upstream-dev CI
on:
push:
branches: [main]
pull_request:
branches: [main]
schedule:
- cron: "0 18 * * 0" # Weekly "On Sundays at 18:00" UTC
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
detect-test-upstream-trigger:
name: "Detect CI Trigger: [test-upstream]"
if: github.event_name == 'push' || github.event_name == 'pull_request'
runs-on: ubuntu-latest
outputs:
triggered: ${{ steps.detect-trigger.outputs.trigger-found }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 2
- uses: xarray-contrib/ci-trigger@v1.2
id: detect-trigger
with:
keyword: "[test-upstream]"
upstream-dev:
name: upstream-dev
runs-on: ubuntu-latest
needs: detect-test-upstream-trigger
if: |
always()
&& github.repository == 'umr-lops/xarray-safe-rcm'
&& (
github.event_name == 'schedule'
|| github.event_name == 'workflow_dispatch'
|| needs.detect-test-upstream-trigger.outputs.triggered == 'true'
|| contains(github.event.pull_request.labels.*.name, 'run-upstream')
)
defaults:
run:
shell: bash -l {0}
strategy:
fail-fast: false
matrix:
python-version: ["3.12"]
steps:
- name: checkout the repository
uses: actions/checkout@v4
with:
# need to fetch all tags to get a correct version
fetch-depth: 0 # fetch all branches and tags
- name: set up conda environment
uses: mamba-org/setup-micromamba@v2
with:
environment-file: ci/requirements/environment.yaml
environment-name: tests
create-args: >-
python=${{ matrix.python-version }}
pytest-reportlog
conda
- name: install upstream-dev dependencies
run: bash ci/install-upstream-dev.sh
- name: install the package
run: python -m pip install --no-deps -e .
- name: show versions
run: python -m pip list
- name: import
run: |
python -c 'import safe_rcm'
- name: run tests
if: success()
id: status
run: |
python -m pytest -rf --report-log=pytest-log.jsonl
- name: report failures
if: |
failure()
&& steps.tests.outcome == 'failure'
&& github.event_name == 'schedule'
uses: xarray-contrib/issue-from-pytest-log@v1
with:
log-path: pytest-log.jsonl
xarray-safe-rcm-2024.11.0/.gitignore
# editor files
*~
\#*\#
# python bytecode
*.py[co]
__pycache__/
# install artifacts
/build
/dist
/*.egg-info
# tools
.ipynb_checkpoints/
.hypothesis/
.pytest_cache
.coverage
.coverage.*
.cache
/docs/_build/
.prettier_cache
xarray-safe-rcm-2024.11.0/.pre-commit-config.yaml
ci:
autoupdate_schedule: weekly
# https://pre-commit.com/
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-docstring-first
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.3
hooks:
- id: ruff
args: [--fix]
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.10.0
hooks:
- id: black-jupyter
- repo: https://github.com/keewis/blackdoc
rev: v0.3.9
hooks:
- id: blackdoc
additional_dependencies: ["black==24.10.0"]
- id: blackdoc-autoupdate-black
- repo: https://github.com/kynan/nbstripout
rev: 0.8.0
hooks:
- id: nbstripout
args: [--extra-keys=metadata.kernelspec metadata.language_info.version]
- repo: https://github.com/rbubley/mirrors-prettier
rev: v3.3.3
hooks:
- id: prettier
args: [--cache-location=.prettier_cache]
- repo: https://github.com/ComPWA/taplo-pre-commit
rev: v0.9.3
hooks:
- id: taplo-format
args: [--option, array_auto_collapse=false]
- id: taplo-lint
args: [--no-schema]
- repo: https://github.com/abravalheri/validate-pyproject
rev: v0.23
hooks:
- id: validate-pyproject
xarray-safe-rcm-2024.11.0/.readthedocs.yml
version: 2
build:
os: ubuntu-22.04
tools:
python: mambaforge-4.10
jobs:
post_checkout:
- (git --no-pager log --pretty="tformat:%s" -1 | grep -vqF "[skip-rtd]") || exit 183
pre_install:
- git update-index --assume-unchanged docs/conf.py ci/requirements/docs.yaml
conda:
environment: ci/requirements/docs.yaml
sphinx:
fail_on_warning: true
configuration: docs/conf.py
formats: []
xarray-safe-rcm-2024.11.0/LICENSE
MIT License
Copyright (c) 2023, xarray-safe-rcm developers
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
xarray-safe-rcm-2024.11.0/README.md
# xarray-safe-rcm
Read RCM SAFE files into `xarray.DataTree` objects.
## Usage
```python
import safe_rcm
tree = safe_rcm.open_rcm(url, chunks={})
```
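
The returned object is an `xarray.DataTree`. As a rough sketch (assuming `url`
points at an extracted RCM product), the imagery and the calibration look-up
tables are exposed as sub-trees:

```python
# sketch only: `url` is assumed to point at an extracted RCM product
tree = safe_rcm.open_rcm(url, chunks={})
tree["imagery"]  # measurement rasters, concatenated along the "pole" dimension
tree["lookupTables"]  # calibration look-up tables and noise levels
tree["sourceAttributes/orbitAndAttitude/orbitInformation"]  # orbit information
```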
xarray-safe-rcm-2024.11.0/ci/install-upstream-dev.sh
#!/usr/bin/env bash
if command -v micromamba >/dev/null; then
conda=micromamba
elif command -v mamba >/dev/null; then
conda=mamba
else
conda=conda
fi
conda remove -y --force cytoolz numpy xarray toolz fsspec python-dateutil pandas lxml xmlschema rioxarray
python -m pip install \
-i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
--no-deps \
--pre \
--upgrade \
numpy \
pandas \
xarray
python -m pip install --upgrade \
git+https://github.com/pytoolz/toolz \
git+https://github.com/lxml/lxml \
git+https://github.com/sissaschool/xmlschema \
git+https://github.com/fsspec/filesystem_spec \
git+https://github.com/dateutil/dateutil \
git+https://github.com/corteva/rioxarray
xarray-safe-rcm-2024.11.0/ci/requirements/docs.yaml
name: xarray-safe-rcm-docs
channels:
- conda-forge
dependencies:
- python=3.10
- sphinx>=4
- sphinx-book-theme
- ipython
- myst-parser
xarray-safe-rcm-2024.11.0/ci/requirements/environment.yaml
name: xarray-safe-rcm-tests
channels:
- conda-forge
dependencies:
- python
# development
- ipython
- pre-commit
- jupyterlab
- jupyterlab_code_formatter
- isort
- black
- dask-labextension
# testing
- pytest
- pytest-reportlog
- pytest-cov
- hypothesis
- coverage
# I/O
- rioxarray
- h5netcdf
- zarr
- scipy
# data
- xarray
- dask
- numpy
- pandas
# processing
- toolz
- lxml
- xmlschema
xarray-safe-rcm-2024.11.0/docs/conf.py
# -- Project information -----------------------------------------------------
import datetime as dt
project = "xarray-safe-rcm"
author = f"{project} developers"
initial_year = "2023"
year = dt.datetime.now().year
copyright = f"{initial_year}-{year}, {author}"
# The root toctree document.
root_doc = "index"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"myst_parser",
"sphinx.ext.extlinks",
"sphinx.ext.intersphinx",
"IPython.sphinxext.ipython_directive",
"IPython.sphinxext.ipython_console_highlighting",
]
extlinks = {
"issue": ("https://github.com/umr-lops/xarray-safe-rcm/issues/%s", "GH%s"),
"pull": ("https://github.com/umr-lops/xarray-safe-rcm/pull/%s", "PR%s"),
}
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "directory"]
# nitpicky mode: complain if references could not be found
nitpicky = True
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_book_theme"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ["_static"]
# -- Options for the intersphinx extension -----------------------------------
intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
"sphinx": ("https://www.sphinx-doc.org/en/stable/", None),
}
xarray-safe-rcm-2024.11.0/docs/index.md
# xarray-safe-rcm
xarray-safe-rcm-2024.11.0/docs/requirements.txt
sphinx>=4
sphinx-book-theme
ipython
xarray-safe-rcm-2024.11.0/pyproject.toml
[project]
name = "xarray-safe-rcm"
requires-python = ">= 3.10"
license = { text = "MIT" }
description = "xarray reader for radarsat constellation mission (RCM) SAFE files"
readme = "README.md"
dependencies = [
"toolz",
"numpy",
"xarray",
"lxml",
"xmlschema",
"rioxarray",
"fsspec",
"exceptiongroup; python_version < '3.11'",
]
dynamic = ["version"]
[build-system]
requires = ["setuptools>=64.0", "setuptools-scm"]
build-backend = "setuptools.build_meta"
[tool.setuptools.packages.find]
include = [
"safe_rcm",
"safe_rcm.*",
]
[tool.setuptools_scm]
fallback_version = "9999"
[tool.ruff]
target-version = "py310"
builtins = ["ellipsis"]
exclude = [".git", ".eggs", "build", "dist", "__pycache__"]
line-length = 100
[tool.ruff.lint]
ignore = [
"E402", # module level import not at top of file
"E501", # line too long - let black worry about that
"E731", # do not assign a lambda expression, use a def
"UP038", # type union instead of tuple for isinstance etc
]
select = [
"F", # Pyflakes
"E", # Pycodestyle
"I", # isort
"UP", # Pyupgrade
"TID", # flake8-tidy-imports
"W",
]
extend-safe-fixes = [
"TID252", # absolute imports
"UP031", # percent string interpolation
]
fixable = ["I", "TID252", "UP"]
[tool.ruff.lint.isort]
known-first-party = ["safe_rcm"]
known-third-party = ["xarray", "tlz"]
[tool.ruff.lint.flake8-tidy-imports]
# Disallow all relative imports.
ban-relative-imports = "all"
[tool.coverage.run]
source = ["safe_rcm"]
branch = true
[tool.coverage.report]
show_missing = true
exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"]
xarray-safe-rcm-2024.11.0/safe_rcm/__init__.py
from importlib.metadata import version
from safe_rcm.api import open_rcm # noqa: F401
try:
__version__ = version("xarray-safe-rcm")
except Exception:
__version__ = "9999"
xarray-safe-rcm-2024.11.0/safe_rcm/api.py
import os
import posixpath
from fnmatch import fnmatchcase
import fsspec
import xarray as xr
from fsspec.implementations.dirfs import DirFileSystem
from tlz.dicttoolz import valmap
from tlz.functoolz import compose_left, curry, juxt
from safe_rcm.calibrations import read_noise_levels
from safe_rcm.manifest import read_manifest
from safe_rcm.product.reader import read_product
from safe_rcm.product.transformers import extract_dataset
from safe_rcm.product.utils import starcall
from safe_rcm.xml import read_xml
try:
ExceptionGroup
except NameError:
from exceptiongroup import ExceptionGroup
@curry
def execute(tree, f, path):
node = tree[path]
return f(node)
def ignored_file(path, ignores):
ignored = [
fnmatchcase(path, ignore) or fnmatchcase(posixpath.basename(path), ignore)
for ignore in ignores
]
return any(ignored)
def open_rcm(
url,
*,
backend_kwargs=None,
manifest_ignores=[
"*.pdf",
"*.html",
"*.xslt",
"*.png",
"*.kml",
"*.txt",
"preview/*",
],
**dataset_kwargs,
):
"""read SAFE files of the radarsat constellation mission (RCM)
Parameters
----------
    url : str or path-like
        Path or URL of the (extracted) RCM SAFE product.
    backend_kwargs : mapping, optional
        Additional reader options. Only ``"storage_options"`` is used at the
        moment; it is passed on to ``fsspec`` when opening ``url``.
manifest_ignores : list of str, default: ["*.pdf", "*.html", "*.xslt", "*.png", \
"*.kml", "*.txt", "preview/*"]
Globs that match files from the manifest that are allowed to be missing.
**dataset_kwargs
Keyword arguments forwarded to `xr.open_dataset`, used to open
the contained data files.
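
    Examples
    --------
    A minimal usage sketch (the product path is a hypothetical example):

    >>> tree = open_rcm("/path/to/RCM_product", chunks={})  # doctest: +SKIP
    >>> tree["imagery"]  # doctest: +SKIP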
"""
if not isinstance(url, (str, os.PathLike)):
raise ValueError(f"cannot deal with object of type {type(url)}: {url}")
if backend_kwargs is None:
backend_kwargs = {}
url = os.fspath(url)
storage_options = backend_kwargs.get("storage_options", {})
mapper = fsspec.get_mapper(url, **storage_options)
relative_fs = DirFileSystem(path=url, fs=mapper.fs)
try:
declared_files = read_manifest(mapper, "manifest.safe")
except (FileNotFoundError, KeyError):
raise ValueError(
"cannot find the `manifest.safe` file. Are you sure this is a SAFE dataset?"
)
missing_files = [
path
for path in declared_files
if not ignored_file(path, manifest_ignores) and not relative_fs.exists(path)
]
if missing_files:
raise ExceptionGroup(
"not all files declared in the manifest are available",
[ValueError(f"{p} does not exist") for p in missing_files],
)
tree = read_product(mapper, "metadata/product.xml")
calibration_root = "metadata/calibration"
lookup_table_structure = {
"/incidenceAngles": {
"path": "/imageReferenceAttributes",
"f": compose_left(
lambda obj: obj.attrs["incidenceAngleFileName"],
curry(posixpath.join, calibration_root),
curry(read_xml, mapper),
curry(extract_dataset, dims="coefficients"),
),
},
"/lookupTables": {
"path": "/imageReferenceAttributes/lookupTableFileName",
"f": compose_left(
lambda obj: obj.stack(stacked=["sarCalibrationType", "pole"]),
lambda obj: obj.reset_index("stacked"),
juxt(
compose_left(
lambda obj: obj.to_series().to_dict(),
curry(valmap, curry(posixpath.join, calibration_root)),
curry(valmap, curry(read_xml)(mapper)),
curry(valmap, curry(extract_dataset, dims="coefficients")),
curry(valmap, lambda ds: ds["gains"].assign_attrs(ds.attrs)),
lambda d: xr.concat(list(d.values()), dim="stacked"),
),
lambda obj: obj.coords,
),
curry(starcall, lambda arr, coords: arr.assign_coords(coords)),
lambda arr: arr.set_index({"stacked": ["sarCalibrationType", "pole"]}),
lambda arr: arr.unstack("stacked"),
lambda arr: arr.rename("lookup_tables"),
lambda arr: arr.to_dataset(),
),
},
"/noiseLevels": {
"path": "/imageReferenceAttributes/noiseLevelFileName",
"f": curry(read_noise_levels, mapper, calibration_root),
},
}
calibration = valmap(
lambda x: execute(**x)(tree),
lookup_table_structure,
)
imagery_paths = tree["/sceneAttributes/ipdf"].to_series().to_dict()
resolved = valmap(
compose_left(
curry(posixpath.join, "metadata"),
posixpath.normpath,
),
imagery_paths,
)
imagery_dss = valmap(
compose_left(
curry(relative_fs.open),
curry(xr.open_dataset, engine="rasterio", **dataset_kwargs),
),
resolved,
)
dss = [ds.assign_coords(pole=coord) for coord, ds in imagery_dss.items()]
imagery = xr.concat(dss, dim="pole")
return tree.assign(
{
"lookupTables": xr.DataTree.from_dict(calibration),
"imagery": xr.DataTree(imagery),
}
)
xarray-safe-rcm-2024.11.0/safe_rcm/calibrations.py
import posixpath
import numpy as np
import xarray as xr
from tlz.dicttoolz import itemmap, merge_with, valfilter, valmap
from tlz.functoolz import compose_left, curry, flip
from tlz.itertoolz import first
from safe_rcm.product.dicttoolz import keysplit
from safe_rcm.product.reader import execute
from safe_rcm.product.transformers import extract_dataset
from safe_rcm.xml import read_xml
def move_attrs_to_coords(ds, names):
coords, attrs = keysplit(lambda k: k in names, ds.attrs)
new = ds.copy()
new.attrs = attrs
return new.assign_coords(coords)
def pad_common(dss):
def compute_padding(item, maximum):
key, value = item
return key, (0, maximum[key] - value)
sizes = [dict(ds.sizes) for ds in dss]
maximum_sizes = valmap(max, merge_with(list, *sizes))
pad_widths = [itemmap(flip(compute_padding, maximum_sizes), _) for _ in sizes]
return [
ds.pad(padding, mode="constant", constant_values=np.nan)
for ds, padding in zip(dss, pad_widths)
]
def _read_level(mapping):
return (
extract_dataset(mapping)
.pipe(
lambda ds: ds.swap_dims(
{first(valfilter(lambda v: v > 1, ds.sizes)): "coefficients"}
)
)
.pipe(lambda ds: ds.reset_coords())
.pipe(
move_attrs_to_coords,
["sarCalibrationType", "pixelFirstNoiseValue", "stepSize"],
)
)
def read_noise_level_file(mapper, path):
layout = {
"/referenceNoiseLevel": {
"path": "/referenceNoiseLevel",
"f": compose_left(
curry(map, _read_level),
curry(map, lambda ds: ds.expand_dims("sarCalibrationType")),
list,
curry(xr.combine_by_coords, combine_attrs="drop_conflicts"),
),
},
"/perBeamReferenceNoiseLevel": {
"path": "/perBeamReferenceNoiseLevel",
"f": compose_left(
curry(map, _read_level),
curry(map, lambda ds: ds.expand_dims("sarCalibrationType")),
list,
pad_common,
curry(xr.combine_by_coords, combine_attrs="drop_conflicts"),
),
},
"/azimuthNoiseLevelScaling": {
"path": "/azimuthNoiseLevelScaling",
"f": compose_left(
curry(map, _read_level),
list,
pad_common,
curry(xr.combine_by_coords, combine_attrs="drop_conflicts"),
),
},
}
decoded = read_xml(mapper, path)
converted = valmap(lambda x: execute(**x)(decoded), layout)
return converted
def read_noise_levels(mapper, root, fnames):
fnames = fnames.data.tolist()
paths = [posixpath.join(root, name) for name in fnames]
poles = [path.removesuffix(".xml").split("_")[1] for path in paths]
trees = [read_noise_level_file(mapper, path) for path in paths]
merged = merge_with(list, *trees)
combined = valmap(
compose_left(
curry(xr.concat, dim="pole", combine_attrs="no_conflicts"),
lambda x: x.assign_coords(pole=poles),
),
merged,
)
return xr.DataTree.from_dict(combined)
xarray-safe-rcm-2024.11.0/safe_rcm/manifest.py
from tlz import filter
from tlz.functoolz import compose_left, curry
from tlz.itertoolz import concat, get
from safe_rcm.product.dicttoolz import query
from safe_rcm.xml import read_xml
def merge_location(loc):
locator = loc["@locator"]
href = loc["@href"]
return f"{locator}/{href}".lstrip("/")
def read_manifest(mapper, path):
structure = {
"/dataObjectSection/dataObject": compose_left(
curry(
map,
compose_left(
curry(get, "byteStream"),
curry(
map,
compose_left(
curry(get, "fileLocation"), curry(map, merge_location)
),
),
concat,
),
),
concat,
),
"/metadataSection/metadataObject": compose_left(
curry(
filter,
compose_left(curry(get, "@classification"), lambda x: x == "SYNTAX"),
),
curry(map, compose_left(curry(get, "metadataReference"), merge_location)),
),
}
manifest = read_xml(mapper, path)
return list(concat(func(query(path, manifest)) for path, func in structure.items()))
xarray-safe-rcm-2024.11.0/safe_rcm/product/dicttoolz.py
from tlz.dicttoolz import get_in
from tlz.itertoolz import first, groupby
def query(path, mapping):
if path == "/":
return mapping
keys = path.lstrip("/").split("/")
return get_in(keys, mapping, no_default=True)
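# For illustration (hypothetical input): query("/a/b", {"a": {"b": 1}}) returns 1,
# while query("/", mapping) returns the mapping unchanged.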
def itemsplit(predicate, d):
groups = groupby(predicate, d.items())
first = dict(groups.get(True, ()))
second = dict(groups.get(False, ()))
return first, second
def valsplit(predicate, d):
wrapper = lambda item: predicate(item[1])
return itemsplit(wrapper, d)
def keysplit(predicate, d):
wrapper = lambda item: predicate(item[0])
return itemsplit(wrapper, d)
def first_values(d):
return first(d.values())
xarray-safe-rcm-2024.11.0/safe_rcm/product/predicates.py
import numpy as np
from tlz.functoolz import compose, juxt
from tlz.itertoolz import isiterable
def disjunction(*predicates):
return compose(any, juxt(predicates))
def conjunction(*predicates):
return compose(all, juxt(predicates))
def is_scalar(x):
return not isiterable(x) or isinstance(x, (str, bytes))
def is_composite_value(obj):
if not isinstance(obj, list) or len(obj) not in [1, 2]:
return False
if any(not isinstance(el, dict) or list(el) != ["@dataStream", "$"] for el in obj):
return False
data_stream_values = [el["@dataStream"].lower() for el in obj]
return data_stream_values in (["real", "imaginary"], ["magnitude"])
def is_complex(obj):
return is_composite_value(obj) and len(obj) == 2
def is_magnitude(obj):
return is_composite_value(obj) and len(obj) == 1
def is_array(obj):
    # definition of an array:
# - list of scalars
# - list of 1d lists
# - complex array:
# - complex parts
# - list of complex values
if not isinstance(obj, list):
return False
if len(obj) == 0:
# zero-sized list, not sure what to do here
return False
elem = obj[0]
if is_complex(obj):
return not is_scalar(elem["$"])
elif is_scalar(elem):
return True
elif isinstance(elem, list):
if len(elem) == 1 and is_scalar(elem[0]):
return True
elif is_complex(elem):
            # array of complex values
return True
elif all(map(is_scalar, elem)):
return True
return False
def is_scalar_variable(obj):
if not isinstance(obj, dict):
return False
if not all(is_scalar(v) for v in obj.values()):
return False
return all(k == "$" or k.startswith("@") for k in obj)
is_scalar_valued = disjunction(
is_scalar, lambda x: is_array(x) and len(x) == 1, is_scalar_variable
)
def is_nested(obj):
"""nested means: list of dict, but all dict values are scalar or 1-valued"""
if not isinstance(obj, list) or len(obj) == 0:
return False
elem = obj[0]
if not isinstance(elem, dict):
return False
if all(map(is_scalar_valued, elem.values())):
return True
return False
def is_nested_array(obj):
return is_nested(obj) and "$" in obj[0]
def is_nested_dataset(obj):
return is_nested(obj) and "$" not in obj[0]
def is_attr(column):
"""an attribute is a index if it has multiple unique values"""
return np.unique(column).size == 1
xarray-safe-rcm-2024.11.0/safe_rcm/product/reader.py
import xarray as xr
from tlz.dicttoolz import keyfilter, merge, merge_with, valfilter, valmap
from tlz.functoolz import compose_left, curry, juxt
from tlz.itertoolz import first, second
from safe_rcm.product import transformers
from safe_rcm.product.dicttoolz import keysplit, query
from safe_rcm.product.predicates import disjunction, is_nested_array, is_scalar_valued
from safe_rcm.product.utils import dictfirst, starcall
from safe_rcm.xml import read_xml
@curry
def attach_path(obj, path):
if not hasattr(obj, "encoding"):
raise ValueError(
"cannot attach source path: `obj` does not have a `encoding` attribute."
)
new = obj.copy()
new.encoding["xpath"] = path
return new
@curry
def execute(mapping, f, path):
subset = query(path, mapping)
return compose_left(f, attach_path(path=path))(subset)
def read_product(mapper, product_path):
decoded = read_xml(mapper, product_path)
layout = {
"/": {
"path": "/",
"f": curry(transformers.extract_metadata)(collapse=["securityAttributes"]),
},
"/sourceAttributes": {
"path": "/sourceAttributes",
"f": transformers.extract_metadata,
},
"/sourceAttributes/radarParameters": {
"path": "/sourceAttributes/radarParameters",
"f": transformers.extract_dataset,
},
"/sourceAttributes/radarParameters/prfInformation": {
"path": "/sourceAttributes/radarParameters/prfInformation",
"f": transformers.extract_nested_dataset,
},
"/sourceAttributes/orbitAndAttitude/orbitInformation": {
"path": "/sourceAttributes/orbitAndAttitude/orbitInformation",
"f": compose_left(
curry(transformers.extract_dataset)(dims="timeStamp"),
lambda ds: ds.assign_coords(
{"timeStamp": ds["timeStamp"].astype("datetime64")}
),
),
},
"/sourceAttributes/orbitAndAttitude/attitudeInformation": {
"path": "/sourceAttributes/orbitAndAttitude/attitudeInformation",
"f": compose_left(
curry(transformers.extract_dataset)(dims="timeStamp"),
lambda ds: ds.assign_coords(
{"timeStamp": ds["timeStamp"].astype("datetime64")}
),
),
},
"/sourceAttributes/rawDataAttributes": {
"path": "/sourceAttributes/rawDataAttributes",
"f": compose_left(
curry(keysplit, lambda k: k != "rawDataAnalysis"),
juxt(
compose_left(first, transformers.extract_dataset),
compose_left(
second,
dictfirst,
curry(starcall, curry(merge_with, list)),
curry(
transformers.extract_dataset,
dims={"rawDataHistogram": ["stacked", "histogram"]},
default_dims=["stacked"],
),
lambda obj: obj.set_index({"stacked": ["pole", "beam"]}),
lambda obj: obj.unstack("stacked"),
),
),
curry(xr.merge),
),
},
"/imageGenerationParameters/generalProcessingInformation": {
"path": "/imageGenerationParameters/generalProcessingInformation",
"f": transformers.extract_metadata,
},
"/imageGenerationParameters/sarProcessingInformation": {
"path": "/imageGenerationParameters/sarProcessingInformation",
"f": compose_left(
curry(keyfilter, lambda k: k not in {"azimuthWindow", "rangeWindow"}),
transformers.extract_dataset,
),
},
"/imageGenerationParameters/chirps": {
"path": "/imageGenerationParameters/chirp",
"f": compose_left(
lambda el: merge_with(list, *el),
curry(keysplit, lambda k: k != "chirpQuality"),
juxt(
first,
compose_left(
second,
dictfirst,
lambda el: merge_with(list, *el),
),
),
lambda x: merge(*x),
curry(
transformers.extract_dataset,
dims={
"amplitudeCoefficients": ["stacked", "coefficients"],
"phaseCoefficients": ["stacked", "coefficients"],
},
default_dims=["stacked"],
),
lambda obj: obj.set_index({"stacked": ["pole", "pulse"]}),
lambda obj: obj.drop_duplicates("stacked", keep="last"),
lambda obj: obj.unstack("stacked"),
),
},
"/imageGenerationParameters/slantRangeToGroundRange": {
"path": "/imageGenerationParameters/slantRangeToGroundRange",
"f": compose_left(
lambda el: merge_with(list, *el),
curry(
transformers.extract_dataset,
dims={
"groundToSlantRangeCoefficients": [
"zeroDopplerAzimuthTime",
"coefficients",
],
},
default_dims=["zeroDopplerAzimuthTime"],
),
),
},
"/imageReferenceAttributes": {
"path": "/imageReferenceAttributes",
"f": compose_left(
curry(valfilter)(disjunction(is_scalar_valued, is_nested_array)),
transformers.extract_dataset,
),
},
"/imageReferenceAttributes/rasterAttributes": {
"path": "/imageReferenceAttributes/rasterAttributes",
"f": transformers.extract_dataset,
},
"/imageReferenceAttributes/geographicInformation/ellipsoidParameters": {
"path": "/imageReferenceAttributes/geographicInformation/ellipsoidParameters",
"f": curry(transformers.extract_dataset)(dims="params"),
},
"/imageReferenceAttributes/geographicInformation/geolocationGrid": {
"path": "/imageReferenceAttributes/geographicInformation/geolocationGrid/imageTiePoint",
"f": compose_left(
curry(transformers.extract_nested_datatree)(dims="tie_points"),
lambda tree: xr.merge([node.ds for node in tree.subtree]),
lambda ds: ds.set_index(tie_points=["line", "pixel"]),
lambda ds: ds.unstack("tie_points"),
),
},
"/imageReferenceAttributes/geographicInformation/rationalFunctions": {
"path": "/imageReferenceAttributes/geographicInformation/rationalFunctions",
"f": curry(transformers.extract_dataset)(dims="coefficients"),
},
"/sceneAttributes": {
"path": "/sceneAttributes/imageAttributes",
"f": compose_left(
first, # GRD datasets only have 1
curry(keyfilter)(lambda x: not x.startswith("@")),
transformers.extract_dataset,
),
},
"/grdBurstMap": {
"path": "/grdBurstMap",
"f": compose_left(
curry(
map,
compose_left(
curry(keysplit, lambda k: k != "burstAttributes"),
juxt(
first,
compose_left(
second,
dictfirst,
curry(starcall, curry(merge_with, list)),
),
),
curry(starcall, merge),
curry(
transformers.extract_dataset,
dims=["stacked"],
),
lambda obj: obj.set_index({"stacked": ["burst", "beam"]}),
lambda obj: obj.unstack("stacked"),
),
),
list,
curry(xr.concat, dim="burst_maps"),
),
},
"/dopplerCentroid": {
"path": "/dopplerCentroid",
"f": compose_left(
curry(
map,
compose_left(
curry(keysplit, lambda k: k != "dopplerCentroidEstimate"),
juxt(
first,
compose_left(
second,
dictfirst,
curry(starcall, curry(merge_with, list)),
),
),
curry(starcall, merge),
curry(
transformers.extract_dataset,
dims={
"dopplerCentroidCoefficients": [
"burst",
"coefficients",
],
},
default_dims=["burst"],
),
),
),
list,
curry(xr.concat, dim="burst_maps"),
),
},
"/dopplerRate": {
"path": "/dopplerRate",
"f": compose_left(
curry(
map,
compose_left(
curry(keysplit, lambda k: k != "dopplerRateEstimate"),
juxt(
first,
compose_left(
second,
dictfirst,
curry(starcall, curry(merge_with, list)),
),
),
curry(starcall, merge),
curry(
transformers.extract_dataset,
dims={
"dopplerRateCoefficients": ["burst", "coefficients"],
},
default_dims=["burst"],
),
),
),
list,
curry(xr.concat, dim="burst_maps"),
),
},
}
converted = valmap(
lambda x: execute(**x)(decoded),
layout,
)
return xr.DataTree.from_dict(converted)
xarray-safe-rcm-2024.11.0/safe_rcm/product/transformers.py
import numpy as np
import xarray as xr
from tlz.dicttoolz import (
itemfilter,
itemmap,
keyfilter,
keymap,
merge_with,
valfilter,
valmap,
)
from tlz.functoolz import compose_left, curry, flip
from tlz.itertoolz import concat, first, second
from safe_rcm.product.dicttoolz import first_values, keysplit, valsplit
from safe_rcm.product.predicates import (
is_array,
is_attr,
is_composite_value,
is_nested_array,
is_nested_dataset,
is_scalar,
)
ignore = ("@xmlns", "@xmlns:xsi", "@xsi:schemaLocation")
def convert_composite(value):
if not is_composite_value(value):
raise ValueError(f"not a composite: {value}")
converted = {part["@dataStream"].lower(): np.array(part["$"]) for part in value}
if list(converted) == ["magnitude"]:
return "magnitude", converted["magnitude"]
else:
return "complex", converted["real"] + 1j * converted["imaginary"]
def extract_metadata(
mapping,
collapse=(),
ignore=ignore,
):
without_ignores = keyfilter(lambda k: k not in ignore, mapping)
# extract the metadata
metadata_ = itemfilter(
lambda it: it[0].startswith("@") or is_scalar(it[1]),
without_ignores,
)
metadata = keymap(flip(str.lstrip, "@"), metadata_)
# collapse the selected items
to_collapse = keyfilter(lambda x: x in collapse, mapping)
collapsed = dict(concat(v.items() for v in to_collapse.values()))
attrs = metadata | collapsed
return xr.Dataset(attrs=attrs) # return dataset to avoid bug in datatree
def extract_array(obj, dims):
if isinstance(dims, str):
dims = [dims]
# special case for pulses:
if "pulses" in dims and len(obj) == 1 and isinstance(obj[0], str):
obj = obj[0].split()
elif len(obj) >= 1 and is_composite_value(obj[0]):
obj = list(map(compose_left(convert_composite, second), obj))
data = np.array(obj)
if data.size > 1:
data = np.squeeze(data)
return xr.Variable(dims, data)
def extract_composite(obj, dims=()):
type_, value = convert_composite(obj)
if is_scalar(value):
dims = ()
return xr.Variable(dims, value, {"type": type_})
def extract_variable(obj, dims=()):
attributes, data = keysplit(lambda k: k.startswith("@"), obj)
if list(data) != ["$"]:
raise ValueError("not a variable")
values = data["$"]
if is_scalar(values):
dims = ()
attrs = keymap(lambda k: k.lstrip("@"), attributes)
return xr.Variable(dims, values, attrs)
def extract_entry(name, obj, dims=None, default_dims=None):
if default_dims is None:
default_dims = [name]
if isinstance(dims, dict):
dims = dims.get(name, default_dims)
elif dims is None:
dims = default_dims
if is_array(obj):
# dimension coordinate
return extract_array(obj, dims=dims)
elif is_composite_value(obj):
return extract_composite(obj, dims=dims)
elif isinstance(obj, dict):
return extract_variable(obj, dims=dims)
elif is_nested_array(obj):
return extract_nested_array(obj, dims=dims).pipe(rename, name)
else:
raise ValueError(f"unknown datastructure:\n{obj}")
def extract_dataset(obj, dims=None, default_dims=None):
filtered = keyfilter(lambda x: x not in ignore, obj)
attrs, variables = valsplit(is_scalar, filtered)
if len(variables) == 1 and is_nested_dataset(first_values(variables)):
return extract_nested_dataset(first_values(variables), dims=dims).assign_attrs(
attrs
)
variables_ = keymap(lambda k: k.lstrip("@"), variables)
filtered_variables = valfilter(lambda x: not is_nested_dataset(x), variables_)
data_vars = itemmap(
lambda item: (
item[0],
extract_entry(*item, dims=dims, default_dims=default_dims),
),
filtered_variables,
)
return xr.Dataset(data_vars=data_vars, attrs=attrs)
def extract_nested_variable(obj, dims=None):
if is_array(obj):
return xr.Variable(dims, obj)
columns = merge_with(list, *obj)
attributes, data = keysplit(lambda k: k.startswith("@"), columns)
renamed = keymap(lambda k: k.lstrip("@"), attributes)
attrs = valmap(first, renamed)
return xr.Variable(dims, data["$"], attrs)
def unstack(obj, dim="stacked"):
if dim not in obj.dims:
return obj
stacked_coords = [name for name, arr in obj.coords.items() if dim in arr.dims]
return obj.set_index({dim: stacked_coords}).unstack(dim)
def rename(obj, name):
renamed = obj.rename(name)
if "$" not in obj.dims:
return renamed
if len(obj.dims) != 1:
raise ValueError(f"unexpected number of dimensions: {list(obj.dims)}")
return renamed.swap_dims({"$": name})
def to_variable_tuple(name, value, dims):
if name in dims:
dims_ = [name]
else:
dims_ = dims
return (dims_, value)
def extract_nested_array(obj, dims=None):
columns = merge_with(list, *obj)
attributes, data = keysplit(flip(str.startswith, "@"), columns)
renamed = keymap(flip(str.lstrip, "@"), attributes)
preprocessed_attrs = valmap(np.squeeze, renamed)
attrs_, indexes = valsplit(is_attr, preprocessed_attrs)
preprocessed_data = valmap(np.squeeze, data)
originally_stacked = isinstance(dims, (tuple, list)) and "stacked" in dims
if len(indexes) == 1:
dims = list(indexes)
elif len(indexes) >= 2:
dims = ["stacked"]
elif dims is None:
dims = ["$"]
coords = itemmap(
lambda it: (it[0], to_variable_tuple(*it, dims=dims)),
indexes,
)
arr = xr.DataArray(
data=preprocessed_data["$"],
attrs=valmap(first, attrs_),
dims=dims,
coords=coords,
)
if originally_stacked:
return arr
return arr.pipe(unstack, dim="stacked")
def extract_nested_dataset(obj, dims=None):
if not isinstance(obj, list):
raise ValueError(f"unknown type: {type(obj)}")
columns = merge_with(list, *obj)
attributes, data = keysplit(flip(str.startswith, "@"), columns)
renamed = keymap(flip(str.lstrip, "@"), attributes)
preprocessed = valmap(np.squeeze, renamed)
attrs_, indexes = valsplit(is_attr, preprocessed)
attrs = valmap(first, attrs_)
if dims is None:
if len(indexes) <= 1:
dims = list(indexes)
else:
dims = ["stacked"]
data_vars = valmap(curry(extract_nested_variable)(dims=dims), data)
coords = itemmap(
lambda it: (it[0], to_variable_tuple(*it, dims=dims)),
indexes,
)
return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs).pipe(
unstack, dim="stacked"
)
def extract_nested_datatree(obj, dims=None):
if not isinstance(obj, list):
raise ValueError(f"unknown type: {type(obj)}")
datasets = merge_with(list, *obj)
tree = valmap(curry(extract_nested_dataset)(dims=dims), datasets)
return xr.DataTree.from_dict(tree)
xarray-safe-rcm-2024.11.0/safe_rcm/product/utils.py
from tlz.functoolz import flip, pipe
from tlz.itertoolz import first, groupby
def split_marked(mapping, marker="@"):
groups = groupby(lambda item: item[0].startswith(marker), mapping.items())
attrs = {key.lstrip(marker): value for key, value in groups.get(True, {})}
data = {key: value for key, value in groups.get(False, {})}
return attrs, data
def strip_namespaces(name, namespaces):
"""remove the given namespaces from a name
Parameters
----------
name : str
The string to trim
namespaces : sequence of str
The list of namespaces.
Returns
-------
trimmed : str
The string without prefix and without leading colon.
"""
funcs = [
flip(str.removeprefix, ns) for ns in sorted(namespaces, key=len, reverse=True)
]
return pipe(name, *funcs).lstrip(":")
def starcall(func, args, **kwargs):
return func(*args, **kwargs)
def dictfirst(mapping):
return first(mapping.values())
xarray-safe-rcm-2024.11.0/safe_rcm/tests/test_product_utils.py
import functools
import string
import hypothesis.strategies as st
from hypothesis import given
from safe_rcm.product import utils
def shared(*, key):
def outer(func):
@functools.wraps(func)
def inner(*args, **kwargs):
result = func(*args, **kwargs)
return st.shared(result, key=key)
return inner
return outer
markers = st.characters()
marker = st.shared(markers, key="marker")
def marked_mapping(marker):
values = st.just(None)
unmarked_keys = st.text()
marked_keys = st.builds(lambda k, m: m + k, unmarked_keys, marker)
keys = st.one_of(unmarked_keys, marked_keys)
return st.dictionaries(keys, values)
@given(marked_mapping(marker), marker)
def test_split_marked(mapping, marker):
marked, unmarked = utils.split_marked(mapping, marker=marker)
assert list(unmarked) == [key for key in mapping if not key.startswith(marker)]
@shared(key="namespaces")
def namespaces():
values = st.just(None)
keys = st.text(string.ascii_letters, min_size=1, max_size=4)
return st.dictionaries(keys, values)
@st.composite
def draw_from(draw, elements):
elements = draw(elements)
if not elements:
return ""
return draw(st.sampled_from(elements))
def prefixed_names(namespaces):
def builder(base, prefix):
return f"{prefix}:{base}" if prefix != "" else base
bases = st.text(string.ascii_letters, min_size=1)
all_prefixes = namespaces.map(list)
prefixes = draw_from(all_prefixes)
return st.builds(builder, bases, prefixes)
@given(prefixed_names(namespaces()), namespaces())
def test_strip_namespaces(name, namespaces):
stripped = utils.strip_namespaces(name, namespaces)
assert ":" not in stripped
xarray-safe-rcm-2024.11.0/safe_rcm/tests/test_xml.py
import collections
import textwrap
import fsspec
import pytest
from safe_rcm import xml
def dedent(text):
return textwrap.dedent(text.removeprefix("\n").rstrip())
schemas = [
dedent(
"""
"""
),
dedent(
"""
"""
),
dedent(
"""
"""
),
]
Container = collections.namedtuple("SchemaSetup", ["mapper", "path", "expected"])
SchemaProperties = collections.namedtuple(
"SchemaProperties", ["root_elements", "simple_types", "complex_types"]
)
@pytest.fixture(params=enumerate(schemas))
def schema_setup(request):
schema_index, schema = request.param
mapper = fsspec.get_mapper("memory")
mapper["schemas/root.xsd"] = schema.encode()
mapper["schemas/schema1.xsd"] = dedent(
"""
"""
).encode()
mapper["schemas/schema2.xsd"] = dedent(
"""
"""
).encode()
mapper["schemas/schema3.xsd"] = dedent(
"""
"""
).encode()
mapper["schemas/schema4.xsd"] = dedent(
"""
"""
).encode()
return schema_index, mapper
@pytest.fixture
def schema_paths_setup(schema_setup):
schema_index, mapper = schema_setup
expected = [
["schemas/root.xsd"],
["schemas/root.xsd", "schemas/schema2.xsd", "schemas/schema4.xsd"],
[
"schemas/root.xsd",
"schemas/schema1.xsd",
"schemas/schema2.xsd",
"schemas/schema3.xsd",
"schemas/schema4.xsd",
],
]
return Container(mapper, "schemas/root.xsd", expected[schema_index])
@pytest.fixture
def schema_content_setup(schema_setup):
schema_index, mapper = schema_setup
count_type = {"name": "count", "type": "simple", "base_type": "integer"}
manifest_type = {"name": "manifest", "type": "complex"}
manifest_element = {"name": "manifest", "type": manifest_type}
count_element = {"name": "count", "type": count_type}
expected = [
SchemaProperties([], [], []),
SchemaProperties([count_element], [count_type], []),
SchemaProperties(
[manifest_element, count_element], [count_type], [manifest_type]
),
]
return Container(mapper, "schemas/root.xsd", expected[schema_index])
@pytest.fixture(params=["data.xml", "data/file.xml"])
def data_file_setup(request):
path = request.param
mapper = fsspec.get_mapper("memory")
mapper["schemas/root.xsd"] = dedent(
"""
"""
).encode()
mapper["schemas/schema1.xsd"] = dedent(
"""
"""
).encode()
mapper["schemas/schema2.xsd"] = dedent(
"""
"""
).encode()
schema_path = "schemas/root.xsd" if "/" not in path else "../schemas/root.xsd"
mapper[path] = dedent(
f"""
1
2
3
"""
).encode()
expected = {
"@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
"@xsi:schemaLocation": f"schema {schema_path}",
"summary": {"quantity_a": 1, "quantity_b": 2},
"count": 3,
}
return Container(mapper, path, expected)
def convert_type(t):
def strip_namespace(name):
return name.split("}", maxsplit=1)[1]
if hasattr(t, "content"):
# complex type
return {"name": t.name, "type": "complex"}
elif hasattr(t, "base_type"):
# simple type, only restriction
return {
"name": t.name,
"base_type": strip_namespace(t.base_type.name),
"type": "simple",
}
def convert_element(el):
return {"name": el.name, "type": convert_type(el.type)}
def extract_schema_properties(schema):
return SchemaProperties(
[convert_element(v) for v in schema.root_elements],
[convert_type(v) for v in schema.simple_types],
[convert_type(v) for v in schema.complex_types],
)
def test_remove_includes():
expected = schemas[0]
actual = xml.remove_includes(schemas[1])
assert actual == expected
@pytest.mark.parametrize(
["schema", "expected"],
(
(schemas[0], []),
(schemas[1], ["schema2.xsd"]),
(schemas[2], ["schema1.xsd", "schema2.xsd"]),
),
)
def test_extract_includes(schema, expected):
actual = xml.extract_includes(schema)
assert actual == expected
@pytest.mark.parametrize(
["root", "path", "expected"],
(
("", "file.xml", "file.xml"),
("/root", "file.xml", "/root/file.xml"),
("/root", "/other_root/file.xml", "/other_root/file.xml"),
),
)
def test_normalize(root, path, expected):
actual = xml.normalize(root, path)
assert actual == expected
def test_schema_paths(schema_paths_setup):
actual = xml.schema_paths(schema_paths_setup.mapper, schema_paths_setup.path)
expected = schema_paths_setup.expected
assert actual == expected
def test_open_schemas(schema_content_setup):
container = schema_content_setup
actual = xml.open_schema(container.mapper, container.path)
expected = container.expected
assert extract_schema_properties(actual) == expected
def test_read_xml(data_file_setup):
container = data_file_setup
actual = xml.read_xml(container.mapper, container.path)
assert actual == container.expected
xarray-safe-rcm-2024.11.0/safe_rcm/xml.py
import io
import posixpath
import re
from collections import deque
import xmlschema
from lxml import etree
from tlz.dicttoolz import keymap
include_re = re.compile(r'\s*')
def remove_includes(text):
return include_re.sub("", text)
def extract_includes(text):
return include_re.findall(text)
def normalize(root, path):
if posixpath.isabs(path) or posixpath.dirname(path):
return path
return posixpath.join(root, path)
def schema_paths(mapper, root_schema):
unvisited = deque([root_schema])
visited = []
while unvisited:
path = unvisited.popleft()
if path not in visited:
visited.append(path)
text = mapper[path].decode()
includes = extract_includes(text)
current_root = posixpath.dirname(path)
normalized = [normalize(current_root, p) for p in includes]
unvisited.extend([p for p in normalized if p not in visited])
return visited
def open_schema(mapper, schema):
"""fsspec-compatible way to open remote schema files
Parameters
----------
    mapper : fsspec mapper
        Pre-instantiated fsspec mapper giving access to the schema files.
    schema : str
        Path of the root schema file within the mapper; included schemas are
        resolved relative to it.
Returns
-------
xmlschema.XMLSchema
The opened schema object
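
    Examples
    --------
    A usage sketch (both paths are hypothetical):

    >>> import fsspec  # doctest: +SKIP
    >>> mapper = fsspec.get_mapper("/path/to/product")  # doctest: +SKIP
    >>> schema = open_schema(mapper, "schemas/root.xsd")  # doctest: +SKIP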
"""
paths = schema_paths(mapper, schema)
preprocessed = [io.StringIO(remove_includes(mapper[p].decode())) for p in paths]
return xmlschema.XMLSchema(preprocessed)
def read_xml(mapper, path):
raw_data = mapper[path]
tree = etree.fromstring(raw_data)
namespaces = keymap(lambda x: x if x is not None else "rcm", tree.nsmap)
schema_location = tree.xpath("./@xsi:schemaLocation", namespaces=namespaces)[0]
_, schema_path_ = schema_location.split(" ")
schema_path = posixpath.normpath(
posixpath.join(posixpath.dirname(path), schema_path_)
)
schema = open_schema(mapper, schema_path)
decoded = schema.decode(tree)
return decoded