xarray-safe-rcm-2024.02.0/.flake8
[flake8]
ignore =
    # E203: whitespace before ':' - doesn't work well with black
    # E402: module level import not at top of file
    # E501: line too long - let black worry about that
    # E731: do not assign a lambda expression, use a def
    # W503: line break before binary operator
    E203,E402,E501,E731,W503
exclude=
    .eggs
    docs

xarray-safe-rcm-2024.02.0/.github/dependabot.yml
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"

xarray-safe-rcm-2024.02.0/.github/workflows/pypi.yaml
name: Upload Package to PyPI

on:
  release:
    types: [created]

jobs:
  build:
    name: Build packages
    runs-on: ubuntu-latest
    if: github.repository == 'umr-lops/xarray-safe-rcm'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install build twine
      - name: Build
        run: |
          python -m build --sdist --wheel --outdir dist/ .
      - name: Check the built archives
        run: |
          twine check dist/*
      - name: Upload build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: packages
          path: dist/*

  pypi-publish:
    name: Upload to PyPI
    runs-on: ubuntu-latest
    needs: build
    environment:
      name: pypi
      url: https://pypi.org/p/xarray-safe-rcm
    permissions:
      id-token: write
    steps:
      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: packages
          path: dist/
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@2f6f737ca5f74c637829c0f5c3acd0e29ea5e8bf

xarray-safe-rcm-2024.02.0/.gitignore
# editor files
*~
\#*\#

# python bytecode
*.py[co]
__pycache__/

# install artifacts
/build
/dist
/*.egg-info

# tools
.ipynb_checkpoints/
.hypothesis/
.pytest_cache
.coverage
.coverage.*
.cache
/docs/_build/

xarray-safe-rcm-2024.02.0/.pre-commit-config.yaml
ci:
  autoupdate_schedule: weekly

# https://pre-commit.com/
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-docstring-first
      - id: check-yaml
      - id: check-toml
  - repo: https://github.com/pycqa/isort
    rev: 5.13.2
    hooks:
      - id: isort
  - repo: https://github.com/psf/black
    rev: 24.2.0
    hooks:
      - id: black
      - id: black-jupyter
  - repo: https://github.com/keewis/blackdoc
    rev: v0.3.9
    hooks:
      - id: blackdoc
  - repo: https://github.com/pycqa/flake8
    rev: 7.0.0
    hooks:
      - id: flake8
  - repo: https://github.com/kynan/nbstripout
    rev: 0.7.1
    hooks:
      - id: nbstripout
        args: [--extra-keys=metadata.kernelspec metadata.language_info.version]
  - repo: https://github.com/pre-commit/mirrors-prettier
    rev: v4.0.0-alpha.8
    hooks:
      - id: prettier
xarray-safe-rcm-2024.02.0/.readthedocs.yml
version: 2

build:
  os: ubuntu-22.04
  tools:
    python: mambaforge-4.10
  jobs:
    post_checkout:
      - (git --no-pager log --pretty="tformat:%s" -1 | grep -vqF "[skip-rtd]") || exit 183
    pre_install:
      - git update-index --assume-unchanged docs/conf.py ci/requirements/docs.yaml

conda:
  environment: ci/requirements/docs.yaml

sphinx:
  fail_on_warning: true
  configuration: docs/conf.py

formats: []

xarray-safe-rcm-2024.02.0/LICENSE
MIT License

Copyright (c) 2023, xarray-safe-rcm developers

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

xarray-safe-rcm-2024.02.0/README.md
# xarray-safe-rcm

Read RCM SAFE files into `datatree` objects.

## Usage

```python
import safe_rcm

tree = safe_rcm.open_rcm(url, chunks={})
```

xarray-safe-rcm-2024.02.0/ci/requirements/docs.yaml
name: xarray-safe-rcm-docs
channels:
  - conda-forge
dependencies:
  - python=3.10
  - sphinx>=4
  - sphinx-book-theme
  - ipython
  - myst-parser

xarray-safe-rcm-2024.02.0/ci/requirements/environment.yaml
name: xarray-safe-rcm-tests
channels:
  - conda-forge
dependencies:
  - python=3.10
  # development
  - ipython
  - pre-commit
  - jupyterlab
  - jupyterlab_code_formatter
  - isort
  - black
  - dask-labextension
  # testing
  - pytest
  - pytest-reportlog
  - hypothesis
  - coverage
  # I/O
  - rioxarray
  - h5netcdf
  - zarr
  - scipy
  # data
  - xarray
  - xarray-datatree
  - dask
  - numpy
  - pandas
  # processing
  - toolz
  - lxml
  - xmlschema

xarray-safe-rcm-2024.02.0/docs/conf.py
# -- Project information -----------------------------------------------------
import datetime as dt

project = "xarray-safe-rcm"
author = f"{project} developers"

initial_year = "2023"
year = dt.datetime.now().year
copyright = f"{initial_year}-{year}, {author}"

# The root toctree document.
root_doc = "index" # -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ "myst_parser", "sphinx.ext.extlinks", "sphinx.ext.intersphinx", "IPython.sphinxext.ipython_directive", "IPython.sphinxext.ipython_console_highlighting", ] extlinks = { "issue": ("https://github.com/umr-lops/xarray-safe-rcm/issues/%s", "GH%s"), "pull": ("https://github.com/umr-lops/xarray-safe-rcm/pull/%s", "PR%s"), } # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ["_build", "directory"] # nitpicky mode: complain if references could not be found nitpicky = True # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = "sphinx_book_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ["_static"] # -- Options for the intersphinx extension ----------------------------------- intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), "sphinx": ("https://www.sphinx-doc.org/en/stable/", None), } xarray-safe-rcm-2024.02.0/docs/index.md000066400000000000000000000000221456513464500174120ustar00rootroot00000000000000# xarray-safe-rcm xarray-safe-rcm-2024.02.0/docs/requirements.txt000066400000000000000000000000441456513464500212510ustar00rootroot00000000000000sphinx>=4 sphinx-book-theme ipython xarray-safe-rcm-2024.02.0/pyproject.toml000066400000000000000000000014051456513464500177530ustar00rootroot00000000000000[project] name = "xarray-safe-rcm" requires-python = ">= 3.10" license = {text = "MIT"} description = "xarray reader for radarsat constellation mission (RCM) SAFE files" readme = "README.md" dependencies = [ "toolz", "numpy", "xarray", "xarray-datatree", "lxml", "xmlschema", "rioxarray", "fsspec", "exceptiongroup; python_version < '3.11'", ] dynamic = ["version"] [build-system] requires = ["setuptools>=64.0", "setuptools-scm"] build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] include = [ "safe_rcm", "safe_rcm.*", ] [tool.setuptools_scm] fallback_version = "999" [tool.isort] profile = "black" skip_gitignore = true float_to_top = true default_section = "THIRDPARTY" known_first_party = "safe_rcm" xarray-safe-rcm-2024.02.0/safe_rcm/000077500000000000000000000000001456513464500166165ustar00rootroot00000000000000xarray-safe-rcm-2024.02.0/safe_rcm/__init__.py000066400000000000000000000002461456513464500207310ustar00rootroot00000000000000from importlib.metadata import version from .api import open_rcm # noqa: F401 try: __version__ = version("safe_rcm") except Exception: __version__ = "999" xarray-safe-rcm-2024.02.0/safe_rcm/api.py000066400000000000000000000122111456513464500177360ustar00rootroot00000000000000import os import posixpath from fnmatch import fnmatchcase import datatree import fsspec import xarray as xr from fsspec.implementations.dirfs import DirFileSystem from tlz.dicttoolz import valmap from 
tlz.functoolz import compose_left, curry, juxt from .calibrations import read_noise_levels from .manifest import read_manifest from .product.reader import read_product from .product.transformers import extract_dataset from .product.utils import starcall from .xml import read_xml try: ExceptionGroup except NameError: from exceptiongroup import ExceptionGroup @curry def execute(tree, f, path): node = tree[path] return f(node) def ignored_file(path, ignores): ignored = [ fnmatchcase(path, ignore) or fnmatchcase(posixpath.basename(path), ignore) for ignore in ignores ] return any(ignored) def open_rcm( url, *, backend_kwargs=None, manifest_ignores=[ "*.pdf", "*.html", "*.xslt", "*.png", "*.kml", "*.txt", "preview/*", ], **dataset_kwargs, ): """read SAFE files of the radarsat constellation mission (RCM) Parameters ---------- url : str backend_kwargs : mapping manifest_ignores : list of str, default: ["*.pdf", "*.html", "*.xslt", "*.png", \ "*.kml", "*.txt", "preview/*"] Globs that match files from the manifest that are allowed to be missing. **dataset_kwargs Keyword arguments forwarded to `xr.open_dataset`, used to open the contained data files. """ if not isinstance(url, (str, os.PathLike)): raise ValueError(f"cannot deal with object of type {type(url)}: {url}") if backend_kwargs is None: backend_kwargs = {} url = os.fspath(url) storage_options = backend_kwargs.get("storage_options", {}) mapper = fsspec.get_mapper(url, **storage_options) relative_fs = DirFileSystem(path=url, fs=mapper.fs) try: declared_files = read_manifest(mapper, "manifest.safe") except (FileNotFoundError, KeyError): raise ValueError( "cannot find the `manifest.safe` file. Are you sure this is a SAFE dataset?" ) missing_files = [ path for path in declared_files if not ignored_file(path, manifest_ignores) and not relative_fs.exists(path) ] if missing_files: raise ExceptionGroup( "not all files declared in the manifest are available", [ValueError(f"{p} does not exist") for p in missing_files], ) tree = read_product(mapper, "metadata/product.xml") calibration_root = "metadata/calibration" lookup_table_structure = { "/incidenceAngles": { "path": "/imageReferenceAttributes", "f": compose_left( lambda obj: obj.attrs["incidenceAngleFileName"], curry(posixpath.join, calibration_root), curry(read_xml, mapper), curry(extract_dataset, dims="coefficients"), ), }, "/lookupTables": { "path": "/imageReferenceAttributes/lookupTableFileName", "f": compose_left( lambda obj: obj.stack(stacked=["sarCalibrationType", "pole"]), lambda obj: obj.reset_index("stacked"), juxt( compose_left( lambda obj: obj.to_series().to_dict(), curry(valmap, curry(posixpath.join, calibration_root)), curry(valmap, curry(read_xml)(mapper)), curry(valmap, curry(extract_dataset, dims="coefficients")), curry(valmap, lambda ds: ds["gains"].assign_attrs(ds.attrs)), lambda d: xr.concat(list(d.values()), dim="stacked"), ), lambda obj: obj.coords, ), curry(starcall, lambda arr, coords: arr.assign_coords(coords)), lambda arr: arr.set_index({"stacked": ["sarCalibrationType", "pole"]}), lambda arr: arr.unstack("stacked"), lambda arr: arr.rename("lookup_tables"), ), }, "/noiseLevels": { "path": "/imageReferenceAttributes/noiseLevelFileName", "f": curry(read_noise_levels, mapper, calibration_root), }, } calibration = valmap( lambda x: execute(**x)(tree), lookup_table_structure, ) imagery_paths = tree["/sceneAttributes/ipdf"].to_series().to_dict() resolved = valmap( compose_left( curry(posixpath.join, "metadata"), posixpath.normpath, ), imagery_paths, ) imagery_dss = valmap( 
compose_left( curry(relative_fs.open), curry(xr.open_dataset, engine="rasterio", **dataset_kwargs), ), resolved, ) dss = [ds.assign_coords(pole=coord) for coord, ds in imagery_dss.items()] imagery = xr.concat(dss, dim="pole") return tree.assign( { "lookupTables": datatree.DataTree.from_dict(calibration), "imagery": datatree.DataTree(imagery), } ) xarray-safe-rcm-2024.02.0/safe_rcm/calibrations.py000066400000000000000000000062601456513464500216460ustar00rootroot00000000000000import posixpath import datatree import numpy as np import xarray as xr from tlz.dicttoolz import itemmap, merge_with, valfilter, valmap from tlz.functoolz import compose_left, curry, flip from tlz.itertoolz import first from safe_rcm.product.reader import execute from .product.dicttoolz import keysplit from .product.transformers import extract_dataset from .xml import read_xml def move_attrs_to_coords(ds, names): coords, attrs = keysplit(lambda k: k in names, ds.attrs) new = ds.copy() new.attrs = attrs return new.assign_coords(coords) def pad_common(dss): def compute_padding(item, maximum): key, value = item return key, (0, maximum[key] - value) sizes = [dict(ds.sizes) for ds in dss] maximum_sizes = valmap(max, merge_with(list, *sizes)) pad_widths = [itemmap(flip(compute_padding, maximum_sizes), _) for _ in sizes] return [ ds.pad(padding, mode="constant", constant_values=np.nan) for ds, padding in zip(dss, pad_widths) ] def _read_level(mapping): return ( extract_dataset(mapping) .pipe( lambda ds: ds.swap_dims( {first(valfilter(lambda v: v > 1, ds.sizes)): "coefficients"} ) ) .pipe(lambda ds: ds.reset_coords()) .pipe( move_attrs_to_coords, ["sarCalibrationType", "pixelFirstNoiseValue", "stepSize"], ) ) def read_noise_level_file(mapper, path): layout = { "/referenceNoiseLevel": { "path": "/referenceNoiseLevel", "f": compose_left( curry(map, _read_level), curry(map, lambda ds: ds.expand_dims("sarCalibrationType")), list, curry(xr.combine_by_coords, combine_attrs="drop_conflicts"), ), }, "/perBeamReferenceNoiseLevel": { "path": "/perBeamReferenceNoiseLevel", "f": compose_left( curry(map, _read_level), curry(map, lambda ds: ds.expand_dims("sarCalibrationType")), list, pad_common, curry(xr.combine_by_coords, combine_attrs="drop_conflicts"), ), }, "/azimuthNoiseLevelScaling": { "path": "/azimuthNoiseLevelScaling", "f": compose_left( curry(map, _read_level), list, pad_common, curry(xr.combine_by_coords, combine_attrs="drop_conflicts"), ), }, } decoded = read_xml(mapper, path) converted = valmap(lambda x: execute(**x)(decoded), layout) return converted def read_noise_levels(mapper, root, fnames): fnames = fnames.data.tolist() paths = [posixpath.join(root, name) for name in fnames] poles = [path.removesuffix(".xml").split("_")[1] for path in paths] trees = [read_noise_level_file(mapper, path) for path in paths] merged = merge_with(list, *trees) combined = valmap( compose_left( curry(xr.concat, dim="pole", combine_attrs="no_conflicts"), lambda x: x.assign_coords(pole=poles), ), merged, ) return datatree.DataTree.from_dict(combined) xarray-safe-rcm-2024.02.0/safe_rcm/manifest.py000066400000000000000000000023721456513464500210020ustar00rootroot00000000000000from tlz import filter from tlz.functoolz import compose_left, curry from tlz.itertoolz import concat, get from .product.dicttoolz import query from .xml import read_xml def merge_location(loc): locator = loc["@locator"] href = loc["@href"] return f"{locator}/{href}".lstrip("/") def read_manifest(mapper, path): structure = { "/dataObjectSection/dataObject": compose_left( curry( 
map, compose_left( curry(get, "byteStream"), curry( map, compose_left( curry(get, "fileLocation"), curry(map, merge_location) ), ), concat, ), ), concat, ), "/metadataSection/metadataObject": compose_left( curry( filter, compose_left(curry(get, "@classification"), lambda x: x == "SYNTAX"), ), curry(map, compose_left(curry(get, "metadataReference"), merge_location)), ), } manifest = read_xml(mapper, path) return list(concat(func(query(path, manifest)) for path, func in structure.items())) xarray-safe-rcm-2024.02.0/safe_rcm/product/000077500000000000000000000000001456513464500202765ustar00rootroot00000000000000xarray-safe-rcm-2024.02.0/safe_rcm/product/dicttoolz.py000066400000000000000000000012551456513464500226660ustar00rootroot00000000000000from tlz.dicttoolz import get_in from tlz.itertoolz import first, groupby def query(path, mapping): if path == "/": return mapping keys = path.lstrip("/").split("/") return get_in(keys, mapping, no_default=True) def itemsplit(predicate, d): groups = groupby(predicate, d.items()) first = dict(groups.get(True, ())) second = dict(groups.get(False, ())) return first, second def valsplit(predicate, d): wrapper = lambda item: predicate(item[1]) return itemsplit(wrapper, d) def keysplit(predicate, d): wrapper = lambda item: predicate(item[0]) return itemsplit(wrapper, d) def first_values(d): return first(d.values()) xarray-safe-rcm-2024.02.0/safe_rcm/product/predicates.py000066400000000000000000000047421456513464500230020ustar00rootroot00000000000000import numpy as np from tlz.functoolz import compose, juxt from tlz.itertoolz import isiterable def disjunction(*predicates): return compose(any, juxt(predicates)) def conjunction(*predicates): return compose(all, juxt(predicates)) def is_scalar(x): return not isiterable(x) or isinstance(x, (str, bytes)) def is_composite_value(obj): if not isinstance(obj, list) or len(obj) not in [1, 2]: return False if any(not isinstance(el, dict) or list(el) != ["@dataStream", "$"] for el in obj): return False data_stream_values = [el["@dataStream"].lower() for el in obj] return data_stream_values in (["real", "imaginary"], ["magnitude"]) def is_complex(obj): return is_composite_value(obj) and len(obj) == 2 def is_magnitude(obj): return is_composite_value(obj) and len(obj) == 1 def is_array(obj): # definition of a array: # - list of scalars # - list of 1d lists # - complex array: # - complex parts # - list of complex values if not isinstance(obj, list): return False if len(obj) == 0: # zero-sized list, not sure what to do here return False elem = obj[0] if is_complex(obj): return not is_scalar(elem["$"]) elif is_scalar(elem): return True elif isinstance(elem, list): if len(elem) == 1 and is_scalar(elem[0]): return True elif is_complex(elem): # array of imaginary values return True elif all(map(is_scalar, elem)): return True return False def is_scalar_variable(obj): if not isinstance(obj, dict): return False if not all(is_scalar(v) for v in obj.values()): return False return all(k == "$" or k.startswith("@") for k in obj) is_scalar_valued = disjunction( is_scalar, lambda x: is_array(x) and len(x) == 1, is_scalar_variable ) def is_nested(obj): """nested means: list of dict, but all dict values are scalar or 1-valued""" if not isinstance(obj, list) or len(obj) == 0: return False elem = obj[0] if not isinstance(elem, dict): return False if all(map(is_scalar_valued, elem.values())): return True return False def is_nested_array(obj): return is_nested(obj) and "$" in obj[0] def is_nested_dataset(obj): return is_nested(obj) and "$" not 
in obj[0] def is_attr(column): """an attribute is a index if it has multiple unique values""" return np.unique(column).size == 1 xarray-safe-rcm-2024.02.0/safe_rcm/product/reader.py000066400000000000000000000251671456513464500221250ustar00rootroot00000000000000import datatree import xarray as xr from tlz.dicttoolz import keyfilter, merge, merge_with, valfilter, valmap from tlz.functoolz import compose_left, curry, juxt from tlz.itertoolz import first, second from ..xml import read_xml from . import transformers from .dicttoolz import keysplit, query from .predicates import disjunction, is_nested_array, is_scalar_valued from .utils import dictfirst, starcall @curry def attach_path(obj, path): if not hasattr(obj, "encoding"): raise ValueError( "cannot attach source path: `obj` does not have a `encoding` attribute." ) new = obj.copy() new.encoding["xpath"] = path return new @curry def execute(mapping, f, path): subset = query(path, mapping) return compose_left(f, attach_path(path=path))(subset) def read_product(mapper, product_path): decoded = read_xml(mapper, product_path) layout = { "/": { "path": "/", "f": curry(transformers.extract_metadata)(collapse=["securityAttributes"]), }, "/sourceAttributes": { "path": "/sourceAttributes", "f": transformers.extract_metadata, }, "/sourceAttributes/radarParameters": { "path": "/sourceAttributes/radarParameters", "f": transformers.extract_dataset, }, "/sourceAttributes/radarParameters/prfInformation": { "path": "/sourceAttributes/radarParameters/prfInformation", "f": transformers.extract_nested_dataset, }, "/sourceAttributes/orbitAndAttitude/orbitInformation": { "path": "/sourceAttributes/orbitAndAttitude/orbitInformation", "f": compose_left( curry(transformers.extract_dataset)(dims="timeStamp"), lambda ds: ds.assign_coords( {"timeStamp": ds["timeStamp"].astype("datetime64")} ), ), }, "/sourceAttributes/orbitAndAttitude/attitudeInformation": { "path": "/sourceAttributes/orbitAndAttitude/attitudeInformation", "f": compose_left( curry(transformers.extract_dataset)(dims="timeStamp"), lambda ds: ds.assign_coords( {"timeStamp": ds["timeStamp"].astype("datetime64")} ), ), }, "/sourceAttributes/rawDataAttributes": { "path": "/sourceAttributes/rawDataAttributes", "f": compose_left( curry(keysplit, lambda k: k != "rawDataAnalysis"), juxt( compose_left(first, transformers.extract_dataset), compose_left( second, dictfirst, curry(starcall, curry(merge_with, list)), curry( transformers.extract_dataset, dims={"rawDataHistogram": ["stacked", "histogram"]}, default_dims=["stacked"], ), lambda obj: obj.set_index({"stacked": ["pole", "beam"]}), lambda obj: obj.unstack("stacked"), ), ), curry(xr.merge), ), }, "/imageGenerationParameters/generalProcessingInformation": { "path": "/imageGenerationParameters/generalProcessingInformation", "f": transformers.extract_metadata, }, "/imageGenerationParameters/sarProcessingInformation": { "path": "/imageGenerationParameters/sarProcessingInformation", "f": compose_left( curry(keyfilter, lambda k: k not in {"azimuthWindow", "rangeWindow"}), transformers.extract_dataset, ), }, "/imageGenerationParameters/chirps": { "path": "/imageGenerationParameters/chirp", "f": compose_left( lambda el: merge_with(list, *el), curry(keysplit, lambda k: k != "chirpQuality"), juxt( first, compose_left( second, dictfirst, lambda el: merge_with(list, *el), ), ), lambda x: merge(*x), curry( transformers.extract_dataset, dims={ "amplitudeCoefficients": ["stacked", "coefficients"], "phaseCoefficients": ["stacked", "coefficients"], }, 
default_dims=["stacked"], ), lambda obj: obj.set_index({"stacked": ["pole", "pulse"]}), lambda obj: obj.drop_duplicates("stacked", keep="last"), lambda obj: obj.unstack("stacked"), ), }, "/imageGenerationParameters/slantRangeToGroundRange": { "path": "/imageGenerationParameters/slantRangeToGroundRange", "f": compose_left( lambda el: merge_with(list, *el), curry( transformers.extract_dataset, dims={ "groundToSlantRangeCoefficients": [ "zeroDopplerAzimuthTime", "coefficients", ], }, default_dims=["zeroDopplerAzimuthTime"], ), ), }, "/imageReferenceAttributes": { "path": "/imageReferenceAttributes", "f": compose_left( curry(valfilter)(disjunction(is_scalar_valued, is_nested_array)), transformers.extract_dataset, ), }, "/imageReferenceAttributes/rasterAttributes": { "path": "/imageReferenceAttributes/rasterAttributes", "f": transformers.extract_dataset, }, "/imageReferenceAttributes/geographicInformation/ellipsoidParameters": { "path": "/imageReferenceAttributes/geographicInformation/ellipsoidParameters", "f": curry(transformers.extract_dataset)(dims="params"), }, "/imageReferenceAttributes/geographicInformation/geolocationGrid": { "path": "/imageReferenceAttributes/geographicInformation/geolocationGrid/imageTiePoint", "f": compose_left( curry(transformers.extract_nested_datatree)(dims="tie_points"), lambda tree: xr.merge([node.ds for node in tree.subtree]), lambda ds: ds.set_index(tie_points=["line", "pixel"]), lambda ds: ds.unstack("tie_points"), ), }, "/imageReferenceAttributes/geographicInformation/rationalFunctions": { "path": "/imageReferenceAttributes/geographicInformation/rationalFunctions", "f": curry(transformers.extract_dataset)(dims="coefficients"), }, "/sceneAttributes": { "path": "/sceneAttributes/imageAttributes", "f": compose_left( first, # GRD datasets only have 1 curry(keyfilter)(lambda x: not x.startswith("@")), transformers.extract_dataset, ), }, "/grdBurstMap": { "path": "/grdBurstMap", "f": compose_left( curry( map, compose_left( curry(keysplit, lambda k: k != "burstAttributes"), juxt( first, compose_left( second, dictfirst, curry(starcall, curry(merge_with, list)), ), ), curry(starcall, merge), curry( transformers.extract_dataset, dims=["stacked"], ), lambda obj: obj.set_index({"stacked": ["burst", "beam"]}), lambda obj: obj.unstack("stacked"), ), ), list, curry(xr.concat, dim="burst_maps"), ), }, "/dopplerCentroid": { "path": "/dopplerCentroid", "f": compose_left( curry( map, compose_left( curry(keysplit, lambda k: k != "dopplerCentroidEstimate"), juxt( first, compose_left( second, dictfirst, curry(starcall, curry(merge_with, list)), ), ), curry(starcall, merge), curry( transformers.extract_dataset, dims={ "dopplerCentroidCoefficients": [ "burst", "coefficients", ], }, default_dims=["burst"], ), ), ), list, curry(xr.concat, dim="burst_maps"), ), }, "/dopplerRate": { "path": "/dopplerRate", "f": compose_left( curry( map, compose_left( curry(keysplit, lambda k: k != "dopplerRateEstimate"), juxt( first, compose_left( second, dictfirst, curry(starcall, curry(merge_with, list)), ), ), curry(starcall, merge), curry( transformers.extract_dataset, dims={ "dopplerRateCoefficients": ["burst", "coefficients"], }, default_dims=["burst"], ), ), ), list, curry(xr.concat, dim="burst_maps"), ), }, } converted = valmap( lambda x: execute(**x)(decoded), layout, ) return datatree.DataTree.from_dict(converted) xarray-safe-rcm-2024.02.0/safe_rcm/product/transformers.py000066400000000000000000000156041456513464500234030ustar00rootroot00000000000000import datatree import numpy as np import 
xarray as xr from tlz.dicttoolz import ( itemfilter, itemmap, keyfilter, keymap, merge_with, valfilter, valmap, ) from tlz.functoolz import compose_left, curry, flip from tlz.itertoolz import concat, first, second from .dicttoolz import first_values, keysplit, valsplit from .predicates import ( is_array, is_attr, is_composite_value, is_nested_array, is_nested_dataset, is_scalar, ) ignore = ("@xmlns", "@xmlns:xsi", "@xsi:schemaLocation") def convert_composite(value): if not is_composite_value(value): raise ValueError(f"not a composite: {value}") converted = {part["@dataStream"].lower(): np.array(part["$"]) for part in value} if list(converted) == ["magnitude"]: return "magnitude", converted["magnitude"] else: return "complex", converted["real"] + 1j * converted["imaginary"] def extract_metadata( mapping, collapse=(), ignore=ignore, ): without_ignores = keyfilter(lambda k: k not in ignore, mapping) # extract the metadata metadata_ = itemfilter( lambda it: it[0].startswith("@") or is_scalar(it[1]), without_ignores, ) metadata = keymap(flip(str.lstrip, "@"), metadata_) # collapse the selected items to_collapse = keyfilter(lambda x: x in collapse, mapping) collapsed = dict(concat(v.items() for v in to_collapse.values())) attrs = metadata | collapsed return xr.Dataset(attrs=attrs) # return dataset to avoid bug in datatree def extract_array(obj, dims): if isinstance(dims, str): dims = [dims] # special case for pulses: if "pulses" in dims and len(obj) == 1 and isinstance(obj[0], str): obj = obj[0].split() elif len(obj) >= 1 and is_composite_value(obj[0]): obj = list(map(compose_left(convert_composite, second), obj)) data = np.array(obj) if data.size > 1: data = np.squeeze(data) return xr.Variable(dims, data) def extract_composite(obj, dims=()): type_, value = convert_composite(obj) if is_scalar(value): dims = () return xr.Variable(dims, value, {"type": type_}) def extract_variable(obj, dims=()): attributes, data = keysplit(lambda k: k.startswith("@"), obj) if list(data) != ["$"]: raise ValueError("not a variable") values = data["$"] if is_scalar(values): dims = () attrs = keymap(lambda k: k.lstrip("@"), attributes) return xr.Variable(dims, values, attrs) def extract_entry(name, obj, dims=None, default_dims=None): if default_dims is None: default_dims = [name] if isinstance(dims, dict): dims = dims.get(name, default_dims) elif dims is None: dims = default_dims if is_array(obj): # dimension coordinate return extract_array(obj, dims=dims) elif is_composite_value(obj): return extract_composite(obj, dims=dims) elif isinstance(obj, dict): return extract_variable(obj, dims=dims) elif is_nested_array(obj): return extract_nested_array(obj, dims=dims).pipe(rename, name) else: raise ValueError(f"unknown datastructure:\n{obj}") def extract_dataset(obj, dims=None, default_dims=None): filtered = keyfilter(lambda x: x not in ignore, obj) attrs, variables = valsplit(is_scalar, filtered) if len(variables) == 1 and is_nested_dataset(first_values(variables)): return extract_nested_dataset(first_values(variables), dims=dims).assign_attrs( attrs ) variables_ = keymap(lambda k: k.lstrip("@"), variables) filtered_variables = valfilter(lambda x: not is_nested_dataset(x), variables_) data_vars = itemmap( lambda item: ( item[0], extract_entry(*item, dims=dims, default_dims=default_dims), ), filtered_variables, ) return xr.Dataset(data_vars=data_vars, attrs=attrs) def extract_nested_variable(obj, dims=None): if is_array(obj): return xr.Variable(dims, obj) columns = merge_with(list, *obj) attributes, data = keysplit(lambda 
k: k.startswith("@"), columns) renamed = keymap(lambda k: k.lstrip("@"), attributes) attrs = valmap(first, renamed) return xr.Variable(dims, data["$"], attrs) def unstack(obj, dim="stacked"): if dim not in obj.dims: return obj stacked_coords = [name for name, arr in obj.coords.items() if dim in arr.dims] return obj.set_index({dim: stacked_coords}).unstack(dim) def rename(obj, name): renamed = obj.rename(name) if "$" not in obj.dims: return renamed if len(obj.dims) != 1: raise ValueError(f"unexpected number of dimensions: {list(obj.dims)}") return renamed.swap_dims({"$": name}) def to_variable_tuple(name, value, dims): if name in dims: dims_ = [name] else: dims_ = dims return (dims_, value) def extract_nested_array(obj, dims=None): columns = merge_with(list, *obj) attributes, data = keysplit(flip(str.startswith, "@"), columns) renamed = keymap(flip(str.lstrip, "@"), attributes) preprocessed_attrs = valmap(np.squeeze, renamed) attrs_, indexes = valsplit(is_attr, preprocessed_attrs) preprocessed_data = valmap(np.squeeze, data) originally_stacked = isinstance(dims, (tuple, list)) and "stacked" in dims if len(indexes) == 1: dims = list(indexes) elif len(indexes) >= 2: dims = ["stacked"] elif dims is None: dims = ["$"] coords = itemmap( lambda it: (it[0], to_variable_tuple(*it, dims=dims)), indexes, ) arr = xr.DataArray( data=preprocessed_data["$"], attrs=valmap(first, attrs_), dims=dims, coords=coords, ) if originally_stacked: return arr return arr.pipe(unstack, dim="stacked") def extract_nested_dataset(obj, dims=None): if not isinstance(obj, list): raise ValueError(f"unknown type: {type(obj)}") columns = merge_with(list, *obj) attributes, data = keysplit(flip(str.startswith, "@"), columns) renamed = keymap(flip(str.lstrip, "@"), attributes) preprocessed = valmap(np.squeeze, renamed) attrs_, indexes = valsplit(is_attr, preprocessed) attrs = valmap(first, attrs_) if dims is None: if len(indexes) <= 1: dims = list(indexes) else: dims = ["stacked"] data_vars = valmap(curry(extract_nested_variable)(dims=dims), data) coords = itemmap( lambda it: (it[0], to_variable_tuple(*it, dims=dims)), indexes, ) return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs).pipe( unstack, dim="stacked" ) def extract_nested_datatree(obj, dims=None): if not isinstance(obj, list): raise ValueError(f"unknown type: {type(obj)}") datasets = merge_with(list, *obj) tree = valmap(curry(extract_nested_dataset)(dims=dims), datasets) return datatree.DataTree.from_dict(tree) xarray-safe-rcm-2024.02.0/safe_rcm/product/utils.py000066400000000000000000000016551456513464500220170ustar00rootroot00000000000000from tlz.functoolz import flip, pipe from tlz.itertoolz import first, groupby def split_marked(mapping, marker="@"): groups = groupby(lambda item: item[0].startswith(marker), mapping.items()) attrs = {key.lstrip(marker): value for key, value in groups.get(True, {})} data = {key: value for key, value in groups.get(False, {})} return attrs, data def strip_namespaces(name, namespaces): """remove the given namespaces from a name Parameters ---------- name : str The string to trim namespaces : sequence of str The list of namespaces. Returns ------- trimmed : str The string without prefix and without leading colon. 
""" funcs = [flip(str.removeprefix, ns) for ns in namespaces] return pipe(name, *funcs).lstrip(":") def starcall(func, args, **kwargs): return func(*args, **kwargs) def dictfirst(mapping): return first(mapping.values()) xarray-safe-rcm-2024.02.0/safe_rcm/tests/000077500000000000000000000000001456513464500177605ustar00rootroot00000000000000xarray-safe-rcm-2024.02.0/safe_rcm/tests/test_product_utils.py000066400000000000000000000033361456513464500242760ustar00rootroot00000000000000import functools import string import hypothesis.strategies as st from hypothesis import given from safe_rcm.product import utils def shared(*, key): def outer(func): @functools.wraps(func) def inner(*args, **kwargs): result = func(*args, **kwargs) return st.shared(result, key=key) return inner return outer markers = st.characters() marker = st.shared(markers, key="marker") def marked_mapping(marker): values = st.just(None) unmarked_keys = st.text() marked_keys = st.builds(lambda k, m: m + k, unmarked_keys, marker) keys = st.one_of(unmarked_keys, marked_keys) return st.dictionaries(keys, values) @given(marked_mapping(marker), marker) def test_split_marked(mapping, marker): marked, unmarked = utils.split_marked(mapping, marker=marker) assert list(unmarked) == [key for key in mapping if not key.startswith(marker)] @shared(key="namespaces") def namespaces(): values = st.just(None) keys = st.text(string.ascii_letters, min_size=1, max_size=4) return st.dictionaries(keys, values) @st.composite def draw_from(draw, elements): elements = draw(elements) if not elements: return "" return draw(st.sampled_from(elements)) def prefixed_names(namespaces): def builder(base, prefix): return f"{prefix}:{base}" if prefix != "" else base bases = st.text(string.ascii_letters, min_size=1) all_prefixes = namespaces.map(list) prefixes = draw_from(all_prefixes) return st.builds(builder, bases, prefixes) @given(prefixed_names(namespaces()), namespaces()) def test_strip_namespaces(name, namespaces): stripped = utils.strip_namespaces(name, namespaces) assert ":" not in stripped xarray-safe-rcm-2024.02.0/safe_rcm/xml.py000066400000000000000000000041561456513464500177760ustar00rootroot00000000000000import io import posixpath import re from collections import deque import xmlschema from lxml import etree from tlz.dicttoolz import keymap include_re = re.compile(r'\s*') def remove_includes(text): return io.StringIO(include_re.sub("", text)) def extract_includes(text): return include_re.findall(text) def normalize(root, path): if posixpath.isabs(path) or posixpath.dirname(path): return path return posixpath.join(root, path) def schema_paths(mapper, root_schema): unvisited = deque([root_schema]) visited = [] while unvisited: path = unvisited.popleft() visited.append(path) text = mapper[path].decode() includes = extract_includes(text) current_root = posixpath.dirname(path) normalized = [normalize(current_root, p) for p in includes] unvisited.extend([p for p in normalized if p not in visited]) return visited def open_schema(mapper, schema): """fsspec-compatible way to open remote schema files Parameters ---------- fs : fsspec.filesystem pre-instantiated fsspec filesystem instance root : str URL of the root directory of the schema files name : str File name of the schema to open. 
glob : str, default: "*.xsd" The glob used to find other schema files Returns ------- xmlschema.XMLSchema The opened schema object """ paths = schema_paths(mapper, schema) preprocessed = [remove_includes(mapper[p].decode()) for p in paths] return xmlschema.XMLSchema(preprocessed) def read_xml(mapper, path): raw_data = mapper[path] tree = etree.fromstring(raw_data) namespaces = keymap(lambda x: x if x is not None else "rcm", tree.nsmap) schema_location = tree.xpath("./@xsi:schemaLocation", namespaces=namespaces)[0] _, schema_path_ = schema_location.split(" ") schema_path = posixpath.normpath( posixpath.join(posixpath.dirname(path), schema_path_) ) schema = open_schema(mapper, schema_path) decoded = schema.decode(tree) return decoded
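

# Illustrative usage sketch: a minimal example of how the public entry points
# fit together, assuming a locally extracted RCM SAFE product. The product
# path below is a made-up placeholder, not a real dataset, and the guard keeps
# the sketch from running on import.
if __name__ == "__main__":
    import fsspec

    import safe_rcm

    # hypothetical path to an extracted RCM SAFE product directory
    url = "data/RCM1_OK000000_PK000000_1_SC50MB_20240101_000000_HH_HV_GRD"

    # high-level entry point: a DataTree holding the decoded metadata, the
    # calibration look-up tables and the polarisation-stacked imagery;
    # extra keyword arguments (here `chunks`) are forwarded to `xr.open_dataset`
    tree = safe_rcm.open_rcm(url, chunks={})
    print(tree["imagery"])
    print(tree["lookupTables"])

    # lower-level building block used throughout the package: decode a single
    # annotation file against the XSD schemas shipped inside the product
    mapper = fsspec.get_mapper(url)
    decoded = read_xml(mapper, "metadata/product.xml")
    print(list(decoded))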