pax_global_header00006660000000000000000000000064147220740730014520gustar00rootroot0000000000000052 comment=1c120d89a2cdf1d3853bd9d9ff47ace4d1914354 xarray-safe-s1-2024.11.28/000077500000000000000000000000001472207407300146645ustar00rootroot00000000000000xarray-safe-s1-2024.11.28/.flake8000066400000000000000000000005351472207407300160420ustar00rootroot00000000000000[flake8] ignore = # E203: whitespace before ':' - doesn't work well with black # E402: module level import not at top of file # E501: line too long - let black worry about that # E731: do not assign a lambda expression, use a def # W503: line break before binary operator E203,E402,E501,E731,W503 exclude= .eggs docs xarray-safe-s1-2024.11.28/.github/000077500000000000000000000000001472207407300162245ustar00rootroot00000000000000xarray-safe-s1-2024.11.28/.github/dependabot.yml000066400000000000000000000001651472207407300210560ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly"xarray-safe-s1-2024.11.28/.github/workflows/000077500000000000000000000000001472207407300202615ustar00rootroot00000000000000xarray-safe-s1-2024.11.28/.github/workflows/ci.yaml000066400000000000000000000043361472207407300215460ustar00rootroot00000000000000name: CI on: push: branches: [main] pull_request: branches: [main] workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: detect-skip-ci-trigger: name: "Detect CI Trigger: [skip-ci]" if: | github.repository == 'umr-lops/xarray-safe-s1' && ( github.event_name == 'push' || github.event_name == 'pull_request' ) runs-on: ubuntu-latest outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - uses: actions/checkout@v4 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1 id: detect-trigger with: keyword: "[skip-ci]" ci: name: ${{ matrix.os }} py${{ matrix.python-version }} runs-on: ${{ matrix.os }} needs: detect-skip-ci-trigger if: needs.detect-skip-ci-trigger.outputs.triggered == 'false' defaults: run: shell: bash -l {0} strategy: fail-fast: false matrix: python-version: ["3.10", "3.11", "3.12"] os: ["ubuntu-latest", "macos-latest", "windows-latest"] steps: - name: Checkout the repository uses: actions/checkout@v4 with: # need to fetch all tags to get a correct version fetch-depth: 0 # fetch all branches and tags - name: Setup environment variables run: | echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV echo "CONDA_ENV_FILE=ci/requirements/environment.yaml" >> $GITHUB_ENV - name: Setup micromamba uses: mamba-org/setup-micromamba@v2 with: environment-file: ${{ env.CONDA_ENV_FILE }} environment-name: xarray-safe-s1-tests cache-environment: true cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{matrix.python-version}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" create-args: >- python=${{matrix.python-version}} - name: Install xarray-safe-s1 run: | python -m pip install --no-deps -e . 
- name: Import xarray-safe-s1 run: | python -c "import safe_s1" - name: Run tests run: | python -m pytest --cov=safe_s1 xarray-safe-s1-2024.11.28/.github/workflows/publish.yml000066400000000000000000000016341472207407300224560ustar00rootroot00000000000000name: Upload package to PyPI on: release: types: [created] jobs: publish: name: Publish to PyPI runs-on: ubuntu-latest permissions: contents: "read" id-token: "write" steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.x" - name: Install dependencies run: | python -m pip install --upgrade pip build twine - name: Build run: | python -m build --sdist --wheel . - name: Check the built archives run: | twine check dist/* pip install dist/*.whl - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@15c56dba361d8335944d31a2ecd17d700fc7bcbc with: password: ${{ secrets.pypi_token }} repository_url: https://upload.pypi.org/legacy/ verify_metadata: true xarray-safe-s1-2024.11.28/.github/workflows/upstream-dev.yaml000066400000000000000000000050071472207407300235630ustar00rootroot00000000000000name: upstream-dev CI on: push: branches: [main] pull_request: branches: [main] schedule: - cron: "0 18 * * 0" # Weekly "On Sundays at 18:00" UTC workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: detect-test-upstream-trigger: name: "Detect CI Trigger: [test-upstream]" if: github.event_name == 'push' || github.event_name == 'pull_request' runs-on: ubuntu-latest outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - uses: actions/checkout@v4 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1.2 id: detect-trigger with: keyword: "[test-upstream]" upstream-dev: name: upstream-dev runs-on: ubuntu-latest needs: detect-test-upstream-trigger if: | always() && github.repository == 'umr-lops/xarray-safe-s1' && ( github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || needs.detect-test-upstream-trigger.outputs.triggered == 'true' || contains(github.event.pull_request.labels.*.name, 'run-upstream') ) defaults: run: shell: bash -l {0} strategy: fail-fast: false matrix: python-version: ["3.12"] steps: - name: checkout the repository uses: actions/checkout@v4 with: # need to fetch all tags to get a correct version fetch-depth: 0 # fetch all branches and tags - name: set up conda environment uses: mamba-org/setup-micromamba@v1 with: environment-file: ci/requirements/environment.yaml environment-name: tests create-args: >- python=${{ matrix.python-version }} pytest-reportlog - name: install upstream-dev dependencies run: bash ci/install-upstream-dev.sh - name: install the package run: python -m pip install --no-deps -e . 
- name: show versions run: python -m pip list - name: import run: | python -c 'import safe_s1' - name: run tests if: success() id: status run: | python -m pytest -rf --report-log=pytest-log.jsonl - name: report failures if: | failure() && steps.tests.outcome == 'failure' && github.event_name == 'schedule' uses: xarray-contrib/issue-from-pytest-log@v1 with: log-path: pytest-log.jsonl xarray-safe-s1-2024.11.28/.gitignore000066400000000000000000000003501472207407300166520ustar00rootroot00000000000000# editor files *~ \#*\# # python bytecode *.py[co] __pycache__/ # install artifacts /build /dist /*.egg-info # tools .ipynb_checkpoints/ .hypothesis/ .pytest_cache .coverage .coverage.* .cache /docs/_build/ localconfig.yml .idea xarray-safe-s1-2024.11.28/.pre-commit-config.yaml000066400000000000000000000016761472207407300211570ustar00rootroot00000000000000ci: autoupdate_schedule: weekly # https://pre-commit.com/ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: check-docstring-first - id: check-yaml - id: check-toml - repo: https://github.com/pycqa/isort rev: 5.12.0 hooks: - id: isort - repo: https://github.com/psf/black rev: 23.3.0 hooks: - id: black - id: black-jupyter - repo: https://github.com/keewis/blackdoc rev: v0.3.8 hooks: - id: blackdoc - repo: https://github.com/pycqa/flake8 rev: 6.0.0 hooks: - id: flake8 - repo: https://github.com/kynan/nbstripout rev: 0.6.1 hooks: - id: nbstripout args: [--extra-keys=metadata.kernelspec metadata.language_info.version] - repo: https://github.com/pre-commit/mirrors-prettier rev: v3.0.0-alpha.6 hooks: - id: prettier xarray-safe-s1-2024.11.28/.readthedocs.yml000066400000000000000000000006421472207407300177540ustar00rootroot00000000000000version: 2 build: os: ubuntu-22.04 tools: python: mambaforge-4.10 jobs: post_checkout: - (git --no-pager log --pretty="tformat:%s" -1 | grep -vqF "[skip-rtd]") || exit 183 pre_install: - git update-index --assume-unchanged docs/conf.py ci/requirements/docs.yaml conda: environment: ci/requirements/docs.yaml sphinx: fail_on_warning: true configuration: docs/conf.py formats: [] xarray-safe-s1-2024.11.28/LICENSE000066400000000000000000000020741472207407300156740ustar00rootroot00000000000000MIT License Copyright (c) 2023, xarray-safe-rcm developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
xarray-safe-s1-2024.11.28/README.md000066400000000000000000000171061472207407300161500ustar00rootroot00000000000000 [![Documentation Status](https://readthedocs.org/projects/xarray-safe-s1/badge/?version=latest)](https://xarray-safe-s1.readthedocs.io/en/latest/?badge=latest) # xarray-safe-s1 Xarray Sentinel1 python SAFE files reader # Install ## Conda 1) Install xarray-safe-s1 ``` conda create -n safe_s1 conda activate safe_s1 conda install -c conda-forge xarray-safe-s1 ``` ## Pypi 1) Install xarray-safe-s1 ``` conda create -n safe_s1 conda activate safe_s1 pip install xarray-safe-s1 ``` ```pycon >>> from safe_s1 import Sentinel1Reader, sentinel1_xml_mappings >>> filename = sentinel1_xml_mappings.get_test_file('S1A_IW_GRDH_1SDV_20170907T103020_20170907T103045_018268_01EB76_Z010.SAFE') >>> Sentinel1Reader(filename).datatree DataTree('None', parent=None) ├── DataTree('geolocationGrid') │ Dimensions: (line: 10, sample: 21) │ Coordinates: │ * line (line) int64 0 2014 4028 6042 ... 12084 14098 16112 16777 │ * sample (sample) int64 0 1260 2520 3780 ... 21420 22680 23940 25186 │ Data variables: │ longitude (line, sample) float64 -67.84 -67.96 -68.08 ... -70.4 -70.51 │ latitude (line, sample) float64 20.73 20.75 20.77 ... 19.62 19.64 │ height (line, sample) float64 8.405e-05 8.058e-05 ... 3.478e-05 │ azimuthTime (line, sample) datetime64[ns] 2017-09-07T10:30:20.936147 ... │ slantRangeTime (line, sample) float64 0.005331 0.005375 ... 0.006382 │ incidenceAngle (line, sample) float64 30.82 31.7 32.57 ... 44.71 45.36 46.0 │ elevationAngle (line, sample) float64 27.5 28.27 29.02 ... 39.89 40.41 │ Attributes: │ history: longitude:\n annotation/s1a.xml:\n - /product/geolocationGrid... ├── DataTree('orbit') │ Dimensions: (time: 17) │ Coordinates: │ * time (time) datetime64[ns] 2017-09-07T10:29:14.474905 ... 2017-09-... │ Data variables: │ velocity_x (time) float64 -116.7 -154.1 -191.4 ... -628.1 -663.4 -698.6 │ velocity_y (time) float64 -3.433e+03 -3.368e+03 ... -2.413e+03 -2.342e+03 │ velocity_z (time) float64 -6.776e+03 -6.808e+03 ... -7.174e+03 -7.194e+03 │ position_x (time) float64 2.892e+06 2.89e+06 ... 2.833e+06 2.826e+06 │ position_y (time) float64 -5.782e+06 -5.816e+06 ... -6.222e+06 -6.246e+06 │ position_z (time) float64 2.869e+06 2.801e+06 ... 1.82e+06 1.748e+06 │ Attributes: │ orbit_pass: Descending │ platform_heading: -167.7668824808032 │ frame: Earth Fixed │ history: orbit:\n annotation/s1a.xml:\n - //product/generalAn... ├── DataTree('image') │ Dimensions: (dim_0: 2) │ Dimensions without coordinates: dim_0 │ Data variables: (12/14) │ LineUtcTime (dim_0) datetime64[ns] 2017-09-07T10:30:20.93640... │ numberOfLines int64 16778 │ numberOfSamples int64 25187 │ azimuthPixelSpacing float64 10.0 │ slantRangePixelSpacing float64 10.0 │ groundRangePixelSpacing float64 10.0 │ ... ... 
│ slantRangeTime float64 0.005331 │ swath_subswath =4 - sphinx-book-theme - ipython - myst-parser - nbsphinx - jupyter_sphinx - aiohttp - pip - pip: - -e ../..xarray-safe-s1-2024.11.28/ci/requirements/environment.yaml000066400000000000000000000010051472207407300232260ustar00rootroot00000000000000name: xarray-safe-s1-tests channels: - conda-forge dependencies: - python # development - ipython - pre-commit - jupyterlab - jupyterlab_code_formatter - isort - black - dask-labextension # testing - pytest - pytest-reportlog - pytest-cov - hypothesis - coverage # I/O - rioxarray - h5netcdf - zarr - scipy - rasterio # data - xarray - xarray-datatree - dask - numpy - pandas - shapely - geopandas - affine - pyproj # processing - lxml - jmespath xarray-safe-s1-2024.11.28/docs/000077500000000000000000000000001472207407300156145ustar00rootroot00000000000000xarray-safe-s1-2024.11.28/docs/Makefile000066400000000000000000000011721472207407300172550ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) xarray-safe-s1-2024.11.28/docs/_static/000077500000000000000000000000001472207407300172425ustar00rootroot00000000000000xarray-safe-s1-2024.11.28/docs/_static/css/000077500000000000000000000000001472207407300200325ustar00rootroot00000000000000xarray-safe-s1-2024.11.28/docs/_static/css/xsar.css000066400000000000000000000001761472207407300215250ustar00rootroot00000000000000@import url("theme.css"); .wy-nav-content { max-width: 1000px !important; } dl.py.property { display: block !important; } xarray-safe-s1-2024.11.28/docs/api.rst000066400000000000000000000002211472207407300171120ustar00rootroot00000000000000############# API reference ############# .. automodule:: safe_s1 :members: metadata .. autoclass:: Sentinel1Reader :members: xarray-safe-s1-2024.11.28/docs/conf.py000066400000000000000000000064351472207407300171230ustar00rootroot00000000000000# -- Project information ----------------------------------------------------- import datetime as dt project = "xarray-safe-s1" author = f"{project} developers" initial_year = "2023" year = dt.datetime.now().year copyright = f"{initial_year}-{year}, {author}" # The root toctree document. root_doc = "index" # -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ "sphinx.ext.autosummary", "sphinx.ext.autodoc", "myst_parser", "sphinx.ext.extlinks", "sphinx.ext.intersphinx", "IPython.sphinxext.ipython_directive", "IPython.sphinxext.ipython_console_highlighting", "nbsphinx", "jupyter_sphinx", "sphinx.ext.napoleon", ] extlinks = { "issue": ("https://github.com/umr-lops/xarray-safe-s1/issues/%s", "GH%s"), "pull": ("https://github.com/umr-lops/xarray-safe-s1/pull/%s", "PR%s"), } # Add any paths that contain templates here, relative to this directory. 
templates_path = ["_templates"] html_static_path = ["_static"] html_style = "css/xsar.css" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ["_build", "directory"] # nitpicky mode: complain if references could not be found nitpicky = True # Napoleon settings napoleon_google_docstring = False napoleon_numpy_docstring = True napoleon_include_init_with_doc = False napoleon_include_private_with_doc = False napoleon_include_special_with_doc = True napoleon_use_admonition_for_examples = False napoleon_use_admonition_for_notes = False napoleon_use_admonition_for_references = False napoleon_use_ivar = False napoleon_use_param = True napoleon_use_rtype = True napoleon_type_aliases = None # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = "sphinx_book_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ["_static"] # -- Options for the intersphinx extension ----------------------------------- intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), "numpy": ("https://numpy.org/doc/stable", None), "dask": ("https://docs.dask.org/en/latest", None), "xarray": ("https://docs.xarray.dev/en/latest/", None), "rasterio": ("https://rasterio.readthedocs.io/en/latest/", None), "datatree": ("https://xarray-datatree.readthedocs.io/en/latest/", None), } html_theme_options = { "navigation_depth": 4, # FIXME: doesn't work as expeted: should expand side menu "collapse_navigation": False, # FIXME: same as above } # If true, links to the reST sources are added to the pages. html_show_sourcelink = False nbsphinx_allow_errors = False nbsphinx_execute = "always" nbsphinx_timeout = 300 today_fmt = "%b %d %Y at %H:%M" xarray-safe-s1-2024.11.28/docs/examples/000077500000000000000000000000001472207407300174325ustar00rootroot00000000000000xarray-safe-s1-2024.11.28/docs/examples/simple_tutorial.ipynb000066400000000000000000000243471472207407300237230ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "6481cafa-2f86-43f1-a6be-4b6a29fffdd9", "metadata": { "tags": [] }, "source": [ "# Tutorial for xarray-safe-s1\n", "xarray-safe-s1 is a xarray reader for Sentinel-1 SAFE files" ] }, { "cell_type": "code", "execution_count": null, "id": "71f12140-2f6f-449d-991f-38f3772a58d8", "metadata": {}, "outputs": [], "source": [ "from safe_s1 import Sentinel1Reader, sentinel1_xml_mappings" ] }, { "cell_type": "markdown", "id": "11b6ce6d-87cd-49ab-a222-40fc3bacb573", "metadata": {}, "source": [ "## Get a product path\n", "Here, we get the product path with xsar library, but this is optionnal" ] }, { "cell_type": "code", "execution_count": null, "id": "44824fc1-d3c3-4ec8-9d60-96900fc7d980", "metadata": {}, "outputs": [], "source": [ "# get test file. 
You can replace with an path to other SAFE\n", "filename = sentinel1_xml_mappings.get_test_file('S1A_IW_GRDH_1SDV_20170907T103020_20170907T103045_018268_01EB76_Z010.SAFE')\n", "filename" ] }, { "cell_type": "markdown", "id": "550f4b52-5efd-4567-b9cd-f6e88c140379", "metadata": { "tags": [] }, "source": [ "## Open a product\n", "We apply Sentinel1Reader to open a Sentinel-1 product" ] }, { "cell_type": "code", "execution_count": null, "id": "3457cbc8-d952-4524-be75-a33a693276a7", "metadata": {}, "outputs": [], "source": [ "reader = Sentinel1Reader(name=filename)\n", "reader" ] }, { "cell_type": "markdown", "id": "37779ce8-b845-4647-96a1-bfb9e61d03a2", "metadata": {}, "source": [ "Some properties permit to have informations about the product :" ] }, { "cell_type": "code", "execution_count": null, "id": "dac3189b-358f-445a-abdd-726825e287dc", "metadata": {}, "outputs": [], "source": [ "# Type of product \n", "reader.product" ] }, { "cell_type": "code", "execution_count": null, "id": "5ae24650-3c2b-45e0-ae29-7c180e447aeb", "metadata": {}, "outputs": [], "source": [ "# Mode\n", "reader.dsid" ] }, { "cell_type": "code", "execution_count": null, "id": "727887e2-ea70-4a67-b5cf-89ddd9f4982d", "metadata": {}, "outputs": [], "source": [ "# Pixel line spacing (unit : meters)\n", "reader.pixel_line_m" ] }, { "cell_type": "code", "execution_count": null, "id": "03e607cf-a72d-43d7-9cfc-fd98acdc6901", "metadata": {}, "outputs": [], "source": [ "# Pixel sample spacing (unit : meters)\n", "reader.pixel_sample_m" ] }, { "cell_type": "markdown", "id": "5a4dc99a-3df2-4d97-8a0c-0d937de3f1a3", "metadata": {}, "source": [ "## Explore different files available" ] }, { "cell_type": "markdown", "id": "385d740a-e897-4245-80d6-a79973700b4d", "metadata": {}, "source": [ "In the reader object representation, we can see if the product is a multidataset or not. 
We can also access this information with the property [multidataset](../api.rst#safe_s1.Sentinel1Reader.multidataset) :" ] }, { "cell_type": "code", "execution_count": null, "id": "a5e9535c-73ec-4b24-977f-41924a0ec3b9", "metadata": {}, "outputs": [], "source": [ "reader.multidataset" ] }, { "cell_type": "markdown", "id": "a5a58746-1e0a-46ca-afde-04d0d7da373d", "metadata": {}, "source": [ "### Single dataset product" ] }, { "cell_type": "markdown", "id": "425f9725-7a37-4b11-98c5-a7c72e15fe62", "metadata": {}, "source": [ "Above, we can see that the product isn't a multidataset, so we can access files like this :" ] }, { "cell_type": "code", "execution_count": null, "id": "ad9729cd-5f70-4d28-80a3-f9081a68264c", "metadata": {}, "outputs": [], "source": [ "reader.files" ] }, { "cell_type": "markdown", "id": "03e9c002-6864-4426-a31e-64aecdf4acb3", "metadata": {}, "source": [ "### Multidataset product" ] }, { "cell_type": "markdown", "id": "3a8509b0-9ba8-4777-b099-24bdb3978496", "metadata": {}, "source": [ "If the product is a multidataset, you must use the following property to see the safe files :" ] }, { "cell_type": "code", "execution_count": null, "id": "f1bc7fc6-7d44-451a-83b0-17c1c4d949c1", "metadata": {}, "outputs": [], "source": [ "reader.safe_files" ] }, { "cell_type": "markdown", "id": "9954b236-edee-4174-9054-bf0dcdd0dc3f", "metadata": {}, "source": [ "Or you can access the files of a subdataset applying the reader to a subdataset :" ] }, { "cell_type": "code", "execution_count": null, "id": "206b0bc0-47cf-4bdd-9318-0d182afb6e69", "metadata": {}, "outputs": [], "source": [ "# Access available datasets\n", "datasets = reader.datasets_names\n", "datasets" ] }, { "cell_type": "code", "execution_count": null, "id": "3e768c03-597e-4319-ab72-d7441f40a01f", "metadata": {}, "outputs": [], "source": [ "# Instanciate a reader for a subdataset\n", "reader = Sentinel1Reader(datasets[0])" ] }, { "cell_type": "code", "execution_count": null, "id": "b4b58d0a-fbd1-45bd-a512-e7c2303b91ce", "metadata": {}, "outputs": [], "source": [ "# Access the files of the subdataset\n", "reader.files" ] }, { "cell_type": "markdown", "id": "159a9ddf-90cd-4703-a0dc-f726cf477480", "metadata": {}, "source": [ "## Access the useful data\n", "It is expressed as a datatree" ] }, { "cell_type": "code", "execution_count": null, "id": "b891e4ee-9ae8-462d-b315-0020554f85ff", "metadata": {}, "outputs": [], "source": [ "data = reader.datatree\n", "data" ] }, { "cell_type": "markdown", "id": "fd5c3484-469d-47fa-98bf-f354380d3519", "metadata": {}, "source": [ "In attributes (history) we can retrieve the files and xpaths used to get concerned data" ] }, { "cell_type": "markdown", "id": "f8c680a3-3854-48c0-9bff-73395afb22c4", "metadata": {}, "source": [ "## Load digital numbers" ] }, { "cell_type": "markdown", "id": "d3575fd8-96f5-4052-82ac-33d79adb06ff", "metadata": {}, "source": [ "A function to load digital numbers with a specific resolution is also included in the reader. 
The function used is [load_digital_number](../api.rst#safe_s1.Sentinel1Reader.load_digital_number)" ] }, { "cell_type": "markdown", "id": "7e165171-de5f-4b7c-bd7d-61ad486ca1b4", "metadata": {}, "source": [ "Note : This function returns a tuple: resolution and digital numbers" ] }, { "cell_type": "code", "execution_count": null, "id": "9b73f78f-a01b-413d-9005-3c50af46ae90", "metadata": {}, "outputs": [], "source": [ "import rasterio" ] }, { "cell_type": "code", "execution_count": null, "id": "07b2b114-b5fa-421e-b136-a6254c4b47e5", "metadata": {}, "outputs": [], "source": [ "# parameters\n", "resampling = rasterio.enums.Resampling.rms\n", "chunks = {'line': 5000, 'sample': 5000}\n", "resolution = '1000m'" ] }, { "cell_type": "code", "execution_count": null, "id": "10afe1bd-b364-44dc-bb55-7768a6d3663c", "metadata": {}, "outputs": [], "source": [ "dn = reader.load_digital_number(resolution=resolution, resampling=resampling, chunks=chunks)\n", "dn" ] }, { "cell_type": "markdown", "id": "daed8449-c296-4b76-8934-19c297430a67", "metadata": { "tags": [] }, "source": [ "## Some important functions used to build the datatree" ] }, { "cell_type": "markdown", "id": "4f95747f-619c-4a4f-9d8b-2984b5be9b31", "metadata": {}, "source": [ "### Calibration luts" ] }, { "cell_type": "code", "execution_count": null, "id": "314fd977-cf46-4cdd-80df-344870b5563c", "metadata": {}, "outputs": [], "source": [ "reader.get_calibration_luts" ] }, { "cell_type": "markdown", "id": "b52cd828-158b-4293-8d04-63611cc01c41", "metadata": {}, "source": [ "### Noise range luts" ] }, { "cell_type": "code", "execution_count": null, "id": "bb463ce3-ae0d-4ed0-b162-1e7a4feaf29f", "metadata": {}, "outputs": [], "source": [ "reader.get_noise_range_raw" ] }, { "cell_type": "markdown", "id": "8ef6f480-9890-43f7-8c53-d6dcd6c60602", "metadata": {}, "source": [ "### Noise azimuth luts" ] }, { "cell_type": "code", "execution_count": null, "id": "a218824e-662d-40bf-b2ef-b288eca7dee6", "metadata": {}, "outputs": [], "source": [ "reader.get_noise_azi_raw" ] }, { "cell_type": "markdown", "id": "ba6fcfbb-1866-4bdc-9bcd-23902b4086ff", "metadata": { "tags": [] }, "source": [ "### Geolocation grid" ] }, { "cell_type": "markdown", "id": "a8e4165f-19c8-4089-a947-81583487a115", "metadata": {}, "source": [ "The function used is [geoloc](../api.rst#safe_s1.Sentinel1Reader.geoloc), but it has a safety not to retrieve the data when it is already loaded. 
For this example, the data is already loaded so we must manually unlock the safety not to have a None result :" ] }, { "cell_type": "code", "execution_count": null, "id": "fcc6c58f-9e92-462e-bd1c-2a2488724331", "metadata": {}, "outputs": [], "source": [ "# Unlock the safety\n", "reader._dict['geolocationGrid'] = None\n", "# Load the geolocation grid dataset\n", "reader.geoloc" ] }, { "cell_type": "markdown", "id": "24f90a89-1d2b-4017-89d7-35ba9b9e2c5a", "metadata": {}, "source": [ "### Orbit information" ] }, { "cell_type": "code", "execution_count": null, "id": "e2c68dea-3fa9-4ca8-996b-d59af36827d6", "metadata": {}, "outputs": [], "source": [ "reader.orbit" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 5 } xarray-safe-s1-2024.11.28/docs/index.rst000066400000000000000000000043651472207407300174650ustar00rootroot00000000000000####################################################### xarray-safe-s1: xarray SAFE files reader for Sentinel-1 ####################################################### **safe_s1** is a SAR file reader Documentation ------------- Overview ........ **safe_s1** rely on `xarray.open_rasterio` and `rasterio` to read *digital_number* from SAFE product to return an xarray.datatree. Luts are decoded from xml files following `ESA Sentinel-1 Product Specification`_. `safe_s1.metadata.Sentinel1reader` is the main class and contains a xarray.datatree with the useful data. In the following example, you will find some additional functions and properties that can be useful. Examples ........ .. note:: Those examples use `sentinel1_xml_mappings.get_test_file` to automatically download test data from https://cyclobs.ifremer.fr/static/sarwing_datarmor/xsardata/ Those file are not official ones: they are resampled to a lower resolution and compressed to avoid big network transfert and disk usage. Don't use them for real science ! * :doc:`examples/simple_tutorial` Reference ......... * :doc:`api` Get in touch ------------ - Report bugs, suggest features or view the source code `on github`_. ---------------------------------------------- Last documentation build: |today| .. toctree:: :maxdepth: 1 :hidden: :caption: Getting Started installing .. toctree:: :maxdepth: 1 :caption: Examples examples/simple_tutorial .. toctree:: :maxdepth: 1 :hidden: :caption: Reference api .. _on github: https://github.com/umr-lops/xarray-safe-s1 .. _xarray: http://xarray.pydata.org .. _dask: http://dask.org .. _rasterio: https://rasterio.readthedocs.io/en/latest/ .. _xarray.open_rasterio: http://xarray.pydata.org/en/stable/generated/xarray.open_rasterio.html .. _ESA Sentinel-1 Product Specification: https://earth.esa.int/documents/247904/1877131/Sentinel-1-Product-Specification .. _xarray.Dataset: http://xarray.pydata.org/en/stable/generated/xarray.Dataset.html .. _`recommended installation`: installing.rst#recommended-packages .. _SAFE format: https://sentinel.esa.int/web/sentinel/user-guides/sentinel-1-sar/data-formats .. 
_jupyter notebook: https://jupyter.readthedocs.io/en/latest/running.html#running xarray-safe-s1-2024.11.28/docs/installing.rst000066400000000000000000000020241472207407300205100ustar00rootroot00000000000000************ Installation ************ conda install ############# Install xarray-safe-s1 .. code-block:: conda create -n safe_s1 conda activate safe_s1 conda install -c conda-forge xarray-safe-s1 pip install ########### Install xarray-safe-s1 .. code-block:: conda create -n safe_s1 conda activate safe_s1 pip install git+https://github.com/umr-lops/xarray-safe-s1.git Development installation .......................... .. code-block:: git clone https://github.com/umr-lops/xarray-safe-s1 cd xarray-safe-s1 # this is needed to register git filters git config --local include.path ../.gitconfig pip install -e . pip install -r requirements.txt Pytest configuration .................... Pytest uses a default configuration file (`config.yml`) in which we can find the product paths to test. This configuration can be superseded by adding a local config file in the home directory (`~/xarray-safe-s1/localconfig.yml`). In this file, testing files can be listed in the variable `product_paths`. xarray-safe-s1-2024.11.28/docs/make.bat000066400000000000000000000014401472207407300172200ustar00rootroot00000000000000@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo.
echo.If you don't have Sphinx installed, grab it from echo.https://www.sphinx-doc.org/ exit /b 1 ) if "%1" == "" goto help %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd xarray-safe-s1-2024.11.28/docs/requirements.txt000066400000000000000000000000441472207407300210760ustar00rootroot00000000000000sphinx>=4 sphinx-book-theme ipython xarray-safe-s1-2024.11.28/highleveltests/000077500000000000000000000000001472207407300177165ustar00rootroot00000000000000xarray-safe-s1-2024.11.28/highleveltests/open_GRD_IW.py000066400000000000000000000012261472207407300223250ustar00rootroot00000000000000import pdb from safe_s1 import Sentinel1Reader, getconfig import time import logging logging.basicConfig(level=logging.DEBUG) logging.debug('start GRD test') conf = getconfig.get_config() subswath = conf['product_paths'][0] print(subswath) t0 = time.time() if 'GRD' in subswath: sub_reader = Sentinel1Reader(subswath) else: sub_reader = Sentinel1Reader('SENTINEL1_DS:'+subswath+':IW3') elapse_t = time.time()-t0 dt = sub_reader.datatree print('out of the reader') print(dt) print('time to read the SAFE through nfs: %1.2f sec'%elapse_t) DN = sub_reader.load_digital_number(chunks={'pol':'VV','line':6000,'sample':8000}) print('DN',DN) # pdb.set_trace() xarray-safe-s1-2024.11.28/highleveltests/open_SLC_IW.py000066400000000000000000000005671472207407300223410ustar00rootroot00000000000000import pdb from safe_s1 import Sentinel1Reader, getconfig import time conf = getconfig.get_config() subswath = conf['nfs_iw_grd_path'] print(subswath) t0 = time.time() sub_reader = Sentinel1Reader(subswath) elapse_t = time.time()-t0 dt = sub_reader.datatree print('out of the reader') print(dt) print('time to read the SAFE through nfs: %1.2f sec'%elapse_t) pdb.set_trace() xarray-safe-s1-2024.11.28/highleveltests/open_SLC_IW_S3.py000066400000000000000000000032121472207407300226740ustar00rootroot00000000000000# see https://stackoverflow.com/questions/69624867/no-such-file-error-when-trying-to-create-local-cache-of-s3-object from safe_s1 import Sentinel1Reader,getconfig import pdb import os import time import logging import fsspec logging.basicConfig(level=logging.INFO) logging.info('test start') conf = getconfig.get_config() access_key = conf['access_key'] secret_key = conf['secret_key'] entrypoint_url = conf['entrypoint_url'] s3 = fsspec.filesystem("s3", anon=False, key=access_key, secret=secret_key, endpoint_url='https://'+entrypoint_url) # this syntaxe works we can get content xml files but I would have to precise which subswath I want to decode in case of SLC # safe2 = 's3:///eodata/Sentinel-1/SAR/SLC/2019/10/13/S1B_IW_SLC__1SDV_20191013T155948_20191013T160015_018459_022C6B_13A2.SAFE' safe2 = 's3:///eodata/Sentinel-1/SAR/IW_GRDH_1S/2024/04/18/S1A_IW_GRDH_1SSH_20240418T080141_20240418T080210_053485_067D74_C073.SAFE' # safe2 = conf['s3_iw_grd_path'] option = 'kwargs' if option == 'kwargs': storage_options = {"anon": False, "client_kwargs": {"endpoint_url": 'https://'+entrypoint_url, 'aws_access_key_id':access_key, 'aws_secret_access_key':secret_key}} t0 = time.time() sub_reader = Sentinel1Reader(safe2,backend_kwargs={"storage_options": storage_options}) elapse_t = time.time()-t0 print('time to read the SAFE through S3: %1.2f sec'%elapse_t) else: # this solution is not supported. 
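# note: handing an fsspec mapper object straight to Sentinel1Reader (next line) is kept here only for reference;
# the supported S3 access path in this script is the backend_kwargs={"storage_options": ...} branch above.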
sub_reader = Sentinel1Reader(s3.get_mapper(safe2)) # botocore.errorfactory.NoSuchKey: An error occurred (NoSuchKey) when calling the GetObject operation: Unknown dt = sub_reader.datatree print('out of the reader') print(dt) xarray-safe-s1-2024.11.28/pyproject.toml000066400000000000000000000027621472207407300176070ustar00rootroot00000000000000[project] name = "xarray-safe-s1" requires-python = ">= 3.9" license = {text = "MIT"} dependencies = [ "geopandas", "numpy", "xarray>=2024.10.0", "lxml", "rioxarray", "jmespath", "fsspec", "rasterio", "affine", "pandas", "shapely", "pyproj", "dask", "aiohttp", ] readme = "README.md" dynamic = ["version"] [build-system] requires = ["setuptools>=64.0", "setuptools-scm"] build-backend = "setuptools.build_meta" [tool.setuptools] packages = ["safe_s1"] [tool.setuptools_scm] fallback_version = "999" [tool.isort] profile = "black" skip_gitignore = true float_to_top = true default_section = "THIRDPARTY" known_first_party = "safe_s1" [tool.coverage.report] show_missing = true exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"] [tool.ruff.lint] ignore = [ "E402", # module level import not at top of file "E501", # line too long - let black worry about that "E731", # do not assign a lambda expression, use a def "UP038", # type union instead of tuple for isinstance etc ] select = [ "F", # Pyflakes "E", # Pycodestyle "I", # isort "UP", # Pyupgrade "TID", # flake8-tidy-imports "W", ] extend-safe-fixes = [ "TID252", # absolute imports "UP031", # percent string interpolation ] fixable = ["I", "TID252", "UP"] [tool.ruff.lint.isort] known-first-party = ["safe_s1"] known-third-party = ["xarray", "toolz", "construct"] [tool.ruff.lint.flake8-tidy-imports] # Disallow all relative imports. ban-relative-imports = "all" xarray-safe-s1-2024.11.28/safe_s1/000077500000000000000000000000001472207407300162055ustar00rootroot00000000000000xarray-safe-s1-2024.11.28/safe_s1/__init__.py000066400000000000000000000005041472207407300203150ustar00rootroot00000000000000import traceback from safe_s1.reader import Sentinel1Reader try: from importlib import metadata except ImportError: # for Python<3.8 import importlib_metadata as metadata try: __version__ = metadata.version("xarray-safe-s1") except Exception: print("trace", traceback.format_exc()) __version__ = "999" xarray-safe-s1-2024.11.28/safe_s1/config.yml000066400000000000000000000001751472207407300202000ustar00rootroot00000000000000# default data paths for tests product_paths: - "S1A_IW_GRDH_1SDV_20170907T103020_20170907T103045_018268_01EB76_Z010.SAFE" xarray-safe-s1-2024.11.28/safe_s1/getconfig.py000066400000000000000000000021131472207407300205210ustar00rootroot00000000000000import logging import os from pathlib import Path import yaml import safe_s1 # determine the config file we will use (config.yml by default, and a local config if one is present) and retrieve # the products names def get_config(): local_config_pontential_path = os.path.join( os.path.dirname(safe_s1.__file__), "localconfig.yml" ) logging.info("potential local config: %s", local_config_pontential_path) # local_config_pontential_path = Path(os.path.join('~', 'xarray-safe-s1', 'localconfig.yml')).expanduser() if os.path.exists(local_config_pontential_path): logging.info("localconfig used") config_path = local_config_pontential_path with open(config_path) as config_content: conf = yaml.load(config_content, Loader=yaml.SafeLoader) else: logging.info("default config") config_path = Path( os.path.join(os.path.dirname(safe_s1.__file__), "config.yml") ) with 
open(config_path) as config_content: conf = yaml.load(config_content, Loader=yaml.SafeLoader) return conf xarray-safe-s1-2024.11.28/safe_s1/reader.py000066400000000000000000000740771472207407300200400ustar00rootroot00000000000000import logging import os import re import dask import fsspec import numpy as np import pandas as pd import rasterio import xarray as xr import yaml from affine import Affine from rioxarray import rioxarray from safe_s1 import sentinel1_xml_mappings from safe_s1.xml_parser import XmlParser class Sentinel1Reader: def __init__(self, name, backend_kwargs=None): logging.debug("input name: %s", name) if not isinstance(name, (str, os.PathLike)): raise ValueError(f"cannot deal with object of type {type(name)}: {name}") # gdal dataset name if not name.startswith("SENTINEL1_DS:"): name = "SENTINEL1_DS:%s:" % name self.name = name """Gdal dataset name""" name_parts = self.name.split(":") if len(name_parts) > 3: logging.debug("windows case") # windows might have semicolon in path ('c:\...') name_parts[1] = ":".join(name_parts[1:-1]) del name_parts[2:-1] name_parts[1] = os.path.basename(name_parts[1]) self.short_name = ":".join(name_parts) logging.debug("short_name : %s", self.short_name) """Like name, but without path""" if len(name_parts) == 2: self.path = self.name.split(":")[1] else: self.path = ":".join(self.name.split(":")[1:-1]) logging.debug("path: %s", self.path) # remove trailing slash in the safe path if self.path[-1] == "/": self.path = self.path.rstrip("/") """Dataset path""" self.safe = os.path.basename(self.path) self.path = os.fspath(self.path) if backend_kwargs is None: backend_kwargs = {} storage_options = backend_kwargs.get("storage_options", {}) mapper = fsspec.get_mapper(self.path, **storage_options) self.xml_parser = XmlParser( xpath_mappings=sentinel1_xml_mappings.xpath_mappings, compounds_vars=sentinel1_xml_mappings.compounds_vars, namespaces=sentinel1_xml_mappings.namespaces, mapper=mapper, ) self.manifest = "manifest.safe" if "SLC" in self.path or "GRD" in self.path: self.manifest_attrs = self.xml_parser.get_compound_var( self.manifest, "safe_attributes_slcgrd" ) elif "SL2" in self.path: self.manifest_attrs = self.xml_parser.get_compound_var( self.manifest, "safe_attributes_sl2" ) else: raise Exception("case not handled") self._safe_files = None self._multidataset = False """True if multi dataset""" self._datasets_names = list(self.safe_files["dsid"].sort_index().unique()) self.xsd_definitions = self.get_annotation_definitions() if self.name.endswith(":") and len(self._datasets_names) == 1: self.name = self._datasets_names[0] self.dsid = self.name.split(":")[-1] """Dataset identifier (like 'WV_001', 'IW1', 'IW'), or empty string for multidataset""" try: self.product = os.path.basename(self.path).split("_")[2] except ValueError: print("path: %s" % self.path) self.product = "XXX" """Product type, like 'GRDH', 'SLC', etc ..""" # submeta is a list of submeta objects if multidataset and TOPS # this list will remain empty for _WV__SLC because it will be time-consuming to process them # self._submeta = [] if self.short_name.endswith(":"): self.short_name = self.short_name + self.dsid if self.files.empty: self._multidataset = True self.dt = None self._dict = { "geolocationGrid": None, } if not self.multidataset: self._dict = { "geolocationGrid": self.geoloc, "orbit": self.orbit, "image": self.image, "azimuth_fmrate": self.azimuth_fmrate, "doppler_estimate": self.doppler_estimate, "bursts": self.bursts, "calibration_luts": self.get_calibration_luts, 
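# each entry of this dict becomes a node of the datatree built just below with xr.DataTree.from_dict;
# the values are reader properties that parse the corresponding annotation / calibration / noise XML files.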
"noise_azimuth_raw": self.get_noise_azi_raw, "noise_range_raw": self.get_noise_range_raw, "antenna_pattern": self.antenna_pattern, "swath_merging": self.swath_merging, } self.dt = xr.DataTree.from_dict(self._dict) assert self.dt == self.datatree else: print("multidataset") # there is no error raised here, because we want to let the user access the metadata for multidatasets def load_digital_number( self, resolution=None, chunks=None, resampling=rasterio.enums.Resampling.rms ): """ load digital_number from self.sar_meta.files['measurement'], as an `xarray.Dataset`. Parameters ---------- resolution: None, numbers.Number, str or dict resampling: rasterio.enums.Resampling Returns ------- (float, xarray.Dataset) tuple that contains resolution and dataset (possibly dual-pol), with basic coords/dims naming convention """ def get_glob(strlist): # from list of str, replace diff by '?' def _get_glob(st): stglob = "".join( [ "?" if len(charlist) > 1 else charlist[0] for charlist in [list(set(charset)) for charset in zip(*st)] ] ) return re.sub(r"\?+", "*", stglob) strglob = _get_glob(strlist) if strglob.endswith("*"): strglob += _get_glob(s[::-1] for s in strlist)[::-1] strglob = strglob.replace("**", "*") return strglob map_dims = {"pol": "band", "line": "y", "sample": "x"} _dtypes = { "latitude": "f4", "longitude": "f4", "incidence": "f4", "elevation": "f4", "altitude": "f4", "ground_heading": "f4", "nesz": None, "negz": None, "sigma0_raw": None, "gamma0_raw": None, "noise_lut": "f4", "noise_lut_range": "f4", "noise_lut_azi": "f4", "sigma0_lut": "f8", "gamma0_lut": "f8", "azimuth_time": np.datetime64, "slant_range_time": None, } if resolution is not None: comment = 'resampled at "%s" with %s.%s.%s' % ( resolution, resampling.__module__, resampling.__class__.__name__, resampling.name, ) else: comment = "read at full resolution" # Add root to path files_measurement = self.files["measurement"].copy() files_measurement = [os.path.join(self.path, f) for f in files_measurement] # arbitrary rio object, to get shape, etc ... 
(will not be used to read data) rio = rasterio.open(files_measurement[0]) chunks["pol"] = 1 # sort chunks keys like map_dims chunks = dict( sorted( chunks.items(), key=lambda pair: list(map_dims.keys()).index(pair[0]) ) ) chunks_rio = {map_dims[d]: chunks[d] for d in map_dims.keys()} res = None if resolution is None: # using tiff driver: need to read individual tiff and concat them # riofiles['rio'] is ordered like self.sar_meta.manifest_attrs['polarizations'] dn = xr.concat( [ rioxarray.open_rasterio( f, chunks=chunks_rio, parse_coordinates=False ) for f in files_measurement ], "band", ).assign_coords( band=np.arange(len(self.manifest_attrs["polarizations"])) + 1 ) # set dimensions names dn = dn.rename(dict(zip(map_dims.values(), map_dims.keys()))) # create coordinates from dimension index (because of parse_coordinates=False) dn = dn.assign_coords({"line": dn.line, "sample": dn.sample}) dn = dn.drop_vars("spatial_ref", errors="ignore") else: if not isinstance(resolution, dict): if isinstance(resolution, str) and resolution.endswith("m"): resolution = float(resolution[:-1]) res = resolution resolution = dict( line=resolution / self.pixel_line_m, sample=resolution / self.pixel_sample_m, ) # resolution = dict(line=resolution / self.dataset['sampleSpacing'].values, # sample=resolution / self.dataset['lineSpacing'].values) # resample the DN at gdal level, before feeding it to the dataset out_shape = ( int(rio.height / resolution["line"]), int(rio.width / resolution["sample"]), ) out_shape_pol = (1,) + out_shape # read resampled array in one chunk, and rechunk # this doesn't optimize memory, but total size remain quite small if isinstance(resolution["line"], int): # legacy behaviour: winsize is the maximum full image size that can be divided by resolution (int) winsize = ( 0, 0, rio.width // resolution["sample"] * resolution["sample"], rio.height // resolution["line"] * resolution["line"], ) window = rasterio.windows.Window(*winsize) else: window = None dn = xr.concat( [ xr.DataArray( dask.array.from_array( rasterio.open(f).read( out_shape=out_shape_pol, resampling=resampling, window=window, ), chunks=chunks_rio, ), dims=tuple(map_dims.keys()), coords={"pol": [pol]}, ) for f, pol in zip( files_measurement, self.manifest_attrs["polarizations"] ) ], "pol", ).chunk(chunks) # create coordinates at box center translate = Affine.translation( (resolution["sample"] - 1) / 2, (resolution["line"] - 1) / 2 ) scale = Affine.scale( rio.width // resolution["sample"] * resolution["sample"] / out_shape[1], rio.height // resolution["line"] * resolution["line"] / out_shape[0], ) sample, _ = translate * scale * (dn.sample, 0) _, line = translate * scale * (0, dn.line) dn = dn.assign_coords({"line": line, "sample": sample}) # for GTiff driver, pols are already ordered. 
just rename them dn = dn.assign_coords(pol=self.manifest_attrs["polarizations"]) if not all(self.denoised.values()): descr = "denoised" else: descr = "not denoised" var_name = "digital_number" dn.attrs = { "comment": "%s digital number, %s" % (descr, comment), "history": yaml.safe_dump( { var_name: get_glob( [p.replace(self.path + "/", "") for p in files_measurement] ) } ), } ds = dn.to_dataset(name=var_name) astype = _dtypes.get(var_name) if astype is not None: ds = ds.astype(_dtypes[var_name]) return res, ds @property def pixel_line_m(self): """ pixel line spacing, in meters (at sensor level) Returns ------- xarray.Dataset Sample spacing """ if self.multidataset: res = None # not defined for multidataset else: res = self.image["azimuthPixelSpacing"] return res @property def pixel_sample_m(self): """ pixel sample spacing, in meters (at sensor level) Returns ------- xarray.Dataset Sample spacing """ if self.multidataset: res = None # not defined for multidataset else: res = self.image["groundRangePixelSpacing"] return res @property def datasets_names(self): """ Alias to `Sentinel1Reader._datasets_names` Returns ------- list datasets names """ return self._datasets_names @property def datatree(self): """ Return data of the reader as datatree. Can't open data from a multiple dataset (must select a single one with displayed in `Sentinel1Reader.datasets_names`). So if multiple dataset, returns None. Alias to `Sentinel1Reader.dt`. Returns ------- xr.DataTree Contains data from the reader """ return self.dt @property def geoloc(self): """ xarray.Dataset with `['longitude', 'latitude', 'altitude', 'azimuth_time', 'slant_range_time','incidence','elevation' ]` variables and `['line', 'sample']` coordinates, at the geolocation grid Returns ------- xarray.Dataset Geolocation Grid """ if self.multidataset: raise TypeError("geolocation_grid not available for multidataset") if self._dict["geolocationGrid"] is None: xml_annotation = self.files["annotation"].iloc[0] da_var_list = [] for var_name in [ "longitude", "latitude", "height", "azimuthTime", "slantRangeTime", "incidenceAngle", "elevationAngle", ]: # TODO: we should use dask.array.from_delayed so xml files are read on demand da_var = self.xml_parser.get_compound_var(xml_annotation, var_name) da_var.name = var_name da_var.attrs["history"] = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], var_name, describe=True ) da_var_list.append(da_var) return xr.merge(da_var_list) @property def orbit(self): """ orbit, as a geopandas.GeoDataFrame, with columns: - 'velocity' : shapely.geometry.Point with velocity in x, y, z direction - 'geometry' : shapely.geometry.Point with position in x, y, z direction crs is set to 'geocentric' attrs keys: - 'orbit_pass': 'Ascending' or 'Descending' - 'platform_heading': in degrees, relative to north Notes ----- orbit is longer than the SAFE, because it belongs to all datatakes, not only this slice """ if self.multidataset: return None # not defined for multidataset gdf_orbit = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "orbit" ) for vv in gdf_orbit: if vv in self.xsd_definitions: gdf_orbit[vv].attrs["definition"] = self.xsd_definitions[vv] gdf_orbit.attrs["history"] = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "orbit", describe=True ) return gdf_orbit @property def denoised(self): """ dict with pol as key, and bool as values (True is DN is predenoised at L1 level) Returns ------- None | dict """ if self.multidataset: return None # not defined for 
multidataset else: return dict( [ self.xml_parser.get_compound_var(f, "denoised") for f in self.files["annotation"] ] ) @property def time_range(self): """ Get time range Returns ------- """ if not self.multidataset: return self.xml_parser.get_var( self.files["annotation"].iloc[0], "annotation.line_time_range" ) @property def image(self): """ Get image information Returns ------- xarray.Dataset Image information dataArrays """ if self.multidataset: return None img_dict = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "image" ) img_dict["history"] = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "image", describe=True ) for vv in img_dict: if vv in self.xsd_definitions: img_dict[vv].attrs["definition"] = self.xsd_definitions[vv] return img_dict @property def azimuth_fmrate(self): """ Returns ------- xarray.Dataset Frequency Modulation rate annotations such as t0 (azimuth time reference) and polynomial coefficients: Azimuth FM rate = c0 + c1(tSR - t0) + c2(tSR - t0)^2 """ fmrates = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "azimuth_fmrate" ) fmrates.attrs["history"] = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "azimuth_fmrate", describe=True ) for vv in fmrates: if vv in self.xsd_definitions: fmrates[vv].attrs["definition"] = self.xsd_definitions[vv] return fmrates @property def doppler_estimate(self): """ Returns ------- xarray.Dataset with Doppler Centroid Estimates from annotations such as geo_polynom,data_polynom or frequency """ dce = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "doppler_estimate" ) for vv in dce: if vv in self.xsd_definitions: dce[vv].attrs["definition"] = self.xsd_definitions[vv] dce.attrs["history"] = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "doppler_estimate", describe=True ) return dce @property def bursts(self): """ Get bursts information Returns ------- xarray.Dataset Bursts information dataArrays """ if ( self.xml_parser.get_var( self.files["annotation"].iloc[0], "annotation.number_of_bursts" ) > 0 ): bursts = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "bursts" ) for vv in bursts: if vv in self.xsd_definitions: bursts[vv].attrs["definition"] = self.xsd_definitions[vv] bursts.attrs["history"] = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "bursts", describe=True ) return bursts else: bursts = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "bursts_grd" ) bursts.attrs["history"] = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "bursts_grd", describe=True ) return bursts @property def antenna_pattern(self): ds = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "antenna_pattern" ) ds.attrs["history"] = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "antenna_pattern", describe=True ) return ds @property def swath_merging(self): if "GRD" in self.product: ds = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "swath_merging" ) ds.attrs["history"] = self.xml_parser.get_compound_var( self.files["annotation"].iloc[0], "swath_merging", describe=True ) else: ds = xr.Dataset() return ds @property def multidataset(self): """ Alias to `Sentinel1Reader._multidataset` Returns ------- bool """ return self._multidataset def get_annotation_definitions(self): """ Get annotation definitions (paths used to retrieve concerned data in the files) Returns ------- dict annotations definitions """ 
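# walk the product XSD file listed in the manifest ('xsd_files') and map each annotation
# element name to its documentation node; the result is stored as self.xsd_definitions and
# reused to fill the 'definition' attribute of orbit, image, burst and Doppler variables.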
final_dict = {} ds_path_xsd = self.xml_parser.get_compound_var(self.manifest, "xsd_files") path_xsd = ds_path_xsd["xsd_product"].values[0] full_path_xsd = os.path.join(self.path, path_xsd) if os.path.exists(full_path_xsd): rootxsd = self.xml_parser.getroot(path_xsd) mypath = "/xsd:schema/xsd:complexType/xsd:sequence/xsd:element" for lulu, uu in enumerate( rootxsd.xpath(mypath, namespaces=sentinel1_xml_mappings.namespaces) ): mykey = uu.values()[0] if uu.getchildren() != []: myvalue = uu.getchildren()[0].getchildren()[0] else: myvalue = None final_dict[mykey] = myvalue return final_dict @property def get_calibration_luts(self): """ get original (ie not interpolation) xr.Dataset sigma0 and gamma0 Look Up Tables to apply calibration Returns ------- xarray.Dataset Original sigma0 and gamma0 calibration Look Up Tables """ # sigma0_lut = self.xml_parser.get_var(self.files['calibration'].iloc[0], 'calibration.sigma0_lut',describe=True) pols = [] tmp = [] for pol_code, xml_file in self.files["calibration"].items(): luts_ds = self.xml_parser.get_compound_var(xml_file, "luts_raw") # add history to attributes minifile = re.sub(".*SAFE/", "", xml_file) minifile = re.sub(r"-.*\.xml", ".xml", minifile) for da in luts_ds: histo = self.xml_parser.get_var( xml_file, f"calibration.{da}", describe=True ) luts_ds[da].attrs["history"] = yaml.safe_dump({da: {minifile: histo}}) pol = os.path.basename(xml_file).split("-")[4].upper() pols.append(pol) tmp.append(luts_ds) ds = xr.concat(tmp, pd.Index(pols, name="pol")) # ds.attrs = {'description': # 'original (ie not interpolation) xr.Dataset sigma0 and gamma0 Look Up Tables'} return ds @property def get_noise_azi_raw(self): """ Get raw noise azimuth lut Returns ------- xarray.Dataset raw noise azimuth lut """ tmp = [] pols = [] history = [] for pol_code, xml_file in self.files["noise"].items(): pol = os.path.basename(xml_file).split("-")[4].upper() pols.append(pol) if self.product == "SLC" or self.product == "SL2": noise_lut_azi_raw_ds = self.xml_parser.get_compound_var( xml_file, "noise_lut_azi_raw_slc" ) history.append( self.xml_parser.get_compound_var( xml_file, "noise_lut_azi_raw_slc", describe=True ) ) else: noise_lut_azi_raw_ds = self.xml_parser.get_compound_var( xml_file, "noise_lut_azi_raw_grd" ) # noise_lut_azi_raw_ds.attrs[f'raw_azi_lut_{pol}'] = \ # self.xml_parser.get_var(xml_file, 'noise.azi.noiseLut') history.append( self.xml_parser.get_compound_var( xml_file, "noise_lut_azi_raw_grd", describe=True ) ) for vari in noise_lut_azi_raw_ds: if "noise_lut" in vari: varitmp = "noiseLut" hihi = self.xml_parser.get_var( self.files["noise"].iloc[0], "noise.azi.%s" % varitmp, describe=True, ) elif vari == "noise_lut" and self.product == "WV": # WV case hihi = "dummy variable, noise is not defined in azimuth for WV acquisitions" else: varitmp = vari hihi = self.xml_parser.get_var( self.files["noise"].iloc[0], "noise.azi.%s" % varitmp, describe=True, ) noise_lut_azi_raw_ds[vari].attrs["description"] = hihi tmp.append(noise_lut_azi_raw_ds) ds = xr.concat(tmp, pd.Index(pols, name="pol")) ds.attrs["history"] = "\n".join(history) return ds @property def get_noise_range_raw(self): """ Get raw noise range lut Returns ------- xarray.Dataset raw noise range lut """ tmp = [] pols = [] history = [] for pol_code, xml_file in self.files["noise"].items(): # pol = self.files['polarization'].cat.categories[pol_code - 1] pol = os.path.basename(xml_file).split("-")[4].upper() pols.append(pol) noise_lut_range_raw_ds = self.xml_parser.get_compound_var( xml_file, 
"noise_lut_range_raw" ) for vari in noise_lut_range_raw_ds: if "noise_lut" in vari: varitmp = "noiseLut" hihi = self.xml_parser.get_var( self.files["noise"].iloc[0], "noise.range.%s" % varitmp, describe=True, ) noise_lut_range_raw_ds[vari].attrs["description"] = hihi history.append( self.xml_parser.get_compound_var( xml_file, "noise_lut_range_raw", describe=True ) ) tmp.append(noise_lut_range_raw_ds) ds = xr.concat(tmp, pd.Index(pols, name="pol")) ds.attrs["history"] = "\n".join(history) return ds def get_noise_azi_initial_parameters(self, pol): """ Retrieve initial noise lut and lines Parameters ---------- pol: str polarization selected Returns ------- (List, List, List, List, List, List, List) Tuple that contains the swaths, noise azimuth lines, line_start, line_stop, sample_start, sample_stop and noise azimuth lut for the pol selected. """ for pol_code, xml_file in self.files["noise"].items(): if pol in os.path.basename(xml_file).upper(): return ( self.xml_parser.get_var(xml_file, "noise.azi.swath"), self.xml_parser.get_var(xml_file, "noise.azi.line"), self.xml_parser.get_var(xml_file, "noise.azi.line_start"), self.xml_parser.get_var(xml_file, "noise.azi.line_stop"), self.xml_parser.get_var(xml_file, "noise.azi.sample_start"), self.xml_parser.get_var(xml_file, "noise.azi.sample_stop"), self.xml_parser.get_var(xml_file, "noise.azi.noiseLut"), ) @property def safe_files(self): """ Files and polarizations for whole SAFE. The index is the file number, extracted from the filename. To get files in official SAFE order, the resulting dataframe should be sorted by polarization or index. Returns ------- pandas.core.frame.DataFrame Columns: * index : file number, extracted from the filename. * dsid : dataset id, compatible with gdal sentinel1 driver ('SENTINEL1_DS:/path/file.SAFE:WV_012') * polarization : polarization name. * annotation : xml annotation file. * calibration : xml calibration file. * noise : xml noise file. * measurement : tiff measurement file. See Also -------- Sentinel1Reader.files """ if self._safe_files is None: files = self.xml_parser.get_compound_var(self.manifest, "files") """ # add path for f in ['annotation', 'measurement', 'noise', 'calibration']: files[f] = files[f].map(lambda f: os.path.join(# self.path, f))""" # set "polarization" as a category, so sorting dataframe on polarization # will return the dataframe in same order as self._safe_attributes['polarizations'] files["polarization"] = files.polarization.astype( "category" ).cat.reorder_categories(self.manifest_attrs["polarizations"], ordered=True) # replace 'dsid' with full path, compatible with gdal sentinel1 driver files["dsid"] = files["dsid"].map( lambda dsid: "SENTINEL1_DS:%s:%s" % (self.path, dsid) ) files.sort_values("polarization", inplace=True) self._safe_files = files return self._safe_files @property def files(self): """ Files for current dataset. 
(Empty for multi datasets) See Also -------- Sentinel1Reader.safe_files """ return self.safe_files[self.safe_files["dsid"] == self.name] def __repr__(self): if self.multidataset: typee = "multi (%d)" % len(self.subdatasets) else: typee = "single" return "" % typee xarray-safe-s1-2024.11.28/safe_s1/sentinel1_xml_mappings.py000066400000000000000000001561311472207407300232460ustar00rootroot00000000000000""" xpath mapping from xml file, with convertion functions """ import os.path import warnings import zipfile from datetime import datetime import aiohttp import fsspec import geopandas as gpd import numpy as np import pandas as pd import pyproj import xarray import xarray as xr from numpy.polynomial import Polynomial from shapely.geometry import Point, Polygon namespaces = { "xfdu": "urn:ccsds:schema:xfdu:1", "s1sarl1": "http://www.esa.int/safe/sentinel-1.0/sentinel-1/sar/level-1", "s1sar": "http://www.esa.int/safe/sentinel-1.0/sentinel-1/sar", "s1": "http://www.esa.int/safe/sentinel-1.0/sentinel-1", "safe": "http://www.esa.int/safe/sentinel-1.0", "gml": "http://www.opengis.net/gml", } # xpath convertion function: they take only one args (list returned by xpath) scalar = lambda x: x[0] scalar_int = lambda x: int(x[0]) scalar_float = lambda x: float(x[0]) date_converter = lambda x: datetime.strptime(x[0], "%Y-%m-%dT%H:%M:%S.%f") datetime64_array = lambda x: np.array( [np.datetime64(date_converter([sx])).astype("datetime64[ns]") for sx in x] ) int_1Darray_from_string = lambda x: np.fromstring(x[0], dtype=int, sep=" ") float_2Darray_from_string_list = lambda x: np.vstack( [np.fromstring(e, dtype=float, sep=" ") for e in x] ) list_of_float_1D_array_from_string = lambda x: [ np.fromstring(e, dtype=float, sep=" ") for e in x ] int_1Darray_from_join_strings = lambda x: np.fromstring(" ".join(x), dtype=int, sep=" ") float_1Darray_from_join_strings = lambda x: np.fromstring( " ".join(x), dtype=float, sep=" " ) int_array = lambda x: np.array(x, dtype=int) bool_array = lambda x: np.array(x, dtype=bool) float_array = lambda x: np.array(x, dtype=float) uniq_sorted = lambda x: np.array(sorted(set(x))) ordered_category = lambda x: pd.Categorical(x).reorder_categories(x, ordered=True) normpath = lambda paths: [os.path.normpath(p) for p in paths] def get_test_file(fname): """ get test file from https://cyclobs.ifremer.fr/static/sarwing_datarmor/xsardata/ file is unzipped and extracted to `config['data_dir']` Parameters ---------- fname: str file name to get (without '.zip' extension) Returns ------- str path to file, relative to `config['data_dir']` """ config = {"data_dir": "/tmp"} def url_get(url, cache_dir=os.path.join(config["data_dir"], "fsspec_cache")): """ Get fil from url, using caching. Parameters ---------- url: str cache_dir: str Cache dir to use. default to `os.path.join(config['data_dir'], 'fsspec_cache')` Raises ------ FileNotFoundError Returns ------- filename: str The local file name Notes ----- Due to fsspec, the returned filename won't match the remote one. 
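
        Examples
        --------
        Hypothetical call (file name for illustration only; the archive must exist
        on the remote server)::

            url_get("https://cyclobs.ifremer.fr/static/sarwing_datarmor/xsardata/SOME_PRODUCT.SAFE.zip")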
""" if "://" in url: with fsspec.open( "filecache::%s" % url, https={"client_kwargs": {"timeout": aiohttp.ClientTimeout(total=3600)}}, filecache={ "cache_storage": os.path.join( os.path.join(config["data_dir"], "fsspec_cache") ) }, ) as f: fname = f.name else: fname = url return fname res_path = config["data_dir"] base_url = "https://cyclobs.ifremer.fr/static/sarwing_datarmor/xsardata" file_url = "%s/%s.zip" % (base_url, fname) if not os.path.exists(os.path.join(res_path, fname)): warnings.warn("Downloading %s" % file_url) local_file = url_get(file_url) warnings.warn("Unzipping %s" % os.path.join(res_path, fname)) with zipfile.ZipFile(local_file, "r") as zip_ref: zip_ref.extractall(res_path) return os.path.join(res_path, fname) def or_ipf28(xpath): """change xpath to match ipf <2.8 or >2.9 (for noise range)""" xpath28 = xpath.replace("noiseRange", "noise").replace("noiseAzimuth", "noise") if xpath28 != xpath: xpath += " | %s" % xpath28 return xpath def list_poly_from_list_string_coords(str_coords_list): footprints = [] for gmlpoly in str_coords_list: footprints.append( Polygon( [ (float(lon), float(lat)) for lat, lon in [latlon.split(",") for latlon in gmlpoly.split(" ")] ] ) ) return footprints # xpath_mappings: # first level key is xml file type # second level key is variable name # mappings may be 'xpath', or 'tuple(func,xpath)', or 'dict' # - xpath is an lxml xpath # - func is a decoder function fed by xpath # - dict is a nested dict, to create more hierarchy levels. xpath_mappings = { "manifest": { "ipf_version": ( scalar_float, "//xmlData/safe:processing/safe:facility/safe:software/@version", ), "swath_type": (scalar, "//s1sarl1:instrumentMode/s1sarl1:mode"), # 'product': (scalar, '/xfdu:XFDU/informationPackageMap/xfdu:contentUnit/@textInfo'), "polarizations": ( ordered_category, "//s1sarl1:standAloneProductInformation/s1sarl1:transmitterReceiverPolarisation", ), "footprints": ( list_poly_from_list_string_coords, "//safe:frame/safe:footPrint/gml:coordinates", ), "product_type": ( scalar, "//s1sarl1:standAloneProductInformation/s1sarl1:productType", ), "mission": (scalar, "//safe:platform/safe:familyName"), "satellite": (scalar, "//safe:platform/safe:number"), "start_date": (date_converter, "//safe:acquisitionPeriod/safe:startTime"), "stop_date": (date_converter, "//safe:acquisitionPeriod/safe:stopTime"), "aux_cal": ( scalar, '//metadataSection/metadataObject/metadataWrap/xmlData/safe:processing/safe:resource/safe:processing/safe:resource[@role="AUX_CAL"]/@name', ), "aux_pp1": ( scalar, '//metadataSection/metadataObject/metadataWrap/xmlData/safe:processing/safe:resource/safe:processing/safe:resource[@role="AUX_PP1"]/@name', ), "aux_ins": ( scalar, '//metadataSection/metadataObject/metadataWrap/xmlData/safe:processing/safe:resource/safe:processing/safe:resource[@role="AUX_INS"]/@name', ), "aux_cal_sl2": ( scalar, '//metadataSection/metadataObject/metadataWrap/xmlData/safe:processing/safe:resource[@role="AUX_CAL"]/@name', ), "annotation_files": ( normpath, '/xfdu:XFDU/dataObjectSection/*[@repID="s1Level1ProductSchema"]/byteStream/fileLocation/@href', ), "measurement_files": ( normpath, '/xfdu:XFDU/dataObjectSection/*[@repID="s1Level1MeasurementSchema"]/byteStream/fileLocation/@href', ), "noise_files": ( normpath, '/xfdu:XFDU/dataObjectSection/*[@repID="s1Level1NoiseSchema"]/byteStream/fileLocation/@href', ), "calibration_files": ( normpath, '/xfdu:XFDU/dataObjectSection/*[@repID="s1Level1CalibrationSchema"]/byteStream/fileLocation/@href', ), "xsd_product_file": ( normpath, 
'/xfdu:XFDU/metadataSection/metadataObject[@ID="s1Level1ProductSchema"]/metadataReference/@href', ), "xsd_Noise_file": ( normpath, '/xfdu:XFDU/metadataSection/metadataObject[@ID="s1Level1NoiseSchema"]/metadataReference/@href', ), "xsd_RFI_file": ( normpath, '/xfdu:XFDU/metadataSection/metadataObject[@ID="s1Level1RfiSchema"]/metadataReference/@href', ), "xsd_calibration_file": ( normpath, '/xfdu:XFDU/metadataSection/metadataObject[@ID="s1Level1CalibrationSchema"]/metadataReference/@href', ), "xsd_objecttype_file": ( normpath, '/xfdu:XFDU/metadataSection/metadataObject[@ID="s1ObjectTypesSchema"]/metadataReference/@href', ), "xsd_measurement_file": ( normpath, '/xfdu:XFDU/metadataSection/metadataObject[@ID="s1Level1MeasurementSchema"]/metadataReference/@href', ), "xsd_level1product_file": ( normpath, '/xfdu:XFDU/metadataSection/metadataObject[@ID="s1Level1ProductPreviewSchema"]/metadataReference/@href', ), "xsd_overlay_file": ( normpath, '/xfdu:XFDU/metadataSection/metadataObject[@ID="s1Level1MapOverlaySchema"]/metadataReference/@href', ), "instrument_configuration_id": ( scalar, "//s1sarl1:standAloneProductInformation/s1sarl1:instrumentConfigurationID/text()", ), }, "calibration": { "polarization": (scalar, "/calibration/adsHeader/polarisation"), # 'number_of_vector': '//calibration/calibrationVectorList/@count', "line": ( np.array, "//calibration/calibrationVectorList/calibrationVector/line", ), "sample": ( int_1Darray_from_string, "//calibration/calibrationVectorList/calibrationVector[1]/pixel", ), "sigma0_lut": ( float_2Darray_from_string_list, "//calibration/calibrationVectorList/calibrationVector/sigmaNought", ), "gamma0_lut": ( float_2Darray_from_string_list, "//calibration/calibrationVectorList/calibrationVector/gamma", ), "azimuthTime": ( datetime64_array, "/calibration/calibrationVectorList/calibrationVector/azimuthTime", ), }, "noise": { "mode": (scalar, "/noise/adsHeader/mode"), "polarization": (scalar, "/noise/adsHeader/polarisation"), "range": { "line": ( int_array, or_ipf28("/noise/noiseRangeVectorList/noiseRangeVector/line"), ), "sample": ( lambda x: [np.fromstring(s, dtype=int, sep=" ") for s in x], or_ipf28("/noise/noiseRangeVectorList/noiseRangeVector/pixel"), ), "noiseLut": ( lambda x: [np.fromstring(s, dtype=float, sep=" ") for s in x], or_ipf28("/noise/noiseRangeVectorList/noiseRangeVector/noiseRangeLut"), ), "azimuthTime": ( datetime64_array, "/noise/noiseRangeVectorList/noiseRangeVector/azimuthTime", ), }, "azi": { "swath": "/noise/noiseAzimuthVectorList/noiseAzimuthVector/swath", "line": ( lambda x: [np.fromstring(str(s), dtype=int, sep=" ") for s in x], "/noise/noiseAzimuthVectorList/noiseAzimuthVector/line", ), "line_start": ( int_array, "/noise/noiseAzimuthVectorList/noiseAzimuthVector/firstAzimuthLine", ), "line_stop": ( int_array, "/noise/noiseAzimuthVectorList/noiseAzimuthVector/lastAzimuthLine", ), "sample_start": ( int_array, "/noise/noiseAzimuthVectorList/noiseAzimuthVector/firstRangeSample", ), "sample_stop": ( int_array, "/noise/noiseAzimuthVectorList/noiseAzimuthVector/lastRangeSample", ), "noiseLut": ( lambda x: [np.fromstring(str(s), dtype=float, sep=" ") for s in x], "/noise/noiseAzimuthVectorList/noiseAzimuthVector/noiseAzimuthLut", ), }, }, "annotation": { "product_type": (scalar, "/product/adsHeader/productType"), "swath_subswath": (scalar, "/product/adsHeader/swath"), "line": ( uniq_sorted, "/product/geolocationGrid/geolocationGridPointList/geolocationGridPoint/line", ), "sample": ( uniq_sorted, 
"/product/geolocationGrid/geolocationGridPointList/geolocationGridPoint/pixel", ), "incidenceAngle": ( float_array, "/product/geolocationGrid/geolocationGridPointList/geolocationGridPoint/incidenceAngle", ), "elevationAngle": ( float_array, "/product/geolocationGrid/geolocationGridPointList/geolocationGridPoint/elevationAngle", ), "height": ( float_array, "/product/geolocationGrid/geolocationGridPointList/geolocationGridPoint/height", ), "azimuthTime": ( datetime64_array, "/product/geolocationGrid/geolocationGridPointList/geolocationGridPoint/azimuthTime", ), "slantRangeTime": ( float_array, "/product/geolocationGrid/geolocationGridPointList/geolocationGridPoint/slantRangeTime", ), "longitude": ( float_array, "/product/geolocationGrid/geolocationGridPointList/geolocationGridPoint/longitude", ), "latitude": ( float_array, "/product/geolocationGrid/geolocationGridPointList/geolocationGridPoint/latitude", ), "polarization": (scalar, "/product/adsHeader/polarisation"), "line_time_range": ( datetime64_array, '/product/imageAnnotation/imageInformation/*[contains(name(),"LineUtcTime")]', ), "line_size": ( scalar, "/product/imageAnnotation/imageInformation/numberOfLines", ), "sample_size": ( scalar, "/product/imageAnnotation/imageInformation/numberOfSamples", ), "incidence_angle_mid_swath": ( scalar_float, "/product/imageAnnotation/imageInformation/incidenceAngleMidSwath", ), "azimuth_time_interval": ( scalar_float, "/product/imageAnnotation/imageInformation/azimuthTimeInterval", ), "slant_range_time_image": ( scalar_float, "/product/imageAnnotation/imageInformation/slantRangeTime", ), "rangePixelSpacing": ( scalar_float, "/product/imageAnnotation/imageInformation/rangePixelSpacing", ), "azimuthPixelSpacing": ( scalar_float, "/product/imageAnnotation/imageInformation/azimuthPixelSpacing", ), "denoised": ( scalar, "/product/imageAnnotation/processingInformation/thermalNoiseCorrectionPerformed", ), "pol": (scalar, "/product/adsHeader/polarisation"), "pass": (scalar, "/product/generalAnnotation/productInformation/pass"), "platform_heading": ( scalar_float, "/product/generalAnnotation/productInformation/platformHeading", ), "radar_frequency": ( scalar_float, "/product/generalAnnotation/productInformation/radarFrequency", ), "range_sampling_rate": ( scalar_float, "/product/generalAnnotation/productInformation/rangeSamplingRate", ), "azimuth_steering_rate": ( scalar_float, "/product/generalAnnotation/productInformation/azimuthSteeringRate", ), "orbit_time": ( datetime64_array, "//product/generalAnnotation/orbitList/orbit/time", ), "orbit_frame": (np.array, "//product/generalAnnotation/orbitList/orbit/frame"), "orbit_pos_x": ( float_array, "//product/generalAnnotation/orbitList/orbit/position/x", ), "orbit_pos_y": ( float_array, "//product/generalAnnotation/orbitList/orbit/position/y", ), "orbit_pos_z": ( float_array, "//product/generalAnnotation/orbitList/orbit/position/z", ), "orbit_vel_x": ( float_array, "//product/generalAnnotation/orbitList/orbit/velocity/x", ), "orbit_vel_y": ( float_array, "//product/generalAnnotation/orbitList/orbit/velocity/y", ), "orbit_vel_z": ( float_array, "//product/generalAnnotation/orbitList/orbit/velocity/z", ), "number_of_bursts": (scalar_int, "/product/swathTiming/burstList/@count"), "linesPerBurst": (scalar, "/product/swathTiming/linesPerBurst"), "samplesPerBurst": (scalar, "/product/swathTiming/samplesPerBurst"), "all_bursts": (np.array, "//product/swathTiming/burstList/burst"), "burst_azimuthTime": ( datetime64_array, 
"//product/swathTiming/burstList/burst/azimuthTime", ), "burst_azimuthAnxTime": ( float_array, "//product/swathTiming/burstList/burst/azimuthAnxTime", ), "burst_sensingTime": ( datetime64_array, "//product/swathTiming/burstList/burst/sensingTime", ), "burst_byteOffset": ( np.array, "//product/swathTiming/burstList/burst/byteOffset", ), "burst_firstValidSample": ( float_2Darray_from_string_list, "//product/swathTiming/burstList/burst/firstValidSample", ), "burst_lastValidSample": ( float_2Darray_from_string_list, "//product/swathTiming/burstList/burst/lastValidSample", ), "nb_dcestimate": (scalar_int, "/product/dopplerCentroid/dcEstimateList/@count"), "nb_geoDcPoly": ( scalar_int, "/product/dopplerCentroid/dcEstimateList/dcEstimate[1]/geometryDcPolynomial/@count", ), "nb_dataDcPoly": ( scalar_int, "/product/dopplerCentroid/dcEstimateList/dcEstimate[1]/dataDcPolynomial/@count", ), "nb_fineDce": ( scalar_int, "/product/dopplerCentroid/dcEstimateList/dcEstimate[1]/fineDceList/@count", ), "dc_azimuth_time": ( datetime64_array, "//product/dopplerCentroid/dcEstimateList/dcEstimate/azimuthTime", ), "dc_t0": (np.array, "//product/dopplerCentroid/dcEstimateList/dcEstimate/t0"), "dc_geoDcPoly": ( list_of_float_1D_array_from_string, "//product/dopplerCentroid/dcEstimateList/dcEstimate/geometryDcPolynomial", ), "dc_dataDcPoly": ( list_of_float_1D_array_from_string, "//product/dopplerCentroid/dcEstimateList/dcEstimate/dataDcPolynomial", ), "dc_rmserr": ( np.array, "//product/dopplerCentroid/dcEstimateList/dcEstimate/dataDcRmsError", ), "dc_rmserrAboveThres": ( bool_array, "//product/dopplerCentroid/dcEstimateList/dcEstimate/dataDcRmsErrorAboveThreshold", ), "dc_azstarttime": ( datetime64_array, "//product/dopplerCentroid/dcEstimateList/dcEstimate/fineDceAzimuthStartTime", ), "dc_azstoptime": ( datetime64_array, "//product/dopplerCentroid/dcEstimateList/dcEstimate/fineDceAzimuthStopTime", ), "dc_slantRangeTime": ( float_array, "///product/dopplerCentroid/dcEstimateList/dcEstimate/fineDceList/fineDce/slantRangeTime", ), "dc_frequency": ( float_array, "///product/dopplerCentroid/dcEstimateList/dcEstimate/fineDceList/fineDce/frequency", ), "nb_fmrate": ( scalar_int, "/product/generalAnnotation/azimuthFmRateList/@count", ), "fmrate_azimuthtime": ( datetime64_array, "//product/generalAnnotation/azimuthFmRateList/azimuthFmRate/azimuthTime", ), "fmrate_t0": ( float_array, "//product/generalAnnotation/azimuthFmRateList/azimuthFmRate/t0", ), "fmrate_c0": ( float_array, "//product/generalAnnotation/azimuthFmRateList/azimuthFmRate/c0", ), "fmrate_c1": ( float_array, "//product/generalAnnotation/azimuthFmRateList/azimuthFmRate/c1", ), "fmrate_c2": ( float_array, "//product/generalAnnotation/azimuthFmRateList/azimuthFmRate/c2", ), "fmrate_azimuthFmRatePolynomial": ( list_of_float_1D_array_from_string, "//product/generalAnnotation/azimuthFmRateList/azimuthFmRate/azimuthFmRatePolynomial", ), "ap_azimuthTime": ( datetime64_array, "/product/antennaPattern/antennaPatternList/antennaPattern/azimuthTime", ), "ap_roll": ( float_array, "/product/antennaPattern/antennaPatternList/antennaPattern/roll", ), "ap_swath": ( lambda x: np.array(x), "/product/antennaPattern/antennaPatternList/antennaPattern/swath", ), "ap_elevationAngle": ( list_of_float_1D_array_from_string, "/product/antennaPattern/antennaPatternList/antennaPattern/elevationAngle", ), "ap_incidenceAngle": ( list_of_float_1D_array_from_string, "/product/antennaPattern/antennaPatternList/antennaPattern/incidenceAngle", ), "ap_slantRangeTime": ( 
list_of_float_1D_array_from_string, "/product/antennaPattern/antennaPatternList/antennaPattern/slantRangeTime", ), "ap_terrainHeight": ( float_array, "/product/antennaPattern/antennaPatternList/antennaPattern/terrainHeight", ), "ap_elevationPattern": ( list_of_float_1D_array_from_string, "/product/antennaPattern/antennaPatternList/antennaPattern/elevationPattern", ), "sm_nbPerSwat": ( int_array, "/product/swathMerging/swathMergeList/swathMerge/swathBoundsList/@count", ), "sm_swath": ( lambda x: np.array(x), "/product/swathMerging/swathMergeList/swathMerge/swath", ), "sm_azimuthTime": ( datetime64_array, "/product/swathMerging/swathMergeList/swathMerge/swathBoundsList/swathBounds/azimuthTime", ), "sm_firstAzimuthLine": ( int_array, "/product/swathMerging/swathMergeList/swathMerge/swathBoundsList/swathBounds/firstAzimuthLine", ), "sm_lastAzimuthLine": ( int_array, "/product/swathMerging/swathMergeList/swathMerge/swathBoundsList/swathBounds/lastAzimuthLine", ), "sm_firstRangeSample": ( int_array, "/product/swathMerging/swathMergeList/swathMerge/swathBoundsList/swathBounds/firstRangeSample", ), "sm_lastRangeSample": ( int_array, "/product/swathMerging/swathMergeList/swathMerge/swathBoundsList/swathBounds/lastRangeSample", ), }, "xsd": { "all": ( str, "/xsd:schema/xsd:complexType/xsd:sequence/xsd:element/xsd:annotation/xsd:documentation", ), "names": (str, "/xsd:schema/xsd:complexType/xsd:sequence/xsd:element/@name"), "sensingtime": ( str, "/xsd:schema/xsd:complexType/xsd:sequence/xsd:element/sensingTime", ), }, } def signal_lut_raw(line, sample, lut_sigma0, lut_gamma0, azimuth_times): ds = xr.Dataset() ds["sigma0_lut"] = xr.DataArray( lut_sigma0, dims=["line", "sample"], coords={"line": line, "sample": sample}, name="sigma0", attrs={"description": "look up table sigma0"}, ) ds["gamma0_lut"] = xr.DataArray( lut_gamma0, dims=["line", "sample"], coords={"line": line, "sample": sample}, name="gamma0", attrs={"description": "look up table gamma0"}, ) ds["azimuthTime"] = xr.DataArray( azimuth_times, dims=["line"], coords={"line": line}, attrs={"description": "azimuth times associated to the signal look up table"}, ) return ds def noise_lut_range_raw(lines, samples, noiseLuts, azimuthTimes): """ Parameters ---------- lines: np.ndarray 1D array of lines. lut is defined at each line samples: list of np.ndarray arrays of samples. list length is same as samples. each array define samples where lut is defined noiseLuts: list of np.ndarray arrays of luts. Same structure as samples. 
azimuthTimes: np.ndarray 1D array of azimuth dates associated to each lines of the noise range grid Returns ------- """ ds = xr.Dataset() # check that all the noiseLuts vector are the same size in range, in old IPF eg <=2017, there was one +/- 1 point over 634 minimum_pts = 100000 normalized_noise_luts = [] normalized_samples = [] for uu in range(len(noiseLuts)): if len(noiseLuts[uu]) < minimum_pts: minimum_pts = len(noiseLuts[uu]) # reduce to the smaller number of points (knowing that it is quite often that last noise value is zero ) for uu in range(len(noiseLuts)): normalized_noise_luts.append(noiseLuts[uu][0:minimum_pts]) normalized_samples.append(samples[uu][0:minimum_pts]) tmp_noise = np.stack(normalized_noise_luts) ds["noise_lut"] = xr.DataArray( tmp_noise, coords={"line": lines, "sample": samples[0][0:minimum_pts]}, dims=["line", "sample"], ) try: ds["azimuthTime"] = xr.DataArray( azimuthTimes, coords={"line": lines}, dims=["line"] ) except ( ValueError ): # for IPF2.72 for instance there is no azimuthTimes associated to the noise range LUT ds["azimuthTime"] = xr.DataArray( np.ones(len(lines)) * np.nan, coords={"line": lines}, dims=["line"] ) # ds['sample'] = xr.DataArray(np.stack(normalized_samples), coords={'lines': lines, 'sample_index': np.arange(minimum_pts)}, # dims=['lines', 'sample_index']) return ds def noise_lut_azi_raw_grd( line_azi, line_azi_start, line_azi_stop, sample_azi_start, sample_azi_stop, noise_azi_lut, swath, ): ds = xr.Dataset() for ii, swathi in enumerate( swath ): # with 2018 data the noise vector are not the same size -> stacking impossible ds["noise_lut_%s" % swathi] = xr.DataArray( noise_azi_lut[ii], coords={"line": line_azi[ii]}, dims=["line"] ) ds["line_start"] = xr.DataArray( line_azi_start, coords={"swath": swath}, dims=["swath"] ) ds["line_stop"] = xr.DataArray( line_azi_stop, coords={"swath": swath}, dims=["swath"] ) ds["sample_start"] = xr.DataArray( sample_azi_start, coords={"swath": swath}, dims=["swath"] ) ds["sample_stop"] = xr.DataArray( sample_azi_stop, coords={"swath": swath}, dims=["swath"] ) return ds def noise_lut_azi_raw_slc( line_azi, line_azi_start, line_azi_stop, sample_azi_start, sample_azi_stop, noise_azi_lut, swath, ): ds = xr.Dataset() # if 'WV' in mode: # there is no noise in azimuth for WV acquisitions if swath == []: # WV SLC case ds["noise_lut"] = xr.DataArray( 1.0 ) # set noise_azimuth to one to make post steps like noise_azi*noise_range always possible ds["line_start"] = xr.DataArray(line_azi_start, attrs={"swath": swath}) ds["line_stop"] = xr.DataArray(line_azi_stop, attrs={"swath": swath}) ds["sample_start"] = xr.DataArray(sample_azi_start, attrs={"swath": swath}) ds["sample_stop"] = xr.DataArray(sample_azi_stop, attrs={"swath": swath}) else: ds["noise_lut"] = xr.DataArray( noise_azi_lut[0], coords={"line": line_azi[0]}, dims=["line"] ) # only on subswath opened ds["line_start"] = xr.DataArray(line_azi_start[0], attrs={"swath": swath}) ds["line_stop"] = xr.DataArray(line_azi_stop[0], attrs={"swath": swath}) ds["sample_start"] = xr.DataArray(sample_azi_start[0], attrs={"swath": swath}) ds["sample_stop"] = xr.DataArray(sample_azi_stop[0], attrs={"swath": swath}) # ds['noise_lut'] = xr.DataArray(np.stack(noise_azi_lut).T, coords={'line_index': np.arange(len(line_azi[0])), 'swath': swath}, # dims=['line_index', 'swath']) # ds['line'] = xr.DataArray(np.stack(line_azi).T, coords={'line_index': np.arange(len(line_azi[0])), 'swath': swath}, # dims=['line_index', 'swath']) return ds def datetime64_array(dates): """list of datetime 
to np.datetime64 array""" return np.array([np.datetime64(d) for d in dates]) def df_files(annotation_files, measurement_files, noise_files, calibration_files): # get polarizations and file number from filename pols = [os.path.basename(f).split("-")[3].upper() for f in annotation_files] num = [ int(os.path.splitext(os.path.basename(f))[0].split("-")[8]) for f in annotation_files ] dsid = [os.path.basename(f).split("-")[1].upper() for f in annotation_files] # check that dsid are spatialy uniques (i.e. there is only one dsid per geographic position) # some SAFES like WV, dsid are not uniques ('WV1' and 'WV2') # we want them uniques, and compatibles with gdal sentinel driver (ie 'WV_012') pols_count = len(set(pols)) subds_count = len(annotation_files) // pols_count dsid_count = len(set(dsid)) if dsid_count != subds_count: dsid_rad = dsid[0][:-1] # WV dsid = ["%s_%03d" % (dsid_rad, n) for n in num] assert ( len(set(dsid)) == subds_count ) # probably an unknown mode we need to handle df = pd.DataFrame( { "polarization": pols, "dsid": dsid, "annotation": annotation_files, "measurement": measurement_files, "noise": noise_files, "calibration": calibration_files, }, index=num, ) return df def xsd_files_func(xsd_product_file): """ return a xarray Dataset with path of the different xsd files :param xsd_product: str :return: """ ds = xr.Dataset() ds["xsd_product"] = xarray.DataArray(xsd_product_file) return ds def orbit( time, frame, pos_x, pos_y, pos_z, vel_x, vel_y, vel_z, orbit_pass, platform_heading, return_xarray=True, ): """ Parameters ---------- return_xarray: bool, True-> return a xarray.Dataset, False-> returns a GeoDataFrame Returns ------- geopandas.GeoDataFrame with 'geometry' as position, 'time' as index, 'velocity' as velocity, and 'geocent' as crs. 
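        When `return_xarray` is True (the default), an `xarray.Dataset` is returned
        instead, with 'time' as coordinate, per-axis 'position_*' / 'velocity_*'
        variables, and 'orbit_pass', 'platform_heading' and 'frame' stored as attributes.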
""" if (frame[0] != "Earth Fixed") or (np.unique(frame).size != 1): raise NotImplementedError('All orbit frames must be of type "Earth Fixed"') if return_xarray is False: crs = pyproj.crs.CRS(proj="geocent", ellps="WGS84", datum="WGS84") res = gpd.GeoDataFrame( {"velocity": list(map(Point, zip(vel_x, vel_y, vel_z)))}, geometry=list(map(Point, zip(pos_x, pos_y, pos_z))), crs=crs, index=time, ) else: res = xr.Dataset() res["velocity_x"] = xr.DataArray(vel_x, dims=["time"], coords={"time": time}) res["velocity_y"] = xr.DataArray(vel_y, dims=["time"], coords={"time": time}) res["velocity_z"] = xr.DataArray(vel_z, dims=["time"], coords={"time": time}) res["position_x"] = xr.DataArray(pos_x, dims=["time"], coords={"time": time}) res["position_y"] = xr.DataArray(pos_y, dims=["time"], coords={"time": time}) res["position_z"] = xr.DataArray(pos_z, dims=["time"], coords={"time": time}) res.attrs = { "orbit_pass": orbit_pass, "platform_heading": platform_heading, "frame": frame[0], } return res def azimuth_fmrate(azimuthtime, t0, c0, c1, c2, polynomial): """ decode FM rate information from xml annotations Parameters ---------- azimuthtime t0 c0 c1 c2 polynomial Returns ------- xarray.Dataset containing the polynomial coefficient for each of the FM rate along azimuth time coordinates """ if (np.sum([c.size for c in [c0, c1, c2]]) != 0) and (len(polynomial) == 0): # old IPF annotation polynomial = np.stack([c0, c1, c2], axis=1) res = xr.Dataset() res["t0"] = xr.DataArray( t0, dims=["azimuthTime"], coords={"azimuthTime": azimuthtime}, attrs={"source": xpath_mappings["annotation"]["fmrate_t0"][1]}, ) res["azimuthFmRatePolynomial"] = xr.DataArray( [Polynomial(p) for p in polynomial], dims=["azimuthTime"], coords={"azimuthTime": azimuthtime}, attrs={ "source": xpath_mappings["annotation"]["fmrate_azimuthFmRatePolynomial"][1] }, ) return res def image( product_type, line_time_range, line_size, sample_size, incidence_angle_mid_swath, azimuth_time_interval, slant_range_time_image, azimuthPixelSpacing, rangePixelSpacing, swath_subswath, radar_frequency, range_sampling_rate, azimuth_steering_rate, ): """ Decode attribute describing the SAR image Parameters ---------- product_type: str line_time_range: int line_size: int sample_size: int incidence_angle_mid_swath: float azimuth_time_interval: float [ in seconds] slant_range_time_image: float [ in seconds] azimuthPixelSpacing: int [m] rangePixelSpacing: int [m] swath_subswath: str radar_frequency: float [second-1] range_sampling_rate: float azimuth_steering_rate: float Returns ------- xarray.Dataset """ if product_type == "SLC" or product_type == "SL2": pixel_sample_m = rangePixelSpacing / np.sin( np.radians(incidence_angle_mid_swath) ) else: pixel_sample_m = rangePixelSpacing tmp = { "LineUtcTime": (line_time_range, "line_time_range"), "numberOfLines": (line_size, "line_size"), "numberOfSamples": (sample_size, "sample_size"), "azimuthPixelSpacing": (azimuthPixelSpacing, "azimuthPixelSpacing"), "slantRangePixelSpacing": (rangePixelSpacing, "rangePixelSpacing"), "groundRangePixelSpacing": (pixel_sample_m, "rangePixelSpacing"), "incidenceAngleMidSwath": ( incidence_angle_mid_swath, "incidence_angle_mid_swath", ), "azimuthTimeInterval": (azimuth_time_interval, "azimuth_time_interval"), "slantRangeTime": (slant_range_time_image, "slant_range_time_image"), "swath_subswath": (swath_subswath, "swath_subswath"), "radarFrequency": (radar_frequency, "radar_frequency"), "rangeSamplingRate": (range_sampling_rate, "range_sampling_rate"), "azimuthSteeringRate": 
(azimuth_steering_rate, "azimuth_steering_rate"), } ds = xr.Dataset() for ke in tmp: ds[ke] = xr.DataArray( tmp[ke][0], attrs={"source": xpath_mappings["annotation"][tmp[ke][1]][1]} ) return ds def bursts( line_per_burst, sample_per_burst, burst_azimuthTime, burst_azimuthAnxTime, burst_sensingTime, burst_byteOffset, burst_firstValidSample, burst_lastValidSample, ): """return burst as an xarray dataset""" da = xr.Dataset() if (line_per_burst == 0) and (sample_per_burst == 0): pass else: # convert to float, so we can use NaN as missing value, instead of -1 burst_firstValidSample = burst_firstValidSample.astype(float) burst_lastValidSample = burst_lastValidSample.astype(float) burst_firstValidSample[burst_firstValidSample == -1] = np.nan burst_lastValidSample[burst_lastValidSample == -1] = np.nan da = xr.Dataset( { "azimuthTime": ("burst", burst_azimuthTime), "azimuthAnxTime": ("burst", burst_azimuthAnxTime), "sensingTime": ("burst", burst_sensingTime), "byteOffset": ("burst", burst_byteOffset), "firstValidSample": (["burst", "line"], burst_firstValidSample), "lastValidSample": (["burst", "line"], burst_lastValidSample), # 'valid_location': xr.DataArray(dims=['burst', 'limits'], data=valid_locations, # attrs={ # 'description': 'start line index, start sample index, stop line index, stop sample index'}), } ) da["azimuthTime"].attrs = { "source": xpath_mappings["annotation"]["burst_azimuthTime"][1] } da["azimuthAnxTime"].attrs = { "source": xpath_mappings["annotation"]["burst_azimuthAnxTime"][1] } da["sensingTime"].attrs = { "source": xpath_mappings["annotation"]["burst_sensingTime"][1] } da["byteOffset"].attrs = { "source": xpath_mappings["annotation"]["burst_byteOffset"][1] } da["firstValidSample"].attrs = { "source": xpath_mappings["annotation"]["burst_firstValidSample"][1] } da["lastValidSample"].attrs = { "source": xpath_mappings["annotation"]["burst_lastValidSample"][1] } # da['valid_location'].attrs = {'source': xpath_mappings['annotation']['burst_firstValidSample'][1]+'\n'+xpath_mappings['annotation']['burst_lastValidSample'][1]} da["linesPerBurst"] = xr.DataArray( line_per_burst, attrs={"source": xpath_mappings["annotation"]["linesPerBurst"][1]}, ) da["samplesPerBurst"] = xr.DataArray( sample_per_burst, attrs={"source": xpath_mappings["annotation"]["samplesPerBurst"][1]}, ) return da def bursts_grd(line_per_burst, sample_per_burst): """return burst as an xarray dataset""" da = xr.Dataset({"azimuthTime": ("burst", [])}) da["linesPerBurst"] = xr.DataArray(line_per_burst) da["samplesPerBurst"] = xr.DataArray(sample_per_burst) return da def doppler_centroid_estimates( nb_dcestimate, nb_fineDce, dc_azimuth_time, dc_t0, dc_geoDcPoly, dc_dataDcPoly, dc_rmserr, dc_rmserrAboveThres, dc_azstarttime, dc_azstoptime, dc_slantRangeTime, dc_frequency, ): """ decoding Doppler Centroid estimates information from xml annotation files Parameters ---------- nb_dcestimate nb_geoDcPoly nb_dataDcPoly nb_fineDce dc_azimuth_time dc_t0 dc_geoDcPoly dc_dataDcPoly dc_rmserr dc_rmserrAboveThres dc_azstarttime dc_azstoptime dc_slantRangeTime dc_frequency Returns ------- """ ds = xr.Dataset() ds["t0"] = xr.DataArray( dc_t0.astype(float), dims=["azimuthTime"], attrs={"source": xpath_mappings["annotation"]["dc_t0"][1]}, coords={"azimuthTime": dc_azimuth_time}, ) ds["geometryDcPolynomial"] = xr.DataArray( [Polynomial(p) for p in dc_geoDcPoly], dims=["azimuthTime"], attrs={"source": xpath_mappings["annotation"]["dc_geoDcPoly"][1]}, coords={"azimuthTime": dc_azimuth_time}, ) ds["dataDcPolynomial"] = xr.DataArray( 
[Polynomial(p) for p in dc_dataDcPoly], dims=["azimuthTime"], attrs={"source": xpath_mappings["annotation"]["dc_dataDcPoly"][1]}, coords={"azimuthTime": dc_azimuth_time}, ) dims = (nb_dcestimate, nb_fineDce) ds["azimuthTime"].attrs = { "source": xpath_mappings["annotation"]["dc_azimuth_time"][1] } ds["fineDceAzimuthStartTime"] = xr.DataArray( dc_azstarttime, dims=["azimuthTime"], attrs={"source": xpath_mappings["annotation"]["dc_azstarttime"][1]}, coords={"azimuthTime": dc_azimuth_time}, ) ds["fineDceAzimuthStopTime"] = xr.DataArray( dc_azstoptime, dims=["azimuthTime"], attrs={"source": xpath_mappings["annotation"]["dc_azstoptime"][1]}, coords={"azimuthTime": dc_azimuth_time}, ) ds["dataDcRmsError"] = xr.DataArray( dc_rmserr.astype(float), dims=["azimuthTime"], attrs={"source": xpath_mappings["annotation"]["dc_rmserr"][1]}, coords={"azimuthTime": dc_azimuth_time}, ) ds["slantRangeTime"] = xr.DataArray( dc_slantRangeTime.reshape(dims), dims=["azimuthTime", "nb_fine_dce"], attrs={"source": xpath_mappings["annotation"]["dc_slantRangeTime"][1]}, coords={"azimuthTime": dc_azimuth_time, "nb_fine_dce": np.arange(nb_fineDce)}, ) ds["frequency"] = xr.DataArray( dc_frequency.reshape(dims), dims=["azimuthTime", "nb_fine_dce"], attrs={"source": xpath_mappings["annotation"]["dc_frequency"][1]}, coords={"azimuthTime": dc_azimuth_time, "nb_fine_dce": np.arange(nb_fineDce)}, ) ds["dataDcRmsErrorAboveThreshold"] = xr.DataArray( dc_rmserrAboveThres, dims=["azimuthTime"], attrs={"source": xpath_mappings["annotation"]["dc_rmserrAboveThres"][1]}, coords={"azimuthTime": dc_azimuth_time}, ) return ds def geolocation_grid(line, sample, values): """ Parameters ---------- line: np.ndarray 1D array of line dimension sample: np.ndarray Returns ------- xarray.DataArray with line and sample coordinates, and values as 2D """ shape = (line.size, sample.size) values = np.reshape(values, shape) return xr.DataArray( values, dims=["line", "sample"], coords={"line": line, "sample": sample} ) def antenna_pattern( ap_swath, ap_roll, ap_azimuthTime, ap_terrainHeight, ap_elevationAngle, ap_elevationPattern, ap_incidenceAngle, ap_slantRangeTime, ): """ Parameters ---------- ap_swath ap_roll ap_azimuthTime ap_terrainHeight ap_elevationAngle ap_elevationPattern ap_incidenceAngle ap_slantRangeTime Returns ------- xarray.DataSet """ # Fonction to convert string 'EW1' ou 'IW3' as int def convert_to_int(swath): return int(swath[-1]) vectorized_convert = np.vectorize(convert_to_int) swathNumber = vectorized_convert(ap_swath) dim_azimuthTime = max(np.bincount(swathNumber)) dim_slantRangeTime = max(array.shape[0] for array in ap_elevationAngle) include_roll = len(ap_roll) != 0 # Create 2Ds arrays elevAngle2d = np.full((len(ap_elevationAngle), dim_slantRangeTime), np.nan) gain2d = np.full((len(ap_elevationPattern), dim_slantRangeTime), np.nan) slantRangeTime2d = np.full((len(ap_slantRangeTime), dim_slantRangeTime), np.nan) incAngle2d = np.full((len(ap_incidenceAngle), dim_slantRangeTime), np.nan) for i in range(len(ap_elevationAngle)): elevAngle2d[i, : ap_elevationAngle[i].shape[0]] = ap_elevationAngle[i] if ap_elevationAngle[i].shape[0] != ap_elevationPattern[i].shape[0]: gain2d[i, : ap_elevationAngle[i].shape[0]] = np.sqrt( ap_elevationPattern[i][::2] ** 2 + ap_elevationPattern[i][1::2] ** 2 ) else: # logging.warn("antenna pattern is not given in complex values. 
You probably use an old file\n" + e) gain2d[i, : ap_elevationAngle[i].shape[0]] = ap_elevationPattern[i] slantRangeTime2d[i, : ap_slantRangeTime[i].shape[0]] = ap_slantRangeTime[i] incAngle2d[i, : ap_incidenceAngle[i].shape[0]] = ap_incidenceAngle[i] swath_number_2d = np.full((len(np.unique(swathNumber)), dim_azimuthTime), np.nan) roll_angle_2d = np.full((len(np.unique(swathNumber)), dim_azimuthTime), np.nan) azimuthTime_2d = np.full((len(np.unique(swathNumber)), dim_azimuthTime), np.nan) terrainHeight_2d = np.full((len(np.unique(swathNumber)), dim_azimuthTime), np.nan) slantRangeTime_2d = np.full( (len(np.unique(swathNumber)), dim_slantRangeTime), np.nan ) elevationAngle_3d = np.full( (len(np.unique(swathNumber)), dim_azimuthTime, dim_slantRangeTime), np.nan ) incidenceAngle_3d = np.full( (len(np.unique(swathNumber)), dim_azimuthTime, dim_slantRangeTime), np.nan ) gain3d = np.full( (len(np.unique(swathNumber)), dim_azimuthTime, dim_slantRangeTime), np.nan ) for i, swath_number in enumerate(np.unique(swathNumber)): length_dim0 = len(ap_azimuthTime[swathNumber == swath_number]) swath_number_2d[i, :length_dim0] = swathNumber[swathNumber == swath_number] azimuthTime_2d[i, :length_dim0] = ap_azimuthTime[swathNumber == swath_number] terrainHeight_2d[i, :length_dim0] = ap_terrainHeight[ swathNumber == swath_number ] slantRangeTime_2d[i, :] = slantRangeTime2d[i, :] if include_roll: roll_angle_2d[i, :length_dim0] = ap_roll[swathNumber == swath_number] for j in range(0, dim_slantRangeTime): elevationAngle_3d[i, :length_dim0, j] = elevAngle2d[ swathNumber == swath_number, j ] incidenceAngle_3d[i, :length_dim0, j] = incAngle2d[ swathNumber == swath_number, j ] gain3d[i, :length_dim0, j] = gain2d[swathNumber == swath_number, j] azimuthTime_2d = azimuthTime_2d.astype("datetime64[ns]") # return a Dataset ds = xr.Dataset( { "slantRangeTime": (["swath_nb", "dim_slantRangeTime"], slantRangeTime_2d), "swath": (["swath_nb", "dim_azimuthTime"], swath_number_2d), "roll": (["swath_nb", "dim_azimuthTime"], roll_angle_2d), "azimuthTime": (["swath_nb", "dim_azimuthTime"], azimuthTime_2d), "terrainHeight": (["swath_nb", "dim_azimuthTime"], terrainHeight_2d), "elevationAngle": ( ["swath_nb", "dim_azimuthTime", "dim_slantRangeTime"], elevationAngle_3d, ), "incidenceAngle": ( ["swath_nb", "dim_azimuthTime", "dim_slantRangeTime"], incidenceAngle_3d, ), "gain": (["swath_nb", "dim_azimuthTime", "dim_slantRangeTime"], gain3d), }, coords={"swath_nb": np.unique(swathNumber)}, ) ds.attrs["dim_azimuthTime"] = "max dimension of azimuthTime for a swath" ds.attrs["dim_slantRangeTime"] = "max dimension of slantRangeTime for a swath" ds.attrs[ "comment" ] = "The antenna pattern data set record contains a list of vectors of the \ antenna elevation pattern values that have been updated along track\ and used to correct the radiometry during image processing." 
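    # Layout note (descriptive only): every variable above is NaN-padded up to the
    # longest swath, so dim_azimuthTime and dim_slantRangeTime are maxima across
    # swaths; the "example" attribute set just below tells users to drop the NaNs to
    # recover a given swath's true extent.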
ds.attrs[ "example" ] = "for example, if swath Y is smaller than swath X, user has to remove nan to get the dims of the swath" ds.attrs["source"] = "Sentinel-1 Product Specification" return ds def swath_merging( sm_swath, sm_nbPerSwat, sm_azimuthTime, sm_firstAzimuthLine, sm_lastAzimuthLine, sm_firstRangeSample, sm_lastRangeSample, ): """ Parameters ---------- sm_swath sm_nbPerSwat sm_azimuthTime sm_firstAzimuthLine sm_lastAzimuthLine sm_firstRangeSample sm_lastRangeSample Returns ------- xarray.DataSet """ # Fonction to convert string 'EW1' ou 'IW3' as int def convert_to_int(swath): return int(swath[-1]) vectorized_convert = np.vectorize(convert_to_int) repeated_swaths = np.repeat(sm_swath, sm_nbPerSwat) swathNumber = vectorized_convert(repeated_swaths) ds = xr.Dataset( { "swaths": (["dim_azimuthTime"], swathNumber), "azimuthTime": (["dim_azimuthTime"], sm_azimuthTime), "firstAzimuthLine": (["dim_azimuthTime"], sm_firstAzimuthLine), "lastAzimuthLine": (["dim_azimuthTime"], sm_lastAzimuthLine), "firstRangeSample": (["dim_azimuthTime"], sm_firstRangeSample), "lastRangeSample": (["dim_azimuthTime"], sm_lastRangeSample), }, ) ds.attrs[ "comment" ] = "The swath merging data set record contains information about how \ multiple swaths were stitched together to form one large contiguous \ swath. This data set record only applies to IW and EW GRD \ products" ds.attrs["source"] = "Sentinel-1 Product Specification" return ds # dict of compounds variables. # compounds variables are variables composed of several variables. # the key is the variable name, and the value is a python structure, # where leaves are jmespath in xpath_mappings compounds_vars = { "safe_attributes_slcgrd": { "ipf_version": "manifest.ipf_version", "swath_type": "manifest.swath_type", "polarizations": "manifest.polarizations", "product_type": "manifest.product_type", "mission": "manifest.mission", "satellite": "manifest.satellite", "start_date": "manifest.start_date", "stop_date": "manifest.stop_date", "footprints": "manifest.footprints", "aux_cal": "manifest.aux_cal", "aux_pp1": "manifest.aux_pp1", "aux_ins": "manifest.aux_ins", "icid": "manifest.instrument_configuration_id", }, "safe_attributes_sl2": { "ipf_version": "manifest.ipf_version", "swath_type": "manifest.swath_type", "polarizations": "manifest.polarizations", "product_type": "manifest.product_type", "mission": "manifest.mission", "satellite": "manifest.satellite", "start_date": "manifest.start_date", "stop_date": "manifest.stop_date", "footprints": "manifest.footprints", "aux_cal_sl2": "manifest.aux_cal_sl2", }, "files": { "func": df_files, "args": ( "manifest.annotation_files", "manifest.measurement_files", "manifest.noise_files", "manifest.calibration_files", ), }, "xsd_files": {"func": xsd_files_func, "args": ("manifest.xsd_product_file",)}, "luts_raw": { "func": signal_lut_raw, "args": ( "calibration.line", "calibration.sample", "calibration.sigma0_lut", "calibration.gamma0_lut", "calibration.azimuthTime", ), }, "noise_lut_range_raw": { "func": noise_lut_range_raw, "args": ( "noise.range.line", "noise.range.sample", "noise.range.noiseLut", "noise.range.azimuthTime", ), }, "noise_lut_azi_raw_grd": { "func": noise_lut_azi_raw_grd, "args": ( "noise.azi.line", "noise.azi.line_start", "noise.azi.line_stop", "noise.azi.sample_start", "noise.azi.sample_stop", "noise.azi.noiseLut", "noise.azi.swath", ), }, "noise_lut_azi_raw_slc": { "func": noise_lut_azi_raw_slc, "args": ( "noise.azi.line", "noise.azi.line_start", "noise.azi.line_stop", "noise.azi.sample_start", 
"noise.azi.sample_stop", "noise.azi.noiseLut", "noise.azi.swath", ), }, "denoised": ("annotation.pol", "annotation.denoised"), "incidenceAngle": { "func": geolocation_grid, "args": ("annotation.line", "annotation.sample", "annotation.incidenceAngle"), }, "elevationAngle": { "func": geolocation_grid, "args": ("annotation.line", "annotation.sample", "annotation.elevationAngle"), }, "longitude": { "func": geolocation_grid, "args": ("annotation.line", "annotation.sample", "annotation.longitude"), }, "latitude": { "func": geolocation_grid, "args": ("annotation.line", "annotation.sample", "annotation.latitude"), }, "height": { "func": geolocation_grid, "args": ("annotation.line", "annotation.sample", "annotation.height"), }, "azimuthTime": { "func": geolocation_grid, "args": ("annotation.line", "annotation.sample", "annotation.azimuthTime"), }, "slantRangeTime": { "func": geolocation_grid, "args": ("annotation.line", "annotation.sample", "annotation.slantRangeTime"), }, "bursts": { "func": bursts, "args": ( "annotation.linesPerBurst", "annotation.samplesPerBurst", "annotation.burst_azimuthTime", "annotation.burst_azimuthAnxTime", "annotation.burst_sensingTime", "annotation.burst_byteOffset", "annotation.burst_firstValidSample", "annotation.burst_lastValidSample", ), }, "bursts_grd": { "func": bursts_grd, "args": ( "annotation.linesPerBurst", "annotation.samplesPerBurst", ), }, "orbit": { "func": orbit, "args": ( "annotation.orbit_time", "annotation.orbit_frame", "annotation.orbit_pos_x", "annotation.orbit_pos_y", "annotation.orbit_pos_z", "annotation.orbit_vel_x", "annotation.orbit_vel_y", "annotation.orbit_vel_z", "annotation.pass", "annotation.platform_heading", ), }, "image": { "func": image, "args": ( "annotation.product_type", "annotation.line_time_range", "annotation.line_size", "annotation.sample_size", "annotation.incidence_angle_mid_swath", "annotation.azimuth_time_interval", "annotation.slant_range_time_image", "annotation.azimuthPixelSpacing", "annotation.rangePixelSpacing", "annotation.swath_subswath", "annotation.radar_frequency", "annotation.range_sampling_rate", "annotation.azimuth_steering_rate", ), }, "azimuth_fmrate": { "func": azimuth_fmrate, "args": ( "annotation.fmrate_azimuthtime", "annotation.fmrate_t0", "annotation.fmrate_c0", "annotation.fmrate_c1", "annotation.fmrate_c2", "annotation.fmrate_azimuthFmRatePolynomial", ), }, "doppler_estimate": { "func": doppler_centroid_estimates, "args": ( "annotation.nb_dcestimate", "annotation.nb_fineDce", "annotation.dc_azimuth_time", "annotation.dc_t0", "annotation.dc_geoDcPoly", "annotation.dc_dataDcPoly", "annotation.dc_rmserr", "annotation.dc_rmserrAboveThres", "annotation.dc_azstarttime", "annotation.dc_azstoptime", "annotation.dc_slantRangeTime", "annotation.dc_frequency", ), }, "antenna_pattern": { "func": antenna_pattern, "args": ( "annotation.ap_swath", "annotation.ap_roll", "annotation.ap_azimuthTime", "annotation.ap_terrainHeight", "annotation.ap_elevationAngle", "annotation.ap_elevationPattern", "annotation.ap_incidenceAngle", "annotation.ap_slantRangeTime", ), }, "swath_merging": { "func": swath_merging, "args": ( "annotation.sm_swath", "annotation.sm_nbPerSwat", "annotation.sm_azimuthTime", "annotation.sm_firstAzimuthLine", "annotation.sm_lastAzimuthLine", "annotation.sm_firstRangeSample", "annotation.sm_lastRangeSample", ), }, } xarray-safe-s1-2024.11.28/safe_s1/xml_parser.py000066400000000000000000000126561472207407300207450ustar00rootroot00000000000000import logging import re from collections.abc import Iterable from 
io import BytesIO import jmespath import yaml from lxml import objectify logger = logging.getLogger("xsar.xml_parser") logger.addHandler(logging.NullHandler()) # TODO: no variable caching is not while https://github.com/dask/distributed/issues/5610 is not solved class XmlParser: """ Parameters ---------- xpath_mappings: dict first level key is xml file type second level key is variable name to be created mappings may be 'xpath', or 'tuple(func,xpath)', or 'dict' - xpath is an lxml xpath - func is a decoder function fed by xpath - dict is a nested dict with same structure, to create more hierarchy levels. compounds_vars: dict compounds variables are variables composed of several variables taken from xpath_mappings the key is the variable name, and the value is a tuple or a dict. if dict: (key, jpath), where key is the sub variable name to create, and jpath is a jmespath in xpath_mappings. if tuple: ( func, iterable ), where func(iterable) will be called to convert the iterable to another object. iterable values are jpath. if iterable is a tuple, func(*iterable) will be called. namespaces: dict xml namespaces, passed to lxml.xpath. namespaces are mutualised between all handled xml files. """ def __init__(self, mapper, xpath_mappings={}, compounds_vars={}, namespaces={}): self._namespaces = namespaces self._xpath_mappings = xpath_mappings self._compounds_vars = compounds_vars self._mapper = mapper def __del__(self): logger.debug("__del__ XmlParser") def getroot(self, xml_file): """return xml root object from xml_file. (also update self._namespaces with fetched ones)""" raw_data = self._mapper[xml_file] # xml_root = etree.fromstring(raw_data) xml_root = objectify.parse(BytesIO(raw_data)).getroot() self._namespaces.update(xml_root.nsmap) return xml_root def xpath(self, xml_file, path): """ get path from xml_file. this is a simple wrapper for `objectify.parse(xml_file).getroot().xpath(path)` """ xml_root = self.getroot(xml_file) result = [ getattr(e, "pyval", e) for e in xml_root.xpath(path, namespaces=self._namespaces) ] return result def get_var(self, xml_file, jpath, describe=False): """ get simple variable in xml_file. Parameters ---------- xml_file: str xml filename jpath: str jmespath string reaching xpath in xpath_mappings describe: bool If True, describe the variable (ie return xpath used) Returns ------- object xpath list, or decoded object, if a conversion function was specified in xpath_mappings """ func = None xpath = jmespath.search(jpath, self._xpath_mappings) if xpath is None: raise KeyError('jmespath "%s" not found in xpath_mappings' % jpath) if isinstance(xpath, tuple) and callable(xpath[0]): func, xpath = xpath if describe: return xpath if not isinstance(xpath, str): raise NotImplementedError( 'Non leaf xpath of type "%s" instead of str' % type(xpath).__name__ ) result = self.xpath(xml_file, xpath) if func is not None: result = func(result) return result def get_compound_var(self, xml_file, var_name, describe=False): """ Parameters ---------- var_name: str key in self._compounds_vars xml_file: str xml_file to use. describe: bool If True, only returns a string describing the variable (file, xpath, etc...) 
Returns ------- object """ if describe: # keep only informative parts in filename # sub SAFE path minifile = re.sub(".*SAFE/", "", xml_file) minifile = re.sub(r"-.*\.xml", ".xml", minifile) var_object = self._compounds_vars[var_name] func = None if ( isinstance(var_object, dict) and "func" in var_object and callable(var_object["func"]) ): func = var_object["func"] if isinstance(var_object["args"], tuple): args = var_object["args"] else: raise ValueError("args must be a tuple when func is called") else: args = var_object result = None if isinstance(args, dict): result = {} for key, path in args.items(): result[key] = self.get_var(xml_file, path, describe=describe) elif isinstance(args, Iterable): result = [self.get_var(xml_file, p, describe=describe) for p in args] if isinstance(args, tuple): result = tuple(result) if func is not None and not describe: # apply converter result = func(*result) if describe: if isinstance(result, dict): result = result.values() description = yaml.safe_dump({var_name: {minifile: result}}) return description else: return result xarray-safe-s1-2024.11.28/test/000077500000000000000000000000001472207407300156435ustar00rootroot00000000000000xarray-safe-s1-2024.11.28/test/test_s1reader.py000066400000000000000000000015731472207407300207700ustar00rootroot00000000000000
from safe_s1 import sentinel1_xml_mappings, Sentinel1Reader, getconfig
import logging

logging.basicConfig()
logging.captureWarnings(True)
logger = logging.getLogger('s1_reader_test')
logger.setLevel(logging.DEBUG)

conf = getconfig.get_config()
products = [sentinel1_xml_mappings.get_test_file(filename) for filename in conf['product_paths']]


# Apply the reader to every configured test product. Exceptions are deliberately not
# swallowed: letting them propagate makes pytest report the real traceback instead of
# a bare assertion failure.
def test_reader():
    for product in products:
        reader = Sentinel1Reader(product)
        # When a product is a multidataset, its datatree is None, so open the first
        # sub-dataset and make sure that its datatree isn't empty.
        sub_reader = Sentinel1Reader(reader.datasets_names[0])
        dt = sub_reader.datatree
        for ds in dt:
            dt[ds].to_dataset().compute()
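

# ---------------------------------------------------------------------------
# Hedged usage sketch (illustration only, never executed by pytest): how the
# reader is typically driven interactively. The SAFE path below is hypothetical,
# and only attributes defined by Sentinel1Reader in this package are used.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    safe_path = "/tmp/S1A_IW_GRDH_1SDV_20170907T103020_20170907T103045_018268_01EB76_0001.SAFE"  # hypothetical path
    reader = Sentinel1Reader(safe_path)
    if reader.multidataset:
        # a multidataset SAFE exposes no datatree itself: open a sub-dataset first
        reader = Sentinel1Reader(reader.datasets_names[0])
    print(reader.image)                 # image description (sizes, pixel spacings, ...)
    print(reader.get_calibration_luts)  # raw sigma0 / gamma0 calibration look-up tables
    print(reader.get_noise_range_raw)   # raw range noise look-up tables
    print(reader.datatree)              # everything gathered into a single datatree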