pax_global_header00006660000000000000000000000064145525765030014526gustar00rootroot0000000000000052 comment=03b0a8c04248ae2bb13ab8c4b832a9f406e0d156 datatree-0.0.14/000077500000000000000000000000001455257650300134015ustar00rootroot00000000000000datatree-0.0.14/.flake8000066400000000000000000000005161455257650300145560ustar00rootroot00000000000000[flake8] ignore = # whitespace before ':' - doesn't work well with black E203 # module level import not at top of file E402 # line too long - let black worry about that E501 # do not assign a lambda expression, use a def E731 # line break before binary operator W503 exclude= .eggs doc datatree-0.0.14/.git_archival.txt000066400000000000000000000001571455257650300166570ustar00rootroot00000000000000node: $Format:%H$ node-date: $Format:%cI$ describe-name: $Format:%(describe:tags=true)$ ref-names: $Format:%D$ datatree-0.0.14/.github/000077500000000000000000000000001455257650300147415ustar00rootroot00000000000000datatree-0.0.14/.github/dependabot.yml000066400000000000000000000004011455257650300175640ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: pip directory: "/" schedule: interval: daily - package-ecosystem: "github-actions" directory: "/" schedule: # Check for updates to GitHub Actions every weekday interval: "daily" datatree-0.0.14/.github/pull_request_template.md000066400000000000000000000004151455257650300217020ustar00rootroot00000000000000 - [ ] Closes #xxxx - [ ] Tests added - [ ] Passes `pre-commit run --all-files` - [ ] New functions/methods are listed in `api.rst` - [ ] Changes are summarized in `docs/source/whats-new.rst` datatree-0.0.14/.github/workflows/000077500000000000000000000000001455257650300167765ustar00rootroot00000000000000datatree-0.0.14/.github/workflows/main.yaml000066400000000000000000000044061455257650300206120ustar00rootroot00000000000000name: CI on: push: branches: - main pull_request: branches: - main schedule: - cron: "0 0 * * *" jobs: test: name: ${{ matrix.python-version 
}}-build runs-on: ubuntu-latest defaults: run: shell: bash -l {0} strategy: matrix: python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 - name: Create conda environment uses: mamba-org/provision-with-micromamba@main with: cache-downloads: true micromamba-version: 'latest' environment-file: ci/environment.yml extra-specs: | python=${{ matrix.python-version }} - name: Conda info run: conda info - name: Install datatree run: | python -m pip install -e . --no-deps --force-reinstall - name: Conda list run: conda list - name: Running Tests run: | python -m pytest --cov=./ --cov-report=xml --verbose - name: Upload code coverage to Codecov uses: codecov/codecov-action@v3.1.4 with: file: ./coverage.xml flags: unittests env_vars: OS,PYTHON name: codecov-umbrella fail_ci_if_error: false test-upstream: name: ${{ matrix.python-version }}-dev-build runs-on: ubuntu-latest defaults: run: shell: bash -l {0} strategy: matrix: python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 - name: Create conda environment uses: mamba-org/provision-with-micromamba@main with: cache-downloads: true micromamba-version: 'latest' environment-file: ci/environment.yml extra-specs: | python=${{ matrix.python-version }} - name: Conda info run: conda info - name: Install dev reqs run: | python -m pip install --no-deps --upgrade \ git+https://github.com/pydata/xarray \ git+https://github.com/Unidata/netcdf4-python python -m pip install -e . 
--no-deps --force-reinstall - name: Conda list run: conda list - name: Running Tests run: | python -m pytest --verbose datatree-0.0.14/.github/workflows/pypipublish.yaml000066400000000000000000000037411455257650300222370ustar00rootroot00000000000000name: Build distribution on: release: types: - published push: branches: - main pull_request: branches: - main concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: build-artifacts: runs-on: ubuntu-latest if: github.repository == 'xarray-contrib/datatree' steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - uses: actions/setup-python@v5 name: Install Python with: python-version: 3.9 - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install build - name: Build tarball and wheels run: | git clean -xdf git restore -SW . python -m build --sdist --wheel . - uses: actions/upload-artifact@v4 with: name: releases path: dist test-built-dist: needs: build-artifacts runs-on: ubuntu-latest steps: - uses: actions/setup-python@v5 name: Install Python with: python-version: '3.10' - uses: actions/download-artifact@v4 with: name: releases path: dist - name: List contents of built dist run: | ls -ltrh ls -ltrh dist - name: Verify the built dist/wheel is valid run: | python -m pip install --upgrade pip python -m pip install dist/xarray_datatree*.whl python -c "import datatree; print(datatree.__version__)" upload-to-pypi: needs: test-built-dist if: github.event_name == 'release' runs-on: ubuntu-latest steps: - uses: actions/download-artifact@v4 with: name: releases path: dist - name: Publish package to PyPI uses: pypa/gh-action-pypi-publish@v1.8.11 with: user: ${{ secrets.PYPI_USERNAME }} password: ${{ secrets.PYPI_PASSWORD }} verbose: true datatree-0.0.14/.gitignore000066400000000000000000000035401455257650300153730ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging 
.Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ docs/source/generated # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # version _version.py # Ignore vscode specific settings .vscode/ datatree-0.0.14/.pre-commit-config.yaml000066400000000000000000000031531455257650300176640ustar00rootroot00000000000000# https://pre-commit.com/ ci: autoupdate_schedule: monthly repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml # isort should run before black as black sometimes tweaks the isort output - repo: https://github.com/PyCQA/isort rev: 5.13.2 hooks: - id: isort # https://github.com/python/black#version-control-integration - repo: https://github.com/psf/black rev: 23.12.1 hooks: - id: black - repo: https://github.com/keewis/blackdoc rev: v0.3.9 hooks: - id: blackdoc - repo: https://github.com/PyCQA/flake8 rev: 6.1.0 hooks: - id: flake8 # - repo: https://github.com/Carreau/velin # rev: 0.0.8 # hooks: # - id: velin # args: ["--write", "--compact"] - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.8.0 hooks: - id: mypy # Copied from setup.cfg exclude: "properties|asv_bench|docs" additional_dependencies: [ # Type stubs types-python-dateutil, types-pkg_resources, types-PyYAML, types-pytz, # Dependencies that are typed numpy, typing-extensions>=4.1.0, ] # run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194 # - repo: https://github.com/asottile/pyupgrade # rev: v1.22.1 # hooks: # - id: pyupgrade # args: # - "--py3-only" # # remove on f-strings in Py3.7 # - "--keep-percent-format" datatree-0.0.14/LICENSE000066400000000000000000000261371455257650300144170ustar00rootroot00000000000000 Apache 
License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright (c) 2022 onwards, datatree developers Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
datatree-0.0.14/README.md000066400000000000000000000135541455257650300146700ustar00rootroot00000000000000# datatree | CI | [![GitHub Workflow Status][github-ci-badge]][github-ci-link] [![Code Coverage Status][codecov-badge]][codecov-link] [![pre-commit.ci status][pre-commit.ci-badge]][pre-commit.ci-link] | | :---------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | | **Docs** | [![Documentation Status][rtd-badge]][rtd-link] | | **Package** | [![Conda][conda-badge]][conda-link] [![PyPI][pypi-badge]][pypi-link] | | **License** | [![License][license-badge]][repo-link] | **Datatree is a prototype implementation of a tree-like hierarchical data structure for xarray.** Datatree was born after the xarray team recognised a [need for a new hierarchical data structure](https://github.com/pydata/xarray/issues/4118), that was more flexible than a single `xarray.Dataset` object. The initial motivation was to represent netCDF files / Zarr stores with multiple nested groups in a single in-memory object, but `datatree.DataTree` objects have many other uses. ### Installation You can install datatree via pip: ```shell pip install xarray-datatree ``` or via conda-forge ```shell conda install -c conda-forge xarray-datatree ``` ### Why Datatree? You might want to use datatree for: - Organising many related datasets, e.g. results of the same experiment with different parameters, or simulations of the same system using different models, - Analysing similar data at multiple resolutions simultaneously, such as when doing a convergence study, - Comparing heterogenous but related data, such as experimental and theoretical data, - I/O with nested data formats such as netCDF / Zarr groups. 
[**Talk slides on Datatree from AMS-python 2023**](https://speakerdeck.com/tomnicholas/xarray-datatree-hierarchical-data-structures-for-multi-model-science) ### Features The approach used here is based on benbovy's [`DatasetNode` example](https://gist.github.com/benbovy/92e7c76220af1aaa4b3a0b65374e233a) - the basic idea is that each tree node wraps a up to a single `xarray.Dataset`. The differences are that this effort: - Uses a node structure inspired by [anytree](https://github.com/xarray-contrib/datatree/issues/7) for the tree, - Implements path-like getting and setting, - Has functions for mapping user-supplied functions over every node in the tree, - Automatically dispatches *some* of `xarray.Dataset`'s API over every node in the tree (such as `.isel`), - Has a bunch of tests, - Has a printable representation that currently looks like this: drawing ### Get Started You can create a `DataTree` object in 3 ways: 1) Load from a netCDF file (or Zarr store) that has groups via `open_datatree()`. 2) Using the init method of `DataTree`, which creates an individual node. You can then specify the nodes' relationships to one other, either by setting `.parent` and `.children` attributes, or through `__get/setitem__` access, e.g. `dt['path/to/node'] = DataTree()`. 3) Create a tree from a dictionary of paths to datasets using `DataTree.from_dict()`. ### Development Roadmap Datatree currently lives in a separate repository to the main xarray package. This allows the datatree developers to make changes to it, experiment, and improve it faster. Eventually we plan to fully integrate datatree upstream into xarray's main codebase, at which point the [github.com/xarray-contrib/datatree](https://github.com/xarray-contrib/datatree>) repository will be archived. This should not cause much disruption to code that depends on datatree - you will likely only have to change the import line (i.e. from ``from datatree import DataTree`` to ``from xarray import DataTree``). 
However, until this full integration occurs, datatree's API should not be considered to have the same [level of stability as xarray's](https://docs.xarray.dev/en/stable/contributing.html#backwards-compatibility). ### User Feedback We really really really want to hear your opinions on datatree! At this point in development, user feedback is critical to help us create something that will suit everyone's needs. Please raise any thoughts, issues, suggestions or bugs, no matter how small or large, on the [github issue tracker](https://github.com/xarray-contrib/datatree/issues). [github-ci-badge]: https://img.shields.io/github/actions/workflow/status/xarray-contrib/datatree/main.yaml?branch=main&label=CI&logo=github [github-ci-link]: https://github.com/xarray-contrib/datatree/actions?query=workflow%3ACI [codecov-badge]: https://img.shields.io/codecov/c/github/xarray-contrib/datatree.svg?logo=codecov [codecov-link]: https://codecov.io/gh/xarray-contrib/datatree [rtd-badge]: https://img.shields.io/readthedocs/xarray-datatree/latest.svg [rtd-link]: https://xarray-datatree.readthedocs.io/en/latest/?badge=latest [pypi-badge]: https://img.shields.io/pypi/v/xarray-datatree?logo=pypi [pypi-link]: https://pypi.org/project/xarray-datatree [conda-badge]: https://img.shields.io/conda/vn/conda-forge/xarray-datatree?logo=anaconda [conda-link]: https://anaconda.org/conda-forge/xarray-datatree [license-badge]: https://img.shields.io/github/license/xarray-contrib/datatree [repo-link]: https://github.com/xarray-contrib/datatree [pre-commit.ci-badge]: https://results.pre-commit.ci/badge/github/xarray-contrib/datatree/main.svg [pre-commit.ci-link]: https://results.pre-commit.ci/latest/github/xarray-contrib/datatree/main datatree-0.0.14/ci/000077500000000000000000000000001455257650300137745ustar00rootroot00000000000000datatree-0.0.14/ci/doc.yml000066400000000000000000000006671455257650300152750ustar00rootroot00000000000000name: datatree-doc channels: - conda-forge dependencies: - pip - 
python>=3.9 - netcdf4 - scipy - sphinx>=4.2.0 - sphinx-copybutton - sphinx-panels - sphinx-autosummary-accessors - sphinx-book-theme >= 0.0.38 - nbsphinx - sphinxcontrib-srclinks - pickleshare - pydata-sphinx-theme>=0.4.3 - ipython - h5netcdf - zarr - xarray - pip: - -e .. - sphinxext-rediraffe - sphinxext-opengraph datatree-0.0.14/ci/environment.yml000066400000000000000000000003401455257650300170600ustar00rootroot00000000000000name: datatree-test channels: - conda-forge - nodefaults dependencies: - python>=3.9 - netcdf4 - pytest - flake8 - black - codecov - pytest-cov - h5netcdf - zarr - pip: - xarray>=2022.05.0.dev0 datatree-0.0.14/codecov.yml000066400000000000000000000004531455257650300155500ustar00rootroot00000000000000codecov: require_ci_to_pass: false max_report_age: off comment: false ignore: - 'datatree/tests/*' - 'setup.py' - 'conftest.py' coverage: precision: 2 round: down status: project: default: target: 95 informational: true patch: off changes: false datatree-0.0.14/conftest.py000066400000000000000000000001021455257650300155710ustar00rootroot00000000000000import pytest pytest.register_assert_rewrite("datatree.testing") datatree-0.0.14/datatree/000077500000000000000000000000001455257650300151725ustar00rootroot00000000000000datatree-0.0.14/datatree/__init__.py000066400000000000000000000013601455257650300173030ustar00rootroot00000000000000# import public API from .datatree import DataTree from .extensions import register_datatree_accessor from .io import open_datatree from .mapping import TreeIsomorphismError, map_over_subtree from .treenode import InvalidTreeError, NotFoundInTreeError try: # NOTE: the `_version.py` file must not be present in the git repository # as it is generated by setuptools at install time from ._version import __version__ except ImportError: # pragma: no cover # Local copy or not installed with setuptools __version__ = "999" __all__ = ( "DataTree", "open_datatree", "TreeIsomorphismError", "InvalidTreeError", "NotFoundInTreeError", 
"map_over_subtree", "register_datatree_accessor", "__version__", ) datatree-0.0.14/datatree/common.py000066400000000000000000000105141455257650300170350ustar00rootroot00000000000000""" This file and class only exists because it was easier to copy the code for AttrAccessMixin from xarray.core.common with some slight modifications than it was to change the behaviour of an inherited xarray internal here. The modifications are marked with # TODO comments. """ import warnings from contextlib import suppress from typing import Any, Hashable, Iterable, List, Mapping class TreeAttrAccessMixin: """Mixin class that allows getting keys with attribute access""" __slots__ = () def __init_subclass__(cls, **kwargs): """Verify that all subclasses explicitly define ``__slots__``. If they don't, raise error in the core xarray module and a FutureWarning in third-party extensions. """ if not hasattr(object.__new__(cls), "__dict__"): pass # TODO reinstate this once integrated upstream # elif cls.__module__.startswith("datatree."): # raise AttributeError(f"{cls.__name__} must explicitly define __slots__") # else: # cls.__setattr__ = cls._setattr_dict # warnings.warn( # f"xarray subclass {cls.__name__} should explicitly define __slots__", # FutureWarning, # stacklevel=2, # ) super().__init_subclass__(**kwargs) @property def _attr_sources(self) -> Iterable[Mapping[Hashable, Any]]: """Places to look-up items for attribute-style access""" yield from () @property def _item_sources(self) -> Iterable[Mapping[Hashable, Any]]: """Places to look-up items for key-autocompletion""" yield from () def __getattr__(self, name: str) -> Any: if name not in {"__dict__", "__setstate__"}: # this avoids an infinite loop when pickle looks for the # __setstate__ attribute before the xarray object is initialized for source in self._attr_sources: with suppress(KeyError): return source[name] raise AttributeError( f"{type(self).__name__!r} object has no attribute {name!r}" ) # This complicated two-method design 
boosts overall performance of simple operations # - particularly DataArray methods that perform a _to_temp_dataset() round-trip - by # a whopping 8% compared to a single method that checks hasattr(self, "__dict__") at # runtime before every single assignment. All of this is just temporary until the # FutureWarning can be changed into a hard crash. def _setattr_dict(self, name: str, value: Any) -> None: """Deprecated third party subclass (see ``__init_subclass__`` above)""" object.__setattr__(self, name, value) if name in self.__dict__: # Custom, non-slotted attr, or improperly assigned variable? warnings.warn( f"Setting attribute {name!r} on a {type(self).__name__!r} object. Explicitly define __slots__ " "to suppress this warning for legitimate custom attributes and " "raise an error when attempting variables assignments.", FutureWarning, stacklevel=2, ) def __setattr__(self, name: str, value: Any) -> None: """Objects with ``__slots__`` raise AttributeError if you try setting an undeclared attribute. This is desirable, but the error message could use some improvement. """ try: object.__setattr__(self, name, value) except AttributeError as e: # Don't accidentally shadow custom AttributeErrors, e.g. # DataArray.dims.setter if str(e) != "{!r} object has no attribute {!r}".format( type(self).__name__, name ): raise raise AttributeError( f"cannot set attribute {name!r} on a {type(self).__name__!r} object. Use __setitem__ style" "assignment (e.g., `ds['name'] = ...`) instead of assigning variables." ) from e def __dir__(self) -> List[str]: """Provide method name lookup and completion. Only provide 'public' methods. 
""" extra_attrs = { item for source in self._attr_sources for item in source if isinstance(item, str) } return sorted(set(dir(type(self))) | extra_attrs) datatree-0.0.14/datatree/datatree.py000066400000000000000000001474611455257650300173520ustar00rootroot00000000000000from __future__ import annotations import copy import itertools from collections import OrderedDict from html import escape from typing import ( TYPE_CHECKING, Any, Callable, Dict, Generic, Hashable, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Set, Tuple, Union, overload, ) from xarray.core import utils from xarray.core.coordinates import DatasetCoordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset, DataVariables from xarray.core.indexes import Index, Indexes from xarray.core.merge import dataset_update_method from xarray.core.options import OPTIONS as XR_OPTS from xarray.core.utils import ( Default, Frozen, HybridMappingProxy, _default, either_dict_or_kwargs, maybe_wrap_array, ) from xarray.core.variable import Variable from . 
import formatting, formatting_html from .common import TreeAttrAccessMixin from .mapping import TreeIsomorphismError, check_isomorphic, map_over_subtree from .ops import ( DataTreeArithmeticMixin, MappedDatasetMethodsMixin, MappedDataWithCoords, ) from .render import RenderTree from .treenode import NamedNode, NodePath, Tree try: from xarray.core.variable import calculate_dimensions except ImportError: # for xarray versions 2022.03.0 and earlier from xarray.core.dataset import calculate_dimensions if TYPE_CHECKING: import pandas as pd from xarray.core.merge import CoercibleValue from xarray.core.types import ErrorOptions # """ # DEVELOPERS' NOTE # ---------------- # The idea of this module is to create a `DataTree` class which inherits the tree structure from TreeNode, and also copies # the entire API of `xarray.Dataset`, but with certain methods decorated to instead map the dataset function over every # node in the tree. As this API is copied without directly subclassing `xarray.Dataset` we instead create various Mixin # classes (in ops.py) which each define part of `xarray.Dataset`'s extensive API. # # Some of these methods must be wrapped to map over all nodes in the subtree. Others are fine to inherit unaltered # (normally because they (a) only call dataset properties and (b) don't return a dataset that should be nested into a new # tree) and some will get overridden by the class definition of DataTree. 
# """ T_Path = Union[str, NodePath] def _coerce_to_dataset(data: Dataset | DataArray | None) -> Dataset: if isinstance(data, DataArray): ds = data.to_dataset() elif isinstance(data, Dataset): ds = data elif data is None: ds = Dataset() else: raise TypeError( f"data object is not an xarray Dataset, DataArray, or None, it is of type {type(data)}" ) return ds def _check_for_name_collisions( children: Iterable[str], variables: Iterable[Hashable] ) -> None: colliding_names = set(children).intersection(set(variables)) if colliding_names: raise KeyError( f"Some names would collide between variables and children: {list(colliding_names)}" ) class DatasetView(Dataset): """ An immutable Dataset-like view onto the data in a single DataTree node. In-place operations modifying this object should raise an AttributeError. This requires overriding all inherited constructors. Operations returning a new result will return a new xarray.Dataset object. This includes all API on Dataset, which will be inherited. """ # TODO what happens if user alters (in-place) a DataArray they extracted from this object? 
__slots__ = (
    "_attrs",
    "_cache",
    "_coord_names",
    "_dims",
    "_encoding",
    "_close",
    "_indexes",
    "_variables",
)


def __init__(
    self,
    data_vars: Optional[Mapping[Any, Any]] = None,
    coords: Optional[Mapping[Any, Any]] = None,
    attrs: Optional[Mapping[Any, Any]] = None,
):
    """Always raises: views are only built through ``_from_node``."""
    raise AttributeError("DatasetView objects are not to be initialized directly")


@classmethod
def _from_node(
    cls,
    wrapping_node: DataTree,
) -> DatasetView:
    """Constructor, using dataset attributes from wrapping node.

    The private containers of ``wrapping_node`` are assigned by reference (no
    copies are made), and ``__init__`` is bypassed via ``object.__new__``.
    """
    obj: DatasetView = object.__new__(cls)
    obj._variables = wrapping_node._variables
    obj._coord_names = wrapping_node._coord_names
    obj._dims = wrapping_node._dims
    obj._indexes = wrapping_node._indexes
    obj._attrs = wrapping_node._attrs
    obj._close = wrapping_node._close
    obj._encoding = wrapping_node._encoding

    return obj


def __setitem__(self, key, val) -> None:
    """Always raises ``AttributeError``: the view is immutable."""
    # Each literal ends with a space so the sentences do not run together.
    raise AttributeError(
        "Mutation of the DatasetView is not allowed, please use `.__setitem__` on the wrapping DataTree node, "
        "or use `dt.to_dataset()` if you want a mutable dataset. If calling this from within `map_over_subtree`, "
        "use `.copy()` first to get a mutable version of the input dataset."
    )


def update(self, other) -> None:
    """Always raises ``AttributeError``: the view is immutable."""
    # Each literal ends with a space so the sentences do not run together.
    raise AttributeError(
        "Mutation of the DatasetView is not allowed, please use `.update` on the wrapping DataTree node, "
        "or use `dt.to_dataset()` if you want a mutable dataset. If calling this from within `map_over_subtree`, "
        "use `.copy()` first to get a mutable version of the input dataset."
    )


# FIXME https://github.com/python/mypy/issues/7328
@overload
def __getitem__(self, key: Mapping) -> Dataset:  # type: ignore[misc]
    ...


@overload
def __getitem__(self, key: Hashable) -> DataArray:  # type: ignore[misc]
    ...


@overload
def __getitem__(self, key: Any) -> Dataset:
    ...
def __getitem__(self, key) -> DataArray:
    """Look ``key`` up exactly as ``Dataset.__getitem__`` does."""
    # TODO call the `_get_item` method of DataTree to allow path-like access to contents of other nodes
    # For now just call Dataset.__getitem__
    return Dataset.__getitem__(self, key)


@classmethod
def _construct_direct(
    cls,
    variables: dict[Any, Variable],
    coord_names: set[Hashable],
    dims: Optional[dict[Any, int]] = None,
    attrs: Optional[dict] = None,
    indexes: Optional[dict[Any, Index]] = None,
    encoding: Optional[dict] = None,
    close: Optional[Callable[[], None]] = None,
) -> Dataset:
    """Build a plain ``Dataset`` (never a ``DatasetView``) from raw components.

    Overridden together with ``_replace`` so that, hopefully, every method
    inherited from ``Dataset`` returns a ``Dataset`` rather than a view.
    ``dims`` is derived with ``calculate_dimensions(variables)`` when omitted and
    ``indexes`` defaults to an empty dict.
    """
    resolved_dims = calculate_dimensions(variables) if dims is None else dims
    resolved_indexes = {} if indexes is None else indexes

    # ``cls`` is deliberately ignored: the result is always a real Dataset.
    dataset = object.__new__(Dataset)
    dataset._variables = variables
    dataset._coord_names = coord_names
    dataset._dims = resolved_dims
    dataset._indexes = resolved_indexes
    dataset._attrs = attrs
    dataset._close = close
    dataset._encoding = encoding
    return dataset


def _replace(
    self,
    variables: Optional[dict[Hashable, Variable]] = None,
    coord_names: Optional[set[Hashable]] = None,
    dims: Optional[dict[Any, int]] = None,
    attrs: dict[Hashable, Any] | None | Default = _default,
    indexes: Optional[dict[Hashable, Index]] = None,
    encoding: dict | None | Default = _default,
    inplace: bool = False,
) -> Dataset:
    """Delegate to ``Dataset._replace`` while refusing in-place replacement.

    Overridden together with ``_construct_direct`` so that, hopefully, every
    method inherited from ``Dataset`` returns a ``Dataset``.

    Raises
    ------
    AttributeError
        If ``inplace`` is truthy.
    """
    if inplace:
        raise AttributeError("In-place mutation of the DatasetView is not allowed")

    replacements = dict(
        variables=variables,
        coord_names=coord_names,
        dims=dims,
        attrs=attrs,
        indexes=indexes,
        encoding=encoding,
        inplace=inplace,
    )
    return Dataset._replace(self, **replacements)
def map(
    self,
    func: Callable,
    keep_attrs: bool | None = None,
    args: Iterable[Any] = (),
    **kwargs: Any,
) -> Dataset:
    """Apply a function to each data variable in this dataset

    Parameters
    ----------
    func : callable
        Function which can be called in the form `func(x, *args, **kwargs)`
        to transform each DataArray `x` in this dataset into another
        DataArray.
    keep_attrs : bool or None, optional
        Accepted for signature compatibility with ``Dataset.map``. This copied
        implementation does not read it, so attributes are never carried over.
    args : iterable, optional
        Positional arguments passed on to `func`.
    **kwargs : Any
        Keyword arguments passed on to `func`.

    Returns
    -------
    applied : Dataset
        Resulting dataset from applying ``func`` to each data variable.

    Examples
    --------
    >>> da = xr.DataArray(np.random.randn(2, 3))
    >>> ds = xr.Dataset({"foo": da, "bar": ("x", [-1, 2])})
    >>> applied = ds.map(np.fabs)
    """
    # Copied from xarray.Dataset so as not to call type(self), which causes problems (see datatree GH188).
    # TODO Refactor xarray upstream to avoid needing to overwrite this.
    # TODO This copied version will drop all attrs - the keep_attrs stuff should be re-instated
    transformed = {}
    for var_name, data_array in self.data_vars.items():
        result = func(data_array, *args, **kwargs)
        transformed[var_name] = maybe_wrap_array(data_array, result)
    # return type(self)(variables, attrs=attrs)
    return Dataset(transformed)
def __init__(
    self,
    data: Optional[Dataset | DataArray] = None,
    parent: Optional[DataTree] = None,
    children: Optional[Mapping[str, DataTree]] = None,
    name: Optional[str] = None,
):
    """
    Create a single node of a DataTree.

    The node may optionally contain data in the form of data and coordinate variables, stored in the same way as
    data is stored in an xarray.Dataset.

    Parameters
    ----------
    data : Dataset, DataArray, or None, optional
        Data to store under the .ds attribute of this node. DataArrays will be promoted to Datasets.
        Default is None.
    parent : DataTree, optional
        Parent node to this node. Default is None.
    children : Mapping[str, DataTree], optional
        Any child nodes of this node. Default is None.
    name : str, optional
        Name for this node of the tree. Default is None.

    Raises
    ------
    KeyError
        If a child name is also the name of a variable in ``data``.

    See Also
    --------
    DataTree.from_dict
    """

    # validate input
    if children is None:
        children = {}
    ds = _coerce_to_dataset(data)
    _check_for_name_collisions(children, ds.variables)

    super().__init__(name=name)

    # set data attributes
    self._replace(
        inplace=True,
        variables=ds._variables,
        coord_names=ds._coord_names,
        dims=ds._dims,
        indexes=ds._indexes,
        attrs=ds._attrs,
        encoding=ds._encoding,
    )
    self._close = ds._close

    # set tree attributes (must happen after variables set to avoid initialization errors)
    self.children = children
    self.parent = parent


@property
def parent(self: DataTree) -> DataTree | None:
    """Parent of this node."""
    return self._parent


@parent.setter
def parent(self: DataTree, new_parent: DataTree | None) -> None:
    """Attach this node to ``new_parent`` (or detach it when ``None``).

    Raises
    ------
    ValueError
        If a parent is given while this node has no name.
    """
    # Compare against None explicitly: a node's truthiness is False when it holds
    # neither data variables nor children, which must not bypass the guard.
    if new_parent is not None and self.name is None:
        raise ValueError("Cannot set an unnamed node as a child of another node")
    self._set_parent(new_parent, self.name)
@property
def ds(self) -> DatasetView:
    """
    An immutable Dataset-like view onto the data in this node.

    For a mutable Dataset containing the same data as in this node, use `.to_dataset()` instead.

    See Also
    --------
    DataTree.to_dataset
    """
    return DatasetView._from_node(self)


@ds.setter
def ds(self, data: Optional[Union[Dataset, DataArray]] = None) -> None:
    """Replace this node's data after checking for clashes with child names."""
    ds = _coerce_to_dataset(data)

    _check_for_name_collisions(self.children, ds.variables)

    self._replace(
        inplace=True,
        variables=ds._variables,
        coord_names=ds._coord_names,
        dims=ds._dims,
        indexes=ds._indexes,
        attrs=ds._attrs,
        encoding=ds._encoding,
    )
    self._close = ds._close


def _pre_attach(self: DataTree, parent: DataTree) -> None:
    """
    Method which superclass calls before setting parent, here used to prevent having two
    children with duplicate names (or a data variable with the same name as a child).

    Raises
    ------
    KeyError
        If ``parent`` already holds a variable named like this node.
    """
    super()._pre_attach(parent)
    # Membership test on the parent's variable mapping directly; no need to build
    # a DatasetView and copy every name into a list first.
    if self.name in parent.variables:
        raise KeyError(
            f"parent {parent.name} already contains a data variable named {self.name}"
        )


def to_dataset(self) -> Dataset:
    """
    Return the data in this node as a new xarray.Dataset object.

    The Dataset is assembled from this node's private containers without copying
    them.

    See Also
    --------
    DataTree.ds
    """
    return Dataset._construct_direct(
        self._variables,
        self._coord_names,
        self._dims,
        self._attrs,
        self._indexes,
        self._encoding,
        self._close,
    )


@property
def has_data(self) -> bool:
    """Whether or not there are any variables stored in this node."""
    return len(self._variables) > 0


@property
def has_attrs(self) -> bool:
    """Whether or not there are any metadata attributes in this node."""
    return len(self.attrs.keys()) > 0


@property
def is_empty(self) -> bool:
    """False if node contains any data or attrs. Does not look at children."""
    return not (self.has_data or self.has_attrs)


@property
def is_hollow(self) -> bool:
    """True if only leaf nodes contain data."""
    return not any(node.has_data for node in self.subtree if not node.is_leaf)
@property
def variables(self) -> Mapping[Hashable, Variable]:
    """Low level interface to node contents as dict of Variable objects.

    The node's variable dict is wrapped in ``Frozen`` before being returned, to
    prevent mutation that could violate Dataset invariants. It covers every
    variable of this node, data variables and coordinates alike.
    """
    node_variables = self._variables
    return Frozen(node_variables)


@property
def attrs(self) -> Dict[Hashable, Any]:
    """Dictionary of global attributes on this node object."""
    current = self._attrs
    if current is None:
        # Created lazily and stored, so later edits to the dict stick.
        current = self._attrs = {}
    return current


@attrs.setter
def attrs(self, value: Mapping[Any, Any]) -> None:
    """Store a ``dict`` copy of ``value`` as this node's attributes."""
    self._attrs = dict(value)


@property
def encoding(self) -> Dict:
    """Dictionary of global encoding attributes on this node object."""
    current = self._encoding
    if current is None:
        # Created lazily and stored, so later edits to the dict stick.
        current = self._encoding = {}
    return current


@encoding.setter
def encoding(self, value: Mapping) -> None:
    """Store a ``dict`` copy of ``value`` as this node's encoding."""
    self._encoding = dict(value)


@property
def dims(self) -> Mapping[Hashable, int]:
    """Mapping from dimension names to lengths.

    Cannot be modified directly, but is updated when adding new variables.

    Note that type of this object differs from `DataArray.dims`.
    See `DataTree.sizes`, `Dataset.sizes`, and `DataArray.sizes` for consistently named
    properties.
    """
    node_dims = self._dims
    return Frozen(node_dims)


@property
def sizes(self) -> Mapping[Hashable, int]:
    """Mapping from dimension names to lengths.

    Cannot be modified directly, but is updated when adding new variables.

    This is an alias for `DataTree.dims` provided for the benefit of
    consistency with `DataArray.sizes`.

    See Also
    --------
    DataArray.sizes
    """
    return self.dims


@property
def _attr_sources(self) -> Iterable[Mapping[Hashable, Any]]:
    """Places to look-up items for attribute-style access"""
    for source in self._item_sources:
        yield source
    yield self.attrs


@property
def _item_sources(self) -> Iterable[Mapping[Any, Any]]:
    """Places to look-up items for key-completion"""
    # data variables first
    yield self.data_vars
    # then coordinates
    yield HybridMappingProxy(keys=self._coord_names, mapping=self.coords)

    # virtual coordinates
    yield HybridMappingProxy(keys=self.dims, mapping=self)

    # immediate child nodes
    yield self.children
See http://ipython.readthedocs.io/en/stable/config/integrating.html#tab-completion For the details. """ # TODO allow auto-completing relative string paths, e.g. `dt['path/to/../ node'` # Would require changes to ipython's autocompleter, see https://github.com/ipython/ipython/issues/12420 # Instead for now we only list direct paths to all node in subtree explicitly items_on_this_node = self._item_sources full_file_like_paths_to_all_nodes_in_subtree = { node.path[1:]: node for node in self.subtree } all_item_sources = itertools.chain( items_on_this_node, [full_file_like_paths_to_all_nodes_in_subtree] ) items = { item for source in all_item_sources for item in source if isinstance(item, str) } return list(items) def __contains__(self, key: object) -> bool: """The 'in' operator will return true or false depending on whether 'key' is either an array stored in the datatree or a child node, or neither. """ return key in self.variables or key in self.children def __bool__(self) -> bool: return bool(self.ds.data_vars) or bool(self.children) def __iter__(self) -> Iterator[Hashable]: return itertools.chain(self.ds.data_vars, self.children) def __array__(self, dtype=None): raise TypeError( "cannot directly convert a DataTree into a " "numpy array. Instead, create an xarray.DataArray " "first, either with indexing on the DataTree or by " "invoking the `to_array()` method." ) def __repr__(self) -> str: return formatting.datatree_repr(self) def __str__(self) -> str: return formatting.datatree_repr(self) def _repr_html_(self): """Make html representation of datatree object""" if XR_OPTS["display_style"] == "text": return f"
def _repr_html_(self):
    """Make html representation of datatree object"""
    if XR_OPTS["display_style"] == "text":
        # NOTE(review): the tags around this expression were stripped from the copy of
        # the file under review; `<pre>` mirrors xarray's own text fallback - confirm.
        return f"<pre>{escape(repr(self))}</pre>"
    return formatting_html.datatree_repr(self)


@classmethod
def _construct_direct(
    cls,
    variables: dict[Any, Variable],
    coord_names: set[Hashable],
    dims: Optional[dict[Any, int]] = None,
    attrs: Optional[dict] = None,
    indexes: Optional[dict[Any, Index]] = None,
    encoding: Optional[dict] = None,
    name: str | None = None,
    parent: DataTree | None = None,
    children: Optional[OrderedDict[str, DataTree]] = None,
    close: Optional[Callable[[], None]] = None,
) -> DataTree:
    """Shortcut around __init__ for internal use when we want to skip costly validation.

    Arguments are stored on the new node as given. ``dims`` is computed from
    ``variables`` when omitted; ``indexes`` and ``children`` default to empty
    containers.
    """

    # data attributes
    if dims is None:
        dims = calculate_dimensions(variables)
    if indexes is None:
        indexes = {}
    if children is None:
        children = OrderedDict()

    obj: DataTree = object.__new__(cls)
    obj._variables = variables
    obj._coord_names = coord_names
    obj._dims = dims
    obj._indexes = indexes
    obj._attrs = attrs
    obj._close = close
    obj._encoding = encoding

    # tree attributes
    obj._name = name
    obj._children = children
    obj._parent = parent

    return obj


def _replace(
    self: DataTree,
    variables: Optional[dict[Hashable, Variable]] = None,
    coord_names: Optional[set[Hashable]] = None,
    dims: Optional[dict[Any, int]] = None,
    attrs: dict[Hashable, Any] | None | Default = _default,
    indexes: Optional[dict[Hashable, Index]] = None,
    encoding: dict | None | Default = _default,
    name: str | None | Default = _default,
    parent: DataTree | None = _default,
    children: Optional[OrderedDict[str, DataTree]] = None,
    inplace: bool = False,
) -> DataTree:
    """
    Fastpath constructor for internal use.

    Returns an object with optionally replaced attributes.

    Explicitly passed arguments are *not* copied when placed on the new
    datatree. It is up to the caller to ensure that they have the right type
    and are not used elsewhere.

    Arguments left at their default keep the current value: assigned nothing when
    ``inplace`` is true, shallow-copied from this node otherwise. For ``children``
    both ``None`` and the ``_default`` sentinel count as "not given".
    """
    # TODO Adding new children inplace using this method will cause bugs.
    # You will end up with an inconsistency between the name of the child node and the key the child is stored under.
    # Use ._set() instead for now
    children_given = children is not None and children is not _default
    if inplace:
        if variables is not None:
            self._variables = variables
        if coord_names is not None:
            self._coord_names = coord_names
        if dims is not None:
            self._dims = dims
        if attrs is not _default:
            self._attrs = attrs
        if indexes is not None:
            self._indexes = indexes
        if encoding is not _default:
            self._encoding = encoding
        if name is not _default:
            self._name = name
        if parent is not _default:
            self._parent = parent
        if children_given:
            self._children = children
        obj = self
    else:
        if variables is None:
            variables = self._variables.copy()
        if coord_names is None:
            coord_names = self._coord_names.copy()
        if dims is None:
            dims = self._dims.copy()
        if attrs is _default:
            attrs = copy.copy(self._attrs)
        if indexes is None:
            indexes = self._indexes.copy()
        if encoding is _default:
            encoding = copy.copy(self._encoding)
        if name is _default:
            name = self._name  # no need to copy str objects or None
        if parent is _default:
            parent = copy.copy(self._parent)
        if not children_given:
            # The parameter default is None, so testing only for ``_default`` here
            # would hand None to _construct_direct and silently drop every child.
            children = copy.copy(self._children)
        obj = self._construct_direct(
            variables,
            coord_names,
            dims,
            attrs,
            indexes,
            encoding,
            name,
            parent,
            children,
        )
    return obj
def copy(
    self: DataTree,
    deep: bool = False,
) -> DataTree:
    """
    Returns a copy of this subtree.

    Copies this node and all child nodes.

    If `deep=True`, a deep copy is made of each of the component variables.
    Otherwise, a shallow copy of each of the component variable is made, so
    that the underlying memory region of the new datatree is the same as in
    the original datatree.

    Parameters
    ----------
    deep : bool, default: False
        Whether each component variable is loaded into memory and copied onto
        the new object. Default is False.

    Returns
    -------
    object : DataTree
        New object with dimensions, attributes, coordinates, name, encoding,
        and data of this node and all child nodes copied from original.

    See Also
    --------
    xarray.Dataset.copy
    pandas.DataFrame.copy
    """
    return self._copy_subtree(deep=deep)


def _copy_subtree(
    self: DataTree,
    deep: bool = False,
    memo: dict[int, Any] | None = None,
) -> DataTree:
    """Copy entire subtree.

    ``memo`` is accepted for the ``__deepcopy__`` protocol but is not used here.
    """
    root_copy = self._copy_node(deep=deep)
    for descendant in self.descendants:
        relative_path = descendant.relative_to(self)
        root_copy[relative_path] = descendant._copy_node(deep=deep)
    return root_copy


def _copy_node(
    self: DataTree,
    deep: bool = False,
) -> DataTree:
    """Copy just one node of a tree"""
    duplicate: DataTree = DataTree()
    duplicate.name = self.name
    duplicate.ds = self.to_dataset().copy(deep=deep)
    return duplicate


def __copy__(self: DataTree) -> DataTree:
    """Shallow copy of the whole subtree (``copy.copy`` protocol)."""
    return self._copy_subtree(deep=False)


def __deepcopy__(self: DataTree, memo: dict[int, Any] | None = None) -> DataTree:
    """Deep copy of the whole subtree (``copy.deepcopy`` protocol)."""
    return self._copy_subtree(deep=True, memo=memo)


def get(
    self: DataTree, key: str, default: Optional[DataTree | DataArray] = None
) -> Optional[DataTree | DataArray]:
    """
    Access child nodes, variables, or coordinates stored in this node.

    Returned object will be either a DataTree or DataArray object depending on whether the key given points to a
    child or variable.

    Parameters
    ----------
    key : str
        Name of variable / child within this node. Must lie in this immediate node (not elsewhere in the tree).
    default : DataTree | DataArray, optional
        A value to return if the specified key does not exist. Default return value is None.
    """
    # Children take precedence over variables of the same name.
    child_nodes = self.children
    if key in child_nodes:
        return child_nodes[key]
    view = self.ds
    if key in view:
        return view[key]
    return default
def __getitem__(self: DataTree, key: str) -> DataTree | DataArray:
    """
    Access child nodes, variables, or coordinates stored anywhere in this tree.

    Returned object will be either a DataTree or DataArray object depending on whether the key given points to a
    child or variable.

    Parameters
    ----------
    key : str
        Name of variable / child within this node, or unix-like path to variable / child within another node.

    Returns
    -------
    Union[DataTree, DataArray]

    Raises
    ------
    NotImplementedError
        For dict-like or list-like keys.
    ValueError
        For any other non-string key.
    """
    if utils.is_dict_like(key):
        # dict-like indexing
        raise NotImplementedError("Should this index over whole tree?")
    if isinstance(key, str):
        # TODO should possibly deal with hashables in general?
        # path-like: a name of a node/variable, or path to a node/variable
        return self._get_item(NodePath(key))
    if utils.is_list_like(key):
        # iterable of variable names
        raise NotImplementedError(
            "Selecting via tags is deprecated, and selecting multiple items should be "
            "implemented via .subset"
        )
    raise ValueError(f"Invalid format for key: {key}")


def _set(self, key: str, val: DataTree | CoercibleValue) -> None:
    """
    Set the child node or variable with the specified key to value.

    Counterpart to the public .get method, and also only works on the immediate node, not other nodes in the tree.
    """
    if not isinstance(val, DataTree):
        # accommodate other types that can be coerced into Variables
        array_like = val if isinstance(val, (DataArray, Variable)) else DataArray(val)
        self.update({key: array_like})
        return

    # create and assign a shallow copy here so as not to alter original name of node in grafted tree
    grafted = val.copy(deep=False)
    grafted.name = key
    grafted.parent = self
def __setitem__(
    self,
    key: str,
    value: Any,
) -> None:
    """
    Add either a child node or an array to the tree, at any position.

    Data can be added anywhere, and new nodes will be created to cross the path to the new location if necessary.

    If there is already a node at the given location, then if value is a Node class or Dataset it will overwrite the
    data already present at that node, and if value is a single array, it will be merged with it.
    """
    # TODO xarray.Dataset accepts other possibilities, how do we exactly replicate all the behaviour?
    if utils.is_dict_like(key):
        raise NotImplementedError
    if not isinstance(key, str):
        raise ValueError("Invalid format for key")
    # TODO should possibly deal with hashables in general?
    # path-like: a name of a node/variable, or path to a node/variable
    target_path = NodePath(key)
    return self._set_item(target_path, value, new_nodes_along_path=True)


def update(self, other: Dataset | Mapping[str, DataTree | DataArray]) -> None:
    """
    Update this node's children and / or variables.

    Just like `dict.update` this is an in-place operation.

    Raises
    ------
    TypeError
        If a value is neither a DataTree, a DataArray nor a Variable.
    """
    # TODO separate by type
    child_updates = {}
    variable_updates = {}
    for key, value in other.items():
        if isinstance(value, DataTree):
            # avoid named node being stored under inconsistent key
            grafted = value.copy()
            grafted.name = key
            child_updates[key] = grafted
            continue
        if isinstance(value, (DataArray, Variable)):
            # TODO this should also accommodate other types that can be coerced into Variables
            variable_updates[key] = value
            continue
        raise TypeError(f"Type {type(value)} cannot be assigned to a DataTree")

    merge_result = dataset_update_method(self.to_dataset(), variable_updates)
    # TODO are there any subtleties with preserving order of children like this?
    # NOTE(review): the children are stored through ``_replace`` directly; whether the
    # new nodes get their parent link set is not visible from here - confirm.
    combined_children = OrderedDict({**self.children, **child_updates})
    self._replace(
        inplace=True, children=combined_children, **merge_result._asdict()
    )
def assign(
    self, items: Mapping[Any, Any] | None = None, **items_kwargs: Any
) -> DataTree:
    """
    Assign new data variables or child nodes to a DataTree, returning a new object
    with all the original items in addition to the new ones.

    Parameters
    ----------
    items : mapping of hashable to Any
        Mapping from variable or child node names to the new values, which are
        handed to ``update`` on a copy of this tree.
    **items_kwargs
        The keyword arguments form of ``items``.
        One of items or items_kwargs must be provided.

    Returns
    -------
    dt : DataTree
        A new DataTree with the new variables or children in addition to all the
        existing items.

    Notes
    -----
    Since ``kwargs`` is a dictionary, the order of your arguments may not
    be preserved, and so the order of the new variables is not well-defined.
    Assigning multiple items within the same ``assign`` is
    possible, but you cannot reference other variables created within the
    same ``assign`` call.

    See Also
    --------
    xarray.Dataset.assign
    pandas.DataFrame.assign
    """
    new_items = either_dict_or_kwargs(items, items_kwargs, "assign")
    result = self.copy()
    result.update(new_items)
    return result


def drop_nodes(
    self: DataTree, names: str | Iterable[str], *, errors: ErrorOptions = "raise"
) -> DataTree:
    """
    Drop child nodes from this node.

    Parameters
    ----------
    names : str or iterable of str
        Name(s) of nodes to drop.
    errors : {"raise", "ignore"}, default: "raise"
        If 'raise', raises a KeyError if any of the node names
        passed are not present as children of this node. If 'ignore',
        any given names that are present are dropped and no error is raised.

    Returns
    -------
    dropped : DataTree
        A copy of the node with the specified children dropped.
    """
    # the Iterable check is required for mypy
    single_name = isinstance(names, str) or not isinstance(names, Iterable)
    names = {names} if single_name else set(names)

    if errors == "raise":
        extra = names - set(self.children)
        if extra:
            raise KeyError(f"Cannot drop all nodes - nodes {extra} not present")

    survivors = OrderedDict()
    for child_name, child in self.children.items():
        if child_name not in names:
            survivors[child_name] = child
    return self._replace(children=survivors)
@classmethod
def from_dict(
    cls,
    d: MutableMapping[str, Dataset | DataArray | DataTree | None],
    name: Optional[str] = None,
) -> DataTree:
    """
    Create a datatree from a dictionary of data objects, organised by paths into the tree.

    Parameters
    ----------
    d : dict-like
        A mapping from path names to xarray.Dataset, xarray.DataArray, or DataTree objects.

        Path names are to be given as unix-like path. If path names containing more than one part are given, new
        tree nodes will be constructed as necessary.

        To assign data to the root node of the tree use "/" as the path.

        The mapping itself is left untouched.
    name : Hashable, optional
        Name for the root node of the tree. Default is None.

    Returns
    -------
    DataTree

    Notes
    -----
    If your dictionary is nested you will need to flatten it before using this method.
    """

    # Work on a shallow copy: popping the root entry must not remove it from the
    # caller's mapping.
    remaining = dict(d)

    # First create the root node
    root_data = remaining.pop("/", None)
    obj = cls(name=name, data=root_data, parent=None, children=None)

    # Populate tree with children determined from data_objects mapping
    for path, data in remaining.items():
        # Create and set new node
        node_name = NodePath(path).name
        if isinstance(data, cls):
            # Detach a copy so the tree passed in by the caller is not re-parented.
            new_node = data.copy()
            new_node.orphan()
        else:
            new_node = cls(name=node_name, data=data)
        obj._set_item(
            path,
            new_node,
            allow_overwrite=False,
            new_nodes_along_path=True,
        )

    return obj


def to_dict(self) -> Dict[str, Dataset]:
    """
    Create a dictionary mapping of absolute node paths to the data contained in those nodes.

    Returns
    -------
    Dict[str, Dataset]
    """
    return {node.path: node.to_dataset() for node in self.subtree}


@property
def nbytes(self) -> int:
    """Sum of ``to_dataset().nbytes`` over every node in the subtree."""
    return sum(node.to_dataset().nbytes for node in self.subtree)


def __len__(self) -> int:
    """Number of children plus number of data variables on this node."""
    return len(self.children) + len(self.data_vars)
@property
def indexes(self) -> Indexes[pd.Index]:
    """Mapping of pandas.Index objects used for label based indexing.

    Raises an error if this DataTree node has indexes that cannot be coerced
    to pandas.Index objects.

    See Also
    --------
    DataTree.xindexes
    """
    xarray_indexes = self.xindexes
    return xarray_indexes.to_pandas_indexes()


@property
def xindexes(self) -> Indexes[Index]:
    """Mapping of xarray Index objects used for label based indexing."""
    index_map = self._indexes
    index_variables = {}
    for key in index_map:
        index_variables[key] = self._variables[key]
    return Indexes(index_map, index_variables)


@property
def coords(self) -> DatasetCoordinates:
    """Dictionary of xarray.DataArray objects corresponding to coordinate
    variables
    """
    node_dataset = self.to_dataset()
    return DatasetCoordinates(node_dataset)


@property
def data_vars(self) -> DataVariables:
    """Dictionary of DataArray objects corresponding to data variables"""
    node_dataset = self.to_dataset()
    return DataVariables(node_dataset)
def isomorphic(
    self,
    other: DataTree,
    from_root: bool = False,
    strict_names: bool = False,
) -> bool:
    """
    Two DataTrees are considered isomorphic if every node has the same number of children.

    Nothing about the data in each node is checked.

    Isomorphism is a necessary condition for two trees to be used in a nodewise binary operation,
    such as ``tree1 + tree2``.

    By default this method does not check any part of the tree above the given node.
    Therefore this method can be used as default to check that two subtrees are isomorphic.

    Parameters
    ----------
    other : DataTree
        The other tree object to compare to.
    from_root : bool, optional, default is False
        Whether or not to first traverse to the root of the two trees before checking for isomorphism.
        If neither tree has a parent then this has no effect.
    strict_names : bool, optional, default is False
        Whether or not to also check that every node in the tree has the same name as its counterpart in the other
        tree.

    See Also
    --------
    DataTree.equals
    DataTree.identical
    """
    try:
        check_isomorphic(
            self,
            other,
            require_names_equal=strict_names,
            check_from_root=from_root,
        )
        return True
    except (TypeError, TreeIsomorphismError):
        # Both exception types are reported as "not isomorphic".
        return False


def equals(self, other: DataTree, from_root: bool = True) -> bool:
    """
    Two DataTrees are equal if they have isomorphic node structures, with matching node names,
    and if they have matching variables and coordinates, all of which are equal.

    By default this method will check the whole tree above the given node.

    Parameters
    ----------
    other : DataTree
        The other tree object to compare to.
    from_root : bool, optional, default is True
        Whether or not to first traverse to the root of the two trees before checking for isomorphism.
        If neither tree has a parent then this has no effect.

    See Also
    --------
    Dataset.equals
    DataTree.isomorphic
    DataTree.identical
    """
    if not self.isomorphic(other, from_root=from_root, strict_names=True):
        return False

    # Generator rather than list: stop comparing node data at the first mismatch,
    # exactly like ``identical`` does.
    return all(
        node.ds.equals(other_node.ds)
        for node, other_node in zip(self.subtree, other.subtree)
    )


def identical(self, other: DataTree, from_root: bool = True) -> bool:
    """
    Like equals, but will also check all dataset attributes and the attributes on
    all variables and coordinates.

    By default this method will check the whole tree above the given node.

    Parameters
    ----------
    other : DataTree
        The other tree object to compare to.
    from_root : bool, optional, default is True
        Whether or not to first traverse to the root of the two trees before checking for isomorphism.
        If neither tree has a parent then this has no effect.

    See Also
    --------
    Dataset.identical
    DataTree.isomorphic
    DataTree.equals
    """
    if not self.isomorphic(other, from_root=from_root, strict_names=True):
        return False

    return all(
        node.ds.identical(other_node.ds)
        for node, other_node in zip(self.subtree, other.subtree)
    )
Returns a new tree containing only the nodes in the original tree for which `fitlerfunc(node)` is True. Will also contain empty nodes at intermediate positions if required to support leaves. Parameters ---------- filterfunc: function A function which accepts only one DataTree - the node on which filterfunc will be called. Returns ------- DataTree See Also -------- match pipe map_over_subtree """ filtered_nodes = { node.path: node.ds for node in self.subtree if filterfunc(node) } return DataTree.from_dict(filtered_nodes, name=self.root.name) def match(self, pattern: str) -> DataTree: """ Return nodes with paths matching pattern. Uses unix glob-like syntax for pattern-matching. Parameters ---------- pattern: str A pattern to match each node path against. Returns ------- DataTree See Also -------- filter pipe map_over_subtree Examples -------- >>> dt = DataTree.from_dict( ... { ... "/a/A": None, ... "/a/B": None, ... "/b/A": None, ... "/b/B": None, ... } ... ) >>> dt.match("*/B") DataTree('None', parent=None) ├── DataTree('a') │ └── DataTree('B') └── DataTree('b') └── DataTree('B') """ matching_nodes = { node.path: node.ds for node in self.subtree if NodePath(node.path).match(pattern) } return DataTree.from_dict(matching_nodes, name=self.root.name) def map_over_subtree( self, func: Callable, *args: Iterable[Any], **kwargs: Any, ) -> DataTree | Tuple[DataTree]: """ Apply a function to every dataset in this subtree, returning a new tree which stores the results. The function will be applied to any dataset stored in this node, as well as any dataset stored in any of the descendant nodes. The returned tree will have the same structure as the original subtree. func needs to return a Dataset in order to rebuild the subtree. Parameters ---------- func : callable Function to apply to datasets with signature: `func(node.ds, *args, **kwargs) -> Dataset`. Function will not be applied to any nodes without datasets. *args : tuple, optional Positional arguments passed on to `func`. 
**kwargs : Any Keyword arguments passed on to `func`. Returns ------- subtrees : DataTree, Tuple of DataTrees One or more subtrees containing results from applying ``func`` to the data at each node. """ # TODO this signature means that func has no way to know which node it is being called upon - change? # TODO fix this typing error return map_over_subtree(func)(self, *args, **kwargs) # type: ignore[operator] def map_over_subtree_inplace( self, func: Callable, *args: Iterable[Any], **kwargs: Any, ) -> None: """ Apply a function to every dataset in this subtree, updating data in place. Parameters ---------- func : callable Function to apply to datasets with signature: `func(node.ds, *args, **kwargs) -> Dataset`. Function will not be applied to any nodes without datasets, *args : tuple, optional Positional arguments passed on to `func`. **kwargs : Any Keyword arguments passed on to `func`. """ # TODO if func fails on some node then the previous nodes will still have been updated... for node in self.subtree: if node.has_data: node.ds = func(node.ds, *args, **kwargs) def pipe( self, func: Callable | tuple[Callable, str], *args: Any, **kwargs: Any ) -> Any: """Apply ``func(self, *args, **kwargs)`` This method replicates the pandas method of the same name. Parameters ---------- func : callable function to apply to this xarray object (Dataset/DataArray). ``args``, and ``kwargs`` are passed into ``func``. Alternatively a ``(callable, data_keyword)`` tuple where ``data_keyword`` is a string indicating the keyword of ``callable`` that expects the xarray object. *args positional arguments passed into ``func``. **kwargs a dictionary of keyword arguments passed into ``func``. Returns ------- object : Any the return type of ``func``. Notes ----- Use ``.pipe`` when chaining together functions that expect xarray or pandas objects, e.g., instead of writing .. code:: python f(g(h(dt), arg1=a), arg2=b, arg3=c) You can write .. 
code:: python (dt.pipe(h).pipe(g, arg1=a).pipe(f, arg2=b, arg3=c)) If you have a function that takes the data as (say) the second argument, pass a tuple indicating which keyword expects the data. For example, suppose ``f`` takes its data as ``arg2``: .. code:: python (dt.pipe(h).pipe(g, arg1=a).pipe((f, "arg2"), arg1=a, arg3=c)) """ if isinstance(func, tuple): func, target = func if target in kwargs: raise ValueError( f"{target} is both the pipe target and a keyword argument" ) kwargs[target] = self else: args = (self,) + args return func(*args, **kwargs) def render(self): """Print tree structure, including any data stored at each node.""" for pre, fill, node in RenderTree(self): print(f"{pre}DataTree('{self.name}')") for ds_line in repr(node.ds)[1:]: print(f"{fill}{ds_line}") def merge(self, datatree: DataTree) -> DataTree: """Merge all the leaves of a second DataTree into this one.""" raise NotImplementedError def merge_child_nodes(self, *paths, new_path: T_Path) -> DataTree: """Merge a set of child nodes into a single new node.""" raise NotImplementedError # TODO some kind of .collapse() or .flatten() method to merge a subtree def as_array(self) -> DataArray: return self.ds.as_dataarray() @property def groups(self): """Return all netCDF4 groups in the tree, given as a tuple of path-like strings.""" return tuple(node.path for node in self.subtree) def to_netcdf( self, filepath, mode: str = "w", encoding=None, unlimited_dims=None, **kwargs ): """ Write datatree contents to a netCDF file. Parameters ---------- filepath : str or Path Path to which to save this datatree. mode : {"w", "a"}, default: "w" Write ('w') or append ('a') mode. If mode='w', any existing file at this location will be overwritten. If mode='a', existing variables will be overwritten. Only appies to the root group. 
encoding : dict, optional Nested dictionary with variable names as keys and dictionaries of variable specific encodings as values, e.g., ``{"root/set1": {"my_variable": {"dtype": "int16", "scale_factor": 0.1, "zlib": True}, ...}, ...}``. See ``xarray.Dataset.to_netcdf`` for available options. unlimited_dims : dict, optional Mapping of unlimited dimensions per group that that should be serialized as unlimited dimensions. By default, no dimensions are treated as unlimited dimensions. Note that unlimited_dims may also be set via ``dataset.encoding["unlimited_dims"]``. kwargs : Addional keyword arguments to be passed to ``xarray.Dataset.to_netcdf`` """ from .io import _datatree_to_netcdf _datatree_to_netcdf( self, filepath, mode=mode, encoding=encoding, unlimited_dims=unlimited_dims, **kwargs, ) def to_zarr( self, store, mode: str = "w-", encoding=None, consolidated: bool = True, **kwargs, ): """ Write datatree contents to a Zarr store. Parameters ---------- store : MutableMapping, str or Path, optional Store or path to directory in file system mode : {{"w", "w-", "a", "r+", None}, default: "w-" Persistence mode: “w” means create (overwrite if exists); “w-” means create (fail if exists); “a” means override existing variables (create if does not exist); “r+” means modify existing array values only (raise an error if any metadata or shapes would change). The default mode is “a” if append_dim is set. Otherwise, it is “r+” if region is set and w- otherwise. encoding : dict, optional Nested dictionary with variable names as keys and dictionaries of variable specific encodings as values, e.g., ``{"root/set1": {"my_variable": {"dtype": "int16", "scale_factor": 0.1}, ...}, ...}``. See ``xarray.Dataset.to_zarr`` for available options. consolidated : bool If True, apply zarr's `consolidate_metadata` function to the store after writing metadata for all groups. 
kwargs : Additional keyword arguments to be passed to ``xarray.Dataset.to_zarr`` """ from .io import _datatree_to_zarr _datatree_to_zarr( self, store, mode=mode, encoding=encoding, consolidated=consolidated, **kwargs, ) def plot(self): raise NotImplementedError datatree-0.0.14/datatree/extensions.py000066400000000000000000000010151455257650300177400ustar00rootroot00000000000000from xarray.core.extensions import _register_accessor from .datatree import DataTree def register_datatree_accessor(name): """Register a custom accessor on DataTree objects. Parameters ---------- name : str Name under which the accessor should be registered. A warning is issued if this name conflicts with a preexisting attribute. See Also -------- xarray.register_dataarray_accessor xarray.register_dataset_accessor """ return _register_accessor(name, DataTree) datatree-0.0.14/datatree/formatting.py000066400000000000000000000056331455257650300177250ustar00rootroot00000000000000from typing import TYPE_CHECKING from xarray.core.formatting import _compat_to_str, diff_dataset_repr from .mapping import diff_treestructure from .render import RenderTree if TYPE_CHECKING: from .datatree import DataTree def diff_nodewise_summary(a, b, compat): """Iterates over all corresponding nodes, recording differences between data at each location.""" compat_str = _compat_to_str(compat) summary = [] for node_a, node_b in zip(a.subtree, b.subtree): a_ds, b_ds = node_a.ds, node_b.ds if not a_ds._all_compat(b_ds, compat): dataset_diff = diff_dataset_repr(a_ds, b_ds, compat_str) data_diff = "\n".join(dataset_diff.split("\n", 1)[1:]) nodediff = ( f"\nData in nodes at position '{node_a.path}' do not match:" f"{data_diff}" ) summary.append(nodediff) return "\n".join(summary) def diff_tree_repr(a, b, compat): summary = [ f"Left and right {type(a).__name__} objects are not {_compat_to_str(compat)}" ] # TODO check root parents? 
strict_names = True if compat in ["equals", "identical"] else False treestructure_diff = diff_treestructure(a, b, strict_names) # If the trees structures are different there is no point comparing each node # TODO we could show any differences in nodes up to the first place that structure differs? if treestructure_diff or compat == "isomorphic": summary.append("\n" + treestructure_diff) else: nodewise_diff = diff_nodewise_summary(a, b, compat) summary.append("\n" + nodewise_diff) return "\n".join(summary) def datatree_repr(dt): """A printable representation of the structure of this entire tree.""" renderer = RenderTree(dt) lines = [] for pre, fill, node in renderer: node_repr = _single_node_repr(node) node_line = f"{pre}{node_repr.splitlines()[0]}" lines.append(node_line) if node.has_data or node.has_attrs: ds_repr = node_repr.splitlines()[2:] for line in ds_repr: if len(node.children) > 0: lines.append(f"{fill}{renderer.style.vertical}{line}") else: lines.append(f"{fill}{' ' * len(renderer.style.vertical)}{line}") # Tack on info about whether or not root node has a parent at the start first_line = lines[0] parent = f'"{dt.parent.name}"' if dt.parent is not None else "None" first_line_with_parent = first_line[:-1] + f", parent={parent})" lines[0] = first_line_with_parent return "\n".join(lines) def _single_node_repr(node: "DataTree") -> str: """Information about this node, not including its relationships to other nodes.""" node_info = f"DataTree('{node.name}')" if node.has_data or node.has_attrs: ds_info = "\n" + repr(node.ds) else: ds_info = "" return node_info + ds_info datatree-0.0.14/datatree/formatting_html.py000066400000000000000000000064241455257650300207500ustar00rootroot00000000000000from functools import partial from html import escape from typing import Any, Mapping from xarray.core.formatting_html import ( _mapping_section, _obj_repr, attr_section, coord_section, datavar_section, dim_section, ) from xarray.core.options import OPTIONS 
OPTIONS["display_expand_groups"] = "default" def summarize_children(children: Mapping[str, Any]) -> str: N_CHILDREN = len(children) - 1 # Get result from node_repr and wrap it lines_callback = lambda n, c, end: _wrap_repr(node_repr(n, c), end=end) children_html = "".join( lines_callback(n, c, end=False) # Long lines if i < N_CHILDREN else lines_callback(n, c, end=True) # Short lines for i, (n, c) in enumerate(children.items()) ) return "".join( [ "
", children_html, "
", ] ) children_section = partial( _mapping_section, name="Groups", details_func=summarize_children, max_items_collapse=1, expand_option_name="display_expand_groups", ) def node_repr(group_title: str, dt: Any) -> str: header_components = [f"
{escape(group_title)}
"] ds = dt.ds sections = [ children_section(dt.children), dim_section(ds), coord_section(ds.coords), datavar_section(ds.data_vars), attr_section(ds.attrs), ] return _obj_repr(ds, header_components, sections) def _wrap_repr(r: str, end: bool = False) -> str: """ Wrap HTML representation with a tee to the left of it. Enclosing HTML tag is a
with :code:`display: inline-grid` style. Turns: [ title ] | details | |_____________| into (A): |─ [ title ] | | details | | |_____________| or (B): └─ [ title ] | details | |_____________| Parameters ---------- r: str HTML representation to wrap. end: bool Specify if the line on the left should continue or end. Default is True. Returns ------- str Wrapped HTML representation. Tee color is set to the variable :code:`--xr-border-color`. """ # height of line end = bool(end) height = "100%" if end is False else "1.2em" return "".join( [ "
", "
", "
", "
", "
", "
", "
    ", r, "
" "
", "
", ] ) def datatree_repr(dt: Any) -> str: obj_type = f"datatree.{type(dt).__name__}" return node_repr(obj_type, dt) datatree-0.0.14/datatree/io.py000066400000000000000000000156621455257650300161650ustar00rootroot00000000000000from xarray import Dataset, open_dataset from .datatree import DataTree, NodePath def _iter_zarr_groups(root, parent="/"): parent = NodePath(parent) for path, group in root.groups(): gpath = parent / path yield str(gpath) yield from _iter_zarr_groups(group, parent=gpath) def _iter_nc_groups(root, parent="/"): parent = NodePath(parent) for path, group in root.groups.items(): gpath = parent / path yield str(gpath) yield from _iter_nc_groups(group, parent=gpath) def _get_nc_dataset_class(engine): if engine == "netcdf4": from netCDF4 import Dataset # type: ignore elif engine == "h5netcdf": from h5netcdf.legacyapi import Dataset # type: ignore elif engine is None: try: from netCDF4 import Dataset except ImportError: from h5netcdf.legacyapi import Dataset # type: ignore else: raise ValueError(f"unsupported engine: {engine}") return Dataset def open_datatree(filename_or_obj, engine=None, **kwargs) -> DataTree: """ Open and decode a dataset from a file or file-like object, creating one Tree node for each group in the file. Parameters ---------- filename_or_obj : str, Path, file-like, or DataStore Strings and Path objects are interpreted as a path to a netCDF file or Zarr store. engine : str, optional Xarray backend engine to us. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`. kwargs : Additional keyword arguments passed to ``xarray.open_dataset`` for each group. 
Returns ------- DataTree """ if engine == "zarr": return _open_datatree_zarr(filename_or_obj, **kwargs) elif engine in [None, "netcdf4", "h5netcdf"]: return _open_datatree_netcdf(filename_or_obj, engine=engine, **kwargs) else: raise ValueError("Unsupported engine") def _open_datatree_netcdf(filename: str, **kwargs) -> DataTree: ncDataset = _get_nc_dataset_class(kwargs.get("engine", None)) ds = open_dataset(filename, **kwargs) tree_root = DataTree.from_dict({"/": ds}) with ncDataset(filename, mode="r") as ncds: for path in _iter_nc_groups(ncds): subgroup_ds = open_dataset(filename, group=path, **kwargs) # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again node_name = NodePath(path).name new_node: DataTree = DataTree(name=node_name, data=subgroup_ds) tree_root._set_item( path, new_node, allow_overwrite=False, new_nodes_along_path=True, ) return tree_root def _open_datatree_zarr(store, **kwargs) -> DataTree: import zarr # type: ignore zds = zarr.open_group(store, mode="r") ds = open_dataset(store, engine="zarr", **kwargs) tree_root = DataTree.from_dict({"/": ds}) for path in _iter_zarr_groups(zds): try: subgroup_ds = open_dataset(store, engine="zarr", group=path, **kwargs) except zarr.errors.PathNotFoundError: subgroup_ds = Dataset() # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again node_name = NodePath(path).name new_node: DataTree = DataTree(name=node_name, data=subgroup_ds) tree_root._set_item( path, new_node, allow_overwrite=False, new_nodes_along_path=True, ) return tree_root def _create_empty_netcdf_group(filename, group, mode, engine): ncDataset = _get_nc_dataset_class(engine) with ncDataset(filename, mode=mode) as rootgrp: rootgrp.createGroup(group) def _datatree_to_netcdf( dt: DataTree, filepath, mode: str = "w", encoding=None, unlimited_dims=None, **kwargs, ): if kwargs.get("format", None) not in [None, "NETCDF4"]: raise ValueError("to_netcdf only supports the NETCDF4 
format") engine = kwargs.get("engine", None) if engine not in [None, "netcdf4", "h5netcdf"]: raise ValueError("to_netcdf only supports the netcdf4 and h5netcdf engines") if kwargs.get("group", None) is not None: raise NotImplementedError( "specifying a root group for the tree has not been implemented" ) if not kwargs.get("compute", True): raise NotImplementedError("compute=False has not been implemented yet") if encoding is None: encoding = {} # In the future, we may want to expand this check to insure all the provided encoding # options are valid. For now, this simply checks that all provided encoding keys are # groups in the datatree. if set(encoding) - set(dt.groups): raise ValueError( f"unexpected encoding group name(s) provided: {set(encoding) - set(dt.groups)}" ) if unlimited_dims is None: unlimited_dims = {} for node in dt.subtree: ds = node.ds group_path = node.path if ds is None: _create_empty_netcdf_group(filepath, group_path, mode, engine) else: ds.to_netcdf( filepath, group=group_path, mode=mode, encoding=encoding.get(node.path), unlimited_dims=unlimited_dims.get(node.path), **kwargs, ) mode = "r+" def _create_empty_zarr_group(store, group, mode): import zarr # type: ignore root = zarr.open_group(store, mode=mode) root.create_group(group, overwrite=True) def _datatree_to_zarr( dt: DataTree, store, mode: str = "w-", encoding=None, consolidated: bool = True, **kwargs, ): from zarr.convenience import consolidate_metadata # type: ignore if kwargs.get("group", None) is not None: raise NotImplementedError( "specifying a root group for the tree has not been implemented" ) if not kwargs.get("compute", True): raise NotImplementedError("compute=False has not been implemented yet") if encoding is None: encoding = {} # In the future, we may want to expand this check to insure all the provided encoding # options are valid. For now, this simply checks that all provided encoding keys are # groups in the datatree. 
if set(encoding) - set(dt.groups): raise ValueError( f"unexpected encoding group name(s) provided: {set(encoding) - set(dt.groups)}" ) for node in dt.subtree: ds = node.ds group_path = node.path if ds is None: _create_empty_zarr_group(store, group_path, mode) else: ds.to_zarr( store, group=group_path, mode=mode, encoding=encoding.get(node.path), consolidated=False, **kwargs, ) if "w" in mode: mode = "a" if consolidated: consolidate_metadata(store) datatree-0.0.14/datatree/iterators.py000066400000000000000000000070531455257650300175650ustar00rootroot00000000000000from abc import abstractmethod from collections import abc from typing import Callable, Iterator, List, Optional from .treenode import Tree """These iterators are copied from anytree.iterators, with minor modifications.""" class AbstractIter(abc.Iterator): def __init__( self, node: Tree, filter_: Optional[Callable] = None, stop: Optional[Callable] = None, maxlevel: Optional[int] = None, ): """ Iterate over tree starting at `node`. Base class for all iterators. Keyword Args: filter_: function called with every `node` as argument, `node` is returned if `True`. stop: stop iteration at `node` if `stop` function returns `True` for `node`. maxlevel (int): maximum descending in the node hierarchy. 
""" self.node = node self.filter_ = filter_ self.stop = stop self.maxlevel = maxlevel self.__iter = None def __init(self): node = self.node maxlevel = self.maxlevel filter_ = self.filter_ or AbstractIter.__default_filter stop = self.stop or AbstractIter.__default_stop children = ( [] if AbstractIter._abort_at_level(1, maxlevel) else AbstractIter._get_children([node], stop) ) return self._iter(children, filter_, stop, maxlevel) @staticmethod def __default_filter(node): return True @staticmethod def __default_stop(node): return False def __iter__(self) -> Iterator[Tree]: return self def __next__(self) -> Iterator[Tree]: if self.__iter is None: self.__iter = self.__init() item = next(self.__iter) # type: ignore[call-overload] return item @staticmethod @abstractmethod def _iter(children: List[Tree], filter_, stop, maxlevel) -> Iterator[Tree]: ... @staticmethod def _abort_at_level(level, maxlevel): return maxlevel is not None and level > maxlevel @staticmethod def _get_children(children: List[Tree], stop) -> List[Tree]: return [child for child in children if not stop(child)] class PreOrderIter(AbstractIter): """ Iterate over tree applying pre-order strategy starting at `node`. Start at root and go-down until reaching a leaf node. Step upwards then, and search for the next leafs. """ @staticmethod def _iter(children, filter_, stop, maxlevel): for child_ in children: if stop(child_): continue if filter_(child_): yield child_ if not AbstractIter._abort_at_level(2, maxlevel): descendantmaxlevel = maxlevel - 1 if maxlevel else None for descendant_ in PreOrderIter._iter( list(child_.children.values()), filter_, stop, descendantmaxlevel ): yield descendant_ class LevelOrderIter(AbstractIter): """ Iterate over tree applying level-order strategy starting at `node`. 
""" @staticmethod def _iter(children, filter_, stop, maxlevel): level = 1 while children: next_children = [] for child in children: if filter_(child): yield child next_children += AbstractIter._get_children( list(child.children.values()), stop ) children = next_children level += 1 if AbstractIter._abort_at_level(level, maxlevel): break datatree-0.0.14/datatree/mapping.py000066400000000000000000000324341455257650300172050ustar00rootroot00000000000000from __future__ import annotations import functools import sys from itertools import repeat from textwrap import dedent from typing import TYPE_CHECKING, Callable, Tuple from xarray import DataArray, Dataset from .iterators import LevelOrderIter from .treenode import NodePath, TreeNode if TYPE_CHECKING: from .datatree import DataTree class TreeIsomorphismError(ValueError): """Error raised if two tree objects do not share the same node structure.""" pass def check_isomorphic( a: DataTree, b: DataTree, require_names_equal: bool = False, check_from_root: bool = True, ): """ Check that two trees have the same structure, raising an error if not. Does not compare the actual data in the nodes. By default this function only checks that subtrees are isomorphic, not the entire tree above (if it exists). Can instead optionally check the entire trees starting from the root, which will ensure all Can optionally check if corresponding nodes should have the same name. Parameters ---------- a : DataTree b : DataTree require_names_equal : Bool Whether or not to also check that each node has the same name as its counterpart. check_from_root : Bool Whether or not to first traverse to the root of the trees before checking for isomorphism. If a & b have no parents then this has no effect. Raises ------ TypeError If either a or b are not tree objects. TreeIsomorphismError If a and b are tree objects, but are not isomorphic to one another. 
Also optionally raised if their structure is isomorphic, but the names of any two respective nodes are not equal. """ if not isinstance(a, TreeNode): raise TypeError(f"Argument `a` is not a tree, it is of type {type(a)}") if not isinstance(b, TreeNode): raise TypeError(f"Argument `b` is not a tree, it is of type {type(b)}") if check_from_root: a = a.root b = b.root diff = diff_treestructure(a, b, require_names_equal=require_names_equal) if diff: raise TreeIsomorphismError("DataTree objects are not isomorphic:\n" + diff) def diff_treestructure(a: DataTree, b: DataTree, require_names_equal: bool) -> str: """ Return a summary of why two trees are not isomorphic. If they are isomorphic return an empty string. """ # Walking nodes in "level-order" fashion means walking down from the root breadth-first. # Checking for isomorphism by walking in this way implicitly assumes that the tree is an ordered tree # (which it is so long as children are stored in a tuple or list rather than in a set). for node_a, node_b in zip(LevelOrderIter(a), LevelOrderIter(b)): path_a, path_b = node_a.path, node_b.path if require_names_equal: if node_a.name != node_b.name: diff = dedent( f"""\ Node '{path_a}' in the left object has name '{node_a.name}' Node '{path_b}' in the right object has name '{node_b.name}'""" ) return diff if len(node_a.children) != len(node_b.children): diff = dedent( f"""\ Number of children on node '{path_a}' of the left object: {len(node_a.children)} Number of children on node '{path_b}' of the right object: {len(node_b.children)}""" ) return diff return "" def map_over_subtree(func: Callable) -> Callable: """ Decorator which turns a function which acts on (and returns) Datasets into one which acts on and returns DataTrees. Applies a function to every dataset in one or more subtrees, returning new trees which store the results. The function will be applied to any data-containing dataset stored in any of the nodes in the trees. 
The returned trees will have the same structure as the supplied trees. `func` needs to return one Datasets, DataArrays, or None in order to be able to rebuild the subtrees after mapping, as each result will be assigned to its respective node of a new tree via `DataTree.__setitem__`. Any returned value that is one of these types will be stacked into a separate tree before returning all of them. The trees passed to the resulting function must all be isomorphic to one another. Their nodes need not be named similarly, but all the output trees will have nodes named in the same way as the first tree passed. Parameters ---------- func : callable Function to apply to datasets with signature: `func(*args, **kwargs) -> Union[Dataset, Iterable[Dataset]]`. (i.e. func must accept at least one Dataset and return at least one Dataset.) Function will not be applied to any nodes without datasets. *args : tuple, optional Positional arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets via .ds . **kwargs : Any Keyword arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets via .ds . Returns ------- mapped : callable Wrapped function which returns one or more tree(s) created from results of applying ``func`` to the dataset at each node. See also -------- DataTree.map_over_subtree DataTree.map_over_subtree_inplace DataTree.subtree """ # TODO examples in the docstring # TODO inspect function to work out immediately if the wrong number of arguments were passed for it? 
@functools.wraps(func) def _map_over_subtree(*args, **kwargs) -> DataTree | Tuple[DataTree, ...]: """Internal function which maps func over every node in tree, returning a tree of the results.""" from .datatree import DataTree all_tree_inputs = [a for a in args if isinstance(a, DataTree)] + [ a for a in kwargs.values() if isinstance(a, DataTree) ] if len(all_tree_inputs) > 0: first_tree, *other_trees = all_tree_inputs else: raise TypeError("Must pass at least one tree object") for other_tree in other_trees: # isomorphism is transitive so this is enough to guarantee all trees are mutually isomorphic check_isomorphic( first_tree, other_tree, require_names_equal=False, check_from_root=False ) # Walk all trees simultaneously, applying func to all nodes that lie in same position in different trees # We don't know which arguments are DataTrees so we zip all arguments together as iterables # Store tuples of results in a dict because we don't yet know how many trees we need to rebuild to return out_data_objects = {} args_as_tree_length_iterables = [ a.subtree if isinstance(a, DataTree) else repeat(a) for a in args ] n_args = len(args_as_tree_length_iterables) kwargs_as_tree_length_iterables = { k: v.subtree if isinstance(v, DataTree) else repeat(v) for k, v in kwargs.items() } for node_of_first_tree, *all_node_args in zip( first_tree.subtree, *args_as_tree_length_iterables, *list(kwargs_as_tree_length_iterables.values()), ): node_args_as_datasetviews = [ a.ds if isinstance(a, DataTree) else a for a in all_node_args[:n_args] ] node_kwargs_as_datasetviews = dict( zip( [k for k in kwargs_as_tree_length_iterables.keys()], [ v.ds if isinstance(v, DataTree) else v for v in all_node_args[n_args:] ], ) ) func_with_error_context = _handle_errors_with_path_context( node_of_first_tree.path )(func) if node_of_first_tree.has_data: # call func on the data in this particular set of corresponding nodes results = func_with_error_context( *node_args_as_datasetviews, 
**node_kwargs_as_datasetviews ) elif node_of_first_tree.has_attrs: # propagate attrs results = node_of_first_tree.ds else: # nothing to propagate so use fastpath to create empty node in new tree results = None # TODO implement mapping over multiple trees in-place using if conditions from here on? out_data_objects[node_of_first_tree.path] = results # Find out how many return values we received num_return_values = _check_all_return_values(out_data_objects) # Reconstruct 1+ subtrees from the dict of results, by filling in all nodes of all result trees original_root_path = first_tree.path result_trees = [] for i in range(num_return_values): out_tree_contents = {} for n in first_tree.subtree: p = n.path if p in out_data_objects.keys(): if isinstance(out_data_objects[p], tuple): output_node_data = out_data_objects[p][i] else: output_node_data = out_data_objects[p] else: output_node_data = None # Discard parentage so that new trees don't include parents of input nodes relative_path = str(NodePath(p).relative_to(original_root_path)) relative_path = "/" if relative_path == "." 
else relative_path out_tree_contents[relative_path] = output_node_data new_tree = DataTree.from_dict( out_tree_contents, name=first_tree.name, ) result_trees.append(new_tree) # If only one result then don't wrap it in a tuple if len(result_trees) == 1: return result_trees[0] else: return tuple(result_trees) return _map_over_subtree def _handle_errors_with_path_context(path): """Wraps given function so that if it fails it also raises path to node on which it failed.""" def decorator(func): def wrapper(*args, **kwargs): try: return func(*args, **kwargs) except Exception as e: if sys.version_info >= (3, 11): # Add the context information to the error message e.add_note( f"Raised whilst mapping function over node with path {path}" ) raise return wrapper return decorator def add_note(err: BaseException, msg: str) -> None: # TODO: remove once python 3.10 can be dropped if sys.version_info < (3, 11): err.__notes__ = getattr(err, "__notes__", []) + [msg] # type: ignore[attr-defined] else: err.add_note(msg) def _check_single_set_return_values(path_to_node, obj): """Check types returned from single evaluation of func, and return number of return values received from func.""" if isinstance(obj, (Dataset, DataArray)): return 1 elif isinstance(obj, tuple): for r in obj: if not isinstance(r, (Dataset, DataArray)): raise TypeError( f"One of the results of calling func on datasets on the nodes at position {path_to_node} is " f"of type {type(r)}, not Dataset or DataArray." ) return len(obj) else: raise TypeError( f"The result of calling func on the node at position {path_to_node} is of type {type(obj)}, not " f"Dataset or DataArray, nor a tuple of such types." 
) def _check_all_return_values(returned_objects): """Walk through all values returned by mapping func over subtrees, raising on any invalid or inconsistent types.""" if all(r is None for r in returned_objects.values()): raise TypeError( "Called supplied function on all nodes but found a return value of None for" "all of them." ) result_data_objects = [ (path_to_node, r) for path_to_node, r in returned_objects.items() if r is not None ] if len(result_data_objects) == 1: # Only one node in the tree: no need to check consistency of results between nodes path_to_node, result = result_data_objects[0] num_return_values = _check_single_set_return_values(path_to_node, result) else: prev_path, _ = result_data_objects[0] prev_num_return_values, num_return_values = None, None for path_to_node, obj in result_data_objects[1:]: num_return_values = _check_single_set_return_values(path_to_node, obj) if ( num_return_values != prev_num_return_values and prev_num_return_values is not None ): raise TypeError( f"Calling func on the nodes at position {path_to_node} returns {num_return_values} separate return " f"values, whereas calling func on the nodes at position {prev_path} instead returns " f"{prev_num_return_values} separate return values." ) prev_path, prev_num_return_values = path_to_node, num_return_values return num_return_values datatree-0.0.14/datatree/ops.py000066400000000000000000000152571455257650300163570ustar00rootroot00000000000000import textwrap from xarray import Dataset from .mapping import map_over_subtree """ Module which specifies the subset of xarray.Dataset's API which we wish to copy onto DataTree. Structured to mirror the way xarray defines Dataset's various operations internally, but does not actually import from xarray's internals directly, only the public-facing xarray.Dataset class. 
""" _MAPPED_DOCSTRING_ADDENDUM = textwrap.fill( "This method was copied from xarray.Dataset, but has been altered to " "call the method on the Datasets stored in every node of the subtree. " "See the `map_over_subtree` function for more details.", width=117, ) # TODO equals, broadcast_equals etc. # TODO do dask-related private methods need to be exposed? _DATASET_DASK_METHODS_TO_MAP = [ "load", "compute", "persist", "unify_chunks", "chunk", "map_blocks", ] _DATASET_METHODS_TO_MAP = [ "as_numpy", "set_coords", "reset_coords", "info", "isel", "sel", "head", "tail", "thin", "broadcast_like", "reindex_like", "reindex", "interp", "interp_like", "rename", "rename_dims", "rename_vars", "swap_dims", "expand_dims", "set_index", "reset_index", "reorder_levels", "stack", "unstack", "merge", "drop_vars", "drop_sel", "drop_isel", "drop_dims", "transpose", "dropna", "fillna", "interpolate_na", "ffill", "bfill", "combine_first", "reduce", "map", "diff", "shift", "roll", "sortby", "quantile", "rank", "differentiate", "integrate", "cumulative_integrate", "filter_by_attrs", "polyfit", "pad", "idxmin", "idxmax", "argmin", "argmax", "query", "curvefit", ] _ALL_DATASET_METHODS_TO_MAP = _DATASET_DASK_METHODS_TO_MAP + _DATASET_METHODS_TO_MAP _DATA_WITH_COORDS_METHODS_TO_MAP = [ "squeeze", "clip", "assign_coords", "where", "close", "isnull", "notnull", "isin", "astype", ] REDUCE_METHODS = ["all", "any"] NAN_REDUCE_METHODS = [ "max", "min", "mean", "prod", "sum", "std", "var", "median", ] NAN_CUM_METHODS = ["cumsum", "cumprod"] _TYPED_DATASET_OPS_TO_MAP = [ "__add__", "__sub__", "__mul__", "__pow__", "__truediv__", "__floordiv__", "__mod__", "__and__", "__xor__", "__or__", "__lt__", "__le__", "__gt__", "__ge__", "__eq__", "__ne__", "__radd__", "__rsub__", "__rmul__", "__rpow__", "__rtruediv__", "__rfloordiv__", "__rmod__", "__rand__", "__rxor__", "__ror__", "__iadd__", "__isub__", "__imul__", "__ipow__", "__itruediv__", "__ifloordiv__", "__imod__", "__iand__", "__ixor__", "__ior__", 
"__neg__", "__pos__", "__abs__", "__invert__", "round", "argsort", "conj", "conjugate", ] # TODO NUM_BINARY_OPS apparently aren't defined on DatasetArithmetic, and don't appear to be injected anywhere... _ARITHMETIC_METHODS_TO_MAP = ( REDUCE_METHODS + NAN_REDUCE_METHODS + NAN_CUM_METHODS + _TYPED_DATASET_OPS_TO_MAP + ["__array_ufunc__"] ) def _wrap_then_attach_to_cls( target_cls_dict, source_cls, methods_to_set, wrap_func=None ): """ Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree) Result is like having written this in the classes' definition: ``` @wrap_func def method_name(self, *args, **kwargs): return self.method(*args, **kwargs) ``` Every method attached here needs to have a return value of Dataset or DataArray in order to construct a new tree. Parameters ---------- target_cls_dict : MappingProxy The __dict__ attribute of the class which we want the methods to be added to. (The __dict__ attribute can also be accessed by calling vars() from within that classes' definition.) This will be updated by this function. source_cls : class Class object from which we want to copy methods (and optionally wrap them). Should be the actual class object (or instance), not just the __dict__. methods_to_set : Iterable[Tuple[str, callable]] The method names and definitions supplied as a list of (method_name_string, method) pairs. This format matches the output of inspect.getmembers(). wrap_func : callable, optional Function to decorate each method with. Must have the same return type as the method. 
""" for method_name in methods_to_set: orig_method = getattr(source_cls, method_name) wrapped_method = ( wrap_func(orig_method) if wrap_func is not None else orig_method ) target_cls_dict[method_name] = wrapped_method if wrap_func is map_over_subtree: # Add a paragraph to the method's docstring explaining how it's been mapped orig_method_docstring = orig_method.__doc__ # if orig_method_docstring is not None: # if "\n" in orig_method_docstring: # new_method_docstring = orig_method_docstring.replace( # "\n", _MAPPED_DOCSTRING_ADDENDUM, 1 # ) # else: # new_method_docstring = ( # orig_method_docstring + f"\n\n{_MAPPED_DOCSTRING_ADDENDUM}" # ) setattr(target_cls_dict[method_name], "__doc__", orig_method_docstring) class MappedDatasetMethodsMixin: """ Mixin to add methods defined specifically on the Dataset class such as .query(), but wrapped to map over all nodes in the subtree. """ _wrap_then_attach_to_cls( target_cls_dict=vars(), source_cls=Dataset, methods_to_set=_ALL_DATASET_METHODS_TO_MAP, wrap_func=map_over_subtree, ) class MappedDataWithCoords: """ Mixin to add coordinate-aware Dataset methods such as .where(), but wrapped to map over all nodes in the subtree. """ # TODO add mapped versions of groupby, weighted, rolling, rolling_exp, coarsen, resample _wrap_then_attach_to_cls( target_cls_dict=vars(), source_cls=Dataset, methods_to_set=_DATA_WITH_COORDS_METHODS_TO_MAP, wrap_func=map_over_subtree, ) class DataTreeArithmeticMixin: """ Mixin to add Dataset arithmetic operations such as __add__, reduction methods such as .mean(), and enable numpy ufuncs such as np.sin(), but wrapped to map over all nodes in the subtree. 
""" _wrap_then_attach_to_cls( target_cls_dict=vars(), source_cls=Dataset, methods_to_set=_ARITHMETIC_METHODS_TO_MAP, wrap_func=map_over_subtree, ) datatree-0.0.14/datatree/py.typed000066400000000000000000000000001455257650300166570ustar00rootroot00000000000000datatree-0.0.14/datatree/render.py000066400000000000000000000206761455257650300170360ustar00rootroot00000000000000""" String Tree Rendering. Copied from anytree. """ import collections from typing import TYPE_CHECKING if TYPE_CHECKING: from .datatree import DataTree Row = collections.namedtuple("Row", ("pre", "fill", "node")) class AbstractStyle(object): def __init__(self, vertical, cont, end): """ Tree Render Style. Args: vertical: Sign for vertical line. cont: Chars for a continued branch. end: Chars for the last branch. """ super(AbstractStyle, self).__init__() self.vertical = vertical self.cont = cont self.end = end assert ( len(cont) == len(vertical) == len(end) ), f"'{vertical}', '{cont}' and '{end}' need to have equal length" @property def empty(self): """Empty string as placeholder.""" return " " * len(self.end) def __repr__(self): return f"{self.__class__.__name__}()" class ContStyle(AbstractStyle): def __init__(self): """ Continued style, without gaps. >>> from anytree import Node, RenderTree >>> root = Node("root") >>> s0 = Node("sub0", parent=root) >>> s0b = Node("sub0B", parent=s0) >>> s0a = Node("sub0A", parent=s0) >>> s1 = Node("sub1", parent=root) >>> print(RenderTree(root, style=ContStyle())) Node('/root') ├── Node('/root/sub0') │ ├── Node('/root/sub0/sub0B') │ └── Node('/root/sub0/sub0A') └── Node('/root/sub1') """ super(ContStyle, self).__init__( "\u2502 ", "\u251c\u2500\u2500 ", "\u2514\u2500\u2500 " ) class RenderTree(object): def __init__( self, node: "DataTree", style=ContStyle(), childiter=list, maxlevel=None ): """ Render tree starting at `node`. Keyword Args: style (AbstractStyle): Render Style. childiter: Child iterator. maxlevel: Limit rendering to this depth. 
:any:`RenderTree` is an iterator, returning a tuple with 3 items: `pre` tree prefix. `fill` filling for multiline entries. `node` :any:`NodeMixin` object. It is up to the user to assemble these parts to a whole. >>> from anytree import Node, RenderTree >>> root = Node("root", lines=["c0fe", "c0de"]) >>> s0 = Node("sub0", parent=root, lines=["ha", "ba"]) >>> s0b = Node("sub0B", parent=s0, lines=["1", "2", "3"]) >>> s0a = Node("sub0A", parent=s0, lines=["a", "b"]) >>> s1 = Node("sub1", parent=root, lines=["Z"]) Simple one line: >>> for pre, _, node in RenderTree(root): ... print("%s%s" % (pre, node.name)) ... root ├── sub0 │ ├── sub0B │ └── sub0A └── sub1 Multiline: >>> for pre, fill, node in RenderTree(root): ... print("%s%s" % (pre, node.lines[0])) ... for line in node.lines[1:]: ... print("%s%s" % (fill, line)) ... c0fe c0de ├── ha │ ba │ ├── 1 │ │ 2 │ │ 3 │ └── a │ b └── Z `maxlevel` limits the depth of the tree: >>> print(RenderTree(root, maxlevel=2)) Node('/root', lines=['c0fe', 'c0de']) ├── Node('/root/sub0', lines=['ha', 'ba']) └── Node('/root/sub1', lines=['Z']) The `childiter` is responsible for iterating over child nodes at the same level. An reversed order can be achived by using `reversed`. >>> for row in RenderTree(root, childiter=reversed): ... print("%s%s" % (row.pre, row.node.name)) ... root ├── sub1 └── sub0 ├── sub0A └── sub0B Or writing your own sort function: >>> def mysort(items): ... return sorted(items, key=lambda item: item.name) ... >>> for row in RenderTree(root, childiter=mysort): ... print("%s%s" % (row.pre, row.node.name)) ... 
root ├── sub0 │ ├── sub0A │ └── sub0B └── sub1 :any:`by_attr` simplifies attribute rendering and supports multiline: >>> print(RenderTree(root).by_attr()) root ├── sub0 │ ├── sub0B │ └── sub0A └── sub1 >>> print(RenderTree(root).by_attr("lines")) c0fe c0de ├── ha │ ba │ ├── 1 │ │ 2 │ │ 3 │ └── a │ b └── Z And can be a function: >>> print(RenderTree(root).by_attr(lambda n: " ".join(n.lines))) c0fe c0de ├── ha ba │ ├── 1 2 3 │ └── a b └── Z """ if not isinstance(style, AbstractStyle): style = style() self.node = node self.style = style self.childiter = childiter self.maxlevel = maxlevel def __iter__(self): return self.__next(self.node, tuple()) def __next(self, node, continues, level=0): yield RenderTree.__item(node, continues, self.style) children = node.children.values() level += 1 if children and (self.maxlevel is None or level < self.maxlevel): children = self.childiter(children) for child, is_last in _is_last(children): for grandchild in self.__next( child, continues + (not is_last,), level=level ): yield grandchild @staticmethod def __item(node, continues, style): if not continues: return Row("", "", node) else: items = [style.vertical if cont else style.empty for cont in continues] indent = "".join(items[:-1]) branch = style.cont if continues[-1] else style.end pre = indent + branch fill = "".join(items) return Row(pre, fill, node) def __str__(self): lines = ["%s%r" % (pre, node) for pre, _, node in self] return "\n".join(lines) def __repr__(self): classname = self.__class__.__name__ args = [ repr(self.node), "style=%s" % repr(self.style), "childiter=%s" % repr(self.childiter), ] return "%s(%s)" % (classname, ", ".join(args)) def by_attr(self, attrname="name"): """ Return rendered tree with node attribute `attrname`. 
>>> from anytree import AnyNode, RenderTree >>> root = AnyNode(id="root") >>> s0 = AnyNode(id="sub0", parent=root) >>> s0b = AnyNode(id="sub0B", parent=s0, foo=4, bar=109) >>> s0a = AnyNode(id="sub0A", parent=s0) >>> s1 = AnyNode(id="sub1", parent=root) >>> s1a = AnyNode(id="sub1A", parent=s1) >>> s1b = AnyNode(id="sub1B", parent=s1, bar=8) >>> s1c = AnyNode(id="sub1C", parent=s1) >>> s1ca = AnyNode(id="sub1Ca", parent=s1c) >>> print(RenderTree(root).by_attr("id")) root ├── sub0 │ ├── sub0B │ └── sub0A └── sub1 ├── sub1A ├── sub1B └── sub1C └── sub1Ca """ def get(): for pre, fill, node in self: attr = ( attrname(node) if callable(attrname) else getattr(node, attrname, "") ) if isinstance(attr, (list, tuple)): lines = attr else: lines = str(attr).split("\n") yield "%s%s" % (pre, lines[0]) for line in lines[1:]: yield "%s%s" % (fill, line) return "\n".join(get()) def _is_last(iterable): iter_ = iter(iterable) try: nextitem = next(iter_) except StopIteration: pass else: item = nextitem while True: try: nextitem = next(iter_) yield item, False except StopIteration: yield nextitem, True break item = nextitem datatree-0.0.14/datatree/testing.py000066400000000000000000000070471455257650300172310ustar00rootroot00000000000000from xarray.testing.assertions import ensure_warnings from .datatree import DataTree from .formatting import diff_tree_repr @ensure_warnings def assert_isomorphic(a: DataTree, b: DataTree, from_root: bool = False): """ Two DataTrees are considered isomorphic if every node has the same number of children. Nothing about the data in each node is checked. Isomorphism is a necessary condition for two trees to be used in a nodewise binary operation, such as tree1 + tree2. By default this function does not check any part of the tree above the given node. Therefore this function can be used as default to check that two subtrees are isomorphic. Parameters ---------- a : DataTree The first object to compare. b : DataTree The second object to compare. 
@ensure_warnings
def assert_equal(a: DataTree, b: DataTree, from_root: bool = True):
    """
    Assert that two DataTrees are equal.

    Two DataTrees are equal if they have isomorphic node structures, with matching node names,
    and if they have matching variables and coordinates, all of which are equal.

    By default this method will check the whole tree above the given node.

    Parameters
    ----------
    a : DataTree
        The first object to compare.
    b : DataTree
        The second object to compare.
    from_root : bool, optional, default is True
        Whether or not to first traverse to the root of the trees before checking for isomorphism.
        If a & b have no parents then this has no effect.

    Raises
    ------
    AssertionError
        If ``a`` is not an instance of ``type(b)``, or if ``a.equals(b)`` is false (the message is the
        output of ``diff_tree_repr``).
    TypeError
        If ``a`` is not a DataTree.

    See Also
    --------
    DataTree.equals
    assert_isomorphic
    assert_identical
    """
    __tracebackhide__ = True
    assert isinstance(a, type(b))

    if not isinstance(a, DataTree):
        raise TypeError(f"{type(a)} not of type DataTree")

    if from_root:
        a, b = a.root, b.root

    assert a.equals(b, from_root=from_root), diff_tree_repr(a, b, "equals")
If a & b have no parents then this has no effect. See Also -------- DataTree.identical assert_isomorphic assert_equal """ __tracebackhide__ = True assert isinstance(a, type(b)) if isinstance(a, DataTree): if from_root: a = a.root b = b.root assert a.identical(b, from_root=from_root), diff_tree_repr(a, b, "identical") else: raise TypeError(f"{type(a)} not of type DataTree") datatree-0.0.14/datatree/tests/000077500000000000000000000000001455257650300163345ustar00rootroot00000000000000datatree-0.0.14/datatree/tests/__init__.py000066400000000000000000000015541455257650300204520ustar00rootroot00000000000000import importlib import pytest from packaging import version def _importorskip(modname, minversion=None): try: mod = importlib.import_module(modname) has = True if minversion is not None: if LooseVersion(mod.__version__) < LooseVersion(minversion): raise ImportError("Minimum version not satisfied") except ImportError: has = False func = pytest.mark.skipif(not has, reason=f"requires {modname}") return has, func def LooseVersion(vstring): # Our development version is something like '0.10.9+aac7bfc' # This function just ignores the git commit id. 
vstring = vstring.split("+")[0] return version.parse(vstring) has_zarr, requires_zarr = _importorskip("zarr") has_h5netcdf, requires_h5netcdf = _importorskip("h5netcdf") has_netCDF4, requires_netCDF4 = _importorskip("netCDF4") datatree-0.0.14/datatree/tests/conftest.py000066400000000000000000000035421455257650300205370ustar00rootroot00000000000000import pytest import xarray as xr from datatree import DataTree @pytest.fixture(scope="module") def create_test_datatree(): """ Create a test datatree with this structure: |-- set1 | |-- | | Dimensions: () | | Data variables: | | a int64 0 | | b int64 1 | |-- set1 | |-- set2 |-- set2 | |-- | | Dimensions: (x: 2) | | Data variables: | | a (x) int64 2, 3 | | b (x) int64 0.1, 0.2 | |-- set1 |-- set3 |-- | Dimensions: (x: 2, y: 3) | Data variables: | a (y) int64 6, 7, 8 | set0 (x) int64 9, 10 The structure has deliberately repeated names of tags, variables, and dimensions in order to better check for bugs caused by name conflicts. """ def _create_test_datatree(modify=lambda ds: ds): set1_data = modify(xr.Dataset({"a": 0, "b": 1})) set2_data = modify(xr.Dataset({"a": ("x", [2, 3]), "b": ("x", [0.1, 0.2])})) root_data = modify(xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])})) # Avoid using __init__ so we can independently test it root = DataTree(data=root_data) set1 = DataTree(name="set1", parent=root, data=set1_data) DataTree(name="set1", parent=set1) DataTree(name="set2", parent=set1) set2 = DataTree(name="set2", parent=root, data=set2_data) DataTree(name="set1", parent=set2) DataTree(name="set3", parent=root) return root return _create_test_datatree @pytest.fixture(scope="module") def simple_datatree(create_test_datatree): """ Invoke create_test_datatree fixture (callback). Returns a DataTree. 
""" return create_test_datatree() datatree-0.0.14/datatree/tests/test_dataset_api.py000066400000000000000000000060511455257650300222250ustar00rootroot00000000000000import numpy as np import xarray as xr from datatree import DataTree from datatree.testing import assert_equal class TestDSMethodInheritance: def test_dataset_method(self): ds = xr.Dataset({"a": ("x", [1, 2, 3])}) dt = DataTree(data=ds) DataTree(name="results", parent=dt, data=ds) expected = DataTree(data=ds.isel(x=1)) DataTree(name="results", parent=expected, data=ds.isel(x=1)) result = dt.isel(x=1) assert_equal(result, expected) def test_reduce_method(self): ds = xr.Dataset({"a": ("x", [False, True, False])}) dt = DataTree(data=ds) DataTree(name="results", parent=dt, data=ds) expected = DataTree(data=ds.any()) DataTree(name="results", parent=expected, data=ds.any()) result = dt.any() assert_equal(result, expected) def test_nan_reduce_method(self): ds = xr.Dataset({"a": ("x", [1, 2, 3])}) dt = DataTree(data=ds) DataTree(name="results", parent=dt, data=ds) expected = DataTree(data=ds.mean()) DataTree(name="results", parent=expected, data=ds.mean()) result = dt.mean() assert_equal(result, expected) def test_cum_method(self): ds = xr.Dataset({"a": ("x", [1, 2, 3])}) dt = DataTree(data=ds) DataTree(name="results", parent=dt, data=ds) expected = DataTree(data=ds.cumsum()) DataTree(name="results", parent=expected, data=ds.cumsum()) result = dt.cumsum() assert_equal(result, expected) class TestOps: def test_binary_op_on_int(self): ds1 = xr.Dataset({"a": [5], "b": [3]}) ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]}) dt = DataTree(data=ds1) DataTree(name="subnode", data=ds2, parent=dt) expected = DataTree(data=ds1 * 5) DataTree(name="subnode", data=ds2 * 5, parent=expected) result = dt * 5 assert_equal(result, expected) def test_binary_op_on_dataset(self): ds1 = xr.Dataset({"a": [5], "b": [3]}) ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]}) dt = DataTree(data=ds1) DataTree(name="subnode", data=ds2, 
parent=dt) other_ds = xr.Dataset({"z": ("z", [0.1, 0.2])}) expected = DataTree(data=ds1 * other_ds) DataTree(name="subnode", data=ds2 * other_ds, parent=expected) result = dt * other_ds assert_equal(result, expected) def test_binary_op_on_datatree(self): ds1 = xr.Dataset({"a": [5], "b": [3]}) ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]}) dt = DataTree(data=ds1) DataTree(name="subnode", data=ds2, parent=dt) expected = DataTree(data=ds1 * ds1) DataTree(name="subnode", data=ds2 * ds2, parent=expected) result = dt * dt assert_equal(result, expected) class TestUFuncs: def test_tree(self, create_test_datatree): dt = create_test_datatree() expected = create_test_datatree(modify=lambda ds: np.sin(ds)) result_tree = np.sin(dt) assert_equal(result_tree, expected) datatree-0.0.14/datatree/tests/test_datatree.py000066400000000000000000000613401455257650300215420ustar00rootroot00000000000000from copy import copy, deepcopy import numpy as np import pytest import xarray as xr import xarray.testing as xrt from xarray.tests import create_test_data, source_ndarray import datatree.testing as dtt from datatree import DataTree, NotFoundInTreeError class TestTreeCreation: def test_empty(self): dt = DataTree(name="root") assert dt.name == "root" assert dt.parent is None assert dt.children == {} xrt.assert_identical(dt.to_dataset(), xr.Dataset()) def test_unnamed(self): dt = DataTree() assert dt.name is None def test_bad_names(self): with pytest.raises(TypeError): DataTree(name=5) with pytest.raises(ValueError): DataTree(name="folder/data") class TestFamilyTree: def test_setparent_unnamed_child_node_fails(self): john = DataTree(name="john") with pytest.raises(ValueError, match="unnamed"): DataTree(parent=john) def test_create_two_children(self): root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) set1_data = xr.Dataset({"a": 0, "b": 1}) root = DataTree(data=root_data) set1 = DataTree(name="set1", parent=root, data=set1_data) DataTree(name="set1", parent=root) 
DataTree(name="set2", parent=set1) def test_create_full_tree(self, simple_datatree): root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) set1_data = xr.Dataset({"a": 0, "b": 1}) set2_data = xr.Dataset({"a": ("x", [2, 3]), "b": ("x", [0.1, 0.2])}) root = DataTree(data=root_data) set1 = DataTree(name="set1", parent=root, data=set1_data) DataTree(name="set1", parent=set1) DataTree(name="set2", parent=set1) set2 = DataTree(name="set2", parent=root, data=set2_data) DataTree(name="set1", parent=set2) DataTree(name="set3", parent=root) expected = simple_datatree assert root.identical(expected) class TestNames: def test_child_gets_named_on_attach(self): sue = DataTree() mary = DataTree(children={"Sue": sue}) # noqa assert sue.name == "Sue" class TestPaths: def test_path_property(self): sue = DataTree() mary = DataTree(children={"Sue": sue}) john = DataTree(children={"Mary": mary}) # noqa assert sue.path == "/Mary/Sue" assert john.path == "/" def test_path_roundtrip(self): sue = DataTree() mary = DataTree(children={"Sue": sue}) john = DataTree(children={"Mary": mary}) # noqa assert john[sue.path] is sue def test_same_tree(self): mary = DataTree() kate = DataTree() john = DataTree(children={"Mary": mary, "Kate": kate}) # noqa assert mary.same_tree(kate) def test_relative_paths(self): sue = DataTree() mary = DataTree(children={"Sue": sue}) annie = DataTree() john = DataTree(children={"Mary": mary, "Annie": annie}) result = sue.relative_to(john) assert result == "Mary/Sue" assert john.relative_to(sue) == "../.." assert annie.relative_to(sue) == "../../Annie" assert sue.relative_to(annie) == "../Mary/Sue" assert sue.relative_to(sue) == "." 
evil_kate = DataTree() with pytest.raises( NotFoundInTreeError, match="nodes do not lie within the same tree" ): sue.relative_to(evil_kate) class TestStoreDatasets: def test_create_with_data(self): dat = xr.Dataset({"a": 0}) john = DataTree(name="john", data=dat) xrt.assert_identical(john.to_dataset(), dat) with pytest.raises(TypeError): DataTree(name="mary", parent=john, data="junk") # noqa def test_set_data(self): john = DataTree(name="john") dat = xr.Dataset({"a": 0}) john.ds = dat xrt.assert_identical(john.to_dataset(), dat) with pytest.raises(TypeError): john.ds = "junk" def test_has_data(self): john = DataTree(name="john", data=xr.Dataset({"a": 0})) assert john.has_data john = DataTree(name="john", data=None) assert not john.has_data def test_is_hollow(self): john = DataTree(data=xr.Dataset({"a": 0})) assert john.is_hollow eve = DataTree(children={"john": john}) assert eve.is_hollow eve.ds = xr.Dataset({"a": 1}) assert not eve.is_hollow class TestVariablesChildrenNameCollisions: def test_parent_already_has_variable_with_childs_name(self): dt = DataTree(data=xr.Dataset({"a": [0], "b": 1})) with pytest.raises(KeyError, match="already contains a data variable named a"): DataTree(name="a", data=None, parent=dt) def test_assign_when_already_child_with_variables_name(self): dt = DataTree(data=None) DataTree(name="a", data=None, parent=dt) with pytest.raises(KeyError, match="names would collide"): dt.ds = xr.Dataset({"a": 0}) dt.ds = xr.Dataset() new_ds = dt.to_dataset().assign(a=xr.DataArray(0)) with pytest.raises(KeyError, match="names would collide"): dt.ds = new_ds class TestGet: ... 
class TestGetItem: def test_getitem_node(self): folder1 = DataTree(name="folder1") results = DataTree(name="results", parent=folder1) highres = DataTree(name="highres", parent=results) assert folder1["results"] is results assert folder1["results/highres"] is highres def test_getitem_self(self): dt = DataTree() assert dt["."] is dt def test_getitem_single_data_variable(self): data = xr.Dataset({"temp": [0, 50]}) results = DataTree(name="results", data=data) xrt.assert_identical(results["temp"], data["temp"]) def test_getitem_single_data_variable_from_node(self): data = xr.Dataset({"temp": [0, 50]}) folder1 = DataTree(name="folder1") results = DataTree(name="results", parent=folder1) DataTree(name="highres", parent=results, data=data) xrt.assert_identical(folder1["results/highres/temp"], data["temp"]) def test_getitem_nonexistent_node(self): folder1 = DataTree(name="folder1") DataTree(name="results", parent=folder1) with pytest.raises(KeyError): folder1["results/highres"] def test_getitem_nonexistent_variable(self): data = xr.Dataset({"temp": [0, 50]}) results = DataTree(name="results", data=data) with pytest.raises(KeyError): results["pressure"] @pytest.mark.xfail(reason="Should be deprecated in favour of .subset") def test_getitem_multiple_data_variables(self): data = xr.Dataset({"temp": [0, 50], "p": [5, 8, 7]}) results = DataTree(name="results", data=data) xrt.assert_identical(results[["temp", "p"]], data[["temp", "p"]]) @pytest.mark.xfail(reason="Indexing needs to return whole tree (GH #77)") def test_getitem_dict_like_selection_access_to_dataset(self): data = xr.Dataset({"temp": [0, 50]}) results = DataTree(name="results", data=data) xrt.assert_identical(results[{"temp": 1}], data[{"temp": 1}]) class TestUpdate: def test_update(self): dt = DataTree() dt.update({"foo": xr.DataArray(0), "a": DataTree()}) expected = DataTree.from_dict({"/": xr.Dataset({"foo": 0}), "a": None}) print(dt) print(dt.children) print(dt._children) print(dt["a"]) print(expected) 
dtt.assert_equal(dt, expected) def test_update_new_named_dataarray(self): da = xr.DataArray(name="temp", data=[0, 50]) folder1 = DataTree(name="folder1") folder1.update({"results": da}) expected = da.rename("results") xrt.assert_equal(folder1["results"], expected) def test_update_doesnt_alter_child_name(self): dt = DataTree() dt.update({"foo": xr.DataArray(0), "a": DataTree(name="b")}) assert "a" in dt.children child = dt["a"] assert child.name == "a" def test_update_overwrite(self): actual = DataTree.from_dict({"a": DataTree(xr.Dataset({"x": 1}))}) actual.update({"a": DataTree(xr.Dataset({"x": 2}))}) expected = DataTree.from_dict({"a": DataTree(xr.Dataset({"x": 2}))}) print(actual) print(expected) dtt.assert_equal(actual, expected) class TestCopy: def test_copy(self, create_test_datatree): dt = create_test_datatree() for node in dt.root.subtree: node.attrs["Test"] = [1, 2, 3] for copied in [dt.copy(deep=False), copy(dt)]: dtt.assert_identical(dt, copied) for node, copied_node in zip(dt.root.subtree, copied.root.subtree): assert node.encoding == copied_node.encoding # Note: IndexVariable objects with string dtype are always # copied because of xarray.core.util.safe_cast_to_index. # Limiting the test to data variables. 
for k in node.data_vars: v0 = node.variables[k] v1 = copied_node.variables[k] assert source_ndarray(v0.data) is source_ndarray(v1.data) copied_node["foo"] = xr.DataArray(data=np.arange(5), dims="z") assert "foo" not in node copied_node.attrs["foo"] = "bar" assert "foo" not in node.attrs assert node.attrs["Test"] is copied_node.attrs["Test"] def test_copy_subtree(self): dt = DataTree.from_dict({"/level1/level2/level3": xr.Dataset()}) actual = dt["/level1/level2"].copy() expected = DataTree.from_dict({"/level3": xr.Dataset()}, name="level2") dtt.assert_identical(actual, expected) def test_deepcopy(self, create_test_datatree): dt = create_test_datatree() for node in dt.root.subtree: node.attrs["Test"] = [1, 2, 3] for copied in [dt.copy(deep=True), deepcopy(dt)]: dtt.assert_identical(dt, copied) for node, copied_node in zip(dt.root.subtree, copied.root.subtree): assert node.encoding == copied_node.encoding # Note: IndexVariable objects with string dtype are always # copied because of xarray.core.util.safe_cast_to_index. # Limiting the test to data variables. for k in node.data_vars: v0 = node.variables[k] v1 = copied_node.variables[k] assert source_ndarray(v0.data) is not source_ndarray(v1.data) copied_node["foo"] = xr.DataArray(data=np.arange(5), dims="z") assert "foo" not in node copied_node.attrs["foo"] = "bar" assert "foo" not in node.attrs assert node.attrs["Test"] is not copied_node.attrs["Test"] @pytest.mark.xfail(reason="data argument not yet implemented") def test_copy_with_data(self, create_test_datatree): orig = create_test_datatree() # TODO use .data_vars once that property is available data_vars = { k: v for k, v in orig.variables.items() if k not in orig._coord_names } new_data = {k: np.random.randn(*v.shape) for k, v in data_vars.items()} actual = orig.copy(data=new_data) expected = orig.copy() for k, v in new_data.items(): expected[k].data = v dtt.assert_identical(expected, actual) # TODO test parents and children? 
class TestSetItem: def test_setitem_new_child_node(self): john = DataTree(name="john") mary = DataTree(name="mary") john["mary"] = mary grafted_mary = john["mary"] assert grafted_mary.parent is john assert grafted_mary.name == "mary" def test_setitem_unnamed_child_node_becomes_named(self): john2 = DataTree(name="john2") john2["sonny"] = DataTree() assert john2["sonny"].name == "sonny" def test_setitem_new_grandchild_node(self): john = DataTree(name="john") mary = DataTree(name="mary", parent=john) rose = DataTree(name="rose") john["mary/rose"] = rose grafted_rose = john["mary/rose"] assert grafted_rose.parent is mary assert grafted_rose.name == "rose" def test_grafted_subtree_retains_name(self): subtree = DataTree(name="original_subtree_name") root = DataTree(name="root") root["new_subtree_name"] = subtree # noqa assert subtree.name == "original_subtree_name" def test_setitem_new_empty_node(self): john = DataTree(name="john") john["mary"] = DataTree() mary = john["mary"] assert isinstance(mary, DataTree) xrt.assert_identical(mary.to_dataset(), xr.Dataset()) def test_setitem_overwrite_data_in_node_with_none(self): john = DataTree(name="john") mary = DataTree(name="mary", parent=john, data=xr.Dataset()) john["mary"] = DataTree() xrt.assert_identical(mary.to_dataset(), xr.Dataset()) john.ds = xr.Dataset() with pytest.raises(ValueError, match="has no name"): john["."] = DataTree() @pytest.mark.xfail(reason="assigning Datasets doesn't yet create new nodes") def test_setitem_dataset_on_this_node(self): data = xr.Dataset({"temp": [0, 50]}) results = DataTree(name="results") results["."] = data xrt.assert_identical(results.to_dataset(), data) @pytest.mark.xfail(reason="assigning Datasets doesn't yet create new nodes") def test_setitem_dataset_as_new_node(self): data = xr.Dataset({"temp": [0, 50]}) folder1 = DataTree(name="folder1") folder1["results"] = data xrt.assert_identical(folder1["results"].to_dataset(), data) @pytest.mark.xfail(reason="assigning Datasets doesn't yet 
create new nodes") def test_setitem_dataset_as_new_node_requiring_intermediate_nodes(self): data = xr.Dataset({"temp": [0, 50]}) folder1 = DataTree(name="folder1") folder1["results/highres"] = data xrt.assert_identical(folder1["results/highres"].to_dataset(), data) def test_setitem_named_dataarray(self): da = xr.DataArray(name="temp", data=[0, 50]) folder1 = DataTree(name="folder1") folder1["results"] = da expected = da.rename("results") xrt.assert_equal(folder1["results"], expected) def test_setitem_unnamed_dataarray(self): data = xr.DataArray([0, 50]) folder1 = DataTree(name="folder1") folder1["results"] = data xrt.assert_equal(folder1["results"], data) def test_setitem_variable(self): var = xr.Variable(data=[0, 50], dims="x") folder1 = DataTree(name="folder1") folder1["results"] = var xrt.assert_equal(folder1["results"], xr.DataArray(var)) def test_setitem_coerce_to_dataarray(self): folder1 = DataTree(name="folder1") folder1["results"] = 0 xrt.assert_equal(folder1["results"], xr.DataArray(0)) def test_setitem_add_new_variable_to_empty_node(self): results = DataTree(name="results") results["pressure"] = xr.DataArray(data=[2, 3]) assert "pressure" in results.ds results["temp"] = xr.Variable(data=[10, 11], dims=["x"]) assert "temp" in results.ds # What if there is a path to traverse first? results = DataTree(name="results") results["highres/pressure"] = xr.DataArray(data=[2, 3]) assert "pressure" in results["highres"].ds results["highres/temp"] = xr.Variable(data=[10, 11], dims=["x"]) assert "temp" in results["highres"].ds def test_setitem_dataarray_replace_existing_node(self): t = xr.Dataset({"temp": [0, 50]}) results = DataTree(name="results", data=t) p = xr.DataArray(data=[2, 3]) results["pressure"] = p expected = t.assign(pressure=p) xrt.assert_identical(results.to_dataset(), expected) class TestDictionaryInterface: ... 
class TestTreeFromDict: def test_data_in_root(self): dat = xr.Dataset() dt = DataTree.from_dict({"/": dat}) assert dt.name is None assert dt.parent is None assert dt.children == {} xrt.assert_identical(dt.to_dataset(), dat) def test_one_layer(self): dat1, dat2 = xr.Dataset({"a": 1}), xr.Dataset({"b": 2}) dt = DataTree.from_dict({"run1": dat1, "run2": dat2}) xrt.assert_identical(dt.to_dataset(), xr.Dataset()) assert dt.name is None xrt.assert_identical(dt["run1"].to_dataset(), dat1) assert dt["run1"].children == {} xrt.assert_identical(dt["run2"].to_dataset(), dat2) assert dt["run2"].children == {} def test_two_layers(self): dat1, dat2 = xr.Dataset({"a": 1}), xr.Dataset({"a": [1, 2]}) dt = DataTree.from_dict({"highres/run": dat1, "lowres/run": dat2}) assert "highres" in dt.children assert "lowres" in dt.children highres_run = dt["highres/run"] xrt.assert_identical(highres_run.to_dataset(), dat1) def test_nones(self): dt = DataTree.from_dict({"d": None, "d/e": None}) assert [node.name for node in dt.subtree] == [None, "d", "e"] assert [node.path for node in dt.subtree] == ["/", "/d", "/d/e"] xrt.assert_identical(dt["d/e"].to_dataset(), xr.Dataset()) def test_full(self, simple_datatree): dt = simple_datatree paths = list(node.path for node in dt.subtree) assert paths == [ "/", "/set1", "/set1/set1", "/set1/set2", "/set2", "/set2/set1", "/set3", ] def test_datatree_values(self): dat1 = DataTree(data=xr.Dataset({"a": 1})) expected = DataTree() expected["a"] = dat1 actual = DataTree.from_dict({"a": dat1}) dtt.assert_identical(actual, expected) def test_roundtrip(self, simple_datatree): dt = simple_datatree roundtrip = DataTree.from_dict(dt.to_dict()) assert roundtrip.equals(dt) @pytest.mark.xfail def test_roundtrip_unnamed_root(self, simple_datatree): # See GH81 dt = simple_datatree dt.name = "root" roundtrip = DataTree.from_dict(dt.to_dict()) assert roundtrip.equals(dt) class TestDatasetView: def test_view_contents(self): ds = create_test_data() dt = DataTree(data=ds) 
assert ds.identical( dt.ds ) # this only works because Dataset.identical doesn't check types assert isinstance(dt.ds, xr.Dataset) def test_immutability(self): # See issue #38 dt = DataTree(name="root", data=None) DataTree(name="a", data=None, parent=dt) with pytest.raises( AttributeError, match="Mutation of the DatasetView is not allowed" ): dt.ds["a"] = xr.DataArray(0) with pytest.raises( AttributeError, match="Mutation of the DatasetView is not allowed" ): dt.ds.update({"a": 0}) # TODO are there any other ways you can normally modify state (in-place)? # (not attribute-like assignment because that doesn't work on Dataset anyway) def test_methods(self): ds = create_test_data() dt = DataTree(data=ds) assert ds.mean().identical(dt.ds.mean()) assert type(dt.ds.mean()) == xr.Dataset def test_arithmetic(self, create_test_datatree): dt = create_test_datatree() expected = create_test_datatree(modify=lambda ds: 10.0 * ds)["set1"] result = 10.0 * dt["set1"].ds assert result.identical(expected) def test_init_via_type(self): # from datatree GH issue #188 # xarray's .weighted is unusual because it uses type() to create a Dataset/DataArray a = xr.DataArray( np.random.rand(3, 4, 10), dims=["x", "y", "time"], coords={"area": (["x", "y"], np.random.rand(3, 4))}, ).to_dataset(name="data") dt = DataTree(data=a) def weighted_mean(ds): return ds.weighted(ds.area).mean(["x", "y"]) weighted_mean(dt.ds) class TestAccess: def test_attribute_access(self, create_test_datatree): dt = create_test_datatree() # vars / coords for key in ["a", "set0"]: xrt.assert_equal(dt[key], getattr(dt, key)) assert key in dir(dt) # dims xrt.assert_equal(dt["a"]["y"], getattr(dt.a, "y")) assert "y" in dir(dt["a"]) # children for key in ["set1", "set2", "set3"]: dtt.assert_equal(dt[key], getattr(dt, key)) assert key in dir(dt) # attrs dt.attrs["meta"] = "NASA" assert dt.attrs["meta"] == "NASA" assert "meta" in dir(dt) def test_ipython_key_completions(self, create_test_datatree): dt = create_test_datatree() 
key_completions = dt._ipython_key_completions_() node_keys = [node.path[1:] for node in dt.subtree] assert all(node_key in key_completions for node_key in node_keys) var_keys = list(dt.variables.keys()) assert all(var_key in key_completions for var_key in var_keys) def test_operation_with_attrs_but_no_data(self): # tests bug from xarray-datatree GH262 xs = xr.Dataset({"testvar": xr.DataArray(np.ones((2, 3)))}) dt = DataTree.from_dict({"node1": xs, "node2": xs}) dt.attrs["test_key"] = 1 # sel works fine without this line dt.sel(dim_0=0) class TestRestructuring: def test_drop_nodes(self): sue = DataTree.from_dict({"Mary": None, "Kate": None, "Ashley": None}) # test drop just one node dropped_one = sue.drop_nodes(names="Mary") assert "Mary" not in dropped_one.children # test drop multiple nodes dropped = sue.drop_nodes(names=["Mary", "Kate"]) assert not set(["Mary", "Kate"]).intersection(set(dropped.children)) assert "Ashley" in dropped.children # test raise with pytest.raises(KeyError, match="nodes {'Mary'} not present"): dropped.drop_nodes(names=["Mary", "Ashley"]) # test ignore childless = dropped.drop_nodes(names=["Mary", "Ashley"], errors="ignore") assert childless.children == {} def test_assign(self): dt = DataTree() expected = DataTree.from_dict({"/": xr.Dataset({"foo": 0}), "/a": None}) # kwargs form result = dt.assign(foo=xr.DataArray(0), a=DataTree()) dtt.assert_equal(result, expected) # dict form result = dt.assign({"foo": xr.DataArray(0), "a": DataTree()}) dtt.assert_equal(result, expected) class TestPipe: def test_noop(self, create_test_datatree): dt = create_test_datatree() actual = dt.pipe(lambda tree: tree) assert actual.identical(dt) def test_params(self, create_test_datatree): dt = create_test_datatree() def f(tree, **attrs): return tree.assign(arr_with_attrs=xr.Variable("dim0", [], attrs=attrs)) attrs = {"x": 1, "y": 2, "z": 3} actual = dt.pipe(f, **attrs) assert actual["arr_with_attrs"].attrs == attrs def test_named_self(self, 
create_test_datatree): dt = create_test_datatree() def f(x, tree, y): tree.attrs.update({"x": x, "y": y}) return tree attrs = {"x": 1, "y": 2} actual = dt.pipe((f, "tree"), **attrs) assert actual is dt and actual.attrs == attrs class TestSubset: def test_match(self): # TODO is this example going to cause problems with case sensitivity? dt = DataTree.from_dict( { "/a/A": None, "/a/B": None, "/b/A": None, "/b/B": None, } ) result = dt.match("*/B") expected = DataTree.from_dict( { "/a/B": None, "/b/B": None, } ) dtt.assert_identical(result, expected) def test_filter(self): simpsons = DataTree.from_dict( d={ "/": xr.Dataset({"age": 83}), "/Herbert": xr.Dataset({"age": 40}), "/Homer": xr.Dataset({"age": 39}), "/Homer/Bart": xr.Dataset({"age": 10}), "/Homer/Lisa": xr.Dataset({"age": 8}), "/Homer/Maggie": xr.Dataset({"age": 1}), }, name="Abe", ) expected = DataTree.from_dict( d={ "/": xr.Dataset({"age": 83}), "/Herbert": xr.Dataset({"age": 40}), "/Homer": xr.Dataset({"age": 39}), }, name="Abe", ) elders = simpsons.filter(lambda node: node["age"] > 18) dtt.assert_identical(elders, expected) datatree-0.0.14/datatree/tests/test_extensions.py000066400000000000000000000023001455257650300221370ustar00rootroot00000000000000import pytest from datatree import DataTree, register_datatree_accessor class TestAccessor: def test_register(self) -> None: @register_datatree_accessor("demo") class DemoAccessor: """Demo accessor.""" def __init__(self, xarray_obj): self._obj = xarray_obj @property def foo(self): return "bar" dt: DataTree = DataTree() assert dt.demo.foo == "bar" # type: ignore # accessor is cached assert dt.demo is dt.demo # type: ignore # check descriptor assert dt.demo.__doc__ == "Demo accessor." # type: ignore # TODO: typing doesn't seem to work with accessors assert DataTree.demo.__doc__ == "Demo accessor." 
# type: ignore assert isinstance(dt.demo, DemoAccessor) # type: ignore assert DataTree.demo is DemoAccessor # type: ignore with pytest.warns(Warning, match="overriding a preexisting attribute"): @register_datatree_accessor("demo") class Foo: pass # ensure we can remove it del DataTree.demo # type: ignore assert not hasattr(DataTree, "demo") datatree-0.0.14/datatree/tests/test_formatting.py000066400000000000000000000072361455257650300221270ustar00rootroot00000000000000from textwrap import dedent from xarray import Dataset from datatree import DataTree from datatree.formatting import diff_tree_repr class TestRepr: def test_print_empty_node(self): dt = DataTree(name="root") printout = dt.__str__() assert printout == "DataTree('root', parent=None)" def test_print_empty_node_with_attrs(self): dat = Dataset(attrs={"note": "has attrs"}) dt = DataTree(name="root", data=dat) printout = dt.__str__() assert printout == dedent( """\ DataTree('root', parent=None) Dimensions: () Data variables: *empty* Attributes: note: has attrs""" ) def test_print_node_with_data(self): dat = Dataset({"a": [0, 2]}) dt = DataTree(name="root", data=dat) printout = dt.__str__() expected = [ "DataTree('root', parent=None)", "Dimensions", "Coordinates", "a", "Data variables", "*empty*", ] for expected_line, printed_line in zip(expected, printout.splitlines()): assert expected_line in printed_line def test_nested_node(self): dat = Dataset({"a": [0, 2]}) root = DataTree(name="root") DataTree(name="results", data=dat, parent=root) printout = root.__str__() assert printout.splitlines()[2].startswith(" ") def test_print_datatree(self, simple_datatree): dt = simple_datatree print(dt) # TODO work out how to test something complex like this def test_repr_of_node_with_data(self): dat = Dataset({"a": [0, 2]}) dt = DataTree(name="root", data=dat) assert "Coordinates" in repr(dt) class TestDiffFormatting: def test_diff_structure(self): dt_1 = DataTree.from_dict({"a": None, "a/b": None, "a/c": None}) dt_2 = 
DataTree.from_dict({"d": None, "d/e": None}) expected = dedent( """\ Left and right DataTree objects are not isomorphic Number of children on node '/a' of the left object: 2 Number of children on node '/d' of the right object: 1""" ) actual = diff_tree_repr(dt_1, dt_2, "isomorphic") assert actual == expected def test_diff_node_names(self): dt_1 = DataTree.from_dict({"a": None}) dt_2 = DataTree.from_dict({"b": None}) expected = dedent( """\ Left and right DataTree objects are not identical Node '/a' in the left object has name 'a' Node '/b' in the right object has name 'b'""" ) actual = diff_tree_repr(dt_1, dt_2, "identical") assert actual == expected def test_diff_node_data(self): import numpy as np # casting to int64 explicitly ensures that int64s are created on all architectures ds1 = Dataset({"u": np.int64(0), "v": np.int64(1)}) ds3 = Dataset({"w": np.int64(5)}) dt_1 = DataTree.from_dict({"a": ds1, "a/b": ds3}) ds2 = Dataset({"u": np.int64(0)}) ds4 = Dataset({"w": np.int64(6)}) dt_2 = DataTree.from_dict({"a": ds2, "a/b": ds4}) expected = dedent( """\ Left and right DataTree objects are not equal Data in nodes at position '/a' do not match: Data variables only on the left object: v int64 1 Data in nodes at position '/a/b' do not match: Differing data variables: L w int64 5 R w int64 6""" ) actual = diff_tree_repr(dt_1, dt_2, "equals") assert actual == expected datatree-0.0.14/datatree/tests/test_formatting_html.py000066400000000000000000000127051455257650300231500ustar00rootroot00000000000000import pytest import xarray as xr from datatree import DataTree, formatting_html @pytest.fixture(scope="module", params=["some html", "some other html"]) def repr(request): return request.param class Test_summarize_children: """ Unit tests for summarize_children. """ func = staticmethod(formatting_html.summarize_children) @pytest.fixture(scope="class") def childfree_tree_factory(self): """ Fixture for a child-free DataTree factory. 
""" from random import randint def _childfree_tree_factory(): return DataTree( data=xr.Dataset({"z": ("y", [randint(1, 100) for _ in range(3)])}) ) return _childfree_tree_factory @pytest.fixture(scope="class") def childfree_tree(self, childfree_tree_factory): """ Fixture for a child-free DataTree. """ return childfree_tree_factory() @pytest.fixture(scope="function") def mock_node_repr(self, monkeypatch): """ Apply mocking for node_repr. """ def mock(group_title, dt): """ Mock with a simple result """ return group_title + " " + str(id(dt)) monkeypatch.setattr(formatting_html, "node_repr", mock) @pytest.fixture(scope="function") def mock_wrap_repr(self, monkeypatch): """ Apply mocking for _wrap_repr. """ def mock(r, *, end, **kwargs): """ Mock by appending "end" or "not end". """ return r + " " + ("end" if end else "not end") + "//" monkeypatch.setattr(formatting_html, "_wrap_repr", mock) def test_empty_mapping(self): """ Test with an empty mapping of children. """ children = {} assert self.func(children) == ( "
" "
" ) def test_one_child(self, childfree_tree, mock_wrap_repr, mock_node_repr): """ Test with one child. Uses a mock of _wrap_repr and node_repr to essentially mock the inline lambda function "lines_callback". """ # Create mapping of children children = {"a": childfree_tree} # Expect first line to be produced from the first child, and # wrapped as the last child first_line = f"a {id(children['a'])} end//" assert self.func(children) == ( "
" f"{first_line}" "
" ) def test_two_children(self, childfree_tree_factory, mock_wrap_repr, mock_node_repr): """ Test with two level deep children. Uses a mock of _wrap_repr and node_repr to essentially mock the inline lambda function "lines_callback". """ # Create mapping of children children = {"a": childfree_tree_factory(), "b": childfree_tree_factory()} # Expect first line to be produced from the first child, and # wrapped as _not_ the last child first_line = f"a {id(children['a'])} not end//" # Expect second line to be produced from the second child, and # wrapped as the last child second_line = f"b {id(children['b'])} end//" assert self.func(children) == ( "
" f"{first_line}" f"{second_line}" "
" ) class Test__wrap_repr: """ Unit tests for _wrap_repr. """ func = staticmethod(formatting_html._wrap_repr) def test_end(self, repr): """ Test with end=True. """ r = self.func(repr, end=True) assert r == ( "
" "
" "
" "
" "
" "
" "
    " f"{repr}" "
" "
" "
" ) def test_not_end(self, repr): """ Test with end=False. """ r = self.func(repr, end=False) assert r == ( "
" "
" "
" "
" "
" "
" "
    " f"{repr}" "
" "
" "
" ) datatree-0.0.14/datatree/tests/test_io.py000066400000000000000000000106431455257650300203600ustar00rootroot00000000000000import pytest import zarr.errors from datatree.io import open_datatree from datatree.testing import assert_equal from datatree.tests import requires_h5netcdf, requires_netCDF4, requires_zarr class TestIO: @requires_netCDF4 def test_to_netcdf(self, tmpdir, simple_datatree): filepath = str( tmpdir / "test.nc" ) # casting to str avoids a pathlib bug in xarray original_dt = simple_datatree original_dt.to_netcdf(filepath, engine="netcdf4") roundtrip_dt = open_datatree(filepath) assert_equal(original_dt, roundtrip_dt) @requires_netCDF4 def test_netcdf_encoding(self, tmpdir, simple_datatree): filepath = str( tmpdir / "test.nc" ) # casting to str avoids a pathlib bug in xarray original_dt = simple_datatree # add compression comp = dict(zlib=True, complevel=9) enc = {"/set2": {var: comp for var in original_dt["/set2"].ds.data_vars}} original_dt.to_netcdf(filepath, encoding=enc, engine="netcdf4") roundtrip_dt = open_datatree(filepath) assert roundtrip_dt["/set2/a"].encoding["zlib"] == comp["zlib"] assert roundtrip_dt["/set2/a"].encoding["complevel"] == comp["complevel"] enc["/not/a/group"] = {"foo": "bar"} with pytest.raises(ValueError, match="unexpected encoding group.*"): original_dt.to_netcdf(filepath, encoding=enc, engine="netcdf4") @requires_h5netcdf def test_to_h5netcdf(self, tmpdir, simple_datatree): filepath = str( tmpdir / "test.nc" ) # casting to str avoids a pathlib bug in xarray original_dt = simple_datatree original_dt.to_netcdf(filepath, engine="h5netcdf") roundtrip_dt = open_datatree(filepath) assert_equal(original_dt, roundtrip_dt) @requires_zarr def test_to_zarr(self, tmpdir, simple_datatree): filepath = str( tmpdir / "test.zarr" ) # casting to str avoids a pathlib bug in xarray original_dt = simple_datatree original_dt.to_zarr(filepath) roundtrip_dt = open_datatree(filepath, engine="zarr") assert_equal(original_dt, roundtrip_dt) 
@requires_zarr
def test_zarr_encoding(self, tmpdir, simple_datatree):
    """Write a tree to zarr with a per-variable compressor and read it back.

    The compressor requested for every data variable of ``/set2`` must be
    present in the encoding of ``/set2/a`` after the round trip. An encoding
    entry keyed by a group that is not in the tree must raise ``ValueError``.
    """
    import zarr

    filepath = str(
        tmpdir / "test.zarr"
    )  # casting to str avoids a pathlib bug in xarray
    original_dt = simple_datatree

    comp = {"compressor": zarr.Blosc(cname="zstd", clevel=3, shuffle=2)}
    enc = {"/set2": {var: comp for var in original_dt["/set2"].ds.data_vars}}
    original_dt.to_zarr(filepath, encoding=enc)
    roundtrip_dt = open_datatree(filepath, engine="zarr")

    assert roundtrip_dt["/set2/a"].encoding["compressor"] == comp["compressor"]

    # An encoding key that names no group in the tree must be rejected.
    enc["/not/a/group"] = {"foo": "bar"}
    with pytest.raises(ValueError, match="unexpected encoding group.*"):
        original_dt.to_zarr(filepath, encoding=enc, engine="zarr")


@requires_zarr
def test_to_zarr_zip_store(self, tmpdir, simple_datatree):
    """Round-trip a tree through a zarr ``ZipStore`` object."""
    from zarr.storage import ZipStore

    filepath = str(
        tmpdir / "test.zarr.zip"
    )  # casting to str avoids a pathlib bug in xarray
    original_dt = simple_datatree

    # Use the store as a context manager so the underlying zip file is
    # closed even when an assertion fails. The comparison stays inside the
    # block because it reads data back through the still-open store.
    with ZipStore(filepath) as store:
        original_dt.to_zarr(store)

        roundtrip_dt = open_datatree(store, engine="zarr")
        assert_equal(original_dt, roundtrip_dt)


@requires_zarr
def test_to_zarr_not_consolidated(self, tmpdir, simple_datatree):
    """Write with ``consolidated=False`` and check no ``.zmetadata`` is created.

    Neither the store root nor the ``set1`` group may contain a
    ``.zmetadata`` file, and opening the store must emit a
    ``RuntimeWarning`` whose message matches "consolidated".
    """
    filepath = tmpdir / "test.zarr"
    zmetadata = filepath / ".zmetadata"
    s1zmetadata = filepath / "set1" / ".zmetadata"
    filepath = str(filepath)  # casting to str avoids a pathlib bug in xarray
    original_dt = simple_datatree
    original_dt.to_zarr(filepath, consolidated=False)
    assert not zmetadata.exists()
    assert not s1zmetadata.exists()

    with pytest.warns(RuntimeWarning, match="consolidated"):
        roundtrip_dt = open_datatree(filepath, engine="zarr")
    assert_equal(original_dt, roundtrip_dt)


@requires_zarr
def test_to_zarr_default_write_mode(self, tmpdir, simple_datatree):
    """A second default-mode ``to_zarr`` to the same directory must fail."""
    simple_datatree.to_zarr(tmpdir)

    # with default settings, to_zarr should not overwrite an existing dir
    with pytest.raises(zarr.errors.ContainsGroupError):
        simple_datatree.to_zarr(tmpdir)
datatree-0.0.14/datatree/tests/test_mapping.py000066400000000000000000000272121455257650300214040ustar00rootroot00000000000000import numpy as np import pytest import xarray as xr from datatree.datatree import DataTree from datatree.mapping import TreeIsomorphismError, check_isomorphic, map_over_subtree from datatree.testing import assert_equal empty = xr.Dataset() class TestCheckTreesIsomorphic: def test_not_a_tree(self): with pytest.raises(TypeError, match="not a tree"): check_isomorphic("s", 1) def test_different_widths(self): dt1 = DataTree.from_dict(d={"a": empty}) dt2 = DataTree.from_dict(d={"b": empty, "c": empty}) expected_err_str = ( "Number of children on node '/' of the left object: 1\n" "Number of children on node '/' of the right object: 2" ) with pytest.raises(TreeIsomorphismError, match=expected_err_str): check_isomorphic(dt1, dt2) def test_different_heights(self): dt1 = DataTree.from_dict({"a": empty}) dt2 = DataTree.from_dict({"b": empty, "b/c": empty}) expected_err_str = ( "Number of children on node '/a' of the left object: 0\n" "Number of children on node '/b' of the right object: 1" ) with pytest.raises(TreeIsomorphismError, match=expected_err_str): check_isomorphic(dt1, dt2) def test_names_different(self): dt1 = DataTree.from_dict({"a": xr.Dataset()}) dt2 = DataTree.from_dict({"b": empty}) expected_err_str = ( "Node '/a' in the left object has name 'a'\n" "Node '/b' in the right object has name 'b'" ) with pytest.raises(TreeIsomorphismError, match=expected_err_str): check_isomorphic(dt1, dt2, require_names_equal=True) def test_isomorphic_names_equal(self): dt1 = DataTree.from_dict({"a": empty, "b": empty, "b/c": empty, "b/d": empty}) dt2 = DataTree.from_dict({"a": empty, "b": empty, "b/c": empty, "b/d": empty}) check_isomorphic(dt1, dt2, require_names_equal=True) def test_isomorphic_ordering(self): dt1 = DataTree.from_dict({"a": empty, "b": empty, "b/d": empty, "b/c": empty}) dt2 = DataTree.from_dict({"a": empty, "b": empty, "b/c": empty, 
"b/d": empty}) check_isomorphic(dt1, dt2, require_names_equal=False) def test_isomorphic_names_not_equal(self): dt1 = DataTree.from_dict({"a": empty, "b": empty, "b/c": empty, "b/d": empty}) dt2 = DataTree.from_dict({"A": empty, "B": empty, "B/C": empty, "B/D": empty}) check_isomorphic(dt1, dt2) def test_not_isomorphic_complex_tree(self, create_test_datatree): dt1 = create_test_datatree() dt2 = create_test_datatree() dt2["set1/set2/extra"] = DataTree(name="extra") with pytest.raises(TreeIsomorphismError, match="/set1/set2"): check_isomorphic(dt1, dt2) def test_checking_from_root(self, create_test_datatree): dt1 = create_test_datatree() dt2 = create_test_datatree() real_root = DataTree(name="real root") dt2.name = "not_real_root" dt2.parent = real_root with pytest.raises(TreeIsomorphismError): check_isomorphic(dt1, dt2, check_from_root=True) class TestMapOverSubTree: def test_no_trees_passed(self): @map_over_subtree def times_ten(ds): return 10.0 * ds with pytest.raises(TypeError, match="Must pass at least one tree"): times_ten("dt") def test_not_isomorphic(self, create_test_datatree): dt1 = create_test_datatree() dt2 = create_test_datatree() dt2["set1/set2/extra"] = DataTree(name="extra") @map_over_subtree def times_ten(ds1, ds2): return ds1 * ds2 with pytest.raises(TreeIsomorphismError): times_ten(dt1, dt2) def test_no_trees_returned(self, create_test_datatree): dt1 = create_test_datatree() dt2 = create_test_datatree() @map_over_subtree def bad_func(ds1, ds2): return None with pytest.raises(TypeError, match="return value of None"): bad_func(dt1, dt2) def test_single_dt_arg(self, create_test_datatree): dt = create_test_datatree() @map_over_subtree def times_ten(ds): return 10.0 * ds expected = create_test_datatree(modify=lambda ds: 10.0 * ds) result_tree = times_ten(dt) assert_equal(result_tree, expected) def test_single_dt_arg_plus_args_and_kwargs(self, create_test_datatree): dt = create_test_datatree() @map_over_subtree def multiply_then_add(ds, times, add=0.0): 
return (times * ds) + add expected = create_test_datatree(modify=lambda ds: (10.0 * ds) + 2.0) result_tree = multiply_then_add(dt, 10.0, add=2.0) assert_equal(result_tree, expected) def test_multiple_dt_args(self, create_test_datatree): dt1 = create_test_datatree() dt2 = create_test_datatree() @map_over_subtree def add(ds1, ds2): return ds1 + ds2 expected = create_test_datatree(modify=lambda ds: 2.0 * ds) result = add(dt1, dt2) assert_equal(result, expected) def test_dt_as_kwarg(self, create_test_datatree): dt1 = create_test_datatree() dt2 = create_test_datatree() @map_over_subtree def add(ds1, value=0.0): return ds1 + value expected = create_test_datatree(modify=lambda ds: 2.0 * ds) result = add(dt1, value=dt2) assert_equal(result, expected) def test_return_multiple_dts(self, create_test_datatree): dt = create_test_datatree() @map_over_subtree def minmax(ds): return ds.min(), ds.max() dt_min, dt_max = minmax(dt) expected_min = create_test_datatree(modify=lambda ds: ds.min()) assert_equal(dt_min, expected_min) expected_max = create_test_datatree(modify=lambda ds: ds.max()) assert_equal(dt_max, expected_max) def test_return_wrong_type(self, simple_datatree): dt1 = simple_datatree @map_over_subtree def bad_func(ds1): return "string" with pytest.raises(TypeError, match="not Dataset or DataArray"): bad_func(dt1) def test_return_tuple_of_wrong_types(self, simple_datatree): dt1 = simple_datatree @map_over_subtree def bad_func(ds1): return xr.Dataset(), "string" with pytest.raises(TypeError, match="not Dataset or DataArray"): bad_func(dt1) @pytest.mark.xfail def test_return_inconsistent_number_of_results(self, simple_datatree): dt1 = simple_datatree @map_over_subtree def bad_func(ds): # Datasets in simple_datatree have different numbers of dims # TODO need to instead return different numbers of Dataset objects for this test to catch the intended error return tuple(ds.dims) with pytest.raises(TypeError, match="instead returns"): bad_func(dt1) def 
test_wrong_number_of_arguments_for_func(self, simple_datatree): dt = simple_datatree @map_over_subtree def times_ten(ds): return 10.0 * ds with pytest.raises( TypeError, match="takes 1 positional argument but 2 were given" ): times_ten(dt, dt) def test_map_single_dataset_against_whole_tree(self, create_test_datatree): dt = create_test_datatree() @map_over_subtree def nodewise_merge(node_ds, fixed_ds): return xr.merge([node_ds, fixed_ds]) other_ds = xr.Dataset({"z": ("z", [0])}) expected = create_test_datatree(modify=lambda ds: xr.merge([ds, other_ds])) result_tree = nodewise_merge(dt, other_ds) assert_equal(result_tree, expected) @pytest.mark.xfail def test_trees_with_different_node_names(self): # TODO test this after I've got good tests for renaming nodes raise NotImplementedError def test_dt_method(self, create_test_datatree): dt = create_test_datatree() def multiply_then_add(ds, times, add=0.0): return times * ds + add expected = create_test_datatree(modify=lambda ds: (10.0 * ds) + 2.0) result_tree = dt.map_over_subtree(multiply_then_add, 10.0, add=2.0) assert_equal(result_tree, expected) def test_discard_ancestry(self, create_test_datatree): # Check for datatree GH issue #48 dt = create_test_datatree() subtree = dt["set1"] @map_over_subtree def times_ten(ds): return 10.0 * ds expected = create_test_datatree(modify=lambda ds: 10.0 * ds)["set1"] result_tree = times_ten(subtree) assert_equal(result_tree, expected, from_root=False) def test_skip_empty_nodes_with_attrs(self, create_test_datatree): # inspired by xarray-datatree GH262 dt = create_test_datatree() dt["set1/set2"].attrs["foo"] = "bar" def check_for_data(ds): # fails if run on a node that has no data assert len(ds.variables) != 0 return ds dt.map_over_subtree(check_for_data) def test_keep_attrs_on_empty_nodes(self, create_test_datatree): # GH278 dt = create_test_datatree() dt["set1/set2"].attrs["foo"] = "bar" def empty_func(ds): return ds result = dt.map_over_subtree(empty_func) assert 
result["set1/set2"].attrs == dt["set1/set2"].attrs @pytest.mark.xfail( reason="probably some bug in pytests handling of exception notes" ) def test_error_contains_path_of_offending_node(self, create_test_datatree): dt = create_test_datatree() dt["set1"]["bad_var"] = 0 print(dt) def fail_on_specific_node(ds): if "bad_var" in ds: raise ValueError("Failed because 'bar_var' present in dataset") with pytest.raises( ValueError, match="Raised whilst mapping function over node /set1" ): dt.map_over_subtree(fail_on_specific_node) class TestMutableOperations: def test_construct_using_type(self): # from datatree GH issue #188 # xarray's .weighted is unusual because it uses type() to create a Dataset/DataArray a = xr.DataArray( np.random.rand(3, 4, 10), dims=["x", "y", "time"], coords={"area": (["x", "y"], np.random.rand(3, 4))}, ).to_dataset(name="data") b = xr.DataArray( np.random.rand(2, 6, 14), dims=["x", "y", "time"], coords={"area": (["x", "y"], np.random.rand(2, 6))}, ).to_dataset(name="data") dt = DataTree.from_dict({"a": a, "b": b}) def weighted_mean(ds): return ds.weighted(ds.area).mean(["x", "y"]) dt.map_over_subtree(weighted_mean) def test_alter_inplace_forbidden(self): simpsons = DataTree.from_dict( d={ "/": xr.Dataset({"age": 83}), "/Herbert": xr.Dataset({"age": 40}), "/Homer": xr.Dataset({"age": 39}), "/Homer/Bart": xr.Dataset({"age": 10}), "/Homer/Lisa": xr.Dataset({"age": 8}), "/Homer/Maggie": xr.Dataset({"age": 1}), }, name="Abe", ) def fast_forward(ds: xr.Dataset, years: float) -> xr.Dataset: """Add some years to the age, but by altering the given dataset""" ds["age"] = ds["age"] + years return ds with pytest.raises(AttributeError): simpsons.map_over_subtree(fast_forward, years=10) @pytest.mark.xfail class TestMapOverSubTreeInplace: def test_map_over_subtree_inplace(self): raise NotImplementedError datatree-0.0.14/datatree/tests/test_treenode.py000066400000000000000000000250141455257650300215540ustar00rootroot00000000000000import pytest from 
datatree.iterators import LevelOrderIter, PreOrderIter from datatree.treenode import InvalidTreeError, NamedNode, NodePath, TreeNode class TestFamilyTree: def test_lonely(self): root = TreeNode() assert root.parent is None assert root.children == {} def test_parenting(self): john = TreeNode() mary = TreeNode() mary._set_parent(john, "Mary") assert mary.parent == john assert john.children["Mary"] is mary def test_no_time_traveller_loops(self): john = TreeNode() with pytest.raises(InvalidTreeError, match="cannot be a parent of itself"): john._set_parent(john, "John") with pytest.raises(InvalidTreeError, match="cannot be a parent of itself"): john.children = {"John": john} mary = TreeNode() rose = TreeNode() mary._set_parent(john, "Mary") rose._set_parent(mary, "Rose") with pytest.raises(InvalidTreeError, match="is already a descendant"): john._set_parent(rose, "John") with pytest.raises(InvalidTreeError, match="is already a descendant"): rose.children = {"John": john} def test_parent_swap(self): john = TreeNode() mary = TreeNode() mary._set_parent(john, "Mary") steve = TreeNode() mary._set_parent(steve, "Mary") assert mary.parent == steve assert steve.children["Mary"] is mary assert "Mary" not in john.children def test_multi_child_family(self): mary = TreeNode() kate = TreeNode() john = TreeNode(children={"Mary": mary, "Kate": kate}) assert john.children["Mary"] is mary assert john.children["Kate"] is kate assert mary.parent is john assert kate.parent is john def test_disown_child(self): mary = TreeNode() john = TreeNode(children={"Mary": mary}) mary.orphan() assert mary.parent is None assert "Mary" not in john.children def test_doppelganger_child(self): kate = TreeNode() john = TreeNode() with pytest.raises(TypeError): john.children = {"Kate": 666} with pytest.raises(InvalidTreeError, match="Cannot add same node"): john.children = {"Kate": kate, "Evil_Kate": kate} john = TreeNode(children={"Kate": kate}) evil_kate = TreeNode() evil_kate._set_parent(john, "Kate") 
assert john.children["Kate"] is evil_kate def test_sibling_relationships(self): mary = TreeNode() kate = TreeNode() ashley = TreeNode() TreeNode(children={"Mary": mary, "Kate": kate, "Ashley": ashley}) assert kate.siblings["Mary"] is mary assert kate.siblings["Ashley"] is ashley assert "Kate" not in kate.siblings def test_ancestors(self): tony = TreeNode() michael = TreeNode(children={"Tony": tony}) vito = TreeNode(children={"Michael": michael}) assert tony.root is vito assert tony.parents == (michael, vito) assert tony.ancestors == (vito, michael, tony) class TestGetNodes: def test_get_child(self): steven = TreeNode() sue = TreeNode(children={"Steven": steven}) mary = TreeNode(children={"Sue": sue}) john = TreeNode(children={"Mary": mary}) # get child assert john._get_item("Mary") is mary assert mary._get_item("Sue") is sue # no child exists with pytest.raises(KeyError): john._get_item("Kate") # get grandchild assert john._get_item("Mary/Sue") is sue # get great-grandchild assert john._get_item("Mary/Sue/Steven") is steven # get from middle of tree assert mary._get_item("Sue/Steven") is steven def test_get_upwards(self): sue = TreeNode() kate = TreeNode() mary = TreeNode(children={"Sue": sue, "Kate": kate}) john = TreeNode(children={"Mary": mary}) assert sue._get_item("../") is mary assert sue._get_item("../../") is john # relative path assert sue._get_item("../Kate") is kate def test_get_from_root(self): sue = TreeNode() mary = TreeNode(children={"Sue": sue}) john = TreeNode(children={"Mary": mary}) # noqa assert sue._get_item("/Mary") is mary class TestSetNodes: def test_set_child_node(self): john = TreeNode() mary = TreeNode() john._set_item("Mary", mary) assert john.children["Mary"] is mary assert isinstance(mary, TreeNode) assert mary.children == {} assert mary.parent is john def test_child_already_exists(self): mary = TreeNode() john = TreeNode(children={"Mary": mary}) mary_2 = TreeNode() with pytest.raises(KeyError): john._set_item("Mary", mary_2, 
allow_overwrite=False) def test_set_grandchild(self): rose = TreeNode() mary = TreeNode() john = TreeNode() john._set_item("Mary", mary) john._set_item("Mary/Rose", rose) assert john.children["Mary"] is mary assert isinstance(mary, TreeNode) assert "Rose" in mary.children assert rose.parent is mary def test_create_intermediate_child(self): john = TreeNode() rose = TreeNode() # test intermediate children not allowed with pytest.raises(KeyError, match="Could not reach"): john._set_item(path="Mary/Rose", item=rose, new_nodes_along_path=False) # test intermediate children allowed john._set_item("Mary/Rose", rose, new_nodes_along_path=True) assert "Mary" in john.children mary = john.children["Mary"] assert isinstance(mary, TreeNode) assert mary.children == {"Rose": rose} assert rose.parent == mary assert rose.parent == mary def test_overwrite_child(self): john = TreeNode() mary = TreeNode() john._set_item("Mary", mary) # test overwriting not allowed marys_evil_twin = TreeNode() with pytest.raises(KeyError, match="Already a node object"): john._set_item("Mary", marys_evil_twin, allow_overwrite=False) assert john.children["Mary"] is mary assert marys_evil_twin.parent is None # test overwriting allowed marys_evil_twin = TreeNode() john._set_item("Mary", marys_evil_twin, allow_overwrite=True) assert john.children["Mary"] is marys_evil_twin assert marys_evil_twin.parent is john class TestPruning: def test_del_child(self): john = TreeNode() mary = TreeNode() john._set_item("Mary", mary) del john["Mary"] assert "Mary" not in john.children assert mary.parent is None with pytest.raises(KeyError): del john["Mary"] def create_test_tree(): a = NamedNode(name="a") b = NamedNode() c = NamedNode() d = NamedNode() e = NamedNode() f = NamedNode() g = NamedNode() h = NamedNode() i = NamedNode() a.children = {"b": b, "c": c} b.children = {"d": d, "e": e} e.children = {"f": f, "g": g} c.children = {"h": h} h.children = {"i": i} return a, f class TestIterators: def test_preorderiter(self): 
root, _ = create_test_tree() result = [node.name for node in PreOrderIter(root)] expected = [ "a", "b", "d", "e", "f", "g", "c", "h", "i", ] assert result == expected def test_levelorderiter(self): root, _ = create_test_tree() result = [node.name for node in LevelOrderIter(root)] expected = [ "a", # root Node is unnamed "b", "c", "d", "e", "h", "f", "g", "i", ] assert result == expected class TestAncestry: def test_parents(self): _, leaf = create_test_tree() expected = ["e", "b", "a"] assert [node.name for node in leaf.parents] == expected def test_lineage(self): _, leaf = create_test_tree() expected = ["f", "e", "b", "a"] assert [node.name for node in leaf.lineage] == expected def test_ancestors(self): _, leaf = create_test_tree() ancestors = leaf.ancestors expected = ["a", "b", "e", "f"] for node, expected_name in zip(ancestors, expected): assert node.name == expected_name def test_subtree(self): root, _ = create_test_tree() subtree = root.subtree expected = [ "a", "b", "d", "e", "f", "g", "c", "h", "i", ] for node, expected_name in zip(subtree, expected): assert node.name == expected_name def test_descendants(self): root, _ = create_test_tree() descendants = root.descendants expected = [ "b", "d", "e", "f", "g", "c", "h", "i", ] for node, expected_name in zip(descendants, expected): assert node.name == expected_name def test_leaves(self): tree, _ = create_test_tree() leaves = tree.leaves expected = [ "d", "f", "g", "i", ] for node, expected_name in zip(leaves, expected): assert node.name == expected_name def test_levels(self): a, f = create_test_tree() assert a.level == 0 assert f.level == 3 assert a.depth == 3 assert f.depth == 3 assert a.width == 1 assert f.width == 3 class TestRenderTree: def test_render_nodetree(self): sam = NamedNode() ben = NamedNode() mary = NamedNode(children={"Sam": sam, "Ben": ben}) kate = NamedNode() john = NamedNode(children={"Mary": mary, "Kate": kate}) printout = john.__str__() expected_nodes = [ "NamedNode()", "NamedNode('Mary')", 
"NamedNode('Sam')", "NamedNode('Ben')", "NamedNode('Kate')", ] for expected_node, printed_node in zip(expected_nodes, printout.splitlines()): assert expected_node in printed_node def test_nodepath(): path = NodePath("/Mary") assert path.root == "/" assert path.stem == "Mary" datatree-0.0.14/datatree/tests/test_version.py000066400000000000000000000001171455257650300214310ustar00rootroot00000000000000import datatree def test_version(): assert datatree.__version__ != "999" datatree-0.0.14/datatree/treenode.py000066400000000000000000000547201455257650300173610ustar00rootroot00000000000000from __future__ import annotations import sys from collections import OrderedDict from pathlib import PurePosixPath from typing import ( TYPE_CHECKING, Generic, Iterator, Mapping, Optional, Tuple, TypeVar, Union, ) from xarray.core.utils import Frozen, is_dict_like if TYPE_CHECKING: from xarray.core.types import T_DataArray class InvalidTreeError(Exception): """Raised when user attempts to create an invalid tree in some way.""" class NotFoundInTreeError(ValueError): """Raised when operation can't be completed because one node is part of the expected tree.""" class NodePath(PurePosixPath): """Represents a path from one node to another within a tree.""" def __init__(self, *pathsegments): if sys.version_info >= (3, 12): super().__init__(*pathsegments) else: super().__new__(PurePosixPath, *pathsegments) if self.drive: raise ValueError("NodePaths cannot have drives") if self.root not in ["/", ""]: raise ValueError( 'Root of NodePath can only be either "/" or "", with "" meaning the path is relative.' ) # TODO should we also forbid suffixes to avoid node names with dots in them? Tree = TypeVar("Tree", bound="TreeNode") class TreeNode(Generic[Tree]): """ Base class representing a node of a tree, with methods for traversing and altering the tree. This class stores no data, it has only parents and children attributes, and various methods. 
Stores child nodes in an Ordered Dictionary, which is necessary to ensure that equality checks between two trees also check that the order of child nodes is the same. Nodes themselves are intrinsically unnamed (do not possess a ._name attribute), but if the node has a parent you can find the key it is stored under via the .name property. The .parent attribute is read-only: to replace the parent using public API you must set this node as the child of a new parent using `new_parent.children[name] = child_node`, or to instead detach from the current parent use `child_node.orphan()`. This class is intended to be subclassed by DataTree, which will overwrite some of the inherited behaviour, in particular to make names an inherent attribute, and allow setting parents directly. The intention is to mirror the class structure of xarray.Variable & xarray.DataArray, where Variable is unnamed but DataArray is (optionally) named. Also allows access to any other node in the tree via unix-like paths, including upwards referencing via '../'. (This class is heavily inspired by the anytree library's NodeMixin class.) """ _parent: Optional[Tree] _children: OrderedDict[str, Tree] def __init__(self, children: Optional[Mapping[str, Tree]] = None): """Create a parentless node.""" self._parent = None self._children = OrderedDict() if children is not None: self.children = children @property def parent(self) -> Tree | None: """Parent of this node.""" return self._parent def _set_parent( self, new_parent: Tree | None, child_name: Optional[str] = None ) -> None: # TODO is it possible to refactor in a way that removes this private method? 
if new_parent is not None and not isinstance(new_parent, TreeNode): raise TypeError( "Parent nodes must be of type DataTree or None, " f"not type {type(new_parent)}" ) old_parent = self._parent if new_parent is not old_parent: self._check_loop(new_parent) self._detach(old_parent) self._attach(new_parent, child_name) def _check_loop(self, new_parent: Tree | None) -> None: """Checks that assignment of this new parent will not create a cycle.""" if new_parent is not None: if new_parent is self: raise InvalidTreeError( f"Cannot set parent, as node {self} cannot be a parent of itself." ) if self._is_descendant_of(new_parent): raise InvalidTreeError( "Cannot set parent, as intended parent is already a descendant of this node." ) def _is_descendant_of(self, node: Tree) -> bool: return any(n is self for n in node.parents) def _detach(self, parent: Tree | None) -> None: if parent is not None: self._pre_detach(parent) parents_children = parent.children parent._children = OrderedDict( { name: child for name, child in parents_children.items() if child is not self } ) self._parent = None self._post_detach(parent) def _attach(self, parent: Tree | None, child_name: Optional[str] = None) -> None: if parent is not None: if child_name is None: raise ValueError( "To directly set parent, child needs a name, but child is unnamed" ) self._pre_attach(parent) parentchildren = parent._children assert not any( child is self for child in parentchildren ), "Tree is corrupt." 
parentchildren[child_name] = self self._parent = parent self._post_attach(parent) else: self._parent = None def orphan(self) -> None: """Detach this node from its parent.""" self._set_parent(new_parent=None) @property def children(self: Tree) -> Mapping[str, Tree]: """Child nodes of this node, stored under a mapping via their names.""" return Frozen(self._children) @children.setter def children(self: Tree, children: Mapping[str, Tree]) -> None: self._check_children(children) children = OrderedDict(children) old_children = self.children del self.children try: self._pre_attach_children(children) for name, child in children.items(): child._set_parent(new_parent=self, child_name=name) self._post_attach_children(children) assert len(self.children) == len(children) except Exception: # if something goes wrong then revert to previous children self.children = old_children raise @children.deleter def children(self) -> None: # TODO this just detaches all the children, it doesn't actually delete them... children = self.children self._pre_detach_children(children) for child in self.children.values(): child.orphan() assert len(self.children) == 0 self._post_detach_children(children) @staticmethod def _check_children(children: Mapping[str, Tree]) -> None: """Check children for correct types and for any duplicates.""" if not is_dict_like(children): raise TypeError( "children must be a dict-like mapping from names to node objects" ) seen = set() for name, child in children.items(): if not isinstance(child, TreeNode): raise TypeError( f"Cannot add object {name}. It is of type {type(child)}, " "but can only add children of type DataTree" ) childid = id(child) if childid not in seen: seen.add(childid) else: raise InvalidTreeError( f"Cannot add same node {name} multiple times as different children." 
) def __repr__(self) -> str: return f"TreeNode(children={dict(self._children)})" def _pre_detach_children(self: Tree, children: Mapping[str, Tree]) -> None: """Method call before detaching `children`.""" pass def _post_detach_children(self: Tree, children: Mapping[str, Tree]) -> None: """Method call after detaching `children`.""" pass def _pre_attach_children(self: Tree, children: Mapping[str, Tree]) -> None: """Method call before attaching `children`.""" pass def _post_attach_children(self: Tree, children: Mapping[str, Tree]) -> None: """Method call after attaching `children`.""" pass def _iter_parents(self: Tree) -> Iterator[Tree]: """Iterate up the tree, starting from the current node.""" node: Tree | None = self.parent while node is not None: yield node node = node.parent def iter_lineage(self: Tree) -> Tuple[Tree, ...]: """Iterate up the tree, starting from the current node.""" from warnings import warn warn( "`iter_lineage` has been deprecated, and in the future will raise an error." "Please use `parents` from now on.", DeprecationWarning, ) return tuple((self, *self.parents)) @property def lineage(self: Tree) -> Tuple[Tree, ...]: """All parent nodes and their parent nodes, starting with the closest.""" from warnings import warn warn( "`lineage` has been deprecated, and in the future will raise an error." "Please use `parents` from now on.", DeprecationWarning, ) return self.iter_lineage() @property def parents(self: Tree) -> Tuple[Tree, ...]: """All parent nodes and their parent nodes, starting with the closest.""" return tuple(self._iter_parents()) @property def ancestors(self: Tree) -> Tuple[Tree, ...]: """All parent nodes and their parent nodes, starting with the most distant.""" from warnings import warn warn( "`ancestors` has been deprecated, and in the future will raise an error." "Please use `parents`. 
Example: `tuple(reversed(node.parents))`", DeprecationWarning, ) return tuple((*reversed(self.parents), self)) @property def root(self: Tree) -> Tree: """Root node of the tree""" node = self while node.parent is not None: node = node.parent return node @property def is_root(self) -> bool: """Whether this node is the tree root.""" return self.parent is None @property def is_leaf(self) -> bool: """ Whether this node is a leaf node. Leaf nodes are defined as nodes which have no children. """ return self.children == {} @property def leaves(self: Tree) -> Tuple[Tree, ...]: """ All leaf nodes. Leaf nodes are defined as nodes which have no children. """ return tuple([node for node in self.subtree if node.is_leaf]) @property def siblings(self: Tree) -> OrderedDict[str, Tree]: """ Nodes with the same parent as this node. """ if self.parent: return OrderedDict( { name: child for name, child in self.parent.children.items() if child is not self } ) else: return OrderedDict() @property def subtree(self: Tree) -> Iterator[Tree]: """ An iterator over all nodes in this tree, including both self and all descendants. Iterates depth-first. See Also -------- DataTree.descendants """ from . import iterators return iterators.PreOrderIter(self) @property def descendants(self: Tree) -> Tuple[Tree, ...]: """ Child nodes and all their child nodes. Returned in depth-first order. See Also -------- DataTree.subtree """ all_nodes = tuple(self.subtree) this_node, *descendants = all_nodes return tuple(descendants) @property def level(self: Tree) -> int: """ Level of this node. Level means number of parent nodes above this node before reaching the root. The root node is at level 0. Returns ------- level : int See Also -------- depth width """ return len(self.parents) @property def depth(self: Tree) -> int: """ Maximum level of this tree. Measured from the root, which has a depth of 0. 
Returns ------- depth : int See Also -------- level width """ return max(node.level for node in self.root.subtree) @property def width(self: Tree) -> int: """ Number of nodes at this level in the tree. Includes number of immediate siblings, but also "cousins" in other branches and so-on. Returns ------- depth : int See Also -------- level depth """ return len([node for node in self.root.subtree if node.level == self.level]) def _pre_detach(self: Tree, parent: Tree) -> None: """Method call before detaching from `parent`.""" pass def _post_detach(self: Tree, parent: Tree) -> None: """Method call after detaching from `parent`.""" pass def _pre_attach(self: Tree, parent: Tree) -> None: """Method call before attaching to `parent`.""" pass def _post_attach(self: Tree, parent: Tree) -> None: """Method call after attaching to `parent`.""" pass def get(self: Tree, key: str, default: Optional[Tree] = None) -> Optional[Tree]: """ Return the child node with the specified key. Only looks for the node within the immediate children of this node, not in other nodes of the tree. """ if key in self.children: return self.children[key] else: return default # TODO `._walk` method to be called by both `_get_item` and `_set_item` def _get_item(self: Tree, path: str | NodePath) -> Union[Tree, T_DataArray]: """ Returns the object lying at the given path. Raises a KeyError if there is no object at the given path. 
""" if isinstance(path, str): path = NodePath(path) if path.root: current_node = self.root root, *parts = list(path.parts) else: current_node = self parts = list(path.parts) for part in parts: if part == "..": if current_node.parent is None: raise KeyError(f"Could not find node at {path}") else: current_node = current_node.parent elif part in ("", "."): pass else: if current_node.get(part) is None: raise KeyError(f"Could not find node at {path}") else: current_node = current_node.get(part) return current_node def _set(self: Tree, key: str, val: Tree) -> None: """ Set the child node with the specified key to value. Counterpart to the public .get method, and also only works on the immediate node, not other nodes in the tree. """ new_children = {**self.children, key: val} self.children = new_children def _set_item( self: Tree, path: str | NodePath, item: Union[Tree, T_DataArray], new_nodes_along_path: bool = False, allow_overwrite: bool = True, ) -> None: """ Set a new item in the tree, overwriting anything already present at that path. The given value either forms a new node of the tree or overwrites an existing item at that location. Parameters ---------- path item new_nodes_along_path : bool If true, then if necessary new nodes will be created along the given path, until the tree can reach the specified location. allow_overwrite : bool Whether or not to overwrite any existing node at the location given by path. Raises ------ KeyError If node cannot be reached, and new_nodes_along_path=False. Or if a node already exists at the specified path, and allow_overwrite=False. 
""" if isinstance(path, str): path = NodePath(path) if not path.name: raise ValueError("Can't set an item under a path which has no name") if path.root: # absolute path current_node = self.root root, *parts, name = path.parts else: # relative path current_node = self *parts, name = path.parts if parts: # Walk to location of new node, creating intermediate node objects as we go if necessary for part in parts: if part == "..": if current_node.parent is None: # We can't create a parent if `new_nodes_along_path=True` as we wouldn't know what to name it raise KeyError(f"Could not reach node at path {path}") else: current_node = current_node.parent elif part in ("", "."): pass else: if part in current_node.children: current_node = current_node.children[part] elif new_nodes_along_path: # Want child classes (i.e. DataTree) to populate tree with their own types new_node = type(self)() current_node._set(part, new_node) current_node = current_node.children[part] else: raise KeyError(f"Could not reach node at path {path}") if name in current_node.children: # Deal with anything already existing at this location if allow_overwrite: current_node._set(name, item) else: raise KeyError(f"Already a node object at path {path}") else: current_node._set(name, item) def __delitem__(self: Tree, key: str): """Remove a child node from this tree object.""" if key in self.children: child = self._children[key] del self._children[key] child.orphan() else: raise KeyError("Cannot delete") def same_tree(self, other: Tree) -> bool: """True if other node is in the same tree as this node.""" return self.root is other.root class NamedNode(TreeNode, Generic[Tree]): """ A TreeNode which knows its own name. Implements path-like relationships to other nodes in its tree. 
""" _name: Optional[str] _parent: Optional[Tree] _children: OrderedDict[str, Tree] def __init__(self, name=None, children=None): super().__init__(children=children) self._name = None self.name = name @property def name(self) -> str | None: """The name of this node.""" return self._name @name.setter def name(self, name: str | None) -> None: if name is not None: if not isinstance(name, str): raise TypeError("node name must be a string or None") if "/" in name: raise ValueError("node names cannot contain forward slashes") self._name = name def __str__(self) -> str: return f"NamedNode({self.name})" if self.name else "NamedNode()" def _post_attach(self: NamedNode, parent: NamedNode) -> None: """Ensures child has name attribute corresponding to key under which it has been stored.""" key = next(k for k, v in parent.children.items() if v is self) self.name = key @property def path(self) -> str: """Return the file-like path from the root to this node.""" if self.is_root: return "/" else: root, *ancestors = tuple(reversed(self.parents)) # don't include name of root because (a) root might not have a name & (b) we want path relative to root. names = [*(node.name for node in ancestors), self.name] return "/" + "/".join(names) def relative_to(self: NamedNode, other: NamedNode) -> str: """ Compute the relative path from this node to node `other`. If other is not in this tree, or it's otherwise impossible, raise a ValueError. 
""" if not self.same_tree(other): raise NotFoundInTreeError( "Cannot find relative path because nodes do not lie within the same tree" ) this_path = NodePath(self.path) if other.path in list(parent.path for parent in (self, *self.parents)): return str(this_path.relative_to(other.path)) else: common_ancestor = self.find_common_ancestor(other) path_to_common_ancestor = other._path_to_ancestor(common_ancestor) return str( path_to_common_ancestor / this_path.relative_to(common_ancestor.path) ) def find_common_ancestor(self, other: NamedNode) -> NamedNode: """ Find the first common ancestor of two nodes in the same tree. Raise ValueError if they are not in the same tree. """ if self is other: return self other_paths = [op.path for op in other.parents] for parent in (self, *self.parents): if parent.path in other_paths: return parent raise NotFoundInTreeError( "Cannot find common ancestor because nodes do not lie within the same tree" ) def _path_to_ancestor(self, ancestor: NamedNode) -> NodePath: """Return the relative path from this node to the given ancestor node""" if not self.same_tree(ancestor): raise NotFoundInTreeError( "Cannot find relative path to ancestor because nodes do not lie within the same tree" ) if ancestor.path not in list(a.path for a in (self, *self.parents)): raise NotFoundInTreeError( "Cannot find relative path to ancestor because given node is not an ancestor of this node" ) parents_paths = list(parent.path for parent in (self, *self.parents)) generation_gap = list(parents_paths).index(ancestor.path) path_upwards = "../" * generation_gap if generation_gap > 0 else "." return NodePath(path_upwards) datatree-0.0.14/docs/000077500000000000000000000000001455257650300143315ustar00rootroot00000000000000datatree-0.0.14/docs/Makefile000066400000000000000000000156271455257650300160040ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. 
SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help clean html rtdhtml dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " rtdhtml Build html using same settings used on ReadtheDocs" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " 
changes to make an overview of all changed/added/deprecated items" @echo " xml to make Docutils-native XML files" @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." rtdhtml: $(SPHINXBUILD) -T -j auto -E -W --keep-going -b html -d $(BUILDDIR)/doctrees -D language=en . $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/complexity.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/complexity.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." 
@echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/complexity" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/complexity" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." latexpdfja: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." datatree-0.0.14/docs/README.md000066400000000000000000000004471455257650300156150ustar00rootroot00000000000000# README - docs ## Build the documentation locally ```bash cd docs # From project's root make clean rm -rf source/generated # remove autodoc artefacts, that are not removed by `make clean` make html ``` ## Access the documentation locally Open `docs/_build/html/index.html` in a web browser datatree-0.0.14/docs/make.bat000066400000000000000000000145031455257650300157410ustar00rootroot00000000000000@ECHO OFF REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set BUILDDIR=_build set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . set I18NSPHINXOPTS=%SPHINXOPTS% . if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% ) if "%1" == "" goto help if "%1" == "help" ( :help echo.Please use `make ^` where ^ is one of echo. html to make standalone HTML files echo. dirhtml to make HTML files named index.html in directories echo. singlehtml to make a single large HTML file echo. pickle to make pickle files echo. 
json to make JSON files echo. htmlhelp to make HTML files and a HTML help project echo. qthelp to make HTML files and a qthelp project echo. devhelp to make HTML files and a Devhelp project echo. epub to make an epub echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter echo. text to make text files echo. man to make manual pages echo. texinfo to make Texinfo files echo. gettext to make PO message catalogs echo. changes to make an overview over all changed/added/deprecated items echo. xml to make Docutils-native XML files echo. pseudoxml to make pseudoxml-XML files for display purposes echo. linkcheck to check all external links for integrity echo. doctest to run all doctests embedded in the documentation if enabled goto end ) if "%1" == "clean" ( for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i del /q /s %BUILDDIR%\* goto end ) %SPHINXBUILD% 2> nul if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) if "%1" == "html" ( %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html. goto end ) if "%1" == "dirhtml" ( %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. goto end ) if "%1" == "singlehtml" ( %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. goto end ) if "%1" == "pickle" ( %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle if errorlevel 1 exit /b 1 echo. 
echo.Build finished; now you can process the pickle files. goto end ) if "%1" == "json" ( %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the JSON files. goto end ) if "%1" == "htmlhelp" ( %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run HTML Help Workshop with the ^ .hhp project file in %BUILDDIR%/htmlhelp. goto end ) if "%1" == "qthelp" ( %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in %BUILDDIR%/qthelp, like this: echo.^> qcollectiongenerator %BUILDDIR%\qthelp\complexity.qhcp echo.To view the help file: echo.^> assistant -collectionFile %BUILDDIR%\qthelp\complexity.ghc goto end ) if "%1" == "devhelp" ( %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp if errorlevel 1 exit /b 1 echo. echo.Build finished. goto end ) if "%1" == "epub" ( %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub if errorlevel 1 exit /b 1 echo. echo.Build finished. The epub file is in %BUILDDIR%/epub. goto end ) if "%1" == "latex" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex if errorlevel 1 exit /b 1 echo. echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdf" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf cd %BUILDDIR%/.. echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdfja" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf-ja cd %BUILDDIR%/.. echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "text" ( %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text if errorlevel 1 exit /b 1 echo. echo.Build finished. The text files are in %BUILDDIR%/text. 
goto end ) if "%1" == "man" ( %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man if errorlevel 1 exit /b 1 echo. echo.Build finished. The manual pages are in %BUILDDIR%/man. goto end ) if "%1" == "texinfo" ( %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo if errorlevel 1 exit /b 1 echo. echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. goto end ) if "%1" == "gettext" ( %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale if errorlevel 1 exit /b 1 echo. echo.Build finished. The message catalogs are in %BUILDDIR%/locale. goto end ) if "%1" == "changes" ( %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes if errorlevel 1 exit /b 1 echo. echo.The overview file is in %BUILDDIR%/changes. goto end ) if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ or in %BUILDDIR%/linkcheck/output.txt. goto end ) if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest if errorlevel 1 exit /b 1 echo. echo.Testing of doctests in the sources finished, look at the ^ results in %BUILDDIR%/doctest/output.txt. goto end ) if "%1" == "xml" ( %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml if errorlevel 1 exit /b 1 echo. echo.Build finished. The XML files are in %BUILDDIR%/xml. goto end ) if "%1" == "pseudoxml" ( %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml if errorlevel 1 exit /b 1 echo. echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. goto end ) :end datatree-0.0.14/docs/source/000077500000000000000000000000001455257650300156315ustar00rootroot00000000000000datatree-0.0.14/docs/source/api.rst000066400000000000000000000141161455257650300171370ustar00rootroot00000000000000.. currentmodule:: datatree ############# API reference ############# DataTree ======== Creating a DataTree ------------------- Methods of creating a datatree. .. 
autosummary:: :toctree: generated/ DataTree DataTree.from_dict Tree Attributes --------------- Attributes relating to the recursive tree-like structure of a ``DataTree``. .. autosummary:: :toctree: generated/ DataTree.parent DataTree.children DataTree.name DataTree.path DataTree.root DataTree.is_root DataTree.is_leaf DataTree.leaves DataTree.level DataTree.depth DataTree.width DataTree.subtree DataTree.descendants DataTree.siblings DataTree.lineage DataTree.parents DataTree.ancestors DataTree.groups Data Contents ------------- Interface to the data objects (optionally) stored inside a single ``DataTree`` node. This interface echoes that of ``xarray.Dataset``. .. autosummary:: :toctree: generated/ DataTree.dims DataTree.sizes DataTree.data_vars DataTree.coords DataTree.attrs DataTree.encoding DataTree.indexes DataTree.nbytes DataTree.ds DataTree.to_dataset DataTree.has_data DataTree.has_attrs DataTree.is_empty DataTree.is_hollow Dictionary Interface -------------------- ``DataTree`` objects also have a dict-like interface mapping keys to either ``xarray.DataArray``s or to child ``DataTree`` nodes. .. autosummary:: :toctree: generated/ DataTree.__getitem__ DataTree.__setitem__ DataTree.__delitem__ DataTree.update DataTree.get DataTree.items DataTree.keys DataTree.values Tree Manipulation ----------------- For manipulating, traversing, navigating, or mapping over the tree structure. .. autosummary:: :toctree: generated/ DataTree.orphan DataTree.same_tree DataTree.relative_to DataTree.iter_lineage DataTree.find_common_ancestor DataTree.map_over_subtree map_over_subtree DataTree.pipe DataTree.match DataTree.filter Pathlib-like Interface ---------------------- ``DataTree`` objects deliberately echo some of the API of `pathlib.PurePath`. .. autosummary:: :toctree: generated/ DataTree.name DataTree.parent DataTree.parents DataTree.relative_to Missing: .. 
``DataTree.glob`` ``DataTree.joinpath`` ``DataTree.with_name`` ``DataTree.walk`` ``DataTree.rename`` ``DataTree.replace`` DataTree Contents ----------------- Manipulate the contents of all nodes in a tree simultaneously. .. autosummary:: :toctree: generated/ DataTree.copy DataTree.assign_coords DataTree.merge DataTree.rename DataTree.rename_vars DataTree.rename_dims DataTree.swap_dims DataTree.expand_dims DataTree.drop_vars DataTree.drop_dims DataTree.set_coords DataTree.reset_coords DataTree Node Contents ---------------------- Manipulate the contents of a single DataTree node. .. autosummary:: :toctree: generated/ DataTree.assign DataTree.drop_nodes Comparisons =========== Compare one ``DataTree`` object to another. .. autosummary:: :toctree: generated/ DataTree.isomorphic DataTree.equals DataTree.identical Indexing ======== Index into all nodes in the subtree simultaneously. .. autosummary:: :toctree: generated/ DataTree.isel DataTree.sel DataTree.drop_sel DataTree.drop_isel DataTree.head DataTree.tail DataTree.thin DataTree.squeeze DataTree.interp DataTree.interp_like DataTree.reindex DataTree.reindex_like DataTree.set_index DataTree.reset_index DataTree.reorder_levels DataTree.query .. Missing: ``DataTree.loc`` Missing Value Handling ====================== .. autosummary:: :toctree: generated/ DataTree.isnull DataTree.notnull DataTree.combine_first DataTree.dropna DataTree.fillna DataTree.ffill DataTree.bfill DataTree.interpolate_na DataTree.where DataTree.isin Computation =========== Apply a computation to the data in all nodes in the subtree simultaneously. .. autosummary:: :toctree: generated/ DataTree.map DataTree.reduce DataTree.diff DataTree.quantile DataTree.differentiate DataTree.integrate DataTree.map_blocks DataTree.polyfit DataTree.curvefit Aggregation =========== Aggregate data in all nodes in the subtree simultaneously. .. 
autosummary:: :toctree: generated/ DataTree.all DataTree.any DataTree.argmax DataTree.argmin DataTree.idxmax DataTree.idxmin DataTree.max DataTree.min DataTree.mean DataTree.median DataTree.prod DataTree.sum DataTree.std DataTree.var DataTree.cumsum DataTree.cumprod ndarray methods =============== Methods copied from :py:class:`numpy.ndarray` objects, here applying to the data in all nodes in the subtree. .. autosummary:: :toctree: generated/ DataTree.argsort DataTree.astype DataTree.clip DataTree.conj DataTree.conjugate DataTree.round DataTree.rank Reshaping and reorganising ========================== Reshape or reorganise the data in all nodes in the subtree. .. autosummary:: :toctree: generated/ DataTree.transpose DataTree.stack DataTree.unstack DataTree.shift DataTree.roll DataTree.pad DataTree.sortby DataTree.broadcast_like Plotting ======== I/O === Open a datatree from an on-disk store or serialize the tree. .. autosummary:: :toctree: generated/ open_datatree DataTree.to_dict DataTree.to_netcdf DataTree.to_zarr .. Missing: ``open_mfdatatree`` Tutorial ======== Testing ======= Test that two DataTree objects are similar. .. autosummary:: :toctree: generated/ testing.assert_isomorphic testing.assert_equal testing.assert_identical Exceptions ========== Exceptions raised when manipulating trees. .. autosummary:: :toctree: generated/ TreeIsomorphismError InvalidTreeError NotFoundInTreeError Advanced API ============ Relatively advanced API for users or developers looking to understand the internals, or extend functionality. .. autosummary:: :toctree: generated/ DataTree.variables register_datatree_accessor .. Missing: ``DataTree.set_close`` datatree-0.0.14/docs/source/conf.py000066400000000000000000000316061455257650300171360ustar00rootroot00000000000000# -*- coding: utf-8 -*- # flake8: noqa # Ignoring F401: imported but unused # complexity documentation build configuration file, created by # sphinx-quickstart on Tue Jul 9 22:26:36 2013. 
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import inspect
import os
import sys

import sphinx_autosummary_accessors

import datatree

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
# sys.path.insert(0, os.path.abspath('.'))
# Sphinx runs this file from its own directory, so ``parent`` is the directory
# one level above the one containing conf.py.
cwd = os.getcwd()
parent = os.path.dirname(cwd)
sys.path.insert(0, parent)


# -- General configuration -----------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.viewcode",
    "sphinx.ext.linkcode",
    "sphinx.ext.autosummary",
    "sphinx.ext.intersphinx",
    "sphinx.ext.extlinks",
    "sphinx.ext.napoleon",
    "sphinx_copybutton",
    "sphinxext.opengraph",
    "sphinx_autosummary_accessors",
    "IPython.sphinxext.ipython_console_highlighting",
    "IPython.sphinxext.ipython_directive",
    "nbsphinx",
    "sphinxcontrib.srclinks",
]

# Shorthand roles (:issue:`123`, :pull:`123`) for linking to the GitHub tracker.
extlinks = {
    "issue": ("https://github.com/xarray-contrib/datatree/issues/%s", "GH#%s"),
    "pull": ("https://github.com/xarray-contrib/datatree/pull/%s", "GH#%s"),
}
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates", sphinx_autosummary_accessors.templates_path]

# Generate the API documentation when building
autosummary_generate = True


# Napoleon configurations

napoleon_google_docstring = False
napoleon_numpy_docstring = True
napoleon_use_param = False
napoleon_use_rtype = False
napoleon_preprocess_types = True
# Maps type names as written in numpydoc docstrings to cross-references.
# Entries of the form ":role:`text <target>`" need the explicit target because
# the displayed text is not itself a resolvable name.
napoleon_type_aliases = {
    # general terms
    "sequence": ":term:`sequence`",
    "iterable": ":term:`iterable`",
    "callable": ":py:func:`callable`",
    "dict_like": ":term:`dict-like <mapping>`",
    "dict-like": ":term:`dict-like <mapping>`",
    "path-like": ":term:`path-like <path-like object>`",
    "mapping": ":term:`mapping`",
    "file-like": ":term:`file-like <file-like object>`",
    # special terms
    # "same type as caller": "*same type as caller*",  # does not work, yet
    # "same type as values": "*same type as values*",  # does not work, yet
    # stdlib type aliases
    "MutableMapping": "~collections.abc.MutableMapping",
    "sys.stdout": ":obj:`sys.stdout`",
    "timedelta": "~datetime.timedelta",
    "string": ":class:`string <str>`",
    # numpy terms
    "array_like": ":term:`array_like`",
    "array-like": ":term:`array-like <array_like>`",
    "scalar": ":term:`scalar`",
    "array": ":term:`array`",
    "hashable": ":term:`hashable <name>`",
    # matplotlib terms
    "color-like": ":py:func:`color-like <matplotlib.colors.is_color_like>`",
    "matplotlib colormap name": ":doc:`matplotlib colormap name <matplotlib:gallery/color/colormap_reference>`",
    "matplotlib axes object": ":py:class:`matplotlib axes object <matplotlib.axes.Axes>`",
    "colormap": ":py:class:`colormap <matplotlib.colors.Colormap>`",
    # objects without namespace: xarray
    "DataArray": "~xarray.DataArray",
    "Dataset": "~xarray.Dataset",
    "Variable": "~xarray.Variable",
    "DatasetGroupBy": "~xarray.core.groupby.DatasetGroupBy",
    "DataArrayGroupBy": "~xarray.core.groupby.DataArrayGroupBy",
    # objects without namespace: numpy
    "ndarray": "~numpy.ndarray",
    "MaskedArray": "~numpy.ma.MaskedArray",
    "dtype": "~numpy.dtype",
    "ComplexWarning": "~numpy.ComplexWarning",
    # objects without namespace: pandas
    "Index": "~pandas.Index",
    "MultiIndex": "~pandas.MultiIndex",
    "CategoricalIndex": "~pandas.CategoricalIndex",
    "TimedeltaIndex": "~pandas.TimedeltaIndex",
    "DatetimeIndex": "~pandas.DatetimeIndex",
    "Series": "~pandas.Series",
    "DataFrame": "~pandas.DataFrame",
    "Categorical": "~pandas.Categorical",
    # a single "~" shortens the rendered name; a doubled one is not a valid target
    "Path": "~pathlib.Path",
    # objects with abbreviated namespace (from pandas)
    "pd.Index": "~pandas.Index",
    "pd.NaT": "~pandas.NaT",
}

# The suffix of source filenames.
source_suffix = ".rst"

# The encoding of source files.
# source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = "index"

# General information about the project.
project = "Datatree"
copyright = "2021 onwards, Tom Nicholas and its Contributors"
author = "Tom Nicholas"

html_show_sourcelink = True
srclink_project = "https://github.com/xarray-contrib/datatree"
srclink_branch = "main"
srclink_src_path = "docs/source"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = datatree.__version__
# The full version, including alpha/beta/rc tags.
release = datatree.__version__

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
# language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
# today = ''
# Else, today_fmt is used as the format for a strftime call.
# today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ["_build"]

# The reST default role (used for this markup: `text`) to use for all documents.
# default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
# add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
# add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
# show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
# keep_warnings = False


# -- Intersphinx links ---------------------------------------------------------

# Link against the current Python 3 docs rather than a pinned, unsupported
# minor version, and against xarray's current documentation host.
intersphinx_mapping = {
    "python": ("https://docs.python.org/3/", None),
    "numpy": ("https://numpy.org/doc/stable", None),
    "xarray": ("https://docs.xarray.dev/en/stable/", None),
}

# -- Options for HTML output ---------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
html_theme = "sphinx_book_theme"

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
html_theme_options = {
    "repository_url": "https://github.com/xarray-contrib/datatree",
    "repository_branch": "main",
    "path_to_docs": "docs/source",
    "use_repository_button": True,
    "use_issues_button": True,
    "use_edit_page_button": True,
}

# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = []

# The name for this set of Sphinx documents.  If None, it defaults to
# "<project> v<release> documentation".
# html_title = None

# A shorter title for the navigation bar.  Default is the same as html_title.
# html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
# html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# html_favicon = None

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
# html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
# html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
# html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
# html_additional_pages = {}

# If false, no module index is generated.
# html_domain_indices = True

# If false, no index is generated.
# html_use_index = True

# If true, the index is split into individual pages for each letter.
# html_split_index = False

# If true, links to the reST sources are added to the pages.
# html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
# html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
# html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it.  The value of this option must be the
# base URL from which the finished HTML is served.
# html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = "datatree_doc"


# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    # 'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    # 'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    # 'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
    ("index", "datatree.tex", "Datatree Documentation", author, "manual")
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
# latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
# latex_use_parts = False

# If true, show page references after internal links.
# latex_show_pagerefs = False

# If true, show URL addresses after external links.
# latex_show_urls = False

# Documents to append as an appendix to all manuals.
# latex_appendices = []

# If false, no module index is generated.
# latex_domain_indices = True


# -- Options for manual page output --------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [("index", "datatree", "Datatree Documentation", [author], 1)]

# If true, show URL addresses after external links.
# man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (
        "index",
        "datatree",
        "Datatree Documentation",
        author,
        "datatree",
        "Tree-like hierarchical data structure for xarray.",
        "Miscellaneous",
    )
]

# Documents to append as an appendix to all manuals.
# texinfo_appendices = []

# If false, no module index is generated.
# texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
# texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
# texinfo_no_detailmenu = False


# based on numpy doc/source/conf.py
def linkcode_resolve(domain, info):
    """
    Determine the URL corresponding to Python object

    Called by ``sphinx.ext.linkcode`` for every documented object.

    Parameters
    ----------
    domain : str
        Sphinx domain of the object. Only ``"py"`` is handled.
    info : dict
        Must provide the keys ``"module"`` (dotted module name) and
        ``"fullname"`` (dotted attribute path inside that module).

    Returns
    -------
    str or None
        URL of the object's source on GitHub, or ``None`` when no link can be
        built: non-Python domain, module not imported, attribute missing, no
        Python source file, or source file located outside the ``datatree``
        package.
    """
    if domain != "py":
        return None

    modname = info["module"]
    fullname = info["fullname"]

    # Only modules that are already imported can be inspected.
    submod = sys.modules.get(modname)
    if submod is None:
        return None

    # Walk the dotted path (e.g. "DataTree.copy") down from the module.
    obj = submod
    for part in fullname.split("."):
        try:
            obj = getattr(obj, part)
        except AttributeError:
            return None

    # unwrap() follows __wrapped__ so decorated callables point at the real
    # definition; objects without Python source raise TypeError here.
    try:
        fn = inspect.getsourcefile(inspect.unwrap(obj))
    except TypeError:
        fn = None
    if not fn:
        return None

    try:
        source, lineno = inspect.getsourcelines(obj)
    except OSError:
        lineno = None

    if lineno:
        linespec = f"#L{lineno}-L{lineno + len(source) - 1}"
    else:
        linespec = ""

    try:
        fn = os.path.relpath(fn, start=os.path.dirname(datatree.__file__))
    except ValueError:
        # relpath cannot relate paths on different drives (Windows)
        return None

    # A path that climbs out of the package directory belongs to another
    # distribution; it has no counterpart in the datatree repository, so a
    # link built from it would be dead.
    if fn.split(os.sep)[0] == os.pardir:
        return None

    # URLs always use forward slashes, whatever the local separator is.
    fn = fn.replace(os.sep, "/")

    if "+" in datatree.__version__:
        # local/dev versions have no matching tag, so link to the main branch
        return f"https://github.com/xarray-contrib/datatree/blob/main/datatree/{fn}{linespec}"
    else:
        return (
            f"https://github.com/xarray-contrib/datatree/blob/"
            f"v{datatree.__version__}/datatree/{fn}{linespec}"
        )
* Any details about your local setup that might be helpful in troubleshooting, specifically the Python interpreter version, installed libraries, and Datatree version. * Detailed steps to reproduce the bug. If you can write a demonstration test that currently fails but should pass (xfail), that is a very useful commit to make as well, even if you cannot fix the bug itself. .. _fixbugs: Fix bugs -------- Look through the `GitHub issues for bugs `_. Talk to developers to find out how you can fix specific bugs. Write documentation ------------------- Datatree could always use more documentation. What exactly is needed? * More complementary documentation. Have you perhaps found something unclear? * Docstrings. There can never be too many of them. * Blog posts, articles and such -- they're all much appreciated. You can also edit documentation files directly in the GitHub web interface, without using a local copy. This can be convenient for small fixes. To build the documentation locally, you first need to install the following tools: - `Sphinx `__ - `sphinx-book-theme <https://sphinx-book-theme.readthedocs.io/>`__ - `sphinx-autosummary-accessors `__ You can then build the documentation with the following commands:: $ cd docs $ make html The built documentation should be available in the ``docs/_build/`` folder. .. _`pull requests`: .. _pull-requests: Preparing Pull Requests ----------------------- #. Fork the `Datatree GitHub repository `__. It's fine to use ``Datatree`` as your fork repository name because it will live under your user. #. Clone your fork locally using `git `_ and create a branch:: $ git clone git@github.com:{YOUR_GITHUB_USERNAME}/Datatree.git $ cd Datatree # now, to fix a bug or add a feature create your own branch off "main": $ git checkout -b your-bugfix-feature-branch-name main #. Install `pre-commit `_ and its hook on the Datatree repo:: $ pip install --user pre-commit $ pre-commit install Afterwards ``pre-commit`` will run whenever you commit.
https://pre-commit.com/ is a framework for managing and maintaining multi-language pre-commit hooks to ensure code-style and code formatting are consistent. #. Install dependencies into a new conda environment:: $ conda env update -f ci/environment.yml #. Run all the tests Now running tests is as simple as issuing this command:: $ conda activate datatree-dev $ pytest --junitxml=test-reports/junit.xml --cov=./ --verbose This command will run tests via the "pytest" tool. #. You can now edit your local working copy and run the tests again as necessary. Please follow PEP-8 for naming. When committing, ``pre-commit`` will re-format the files if necessary. #. Commit and push once your tests pass and you are happy with your change(s):: $ git commit -a -m "<commit message>" $ git push -u #. Finally, submit a pull request through the GitHub website using this data:: head-fork: YOUR_GITHUB_USERNAME/Datatree compare: your-branch-name base-fork: xarray-contrib/datatree base: main datatree-0.0.14/docs/source/data-structures.rst000066400000000000000000000171661455257650300215300ustar00rootroot00000000000000.. currentmodule:: datatree .. _data structures: Data Structures =============== .. ipython:: python :suppress: import numpy as np import pandas as pd import xarray as xr import datatree np.random.seed(123456) np.set_printoptions(threshold=10) %xmode minimal .. note:: This page builds on the information given in xarray's main page on `data structures `_, so we suggest that you familiarise yourself with that page first. DataTree -------- :py:class:`DataTree` is xarray's highest-level data structure, able to organise heterogeneous data which could not be stored inside a single :py:class:`Dataset` object. This includes representing the recursive structure of multiple `groups`_ within a netCDF file or `Zarr Store`_. .. _groups: https://www.unidata.ucar.edu/software/netcdf/workshops/2011/groups-types/GroupsIntro.html ..
_Zarr Store: https://zarr.readthedocs.io/en/stable/tutorial.html#groups Each ``DataTree`` object (or "node") contains the same data that a single ``xarray.Dataset`` would (i.e. ``DataArray`` objects stored under hashable keys), and so has the same key properties: - ``dims``: a dictionary mapping of dimension names to lengths, for the variables in this node, - ``data_vars``: a dict-like container of DataArrays corresponding to variables in this node, - ``coords``: another dict-like container of DataArrays, corresponding to coordinate variables in this node, - ``attrs``: dict to hold arbitrary metadata relevant to data in this node. A single ``DataTree`` object acts much like a single ``Dataset`` object, and has a similar set of dict-like methods defined upon it. However, ``DataTree`` objects can also contain other ``DataTree`` objects, so they can be thought of as nested dict-like containers of both ``xarray.DataArray`` and ``DataTree`` objects. A single datatree object is known as a "node", and its position relative to other nodes is defined by two more key properties: - ``children``: An ordered dictionary mapping from names to other ``DataTree`` objects, known as its "child nodes". - ``parent``: The single ``DataTree`` object whose children this datatree is a member of, known as its "parent node". Each child automatically knows about its parent node, and a node without a parent is known as a "root" node (represented by the ``parent`` attribute pointing to ``None``). Nodes can have multiple children, but as each child node has at most one parent, there can only ever be one root node in a given tree. The overall structure is technically a `connected acyclic undirected rooted graph`, otherwise known as a `"Tree" `_. .. note:: Technically a ``DataTree`` with more than one child node forms an `"Ordered Tree" `_, because the children are stored in an Ordered Dictionary.
However, this distinction only really matters for a few edge cases involving operations on multiple trees simultaneously, and can safely be ignored by most users. ``DataTree`` objects can also optionally have a ``name`` as well as ``attrs``, just like a ``DataArray``. Again these are not normally used unless explicitly accessed by the user. .. _creating a datatree: Creating a DataTree ~~~~~~~~~~~~~~~~~~~ One way to create a ``DataTree`` from scratch is to create each node individually, specifying the nodes' relationship to one another as you create each one. The ``DataTree`` constructor takes: - ``data``: The data that will be stored in this node, represented by a single ``xarray.Dataset``, or a named ``xarray.DataArray``. - ``parent``: The parent node (if there is one), given as a ``DataTree`` object. - ``children``: The various child nodes (if there are any), given as a mapping from string keys to ``DataTree`` objects. - ``name``: A string to use as the name of this node. Let's make a single datatree node with some example data in it: .. ipython:: python from datatree import DataTree ds1 = xr.Dataset({"foo": "orange"}) dt = DataTree(name="root", data=ds1) # create root node dt At this point our node is also the root node, as every tree has a root node. We can add a second node to this tree either by referring to the first node in the constructor of the second: .. ipython:: python ds2 = xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])}) # add a child by referring to the parent node node2 = DataTree(name="a", parent=dt, data=ds2) or by dynamically updating the attributes of one node to refer to another: .. ipython:: python # add a second child by first creating a new node ... ds3 = xr.Dataset({"zed": np.NaN}) node3 = DataTree(name="b", data=ds3) # ... then updating its .parent property node3.parent = dt Our tree now has three nodes within it: .. ipython:: python dt It is at tree construction time that consistency checks are enforced. 
For instance, if we try to create a `cycle` the constructor will raise an error: .. ipython:: python :okexcept: dt.parent = node3 Alternatively you can also create a ``DataTree`` object from - An ``xarray.Dataset`` using ``Dataset.to_node()`` (not yet implemented), - A dictionary mapping directory-like paths to either ``DataTree`` nodes or data, using :py:meth:`DataTree.from_dict()`, - A netCDF or Zarr file on disk with :py:func:`open_datatree()`. See :ref:`reading and writing files `. DataTree Contents ~~~~~~~~~~~~~~~~~ Like ``xarray.Dataset``, ``DataTree`` implements the python mapping interface, but with values given by either ``xarray.DataArray`` objects or other ``DataTree`` objects. .. ipython:: python dt["a"] dt["foo"] Iterating over keys will iterate over both the names of variables and child nodes. We can also access all the data in a single node through a dataset-like view .. ipython:: python dt["a"].ds This demonstrates the fact that the data in any one node is equivalent to the contents of a single ``xarray.Dataset`` object. The ``DataTree.ds`` property returns an immutable view, but we can instead extract the node's data contents as a new (and mutable) ``xarray.Dataset`` object via :py:meth:`DataTree.to_dataset()`: .. ipython:: python dt["a"].to_dataset() Like with ``Dataset``, you can access the data and coordinate variables of a node separately via the ``data_vars`` and ``coords`` attributes: .. ipython:: python dt["a"].data_vars dt["a"].coords Dictionary-like methods ~~~~~~~~~~~~~~~~~~~~~~~ We can update a datatree in-place using Python's standard dictionary syntax, similar to how we can for Dataset objects. For example, to create this example datatree from scratch, we could have written: # TODO update this example using ``.coords`` and ``.data_vars`` as setters, .. 
ipython:: python dt = DataTree(name="root") dt["foo"] = "orange" dt["a"] = DataTree(data=xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])})) dt["a/b/zed"] = np.NaN dt To change the variables in a node of a ``DataTree``, you can use all the standard dictionary methods, including ``values``, ``items``, ``__delitem__``, ``get`` and :py:meth:`DataTree.update`. Note that assigning a ``DataArray`` object to a ``DataTree`` variable using ``__setitem__`` or ``update`` will :ref:`automatically align ` the array(s) to the original node's indexes. If you copy a ``DataTree`` using the :py:func:`copy` function or the :py:meth:`DataTree.copy` method it will copy the subtree, meaning that node and the children below it, but no parents above it. As with ``Dataset``, this copy is shallow by default, but you can copy all the underlying data arrays by calling ``dt.copy(deep=True)``. datatree-0.0.14/docs/source/hierarchical-data.rst000066400000000000000000000542061455257650300217170ustar00rootroot00000000000000.. currentmodule:: datatree .. _hierarchical-data: Working With Hierarchical Data ============================== .. ipython:: python :suppress: import numpy as np import pandas as pd import xarray as xr from datatree import DataTree np.random.seed(123456) np.set_printoptions(threshold=10) %xmode minimal Why Hierarchical Data? ---------------------- Many real-world datasets are composed of multiple differing components, and it can often be useful to think of these in terms of a hierarchy of related groups of data. Examples of data which one might want to organise in a grouped or hierarchical manner include: - Simulation data at multiple resolutions, - Observational data about the same system but from multiple different types of sensors, - Mixed experimental and theoretical data, - A systematic study recording the same experiment but with different parameters, - Heterogeneous data, such as demographic and meteorological data, or even any combination of the above.
Often datasets like this cannot easily fit into a single :py:class:`xarray.Dataset` object, or are more usefully thought of as groups of related ``xarray.Dataset`` objects. For this purpose we provide the :py:class:`DataTree` class. This page explains in detail how to understand and use the different features of the :py:class:`DataTree` class for your own hierarchical data needs. .. _node relationships: Node Relationships ------------------ .. _creating a family tree: Creating a Family Tree ~~~~~~~~~~~~~~~~~~~~~~ The three main ways of creating a ``DataTree`` object are described briefly in :ref:`creating a datatree`. Here we go into more detail about how to create a tree node-by-node, using a famous family tree from the Simpsons cartoon as an example. Let's start by defining nodes representing the two siblings, Bart and Lisa Simpson: .. ipython:: python bart = DataTree(name="Bart") lisa = DataTree(name="Lisa") Each of these node objects knows its own :py:class:`~DataTree.name`, but they currently have no relationship to one another. We can connect them by creating another node representing a common parent, Homer Simpson: .. ipython:: python homer = DataTree(name="Homer", children={"Bart": bart, "Lisa": lisa}) Here we set the children of Homer in the node's constructor. We now have a small family tree .. ipython:: python homer where we can see how these individual Simpson family members are related to one another. The nodes representing Bart and Lisa are now connected - we can confirm their sibling rivalry by examining the :py:class:`~DataTree.siblings` property: .. ipython:: python list(bart.siblings) But oops, we forgot Homer's third child, Maggie! Let's add her by updating Homer's :py:class:`~DataTree.children` property to include her: .. ipython:: python maggie = DataTree(name="Maggie") homer.children = {"Bart": bart, "Lisa": lisa, "Maggie": maggie} homer Let's check that Maggie knows who her Dad is: ..
ipython:: python maggie.parent.name That's good - updating the properties of our nodes does not break the internal consistency of our tree, as changes of parentage are automatically reflected on both nodes. These children obviously have another parent, Marge Simpson, but ``DataTree`` nodes can only have a maximum of one parent. Genealogical `family trees are not even technically trees `_ in the mathematical sense - the fact that distant relatives can mate makes them directed acyclic graphs. Trees of ``DataTree`` objects cannot represent this. Homer is currently listed as having no parent (the so-called "root node" of this tree), but we can update his :py:class:`~DataTree.parent` property: .. ipython:: python abe = DataTree(name="Abe") homer.parent = abe Abe is now the "root" of this tree, which we can see by examining the :py:class:`~DataTree.root` property of any node in the tree .. ipython:: python maggie.root.name We can see the whole tree by printing Abe's node or just part of the tree by printing Homer's node: .. ipython:: python abe homer We can see that Homer is aware of his parentage, and we say that Homer and his children form a "subtree" of the larger Simpson family tree. In episode 28, Abe Simpson reveals that he had another son, Herbert "Herb" Simpson. We can add Herbert to the family tree without displacing Homer by :py:meth:`~DataTree.assign`-ing another child to Abe: .. ipython:: python herbert = DataTree(name="Herb") abe.assign({"Herbert": herbert}) .. note:: This example shows a minor subtlety - the returned tree has Homer's brother listed as ``"Herbert"``, but the original node was named ``"Herb"``. Not only are names overridden when stored as keys like this, but the new node is a copy, so that the original node that was referenced is unchanged (i.e. ``herbert.name == "Herb"`` still). In other words, nodes are copied into trees, not inserted into them.
This is intentional, and mirrors the behaviour when storing named ``xarray.DataArray`` objects inside datasets. Certain manipulations of our tree are forbidden, if they would create an inconsistent result. In episode 51 of the show Futurama, Philip J. Fry travels back in time and accidentally becomes his own Grandfather. If we try similar time-travelling hijinks with Homer, we get a :py:class:`InvalidTreeError` raised: .. ipython:: python :okexcept: abe.parent = homer .. _evolutionary tree: Ancestry in an Evolutionary Tree ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Let's use a different example of a tree to discuss more complex relationships between nodes - the phylogenetic tree, or tree of life. .. ipython:: python vertebrates = DataTree.from_dict( name="Vertebrae", d={ "/Sharks": None, "/Bony Skeleton/Ray-finned Fish": None, "/Bony Skeleton/Four Limbs/Amphibians": None, "/Bony Skeleton/Four Limbs/Amniotic Egg/Hair/Primates": None, "/Bony Skeleton/Four Limbs/Amniotic Egg/Hair/Rodents & Rabbits": None, "/Bony Skeleton/Four Limbs/Amniotic Egg/Two Fenestrae/Dinosaurs": None, "/Bony Skeleton/Four Limbs/Amniotic Egg/Two Fenestrae/Birds": None, }, ) primates = vertebrates["/Bony Skeleton/Four Limbs/Amniotic Egg/Hair/Primates"] dinosaurs = vertebrates[ "/Bony Skeleton/Four Limbs/Amniotic Egg/Two Fenestrae/Dinosaurs" ] We have used the :py:meth:`~DataTree.from_dict` constructor method as an alternate way to quickly create a whole tree, and :ref:`filesystem paths` (to be explained shortly) to select two nodes of interest. .. ipython:: python vertebrates This tree shows various families of species, grouped by their common features (making it technically a `"Cladogram" `_, rather than an evolutionary tree). Here both the species and the features used to group them are represented by ``DataTree`` node objects - there is no distinction in types of node. 
We can however get a list of only the nodes we used to represent species by using the fact that all those nodes have no children - they are "leaf nodes". We can check if a node is a leaf with :py:meth:`~DataTree.is_leaf`, and get a list of all leaves with the :py:class:`~DataTree.leaves` property: .. ipython:: python primates.is_leaf [node.name for node in vertebrates.leaves] Pretending that this is a true evolutionary tree for a moment, we can find the features of the evolutionary ancestors (so-called "ancestor" nodes), the distinguishing feature of the common ancestor of all vertebrate life (the root node), and even the distinguishing feature of the common ancestor of any two species (the common ancestor of two nodes): .. ipython:: python [node.name for node in primates.ancestors] primates.root.name primates.find_common_ancestor(dinosaurs).name We can only find a common ancestor between two nodes that lie in the same tree. If we try to find the common evolutionary ancestor between primates and an Alien species that has no relationship to Earth's evolutionary tree, an error will be raised. .. ipython:: python :okexcept: alien = DataTree(name="Xenomorph") primates.find_common_ancestor(alien) .. _navigating trees: Navigating Trees ---------------- There are various ways to access the different nodes in a tree. Properties ~~~~~~~~~~ We can navigate trees using the :py:class:`~DataTree.parent` and :py:class:`~DataTree.children` properties of each node, for example: .. ipython:: python lisa.parent.children["Bart"].name but there are also more convenient ways to access nodes. Dictionary-like interface ~~~~~~~~~~~~~~~~~~~~~~~~~ Children are stored on each node as a key-value mapping from name to child node. They can be accessed and altered via the :py:class:`~DataTree.__getitem__` and :py:class:`~DataTree.__setitem__` syntax. 
In general :py:class:`~DataTree.DataTree` objects support almost the entire set of dict-like methods, including :py:meth:`~DataTree.keys`, :py:class:`~DataTree.values`, :py:class:`~DataTree.items`, :py:meth:`~DataTree.__delitem__` and :py:meth:`~DataTree.update`. .. ipython:: python vertebrates["Bony Skeleton"]["Ray-finned Fish"] Note that the dict-like interface combines access to child ``DataTree`` nodes and stored ``DataArrays``, so if we have a node that contains both children and data, calling :py:meth:`~DataTree.keys` will list both names of child nodes and names of data variables: .. ipython:: python dt = DataTree( data=xr.Dataset({"foo": 0, "bar": 1}), children={"a": DataTree(), "b": DataTree()}, ) print(dt) list(dt.keys()) This also means that the names of variables and of child nodes must be different to one another. Attribute-like access ~~~~~~~~~~~~~~~~~~~~~ You can also select both variables and child nodes through dot indexing .. ipython:: python dt.foo dt.a .. _filesystem paths: Filesystem-like Paths ~~~~~~~~~~~~~~~~~~~~~ Hierarchical trees can be thought of as analogous to file systems. Each node is like a directory, and each directory can contain both more sub-directories and data. .. note:: You can even make the filesystem analogy concrete by using :py:func:`~DataTree.open_mfdatatree` or :py:func:`~DataTree.save_mfdatatree` # TODO not yet implemented - see GH issue 51 Datatree objects support a syntax inspired by unix-like filesystems, where the "path" to a node is specified by the keys of each intermediate node in sequence, separated by forward slashes. This is an extension of the conventional dictionary ``__getitem__`` syntax to allow navigation across multiple levels of the tree. Like with filepaths, paths within the tree can either be relative to the current node, e.g. .. ipython:: python abe["Homer/Bart"].name abe["./Homer/Bart"].name # alternative syntax or relative to the root node. 
A path specified from the root (as opposed to being specified relative to an arbitrary node in the tree) is sometimes also referred to as a `"fully qualified name" `_, or as an "absolute path". The root node is referred to by ``"/"``, so the path from the root node to its grand-child would be ``"/child/grandchild"``, e.g. .. ipython:: python # absolute path will start from root node lisa["/Homer/Bart"].name Relative paths between nodes also support the ``"../"`` syntax to mean the parent of the current node. We can use this with ``__setitem__`` to add a missing entry to our evolutionary tree, but add it relative to a more familiar node of interest: .. ipython:: python primates["../../Two Fenestrae/Crocodiles"] = DataTree() print(vertebrates) Given two nodes in a tree, we can also find their relative path: .. ipython:: python bart.relative_to(lisa) You can use this filepath feature to build a nested tree from a dictionary of filesystem-like paths and corresponding ``xarray.Dataset`` objects in a single step. If we have a dictionary where each key is a valid path, and each value is either valid data or ``None``, we can construct a complex tree quickly using the alternative constructor :py:meth:`DataTree.from_dict()`: .. ipython:: python d = { "/": xr.Dataset({"foo": "orange"}), "/a": xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])}), "/a/b": xr.Dataset({"zed": np.NaN}), "a/c/d": None, } dt = DataTree.from_dict(d) dt .. note:: Notice that using the path-like syntax will also create any intermediate empty nodes necessary to reach the end of the specified path (i.e. the node labelled `"c"` in this case.) This is to help avoid lots of redundant entries when creating deeply-nested trees using :py:meth:`DataTree.from_dict`. .. _iterating over trees: Iterating over trees ~~~~~~~~~~~~~~~~~~~~ You can iterate over every node in a tree using the subtree :py:class:`~DataTree.subtree` property. This returns an iterable of nodes, which yields them in depth-first order. .. 
ipython:: python for node in vertebrates.subtree: print(node.path) A very useful pattern is to use :py:class:`~DataTree.subtree` in conjunction with the :py:class:`~DataTree.path` property to manipulate the nodes however you wish, then rebuild a new tree using :py:meth:`DataTree.from_dict()`. For example, we could keep only the nodes containing data by looping over all nodes, checking if they contain any data using :py:class:`~DataTree.has_data`, then rebuilding a new tree using only the paths of those nodes: .. ipython:: python non_empty_nodes = {node.path: node.ds for node in dt.subtree if node.has_data} DataTree.from_dict(non_empty_nodes) You can see this tree is similar to the ``dt`` object above, except that it is missing the empty nodes ``a/c`` and ``a/c/d``. (If you want to keep the name of the root node, you will need to add the ``name`` kwarg to :py:class:`from_dict`, i.e. ``DataTree.from_dict(non_empty_nodes, name=dt.root.name)``.) .. _manipulating trees: Manipulating Trees ------------------ Subsetting Tree Nodes ~~~~~~~~~~~~~~~~~~~~~ We can subset our tree to select only nodes of interest in various ways. Similarly to on a real filesystem, matching nodes by common patterns in their paths is often useful. We can use :py:meth:`DataTree.match` for this: .. ipython:: python dt = DataTree.from_dict( { "/a/A": None, "/a/B": None, "/b/A": None, "/b/B": None, } ) result = dt.match("*/B") result We can also subset trees by the contents of the nodes. :py:meth:`DataTree.filter` retains only the nodes of a tree that meet a certain condition. For example, we could recreate the Simpsons' family tree with the ages of each individual, then filter for only the adults: First let's recreate the tree but with an `age` data variable in every node: ..
ipython:: python simpsons = DataTree.from_dict( d={ "/": xr.Dataset({"age": 83}), "/Herbert": xr.Dataset({"age": 40}), "/Homer": xr.Dataset({"age": 39}), "/Homer/Bart": xr.Dataset({"age": 10}), "/Homer/Lisa": xr.Dataset({"age": 8}), "/Homer/Maggie": xr.Dataset({"age": 1}), }, name="Abe", ) simpsons Now let's filter out the minors: .. ipython:: python simpsons.filter(lambda node: node["age"] > 18) The result is a new tree, containing only the nodes matching the condition. (Yes, under the hood :py:meth:`~DataTree.filter` is just syntactic sugar for the pattern we showed you in :ref:`iterating over trees` !) .. _Tree Contents: Tree Contents ------------- Hollow Trees ~~~~~~~~~~~~ A concept that can sometimes be useful is that of a "Hollow Tree", which means a tree with data stored only at the leaf nodes. This is useful because certain tree manipulation operations only make sense for hollow trees. You can check if a tree is a hollow tree by using the :py:class:`~DataTree.is_hollow` property. We can see that the Simpsons' family tree is not hollow because the data variable ``"age"`` is present at some nodes which have children (i.e. Abe and Homer). .. ipython:: python simpsons.is_hollow .. _tree computation: Computation ----------- `DataTree` objects are also useful for performing computations, not just for organizing data. Operations and Methods on Trees ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To show how applying operations across a whole tree at once can be useful, let's first create an example scientific dataset. ..
ipython:: python def time_stamps(n_samples, T): """Create an array of evenly-spaced time stamps""" return xr.DataArray( data=np.linspace(0, 2 * np.pi * T, n_samples), dims=["time"] ) def signal_generator(t, f, A, phase): """Generate an example electrical-like waveform""" return A * np.sin(f * t.data + phase) time_stamps1 = time_stamps(n_samples=15, T=1.5) time_stamps2 = time_stamps(n_samples=10, T=1.0) voltages = DataTree.from_dict( { "/oscilloscope1": xr.Dataset( { "potential": ( "time", signal_generator(time_stamps1, f=2, A=1.2, phase=0.5), ), "current": ( "time", signal_generator(time_stamps1, f=2, A=1.2, phase=1), ), }, coords={"time": time_stamps1}, ), "/oscilloscope2": xr.Dataset( { "potential": ( "time", signal_generator(time_stamps2, f=1.6, A=1.6, phase=0.2), ), "current": ( "time", signal_generator(time_stamps2, f=1.6, A=1.6, phase=0.7), ), }, coords={"time": time_stamps2}, ), } ) voltages Most xarray computation methods also exist as methods on datatree objects, so you can for example take the mean value of these two timeseries at once: .. ipython:: python voltages.mean(dim="time") This works by mapping the standard :py:meth:`xarray.Dataset.mean()` method over the dataset stored in each node of the tree one-by-one. The arguments passed to the method are used for every node, so the values of the arguments you pass might be valid for one node and invalid for another .. ipython:: python :okexcept: voltages.isel(time=12) Notice that the error raised helpfully indicates which node of the tree the operation failed on. Arithmetic Methods on Trees ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Arithmetic methods are also implemented, so you can e.g. add a scalar to every dataset in the tree at once. For example, we can advance the timeline of the Simpsons by a decade just by .. ipython:: python simpsons + 10 See that the same change (fast-forwarding by adding 10 years to the age of each character) has been applied to every node. 
Mapping Custom Functions Over Trees ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can map custom computation over each node in a tree using :py:meth:`DataTree.map_over_subtree`. You can map any function, so long as it takes `xarray.Dataset` objects as one (or more) of the input arguments, and returns one (or more) xarray datasets. .. note:: Functions passed to :py:func:`map_over_subtree` cannot alter nodes in-place. Instead they must return new `xarray.Dataset` objects. For example, we can define a function to calculate the Root Mean Square of a timeseries .. ipython:: python def rms(signal): return np.sqrt(np.mean(signal**2)) Then calculate the RMS value of these signals: .. ipython:: python voltages.map_over_subtree(rms) .. _multiple trees: We can also use the :py:func:`map_over_subtree` decorator to promote a function which accepts datasets into one which accepts datatrees. Operating on Multiple Trees --------------------------- The examples so far have involved mapping functions or methods over the nodes of a single tree, but we can generalize this to mapping functions over multiple trees at once. Comparing Trees for Isomorphism ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For it to make sense to map a single non-unary function over the nodes of multiple trees at once, each tree needs to have the same structure. Specifically two trees can only be considered similar, or "isomorphic", if they have the same number of nodes, and each corresponding node has the same number of children. We can check if any two trees are isomorphic using the :py:meth:`DataTree.isomorphic` method. .. ipython:: python :okexcept: dt1 = DataTree.from_dict({"a": None, "a/b": None}) dt2 = DataTree.from_dict({"a": None}) dt1.isomorphic(dt2) dt3 = DataTree.from_dict({"a": None, "b": None}) dt1.isomorphic(dt3) dt4 = DataTree.from_dict({"A": None, "A/B": xr.Dataset({"foo": 1})}) dt1.isomorphic(dt4) If the trees are not isomorphic a :py:class:`~TreeIsomorphismError` will be raised. 
Notice that corresponding tree nodes do not need to have the same name or contain the same data in order to be considered isomorphic. Arithmetic Between Multiple Trees ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Arithmetic operations like multiplication are binary operations, so as long as we have two isomorphic trees, we can do arithmetic between them. .. ipython:: python currents = DataTree.from_dict( { "/oscilloscope1": xr.Dataset( { "current": ( "time", signal_generator(time_stamps1, f=2, A=1.2, phase=1), ), }, coords={"time": time_stamps1}, ), "/oscilloscope2": xr.Dataset( { "current": ( "time", signal_generator(time_stamps2, f=1.6, A=1.6, phase=0.7), ), }, coords={"time": time_stamps2}, ), } ) currents currents.isomorphic(voltages) We could use this feature to quickly calculate the electrical power in our signal, P=IV. .. ipython:: python power = currents * voltages power datatree-0.0.14/docs/source/index.rst000066400000000000000000000055141455257650300174770ustar00rootroot00000000000000.. currentmodule:: datatree Datatree ======== **Datatree is a prototype implementation of a tree-like hierarchical data structure for xarray.** Why Datatree? ~~~~~~~~~~~~~ Datatree was born after the xarray team recognised a `need for a new hierarchical data structure `_, that was more flexible than a single :py:class:`xarray.Dataset` object. The initial motivation was to represent netCDF files / Zarr stores with multiple nested groups in a single in-memory object, but :py:class:`~datatree.DataTree` objects have many other uses. You might want to use datatree for: - Organising many related datasets, e.g. results of the same experiment with different parameters, or simulations of the same system using different models, - Analysing similar data at multiple resolutions simultaneously, such as when doing a convergence study, - Comparing heterogenous but related data, such as experimental and theoretical data, - I/O with nested data formats such as netCDF / Zarr groups. 
Development Roadmap ~~~~~~~~~~~~~~~~~~~ Datatree currently lives in a separate repository to the main xarray package. This allows the datatree developers to make changes to it, experiment, and improve it faster. Eventually we plan to fully integrate datatree upstream into xarray's main codebase, at which point the `github.com/xarray-contrib/datatree `_ repository will be archived. This should not cause much disruption to code that depends on datatree - you will likely only have to change the import line (i.e. from ``from datatree import DataTree`` to ``from xarray import DataTree``). However, until this full integration occurs, datatree's API should not be considered to have the same `level of stability as xarray's `_. User Feedback ~~~~~~~~~~~~~ We really really really want to hear your opinions on datatree! At this point in development, user feedback is critical to help us create something that will suit everyone's needs. Please raise any thoughts, issues, suggestions or bugs, no matter how small or large, on the `github issue tracker `_. .. toctree:: :maxdepth: 2 :caption: Documentation Contents Installation Quick Overview Tutorial Data Model Hierarchical Data Reading and Writing Files API Reference Terminology Contributing Guide What's New GitHub repository Feedback -------- If you encounter any errors, problems with **Datatree**, or have any suggestions, please open an issue on `GitHub `_. datatree-0.0.14/docs/source/installation.rst000066400000000000000000000021501455257650300210620ustar00rootroot00000000000000.. currentmodule:: datatree ============ Installation ============ Datatree can be installed in three ways: Using the `conda `__ package manager that comes with the Anaconda/Miniconda distribution: .. code:: bash $ conda install xarray-datatree --channel conda-forge Using the `pip `__ package manager: .. code:: bash $ python -m pip install xarray-datatree To install a development version from source: .. 
code:: bash $ git clone https://github.com/xarray-contrib/datatree $ cd datatree $ python -m pip install -e . You will just need xarray as a required dependency, with netcdf4, zarr, and h5netcdf as optional dependencies to allow file I/O. .. note:: Datatree is very much still in the early stages of development. There may be functions that are present but whose internals are not yet implemented, or significant changes to the API in future. That said, if you try it out and find some behaviour that looks like a bug to you, please report it on the `issue tracker `_! datatree-0.0.14/docs/source/io.rst000066400000000000000000000042371455257650300170000ustar00rootroot00000000000000.. currentmodule:: datatree .. _io: Reading and Writing Files ========================= .. note:: This page builds on the information given in xarray's main page on `reading and writing files `_, so it is suggested that you are familiar with those first. netCDF ------ Groups ~~~~~~ Whilst netCDF groups can only be loaded individually as Dataset objects, a whole file of many nested groups can be loaded as a single :py:class:`DataTree` object. To open a whole netCDF file as a tree of groups use the :py:func:`open_datatree` function. To save a DataTree object as a netCDF file containing many groups, use the :py:meth:`DataTree.to_netcdf` method. .. _netcdf.group.warning: .. warning:: ``DataTree`` objects do not follow the exact same data model as netCDF files, which means that perfect round-tripping is not always possible. In particular in the netCDF data model dimensions are entities that can exist regardless of whether any variable possesses them. This is in contrast to `xarray's data model `_ (and hence :ref:`datatree's data model `) in which the dimensions of a (Dataset/Tree) object are simply the set of dimensions present across all variables in that dataset. 
This means that if a netCDF file contains dimensions but no variables which possess those dimensions, these dimensions will not be present when that file is opened as a DataTree object. Saving this DataTree object to file will therefore not preserve these "unused" dimensions. Zarr ---- Groups ~~~~~~ Nested groups in zarr stores can be represented by loading the store as a :py:class:`DataTree` object, similarly to netCDF. To open a whole zarr store as a tree of groups use the :py:func:`open_datatree` function. To save a DataTree object as a zarr store containing many groups, use the :py:meth:`DataTree.to_zarr()` method. .. note:: Note that perfect round-tripping should always be possible with a zarr store (:ref:`unlike for netCDF files `), as zarr does not support "unused" dimensions. datatree-0.0.14/docs/source/quick-overview.rst000066400000000000000000000061761455257650300213550ustar00rootroot00000000000000.. currentmodule:: datatree ############## Quick overview ############## DataTrees --------- :py:class:`DataTree` is a tree-like container of :py:class:`xarray.DataArray` objects, organised into multiple mutually alignable groups. You can think of it like a (recursive) ``dict`` of :py:class:`xarray.Dataset` objects. Let's first make some example xarray datasets (following on from xarray's `quick overview `_ page): .. ipython:: python import numpy as np import xarray as xr data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]}) ds = xr.Dataset(dict(foo=data, bar=("x", [1, 2]), baz=np.pi)) ds ds2 = ds.interp(coords={"x": [10, 12, 14, 16, 18, 20]}) ds2 ds3 = xr.Dataset( dict(people=["alice", "bob"], heights=("people", [1.57, 1.82])), coords={"species": "human"}, ) ds3 Now we'll put this data into a multi-group tree: .. ipython:: python from datatree import DataTree dt = DataTree.from_dict({"simulation/coarse": ds, "simulation/fine": ds2, "/": ds3}) dt This creates a datatree with various groups. 
We have one root group, containing information about individual people. (This root group can be named, but here is unnamed, so is referred to with ``"/"``, same as the root of a unix-like filesystem.) The root group then has one subgroup ``simulation``, which contains no data itself but does contain another two subgroups, named ``fine`` and ``coarse``. The (sub-)sub-groups ``fine`` and ``coarse`` contain two very similar datasets. They both have an ``"x"`` dimension, but the dimension is of different lengths in each group, which makes the data in each group unalignable. In the root group we placed some completely unrelated information, showing how we can use a tree to store heterogenous data. The constraints on each group are therefore the same as the constraint on dataarrays within a single dataset. We created the sub-groups using a filesystem-like syntax, and accessing groups works the same way. We can access individual dataarrays in a similar fashion .. ipython:: python dt["simulation/coarse/foo"] and we can also pull out the data in a particular group as a ``Dataset`` object using ``.ds``: .. ipython:: python dt["simulation/coarse"].ds Operations map over subtrees, so we can take a mean over the ``x`` dimension of both the ``fine`` and ``coarse`` groups just by .. ipython:: python avg = dt["simulation"].mean(dim="x") avg Here the ``"x"`` dimension used is always the one local to that sub-group. You can do almost everything you can do with ``Dataset`` objects with ``DataTree`` objects (including indexing and arithmetic), as operations will be mapped over every sub-group in the tree. This allows you to work with multiple groups of non-alignable variables at once. .. note:: If all of your variables are mutually alignable (i.e. they live on the same grid, such that every common dimension name maps to the same length), then you probably don't need :py:class:`DataTree`, and should consider just sticking with ``xarray.Dataset``. 
datatree-0.0.14/docs/source/terminology.rst000066400000000000000000000034601455257650300207360ustar00rootroot00000000000000.. currentmodule:: datatree .. _terminology: This page extends `xarray's page on terminology `_. Terminology =========== .. glossary:: DataTree A tree-like collection of ``Dataset`` objects. A *tree* is made up of one or more *nodes*, each of which can store the same information as a single ``Dataset`` (accessed via ``.ds``). This data is stored in the same way as in a ``Dataset``, i.e. in the form of data variables (see **Variable** in the `corresponding xarray terminology page `_), dimensions, coordinates, and attributes. The nodes in a tree are linked to one another, and each node is its own instance of a ``DataTree`` object. Each node can have zero or more *children* (stored in a dictionary-like manner under their corresponding *names*), and those child nodes can themselves have children. If a node is a child of another node that other node is said to be its *parent*. Nodes can have a maximum of one parent, and if a node has no parent it is said to be the *root* node of that *tree*. Subtree A section of a *tree*, consisting of a *node* along with all the child nodes below it (and the child nodes below them, i.e. all so-called *descendant* nodes). Excludes the parent node and all nodes above. Group Another word for a subtree, reflecting how the hierarchical structure of a ``DataTree`` allows for grouping related data together. Analogous to a single `netCDF group `_ or `Zarr group `_. datatree-0.0.14/docs/source/tutorial.rst000066400000000000000000000001061455257650300202230ustar00rootroot00000000000000.. currentmodule:: datatree ======== Tutorial ======== Coming soon! datatree-0.0.14/docs/source/whats-new.rst000066400000000000000000000343341455257650300203070ustar00rootroot00000000000000.. currentmodule:: datatree What's New ========== ..
ipython:: python :suppress: import numpy as np import pandas as pd import xarray as xray import xarray import xarray as xr import datatree np.random.seed(123456) .. _whats-new.v0.0.14: v0.0.14 (unreleased) -------------------- New Features ~~~~~~~~~~~~ Breaking changes ~~~~~~~~~~~~~~~~ - Renamed `DataTree.lineage` to `DataTree.parents` to match `pathlib` vocabulary (:issue:`283`, :pull:`286`) - Minimum required version of xarray is now 2023.12.0, i.e. the latest version. This is required to prevent recent changes to xarray's internals from breaking datatree. (:issue:`293`, :pull:`294`) By `Tom Nicholas `_. - Change default write mode of :py:meth:`DataTree.to_zarr` to ``'w-'`` to match ``xarray`` default and prevent accidental directory overwrites. (:issue:`274`, :pull:`275`) By `Sam Levang `_. Deprecations ~~~~~~~~~~~~ - Renamed `DataTree.lineage` to `DataTree.parents` to match `pathlib` vocabulary (:issue:`283`, :pull:`286`). `lineage` is now deprecated and use of `parents` is encouraged. By `Etienne Schalk `_. Bug fixes ~~~~~~~~~ - Keep attributes on nodes containing no data in :py:func:`map_over_subtree`. (:issue:`278`, :pull:`279`) By `Sam Levang `_. Documentation ~~~~~~~~~~~~~ - Use ``napoleon`` instead of ``numpydoc`` to align with xarray documentation (:issue:`284`, :pull:`298`). By `Etienne Schalk `_. Internal Changes ~~~~~~~~~~~~~~~~ .. _whats-new.v0.0.13: v0.0.13 (27/10/2023) -------------------- New Features ~~~~~~~~~~~~ - New :py:meth:`DataTree.match` method for glob-like pattern matching of node paths. (:pull:`267`) By `Tom Nicholas `_. - New :py:meth:`DataTree.is_hollow` property for checking if data is only contained at the leaf nodes. (:pull:`272`) By `Tom Nicholas `_. - Indicate which node caused the problem if error encountered while applying user function using :py:func:`map_over_subtree` (:issue:`190`, :pull:`264`). Only works when using python 3.11 or later. By `Tom Nicholas `_. 
Breaking changes ~~~~~~~~~~~~~~~~ - Nodes containing only attributes but no data are now ignored by :py:func:`map_over_subtree` (:issue:`262`, :pull:`263`) By `Tom Nicholas `_. - Disallow altering of given dataset inside function called by :py:func:`map_over_subtree` (:pull:`269`, reverts part of :pull:`194`). By `Tom Nicholas `_. Bug fixes ~~~~~~~~~ - Fix unittests on i386. (:pull:`249`) By `Antonio Valentino `_. - Ensure nodepath class is compatible with python 3.12 (:pull:`260`) By `Max Grover `_. Documentation ~~~~~~~~~~~~~ - Added new sections to page on ``Working with Hierarchical Data`` (:pull:`180`) By `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ * No longer use the deprecated `distutils` package. .. _whats-new.v0.0.12: v0.0.12 (03/07/2023) -------------------- New Features ~~~~~~~~~~~~ - Added a :py:func:`DataTree.level`, :py:func:`DataTree.depth`, and :py:func:`DataTree.width` property (:pull:`208`). By `Tom Nicholas `_. - Allow dot-style (or "attribute-like") access to child nodes and variables, with ipython autocomplete. (:issue:`189`, :pull:`98`) By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ Deprecations ~~~~~~~~~~~~ - Dropped support for python 3.8 (:issue:`212`, :pull:`214`) By `Tom Nicholas `_. Bug fixes ~~~~~~~~~ - Allow for altering of given dataset inside function called by :py:func:`map_over_subtree` (:issue:`188`, :pull:`194`). By `Tom Nicholas `_. - copy subtrees without creating ancestor nodes (:pull:`201`) By `Justus Magin `_. Documentation ~~~~~~~~~~~~~ Internal Changes ~~~~~~~~~~~~~~~~ .. _whats-new.v0.0.11: v0.0.11 (01/09/2023) -------------------- Big update with entirely new pages in the docs, new methods (``.drop_nodes``, ``.filter``, ``.leaves``, ``.descendants``), and bug fixes! New Features ~~~~~~~~~~~~ - Added a :py:meth:`DataTree.drop_nodes` method (:issue:`161`, :pull:`175`). By `Tom Nicholas `_. - New, more specific exception types for tree-related errors (:pull:`169`). By `Tom Nicholas `_. 
- Added a new :py:meth:`DataTree.descendants` property (:pull:`170`). By `Tom Nicholas `_. - Added a :py:meth:`DataTree.leaves` property (:pull:`177`). By `Tom Nicholas `_. - Added a :py:meth:`DataTree.filter` method (:pull:`184`). By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ - :py:meth:`DataTree.copy` copy method now only copies the subtree, not the parent nodes (:pull:`171`). By `Tom Nicholas `_. - Grafting a subtree onto another tree now leaves name of original subtree object unchanged (:issue:`116`, :pull:`172`, :pull:`178`). By `Tom Nicholas `_. - Changed the :py:meth:`DataTree.assign` method to just work on the local node (:pull:`181`). By `Tom Nicholas `_. Deprecations ~~~~~~~~~~~~ Bug fixes ~~~~~~~~~ - Fix bug with :py:meth:`DataTree.relative_to` method (:issue:`133`, :pull:`160`). By `Tom Nicholas `_. - Fix links to API docs in all documentation (:pull:`183`). By `Tom Nicholas `_. Documentation ~~~~~~~~~~~~~ - Changed docs theme to match xarray's main documentation. (:pull:`173`) By `Tom Nicholas `_. - Added ``Terminology`` page. (:pull:`174`) By `Tom Nicholas `_. - Added page on ``Working with Hierarchical Data`` (:pull:`179`) By `Tom Nicholas `_. - Added context content to ``Index`` page (:pull:`182`) By `Tom Nicholas `_. - Updated the README (:pull:`187`) By `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ .. _whats-new.v0.0.10: v0.0.10 (12/07/2022) -------------------- Adds accessors and a `.pipe()` method. New Features ~~~~~~~~~~~~ - Add the ability to register accessors on ``DataTree`` objects, by using ``register_datatree_accessor``. (:pull:`144`) By `Tom Nicholas `_. - Allow method chaining with a new :py:meth:`DataTree.pipe` method (:issue:`151`, :pull:`156`). By `Justus Magin `_. Breaking changes ~~~~~~~~~~~~~~~~ Deprecations ~~~~~~~~~~~~ Bug fixes ~~~~~~~~~ - Allow ``Datatree`` objects as values in :py:meth:`DataTree.from_dict` (:pull:`159`). By `Justus Magin `_. 
Documentation ~~~~~~~~~~~~~ - Added ``Reading and Writing Files`` page. (:pull:`158`) By `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ - Avoid reading from same file twice with fsspec3 (:pull:`130`) By `William Roberts `_. .. _whats-new.v0.0.9: v0.0.9 (07/14/2022) ------------------- New Features ~~~~~~~~~~~~ Breaking changes ~~~~~~~~~~~~~~~~ Deprecations ~~~~~~~~~~~~ Bug fixes ~~~~~~~~~ Documentation ~~~~~~~~~~~~~ - Switch docs theme (:pull:`123`). By `JuliusBusecke `_. Internal Changes ~~~~~~~~~~~~~~~~ .. _whats-new.v0.0.7: v0.0.7 (07/11/2022) ------------------- New Features ~~~~~~~~~~~~ - Improve the HTML repr by adding tree-style lines connecting groups and sub-groups (:pull:`109`). By `Benjamin Woods `_. Breaking changes ~~~~~~~~~~~~~~~~ - The ``DataTree.ds`` attribute now returns a view onto an immutable Dataset-like object, instead of an actual instance of ``xarray.Dataset``. This may break existing ``isinstance`` checks or ``assert`` comparisons. (:pull:`99`) By `Tom Nicholas `_. Deprecations ~~~~~~~~~~~~ Bug fixes ~~~~~~~~~ - Modifying the contents of a ``DataTree`` object via the ``DataTree.ds`` attribute is now forbidden, which prevents any possibility of the contents of a ``DataTree`` object and its ``.ds`` attribute diverging. (:issue:`38`, :pull:`99`) By `Tom Nicholas `_. - Fixed a bug so that names of children now always match keys under which parents store them (:pull:`99`). By `Tom Nicholas `_. Documentation ~~~~~~~~~~~~~ - Added ``Data Structures`` page describing the internal structure of a ``DataTree`` object, and its relation to ``xarray.Dataset`` objects. (:pull:`103`) By `Tom Nicholas `_. - API page updated with all the methods that are copied from ``xarray.Dataset``. (:pull:`41`) By `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ - Refactored ``DataTree`` class to store a set of ``xarray.Variable`` objects instead of a single ``xarray.Dataset``.
This approach means that the ``DataTree`` class now effectively copies and extends the internal structure of ``xarray.Dataset``. (:pull:`41`) By `Tom Nicholas `_. - Refactored to use intermediate ``NamedNode`` class, separating implementation of methods requiring a ``name`` attribute from those not requiring it. By `Tom Nicholas `_. - Made ``testing.test_datatree.create_test_datatree`` into a pytest fixture (:pull:`107`). By `Benjamin Woods `_. .. _whats-new.v0.0.6: v0.0.6 (06/03/2022) ------------------- Various small bug fixes, in preparation for more significant changes in the next version. Bug fixes ~~~~~~~~~ - Fixed bug with checking that assigning parent or new children did not create a loop in the tree (:pull:`105`) By `Tom Nicholas `_. - Do not call ``__exit__`` on Zarr store when opening (:pull:`90`) By `Matt McCormick `_. - Fix netCDF encoding for compression (:pull:`95`) By `Joe Hamman `_. - Added validity checking for node names (:pull:`106`) By `Tom Nicholas `_. .. _whats-new.v0.0.5: v0.0.5 (05/05/2022) ------------------- - Major refactor of internals, moving from the ``DataTree.children`` attribute being a ``Tuple[DataTree]`` to being a ``OrderedDict[str, DataTree]``. This was necessary in order to integrate better with xarray's dictionary-like API, solve several issues, simplify the code internally, remove dependencies, and enable new features. (:pull:`76`) By `Tom Nicholas `_. New Features ~~~~~~~~~~~~ - Syntax for accessing nodes now supports file-like paths, including parent nodes via ``"../"``, relative paths, the root node via ``"/"``, and the current node via ``"."``. (Internally it actually uses ``pathlib`` now.) By `Tom Nicholas `_. - New path-like API methods, such as ``.relative_to``, ``.find_common_ancestor``, and ``.same_tree``. - Some new dictionary-like methods, such as ``DataTree.get`` and ``DataTree.update``. (:pull:`76`) By `Tom Nicholas `_. - New HTML repr, which will automatically display in a jupyter notebook. 
(:pull:`78`) By `Tom Nicholas `_. - New delitem method so you can delete nodes. (:pull:`88`) By `Tom Nicholas `_. - New ``to_dict`` method. (:pull:`82`) By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ - Node names are now optional, which means that the root of the tree can be unnamed. This has knock-on effects for a lot of the API. - The ``__init__`` signature for ``DataTree`` has changed, so that ``name`` is now an optional kwarg. - Files will now be loaded as a slightly different tree, because the root group no longer needs to be given a default name. - Removed tag-like access to nodes. - Removes the option to delete all data in a node by assigning None to the node (in favour of deleting data by replacing the node's ``.ds`` attribute with an empty Dataset), or to create a new empty node in the same way (in favour of assigning an empty DataTree object instead). - Removes the ability to create a new node by assigning a ``Dataset`` object to ``DataTree.__setitem__``. - Several other minor API changes such as ``.pathstr`` -> ``.path``, and ``from_dict``'s dictionary argument now being required. (:pull:`76`) By `Tom Nicholas `_. Deprecations ~~~~~~~~~~~~ - No longer depends on the anytree library (:pull:`76`) By `Tom Nicholas `_. Bug fixes ~~~~~~~~~ - Fixed indentation issue with the string repr (:pull:`86`) By `Tom Nicholas `_. Documentation ~~~~~~~~~~~~~ - Quick-overview page updated to match change in path syntax (:pull:`76`) By `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ - Basically every file was changed in some way to accommodate (:pull:`76`). - No longer need the utility functions for string manipulation that were defined in ``utils.py``. - A considerable amount of code copied over from the internals of anytree (e.g. in ``render.py`` and ``iterators.py``). The Apache license for anytree has now been bundled with datatree. (:pull:`76`). By `Tom Nicholas `_. .. 
_whats-new.v0.0.4: v0.0.4 (31/03/2022) ------------------- - Ensure you get the pretty tree-like string representation by default in ipython (:pull:`73`). By `Tom Nicholas `_. - Now available on conda-forge (as xarray-datatree)! (:pull:`71`) By `Anderson Banihirwe `_. - Allow for python 3.8 (:pull:`70`). By `Don Setiawan `_. .. _whats-new.v0.0.3: v0.0.3 (30/03/2022) ------------------- - First released version, available on PyPI (as xarray-datatree)! datatree-0.0.14/pyproject.toml000066400000000000000000000027551455257650300163260ustar00rootroot00000000000000[project] name = "xarray-datatree" description = "Hierarchical tree-like data structures for xarray" readme = "README.md" authors = [ {name = "Thomas Nicholas", email = "thomas.nicholas@columbia.edu"} ] license = {text = "Apache-2"} classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] requires-python = ">=3.9" dependencies = [ "xarray >=2023.12.0", "packaging", ] dynamic = ["version"] [project.urls] Home = "https://github.com/xarray-contrib/datatree" Documentation = "https://xarray-datatree.readthedocs.io/en/stable/" [build-system] requires = [ "setuptools>=61.0.0", "wheel", "setuptools_scm[toml]>=7.0", "check-manifest" ] [tool.setuptools_scm] write_to = "datatree/_version.py" write_to_template = ''' # Do not change! Do not track in version control! 
__version__ = "{version}"
'''

# Package discovery: keep documentation and test trees out of the built
# distribution.
[tool.setuptools.packages.find]
exclude = ["docs", "tests", "tests.*", "docs.*"]

# Include the py.typed marker file in the installed datatree package.
[tool.setuptools.package-data]
datatree = ["py.typed"]

[tool.isort]
profile = "black"
skip_gitignore = true
float_to_top = true
default_section = "THIRDPARTY"
known_first_party = "datatree"

# mypy reads pyproject.toml settings only from the [tool.mypy] table; a
# root-level [mypy] table (the setup.cfg / mypy.ini spelling) is ignored.
[tool.mypy]
files = "datatree/**/*.py"
show_error_codes = true
datatree-0.0.14/readthedocs.yml000066400000000000000000000001571455257650300164140ustar00rootroot00000000000000version: 2 conda: environment: ci/doc.yml build: os: 'ubuntu-20.04' tools: python: 'mambaforge-4.10'