pax_global_header 0000666 0000000 0000000 00000000064 14552576503 0014526 g ustar 00root root 0000000 0000000 52 comment=03b0a8c04248ae2bb13ab8c4b832a9f406e0d156
datatree-0.0.14/ 0000775 0000000 0000000 00000000000 14552576503 0013401 5 ustar 00root root 0000000 0000000 datatree-0.0.14/.flake8 0000664 0000000 0000000 00000000516 14552576503 0014556 0 ustar 00root root 0000000 0000000 [flake8]
ignore =
# whitespace before ':' - doesn't work well with black
E203
# module level import not at top of file
E402
# line too long - let black worry about that
E501
# do not assign a lambda expression, use a def
E731
# line break before binary operator
W503
exclude=
.eggs
doc
datatree-0.0.14/.git_archival.txt 0000664 0000000 0000000 00000000157 14552576503 0016657 0 ustar 00root root 0000000 0000000 node: $Format:%H$
node-date: $Format:%cI$
describe-name: $Format:%(describe:tags=true)$
ref-names: $Format:%D$
datatree-0.0.14/.github/ 0000775 0000000 0000000 00000000000 14552576503 0014741 5 ustar 00root root 0000000 0000000 datatree-0.0.14/.github/dependabot.yml 0000664 0000000 0000000 00000000401 14552576503 0017564 0 ustar 00root root 0000000 0000000 version: 2
updates:
- package-ecosystem: pip
directory: "/"
schedule:
interval: daily
- package-ecosystem: "github-actions"
directory: "/"
schedule:
# Check for updates to GitHub Actions every weekday
interval: "daily"
datatree-0.0.14/.github/pull_request_template.md 0000664 0000000 0000000 00000000415 14552576503 0021702 0 ustar 00root root 0000000 0000000
- [ ] Closes #xxxx
- [ ] Tests added
- [ ] Passes `pre-commit run --all-files`
- [ ] New functions/methods are listed in `api.rst`
- [ ] Changes are summarized in `docs/source/whats-new.rst`
datatree-0.0.14/.github/workflows/ 0000775 0000000 0000000 00000000000 14552576503 0016776 5 ustar 00root root 0000000 0000000 datatree-0.0.14/.github/workflows/main.yaml 0000664 0000000 0000000 00000004406 14552576503 0020612 0 ustar 00root root 0000000 0000000 name: CI
on:
push:
branches:
- main
pull_request:
branches:
- main
schedule:
- cron: "0 0 * * *"
jobs:
test:
name: ${{ matrix.python-version }}-build
runs-on: ubuntu-latest
defaults:
run:
shell: bash -l {0}
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4
- name: Create conda environment
uses: mamba-org/provision-with-micromamba@main
with:
cache-downloads: true
micromamba-version: 'latest'
environment-file: ci/environment.yml
extra-specs: |
python=${{ matrix.python-version }}
- name: Conda info
run: conda info
- name: Install datatree
run: |
python -m pip install -e . --no-deps --force-reinstall
- name: Conda list
run: conda list
- name: Running Tests
run: |
python -m pytest --cov=./ --cov-report=xml --verbose
- name: Upload code coverage to Codecov
uses: codecov/codecov-action@v3.1.4
with:
file: ./coverage.xml
flags: unittests
env_vars: OS,PYTHON
name: codecov-umbrella
fail_ci_if_error: false
test-upstream:
name: ${{ matrix.python-version }}-dev-build
runs-on: ubuntu-latest
defaults:
run:
shell: bash -l {0}
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4
- name: Create conda environment
uses: mamba-org/provision-with-micromamba@main
with:
cache-downloads: true
micromamba-version: 'latest'
environment-file: ci/environment.yml
extra-specs: |
python=${{ matrix.python-version }}
- name: Conda info
run: conda info
- name: Install dev reqs
run: |
python -m pip install --no-deps --upgrade \
git+https://github.com/pydata/xarray \
git+https://github.com/Unidata/netcdf4-python
python -m pip install -e . --no-deps --force-reinstall
- name: Conda list
run: conda list
- name: Running Tests
run: |
python -m pytest --verbose
datatree-0.0.14/.github/workflows/pypipublish.yaml 0000664 0000000 0000000 00000003741 14552576503 0022237 0 ustar 00root root 0000000 0000000 name: Build distribution
on:
release:
types:
- published
push:
branches:
- main
pull_request:
branches:
- main
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
build-artifacts:
runs-on: ubuntu-latest
if: github.repository == 'xarray-contrib/datatree'
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v5
name: Install Python
with:
python-version: 3.9
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install build
- name: Build tarball and wheels
run: |
git clean -xdf
git restore -SW .
python -m build --sdist --wheel .
- uses: actions/upload-artifact@v4
with:
name: releases
path: dist
test-built-dist:
needs: build-artifacts
runs-on: ubuntu-latest
steps:
- uses: actions/setup-python@v5
name: Install Python
with:
python-version: '3.10'
- uses: actions/download-artifact@v4
with:
name: releases
path: dist
- name: List contents of built dist
run: |
ls -ltrh
ls -ltrh dist
- name: Verify the built dist/wheel is valid
run: |
python -m pip install --upgrade pip
python -m pip install dist/xarray_datatree*.whl
python -c "import datatree; print(datatree.__version__)"
upload-to-pypi:
needs: test-built-dist
if: github.event_name == 'release'
runs-on: ubuntu-latest
steps:
- uses: actions/download-artifact@v4
with:
name: releases
path: dist
- name: Publish package to PyPI
uses: pypa/gh-action-pypi-publish@v1.8.11
with:
user: ${{ secrets.PYPI_USERNAME }}
password: ${{ secrets.PYPI_PASSWORD }}
verbose: true
datatree-0.0.14/.gitignore 0000664 0000000 0000000 00000003540 14552576503 0015373 0 ustar 00root root 0000000 0000000 # Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
docs/source/generated
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# version
_version.py
# Ignore vscode specific settings
.vscode/
datatree-0.0.14/.pre-commit-config.yaml 0000664 0000000 0000000 00000003153 14552576503 0017664 0 ustar 00root root 0000000 0000000 # https://pre-commit.com/
ci:
autoupdate_schedule: monthly
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
# isort should run before black as black sometimes tweaks the isort output
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
hooks:
- id: isort
# https://github.com/python/black#version-control-integration
- repo: https://github.com/psf/black
rev: 23.12.1
hooks:
- id: black
- repo: https://github.com/keewis/blackdoc
rev: v0.3.9
hooks:
- id: blackdoc
- repo: https://github.com/PyCQA/flake8
rev: 6.1.0
hooks:
- id: flake8
# - repo: https://github.com/Carreau/velin
# rev: 0.0.8
# hooks:
# - id: velin
# args: ["--write", "--compact"]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.8.0
hooks:
- id: mypy
# Copied from setup.cfg
exclude: "properties|asv_bench|docs"
additional_dependencies: [
# Type stubs
types-python-dateutil,
types-pkg_resources,
types-PyYAML,
types-pytz,
# Dependencies that are typed
numpy,
typing-extensions>=4.1.0,
]
# run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194
# - repo: https://github.com/asottile/pyupgrade
# rev: v1.22.1
# hooks:
# - id: pyupgrade
# args:
# - "--py3-only"
# # remove on f-strings in Py3.7
# - "--keep-percent-format"
datatree-0.0.14/LICENSE 0000664 0000000 0000000 00000026137 14552576503 0014417 0 ustar 00root root 0000000 0000000 Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright (c) 2022 onwards, datatree developers
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
datatree-0.0.14/README.md 0000664 0000000 0000000 00000013554 14552576503 0014670 0 ustar 00root root 0000000 0000000 # datatree
| CI | [![GitHub Workflow Status][github-ci-badge]][github-ci-link] [![Code Coverage Status][codecov-badge]][codecov-link] [![pre-commit.ci status][pre-commit.ci-badge]][pre-commit.ci-link] |
| :---------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| **Docs** | [![Documentation Status][rtd-badge]][rtd-link] |
| **Package** | [![Conda][conda-badge]][conda-link] [![PyPI][pypi-badge]][pypi-link] |
| **License** | [![License][license-badge]][repo-link] |
**Datatree is a prototype implementation of a tree-like hierarchical data structure for xarray.**
Datatree was born after the xarray team recognised a [need for a new hierarchical data structure](https://github.com/pydata/xarray/issues/4118),
that was more flexible than a single `xarray.Dataset` object.
The initial motivation was to represent netCDF files / Zarr stores with multiple nested groups in a single in-memory object,
but `datatree.DataTree` objects have many other uses.
### Installation
You can install datatree via pip:
```shell
pip install xarray-datatree
```
or via conda-forge
```shell
conda install -c conda-forge xarray-datatree
```
### Why Datatree?
You might want to use datatree for:
- Organising many related datasets, e.g. results of the same experiment with different parameters, or simulations of the same system using different models,
- Analysing similar data at multiple resolutions simultaneously, such as when doing a convergence study,
- Comparing heterogenous but related data, such as experimental and theoretical data,
- I/O with nested data formats such as netCDF / Zarr groups.
[**Talk slides on Datatree from AMS-python 2023**](https://speakerdeck.com/tomnicholas/xarray-datatree-hierarchical-data-structures-for-multi-model-science)
### Features
The approach used here is based on benbovy's [`DatasetNode` example](https://gist.github.com/benbovy/92e7c76220af1aaa4b3a0b65374e233a) - the basic idea is that each tree node wraps a up to a single `xarray.Dataset`. The differences are that this effort:
- Uses a node structure inspired by [anytree](https://github.com/xarray-contrib/datatree/issues/7) for the tree,
- Implements path-like getting and setting,
- Has functions for mapping user-supplied functions over every node in the tree,
- Automatically dispatches *some* of `xarray.Dataset`'s API over every node in the tree (such as `.isel`),
- Has a bunch of tests,
- Has a printable representation that currently looks like this:
### Get Started
You can create a `DataTree` object in 3 ways:
1) Load from a netCDF file (or Zarr store) that has groups via `open_datatree()`.
2) Using the init method of `DataTree`, which creates an individual node.
You can then specify the nodes' relationships to one other, either by setting `.parent` and `.children` attributes,
or through `__get/setitem__` access, e.g. `dt['path/to/node'] = DataTree()`.
3) Create a tree from a dictionary of paths to datasets using `DataTree.from_dict()`.
### Development Roadmap
Datatree currently lives in a separate repository to the main xarray package.
This allows the datatree developers to make changes to it, experiment, and improve it faster.
Eventually we plan to fully integrate datatree upstream into xarray's main codebase, at which point the [github.com/xarray-contrib/datatree](https://github.com/xarray-contrib/datatree>) repository will be archived.
This should not cause much disruption to code that depends on datatree - you will likely only have to change the import line (i.e. from ``from datatree import DataTree`` to ``from xarray import DataTree``).
However, until this full integration occurs, datatree's API should not be considered to have the same [level of stability as xarray's](https://docs.xarray.dev/en/stable/contributing.html#backwards-compatibility).
### User Feedback
We really really really want to hear your opinions on datatree!
At this point in development, user feedback is critical to help us create something that will suit everyone's needs.
Please raise any thoughts, issues, suggestions or bugs, no matter how small or large, on the [github issue tracker](https://github.com/xarray-contrib/datatree/issues).
[github-ci-badge]: https://img.shields.io/github/actions/workflow/status/xarray-contrib/datatree/main.yaml?branch=main&label=CI&logo=github
[github-ci-link]: https://github.com/xarray-contrib/datatree/actions?query=workflow%3ACI
[codecov-badge]: https://img.shields.io/codecov/c/github/xarray-contrib/datatree.svg?logo=codecov
[codecov-link]: https://codecov.io/gh/xarray-contrib/datatree
[rtd-badge]: https://img.shields.io/readthedocs/xarray-datatree/latest.svg
[rtd-link]: https://xarray-datatree.readthedocs.io/en/latest/?badge=latest
[pypi-badge]: https://img.shields.io/pypi/v/xarray-datatree?logo=pypi
[pypi-link]: https://pypi.org/project/xarray-datatree
[conda-badge]: https://img.shields.io/conda/vn/conda-forge/xarray-datatree?logo=anaconda
[conda-link]: https://anaconda.org/conda-forge/xarray-datatree
[license-badge]: https://img.shields.io/github/license/xarray-contrib/datatree
[repo-link]: https://github.com/xarray-contrib/datatree
[pre-commit.ci-badge]: https://results.pre-commit.ci/badge/github/xarray-contrib/datatree/main.svg
[pre-commit.ci-link]: https://results.pre-commit.ci/latest/github/xarray-contrib/datatree/main
datatree-0.0.14/ci/ 0000775 0000000 0000000 00000000000 14552576503 0013774 5 ustar 00root root 0000000 0000000 datatree-0.0.14/ci/doc.yml 0000664 0000000 0000000 00000000667 14552576503 0015275 0 ustar 00root root 0000000 0000000 name: datatree-doc
channels:
- conda-forge
dependencies:
- pip
- python>=3.9
- netcdf4
- scipy
- sphinx>=4.2.0
- sphinx-copybutton
- sphinx-panels
- sphinx-autosummary-accessors
- sphinx-book-theme >= 0.0.38
- nbsphinx
- sphinxcontrib-srclinks
- pickleshare
- pydata-sphinx-theme>=0.4.3
- ipython
- h5netcdf
- zarr
- xarray
- pip:
- -e ..
- sphinxext-rediraffe
- sphinxext-opengraph
datatree-0.0.14/ci/environment.yml 0000664 0000000 0000000 00000000340 14552576503 0017060 0 ustar 00root root 0000000 0000000 name: datatree-test
channels:
- conda-forge
- nodefaults
dependencies:
- python>=3.9
- netcdf4
- pytest
- flake8
- black
- codecov
- pytest-cov
- h5netcdf
- zarr
- pip:
- xarray>=2022.05.0.dev0
datatree-0.0.14/codecov.yml 0000664 0000000 0000000 00000000453 14552576503 0015550 0 ustar 00root root 0000000 0000000 codecov:
require_ci_to_pass: false
max_report_age: off
comment: false
ignore:
- 'datatree/tests/*'
- 'setup.py'
- 'conftest.py'
coverage:
precision: 2
round: down
status:
project:
default:
target: 95
informational: true
patch: off
changes: false
datatree-0.0.14/conftest.py 0000664 0000000 0000000 00000000102 14552576503 0015571 0 ustar 00root root 0000000 0000000 import pytest
pytest.register_assert_rewrite("datatree.testing")
datatree-0.0.14/datatree/ 0000775 0000000 0000000 00000000000 14552576503 0015172 5 ustar 00root root 0000000 0000000 datatree-0.0.14/datatree/__init__.py 0000664 0000000 0000000 00000001360 14552576503 0017303 0 ustar 00root root 0000000 0000000 # import public API
from .datatree import DataTree
from .extensions import register_datatree_accessor
from .io import open_datatree
from .mapping import TreeIsomorphismError, map_over_subtree
from .treenode import InvalidTreeError, NotFoundInTreeError
try:
# NOTE: the `_version.py` file must not be present in the git repository
# as it is generated by setuptools at install time
from ._version import __version__
except ImportError: # pragma: no cover
# Local copy or not installed with setuptools
__version__ = "999"
__all__ = (
"DataTree",
"open_datatree",
"TreeIsomorphismError",
"InvalidTreeError",
"NotFoundInTreeError",
"map_over_subtree",
"register_datatree_accessor",
"__version__",
)
datatree-0.0.14/datatree/common.py 0000664 0000000 0000000 00000010514 14552576503 0017035 0 ustar 00root root 0000000 0000000 """
This file and class only exists because it was easier to copy the code for AttrAccessMixin from xarray.core.common
with some slight modifications than it was to change the behaviour of an inherited xarray internal here.
The modifications are marked with # TODO comments.
"""
import warnings
from contextlib import suppress
from typing import Any, Hashable, Iterable, List, Mapping
class TreeAttrAccessMixin:
"""Mixin class that allows getting keys with attribute access"""
__slots__ = ()
def __init_subclass__(cls, **kwargs):
"""Verify that all subclasses explicitly define ``__slots__``. If they don't,
raise error in the core xarray module and a FutureWarning in third-party
extensions.
"""
if not hasattr(object.__new__(cls), "__dict__"):
pass
# TODO reinstate this once integrated upstream
# elif cls.__module__.startswith("datatree."):
# raise AttributeError(f"{cls.__name__} must explicitly define __slots__")
# else:
# cls.__setattr__ = cls._setattr_dict
# warnings.warn(
# f"xarray subclass {cls.__name__} should explicitly define __slots__",
# FutureWarning,
# stacklevel=2,
# )
super().__init_subclass__(**kwargs)
@property
def _attr_sources(self) -> Iterable[Mapping[Hashable, Any]]:
"""Places to look-up items for attribute-style access"""
yield from ()
@property
def _item_sources(self) -> Iterable[Mapping[Hashable, Any]]:
"""Places to look-up items for key-autocompletion"""
yield from ()
def __getattr__(self, name: str) -> Any:
if name not in {"__dict__", "__setstate__"}:
# this avoids an infinite loop when pickle looks for the
# __setstate__ attribute before the xarray object is initialized
for source in self._attr_sources:
with suppress(KeyError):
return source[name]
raise AttributeError(
f"{type(self).__name__!r} object has no attribute {name!r}"
)
# This complicated two-method design boosts overall performance of simple operations
# - particularly DataArray methods that perform a _to_temp_dataset() round-trip - by
# a whopping 8% compared to a single method that checks hasattr(self, "__dict__") at
# runtime before every single assignment. All of this is just temporary until the
# FutureWarning can be changed into a hard crash.
def _setattr_dict(self, name: str, value: Any) -> None:
"""Deprecated third party subclass (see ``__init_subclass__`` above)"""
object.__setattr__(self, name, value)
if name in self.__dict__:
# Custom, non-slotted attr, or improperly assigned variable?
warnings.warn(
f"Setting attribute {name!r} on a {type(self).__name__!r} object. Explicitly define __slots__ "
"to suppress this warning for legitimate custom attributes and "
"raise an error when attempting variables assignments.",
FutureWarning,
stacklevel=2,
)
def __setattr__(self, name: str, value: Any) -> None:
"""Objects with ``__slots__`` raise AttributeError if you try setting an
undeclared attribute. This is desirable, but the error message could use some
improvement.
"""
try:
object.__setattr__(self, name, value)
except AttributeError as e:
# Don't accidentally shadow custom AttributeErrors, e.g.
# DataArray.dims.setter
if str(e) != "{!r} object has no attribute {!r}".format(
type(self).__name__, name
):
raise
raise AttributeError(
f"cannot set attribute {name!r} on a {type(self).__name__!r} object. Use __setitem__ style"
"assignment (e.g., `ds['name'] = ...`) instead of assigning variables."
) from e
def __dir__(self) -> List[str]:
"""Provide method name lookup and completion. Only provide 'public'
methods.
"""
extra_attrs = {
item
for source in self._attr_sources
for item in source
if isinstance(item, str)
}
return sorted(set(dir(type(self))) | extra_attrs)
datatree-0.0.14/datatree/datatree.py 0000664 0000000 0000000 00000147461 14552576503 0017352 0 ustar 00root root 0000000 0000000 from __future__ import annotations
import copy
import itertools
from collections import OrderedDict
from html import escape
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
Generic,
Hashable,
Iterable,
Iterator,
List,
Mapping,
MutableMapping,
Optional,
Set,
Tuple,
Union,
overload,
)
from xarray.core import utils
from xarray.core.coordinates import DatasetCoordinates
from xarray.core.dataarray import DataArray
from xarray.core.dataset import Dataset, DataVariables
from xarray.core.indexes import Index, Indexes
from xarray.core.merge import dataset_update_method
from xarray.core.options import OPTIONS as XR_OPTS
from xarray.core.utils import (
Default,
Frozen,
HybridMappingProxy,
_default,
either_dict_or_kwargs,
maybe_wrap_array,
)
from xarray.core.variable import Variable
from . import formatting, formatting_html
from .common import TreeAttrAccessMixin
from .mapping import TreeIsomorphismError, check_isomorphic, map_over_subtree
from .ops import (
DataTreeArithmeticMixin,
MappedDatasetMethodsMixin,
MappedDataWithCoords,
)
from .render import RenderTree
from .treenode import NamedNode, NodePath, Tree
try:
from xarray.core.variable import calculate_dimensions
except ImportError:
# for xarray versions 2022.03.0 and earlier
from xarray.core.dataset import calculate_dimensions
if TYPE_CHECKING:
import pandas as pd
from xarray.core.merge import CoercibleValue
from xarray.core.types import ErrorOptions
# """
# DEVELOPERS' NOTE
# ----------------
# The idea of this module is to create a `DataTree` class which inherits the tree structure from TreeNode, and also copies
# the entire API of `xarray.Dataset`, but with certain methods decorated to instead map the dataset function over every
# node in the tree. As this API is copied without directly subclassing `xarray.Dataset` we instead create various Mixin
# classes (in ops.py) which each define part of `xarray.Dataset`'s extensive API.
#
# Some of these methods must be wrapped to map over all nodes in the subtree. Others are fine to inherit unaltered
# (normally because they (a) only call dataset properties and (b) don't return a dataset that should be nested into a new
# tree) and some will get overridden by the class definition of DataTree.
# """
T_Path = Union[str, NodePath]
def _coerce_to_dataset(data: Dataset | DataArray | None) -> Dataset:
    """Coerce the input to a Dataset: promote DataArrays, replace None with an
    empty Dataset, pass Datasets through unchanged.

    Raises
    ------
    TypeError
        If ``data`` is not a Dataset, DataArray, or None.
    """
    if data is None:
        return Dataset()
    if isinstance(data, DataArray):
        return data.to_dataset()
    if isinstance(data, Dataset):
        return data
    raise TypeError(
        f"data object is not an xarray Dataset, DataArray, or None, it is of type {type(data)}"
    )
def _check_for_name_collisions(
    children: Iterable[str], variables: Iterable[Hashable]
) -> None:
    """Raise KeyError if any name appears both as a child node and as a
    variable; such collisions would make lookups ambiguous."""
    overlapping = set(children) & set(variables)
    if overlapping:
        raise KeyError(
            f"Some names would collide between variables and children: {list(overlapping)}"
        )
class DatasetView(Dataset):
    """
    An immutable Dataset-like view onto the data in a single DataTree node.

    In-place operations modifying this object should raise an AttributeError.
    This requires overriding all inherited constructors.

    Operations returning a new result will return a new xarray.Dataset object.
    This includes all API on Dataset, which will be inherited.
    """

    # TODO what happens if user alters (in-place) a DataArray they extracted from this object?

    __slots__ = (
        "_attrs",
        "_cache",
        "_coord_names",
        "_dims",
        "_encoding",
        "_close",
        "_indexes",
        "_variables",
    )

    def __init__(
        self,
        data_vars: Optional[Mapping[Any, Any]] = None,
        coords: Optional[Mapping[Any, Any]] = None,
        attrs: Optional[Mapping[Any, Any]] = None,
    ):
        # Direct construction is forbidden: instances must be created through
        # _from_node so that they always alias the state of a DataTree node.
        raise AttributeError("DatasetView objects are not to be initialized directly")

    @classmethod
    def _from_node(
        cls,
        wrapping_node: DataTree,
    ) -> DatasetView:
        """Constructor, using dataset attributes from wrapping node.

        The view shares (does not copy) the node's underlying state, so it
        always reflects the wrapping node's current contents.
        """
        obj: DatasetView = object.__new__(cls)
        obj._variables = wrapping_node._variables
        obj._coord_names = wrapping_node._coord_names
        obj._dims = wrapping_node._dims
        obj._indexes = wrapping_node._indexes
        obj._attrs = wrapping_node._attrs
        obj._close = wrapping_node._close
        obj._encoding = wrapping_node._encoding
        return obj

    def __setitem__(self, key, val) -> None:
        # Immutable view: direct the user to the wrapping node instead.
        # (Fixed: a space was missing between the concatenated string parts.)
        raise AttributeError(
            "Mutation of the DatasetView is not allowed, please use `.__setitem__` on the wrapping DataTree node, "
            "or use `dt.to_dataset()` if you want a mutable dataset. If calling this from within `map_over_subtree`, "
            "use `.copy()` first to get a mutable version of the input dataset."
        )

    def update(self, other) -> None:
        # Immutable view: direct the user to the wrapping node instead.
        raise AttributeError(
            "Mutation of the DatasetView is not allowed, please use `.update` on the wrapping DataTree node, "
            "or use `dt.to_dataset()` if you want a mutable dataset. If calling this from within `map_over_subtree`, "
            "use `.copy()` first to get a mutable version of the input dataset."
        )

    # FIXME https://github.com/python/mypy/issues/7328
    @overload
    def __getitem__(self, key: Mapping) -> Dataset:  # type: ignore[misc]
        ...

    @overload
    def __getitem__(self, key: Hashable) -> DataArray:  # type: ignore[misc]
        ...

    @overload
    def __getitem__(self, key: Any) -> Dataset:
        ...

    def __getitem__(self, key) -> DataArray:
        # TODO call the `_get_item` method of DataTree to allow path-like access to contents of other nodes
        # For now just call Dataset.__getitem__
        return Dataset.__getitem__(self, key)

    @classmethod
    def _construct_direct(
        cls,
        variables: dict[Any, Variable],
        coord_names: set[Hashable],
        dims: Optional[dict[Any, int]] = None,
        attrs: Optional[dict] = None,
        indexes: Optional[dict[Any, Index]] = None,
        encoding: Optional[dict] = None,
        close: Optional[Callable[[], None]] = None,
    ) -> Dataset:
        """
        Overriding this method (along with ._replace) and modifying it to return a Dataset object
        should hopefully ensure that the return type of any method on this object is a Dataset.
        """
        if dims is None:
            dims = calculate_dimensions(variables)
        if indexes is None:
            indexes = {}
        # Deliberately instantiate a plain Dataset, not cls.
        obj = object.__new__(Dataset)
        obj._variables = variables
        obj._coord_names = coord_names
        obj._dims = dims
        obj._indexes = indexes
        obj._attrs = attrs
        obj._close = close
        obj._encoding = encoding
        return obj

    def _replace(
        self,
        variables: Optional[dict[Hashable, Variable]] = None,
        coord_names: Optional[set[Hashable]] = None,
        dims: Optional[dict[Any, int]] = None,
        attrs: dict[Hashable, Any] | None | Default = _default,
        indexes: Optional[dict[Hashable, Index]] = None,
        encoding: dict | None | Default = _default,
        inplace: bool = False,
    ) -> Dataset:
        """
        Overriding this method (along with ._construct_direct) and modifying it to return a Dataset object
        should hopefully ensure that the return type of any method on this object is a Dataset.
        """
        if inplace:
            raise AttributeError("In-place mutation of the DatasetView is not allowed")

        return Dataset._replace(
            self,
            variables=variables,
            coord_names=coord_names,
            dims=dims,
            attrs=attrs,
            indexes=indexes,
            encoding=encoding,
            inplace=inplace,
        )

    def map(
        self,
        func: Callable,
        keep_attrs: bool | None = None,
        args: Iterable[Any] = (),
        **kwargs: Any,
    ) -> Dataset:
        """Apply a function to each data variable in this dataset

        Parameters
        ----------
        func : callable
            Function which can be called in the form `func(x, *args, **kwargs)`
            to transform each DataArray `x` in this dataset into another
            DataArray.
        keep_attrs : bool or None, optional
            If True, both the dataset's and variables' attributes (`attrs`) will be
            copied from the original objects to the new ones. If False, the new dataset
            and variables will be returned without copying the attributes.
        args : iterable, optional
            Positional arguments passed on to `func`.
        **kwargs : Any
            Keyword arguments passed on to `func`.

        Returns
        -------
        applied : Dataset
            Resulting dataset from applying ``func`` to each data variable.

        Examples
        --------
        >>> da = xr.DataArray(np.random.randn(2, 3))
        >>> ds = xr.Dataset({"foo": da, "bar": ("x", [-1, 2])})
        >>> ds
        <xarray.Dataset>
        Dimensions:  (dim_0: 2, dim_1: 3, x: 2)
        Dimensions without coordinates: dim_0, dim_1, x
        Data variables:
            foo      (dim_0, dim_1) float64 1.764 0.4002 0.9787 2.241 1.868 -0.9773
            bar      (x) int64 -1 2
        >>> ds.map(np.fabs)
        <xarray.Dataset>
        Dimensions:  (dim_0: 2, dim_1: 3, x: 2)
        Dimensions without coordinates: dim_0, dim_1, x
        Data variables:
            foo      (dim_0, dim_1) float64 1.764 0.4002 0.9787 2.241 1.868 0.9773
            bar      (x) float64 1.0 2.0
        """
        # Copied from xarray.Dataset so as not to call type(self), which causes problems (see datatree GH188).
        # TODO Refactor xarray upstream to avoid needing to overwrite this.
        # TODO This copied version will drop all attrs - the keep_attrs stuff should be re-instated
        variables = {
            k: maybe_wrap_array(v, func(v, *args, **kwargs))
            for k, v in self.data_vars.items()
        }
        # return type(self)(variables, attrs=attrs)
        return Dataset(variables)
class DataTree(
NamedNode,
MappedDatasetMethodsMixin,
MappedDataWithCoords,
DataTreeArithmeticMixin,
TreeAttrAccessMixin,
Generic[Tree],
Mapping,
):
"""
A tree-like hierarchical collection of xarray objects.
Attempts to present an API like that of xarray.Dataset, but methods are wrapped to also update all the tree's child nodes.
"""
# TODO Some way of sorting children by depth
# TODO do we need a watch out for if methods intended only for root nodes are called on non-root nodes?
# TODO dataset methods which should not or cannot act over the whole tree, such as .to_array
# TODO .loc method
# TODO a lot of properties like .variables could be defined in a DataMapping class which both Dataset and DataTree inherit from
# TODO all groupby classes
# TODO a lot of properties like .variables could be defined in a DataMapping class which both Dataset and DataTree inherit from
# TODO __slots__
# TODO all groupby classes
_name: Optional[str]
_parent: Optional[DataTree]
_children: OrderedDict[str, DataTree]
_attrs: Optional[Dict[Hashable, Any]]
_cache: Dict[str, Any]
_coord_names: Set[Hashable]
_dims: Dict[Hashable, int]
_encoding: Optional[Dict[Hashable, Any]]
_close: Optional[Callable[[], None]]
_indexes: Dict[Hashable, Index]
_variables: Dict[Hashable, Variable]
__slots__ = (
"_name",
"_parent",
"_children",
"_attrs",
"_cache",
"_coord_names",
"_dims",
"_encoding",
"_close",
"_indexes",
"_variables",
)
    def __init__(
        self,
        data: Optional[Dataset | DataArray] = None,
        parent: Optional[DataTree] = None,
        children: Optional[Mapping[str, DataTree]] = None,
        name: Optional[str] = None,
    ):
        """
        Create a single node of a DataTree.

        The node may optionally contain data in the form of data and coordinate variables, stored in the same way as
        data is stored in an xarray.Dataset.

        Parameters
        ----------
        data : Dataset, DataArray, or None, optional
            Data to store under the .ds attribute of this node. DataArrays will be promoted to Datasets.
            Default is None.
        parent : DataTree, optional
            Parent node to this node. Default is None.
        children : Mapping[str, DataTree], optional
            Any child nodes of this node. Default is None.
        name : str, optional
            Name for this node of the tree. Default is None.

        Raises
        ------
        KeyError
            If a name in ``children`` collides with a variable name in ``data``.

        Returns
        -------
        DataTree

        See Also
        --------
        DataTree.from_dict
        """
        # validate input
        if children is None:
            children = {}
        ds = _coerce_to_dataset(data)
        _check_for_name_collisions(children, ds.variables)

        super().__init__(name=name)

        # set data attributes
        self._replace(
            inplace=True,
            variables=ds._variables,
            coord_names=ds._coord_names,
            dims=ds._dims,
            indexes=ds._indexes,
            attrs=ds._attrs,
            encoding=ds._encoding,
        )
        self._close = ds._close

        # set tree attributes (must happen after variables set to avoid initialization errors)
        self.children = children
        self.parent = parent
    @property
    def parent(self: DataTree) -> DataTree | None:
        """Parent of this node. Returns None if this node is the tree root."""
        return self._parent
@parent.setter
def parent(self: DataTree, new_parent: DataTree) -> None:
if new_parent and self.name is None:
raise ValueError("Cannot set an unnamed node as a child of another node")
self._set_parent(new_parent, self.name)
    @property
    def ds(self) -> DatasetView:
        """
        An immutable Dataset-like view onto the data in this node.

        For a mutable Dataset containing the same data as in this node, use `.to_dataset()` instead.

        See Also
        --------
        DataTree.to_dataset
        """
        return DatasetView._from_node(self)

    @ds.setter
    def ds(self, data: Optional[Union[Dataset, DataArray]] = None) -> None:
        # Coerce to a Dataset, then guard against a variable name clashing
        # with an existing child node before swapping in the new contents.
        ds = _coerce_to_dataset(data)

        _check_for_name_collisions(self.children, ds.variables)

        self._replace(
            inplace=True,
            variables=ds._variables,
            coord_names=ds._coord_names,
            dims=ds._dims,
            indexes=ds._indexes,
            attrs=ds._attrs,
            encoding=ds._encoding,
        )
        self._close = ds._close
def _pre_attach(self: DataTree, parent: DataTree) -> None:
"""
Method which superclass calls before setting parent, here used to prevent having two
children with duplicate names (or a data variable with the same name as a child).
"""
super()._pre_attach(parent)
if self.name in list(parent.ds.variables):
raise KeyError(
f"parent {parent.name} already contains a data variable named {self.name}"
)
    def to_dataset(self) -> Dataset:
        """
        Return the data in this node as a new xarray.Dataset object.

        The returned Dataset is mutable, unlike the view returned by ``.ds``.

        See Also
        --------
        DataTree.ds
        """
        # Positional arguments must follow Dataset._construct_direct's order.
        return Dataset._construct_direct(
            self._variables,
            self._coord_names,
            self._dims,
            self._attrs,
            self._indexes,
            self._encoding,
            self._close,
        )
    @property
    def has_data(self) -> bool:
        """Whether or not there are any data variables in this node."""
        return len(self._variables) > 0
@property
def has_attrs(self) -> bool:
"""Whether or not there are any metadata attributes in this node."""
return len(self.attrs.keys()) > 0
@property
def is_empty(self) -> bool:
"""False if node contains any data or attrs. Does not look at children."""
return not (self.has_data or self.has_attrs)
@property
def is_hollow(self) -> bool:
"""True if only leaf nodes contain data."""
return not any(node.has_data for node in self.subtree if not node.is_leaf)
    @property
    def variables(self) -> Mapping[Hashable, Variable]:
        """Low level interface to node contents as dict of Variable objects.

        This ordered dictionary is frozen to prevent mutation that could
        violate Dataset invariants. It contains all variable objects
        constituting this DataTree node, including both data variables and
        coordinates.
        """
        return Frozen(self._variables)

    @property
    def attrs(self) -> Dict[Hashable, Any]:
        """Dictionary of global attributes on this node object."""
        # Lazily initialise so nodes built via fastpath constructors (which
        # may leave _attrs as None) still expose a mutable dict.
        if self._attrs is None:
            self._attrs = {}
        return self._attrs

    @attrs.setter
    def attrs(self, value: Mapping[Any, Any]) -> None:
        # Copy into a plain dict so later mutation of `value` has no effect.
        self._attrs = dict(value)

    @property
    def encoding(self) -> Dict:
        """Dictionary of global encoding attributes on this node object."""
        # Same lazy-initialisation pattern as ``attrs``.
        if self._encoding is None:
            self._encoding = {}
        return self._encoding

    @encoding.setter
    def encoding(self, value: Mapping) -> None:
        self._encoding = dict(value)

    @property
    def dims(self) -> Mapping[Hashable, int]:
        """Mapping from dimension names to lengths.

        Cannot be modified directly, but is updated when adding new variables.

        Note that type of this object differs from `DataArray.dims`.
        See `DataTree.sizes`, `Dataset.sizes`, and `DataArray.sizes` for consistently named
        properties.
        """
        return Frozen(self._dims)

    @property
    def sizes(self) -> Mapping[Hashable, int]:
        """Mapping from dimension names to lengths.

        Cannot be modified directly, but is updated when adding new variables.

        This is an alias for `DataTree.dims` provided for the benefit of
        consistency with `DataArray.sizes`.

        See Also
        --------
        DataArray.sizes
        """
        return self.dims
    @property
    def _attr_sources(self) -> Iterable[Mapping[Hashable, Any]]:
        """Places to look-up items for attribute-style access."""
        # Attribute access covers everything key-access does, plus attrs.
        yield from self._item_sources
        yield self.attrs

    @property
    def _item_sources(self) -> Iterable[Mapping[Any, Any]]:
        """Places to look-up items for key-completion."""
        yield self.data_vars
        yield HybridMappingProxy(keys=self._coord_names, mapping=self.coords)

        # virtual coordinates
        yield HybridMappingProxy(keys=self.dims, mapping=self)

        # immediate child nodes
        yield self.children
def _ipython_key_completions_(self) -> List[str]:
"""Provide method for the key-autocompletions in IPython.
See http://ipython.readthedocs.io/en/stable/config/integrating.html#tab-completion
For the details.
"""
# TODO allow auto-completing relative string paths, e.g. `dt['path/to/../ node'`
# Would require changes to ipython's autocompleter, see https://github.com/ipython/ipython/issues/12420
# Instead for now we only list direct paths to all node in subtree explicitly
items_on_this_node = self._item_sources
full_file_like_paths_to_all_nodes_in_subtree = {
node.path[1:]: node for node in self.subtree
}
all_item_sources = itertools.chain(
items_on_this_node, [full_file_like_paths_to_all_nodes_in_subtree]
)
items = {
item
for source in all_item_sources
for item in source
if isinstance(item, str)
}
return list(items)
def __contains__(self, key: object) -> bool:
"""The 'in' operator will return true or false depending on whether
'key' is either an array stored in the datatree or a child node, or neither.
"""
return key in self.variables or key in self.children
    def __bool__(self) -> bool:
        # A node is truthy if it holds any data variables or any children.
        return bool(self.ds.data_vars) or bool(self.children)

    def __iter__(self) -> Iterator[Hashable]:
        # Iterate over data-variable names first, then child-node names,
        # matching the count returned by __len__.
        return itertools.chain(self.ds.data_vars, self.children)

    def __array__(self, dtype=None):
        # A tree has no single array representation; point the user at the
        # supported conversion routes instead.
        raise TypeError(
            "cannot directly convert a DataTree into a "
            "numpy array. Instead, create an xarray.DataArray "
            "first, either with indexing on the DataTree or by "
            "invoking the `to_array()` method."
        )

    def __repr__(self) -> str:
        # Text rendering of the whole subtree.
        return formatting.datatree_repr(self)

    def __str__(self) -> str:
        return formatting.datatree_repr(self)
def _repr_html_(self):
"""Make html representation of datatree object"""
if XR_OPTS["display_style"] == "text":
return f"
{escape(repr(self))}
"
return formatting_html.datatree_repr(self)
    @classmethod
    def _construct_direct(
        cls,
        variables: dict[Any, Variable],
        coord_names: set[Hashable],
        dims: Optional[dict[Any, int]] = None,
        attrs: Optional[dict] = None,
        indexes: Optional[dict[Any, Index]] = None,
        encoding: Optional[dict] = None,
        name: str | None = None,
        parent: DataTree | None = None,
        children: Optional[OrderedDict[str, DataTree]] = None,
        close: Optional[Callable[[], None]] = None,
    ) -> DataTree:
        """Shortcut around __init__ for internal use when we want to skip costly validation.

        Arguments are assigned without copying or collision checks, so callers
        must guarantee consistency themselves.
        """
        # data attributes
        if dims is None:
            dims = calculate_dimensions(variables)
        if indexes is None:
            indexes = {}
        if children is None:
            children = OrderedDict()

        # __new__ bypasses __init__ (and its validation) entirely.
        obj: DataTree = object.__new__(cls)
        obj._variables = variables
        obj._coord_names = coord_names
        obj._dims = dims
        obj._indexes = indexes
        obj._attrs = attrs
        obj._close = close
        obj._encoding = encoding

        # tree attributes
        obj._name = name
        obj._children = children
        obj._parent = parent

        return obj
    def _replace(
        self: DataTree,
        variables: Optional[dict[Hashable, Variable]] = None,
        coord_names: Optional[set[Hashable]] = None,
        dims: Optional[dict[Any, int]] = None,
        attrs: dict[Hashable, Any] | None | Default = _default,
        indexes: Optional[dict[Hashable, Index]] = None,
        encoding: dict | None | Default = _default,
        name: str | None | Default = _default,
        parent: DataTree | None = _default,
        children: Optional[OrderedDict[str, DataTree]] = None,
        inplace: bool = False,
    ) -> DataTree:
        """
        Fastpath constructor for internal use.

        Returns an object with optionally replaced attributes.

        Explicitly passed arguments are *not* copied when placed on the new
        datatree. It is up to the caller to ensure that they have the right type
        and are not used elsewhere.
        """
        # TODO Adding new children inplace using this method will cause bugs.
        # You will end up with an inconsistency between the name of the child node and the key the child is stored under.
        # Use ._set() instead for now
        if inplace:
            # Mutate this node in place; only explicitly-passed fields change.
            if variables is not None:
                self._variables = variables
            if coord_names is not None:
                self._coord_names = coord_names
            if dims is not None:
                self._dims = dims
            if attrs is not _default:
                self._attrs = attrs
            if indexes is not None:
                self._indexes = indexes
            if encoding is not _default:
                self._encoding = encoding
            if name is not _default:
                self._name = name
            if parent is not _default:
                self._parent = parent
            if children is not None:
                self._children = children
            obj = self
        else:
            # Build a new node, defaulting unset fields to copies of this one.
            if variables is None:
                variables = self._variables.copy()
            if coord_names is None:
                coord_names = self._coord_names.copy()
            if dims is None:
                dims = self._dims.copy()
            if attrs is _default:
                attrs = copy.copy(self._attrs)
            if indexes is None:
                indexes = self._indexes.copy()
            if encoding is _default:
                encoding = copy.copy(self._encoding)
            if name is _default:
                name = self._name  # no need to copy str objects or None
            if parent is _default:
                parent = copy.copy(self._parent)
            # NOTE(review): ``children`` defaults to None (not ``_default``),
            # so this check can never be true and a non-inplace ``_replace()``
            # without an explicit ``children`` argument ends up with an empty
            # children dict (via _construct_direct) — confirm this is intended.
            if children is _default:
                children = copy.copy(self._children)
            obj = self._construct_direct(
                variables,
                coord_names,
                dims,
                attrs,
                indexes,
                encoding,
                name,
                parent,
                children,
            )
        return obj
    def copy(
        self: DataTree,
        deep: bool = False,
    ) -> DataTree:
        """
        Returns a copy of this subtree.

        Copies this node and all child nodes.

        If `deep=True`, a deep copy is made of each of the component variables.
        Otherwise, a shallow copy of each of the component variable is made, so
        that the underlying memory region of the new datatree is the same as in
        the original datatree.

        Parameters
        ----------
        deep : bool, default: False
            Whether each component variable is loaded into memory and copied onto
            the new object. Default is False.

        Returns
        -------
        object : DataTree
            New object with dimensions, attributes, coordinates, name, encoding,
            and data of this node and all child nodes copied from original.

        See Also
        --------
        xarray.Dataset.copy
        pandas.DataFrame.copy
        """
        return self._copy_subtree(deep=deep)

    def _copy_subtree(
        self: DataTree,
        deep: bool = False,
        memo: dict[int, Any] | None = None,
    ) -> DataTree:
        """Copy entire subtree, node by node, reattaching each copy at the
        same relative path under the new root."""
        # NOTE(review): ``memo`` is accepted for the __deepcopy__ protocol but
        # is not threaded through to the per-node copies — confirm whether
        # shared-object tracking is needed here.
        new_tree = self._copy_node(deep=deep)
        for node in self.descendants:
            path = node.relative_to(self)
            new_tree[path] = node._copy_node(deep=deep)
        return new_tree
def _copy_node(
self: DataTree,
deep: bool = False,
) -> DataTree:
"""Copy just one node of a tree"""
new_node: DataTree = DataTree()
new_node.name = self.name
new_node.ds = self.to_dataset().copy(deep=deep)
return new_node
    def __copy__(self: DataTree) -> DataTree:
        # Shallow copy of the whole subtree (variables share memory).
        return self._copy_subtree(deep=False)

    def __deepcopy__(self: DataTree, memo: dict[int, Any] | None = None) -> DataTree:
        # Deep copy of the whole subtree; ``memo`` follows the copy protocol.
        return self._copy_subtree(deep=True, memo=memo)
def get(
self: DataTree, key: str, default: Optional[DataTree | DataArray] = None
) -> Optional[DataTree | DataArray]:
"""
Access child nodes, variables, or coordinates stored in this node.
Returned object will be either a DataTree or DataArray object depending on whether the key given points to a
child or variable.
Parameters
----------
key : str
Name of variable / child within this node. Must lie in this immediate node (not elsewhere in the tree).
default : DataTree | DataArray, optional
A value to return if the specified key does not exist. Default return value is None.
"""
if key in self.children:
return self.children[key]
elif key in self.ds:
return self.ds[key]
else:
return default
    def __getitem__(self: DataTree, key: str) -> DataTree | DataArray:
        """
        Access child nodes, variables, or coordinates stored anywhere in this tree.

        Returned object will be either a DataTree or DataArray object depending on whether the key given points to a
        child or variable.

        Parameters
        ----------
        key : str
            Name of variable / child within this node, or unix-like path to variable / child within another node.

        Returns
        -------
        Union[DataTree, DataArray]
        """
        # Either:
        if utils.is_dict_like(key):
            # dict-like indexing
            raise NotImplementedError("Should this index over whole tree?")
        elif isinstance(key, str):
            # TODO should possibly deal with hashables in general?
            # path-like: a name of a node/variable, or path to a node/variable
            path = NodePath(key)
            return self._get_item(path)
        elif utils.is_list_like(key):
            # iterable of variable names
            raise NotImplementedError(
                "Selecting via tags is deprecated, and selecting multiple items should be "
                "implemented via .subset"
            )
        else:
            raise ValueError(f"Invalid format for key: {key}")

    def _set(self, key: str, val: DataTree | CoercibleValue) -> None:
        """
        Set the child node or variable with the specified key to value.

        Counterpart to the public .get method, and also only works on the immediate node, not other nodes in the tree.
        """
        if isinstance(val, DataTree):
            # create and assign a shallow copy here so as not to alter original name of node in grafted tree
            new_node = val.copy(deep=False)
            new_node.name = key
            new_node.parent = self
        else:
            if not isinstance(val, (DataArray, Variable)):
                # accommodate other types that can be coerced into Variables
                val = DataArray(val)
            self.update({key: val})

    def __setitem__(
        self,
        key: str,
        value: Any,
    ) -> None:
        """
        Add either a child node or an array to the tree, at any position.

        Data can be added anywhere, and new nodes will be created to cross the path to the new location if necessary.

        If there is already a node at the given location, then if value is a Node class or Dataset it will overwrite the
        data already present at that node, and if value is a single array, it will be merged with it.
        """
        # TODO xarray.Dataset accepts other possibilities, how do we exactly replicate all the behaviour?
        if utils.is_dict_like(key):
            raise NotImplementedError
        elif isinstance(key, str):
            # TODO should possibly deal with hashables in general?
            # path-like: a name of a node/variable, or path to a node/variable
            path = NodePath(key)
            return self._set_item(path, value, new_nodes_along_path=True)
        else:
            raise ValueError("Invalid format for key")
    def update(self, other: Dataset | Mapping[str, DataTree | DataArray]) -> None:
        """
        Update this node's children and / or variables.

        Just like `dict.update` this is an in-place operation.

        Raises
        ------
        TypeError
            If a value in ``other`` is neither a DataTree, DataArray, nor Variable.
        """
        # TODO separate by type
        new_children = {}
        new_variables = {}
        for k, v in other.items():
            if isinstance(v, DataTree):
                # avoid named node being stored under inconsistent key
                new_child = v.copy()
                new_child.name = k
                new_children[k] = new_child
            elif isinstance(v, (DataArray, Variable)):
                # TODO this should also accommodate other types that can be coerced into Variables
                new_variables[k] = v
            else:
                raise TypeError(f"Type {type(v)} cannot be assigned to a DataTree")

        # Merge variables through the regular Dataset update machinery.
        vars_merge_result = dataset_update_method(self.to_dataset(), new_variables)
        # TODO are there any subtleties with preserving order of children like this?
        merged_children = OrderedDict({**self.children, **new_children})
        self._replace(
            inplace=True, children=merged_children, **vars_merge_result._asdict()
        )

    def assign(
        self, items: Mapping[Any, Any] | None = None, **items_kwargs: Any
    ) -> DataTree:
        """
        Assign new data variables or child nodes to a DataTree, returning a new object
        with all the original items in addition to the new ones.

        Parameters
        ----------
        items : mapping of hashable to Any
            Mapping from variable or child node names to the new values. If the new values
            are callable, they are computed on the Dataset and assigned to new
            data variables. If the values are not callable, (e.g. a DataTree, DataArray,
            scalar, or array), they are simply assigned.
        **items_kwargs
            The keyword arguments form of ``variables``.
            One of variables or variables_kwargs must be provided.

        Returns
        -------
        dt : DataTree
            A new DataTree with the new variables or children in addition to all the
            existing items.

        Notes
        -----
        Since ``kwargs`` is a dictionary, the order of your arguments may not
        be preserved, and so the order of the new variables is not well-defined.
        Assigning multiple items within the same ``assign`` is
        possible, but you cannot reference other variables created within the
        same ``assign`` call.

        See Also
        --------
        xarray.Dataset.assign
        pandas.DataFrame.assign
        """
        items = either_dict_or_kwargs(items, items_kwargs, "assign")
        # Copy first so the original tree is left untouched.
        dt = self.copy()
        dt.update(items)
        return dt
def drop_nodes(
self: DataTree, names: str | Iterable[str], *, errors: ErrorOptions = "raise"
) -> DataTree:
"""
Drop child nodes from this node.
Parameters
----------
names : str or iterable of str
Name(s) of nodes to drop.
errors : {"raise", "ignore"}, default: "raise"
If 'raise', raises a KeyError if any of the node names
passed are not present as children of this node. If 'ignore',
any given names that are present are dropped and no error is raised.
Returns
-------
dropped : DataTree
A copy of the node with the specified children dropped.
"""
# the Iterable check is required for mypy
if isinstance(names, str) or not isinstance(names, Iterable):
names = {names}
else:
names = set(names)
if errors == "raise":
extra = names - set(self.children)
if extra:
raise KeyError(f"Cannot drop all nodes - nodes {extra} not present")
children_to_keep = OrderedDict(
{name: child for name, child in self.children.items() if name not in names}
)
return self._replace(children=children_to_keep)
@classmethod
def from_dict(
cls,
d: MutableMapping[str, Dataset | DataArray | DataTree | None],
name: Optional[str] = None,
) -> DataTree:
"""
Create a datatree from a dictionary of data objects, organised by paths into the tree.
Parameters
----------
d : dict-like
A mapping from path names to xarray.Dataset, xarray.DataArray, or DataTree objects.
Path names are to be given as unix-like path. If path names containing more than one part are given, new
tree nodes will be constructed as necessary.
To assign data to the root node of the tree use "/" as the path.
name : Hashable, optional
Name for the root node of the tree. Default is None.
Returns
-------
DataTree
Notes
-----
If your dictionary is nested you will need to flatten it before using this method.
"""
# First create the root node
root_data = d.pop("/", None)
obj = cls(name=name, data=root_data, parent=None, children=None)
if d:
# Populate tree with children determined from data_objects mapping
for path, data in d.items():
# Create and set new node
node_name = NodePath(path).name
if isinstance(data, cls):
new_node = data.copy()
new_node.orphan()
else:
new_node = cls(name=node_name, data=data)
obj._set_item(
path,
new_node,
allow_overwrite=False,
new_nodes_along_path=True,
)
return obj
def to_dict(self) -> Dict[str, Dataset]:
"""
Create a dictionary mapping of absolute node paths to the data contained in those nodes.
Returns
-------
Dict[str, Dataset]
"""
return {node.path: node.to_dataset() for node in self.subtree}
@property
def nbytes(self) -> int:
return sum(node.to_dataset().nbytes for node in self.subtree)
    def __len__(self) -> int:
        # Counts both child nodes and data variables, matching __iter__.
        return len(self.children) + len(self.data_vars)

    @property
    def indexes(self) -> Indexes[pd.Index]:
        """Mapping of pandas.Index objects used for label based indexing.

        Raises an error if this DataTree node has indexes that cannot be coerced
        to pandas.Index objects.

        See Also
        --------
        DataTree.xindexes
        """
        return self.xindexes.to_pandas_indexes()

    @property
    def xindexes(self) -> Indexes[Index]:
        """Mapping of xarray Index objects used for label based indexing."""
        return Indexes(self._indexes, {k: self._variables[k] for k in self._indexes})

    @property
    def coords(self) -> DatasetCoordinates:
        """Dictionary of xarray.DataArray objects corresponding to coordinate
        variables
        """
        # Built from a fresh mutable Dataset view of this node's contents.
        return DatasetCoordinates(self.to_dataset())

    @property
    def data_vars(self) -> DataVariables:
        """Dictionary of DataArray objects corresponding to data variables"""
        return DataVariables(self.to_dataset())
    def isomorphic(
        self,
        other: DataTree,
        from_root: bool = False,
        strict_names: bool = False,
    ) -> bool:
        """
        Two DataTrees are considered isomorphic if every node has the same number of children.

        Nothing about the data in each node is checked.

        Isomorphism is a necessary condition for two trees to be used in a nodewise binary operation,
        such as ``tree1 + tree2``.

        By default this method does not check any part of the tree above the given node.
        Therefore this method can be used as default to check that two subtrees are isomorphic.

        Parameters
        ----------
        other : DataTree
            The other tree object to compare to.
        from_root : bool, optional, default is False
            Whether or not to first traverse to the root of the two trees before checking for isomorphism.
            If neither tree has a parent then this has no effect.
        strict_names : bool, optional, default is False
            Whether or not to also check that every node in the tree has the same name as its counterpart in the other
            tree.

        See Also
        --------
        DataTree.equals
        DataTree.identical
        """
        try:
            check_isomorphic(
                self,
                other,
                require_names_equal=strict_names,
                check_from_root=from_root,
            )
            return True
        except (TypeError, TreeIsomorphismError):
            # check_isomorphic signals a mismatch by raising; convert that
            # into a boolean result here.
            return False
def equals(self, other: DataTree, from_root: bool = True) -> bool:
"""
Two DataTrees are equal if they have isomorphic node structures, with matching node names,
and if they have matching variables and coordinates, all of which are equal.
By default this method will check the whole tree above the given node.
Parameters
----------
other : DataTree
The other tree object to compare to.
from_root : bool, optional, default is True
Whether or not to first traverse to the root of the two trees before checking for isomorphism.
If neither tree has a parent then this has no effect.
See Also
--------
Dataset.equals
DataTree.isomorphic
DataTree.identical
"""
if not self.isomorphic(other, from_root=from_root, strict_names=True):
return False
return all(
[
node.ds.equals(other_node.ds)
for node, other_node in zip(self.subtree, other.subtree)
]
)
def identical(self, other: DataTree, from_root=True) -> bool:
"""
Like equals, but will also check all dataset attributes and the attributes on
all variables and coordinates.
By default this method will check the whole tree above the given node.
Parameters
----------
other : DataTree
The other tree object to compare to.
from_root : bool, optional, default is True
Whether or not to first traverse to the root of the two trees before checking for isomorphism.
If neither tree has a parent then this has no effect.
See Also
--------
Dataset.identical
DataTree.isomorphic
DataTree.equals
"""
if not self.isomorphic(other, from_root=from_root, strict_names=True):
return False
return all(
node.ds.identical(other_node.ds)
for node, other_node in zip(self.subtree, other.subtree)
)
def filter(self: DataTree, filterfunc: Callable[[DataTree], bool]) -> DataTree:
"""
Filter nodes according to a specified condition.
Returns a new tree containing only the nodes in the original tree for which `fitlerfunc(node)` is True.
Will also contain empty nodes at intermediate positions if required to support leaves.
Parameters
----------
filterfunc: function
A function which accepts only one DataTree - the node on which filterfunc will be called.
Returns
-------
DataTree
See Also
--------
match
pipe
map_over_subtree
"""
filtered_nodes = {
node.path: node.ds for node in self.subtree if filterfunc(node)
}
return DataTree.from_dict(filtered_nodes, name=self.root.name)
def match(self, pattern: str) -> DataTree:
"""
Return nodes with paths matching pattern.
Uses unix glob-like syntax for pattern-matching.
Parameters
----------
pattern: str
A pattern to match each node path against.
Returns
-------
DataTree
See Also
--------
filter
pipe
map_over_subtree
Examples
--------
>>> dt = DataTree.from_dict(
... {
... "/a/A": None,
... "/a/B": None,
... "/b/A": None,
... "/b/B": None,
... }
... )
>>> dt.match("*/B")
DataTree('None', parent=None)
├── DataTree('a')
│ └── DataTree('B')
└── DataTree('b')
└── DataTree('B')
"""
matching_nodes = {
node.path: node.ds
for node in self.subtree
if NodePath(node.path).match(pattern)
}
return DataTree.from_dict(matching_nodes, name=self.root.name)
    def map_over_subtree(
        self,
        func: Callable,
        *args: Iterable[Any],
        **kwargs: Any,
    ) -> DataTree | Tuple[DataTree]:
        """
        Apply a function to every dataset in this subtree, returning a new tree which stores the results.

        The function will be applied to any dataset stored in this node, as well as any dataset stored in any of the
        descendant nodes. The returned tree will have the same structure as the original subtree.

        func needs to return a Dataset in order to rebuild the subtree.

        Parameters
        ----------
        func : callable
            Function to apply to datasets with signature:
            `func(node.ds, *args, **kwargs) -> Dataset`.
            Function will not be applied to any nodes without datasets.
        *args : tuple, optional
            Positional arguments passed on to `func`.
        **kwargs : Any
            Keyword arguments passed on to `func`.

        Returns
        -------
        subtrees : DataTree, Tuple of DataTrees
            One or more subtrees containing results from applying ``func`` to the data at each node.
        """
        # TODO this signature means that func has no way to know which node it is being called upon - change?
        # Delegates to the module-level decorator, passing self as the tree argument.
        # TODO fix this typing error
        return map_over_subtree(func)(self, *args, **kwargs)  # type: ignore[operator]
def map_over_subtree_inplace(
self,
func: Callable,
*args: Iterable[Any],
**kwargs: Any,
) -> None:
"""
Apply a function to every dataset in this subtree, updating data in place.
Parameters
----------
func : callable
Function to apply to datasets with signature:
`func(node.ds, *args, **kwargs) -> Dataset`.
Function will not be applied to any nodes without datasets,
*args : tuple, optional
Positional arguments passed on to `func`.
**kwargs : Any
Keyword arguments passed on to `func`.
"""
# TODO if func fails on some node then the previous nodes will still have been updated...
for node in self.subtree:
if node.has_data:
node.ds = func(node.ds, *args, **kwargs)
def pipe(
self, func: Callable | tuple[Callable, str], *args: Any, **kwargs: Any
) -> Any:
"""Apply ``func(self, *args, **kwargs)``
This method replicates the pandas method of the same name.
Parameters
----------
func : callable
function to apply to this xarray object (Dataset/DataArray).
``args``, and ``kwargs`` are passed into ``func``.
Alternatively a ``(callable, data_keyword)`` tuple where
``data_keyword`` is a string indicating the keyword of
``callable`` that expects the xarray object.
*args
positional arguments passed into ``func``.
**kwargs
a dictionary of keyword arguments passed into ``func``.
Returns
-------
object : Any
the return type of ``func``.
Notes
-----
Use ``.pipe`` when chaining together functions that expect
xarray or pandas objects, e.g., instead of writing
.. code:: python
f(g(h(dt), arg1=a), arg2=b, arg3=c)
You can write
.. code:: python
(dt.pipe(h).pipe(g, arg1=a).pipe(f, arg2=b, arg3=c))
If you have a function that takes the data as (say) the second
argument, pass a tuple indicating which keyword expects the
data. For example, suppose ``f`` takes its data as ``arg2``:
.. code:: python
(dt.pipe(h).pipe(g, arg1=a).pipe((f, "arg2"), arg1=a, arg3=c))
"""
if isinstance(func, tuple):
func, target = func
if target in kwargs:
raise ValueError(
f"{target} is both the pipe target and a keyword argument"
)
kwargs[target] = self
else:
args = (self,) + args
return func(*args, **kwargs)
def render(self):
"""Print tree structure, including any data stored at each node."""
for pre, fill, node in RenderTree(self):
print(f"{pre}DataTree('{self.name}')")
for ds_line in repr(node.ds)[1:]:
print(f"{fill}{ds_line}")
    def merge(self, datatree: DataTree) -> DataTree:
        """Merge all the leaves of a second DataTree into this one.

        Not yet implemented; always raises NotImplementedError.
        """
        raise NotImplementedError
    def merge_child_nodes(self, *paths, new_path: T_Path) -> DataTree:
        """Merge a set of child nodes into a single new node.

        Not yet implemented; always raises NotImplementedError.
        """
        raise NotImplementedError
# TODO some kind of .collapse() or .flatten() method to merge a subtree
def as_array(self) -> DataArray:
return self.ds.as_dataarray()
    @property
    def groups(self):
        """Return all netCDF4 groups in the tree, given as a tuple of path-like strings."""
        # Includes the root ("/") and the path of every descendant node.
        return tuple(node.path for node in self.subtree)
    def to_netcdf(
        self, filepath, mode: str = "w", encoding=None, unlimited_dims=None, **kwargs
    ):
        """
        Write datatree contents to a netCDF file.

        Parameters
        ----------
        filepath : str or Path
            Path to which to save this datatree.
        mode : {"w", "a"}, default: "w"
            Write ('w') or append ('a') mode. If mode='w', any existing file at
            this location will be overwritten. If mode='a', existing variables
            will be overwritten. Only applies to the root group.
        encoding : dict, optional
            Nested dictionary with variable names as keys and dictionaries of
            variable specific encodings as values, e.g.,
            ``{"root/set1": {"my_variable": {"dtype": "int16", "scale_factor": 0.1,
            "zlib": True}, ...}, ...}``. See ``xarray.Dataset.to_netcdf`` for available
            options.
        unlimited_dims : dict, optional
            Mapping of unlimited dimensions per group that should be serialized as unlimited dimensions.
            By default, no dimensions are treated as unlimited dimensions.
            Note that unlimited_dims may also be set via
            ``dataset.encoding["unlimited_dims"]``.
        kwargs :
            Additional keyword arguments to be passed to ``xarray.Dataset.to_netcdf``
        """
        # Imported here to avoid a circular import at module load time.
        from .io import _datatree_to_netcdf

        _datatree_to_netcdf(
            self,
            filepath,
            mode=mode,
            encoding=encoding,
            unlimited_dims=unlimited_dims,
            **kwargs,
        )
    def to_zarr(
        self,
        store,
        mode: str = "w-",
        encoding=None,
        consolidated: bool = True,
        **kwargs,
    ):
        """
        Write datatree contents to a Zarr store.

        Parameters
        ----------
        store : MutableMapping, str or Path, optional
            Store or path to directory in file system
        mode : {"w", "w-", "a", "r+", None}, default: "w-"
            Persistence mode: "w" means create (overwrite if exists); "w-" means create (fail if exists);
            "a" means override existing variables (create if does not exist); "r+" means modify existing
            array values only (raise an error if any metadata or shapes would change). The default mode
            is "a" if append_dim is set. Otherwise, it is "r+" if region is set and w- otherwise.
        encoding : dict, optional
            Nested dictionary with variable names as keys and dictionaries of
            variable specific encodings as values, e.g.,
            ``{"root/set1": {"my_variable": {"dtype": "int16", "scale_factor": 0.1}, ...}, ...}``.
            See ``xarray.Dataset.to_zarr`` for available options.
        consolidated : bool
            If True, apply zarr's `consolidate_metadata` function to the store
            after writing metadata for all groups.
        kwargs :
            Additional keyword arguments to be passed to ``xarray.Dataset.to_zarr``
        """
        # Imported here to avoid a circular import at module load time.
        from .io import _datatree_to_zarr

        _datatree_to_zarr(
            self,
            store,
            mode=mode,
            encoding=encoding,
            consolidated=consolidated,
            **kwargs,
        )
    def plot(self):
        """Plot the contents of the tree. Not yet implemented."""
        raise NotImplementedError
datatree-0.0.14/datatree/extensions.py 0000664 0000000 0000000 00000001015 14552576503 0017740 0 ustar 00root root 0000000 0000000 from xarray.core.extensions import _register_accessor
from .datatree import DataTree
def register_datatree_accessor(name):
    """Register a custom accessor on DataTree objects.

    Parameters
    ----------
    name : str
        Name under which the accessor should be registered. A warning is issued
        if this name conflicts with a preexisting attribute.

    Returns
    -------
    callable
        A class decorator that registers the decorated class as the accessor.

    See Also
    --------
    xarray.register_dataarray_accessor
    xarray.register_dataset_accessor
    """
    # Reuse xarray's generic accessor machinery, targeting the DataTree class.
    return _register_accessor(name, DataTree)
datatree-0.0.14/datatree/formatting.py 0000664 0000000 0000000 00000005633 14552576503 0017725 0 ustar 00root root 0000000 0000000 from typing import TYPE_CHECKING
from xarray.core.formatting import _compat_to_str, diff_dataset_repr
from .mapping import diff_treestructure
from .render import RenderTree
if TYPE_CHECKING:
from .datatree import DataTree
def diff_nodewise_summary(a, b, compat):
    """Iterates over all corresponding nodes, recording differences between data at each location."""
    compat_str = _compat_to_str(compat)

    summary = []
    for node_a, node_b in zip(a.subtree, b.subtree):
        a_ds, b_ds = node_a.ds, node_b.ds

        # Skip nodes whose data already satisfies the requested comparison.
        if a_ds._all_compat(b_ds, compat):
            continue

        # Drop the first line of the dataset diff (its generic header) and
        # prefix the remainder with the node's position in the tree.
        dataset_diff = diff_dataset_repr(a_ds, b_ds, compat_str)
        data_diff = "\n".join(dataset_diff.split("\n", 1)[1:])
        summary.append(
            f"\nData in nodes at position '{node_a.path}' do not match:{data_diff}"
        )

    return "\n".join(summary)
def diff_tree_repr(a, b, compat):
    """Build a printable summary of why trees ``a`` and ``b`` differ under ``compat``."""
    summary = [
        f"Left and right {type(a).__name__} objects are not {_compat_to_str(compat)}"
    ]

    # TODO check root parents?

    # Node names only have to line up for the stricter comparison modes.
    strict_names = compat in ("equals", "identical")
    treestructure_diff = diff_treestructure(a, b, strict_names)

    # If the tree structures are different there is no point comparing each node
    # TODO we could show any differences in nodes up to the first place that structure differs?
    if treestructure_diff or compat == "isomorphic":
        summary.append("\n" + treestructure_diff)
    else:
        nodewise_diff = diff_nodewise_summary(a, b, compat)
        summary.append("\n" + nodewise_diff)

    return "\n".join(summary)
def datatree_repr(dt):
    """A printable representation of the structure of this entire tree."""
    renderer = RenderTree(dt)

    lines = []
    for pre, fill, node in renderer:
        node_repr = _single_node_repr(node)

        # First line of the node repr carries the tree-drawing prefix.
        node_line = f"{pre}{node_repr.splitlines()[0]}"
        lines.append(node_line)

        if node.has_data or node.has_attrs:
            # Skip the node header and the dataset repr's own header line,
            # indenting the remaining dataset lines under the tree structure.
            ds_repr = node_repr.splitlines()[2:]
            for line in ds_repr:
                if len(node.children) > 0:
                    # Continue the vertical connector through this node's data.
                    lines.append(f"{fill}{renderer.style.vertical}{line}")
                else:
                    lines.append(f"{fill}{' ' * len(renderer.style.vertical)}{line}")

    # Tack on info about whether or not root node has a parent at the start
    first_line = lines[0]
    parent = f'"{dt.parent.name}"' if dt.parent is not None else "None"
    first_line_with_parent = first_line[:-1] + f", parent={parent})"
    lines[0] = first_line_with_parent

    return "\n".join(lines)
def _single_node_repr(node: "DataTree") -> str:
"""Information about this node, not including its relationships to other nodes."""
node_info = f"DataTree('{node.name}')"
if node.has_data or node.has_attrs:
ds_info = "\n" + repr(node.ds)
else:
ds_info = ""
return node_info + ds_info
datatree-0.0.14/datatree/formatting_html.py 0000664 0000000 0000000 00000006424 14552576503 0020750 0 ustar 00root root 0000000 0000000 from functools import partial
from html import escape
from typing import Any, Mapping
from xarray.core.formatting_html import (
_mapping_section,
_obj_repr,
attr_section,
coord_section,
datavar_section,
dim_section,
)
from xarray.core.options import OPTIONS
OPTIONS["display_expand_groups"] = "default"
def summarize_children(children: Mapping[str, Any]) -> str:
N_CHILDREN = len(children) - 1
# Get result from node_repr and wrap it
lines_callback = lambda n, c, end: _wrap_repr(node_repr(n, c), end=end)
children_html = "".join(
lines_callback(n, c, end=False) # Long lines
if i < N_CHILDREN
else lines_callback(n, c, end=True) # Short lines
for i, (n, c) in enumerate(children.items())
)
return "".join(
[
"
"]
ds = dt.ds
sections = [
children_section(dt.children),
dim_section(ds),
coord_section(ds.coords),
datavar_section(ds.data_vars),
attr_section(ds.attrs),
]
return _obj_repr(ds, header_components, sections)
def _wrap_repr(r: str, end: bool = False) -> str:
"""
Wrap HTML representation with a tee to the left of it.
Enclosing HTML tag is a
with :code:`display: inline-grid` style.
Turns:
[ title ]
| details |
|_____________|
into (A):
|─ [ title ]
| | details |
| |_____________|
or (B):
└─ [ title ]
| details |
|_____________|
Parameters
----------
r: str
HTML representation to wrap.
end: bool
Specify if the line on the left should continue or end.
Default is True.
Returns
-------
str
Wrapped HTML representation.
Tee color is set to the variable :code:`--xr-border-color`.
"""
# height of line
end = bool(end)
height = "100%" if end is False else "1.2em"
return "".join(
[
"
",
"
",
"
",
"
",
"
",
"
",
"
",
r,
"
" "
",
"
",
]
)
def datatree_repr(dt: Any) -> str:
    """Top-level HTML representation of an entire DataTree."""
    obj_type = f"datatree.{type(dt).__name__}"
    return node_repr(obj_type, dt)
datatree-0.0.14/datatree/io.py 0000664 0000000 0000000 00000015662 14552576503 0016165 0 ustar 00root root 0000000 0000000 from xarray import Dataset, open_dataset
from .datatree import DataTree, NodePath
def _iter_zarr_groups(root, parent="/"):
    """Recursively yield the path of every zarr group below ``root``, depth-first."""
    base = NodePath(parent)
    for name, group in root.groups():
        group_path = base / name
        yield str(group_path)
        # Descend into each subgroup, carrying the accumulated path.
        yield from _iter_zarr_groups(group, parent=group_path)
def _iter_nc_groups(root, parent="/"):
    """Recursively yield the path of every netCDF group below ``root``, depth-first."""
    base = NodePath(parent)
    for name, group in root.groups.items():
        group_path = base / name
        yield str(group_path)
        # Descend into each subgroup, carrying the accumulated path.
        yield from _iter_nc_groups(group, parent=group_path)
def _get_nc_dataset_class(engine):
if engine == "netcdf4":
from netCDF4 import Dataset # type: ignore
elif engine == "h5netcdf":
from h5netcdf.legacyapi import Dataset # type: ignore
elif engine is None:
try:
from netCDF4 import Dataset
except ImportError:
from h5netcdf.legacyapi import Dataset # type: ignore
else:
raise ValueError(f"unsupported engine: {engine}")
return Dataset
def open_datatree(filename_or_obj, engine=None, **kwargs) -> DataTree:
    """
    Open and decode a dataset from a file or file-like object, creating one Tree node for each group in the file.

    Parameters
    ----------
    filename_or_obj : str, Path, file-like, or DataStore
        Strings and Path objects are interpreted as a path to a netCDF file or Zarr store.
    engine : str, optional
        Xarray backend engine to use. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`.
    kwargs :
        Additional keyword arguments passed to ``xarray.open_dataset`` for each group.

    Returns
    -------
    DataTree

    Raises
    ------
    ValueError
        If ``engine`` is not one of the supported backends.
    """
    if engine == "zarr":
        return _open_datatree_zarr(filename_or_obj, **kwargs)
    elif engine in [None, "netcdf4", "h5netcdf"]:
        return _open_datatree_netcdf(filename_or_obj, engine=engine, **kwargs)
    else:
        # Name the offending value, matching the style of _get_nc_dataset_class.
        raise ValueError(f"unsupported engine: {engine}")
def _open_datatree_netcdf(filename: str, **kwargs) -> DataTree:
    """Build a DataTree from a netCDF file, opening each group as a separate dataset."""
    ncDataset = _get_nc_dataset_class(kwargs.get("engine", None))

    # Root group first, then walk the group hierarchy via the raw netCDF API.
    ds = open_dataset(filename, **kwargs)
    tree_root = DataTree.from_dict({"/": ds})
    with ncDataset(filename, mode="r") as ncds:
        for path in _iter_nc_groups(ncds):
            # Each group is re-opened through xarray so decoding kwargs apply.
            subgroup_ds = open_dataset(filename, group=path, **kwargs)

            # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again
            node_name = NodePath(path).name
            new_node: DataTree = DataTree(name=node_name, data=subgroup_ds)
            tree_root._set_item(
                path,
                new_node,
                allow_overwrite=False,
                new_nodes_along_path=True,
            )
    return tree_root
def _open_datatree_zarr(store, **kwargs) -> DataTree:
    """Build a DataTree from a Zarr store, opening each group as a separate dataset."""
    import zarr  # type: ignore

    # Root group first, then walk the group hierarchy via the raw zarr API.
    zds = zarr.open_group(store, mode="r")
    ds = open_dataset(store, engine="zarr", **kwargs)
    tree_root = DataTree.from_dict({"/": ds})
    for path in _iter_zarr_groups(zds):
        try:
            subgroup_ds = open_dataset(store, engine="zarr", group=path, **kwargs)
        except zarr.errors.PathNotFoundError:
            # Group exists structurally but holds no arrays; keep an empty node.
            subgroup_ds = Dataset()

        # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again
        node_name = NodePath(path).name
        new_node: DataTree = DataTree(name=node_name, data=subgroup_ds)
        tree_root._set_item(
            path,
            new_node,
            allow_overwrite=False,
            new_nodes_along_path=True,
        )
    return tree_root
def _create_empty_netcdf_group(filename, group, mode, engine):
    """Create an empty netCDF group at ``group`` in ``filename`` (used for data-less nodes)."""
    ncDataset = _get_nc_dataset_class(engine)

    with ncDataset(filename, mode=mode) as rootgrp:
        rootgrp.createGroup(group)
def _datatree_to_netcdf(
    dt: DataTree,
    filepath,
    mode: str = "w",
    encoding=None,
    unlimited_dims=None,
    **kwargs,
):
    """Write every node of ``dt`` to a group of a single netCDF4 file."""
    if kwargs.get("format", None) not in [None, "NETCDF4"]:
        raise ValueError("to_netcdf only supports the NETCDF4 format")

    engine = kwargs.get("engine", None)
    if engine not in [None, "netcdf4", "h5netcdf"]:
        raise ValueError("to_netcdf only supports the netcdf4 and h5netcdf engines")

    if kwargs.get("group", None) is not None:
        raise NotImplementedError(
            "specifying a root group for the tree has not been implemented"
        )

    if not kwargs.get("compute", True):
        raise NotImplementedError("compute=False has not been implemented yet")

    if encoding is None:
        encoding = {}

    # In the future, we may want to expand this check to ensure all the provided encoding
    # options are valid. For now, this simply checks that all provided encoding keys are
    # groups in the datatree.
    if set(encoding) - set(dt.groups):
        raise ValueError(
            f"unexpected encoding group name(s) provided: {set(encoding) - set(dt.groups)}"
        )

    if unlimited_dims is None:
        unlimited_dims = {}

    for node in dt.subtree:
        ds = node.ds
        group_path = node.path
        if ds is None:
            # Data-less node: still create the (empty) group so the tree
            # structure round-trips.
            _create_empty_netcdf_group(filepath, group_path, mode, engine)
        else:
            ds.to_netcdf(
                filepath,
                group=group_path,
                mode=mode,
                encoding=encoding.get(node.path),
                unlimited_dims=unlimited_dims.get(node.path),
                **kwargs,
            )
        # After the first group is written the file exists, so switch to
        # append/modify mode for the remaining groups.
        mode = "r+"
def _create_empty_zarr_group(store, group, mode):
    """Create an empty zarr group at ``group`` in ``store`` (used for data-less nodes)."""
    import zarr  # type: ignore

    root = zarr.open_group(store, mode=mode)
    root.create_group(group, overwrite=True)
def _datatree_to_zarr(
    dt: DataTree,
    store,
    mode: str = "w-",
    encoding=None,
    consolidated: bool = True,
    **kwargs,
):
    """Write every node of ``dt`` to a group of a single Zarr store."""
    from zarr.convenience import consolidate_metadata  # type: ignore

    if kwargs.get("group", None) is not None:
        raise NotImplementedError(
            "specifying a root group for the tree has not been implemented"
        )

    if not kwargs.get("compute", True):
        raise NotImplementedError("compute=False has not been implemented yet")

    if encoding is None:
        encoding = {}

    # In the future, we may want to expand this check to ensure all the provided encoding
    # options are valid. For now, this simply checks that all provided encoding keys are
    # groups in the datatree.
    if set(encoding) - set(dt.groups):
        raise ValueError(
            f"unexpected encoding group name(s) provided: {set(encoding) - set(dt.groups)}"
        )

    for node in dt.subtree:
        ds = node.ds
        group_path = node.path
        if ds is None:
            # Data-less node: still create the (empty) group so the tree
            # structure round-trips.
            _create_empty_zarr_group(store, group_path, mode)
        else:
            # Per-group consolidation is disabled; metadata is consolidated
            # once for the whole store below.
            ds.to_zarr(
                store,
                group=group_path,
                mode=mode,
                encoding=encoding.get(node.path),
                consolidated=False,
                **kwargs,
            )
        # After the first group is written the store exists, so switch any
        # "write" mode to append for the remaining groups.
        if "w" in mode:
            mode = "a"

    if consolidated:
        consolidate_metadata(store)
datatree-0.0.14/datatree/iterators.py 0000664 0000000 0000000 00000007053 14552576503 0017565 0 ustar 00root root 0000000 0000000 from abc import abstractmethod
from collections import abc
from typing import Callable, Iterator, List, Optional
from .treenode import Tree
"""These iterators are copied from anytree.iterators, with minor modifications."""
class AbstractIter(abc.Iterator):
    """Base class for tree iterators; concrete subclasses define the traversal order via ``_iter``."""

    def __init__(
        self,
        node: Tree,
        filter_: Optional[Callable] = None,
        stop: Optional[Callable] = None,
        maxlevel: Optional[int] = None,
    ):
        """
        Iterate over tree starting at `node`.

        Base class for all iterators.

        Keyword Args:
            filter_: function called with every `node` as argument, `node` is returned if `True`.
            stop: stop iteration at `node` if `stop` function returns `True` for `node`.
            maxlevel (int): maximum descending in the node hierarchy.
        """
        self.node = node
        self.filter_ = filter_
        self.stop = stop
        self.maxlevel = maxlevel
        # Underlying generator; built lazily on the first __next__ call.
        self.__iter = None

    def __init(self):
        # Build the concrete traversal generator, substituting permissive
        # defaults for any filter/stop callables that were not supplied.
        node = self.node
        maxlevel = self.maxlevel
        filter_ = self.filter_ or AbstractIter.__default_filter
        stop = self.stop or AbstractIter.__default_stop
        children = (
            []
            if AbstractIter._abort_at_level(1, maxlevel)
            else AbstractIter._get_children([node], stop)
        )
        return self._iter(children, filter_, stop, maxlevel)

    @staticmethod
    def __default_filter(node):
        # Default filter: accept every node.
        return True

    @staticmethod
    def __default_stop(node):
        # Default stop: never prune a subtree.
        return False

    def __iter__(self) -> Iterator[Tree]:
        return self

    def __next__(self) -> Tree:
        # Annotation corrected: __next__ returns one node per call, not an
        # iterator of nodes.
        if self.__iter is None:
            self.__iter = self.__init()
        item = next(self.__iter)  # type: ignore[call-overload]
        return item

    @staticmethod
    @abstractmethod
    def _iter(children: List[Tree], filter_, stop, maxlevel) -> Iterator[Tree]:
        ...

    @staticmethod
    def _abort_at_level(level, maxlevel):
        # True when a depth limit is set and ``level`` exceeds it.
        return maxlevel is not None and level > maxlevel

    @staticmethod
    def _get_children(children: List[Tree], stop) -> List[Tree]:
        # Drop children whose subtrees should be pruned per ``stop``.
        return [child for child in children if not stop(child)]
class PreOrderIter(AbstractIter):
    """
    Iterate over tree applying pre-order strategy starting at `node`.

    Start at root and go-down until reaching a leaf node.
    Step upwards then, and search for the next leafs.
    """

    @staticmethod
    def _iter(children, filter_, stop, maxlevel):
        for child in children:
            if stop(child):
                continue
            # Visit the node itself before any of its descendants.
            if filter_(child):
                yield child
            if AbstractIter._abort_at_level(2, maxlevel):
                continue
            # Recurse with the depth budget reduced by one level.
            child_maxlevel = maxlevel - 1 if maxlevel else None
            yield from PreOrderIter._iter(
                list(child.children.values()), filter_, stop, child_maxlevel
            )
class LevelOrderIter(AbstractIter):
    """
    Iterate over tree applying level-order strategy starting at `node`.
    """

    @staticmethod
    def _iter(children, filter_, stop, maxlevel):
        current = list(children)
        depth = 1
        while current:
            # Yield every accepted node on this level before descending.
            for node in current:
                if filter_(node):
                    yield node
            depth += 1
            if AbstractIter._abort_at_level(depth, maxlevel):
                return
            # Gather the next level's nodes, pruning subtrees per ``stop``.
            upcoming = []
            for node in current:
                upcoming += AbstractIter._get_children(
                    list(node.children.values()), stop
                )
            current = upcoming
datatree-0.0.14/datatree/mapping.py 0000664 0000000 0000000 00000032434 14552576503 0017205 0 ustar 00root root 0000000 0000000 from __future__ import annotations
import functools
import sys
from itertools import repeat
from textwrap import dedent
from typing import TYPE_CHECKING, Callable, Tuple
from xarray import DataArray, Dataset
from .iterators import LevelOrderIter
from .treenode import NodePath, TreeNode
if TYPE_CHECKING:
from .datatree import DataTree
class TreeIsomorphismError(ValueError):
    """Error raised if two tree objects do not share the same node structure."""

    pass
def check_isomorphic(
    a: DataTree,
    b: DataTree,
    require_names_equal: bool = False,
    check_from_root: bool = True,
):
    """
    Check that two trees have the same structure, raising an error if not.

    Does not compare the actual data in the nodes.

    By default this function only checks that subtrees are isomorphic, not the entire tree above (if it exists).
    Can instead optionally check the entire trees starting from the root, which will ensure all
    ancestors are included in the comparison.

    Can optionally check if corresponding nodes should have the same name.

    Parameters
    ----------
    a : DataTree
    b : DataTree
    require_names_equal : Bool
        Whether or not to also check that each node has the same name as its counterpart.
    check_from_root : Bool
        Whether or not to first traverse to the root of the trees before checking for isomorphism.
        If a & b have no parents then this has no effect.

    Raises
    ------
    TypeError
        If either a or b are not tree objects.
    TreeIsomorphismError
        If a and b are tree objects, but are not isomorphic to one another.
        Also optionally raised if their structure is isomorphic, but the names of any two
        respective nodes are not equal.
    """
    if not isinstance(a, TreeNode):
        raise TypeError(f"Argument `a` is not a tree, it is of type {type(a)}")
    if not isinstance(b, TreeNode):
        raise TypeError(f"Argument `b` is not a tree, it is of type {type(b)}")

    if check_from_root:
        a = a.root
        b = b.root

    # Delegate the structural walk; a non-empty diff string means not isomorphic.
    diff = diff_treestructure(a, b, require_names_equal=require_names_equal)

    if diff:
        raise TreeIsomorphismError("DataTree objects are not isomorphic:\n" + diff)
def diff_treestructure(a: DataTree, b: DataTree, require_names_equal: bool) -> str:
    """
    Return a summary of why two trees are not isomorphic.
    If they are isomorphic return an empty string.
    """
    # Walking nodes in "level-order" fashion means walking down from the root breadth-first.
    # Checking for isomorphism by walking in this way implicitly assumes that the tree is an ordered tree
    # (which it is so long as children are stored in a tuple or list rather than in a set).
    # NOTE(review): zip truncates at the shorter iterator; any child-count
    # mismatch is reported at the parent level before the walks can desynchronize.
    for node_a, node_b in zip(LevelOrderIter(a), LevelOrderIter(b)):
        path_a, path_b = node_a.path, node_b.path

        if require_names_equal:
            if node_a.name != node_b.name:
                diff = dedent(
                    f"""\
                Node '{path_a}' in the left object has name '{node_a.name}'
                Node '{path_b}' in the right object has name '{node_b.name}'"""
                )
                return diff

        if len(node_a.children) != len(node_b.children):
            diff = dedent(
                f"""\
                Number of children on node '{path_a}' of the left object: {len(node_a.children)}
                Number of children on node '{path_b}' of the right object: {len(node_b.children)}"""
            )
            return diff

    return ""
def map_over_subtree(func: Callable) -> Callable:
    """
    Decorator which turns a function which acts on (and returns) Datasets into one which acts on and returns DataTrees.

    Applies a function to every dataset in one or more subtrees, returning new trees which store the results.

    The function will be applied to any data-containing dataset stored in any of the nodes in the trees. The returned
    trees will have the same structure as the supplied trees.

    `func` needs to return one Datasets, DataArrays, or None in order to be able to rebuild the subtrees after
    mapping, as each result will be assigned to its respective node of a new tree via `DataTree.__setitem__`. Any
    returned value that is one of these types will be stacked into a separate tree before returning all of them.

    The trees passed to the resulting function must all be isomorphic to one another. Their nodes need not be named
    similarly, but all the output trees will have nodes named in the same way as the first tree passed.

    Parameters
    ----------
    func : callable
        Function to apply to datasets with signature:
        `func(*args, **kwargs) -> Union[Dataset, Iterable[Dataset]]`.
        (i.e. func must accept at least one Dataset and return at least one Dataset.)
        Function will not be applied to any nodes without datasets.
    *args : tuple, optional
        Positional arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
        via .ds .
    **kwargs : Any
        Keyword arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
        via .ds .

    Returns
    -------
    mapped : callable
        Wrapped function which returns one or more tree(s) created from results of applying ``func`` to the dataset at
        each node.

    See also
    --------
    DataTree.map_over_subtree
    DataTree.map_over_subtree_inplace
    DataTree.subtree
    """

    # TODO examples in the docstring

    # TODO inspect function to work out immediately if the wrong number of arguments were passed for it?

    @functools.wraps(func)
    def _map_over_subtree(*args, **kwargs) -> DataTree | Tuple[DataTree, ...]:
        """Internal function which maps func over every node in tree, returning a tree of the results."""
        from .datatree import DataTree

        # Collect every DataTree passed, positionally or by keyword.
        all_tree_inputs = [a for a in args if isinstance(a, DataTree)] + [
            a for a in kwargs.values() if isinstance(a, DataTree)
        ]

        if len(all_tree_inputs) > 0:
            first_tree, *other_trees = all_tree_inputs
        else:
            raise TypeError("Must pass at least one tree object")

        for other_tree in other_trees:
            # isomorphism is transitive so this is enough to guarantee all trees are mutually isomorphic
            check_isomorphic(
                first_tree, other_tree, require_names_equal=False, check_from_root=False
            )

        # Walk all trees simultaneously, applying func to all nodes that lie in same position in different trees
        # We don't know which arguments are DataTrees so we zip all arguments together as iterables
        # Store tuples of results in a dict because we don't yet know how many trees we need to rebuild to return
        out_data_objects = {}
        # Non-tree arguments are repeated so they line up with every node.
        args_as_tree_length_iterables = [
            a.subtree if isinstance(a, DataTree) else repeat(a) for a in args
        ]
        n_args = len(args_as_tree_length_iterables)
        kwargs_as_tree_length_iterables = {
            k: v.subtree if isinstance(v, DataTree) else repeat(v)
            for k, v in kwargs.items()
        }
        for node_of_first_tree, *all_node_args in zip(
            first_tree.subtree,
            *args_as_tree_length_iterables,
            *list(kwargs_as_tree_length_iterables.values()),
        ):
            # Split the zipped per-node values back into positional / keyword,
            # unwrapping DataTree nodes to their datasets.
            node_args_as_datasetviews = [
                a.ds if isinstance(a, DataTree) else a for a in all_node_args[:n_args]
            ]
            node_kwargs_as_datasetviews = dict(
                zip(
                    [k for k in kwargs_as_tree_length_iterables.keys()],
                    [
                        v.ds if isinstance(v, DataTree) else v
                        for v in all_node_args[n_args:]
                    ],
                )
            )
            func_with_error_context = _handle_errors_with_path_context(
                node_of_first_tree.path
            )(func)

            if node_of_first_tree.has_data:
                # call func on the data in this particular set of corresponding nodes
                results = func_with_error_context(
                    *node_args_as_datasetviews, **node_kwargs_as_datasetviews
                )
            elif node_of_first_tree.has_attrs:
                # propagate attrs
                results = node_of_first_tree.ds
            else:
                # nothing to propagate so use fastpath to create empty node in new tree
                results = None

            # TODO implement mapping over multiple trees in-place using if conditions from here on?
            out_data_objects[node_of_first_tree.path] = results

        # Find out how many return values we received
        num_return_values = _check_all_return_values(out_data_objects)

        # Reconstruct 1+ subtrees from the dict of results, by filling in all nodes of all result trees
        original_root_path = first_tree.path
        result_trees = []
        for i in range(num_return_values):
            out_tree_contents = {}
            for n in first_tree.subtree:
                p = n.path
                if p in out_data_objects.keys():
                    if isinstance(out_data_objects[p], tuple):
                        output_node_data = out_data_objects[p][i]
                    else:
                        output_node_data = out_data_objects[p]
                else:
                    output_node_data = None

                # Discard parentage so that new trees don't include parents of input nodes
                relative_path = str(NodePath(p).relative_to(original_root_path))
                relative_path = "/" if relative_path == "." else relative_path
                out_tree_contents[relative_path] = output_node_data

            new_tree = DataTree.from_dict(
                out_tree_contents,
                name=first_tree.name,
            )
            result_trees.append(new_tree)

        # If only one result then don't wrap it in a tuple
        if len(result_trees) == 1:
            return result_trees[0]
        else:
            return tuple(result_trees)

    return _map_over_subtree
def _handle_errors_with_path_context(path):
"""Wraps given function so that if it fails it also raises path to node on which it failed."""
def decorator(func):
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception as e:
if sys.version_info >= (3, 11):
# Add the context information to the error message
e.add_note(
f"Raised whilst mapping function over node with path {path}"
)
raise
return wrapper
return decorator
def add_note(err: BaseException, msg: str) -> None:
    """Attach ``msg`` to ``err`` as an exception note, on any Python version."""
    # TODO: remove once python 3.10 can be dropped
    if sys.version_info >= (3, 11):
        # Native support (PEP 678).
        err.add_note(msg)
    else:
        # Emulate via the informal __notes__ attribute convention.
        err.__notes__ = getattr(err, "__notes__", []) + [msg]  # type: ignore[attr-defined]
def _check_single_set_return_values(path_to_node, obj):
    """Check types returned from single evaluation of func, and return number of return values received from func."""
    if isinstance(obj, (Dataset, DataArray)):
        return 1

    if not isinstance(obj, tuple):
        raise TypeError(
            f"The result of calling func on the node at position {path_to_node} is of type {type(obj)}, not "
            f"Dataset or DataArray, nor a tuple of such types."
        )

    # A tuple result is only valid if every element is itself a Dataset or DataArray.
    for returned in obj:
        if not isinstance(returned, (Dataset, DataArray)):
            raise TypeError(
                f"One of the results of calling func on datasets on the nodes at position {path_to_node} is "
                f"of type {type(returned)}, not Dataset or DataArray."
            )
    return len(obj)
def _check_all_return_values(returned_objects):
"""Walk through all values returned by mapping func over subtrees, raising on any invalid or inconsistent types."""
if all(r is None for r in returned_objects.values()):
raise TypeError(
"Called supplied function on all nodes but found a return value of None for"
"all of them."
)
result_data_objects = [
(path_to_node, r)
for path_to_node, r in returned_objects.items()
if r is not None
]
if len(result_data_objects) == 1:
# Only one node in the tree: no need to check consistency of results between nodes
path_to_node, result = result_data_objects[0]
num_return_values = _check_single_set_return_values(path_to_node, result)
else:
prev_path, _ = result_data_objects[0]
prev_num_return_values, num_return_values = None, None
for path_to_node, obj in result_data_objects[1:]:
num_return_values = _check_single_set_return_values(path_to_node, obj)
if (
num_return_values != prev_num_return_values
and prev_num_return_values is not None
):
raise TypeError(
f"Calling func on the nodes at position {path_to_node} returns {num_return_values} separate return "
f"values, whereas calling func on the nodes at position {prev_path} instead returns "
f"{prev_num_return_values} separate return values."
)
prev_path, prev_num_return_values = path_to_node, num_return_values
return num_return_values
datatree-0.0.14/datatree/ops.py 0000664 0000000 0000000 00000015257 14552576503 0016357 0 ustar 00root root 0000000 0000000 import textwrap
from xarray import Dataset
from .mapping import map_over_subtree
"""
Module which specifies the subset of xarray.Dataset's API which we wish to copy onto DataTree.
Structured to mirror the way xarray defines Dataset's various operations internally, but does not actually import from
xarray's internals directly, only the public-facing xarray.Dataset class.
"""
# Paragraph appended to the docstring of every mapped method, pre-wrapped to
# the same line width used elsewhere in the project's docs.
_MAPPED_DOCSTRING_ADDENDUM = textwrap.fill(
    "This method was copied from xarray.Dataset, but has been altered to "
    "call the method on the Datasets stored in every node of the subtree. "
    "See the `map_over_subtree` function for more details.",
    width=117,
)

# TODO equals, broadcast_equals etc.
# TODO do dask-related private methods need to be exposed?

# Dask-specific Dataset methods to be mapped over every node of a subtree.
_DATASET_DASK_METHODS_TO_MAP = [
    "load",
    "compute",
    "persist",
    "unify_chunks",
    "chunk",
    "map_blocks",
]

# Methods defined directly on xarray.Dataset (indexing, reshaping,
# (re)naming, filling, reductions, fitting, ...) to be mapped over the subtree.
_DATASET_METHODS_TO_MAP = [
    "as_numpy",
    "set_coords",
    "reset_coords",
    "info",
    "isel",
    "sel",
    "head",
    "tail",
    "thin",
    "broadcast_like",
    "reindex_like",
    "reindex",
    "interp",
    "interp_like",
    "rename",
    "rename_dims",
    "rename_vars",
    "swap_dims",
    "expand_dims",
    "set_index",
    "reset_index",
    "reorder_levels",
    "stack",
    "unstack",
    "merge",
    "drop_vars",
    "drop_sel",
    "drop_isel",
    "drop_dims",
    "transpose",
    "dropna",
    "fillna",
    "interpolate_na",
    "ffill",
    "bfill",
    "combine_first",
    "reduce",
    "map",
    "diff",
    "shift",
    "roll",
    "sortby",
    "quantile",
    "rank",
    "differentiate",
    "integrate",
    "cumulative_integrate",
    "filter_by_attrs",
    "polyfit",
    "pad",
    "idxmin",
    "idxmax",
    "argmin",
    "argmax",
    "query",
    "curvefit",
]

_ALL_DATASET_METHODS_TO_MAP = _DATASET_DASK_METHODS_TO_MAP + _DATASET_METHODS_TO_MAP

# Coordinate-aware methods Dataset inherits from DataWithCoords.
_DATA_WITH_COORDS_METHODS_TO_MAP = [
    "squeeze",
    "clip",
    "assign_coords",
    "where",
    "close",
    "isnull",
    "notnull",
    "isin",
    "astype",
]

# Reduction method names, grouped by whether they have NaN-skipping variants.
REDUCE_METHODS = ["all", "any"]
NAN_REDUCE_METHODS = [
    "max",
    "min",
    "mean",
    "prod",
    "sum",
    "std",
    "var",
    "median",
]
NAN_CUM_METHODS = ["cumsum", "cumprod"]

# Dunder operators injected into Dataset by xarray's typed-ops machinery.
_TYPED_DATASET_OPS_TO_MAP = [
    "__add__",
    "__sub__",
    "__mul__",
    "__pow__",
    "__truediv__",
    "__floordiv__",
    "__mod__",
    "__and__",
    "__xor__",
    "__or__",
    "__lt__",
    "__le__",
    "__gt__",
    "__ge__",
    "__eq__",
    "__ne__",
    "__radd__",
    "__rsub__",
    "__rmul__",
    "__rpow__",
    "__rtruediv__",
    "__rfloordiv__",
    "__rmod__",
    "__rand__",
    "__rxor__",
    "__ror__",
    "__iadd__",
    "__isub__",
    "__imul__",
    "__ipow__",
    "__itruediv__",
    "__ifloordiv__",
    "__imod__",
    "__iand__",
    "__ixor__",
    "__ior__",
    "__neg__",
    "__pos__",
    "__abs__",
    "__invert__",
    "round",
    "argsort",
    "conj",
    "conjugate",
]

# TODO NUM_BINARY_OPS apparently aren't defined on DatasetArithmetic, and don't appear to be injected anywhere...
_ARITHMETIC_METHODS_TO_MAP = (
    REDUCE_METHODS
    + NAN_REDUCE_METHODS
    + NAN_CUM_METHODS
    + _TYPED_DATASET_OPS_TO_MAP
    + ["__array_ufunc__"]
)
def _wrap_then_attach_to_cls(
target_cls_dict, source_cls, methods_to_set, wrap_func=None
):
"""
Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree)
Result is like having written this in the classes' definition:
```
@wrap_func
def method_name(self, *args, **kwargs):
return self.method(*args, **kwargs)
```
Every method attached here needs to have a return value of Dataset or DataArray in order to construct a new tree.
Parameters
----------
target_cls_dict : MappingProxy
The __dict__ attribute of the class which we want the methods to be added to. (The __dict__ attribute can also
be accessed by calling vars() from within that classes' definition.) This will be updated by this function.
source_cls : class
Class object from which we want to copy methods (and optionally wrap them). Should be the actual class object
(or instance), not just the __dict__.
methods_to_set : Iterable[Tuple[str, callable]]
The method names and definitions supplied as a list of (method_name_string, method) pairs.
This format matches the output of inspect.getmembers().
wrap_func : callable, optional
Function to decorate each method with. Must have the same return type as the method.
"""
for method_name in methods_to_set:
orig_method = getattr(source_cls, method_name)
wrapped_method = (
wrap_func(orig_method) if wrap_func is not None else orig_method
)
target_cls_dict[method_name] = wrapped_method
if wrap_func is map_over_subtree:
# Add a paragraph to the method's docstring explaining how it's been mapped
orig_method_docstring = orig_method.__doc__
# if orig_method_docstring is not None:
# if "\n" in orig_method_docstring:
# new_method_docstring = orig_method_docstring.replace(
# "\n", _MAPPED_DOCSTRING_ADDENDUM, 1
# )
# else:
# new_method_docstring = (
# orig_method_docstring + f"\n\n{_MAPPED_DOCSTRING_ADDENDUM}"
# )
setattr(target_cls_dict[method_name], "__doc__", orig_method_docstring)
class MappedDatasetMethodsMixin:
    """
    Mixin to add methods defined specifically on the Dataset class such as .query(), but wrapped to map over all nodes
    in the subtree.
    """

    # Executed at class-creation time: injects every Dataset method (including
    # the dask-specific ones) into this class body, each wrapped with
    # map_over_subtree so it applies to every node of the tree.
    _wrap_then_attach_to_cls(
        target_cls_dict=vars(),
        source_cls=Dataset,
        methods_to_set=_ALL_DATASET_METHODS_TO_MAP,
        wrap_func=map_over_subtree,
    )
class MappedDataWithCoords:
    """
    Mixin to add coordinate-aware Dataset methods such as .where(), but wrapped to map over all nodes in the subtree.
    """

    # TODO add mapped versions of groupby, weighted, rolling, rolling_exp, coarsen, resample
    # Executed at class-creation time: injects the DataWithCoords-derived
    # methods, each wrapped with map_over_subtree.
    _wrap_then_attach_to_cls(
        target_cls_dict=vars(),
        source_cls=Dataset,
        methods_to_set=_DATA_WITH_COORDS_METHODS_TO_MAP,
        wrap_func=map_over_subtree,
    )
class DataTreeArithmeticMixin:
    """
    Mixin to add Dataset arithmetic operations such as __add__, reduction methods such as .mean(), and enable numpy
    ufuncs such as np.sin(), but wrapped to map over all nodes in the subtree.
    """

    # Executed at class-creation time: injects reductions, dunder operators and
    # __array_ufunc__, each wrapped with map_over_subtree.
    _wrap_then_attach_to_cls(
        target_cls_dict=vars(),
        source_cls=Dataset,
        methods_to_set=_ARITHMETIC_METHODS_TO_MAP,
        wrap_func=map_over_subtree,
    )
datatree-0.0.14/datatree/py.typed 0000664 0000000 0000000 00000000000 14552576503 0016657 0 ustar 00root root 0000000 0000000 datatree-0.0.14/datatree/render.py 0000664 0000000 0000000 00000020676 14552576503 0017036 0 ustar 00root root 0000000 0000000 """
String Tree Rendering. Copied from anytree.
"""
import collections
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from .datatree import DataTree
Row = collections.namedtuple("Row", ("pre", "fill", "node"))
class AbstractStyle(object):
    """Base class holding the glyphs used to draw one rendered tree branch."""

    def __init__(self, vertical, cont, end):
        """
        Tree Render Style.

        Args:
            vertical: Sign for vertical line.
            cont: Chars for a continued branch.
            end: Chars for the last branch.
        """
        super(AbstractStyle, self).__init__()
        self.vertical = vertical
        self.cont = cont
        self.end = end
        # All three glyph strings must be the same width so columns line up.
        assert (
            len(vertical) == len(cont) == len(end)
        ), f"'{vertical}', '{cont}' and '{end}' need to have equal length"

    @property
    def empty(self):
        """Empty string as placeholder."""
        return len(self.end) * " "

    def __repr__(self):
        return f"{self.__class__.__name__}()"
class ContStyle(AbstractStyle):
    def __init__(self):
        """
        Continued style, without gaps.

        >>> from anytree import Node, RenderTree
        >>> root = Node("root")
        >>> s0 = Node("sub0", parent=root)
        >>> s0b = Node("sub0B", parent=s0)
        >>> s0a = Node("sub0A", parent=s0)
        >>> s1 = Node("sub1", parent=root)
        >>> print(RenderTree(root, style=ContStyle()))
        Node('/root')
        ├── Node('/root/sub0')
        │   ├── Node('/root/sub0/sub0B')
        │   └── Node('/root/sub0/sub0A')
        └── Node('/root/sub1')
        """
        # The vertical glyph must be padded to the same 4-char width as the
        # branch glyphs ("├── ", "└── ") or AbstractStyle's equal-length
        # assertion fails at construction time.
        super(ContStyle, self).__init__(
            "\u2502   ", "\u251c\u2500\u2500 ", "\u2514\u2500\u2500 "
        )
class RenderTree(object):
    """Iterator yielding ``(pre, fill, node)`` rows that render a tree as text."""

    # NOTE(review): ``style=ContStyle()`` is a mutable-looking default evaluated
    # once at definition time; ContStyle holds only string attributes, so
    # sharing the instance appears harmless — confirm if style ever gains state.
    def __init__(
        self, node: "DataTree", style=ContStyle(), childiter=list, maxlevel=None
    ):
        """
        Render tree starting at `node`.

        Keyword Args:
            style (AbstractStyle): Render Style.
            childiter: Child iterator.
            maxlevel: Limit rendering to this depth.

        :any:`RenderTree` is an iterator, returning a tuple with 3 items:

        `pre`
            tree prefix.

        `fill`
            filling for multiline entries.

        `node`
            :any:`NodeMixin` object.

        It is up to the user to assemble these parts to a whole.

        >>> from anytree import Node, RenderTree
        >>> root = Node("root", lines=["c0fe", "c0de"])
        >>> s0 = Node("sub0", parent=root, lines=["ha", "ba"])
        >>> s0b = Node("sub0B", parent=s0, lines=["1", "2", "3"])
        >>> s0a = Node("sub0A", parent=s0, lines=["a", "b"])
        >>> s1 = Node("sub1", parent=root, lines=["Z"])

        Simple one line:

        >>> for pre, _, node in RenderTree(root):
        ...     print("%s%s" % (pre, node.name))
        ...
        root
        ├── sub0
        │   ├── sub0B
        │   └── sub0A
        └── sub1

        Multiline:

        >>> for pre, fill, node in RenderTree(root):
        ...     print("%s%s" % (pre, node.lines[0]))
        ...     for line in node.lines[1:]:
        ...         print("%s%s" % (fill, line))
        ...
        c0fe
        c0de
        ├── ha
        │   ba
        │   ├── 1
        │   │   2
        │   │   3
        │   └── a
        │       b
        └── Z

        `maxlevel` limits the depth of the tree:

        >>> print(RenderTree(root, maxlevel=2))
        Node('/root', lines=['c0fe', 'c0de'])
        ├── Node('/root/sub0', lines=['ha', 'ba'])
        └── Node('/root/sub1', lines=['Z'])

        The `childiter` is responsible for iterating over child nodes at the
        same level. An reversed order can be achived by using `reversed`.

        >>> for row in RenderTree(root, childiter=reversed):
        ...     print("%s%s" % (row.pre, row.node.name))
        ...
        root
        ├── sub1
        └── sub0
            ├── sub0A
            └── sub0B

        Or writing your own sort function:

        >>> def mysort(items):
        ...     return sorted(items, key=lambda item: item.name)
        ...
        >>> for row in RenderTree(root, childiter=mysort):
        ...     print("%s%s" % (row.pre, row.node.name))
        ...
        root
        ├── sub0
        │   ├── sub0A
        │   └── sub0B
        └── sub1

        :any:`by_attr` simplifies attribute rendering and supports multiline:

        >>> print(RenderTree(root).by_attr())
        root
        ├── sub0
        │   ├── sub0B
        │   └── sub0A
        └── sub1
        >>> print(RenderTree(root).by_attr("lines"))
        c0fe
        c0de
        ├── ha
        │   ba
        │   ├── 1
        │   │   2
        │   │   3
        │   └── a
        │       b
        └── Z

        And can be a function:

        >>> print(RenderTree(root).by_attr(lambda n: " ".join(n.lines)))
        c0fe c0de
        ├── ha ba
        │   ├── 1 2 3
        │   └── a b
        └── Z
        """
        # Accept either a style instance or a style class (instantiated here).
        if not isinstance(style, AbstractStyle):
            style = style()
        self.node = node
        self.style = style
        self.childiter = childiter
        self.maxlevel = maxlevel

    def __iter__(self):
        return self.__next(self.node, tuple())

    def __next(self, node, continues, level=0):
        # Depth-first traversal; ``continues`` records, per ancestor level,
        # whether a vertical line must continue through this row.
        yield RenderTree.__item(node, continues, self.style)
        children = node.children.values()
        level += 1
        if children and (self.maxlevel is None or level < self.maxlevel):
            children = self.childiter(children)
            for child, is_last in _is_last(children):
                for grandchild in self.__next(
                    child, continues + (not is_last,), level=level
                ):
                    yield grandchild

    @staticmethod
    def __item(node, continues, style):
        # Build the prefix for this row: vertical/empty glyphs for each
        # ancestor, then a branch glyph (continued or last) for this node.
        if not continues:
            return Row("", "", node)
        else:
            items = [style.vertical if cont else style.empty for cont in continues]
            indent = "".join(items[:-1])
            branch = style.cont if continues[-1] else style.end
            pre = indent + branch
            fill = "".join(items)
            return Row(pre, fill, node)

    def __str__(self):
        lines = ["%s%r" % (pre, node) for pre, _, node in self]
        return "\n".join(lines)

    def __repr__(self):
        classname = self.__class__.__name__
        args = [
            repr(self.node),
            "style=%s" % repr(self.style),
            "childiter=%s" % repr(self.childiter),
        ]
        return "%s(%s)" % (classname, ", ".join(args))

    def by_attr(self, attrname="name"):
        """
        Return rendered tree with node attribute `attrname`.

        >>> from anytree import AnyNode, RenderTree
        >>> root = AnyNode(id="root")
        >>> s0 = AnyNode(id="sub0", parent=root)
        >>> s0b = AnyNode(id="sub0B", parent=s0, foo=4, bar=109)
        >>> s0a = AnyNode(id="sub0A", parent=s0)
        >>> s1 = AnyNode(id="sub1", parent=root)
        >>> s1a = AnyNode(id="sub1A", parent=s1)
        >>> s1b = AnyNode(id="sub1B", parent=s1, bar=8)
        >>> s1c = AnyNode(id="sub1C", parent=s1)
        >>> s1ca = AnyNode(id="sub1Ca", parent=s1c)
        >>> print(RenderTree(root).by_attr("id"))
        root
        ├── sub0
        │   ├── sub0B
        │   └── sub0A
        └── sub1
            ├── sub1A
            ├── sub1B
            └── sub1C
                └── sub1Ca
        """

        def get():
            for pre, fill, node in self:
                # ``attrname`` may be a callable taking the node, or an
                # attribute name; missing attributes render as "".
                attr = (
                    attrname(node)
                    if callable(attrname)
                    else getattr(node, attrname, "")
                )
                if isinstance(attr, (list, tuple)):
                    lines = attr
                else:
                    lines = str(attr).split("\n")
                yield "%s%s" % (pre, lines[0])
                for line in lines[1:]:
                    yield "%s%s" % (fill, line)

        return "\n".join(get())
def _is_last(iterable):
iter_ = iter(iterable)
try:
nextitem = next(iter_)
except StopIteration:
pass
else:
item = nextitem
while True:
try:
nextitem = next(iter_)
yield item, False
except StopIteration:
yield nextitem, True
break
item = nextitem
datatree-0.0.14/datatree/testing.py 0000664 0000000 0000000 00000007047 14552576503 0017231 0 ustar 00root root 0000000 0000000 from xarray.testing.assertions import ensure_warnings
from .datatree import DataTree
from .formatting import diff_tree_repr
@ensure_warnings
def assert_isomorphic(a: DataTree, b: DataTree, from_root: bool = False):
    """
    Two DataTrees are considered isomorphic if every node has the same number of children.

    Nothing about the data in each node is checked.

    Isomorphism is a necessary condition for two trees to be used in a nodewise binary operation,
    such as tree1 + tree2.

    By default this function does not check any part of the tree above the given node.
    Therefore this function can be used as default to check that two subtrees are isomorphic.

    Parameters
    ----------
    a : DataTree
        The first object to compare.
    b : DataTree
        The second object to compare.
    from_root : bool, optional, default is False
        Whether or not to first traverse to the root of the trees before checking for isomorphism.
        If a & b have no parents then this has no effect.

    See Also
    --------
    DataTree.isomorphic
    assert_equal
    assert_identical
    """
    # Hide this frame from pytest tracebacks so failures point at the caller.
    __tracebackhide__ = True
    assert isinstance(a, type(b))
    if isinstance(a, DataTree):
        if from_root:
            a = a.root
            b = b.root
        assert a.isomorphic(b, from_root=from_root), diff_tree_repr(a, b, "isomorphic")
    else:
        raise TypeError(f"{type(a)} not of type DataTree")
@ensure_warnings
def assert_equal(a: DataTree, b: DataTree, from_root: bool = True):
    """
    Two DataTrees are equal if they have isomorphic node structures, with matching node names,
    and if they have matching variables and coordinates, all of which are equal.

    By default this method will check the whole tree above the given node.

    Parameters
    ----------
    a : DataTree
        The first object to compare.
    b : DataTree
        The second object to compare.
    from_root : bool, optional, default is True
        Whether or not to first traverse to the root of the trees before checking for isomorphism.
        If a & b have no parents then this has no effect.

    See Also
    --------
    DataTree.equals
    assert_isomorphic
    assert_identical
    """
    # Hide this frame from pytest tracebacks so failures point at the caller.
    __tracebackhide__ = True
    assert isinstance(a, type(b))
    if isinstance(a, DataTree):
        if from_root:
            a = a.root
            b = b.root
        assert a.equals(b, from_root=from_root), diff_tree_repr(a, b, "equals")
    else:
        raise TypeError(f"{type(a)} not of type DataTree")
@ensure_warnings
def assert_identical(a: DataTree, b: DataTree, from_root: bool = True):
    """
    Like assert_equal, but will also check all dataset attributes and the attributes on
    all variables and coordinates.

    By default this method will check the whole tree above the given node.

    Parameters
    ----------
    a : DataTree
        The first object to compare.
    b : DataTree
        The second object to compare.
    from_root : bool, optional, default is True
        Whether or not to first traverse to the root of the trees before checking for isomorphism.
        If a & b have no parents then this has no effect.

    See Also
    --------
    DataTree.identical
    assert_isomorphic
    assert_equal
    """
    # Hide this frame from pytest tracebacks so failures point at the caller.
    __tracebackhide__ = True
    assert isinstance(a, type(b))
    if isinstance(a, DataTree):
        if from_root:
            a = a.root
            b = b.root
        assert a.identical(b, from_root=from_root), diff_tree_repr(a, b, "identical")
    else:
        raise TypeError(f"{type(a)} not of type DataTree")
datatree-0.0.14/datatree/tests/ 0000775 0000000 0000000 00000000000 14552576503 0016334 5 ustar 00root root 0000000 0000000 datatree-0.0.14/datatree/tests/__init__.py 0000664 0000000 0000000 00000001554 14552576503 0020452 0 ustar 00root root 0000000 0000000 import importlib
import pytest
from packaging import version
def _importorskip(modname, minversion=None):
    """Report availability of an optional dependency.

    Returns a ``(has, func)`` tuple: ``has`` says whether ``modname`` (at
    least ``minversion``, if given) could be imported, and ``func`` is a
    pytest ``skipif`` marker for tests that require it.
    """
    try:
        mod = importlib.import_module(modname)
        has = True
        if minversion is not None:
            # Treat a too-old installation the same as a missing one.
            if LooseVersion(mod.__version__) < LooseVersion(minversion):
                raise ImportError("Minimum version not satisfied")
    except ImportError:
        has = False
    func = pytest.mark.skipif(not has, reason=f"requires {modname}")
    return has, func
def LooseVersion(vstring):
    """Parse a version string, ignoring any local build identifier."""
    # Our development version is something like '0.10.9+aac7bfc';
    # everything after the '+' (the git commit id) is irrelevant here.
    base, _, _ = vstring.partition("+")
    return version.parse(base)
# Availability flags and matching pytest skip markers for optional IO backends.
has_zarr, requires_zarr = _importorskip("zarr")
has_h5netcdf, requires_h5netcdf = _importorskip("h5netcdf")
has_netCDF4, requires_netCDF4 = _importorskip("netCDF4")
datatree-0.0.14/datatree/tests/conftest.py 0000664 0000000 0000000 00000003542 14552576503 0020537 0 ustar 00root root 0000000 0000000 import pytest
import xarray as xr
from datatree import DataTree
@pytest.fixture(scope="module")
def create_test_datatree():
    """
    Create a test datatree with this structure:

    |-- set1
    |   |-- <Dataset>
    |   |     Dimensions:  ()
    |   |     Data variables:
    |   |         a  int64  0
    |   |         b  int64  1
    |   |-- set1
    |   |-- set2
    |-- set2
    |   |-- <Dataset>
    |   |     Dimensions:  (x: 2)
    |   |     Data variables:
    |   |         a  (x)  int64  2, 3
    |   |         b  (x)  int64  0.1, 0.2
    |   |-- set1
    |-- set3
    |-- <Dataset>
    |     Dimensions:  (x: 2, y: 3)
    |     Data variables:
    |         a     (y)  int64  6, 7, 8
    |         set0  (x)  int64  9, 10

    The structure has deliberately repeated names of tags, variables, and
    dimensions in order to better check for bugs caused by name conflicts.
    """

    def _create_test_datatree(modify=lambda ds: ds):
        # ``modify`` lets callers transform every node's dataset (e.g. np.sin).
        set1_data = modify(xr.Dataset({"a": 0, "b": 1}))
        set2_data = modify(xr.Dataset({"a": ("x", [2, 3]), "b": ("x", [0.1, 0.2])}))
        root_data = modify(xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}))

        # Avoid using __init__ so we can independently test it
        root = DataTree(data=root_data)
        set1 = DataTree(name="set1", parent=root, data=set1_data)
        DataTree(name="set1", parent=set1)
        DataTree(name="set2", parent=set1)
        set2 = DataTree(name="set2", parent=root, data=set2_data)
        DataTree(name="set1", parent=set2)
        DataTree(name="set3", parent=root)

        return root

    return _create_test_datatree
@pytest.fixture(scope="module")
def simple_datatree(create_test_datatree):
    """
    Invoke create_test_datatree fixture (callback).

    Returns a DataTree.
    """
    return create_test_datatree()
datatree-0.0.14/datatree/tests/test_dataset_api.py 0000664 0000000 0000000 00000006051 14552576503 0022225 0 ustar 00root root 0000000 0000000 import numpy as np
import xarray as xr
from datatree import DataTree
from datatree.testing import assert_equal
class TestDSMethodInheritance:
    """Dataset methods copied onto DataTree should apply to every node, including children."""

    def test_dataset_method(self):
        ds = xr.Dataset({"a": ("x", [1, 2, 3])})
        dt = DataTree(data=ds)
        DataTree(name="results", parent=dt, data=ds)

        expected = DataTree(data=ds.isel(x=1))
        DataTree(name="results", parent=expected, data=ds.isel(x=1))

        result = dt.isel(x=1)
        assert_equal(result, expected)

    def test_reduce_method(self):
        ds = xr.Dataset({"a": ("x", [False, True, False])})
        dt = DataTree(data=ds)
        DataTree(name="results", parent=dt, data=ds)

        expected = DataTree(data=ds.any())
        DataTree(name="results", parent=expected, data=ds.any())

        result = dt.any()
        assert_equal(result, expected)

    def test_nan_reduce_method(self):
        ds = xr.Dataset({"a": ("x", [1, 2, 3])})
        dt = DataTree(data=ds)
        DataTree(name="results", parent=dt, data=ds)

        expected = DataTree(data=ds.mean())
        DataTree(name="results", parent=expected, data=ds.mean())

        result = dt.mean()
        assert_equal(result, expected)

    def test_cum_method(self):
        ds = xr.Dataset({"a": ("x", [1, 2, 3])})
        dt = DataTree(data=ds)
        DataTree(name="results", parent=dt, data=ds)

        expected = DataTree(data=ds.cumsum())
        DataTree(name="results", parent=expected, data=ds.cumsum())

        result = dt.cumsum()
        assert_equal(result, expected)
class TestOps:
    """Binary operators on a DataTree should map over every node of the tree."""

    def test_binary_op_on_int(self):
        ds1 = xr.Dataset({"a": [5], "b": [3]})
        ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]})
        dt = DataTree(data=ds1)
        DataTree(name="subnode", data=ds2, parent=dt)

        expected = DataTree(data=ds1 * 5)
        DataTree(name="subnode", data=ds2 * 5, parent=expected)

        result = dt * 5
        assert_equal(result, expected)

    def test_binary_op_on_dataset(self):
        ds1 = xr.Dataset({"a": [5], "b": [3]})
        ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]})
        dt = DataTree(data=ds1)
        DataTree(name="subnode", data=ds2, parent=dt)
        other_ds = xr.Dataset({"z": ("z", [0.1, 0.2])})

        expected = DataTree(data=ds1 * other_ds)
        DataTree(name="subnode", data=ds2 * other_ds, parent=expected)

        result = dt * other_ds
        assert_equal(result, expected)

    def test_binary_op_on_datatree(self):
        # tree * tree applies the operator nodewise between matching nodes.
        ds1 = xr.Dataset({"a": [5], "b": [3]})
        ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]})
        dt = DataTree(data=ds1)
        DataTree(name="subnode", data=ds2, parent=dt)

        expected = DataTree(data=ds1 * ds1)
        DataTree(name="subnode", data=ds2 * ds2, parent=expected)

        result = dt * dt
        assert_equal(result, expected)
class TestUFuncs:
    """Numpy ufuncs applied to a DataTree should map over every node."""

    def test_tree(self, create_test_datatree):
        dt = create_test_datatree()
        expected = create_test_datatree(modify=lambda ds: np.sin(ds))
        result_tree = np.sin(dt)
        assert_equal(result_tree, expected)
datatree-0.0.14/datatree/tests/test_datatree.py 0000664 0000000 0000000 00000061340 14552576503 0021542 0 ustar 00root root 0000000 0000000 from copy import copy, deepcopy
import numpy as np
import pytest
import xarray as xr
import xarray.testing as xrt
from xarray.tests import create_test_data, source_ndarray
import datatree.testing as dtt
from datatree import DataTree, NotFoundInTreeError
class TestTreeCreation:
    """Basic DataTree construction: names, defaults, and name validation."""

    def test_empty(self):
        dt = DataTree(name="root")
        assert dt.name == "root"
        assert dt.parent is None
        assert dt.children == {}
        xrt.assert_identical(dt.to_dataset(), xr.Dataset())

    def test_unnamed(self):
        dt = DataTree()
        assert dt.name is None

    def test_bad_names(self):
        # Names must be strings and must not contain path separators.
        with pytest.raises(TypeError):
            DataTree(name=5)

        with pytest.raises(ValueError):
            DataTree(name="folder/data")
class TestFamilyTree:
def test_setparent_unnamed_child_node_fails(self):
john = DataTree(name="john")
with pytest.raises(ValueError, match="unnamed"):
DataTree(parent=john)
def test_create_two_children(self):
root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])})
set1_data = xr.Dataset({"a": 0, "b": 1})
root = DataTree(data=root_data)
set1 = DataTree(name="set1", parent=root, data=set1_data)
DataTree(name="set1", parent=root)
DataTree(name="set2", parent=set1)
def test_create_full_tree(self, simple_datatree):
root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])})
set1_data = xr.Dataset({"a": 0, "b": 1})
set2_data = xr.Dataset({"a": ("x", [2, 3]), "b": ("x", [0.1, 0.2])})
root = DataTree(data=root_data)
set1 = DataTree(name="set1", parent=root, data=set1_data)
DataTree(name="set1", parent=set1)
DataTree(name="set2", parent=set1)
set2 = DataTree(name="set2", parent=root, data=set2_data)
DataTree(name="set1", parent=set2)
DataTree(name="set3", parent=root)
expected = simple_datatree
assert root.identical(expected)
class TestNames:
    def test_child_gets_named_on_attach(self):
        # Attaching an anonymous node via ``children`` assigns the dict key as its name.
        sue = DataTree()
        mary = DataTree(children={"Sue": sue})  # noqa
        assert sue.name == "Sue"
class TestPaths:
def test_path_property(self):
sue = DataTree()
mary = DataTree(children={"Sue": sue})
john = DataTree(children={"Mary": mary}) # noqa
assert sue.path == "/Mary/Sue"
assert john.path == "/"
def test_path_roundtrip(self):
sue = DataTree()
mary = DataTree(children={"Sue": sue})
john = DataTree(children={"Mary": mary}) # noqa
assert john[sue.path] is sue
def test_same_tree(self):
mary = DataTree()
kate = DataTree()
john = DataTree(children={"Mary": mary, "Kate": kate}) # noqa
assert mary.same_tree(kate)
def test_relative_paths(self):
sue = DataTree()
mary = DataTree(children={"Sue": sue})
annie = DataTree()
john = DataTree(children={"Mary": mary, "Annie": annie})
result = sue.relative_to(john)
assert result == "Mary/Sue"
assert john.relative_to(sue) == "../.."
assert annie.relative_to(sue) == "../../Annie"
assert sue.relative_to(annie) == "../Mary/Sue"
assert sue.relative_to(sue) == "."
evil_kate = DataTree()
with pytest.raises(
NotFoundInTreeError, match="nodes do not lie within the same tree"
):
sue.relative_to(evil_kate)
class TestStoreDatasets:
def test_create_with_data(self):
dat = xr.Dataset({"a": 0})
john = DataTree(name="john", data=dat)
xrt.assert_identical(john.to_dataset(), dat)
with pytest.raises(TypeError):
DataTree(name="mary", parent=john, data="junk") # noqa
def test_set_data(self):
john = DataTree(name="john")
dat = xr.Dataset({"a": 0})
john.ds = dat
xrt.assert_identical(john.to_dataset(), dat)
with pytest.raises(TypeError):
john.ds = "junk"
def test_has_data(self):
john = DataTree(name="john", data=xr.Dataset({"a": 0}))
assert john.has_data
john = DataTree(name="john", data=None)
assert not john.has_data
def test_is_hollow(self):
john = DataTree(data=xr.Dataset({"a": 0}))
assert john.is_hollow
eve = DataTree(children={"john": john})
assert eve.is_hollow
eve.ds = xr.Dataset({"a": 1})
assert not eve.is_hollow
class TestVariablesChildrenNameCollisions:
def test_parent_already_has_variable_with_childs_name(self):
dt = DataTree(data=xr.Dataset({"a": [0], "b": 1}))
with pytest.raises(KeyError, match="already contains a data variable named a"):
DataTree(name="a", data=None, parent=dt)
def test_assign_when_already_child_with_variables_name(self):
dt = DataTree(data=None)
DataTree(name="a", data=None, parent=dt)
with pytest.raises(KeyError, match="names would collide"):
dt.ds = xr.Dataset({"a": 0})
dt.ds = xr.Dataset()
new_ds = dt.to_dataset().assign(a=xr.DataArray(0))
with pytest.raises(KeyError, match="names would collide"):
dt.ds = new_ds
class TestGet:
    # TODO: add tests for DataTree.get(); placeholder kept so the test-suite
    # layout mirrors the DataTree API.
    ...
class TestGetItem:
def test_getitem_node(self):
folder1 = DataTree(name="folder1")
results = DataTree(name="results", parent=folder1)
highres = DataTree(name="highres", parent=results)
assert folder1["results"] is results
assert folder1["results/highres"] is highres
def test_getitem_self(self):
dt = DataTree()
assert dt["."] is dt
def test_getitem_single_data_variable(self):
data = xr.Dataset({"temp": [0, 50]})
results = DataTree(name="results", data=data)
xrt.assert_identical(results["temp"], data["temp"])
def test_getitem_single_data_variable_from_node(self):
data = xr.Dataset({"temp": [0, 50]})
folder1 = DataTree(name="folder1")
results = DataTree(name="results", parent=folder1)
DataTree(name="highres", parent=results, data=data)
xrt.assert_identical(folder1["results/highres/temp"], data["temp"])
def test_getitem_nonexistent_node(self):
folder1 = DataTree(name="folder1")
DataTree(name="results", parent=folder1)
with pytest.raises(KeyError):
folder1["results/highres"]
def test_getitem_nonexistent_variable(self):
data = xr.Dataset({"temp": [0, 50]})
results = DataTree(name="results", data=data)
with pytest.raises(KeyError):
results["pressure"]
@pytest.mark.xfail(reason="Should be deprecated in favour of .subset")
def test_getitem_multiple_data_variables(self):
data = xr.Dataset({"temp": [0, 50], "p": [5, 8, 7]})
results = DataTree(name="results", data=data)
xrt.assert_identical(results[["temp", "p"]], data[["temp", "p"]])
@pytest.mark.xfail(reason="Indexing needs to return whole tree (GH #77)")
def test_getitem_dict_like_selection_access_to_dataset(self):
data = xr.Dataset({"temp": [0, 50]})
results = DataTree(name="results", data=data)
xrt.assert_identical(results[{"temp": 1}], data[{"temp": 1}])
class TestUpdate:
    """DataTree.update() should accept both variables and child nodes."""

    def test_update(self):
        dt = DataTree()
        dt.update({"foo": xr.DataArray(0), "a": DataTree()})
        expected = DataTree.from_dict({"/": xr.Dataset({"foo": 0}), "a": None})
        # Leftover debug print() statements removed.
        dtt.assert_equal(dt, expected)

    def test_update_new_named_dataarray(self):
        da = xr.DataArray(name="temp", data=[0, 50])
        folder1 = DataTree(name="folder1")
        folder1.update({"results": da})
        expected = da.rename("results")
        xrt.assert_equal(folder1["results"], expected)

    def test_update_doesnt_alter_child_name(self):
        # The dict key wins over the node's pre-existing name.
        dt = DataTree()
        dt.update({"foo": xr.DataArray(0), "a": DataTree(name="b")})
        assert "a" in dt.children
        child = dt["a"]
        assert child.name == "a"

    def test_update_overwrite(self):
        actual = DataTree.from_dict({"a": DataTree(xr.Dataset({"x": 1}))})
        actual.update({"a": DataTree(xr.Dataset({"x": 2}))})
        expected = DataTree.from_dict({"a": DataTree(xr.Dataset({"x": 2}))})
        # Leftover debug print() statements removed.
        dtt.assert_equal(actual, expected)
class TestCopy:
def test_copy(self, create_test_datatree):
dt = create_test_datatree()
for node in dt.root.subtree:
node.attrs["Test"] = [1, 2, 3]
for copied in [dt.copy(deep=False), copy(dt)]:
dtt.assert_identical(dt, copied)
for node, copied_node in zip(dt.root.subtree, copied.root.subtree):
assert node.encoding == copied_node.encoding
# Note: IndexVariable objects with string dtype are always
# copied because of xarray.core.util.safe_cast_to_index.
# Limiting the test to data variables.
for k in node.data_vars:
v0 = node.variables[k]
v1 = copied_node.variables[k]
assert source_ndarray(v0.data) is source_ndarray(v1.data)
copied_node["foo"] = xr.DataArray(data=np.arange(5), dims="z")
assert "foo" not in node
copied_node.attrs["foo"] = "bar"
assert "foo" not in node.attrs
assert node.attrs["Test"] is copied_node.attrs["Test"]
def test_copy_subtree(self):
dt = DataTree.from_dict({"/level1/level2/level3": xr.Dataset()})
actual = dt["/level1/level2"].copy()
expected = DataTree.from_dict({"/level3": xr.Dataset()}, name="level2")
dtt.assert_identical(actual, expected)
def test_deepcopy(self, create_test_datatree):
dt = create_test_datatree()
for node in dt.root.subtree:
node.attrs["Test"] = [1, 2, 3]
for copied in [dt.copy(deep=True), deepcopy(dt)]:
dtt.assert_identical(dt, copied)
for node, copied_node in zip(dt.root.subtree, copied.root.subtree):
assert node.encoding == copied_node.encoding
# Note: IndexVariable objects with string dtype are always
# copied because of xarray.core.util.safe_cast_to_index.
# Limiting the test to data variables.
for k in node.data_vars:
v0 = node.variables[k]
v1 = copied_node.variables[k]
assert source_ndarray(v0.data) is not source_ndarray(v1.data)
copied_node["foo"] = xr.DataArray(data=np.arange(5), dims="z")
assert "foo" not in node
copied_node.attrs["foo"] = "bar"
assert "foo" not in node.attrs
assert node.attrs["Test"] is not copied_node.attrs["Test"]
@pytest.mark.xfail(reason="data argument not yet implemented")
def test_copy_with_data(self, create_test_datatree):
orig = create_test_datatree()
# TODO use .data_vars once that property is available
data_vars = {
k: v for k, v in orig.variables.items() if k not in orig._coord_names
}
new_data = {k: np.random.randn(*v.shape) for k, v in data_vars.items()}
actual = orig.copy(data=new_data)
expected = orig.copy()
for k, v in new_data.items():
expected[k].data = v
dtt.assert_identical(expected, actual)
# TODO test parents and children?
class TestSetItem:
def test_setitem_new_child_node(self):
john = DataTree(name="john")
mary = DataTree(name="mary")
john["mary"] = mary
grafted_mary = john["mary"]
assert grafted_mary.parent is john
assert grafted_mary.name == "mary"
def test_setitem_unnamed_child_node_becomes_named(self):
john2 = DataTree(name="john2")
john2["sonny"] = DataTree()
assert john2["sonny"].name == "sonny"
def test_setitem_new_grandchild_node(self):
john = DataTree(name="john")
mary = DataTree(name="mary", parent=john)
rose = DataTree(name="rose")
john["mary/rose"] = rose
grafted_rose = john["mary/rose"]
assert grafted_rose.parent is mary
assert grafted_rose.name == "rose"
def test_grafted_subtree_retains_name(self):
subtree = DataTree(name="original_subtree_name")
root = DataTree(name="root")
root["new_subtree_name"] = subtree # noqa
assert subtree.name == "original_subtree_name"
def test_setitem_new_empty_node(self):
john = DataTree(name="john")
john["mary"] = DataTree()
mary = john["mary"]
assert isinstance(mary, DataTree)
xrt.assert_identical(mary.to_dataset(), xr.Dataset())
def test_setitem_overwrite_data_in_node_with_none(self):
john = DataTree(name="john")
mary = DataTree(name="mary", parent=john, data=xr.Dataset())
john["mary"] = DataTree()
xrt.assert_identical(mary.to_dataset(), xr.Dataset())
john.ds = xr.Dataset()
with pytest.raises(ValueError, match="has no name"):
john["."] = DataTree()
@pytest.mark.xfail(reason="assigning Datasets doesn't yet create new nodes")
def test_setitem_dataset_on_this_node(self):
data = xr.Dataset({"temp": [0, 50]})
results = DataTree(name="results")
results["."] = data
xrt.assert_identical(results.to_dataset(), data)
@pytest.mark.xfail(reason="assigning Datasets doesn't yet create new nodes")
def test_setitem_dataset_as_new_node(self):
data = xr.Dataset({"temp": [0, 50]})
folder1 = DataTree(name="folder1")
folder1["results"] = data
xrt.assert_identical(folder1["results"].to_dataset(), data)
@pytest.mark.xfail(reason="assigning Datasets doesn't yet create new nodes")
def test_setitem_dataset_as_new_node_requiring_intermediate_nodes(self):
data = xr.Dataset({"temp": [0, 50]})
folder1 = DataTree(name="folder1")
folder1["results/highres"] = data
xrt.assert_identical(folder1["results/highres"].to_dataset(), data)
def test_setitem_named_dataarray(self):
da = xr.DataArray(name="temp", data=[0, 50])
folder1 = DataTree(name="folder1")
folder1["results"] = da
expected = da.rename("results")
xrt.assert_equal(folder1["results"], expected)
def test_setitem_unnamed_dataarray(self):
data = xr.DataArray([0, 50])
folder1 = DataTree(name="folder1")
folder1["results"] = data
xrt.assert_equal(folder1["results"], data)
def test_setitem_variable(self):
var = xr.Variable(data=[0, 50], dims="x")
folder1 = DataTree(name="folder1")
folder1["results"] = var
xrt.assert_equal(folder1["results"], xr.DataArray(var))
def test_setitem_coerce_to_dataarray(self):
folder1 = DataTree(name="folder1")
folder1["results"] = 0
xrt.assert_equal(folder1["results"], xr.DataArray(0))
def test_setitem_add_new_variable_to_empty_node(self):
results = DataTree(name="results")
results["pressure"] = xr.DataArray(data=[2, 3])
assert "pressure" in results.ds
results["temp"] = xr.Variable(data=[10, 11], dims=["x"])
assert "temp" in results.ds
# What if there is a path to traverse first?
results = DataTree(name="results")
results["highres/pressure"] = xr.DataArray(data=[2, 3])
assert "pressure" in results["highres"].ds
results["highres/temp"] = xr.Variable(data=[10, 11], dims=["x"])
assert "temp" in results["highres"].ds
def test_setitem_dataarray_replace_existing_node(self):
t = xr.Dataset({"temp": [0, 50]})
results = DataTree(name="results", data=t)
p = xr.DataArray(data=[2, 3])
results["pressure"] = p
expected = t.assign(pressure=p)
xrt.assert_identical(results.to_dataset(), expected)
class TestDictionaryInterface:
...
class TestTreeFromDict:
def test_data_in_root(self):
dat = xr.Dataset()
dt = DataTree.from_dict({"/": dat})
assert dt.name is None
assert dt.parent is None
assert dt.children == {}
xrt.assert_identical(dt.to_dataset(), dat)
def test_one_layer(self):
dat1, dat2 = xr.Dataset({"a": 1}), xr.Dataset({"b": 2})
dt = DataTree.from_dict({"run1": dat1, "run2": dat2})
xrt.assert_identical(dt.to_dataset(), xr.Dataset())
assert dt.name is None
xrt.assert_identical(dt["run1"].to_dataset(), dat1)
assert dt["run1"].children == {}
xrt.assert_identical(dt["run2"].to_dataset(), dat2)
assert dt["run2"].children == {}
def test_two_layers(self):
dat1, dat2 = xr.Dataset({"a": 1}), xr.Dataset({"a": [1, 2]})
dt = DataTree.from_dict({"highres/run": dat1, "lowres/run": dat2})
assert "highres" in dt.children
assert "lowres" in dt.children
highres_run = dt["highres/run"]
xrt.assert_identical(highres_run.to_dataset(), dat1)
def test_nones(self):
dt = DataTree.from_dict({"d": None, "d/e": None})
assert [node.name for node in dt.subtree] == [None, "d", "e"]
assert [node.path for node in dt.subtree] == ["/", "/d", "/d/e"]
xrt.assert_identical(dt["d/e"].to_dataset(), xr.Dataset())
def test_full(self, simple_datatree):
dt = simple_datatree
paths = list(node.path for node in dt.subtree)
assert paths == [
"/",
"/set1",
"/set1/set1",
"/set1/set2",
"/set2",
"/set2/set1",
"/set3",
]
def test_datatree_values(self):
dat1 = DataTree(data=xr.Dataset({"a": 1}))
expected = DataTree()
expected["a"] = dat1
actual = DataTree.from_dict({"a": dat1})
dtt.assert_identical(actual, expected)
def test_roundtrip(self, simple_datatree):
dt = simple_datatree
roundtrip = DataTree.from_dict(dt.to_dict())
assert roundtrip.equals(dt)
@pytest.mark.xfail
def test_roundtrip_unnamed_root(self, simple_datatree):
# See GH81
dt = simple_datatree
dt.name = "root"
roundtrip = DataTree.from_dict(dt.to_dict())
assert roundtrip.equals(dt)
class TestDatasetView:
def test_view_contents(self):
ds = create_test_data()
dt = DataTree(data=ds)
assert ds.identical(
dt.ds
) # this only works because Dataset.identical doesn't check types
assert isinstance(dt.ds, xr.Dataset)
def test_immutability(self):
# See issue #38
dt = DataTree(name="root", data=None)
DataTree(name="a", data=None, parent=dt)
with pytest.raises(
AttributeError, match="Mutation of the DatasetView is not allowed"
):
dt.ds["a"] = xr.DataArray(0)
with pytest.raises(
AttributeError, match="Mutation of the DatasetView is not allowed"
):
dt.ds.update({"a": 0})
# TODO are there any other ways you can normally modify state (in-place)?
# (not attribute-like assignment because that doesn't work on Dataset anyway)
def test_methods(self):
ds = create_test_data()
dt = DataTree(data=ds)
assert ds.mean().identical(dt.ds.mean())
assert type(dt.ds.mean()) == xr.Dataset
def test_arithmetic(self, create_test_datatree):
dt = create_test_datatree()
expected = create_test_datatree(modify=lambda ds: 10.0 * ds)["set1"]
result = 10.0 * dt["set1"].ds
assert result.identical(expected)
def test_init_via_type(self):
# from datatree GH issue #188
# xarray's .weighted is unusual because it uses type() to create a Dataset/DataArray
a = xr.DataArray(
np.random.rand(3, 4, 10),
dims=["x", "y", "time"],
coords={"area": (["x", "y"], np.random.rand(3, 4))},
).to_dataset(name="data")
dt = DataTree(data=a)
def weighted_mean(ds):
return ds.weighted(ds.area).mean(["x", "y"])
weighted_mean(dt.ds)
class TestAccess:
def test_attribute_access(self, create_test_datatree):
dt = create_test_datatree()
# vars / coords
for key in ["a", "set0"]:
xrt.assert_equal(dt[key], getattr(dt, key))
assert key in dir(dt)
# dims
xrt.assert_equal(dt["a"]["y"], getattr(dt.a, "y"))
assert "y" in dir(dt["a"])
# children
for key in ["set1", "set2", "set3"]:
dtt.assert_equal(dt[key], getattr(dt, key))
assert key in dir(dt)
# attrs
dt.attrs["meta"] = "NASA"
assert dt.attrs["meta"] == "NASA"
assert "meta" in dir(dt)
def test_ipython_key_completions(self, create_test_datatree):
dt = create_test_datatree()
key_completions = dt._ipython_key_completions_()
node_keys = [node.path[1:] for node in dt.subtree]
assert all(node_key in key_completions for node_key in node_keys)
var_keys = list(dt.variables.keys())
assert all(var_key in key_completions for var_key in var_keys)
def test_operation_with_attrs_but_no_data(self):
# tests bug from xarray-datatree GH262
xs = xr.Dataset({"testvar": xr.DataArray(np.ones((2, 3)))})
dt = DataTree.from_dict({"node1": xs, "node2": xs})
dt.attrs["test_key"] = 1 # sel works fine without this line
dt.sel(dim_0=0)
class TestRestructuring:
def test_drop_nodes(self):
sue = DataTree.from_dict({"Mary": None, "Kate": None, "Ashley": None})
# test drop just one node
dropped_one = sue.drop_nodes(names="Mary")
assert "Mary" not in dropped_one.children
# test drop multiple nodes
dropped = sue.drop_nodes(names=["Mary", "Kate"])
assert not set(["Mary", "Kate"]).intersection(set(dropped.children))
assert "Ashley" in dropped.children
# test raise
with pytest.raises(KeyError, match="nodes {'Mary'} not present"):
dropped.drop_nodes(names=["Mary", "Ashley"])
# test ignore
childless = dropped.drop_nodes(names=["Mary", "Ashley"], errors="ignore")
assert childless.children == {}
def test_assign(self):
dt = DataTree()
expected = DataTree.from_dict({"/": xr.Dataset({"foo": 0}), "/a": None})
# kwargs form
result = dt.assign(foo=xr.DataArray(0), a=DataTree())
dtt.assert_equal(result, expected)
# dict form
result = dt.assign({"foo": xr.DataArray(0), "a": DataTree()})
dtt.assert_equal(result, expected)
class TestPipe:
def test_noop(self, create_test_datatree):
dt = create_test_datatree()
actual = dt.pipe(lambda tree: tree)
assert actual.identical(dt)
def test_params(self, create_test_datatree):
dt = create_test_datatree()
def f(tree, **attrs):
return tree.assign(arr_with_attrs=xr.Variable("dim0", [], attrs=attrs))
attrs = {"x": 1, "y": 2, "z": 3}
actual = dt.pipe(f, **attrs)
assert actual["arr_with_attrs"].attrs == attrs
def test_named_self(self, create_test_datatree):
dt = create_test_datatree()
def f(x, tree, y):
tree.attrs.update({"x": x, "y": y})
return tree
attrs = {"x": 1, "y": 2}
actual = dt.pipe((f, "tree"), **attrs)
assert actual is dt and actual.attrs == attrs
class TestSubset:
def test_match(self):
# TODO is this example going to cause problems with case sensitivity?
dt = DataTree.from_dict(
{
"/a/A": None,
"/a/B": None,
"/b/A": None,
"/b/B": None,
}
)
result = dt.match("*/B")
expected = DataTree.from_dict(
{
"/a/B": None,
"/b/B": None,
}
)
dtt.assert_identical(result, expected)
def test_filter(self):
simpsons = DataTree.from_dict(
d={
"/": xr.Dataset({"age": 83}),
"/Herbert": xr.Dataset({"age": 40}),
"/Homer": xr.Dataset({"age": 39}),
"/Homer/Bart": xr.Dataset({"age": 10}),
"/Homer/Lisa": xr.Dataset({"age": 8}),
"/Homer/Maggie": xr.Dataset({"age": 1}),
},
name="Abe",
)
expected = DataTree.from_dict(
d={
"/": xr.Dataset({"age": 83}),
"/Herbert": xr.Dataset({"age": 40}),
"/Homer": xr.Dataset({"age": 39}),
},
name="Abe",
)
elders = simpsons.filter(lambda node: node["age"] > 18)
dtt.assert_identical(elders, expected)
datatree-0.0.14/datatree/tests/test_extensions.py 0000664 0000000 0000000 00000002300 14552576503 0022137 0 ustar 00root root 0000000 0000000 import pytest
from datatree import DataTree, register_datatree_accessor
class TestAccessor:
def test_register(self) -> None:
@register_datatree_accessor("demo")
class DemoAccessor:
"""Demo accessor."""
def __init__(self, xarray_obj):
self._obj = xarray_obj
@property
def foo(self):
return "bar"
dt: DataTree = DataTree()
assert dt.demo.foo == "bar" # type: ignore
# accessor is cached
assert dt.demo is dt.demo # type: ignore
# check descriptor
assert dt.demo.__doc__ == "Demo accessor." # type: ignore
# TODO: typing doesn't seem to work with accessors
assert DataTree.demo.__doc__ == "Demo accessor." # type: ignore
assert isinstance(dt.demo, DemoAccessor) # type: ignore
assert DataTree.demo is DemoAccessor # type: ignore
with pytest.warns(Warning, match="overriding a preexisting attribute"):
@register_datatree_accessor("demo")
class Foo:
pass
# ensure we can remove it
del DataTree.demo # type: ignore
assert not hasattr(DataTree, "demo")
datatree-0.0.14/datatree/tests/test_formatting.py 0000664 0000000 0000000 00000007236 14552576503 0022127 0 ustar 00root root 0000000 0000000 from textwrap import dedent
from xarray import Dataset
from datatree import DataTree
from datatree.formatting import diff_tree_repr
class TestRepr:
def test_print_empty_node(self):
dt = DataTree(name="root")
printout = dt.__str__()
assert printout == "DataTree('root', parent=None)"
def test_print_empty_node_with_attrs(self):
dat = Dataset(attrs={"note": "has attrs"})
dt = DataTree(name="root", data=dat)
printout = dt.__str__()
assert printout == dedent(
"""\
DataTree('root', parent=None)
Dimensions: ()
Data variables:
*empty*
Attributes:
note: has attrs"""
)
def test_print_node_with_data(self):
dat = Dataset({"a": [0, 2]})
dt = DataTree(name="root", data=dat)
printout = dt.__str__()
expected = [
"DataTree('root', parent=None)",
"Dimensions",
"Coordinates",
"a",
"Data variables",
"*empty*",
]
for expected_line, printed_line in zip(expected, printout.splitlines()):
assert expected_line in printed_line
def test_nested_node(self):
dat = Dataset({"a": [0, 2]})
root = DataTree(name="root")
DataTree(name="results", data=dat, parent=root)
printout = root.__str__()
assert printout.splitlines()[2].startswith(" ")
def test_print_datatree(self, simple_datatree):
dt = simple_datatree
print(dt)
# TODO work out how to test something complex like this
def test_repr_of_node_with_data(self):
dat = Dataset({"a": [0, 2]})
dt = DataTree(name="root", data=dat)
assert "Coordinates" in repr(dt)
class TestDiffFormatting:
def test_diff_structure(self):
dt_1 = DataTree.from_dict({"a": None, "a/b": None, "a/c": None})
dt_2 = DataTree.from_dict({"d": None, "d/e": None})
expected = dedent(
"""\
Left and right DataTree objects are not isomorphic
Number of children on node '/a' of the left object: 2
Number of children on node '/d' of the right object: 1"""
)
actual = diff_tree_repr(dt_1, dt_2, "isomorphic")
assert actual == expected
def test_diff_node_names(self):
dt_1 = DataTree.from_dict({"a": None})
dt_2 = DataTree.from_dict({"b": None})
expected = dedent(
"""\
Left and right DataTree objects are not identical
Node '/a' in the left object has name 'a'
Node '/b' in the right object has name 'b'"""
)
actual = diff_tree_repr(dt_1, dt_2, "identical")
assert actual == expected
def test_diff_node_data(self):
import numpy as np
# casting to int64 explicitly ensures that int64s are created on all architectures
ds1 = Dataset({"u": np.int64(0), "v": np.int64(1)})
ds3 = Dataset({"w": np.int64(5)})
dt_1 = DataTree.from_dict({"a": ds1, "a/b": ds3})
ds2 = Dataset({"u": np.int64(0)})
ds4 = Dataset({"w": np.int64(6)})
dt_2 = DataTree.from_dict({"a": ds2, "a/b": ds4})
expected = dedent(
"""\
Left and right DataTree objects are not equal
Data in nodes at position '/a' do not match:
Data variables only on the left object:
v int64 1
Data in nodes at position '/a/b' do not match:
Differing data variables:
L w int64 5
R w int64 6"""
)
actual = diff_tree_repr(dt_1, dt_2, "equals")
assert actual == expected
datatree-0.0.14/datatree/tests/test_formatting_html.py 0000664 0000000 0000000 00000012705 14552576503 0023150 0 ustar 00root root 0000000 0000000 import pytest
import xarray as xr
from datatree import DataTree, formatting_html
@pytest.fixture(scope="module", params=["some html", "some other html"])
def repr(request):
return request.param
class Test_summarize_children:
"""
Unit tests for summarize_children.
"""
func = staticmethod(formatting_html.summarize_children)
@pytest.fixture(scope="class")
def childfree_tree_factory(self):
"""
Fixture for a child-free DataTree factory.
"""
from random import randint
def _childfree_tree_factory():
return DataTree(
data=xr.Dataset({"z": ("y", [randint(1, 100) for _ in range(3)])})
)
return _childfree_tree_factory
@pytest.fixture(scope="class")
def childfree_tree(self, childfree_tree_factory):
"""
Fixture for a child-free DataTree.
"""
return childfree_tree_factory()
@pytest.fixture(scope="function")
def mock_node_repr(self, monkeypatch):
"""
Apply mocking for node_repr.
"""
def mock(group_title, dt):
"""
Mock with a simple result
"""
return group_title + " " + str(id(dt))
monkeypatch.setattr(formatting_html, "node_repr", mock)
@pytest.fixture(scope="function")
def mock_wrap_repr(self, monkeypatch):
"""
Apply mocking for _wrap_repr.
"""
def mock(r, *, end, **kwargs):
"""
Mock by appending "end" or "not end".
"""
return r + " " + ("end" if end else "not end") + "//"
monkeypatch.setattr(formatting_html, "_wrap_repr", mock)
def test_empty_mapping(self):
"""
Test with an empty mapping of children.
"""
children = {}
assert self.func(children) == (
"
" "
"
)
def test_one_child(self, childfree_tree, mock_wrap_repr, mock_node_repr):
"""
Test with one child.
Uses a mock of _wrap_repr and node_repr to essentially mock
the inline lambda function "lines_callback".
"""
# Create mapping of children
children = {"a": childfree_tree}
# Expect first line to be produced from the first child, and
# wrapped as the last child
first_line = f"a {id(children['a'])} end//"
assert self.func(children) == (
"
"
f"{first_line}"
"
"
)
def test_two_children(self, childfree_tree_factory, mock_wrap_repr, mock_node_repr):
"""
Test with two level deep children.
Uses a mock of _wrap_repr and node_repr to essentially mock
the inline lambda function "lines_callback".
"""
# Create mapping of children
children = {"a": childfree_tree_factory(), "b": childfree_tree_factory()}
# Expect first line to be produced from the first child, and
# wrapped as _not_ the last child
first_line = f"a {id(children['a'])} not end//"
# Expect second line to be produced from the second child, and
# wrapped as the last child
second_line = f"b {id(children['b'])} end//"
assert self.func(children) == (
"
"
f"{first_line}"
f"{second_line}"
"
"
)
class Test__wrap_repr:
"""
Unit tests for _wrap_repr.
"""
func = staticmethod(formatting_html._wrap_repr)
def test_end(self, repr):
"""
Test with end=True.
"""
r = self.func(repr, end=True)
assert r == (
"
"
"
"
"
"
"
"
"
"
"
"
"
"
f"{repr}"
"
"
"
"
"
"
)
def test_not_end(self, repr):
"""
Test with end=False.
"""
r = self.func(repr, end=False)
assert r == (
"
"
"
"
"
"
"
"
"
"
"
"
"
"
f"{repr}"
"
"
"
"
"
"
)
datatree-0.0.14/datatree/tests/test_io.py 0000664 0000000 0000000 00000010643 14552576503 0020360 0 ustar 00root root 0000000 0000000 import pytest
import zarr.errors
from datatree.io import open_datatree
from datatree.testing import assert_equal
from datatree.tests import requires_h5netcdf, requires_netCDF4, requires_zarr
class TestIO:
@requires_netCDF4
def test_to_netcdf(self, tmpdir, simple_datatree):
filepath = str(
tmpdir / "test.nc"
) # casting to str avoids a pathlib bug in xarray
original_dt = simple_datatree
original_dt.to_netcdf(filepath, engine="netcdf4")
roundtrip_dt = open_datatree(filepath)
assert_equal(original_dt, roundtrip_dt)
@requires_netCDF4
def test_netcdf_encoding(self, tmpdir, simple_datatree):
filepath = str(
tmpdir / "test.nc"
) # casting to str avoids a pathlib bug in xarray
original_dt = simple_datatree
# add compression
comp = dict(zlib=True, complevel=9)
enc = {"/set2": {var: comp for var in original_dt["/set2"].ds.data_vars}}
original_dt.to_netcdf(filepath, encoding=enc, engine="netcdf4")
roundtrip_dt = open_datatree(filepath)
assert roundtrip_dt["/set2/a"].encoding["zlib"] == comp["zlib"]
assert roundtrip_dt["/set2/a"].encoding["complevel"] == comp["complevel"]
enc["/not/a/group"] = {"foo": "bar"}
with pytest.raises(ValueError, match="unexpected encoding group.*"):
original_dt.to_netcdf(filepath, encoding=enc, engine="netcdf4")
@requires_h5netcdf
def test_to_h5netcdf(self, tmpdir, simple_datatree):
filepath = str(
tmpdir / "test.nc"
) # casting to str avoids a pathlib bug in xarray
original_dt = simple_datatree
original_dt.to_netcdf(filepath, engine="h5netcdf")
roundtrip_dt = open_datatree(filepath)
assert_equal(original_dt, roundtrip_dt)
@requires_zarr
def test_to_zarr(self, tmpdir, simple_datatree):
filepath = str(
tmpdir / "test.zarr"
) # casting to str avoids a pathlib bug in xarray
original_dt = simple_datatree
original_dt.to_zarr(filepath)
roundtrip_dt = open_datatree(filepath, engine="zarr")
assert_equal(original_dt, roundtrip_dt)
@requires_zarr
def test_zarr_encoding(self, tmpdir, simple_datatree):
import zarr
filepath = str(
tmpdir / "test.zarr"
) # casting to str avoids a pathlib bug in xarray
original_dt = simple_datatree
comp = {"compressor": zarr.Blosc(cname="zstd", clevel=3, shuffle=2)}
enc = {"/set2": {var: comp for var in original_dt["/set2"].ds.data_vars}}
original_dt.to_zarr(filepath, encoding=enc)
roundtrip_dt = open_datatree(filepath, engine="zarr")
print(roundtrip_dt["/set2/a"].encoding)
assert roundtrip_dt["/set2/a"].encoding["compressor"] == comp["compressor"]
enc["/not/a/group"] = {"foo": "bar"}
with pytest.raises(ValueError, match="unexpected encoding group.*"):
original_dt.to_zarr(filepath, encoding=enc, engine="zarr")
@requires_zarr
def test_to_zarr_zip_store(self, tmpdir, simple_datatree):
from zarr.storage import ZipStore
filepath = str(
tmpdir / "test.zarr.zip"
) # casting to str avoids a pathlib bug in xarray
original_dt = simple_datatree
store = ZipStore(filepath)
original_dt.to_zarr(store)
roundtrip_dt = open_datatree(store, engine="zarr")
assert_equal(original_dt, roundtrip_dt)
@requires_zarr
def test_to_zarr_not_consolidated(self, tmpdir, simple_datatree):
filepath = tmpdir / "test.zarr"
zmetadata = filepath / ".zmetadata"
s1zmetadata = filepath / "set1" / ".zmetadata"
filepath = str(filepath) # casting to str avoids a pathlib bug in xarray
original_dt = simple_datatree
original_dt.to_zarr(filepath, consolidated=False)
assert not zmetadata.exists()
assert not s1zmetadata.exists()
with pytest.warns(RuntimeWarning, match="consolidated"):
roundtrip_dt = open_datatree(filepath, engine="zarr")
assert_equal(original_dt, roundtrip_dt)
@requires_zarr
def test_to_zarr_default_write_mode(self, tmpdir, simple_datatree):
simple_datatree.to_zarr(tmpdir)
# with default settings, to_zarr should not overwrite an existing dir
with pytest.raises(zarr.errors.ContainsGroupError):
simple_datatree.to_zarr(tmpdir)
datatree-0.0.14/datatree/tests/test_mapping.py 0000664 0000000 0000000 00000027212 14552576503 0021404 0 ustar 00root root 0000000 0000000 import numpy as np
import pytest
import xarray as xr
from datatree.datatree import DataTree
from datatree.mapping import TreeIsomorphismError, check_isomorphic, map_over_subtree
from datatree.testing import assert_equal
empty = xr.Dataset()
class TestCheckTreesIsomorphic:
def test_not_a_tree(self):
with pytest.raises(TypeError, match="not a tree"):
check_isomorphic("s", 1)
def test_different_widths(self):
dt1 = DataTree.from_dict(d={"a": empty})
dt2 = DataTree.from_dict(d={"b": empty, "c": empty})
expected_err_str = (
"Number of children on node '/' of the left object: 1\n"
"Number of children on node '/' of the right object: 2"
)
with pytest.raises(TreeIsomorphismError, match=expected_err_str):
check_isomorphic(dt1, dt2)
def test_different_heights(self):
dt1 = DataTree.from_dict({"a": empty})
dt2 = DataTree.from_dict({"b": empty, "b/c": empty})
expected_err_str = (
"Number of children on node '/a' of the left object: 0\n"
"Number of children on node '/b' of the right object: 1"
)
with pytest.raises(TreeIsomorphismError, match=expected_err_str):
check_isomorphic(dt1, dt2)
def test_names_different(self):
dt1 = DataTree.from_dict({"a": xr.Dataset()})
dt2 = DataTree.from_dict({"b": empty})
expected_err_str = (
"Node '/a' in the left object has name 'a'\n"
"Node '/b' in the right object has name 'b'"
)
with pytest.raises(TreeIsomorphismError, match=expected_err_str):
check_isomorphic(dt1, dt2, require_names_equal=True)
def test_isomorphic_names_equal(self):
dt1 = DataTree.from_dict({"a": empty, "b": empty, "b/c": empty, "b/d": empty})
dt2 = DataTree.from_dict({"a": empty, "b": empty, "b/c": empty, "b/d": empty})
check_isomorphic(dt1, dt2, require_names_equal=True)
def test_isomorphic_ordering(self):
dt1 = DataTree.from_dict({"a": empty, "b": empty, "b/d": empty, "b/c": empty})
dt2 = DataTree.from_dict({"a": empty, "b": empty, "b/c": empty, "b/d": empty})
check_isomorphic(dt1, dt2, require_names_equal=False)
def test_isomorphic_names_not_equal(self):
dt1 = DataTree.from_dict({"a": empty, "b": empty, "b/c": empty, "b/d": empty})
dt2 = DataTree.from_dict({"A": empty, "B": empty, "B/C": empty, "B/D": empty})
check_isomorphic(dt1, dt2)
def test_not_isomorphic_complex_tree(self, create_test_datatree):
dt1 = create_test_datatree()
dt2 = create_test_datatree()
dt2["set1/set2/extra"] = DataTree(name="extra")
with pytest.raises(TreeIsomorphismError, match="/set1/set2"):
check_isomorphic(dt1, dt2)
def test_checking_from_root(self, create_test_datatree):
dt1 = create_test_datatree()
dt2 = create_test_datatree()
real_root = DataTree(name="real root")
dt2.name = "not_real_root"
dt2.parent = real_root
with pytest.raises(TreeIsomorphismError):
check_isomorphic(dt1, dt2, check_from_root=True)
class TestMapOverSubTree:
def test_no_trees_passed(self):
@map_over_subtree
def times_ten(ds):
return 10.0 * ds
with pytest.raises(TypeError, match="Must pass at least one tree"):
times_ten("dt")
def test_not_isomorphic(self, create_test_datatree):
dt1 = create_test_datatree()
dt2 = create_test_datatree()
dt2["set1/set2/extra"] = DataTree(name="extra")
@map_over_subtree
def times_ten(ds1, ds2):
return ds1 * ds2
with pytest.raises(TreeIsomorphismError):
times_ten(dt1, dt2)
def test_no_trees_returned(self, create_test_datatree):
dt1 = create_test_datatree()
dt2 = create_test_datatree()
@map_over_subtree
def bad_func(ds1, ds2):
return None
with pytest.raises(TypeError, match="return value of None"):
bad_func(dt1, dt2)
def test_single_dt_arg(self, create_test_datatree):
dt = create_test_datatree()
@map_over_subtree
def times_ten(ds):
return 10.0 * ds
expected = create_test_datatree(modify=lambda ds: 10.0 * ds)
result_tree = times_ten(dt)
assert_equal(result_tree, expected)
def test_single_dt_arg_plus_args_and_kwargs(self, create_test_datatree):
dt = create_test_datatree()
@map_over_subtree
def multiply_then_add(ds, times, add=0.0):
return (times * ds) + add
expected = create_test_datatree(modify=lambda ds: (10.0 * ds) + 2.0)
result_tree = multiply_then_add(dt, 10.0, add=2.0)
assert_equal(result_tree, expected)
def test_multiple_dt_args(self, create_test_datatree):
dt1 = create_test_datatree()
dt2 = create_test_datatree()
@map_over_subtree
def add(ds1, ds2):
return ds1 + ds2
expected = create_test_datatree(modify=lambda ds: 2.0 * ds)
result = add(dt1, dt2)
assert_equal(result, expected)
def test_dt_as_kwarg(self, create_test_datatree):
dt1 = create_test_datatree()
dt2 = create_test_datatree()
@map_over_subtree
def add(ds1, value=0.0):
return ds1 + value
expected = create_test_datatree(modify=lambda ds: 2.0 * ds)
result = add(dt1, value=dt2)
assert_equal(result, expected)
def test_return_multiple_dts(self, create_test_datatree):
dt = create_test_datatree()
@map_over_subtree
def minmax(ds):
return ds.min(), ds.max()
dt_min, dt_max = minmax(dt)
expected_min = create_test_datatree(modify=lambda ds: ds.min())
assert_equal(dt_min, expected_min)
expected_max = create_test_datatree(modify=lambda ds: ds.max())
assert_equal(dt_max, expected_max)
def test_return_wrong_type(self, simple_datatree):
dt1 = simple_datatree
@map_over_subtree
def bad_func(ds1):
return "string"
with pytest.raises(TypeError, match="not Dataset or DataArray"):
bad_func(dt1)
def test_return_tuple_of_wrong_types(self, simple_datatree):
dt1 = simple_datatree
@map_over_subtree
def bad_func(ds1):
return xr.Dataset(), "string"
with pytest.raises(TypeError, match="not Dataset or DataArray"):
bad_func(dt1)
@pytest.mark.xfail
def test_return_inconsistent_number_of_results(self, simple_datatree):
dt1 = simple_datatree
@map_over_subtree
def bad_func(ds):
# Datasets in simple_datatree have different numbers of dims
# TODO need to instead return different numbers of Dataset objects for this test to catch the intended error
return tuple(ds.dims)
with pytest.raises(TypeError, match="instead returns"):
bad_func(dt1)
def test_wrong_number_of_arguments_for_func(self, simple_datatree):
dt = simple_datatree
@map_over_subtree
def times_ten(ds):
return 10.0 * ds
with pytest.raises(
TypeError, match="takes 1 positional argument but 2 were given"
):
times_ten(dt, dt)
def test_map_single_dataset_against_whole_tree(self, create_test_datatree):
dt = create_test_datatree()
@map_over_subtree
def nodewise_merge(node_ds, fixed_ds):
return xr.merge([node_ds, fixed_ds])
other_ds = xr.Dataset({"z": ("z", [0])})
expected = create_test_datatree(modify=lambda ds: xr.merge([ds, other_ds]))
result_tree = nodewise_merge(dt, other_ds)
assert_equal(result_tree, expected)
@pytest.mark.xfail
def test_trees_with_different_node_names(self):
# TODO test this after I've got good tests for renaming nodes
raise NotImplementedError
def test_dt_method(self, create_test_datatree):
dt = create_test_datatree()
def multiply_then_add(ds, times, add=0.0):
return times * ds + add
expected = create_test_datatree(modify=lambda ds: (10.0 * ds) + 2.0)
result_tree = dt.map_over_subtree(multiply_then_add, 10.0, add=2.0)
assert_equal(result_tree, expected)
def test_discard_ancestry(self, create_test_datatree):
# Check for datatree GH issue #48
dt = create_test_datatree()
subtree = dt["set1"]
@map_over_subtree
def times_ten(ds):
return 10.0 * ds
expected = create_test_datatree(modify=lambda ds: 10.0 * ds)["set1"]
result_tree = times_ten(subtree)
assert_equal(result_tree, expected, from_root=False)
def test_skip_empty_nodes_with_attrs(self, create_test_datatree):
# inspired by xarray-datatree GH262
dt = create_test_datatree()
dt["set1/set2"].attrs["foo"] = "bar"
def check_for_data(ds):
# fails if run on a node that has no data
assert len(ds.variables) != 0
return ds
dt.map_over_subtree(check_for_data)
def test_keep_attrs_on_empty_nodes(self, create_test_datatree):
# GH278
dt = create_test_datatree()
dt["set1/set2"].attrs["foo"] = "bar"
def empty_func(ds):
return ds
result = dt.map_over_subtree(empty_func)
assert result["set1/set2"].attrs == dt["set1/set2"].attrs
@pytest.mark.xfail(
reason="probably some bug in pytests handling of exception notes"
)
def test_error_contains_path_of_offending_node(self, create_test_datatree):
dt = create_test_datatree()
dt["set1"]["bad_var"] = 0
print(dt)
def fail_on_specific_node(ds):
if "bad_var" in ds:
raise ValueError("Failed because 'bar_var' present in dataset")
with pytest.raises(
ValueError, match="Raised whilst mapping function over node /set1"
):
dt.map_over_subtree(fail_on_specific_node)
class TestMutableOperations:
    def test_construct_using_type(self):
        # from datatree GH issue #188
        # xarray's .weighted is unusual because it uses type() to create a Dataset/DataArray

        def make_dataset(nx, ny, nt):
            # dataset with an "area" coordinate, since .weighted needs weights
            arr = xr.DataArray(
                np.random.rand(nx, ny, nt),
                dims=["x", "y", "time"],
                coords={"area": (["x", "y"], np.random.rand(nx, ny))},
            )
            return arr.to_dataset(name="data")

        tree = DataTree.from_dict(
            {"a": make_dataset(3, 4, 10), "b": make_dataset(2, 6, 14)}
        )

        def weighted_mean(ds):
            return ds.weighted(ds.area).mean(["x", "y"])

        tree.map_over_subtree(weighted_mean)

    def test_alter_inplace_forbidden(self):
        ages = {
            "/": 83,
            "/Herbert": 40,
            "/Homer": 39,
            "/Homer/Bart": 10,
            "/Homer/Lisa": 8,
            "/Homer/Maggie": 1,
        }
        simpsons = DataTree.from_dict(
            d={path: xr.Dataset({"age": age}) for path, age in ages.items()},
            name="Abe",
        )

        def fast_forward(ds: xr.Dataset, years: float) -> xr.Dataset:
            """Add some years to the age, but by altering the given dataset"""
            ds["age"] = ds["age"] + years
            return ds

        # mutating the node datasets from inside the mapped function must fail
        with pytest.raises(AttributeError):
            simpsons.map_over_subtree(fast_forward, years=10)
@pytest.mark.xfail
class TestMapOverSubTreeInplace:
    def test_map_over_subtree_inplace(self):
        # Placeholder: an in-place variant of map_over_subtree does not exist yet,
        # so this class is marked xfail until it is implemented.
        raise NotImplementedError
datatree-0.0.14/datatree/tests/test_treenode.py 0000664 0000000 0000000 00000025014 14552576503 0021554 0 ustar 00root root 0000000 0000000 import pytest
from datatree.iterators import LevelOrderIter, PreOrderIter
from datatree.treenode import InvalidTreeError, NamedNode, NodePath, TreeNode
class TestFamilyTree:
    def test_lonely(self):
        # a freshly created node has neither parent nor children
        root = TreeNode()
        assert root.parent is None
        assert root.children == {}

    def test_parenting(self):
        john = TreeNode()
        mary = TreeNode()
        mary._set_parent(john, "Mary")

        # the link must be visible from both ends
        assert mary.parent == john
        assert john.children["Mary"] is mary

    def test_no_time_traveller_loops(self):
        # a node may not be its own parent...
        john = TreeNode()

        with pytest.raises(InvalidTreeError, match="cannot be a parent of itself"):
            john._set_parent(john, "John")

        with pytest.raises(InvalidTreeError, match="cannot be a parent of itself"):
            john.children = {"John": john}

        mary = TreeNode()
        rose = TreeNode()
        mary._set_parent(john, "Mary")
        rose._set_parent(mary, "Rose")

        # ...nor may an ancestor be re-attached beneath its own descendant
        with pytest.raises(InvalidTreeError, match="is already a descendant"):
            john._set_parent(rose, "John")

        with pytest.raises(InvalidTreeError, match="is already a descendant"):
            rose.children = {"John": john}

    def test_parent_swap(self):
        john = TreeNode()
        mary = TreeNode()
        mary._set_parent(john, "Mary")

        steve = TreeNode()
        mary._set_parent(steve, "Mary")

        # assigning a new parent detaches the child from the old one
        assert mary.parent == steve
        assert steve.children["Mary"] is mary
        assert "Mary" not in john.children

    def test_multi_child_family(self):
        mary = TreeNode()
        kate = TreeNode()
        john = TreeNode(children={"Mary": mary, "Kate": kate})
        assert john.children["Mary"] is mary
        assert john.children["Kate"] is kate
        assert mary.parent is john
        assert kate.parent is john

    def test_disown_child(self):
        mary = TreeNode()
        john = TreeNode(children={"Mary": mary})
        mary.orphan()
        # orphaning severs the relationship in both directions
        assert mary.parent is None
        assert "Mary" not in john.children

    def test_doppelganger_child(self):
        kate = TreeNode()
        john = TreeNode()

        # children must be TreeNode instances
        with pytest.raises(TypeError):
            john.children = {"Kate": 666}

        # the same node object cannot appear under two different names
        with pytest.raises(InvalidTreeError, match="Cannot add same node"):
            john.children = {"Kate": kate, "Evil_Kate": kate}

        # attaching a new node under an existing key replaces the old child
        john = TreeNode(children={"Kate": kate})
        evil_kate = TreeNode()
        evil_kate._set_parent(john, "Kate")
        assert john.children["Kate"] is evil_kate

    def test_sibling_relationships(self):
        mary = TreeNode()
        kate = TreeNode()
        ashley = TreeNode()
        TreeNode(children={"Mary": mary, "Kate": kate, "Ashley": ashley})
        assert kate.siblings["Mary"] is mary
        assert kate.siblings["Ashley"] is ashley
        # a node is not its own sibling
        assert "Kate" not in kate.siblings

    def test_ancestors(self):
        tony = TreeNode()
        michael = TreeNode(children={"Tony": tony})
        vito = TreeNode(children={"Michael": michael})
        assert tony.root is vito
        # parents: closest first; ancestors (deprecated): root first and includes self
        assert tony.parents == (michael, vito)
        assert tony.ancestors == (vito, michael, tony)
class TestGetNodes:
    def test_get_child(self):
        steven = TreeNode()
        sue = TreeNode(children={"Steven": steven})
        mary = TreeNode(children={"Sue": sue})
        john = TreeNode(children={"Mary": mary})

        # get child
        assert john._get_item("Mary") is mary
        assert mary._get_item("Sue") is sue

        # no child exists
        with pytest.raises(KeyError):
            john._get_item("Kate")

        # get grandchild
        assert john._get_item("Mary/Sue") is sue

        # get great-grandchild
        assert john._get_item("Mary/Sue/Steven") is steven

        # get from middle of tree
        assert mary._get_item("Sue/Steven") is steven

    def test_get_upwards(self):
        sue = TreeNode()
        kate = TreeNode()
        mary = TreeNode(children={"Sue": sue, "Kate": kate})
        john = TreeNode(children={"Mary": mary})

        # '..' walks up towards the root; a trailing '/' is tolerated
        assert sue._get_item("../") is mary
        assert sue._get_item("../../") is john

        # relative path
        assert sue._get_item("../Kate") is kate

    def test_get_from_root(self):
        sue = TreeNode()
        mary = TreeNode(children={"Sue": sue})
        john = TreeNode(children={"Mary": mary})  # noqa
        # a leading '/' resolves the path from the tree's root, not from `sue`
        assert sue._get_item("/Mary") is mary
class TestSetNodes:
    def test_set_child_node(self):
        john = TreeNode()
        mary = TreeNode()
        john._set_item("Mary", mary)

        assert john.children["Mary"] is mary
        assert isinstance(mary, TreeNode)
        assert mary.children == {}
        assert mary.parent is john

    def test_child_already_exists(self):
        mary = TreeNode()
        john = TreeNode(children={"Mary": mary})
        mary_2 = TreeNode()
        # with allow_overwrite=False an occupied key must raise
        with pytest.raises(KeyError):
            john._set_item("Mary", mary_2, allow_overwrite=False)

    def test_set_grandchild(self):
        rose = TreeNode()
        mary = TreeNode()
        john = TreeNode()

        john._set_item("Mary", mary)
        john._set_item("Mary/Rose", rose)

        assert john.children["Mary"] is mary
        assert isinstance(mary, TreeNode)
        assert "Rose" in mary.children
        assert rose.parent is mary

    def test_create_intermediate_child(self):
        john = TreeNode()
        rose = TreeNode()

        # test intermediate children not allowed
        with pytest.raises(KeyError, match="Could not reach"):
            john._set_item(path="Mary/Rose", item=rose, new_nodes_along_path=False)

        # test intermediate children allowed
        john._set_item("Mary/Rose", rose, new_nodes_along_path=True)
        assert "Mary" in john.children
        mary = john.children["Mary"]
        assert isinstance(mary, TreeNode)
        assert mary.children == {"Rose": rose}
        # (a duplicated copy of this assert was removed)
        assert rose.parent == mary

    def test_overwrite_child(self):
        john = TreeNode()
        mary = TreeNode()
        john._set_item("Mary", mary)

        # test overwriting not allowed
        marys_evil_twin = TreeNode()
        with pytest.raises(KeyError, match="Already a node object"):
            john._set_item("Mary", marys_evil_twin, allow_overwrite=False)
        assert john.children["Mary"] is mary
        assert marys_evil_twin.parent is None

        # test overwriting allowed
        marys_evil_twin = TreeNode()
        john._set_item("Mary", marys_evil_twin, allow_overwrite=True)
        assert john.children["Mary"] is marys_evil_twin
        assert marys_evil_twin.parent is john
class TestPruning:
    def test_del_child(self):
        parent = TreeNode()
        child = TreeNode()
        parent._set_item("Mary", child)

        del parent["Mary"]

        # deletion severs the relationship in both directions
        assert "Mary" not in parent.children
        assert child.parent is None

        # deleting a key that no longer exists must raise
        with pytest.raises(KeyError):
            del parent["Mary"]
def create_test_tree():
    """Build the fixture tree a/(b/(d, e/(f, g)), c/h/i); return (root, deepest leaf f)."""
    # construct bottom-up: names are assigned from the child-dict keys on attachment
    f = NamedNode()
    g = NamedNode()
    d = NamedNode()
    i = NamedNode()
    e = NamedNode(children={"f": f, "g": g})
    b = NamedNode(children={"d": d, "e": e})
    h = NamedNode(children={"i": i})
    c = NamedNode(children={"h": h})
    a = NamedNode(name="a", children={"b": b, "c": c})
    return a, f
class TestIterators:
    def test_preorderiter(self):
        root, _ = create_test_tree()
        result = [node.name for node in PreOrderIter(root)]
        # depth-first: each node appears before any of its children
        expected = [
            "a",
            "b",
            "d",
            "e",
            "f",
            "g",
            "c",
            "h",
            "i",
        ]
        assert result == expected

    def test_levelorderiter(self):
        root, _ = create_test_tree()
        result = [node.name for node in LevelOrderIter(root)]
        # breadth-first: all nodes of one level before the next level
        expected = [
            "a",  # root node (fixed comment: the fixture root *is* named "a")
            "b",
            "c",
            "d",
            "e",
            "h",
            "f",
            "g",
            "i",
        ]
        assert result == expected
class TestAncestry:
    # The zip()-based comparisons previously used here would silently pass if
    # the actual sequence were shorter than expected; comparing whole lists
    # makes missing or extra nodes fail the test.

    def test_parents(self):
        _, leaf = create_test_tree()
        # closest parent first
        assert [node.name for node in leaf.parents] == ["e", "b", "a"]

    def test_lineage(self):
        _, leaf = create_test_tree()
        # lineage (deprecated) includes the node itself, closest first
        assert [node.name for node in leaf.lineage] == ["f", "e", "b", "a"]

    def test_ancestors(self):
        _, leaf = create_test_tree()
        # ancestors (deprecated) includes the node itself, root first
        assert [node.name for node in leaf.ancestors] == ["a", "b", "e", "f"]

    def test_subtree(self):
        root, _ = create_test_tree()
        # depth-first, including the root itself
        names = [node.name for node in root.subtree]
        assert names == ["a", "b", "d", "e", "f", "g", "c", "h", "i"]

    def test_descendants(self):
        root, _ = create_test_tree()
        # same as subtree but excluding the root
        names = [node.name for node in root.descendants]
        assert names == ["b", "d", "e", "f", "g", "c", "h", "i"]

    def test_leaves(self):
        tree, _ = create_test_tree()
        assert [node.name for node in tree.leaves] == ["d", "f", "g", "i"]

    def test_levels(self):
        a, f = create_test_tree()
        assert a.level == 0
        assert f.level == 3
        assert a.depth == 3
        assert f.depth == 3
        assert a.width == 1
        assert f.width == 3
class TestRenderTree:
    def test_render_nodetree(self):
        sam = NamedNode()
        ben = NamedNode()
        mary = NamedNode(children={"Sam": sam, "Ben": ben})
        kate = NamedNode()
        john = NamedNode(children={"Mary": mary, "Kate": kate})

        printout = john.__str__()
        expected_nodes = [
            "NamedNode()",
            "NamedNode('Mary')",
            "NamedNode('Sam')",
            "NamedNode('Ben')",
            "NamedNode('Kate')",
        ]
        # NOTE(review): zip() stops at the shorter sequence, so extra printed
        # lines (or missing expected ones) would not fail — consider also
        # asserting on len(printout.splitlines()).
        for expected_node, printed_node in zip(expected_nodes, printout.splitlines()):
            assert expected_node in printed_node
def test_nodepath():
    # NodePath should expose the usual PurePosixPath accessors
    mary_path = NodePath("/Mary")
    assert mary_path.root == "/"
    assert mary_path.stem == "Mary"
datatree-0.0.14/datatree/tests/test_version.py 0000664 0000000 0000000 00000000117 14552576503 0021431 0 ustar 00root root 0000000 0000000 import datatree
def test_version():
    # "999" is the fallback placeholder; a real version must have been resolved
    resolved = datatree.__version__
    assert resolved != "999"
datatree-0.0.14/datatree/treenode.py 0000664 0000000 0000000 00000054720 14552576503 0017361 0 ustar 00root root 0000000 0000000 from __future__ import annotations
import sys
from collections import OrderedDict
from pathlib import PurePosixPath
from typing import (
TYPE_CHECKING,
Generic,
Iterator,
Mapping,
Optional,
Tuple,
TypeVar,
Union,
)
from xarray.core.utils import Frozen, is_dict_like
if TYPE_CHECKING:
from xarray.core.types import T_DataArray
class InvalidTreeError(Exception):
    """Raised when user attempts to create an invalid tree in some way.

    For example by introducing a cycle (a node as its own ancestor), or by
    adding the same node object under two different child names.
    """
class NotFoundInTreeError(ValueError):
    """Raised when operation can't be completed because one node is not part of the expected tree."""
class NodePath(PurePosixPath):
    """Represents a path from one node to another within a tree.

    A thin wrapper over PurePosixPath which forbids drives and only allows
    "/" (absolute) or "" (relative) as the root.
    """

    def __init__(self, *pathsegments):
        if sys.version_info >= (3, 12):
            # from Python 3.12 pathlib parses the segments in __init__
            super().__init__(*pathsegments)
        else:
            # on older Pythons parsing happens in PurePosixPath.__new__
            # NOTE(review): the object returned by __new__ here is discarded —
            # `self` was already constructed by pathlib's own __new__; confirm
            # this branch is intentional before changing it.
            super().__new__(PurePosixPath, *pathsegments)
        if self.drive:
            raise ValueError("NodePaths cannot have drives")

        if self.root not in ["/", ""]:
            raise ValueError(
                'Root of NodePath can only be either "/" or "", with "" meaning the path is relative.'
            )

        # TODO should we also forbid suffixes to avoid node names with dots in them?
Tree = TypeVar("Tree", bound="TreeNode")
class TreeNode(Generic[Tree]):
"""
Base class representing a node of a tree, with methods for traversing and altering the tree.
This class stores no data, it has only parents and children attributes, and various methods.
Stores child nodes in an Ordered Dictionary, which is necessary to ensure that equality checks between two trees
also check that the order of child nodes is the same.
Nodes themselves are intrinsically unnamed (do not possess a ._name attribute), but if the node has a parent you can
find the key it is stored under via the .name property.
The .parent attribute is read-only: to replace the parent using public API you must set this node as the child of a
new parent using `new_parent.children[name] = child_node`, or to instead detach from the current parent use
`child_node.orphan()`.
This class is intended to be subclassed by DataTree, which will overwrite some of the inherited behaviour,
in particular to make names an inherent attribute, and allow setting parents directly. The intention is to mirror
the class structure of xarray.Variable & xarray.DataArray, where Variable is unnamed but DataArray is (optionally)
named.
Also allows access to any other node in the tree via unix-like paths, including upwards referencing via '../'.
(This class is heavily inspired by the anytree library's NodeMixin class.)
"""
_parent: Optional[Tree]
_children: OrderedDict[str, Tree]
def __init__(self, children: Optional[Mapping[str, Tree]] = None):
"""Create a parentless node."""
self._parent = None
self._children = OrderedDict()
if children is not None:
self.children = children
@property
def parent(self) -> Tree | None:
"""Parent of this node."""
return self._parent
def _set_parent(
self, new_parent: Tree | None, child_name: Optional[str] = None
) -> None:
# TODO is it possible to refactor in a way that removes this private method?
if new_parent is not None and not isinstance(new_parent, TreeNode):
raise TypeError(
"Parent nodes must be of type DataTree or None, "
f"not type {type(new_parent)}"
)
old_parent = self._parent
if new_parent is not old_parent:
self._check_loop(new_parent)
self._detach(old_parent)
self._attach(new_parent, child_name)
def _check_loop(self, new_parent: Tree | None) -> None:
"""Checks that assignment of this new parent will not create a cycle."""
if new_parent is not None:
if new_parent is self:
raise InvalidTreeError(
f"Cannot set parent, as node {self} cannot be a parent of itself."
)
if self._is_descendant_of(new_parent):
raise InvalidTreeError(
"Cannot set parent, as intended parent is already a descendant of this node."
)
def _is_descendant_of(self, node: Tree) -> bool:
return any(n is self for n in node.parents)
def _detach(self, parent: Tree | None) -> None:
if parent is not None:
self._pre_detach(parent)
parents_children = parent.children
parent._children = OrderedDict(
{
name: child
for name, child in parents_children.items()
if child is not self
}
)
self._parent = None
self._post_detach(parent)
def _attach(self, parent: Tree | None, child_name: Optional[str] = None) -> None:
if parent is not None:
if child_name is None:
raise ValueError(
"To directly set parent, child needs a name, but child is unnamed"
)
self._pre_attach(parent)
parentchildren = parent._children
assert not any(
child is self for child in parentchildren
), "Tree is corrupt."
parentchildren[child_name] = self
self._parent = parent
self._post_attach(parent)
else:
self._parent = None
def orphan(self) -> None:
"""Detach this node from its parent."""
self._set_parent(new_parent=None)
@property
def children(self: Tree) -> Mapping[str, Tree]:
"""Child nodes of this node, stored under a mapping via their names."""
return Frozen(self._children)
@children.setter
def children(self: Tree, children: Mapping[str, Tree]) -> None:
self._check_children(children)
children = OrderedDict(children)
old_children = self.children
del self.children
try:
self._pre_attach_children(children)
for name, child in children.items():
child._set_parent(new_parent=self, child_name=name)
self._post_attach_children(children)
assert len(self.children) == len(children)
except Exception:
# if something goes wrong then revert to previous children
self.children = old_children
raise
@children.deleter
def children(self) -> None:
# TODO this just detaches all the children, it doesn't actually delete them...
children = self.children
self._pre_detach_children(children)
for child in self.children.values():
child.orphan()
assert len(self.children) == 0
self._post_detach_children(children)
@staticmethod
def _check_children(children: Mapping[str, Tree]) -> None:
"""Check children for correct types and for any duplicates."""
if not is_dict_like(children):
raise TypeError(
"children must be a dict-like mapping from names to node objects"
)
seen = set()
for name, child in children.items():
if not isinstance(child, TreeNode):
raise TypeError(
f"Cannot add object {name}. It is of type {type(child)}, "
"but can only add children of type DataTree"
)
childid = id(child)
if childid not in seen:
seen.add(childid)
else:
raise InvalidTreeError(
f"Cannot add same node {name} multiple times as different children."
)
def __repr__(self) -> str:
return f"TreeNode(children={dict(self._children)})"
def _pre_detach_children(self: Tree, children: Mapping[str, Tree]) -> None:
"""Method call before detaching `children`."""
pass
def _post_detach_children(self: Tree, children: Mapping[str, Tree]) -> None:
"""Method call after detaching `children`."""
pass
def _pre_attach_children(self: Tree, children: Mapping[str, Tree]) -> None:
"""Method call before attaching `children`."""
pass
def _post_attach_children(self: Tree, children: Mapping[str, Tree]) -> None:
"""Method call after attaching `children`."""
pass
def _iter_parents(self: Tree) -> Iterator[Tree]:
"""Iterate up the tree, starting from the current node."""
node: Tree | None = self.parent
while node is not None:
yield node
node = node.parent
def iter_lineage(self: Tree) -> Tuple[Tree, ...]:
"""Iterate up the tree, starting from the current node."""
from warnings import warn
warn(
"`iter_lineage` has been deprecated, and in the future will raise an error."
"Please use `parents` from now on.",
DeprecationWarning,
)
return tuple((self, *self.parents))
@property
def lineage(self: Tree) -> Tuple[Tree, ...]:
"""All parent nodes and their parent nodes, starting with the closest."""
from warnings import warn
warn(
"`lineage` has been deprecated, and in the future will raise an error."
"Please use `parents` from now on.",
DeprecationWarning,
)
return self.iter_lineage()
@property
def parents(self: Tree) -> Tuple[Tree, ...]:
"""All parent nodes and their parent nodes, starting with the closest."""
return tuple(self._iter_parents())
@property
def ancestors(self: Tree) -> Tuple[Tree, ...]:
"""All parent nodes and their parent nodes, starting with the most distant."""
from warnings import warn
warn(
"`ancestors` has been deprecated, and in the future will raise an error."
"Please use `parents`. Example: `tuple(reversed(node.parents))`",
DeprecationWarning,
)
return tuple((*reversed(self.parents), self))
@property
def root(self: Tree) -> Tree:
"""Root node of the tree"""
node = self
while node.parent is not None:
node = node.parent
return node
@property
def is_root(self) -> bool:
"""Whether this node is the tree root."""
return self.parent is None
@property
def is_leaf(self) -> bool:
"""
Whether this node is a leaf node.
Leaf nodes are defined as nodes which have no children.
"""
return self.children == {}
@property
def leaves(self: Tree) -> Tuple[Tree, ...]:
"""
All leaf nodes.
Leaf nodes are defined as nodes which have no children.
"""
return tuple([node for node in self.subtree if node.is_leaf])
@property
def siblings(self: Tree) -> OrderedDict[str, Tree]:
"""
Nodes with the same parent as this node.
"""
if self.parent:
return OrderedDict(
{
name: child
for name, child in self.parent.children.items()
if child is not self
}
)
else:
return OrderedDict()
@property
def subtree(self: Tree) -> Iterator[Tree]:
"""
An iterator over all nodes in this tree, including both self and all descendants.
Iterates depth-first.
See Also
--------
DataTree.descendants
"""
from . import iterators
return iterators.PreOrderIter(self)
@property
def descendants(self: Tree) -> Tuple[Tree, ...]:
"""
Child nodes and all their child nodes.
Returned in depth-first order.
See Also
--------
DataTree.subtree
"""
all_nodes = tuple(self.subtree)
this_node, *descendants = all_nodes
return tuple(descendants)
@property
def level(self: Tree) -> int:
"""
Level of this node.
Level means number of parent nodes above this node before reaching the root.
The root node is at level 0.
Returns
-------
level : int
See Also
--------
depth
width
"""
return len(self.parents)
@property
def depth(self: Tree) -> int:
"""
Maximum level of this tree.
Measured from the root, which has a depth of 0.
Returns
-------
depth : int
See Also
--------
level
width
"""
return max(node.level for node in self.root.subtree)
@property
def width(self: Tree) -> int:
"""
Number of nodes at this level in the tree.
Includes number of immediate siblings, but also "cousins" in other branches and so-on.
Returns
-------
depth : int
See Also
--------
level
depth
"""
return len([node for node in self.root.subtree if node.level == self.level])
def _pre_detach(self: Tree, parent: Tree) -> None:
"""Method call before detaching from `parent`."""
pass
def _post_detach(self: Tree, parent: Tree) -> None:
"""Method call after detaching from `parent`."""
pass
def _pre_attach(self: Tree, parent: Tree) -> None:
"""Method call before attaching to `parent`."""
pass
def _post_attach(self: Tree, parent: Tree) -> None:
"""Method call after attaching to `parent`."""
pass
def get(self: Tree, key: str, default: Optional[Tree] = None) -> Optional[Tree]:
"""
Return the child node with the specified key.
Only looks for the node within the immediate children of this node,
not in other nodes of the tree.
"""
if key in self.children:
return self.children[key]
else:
return default
# TODO `._walk` method to be called by both `_get_item` and `_set_item`
def _get_item(self: Tree, path: str | NodePath) -> Union[Tree, T_DataArray]:
"""
Returns the object lying at the given path.
Raises a KeyError if there is no object at the given path.
"""
if isinstance(path, str):
path = NodePath(path)
if path.root:
current_node = self.root
root, *parts = list(path.parts)
else:
current_node = self
parts = list(path.parts)
for part in parts:
if part == "..":
if current_node.parent is None:
raise KeyError(f"Could not find node at {path}")
else:
current_node = current_node.parent
elif part in ("", "."):
pass
else:
if current_node.get(part) is None:
raise KeyError(f"Could not find node at {path}")
else:
current_node = current_node.get(part)
return current_node
def _set(self: Tree, key: str, val: Tree) -> None:
"""
Set the child node with the specified key to value.
Counterpart to the public .get method, and also only works on the immediate node, not other nodes in the tree.
"""
new_children = {**self.children, key: val}
self.children = new_children
def _set_item(
self: Tree,
path: str | NodePath,
item: Union[Tree, T_DataArray],
new_nodes_along_path: bool = False,
allow_overwrite: bool = True,
) -> None:
"""
Set a new item in the tree, overwriting anything already present at that path.
The given value either forms a new node of the tree or overwrites an existing item at that location.
Parameters
----------
path
item
new_nodes_along_path : bool
If true, then if necessary new nodes will be created along the given path, until the tree can reach the
specified location.
allow_overwrite : bool
Whether or not to overwrite any existing node at the location given by path.
Raises
------
KeyError
If node cannot be reached, and new_nodes_along_path=False.
Or if a node already exists at the specified path, and allow_overwrite=False.
"""
if isinstance(path, str):
path = NodePath(path)
if not path.name:
raise ValueError("Can't set an item under a path which has no name")
if path.root:
# absolute path
current_node = self.root
root, *parts, name = path.parts
else:
# relative path
current_node = self
*parts, name = path.parts
if parts:
# Walk to location of new node, creating intermediate node objects as we go if necessary
for part in parts:
if part == "..":
if current_node.parent is None:
# We can't create a parent if `new_nodes_along_path=True` as we wouldn't know what to name it
raise KeyError(f"Could not reach node at path {path}")
else:
current_node = current_node.parent
elif part in ("", "."):
pass
else:
if part in current_node.children:
current_node = current_node.children[part]
elif new_nodes_along_path:
# Want child classes (i.e. DataTree) to populate tree with their own types
new_node = type(self)()
current_node._set(part, new_node)
current_node = current_node.children[part]
else:
raise KeyError(f"Could not reach node at path {path}")
if name in current_node.children:
# Deal with anything already existing at this location
if allow_overwrite:
current_node._set(name, item)
else:
raise KeyError(f"Already a node object at path {path}")
else:
current_node._set(name, item)
def __delitem__(self: Tree, key: str):
"""Remove a child node from this tree object."""
if key in self.children:
child = self._children[key]
del self._children[key]
child.orphan()
else:
raise KeyError("Cannot delete")
def same_tree(self, other: Tree) -> bool:
"""True if other node is in the same tree as this node."""
return self.root is other.root
class NamedNode(TreeNode, Generic[Tree]):
    """
    A TreeNode which knows its own name.

    Implements path-like relationships to other nodes in its tree.
    """

    _name: Optional[str]
    _parent: Optional[Tree]
    _children: OrderedDict[str, Tree]

    def __init__(self, name=None, children=None):
        super().__init__(children=children)
        self._name = None
        self.name = name

    @property
    def name(self) -> str | None:
        """The name of this node."""
        return self._name

    @name.setter
    def name(self, name: str | None) -> None:
        if name is not None:
            if not isinstance(name, str):
                raise TypeError("node name must be a string or None")
            if "/" in name:
                # '/' is the path separator, so it cannot appear within one name
                raise ValueError("node names cannot contain forward slashes")
        self._name = name

    def __str__(self) -> str:
        # NOTE(review): an empty-string name also renders as "NamedNode()"
        # because "" is falsy — confirm that is acceptable.
        return f"NamedNode({self.name})" if self.name else "NamedNode()"

    def _post_attach(self: NamedNode, parent: NamedNode) -> None:
        """Ensures child has name attribute corresponding to key under which it has been stored."""
        key = next(k for k, v in parent.children.items() if v is self)
        self.name = key

    @property
    def path(self) -> str:
        """Return the file-like path from the root to this node."""
        if self.is_root:
            return "/"
        else:
            root, *ancestors = tuple(reversed(self.parents))
            # don't include name of root because (a) root might not have a name & (b) we want path relative to root.
            names = [*(node.name for node in ancestors), self.name]
            return "/" + "/".join(names)

    def relative_to(self: NamedNode, other: NamedNode) -> str:
        """
        Compute the relative path from this node to node `other`.

        If other is not in this tree, or it's otherwise impossible, raise a ValueError.
        """
        if not self.same_tree(other):
            raise NotFoundInTreeError(
                "Cannot find relative path because nodes do not lie within the same tree"
            )

        this_path = NodePath(self.path)
        if other.path in list(parent.path for parent in (self, *self.parents)):
            # other is this node or one of its ancestors: purely downward path
            return str(this_path.relative_to(other.path))
        else:
            # otherwise go up to the first common ancestor, then down to self
            common_ancestor = self.find_common_ancestor(other)
            path_to_common_ancestor = other._path_to_ancestor(common_ancestor)
            return str(
                path_to_common_ancestor / this_path.relative_to(common_ancestor.path)
            )

    def find_common_ancestor(self, other: NamedNode) -> NamedNode:
        """
        Find the first common ancestor of two nodes in the same tree.

        Raise ValueError if they are not in the same tree.
        """
        if self is other:
            return self

        # compare nodes by path string, walking up from self
        other_paths = [op.path for op in other.parents]
        for parent in (self, *self.parents):
            if parent.path in other_paths:
                return parent

        raise NotFoundInTreeError(
            "Cannot find common ancestor because nodes do not lie within the same tree"
        )

    def _path_to_ancestor(self, ancestor: NamedNode) -> NodePath:
        """Return the relative path from this node to the given ancestor node"""
        if not self.same_tree(ancestor):
            raise NotFoundInTreeError(
                "Cannot find relative path to ancestor because nodes do not lie within the same tree"
            )
        if ancestor.path not in list(a.path for a in (self, *self.parents)):
            raise NotFoundInTreeError(
                "Cannot find relative path to ancestor because given node is not an ancestor of this node"
            )

        # '.' when the ancestor is this node itself, else one '..' per generation
        parents_paths = list(parent.path for parent in (self, *self.parents))
        generation_gap = list(parents_paths).index(ancestor.path)
        path_upwards = "../" * generation_gap if generation_gap > 0 else "."
        return NodePath(path_upwards)
datatree-0.0.14/docs/ 0000775 0000000 0000000 00000000000 14552576503 0014331 5 ustar 00root root 0000000 0000000 datatree-0.0.14/docs/Makefile 0000664 0000000 0000000 00000015627 14552576503 0016004 0 ustar 00root root 0000000 0000000 # Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build
# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
.PHONY: help clean html rtdhtml dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
help:
@echo "Please use \`make ' where is one of"
@echo " html to make standalone HTML files"
@echo " rtdhtml Build html using same settings used on ReadtheDocs"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
clean:
rm -rf $(BUILDDIR)/*
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
rtdhtml:
$(SPHINXBUILD) -T -j auto -E -W --keep-going -b html -d $(BUILDDIR)/doctrees -D language=en . $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/complexity.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/complexity.qhc"
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/complexity"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/complexity"
@echo "# devhelp"
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
latexpdfja:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through platex and dvipdfmx..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
info:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo "Running Texinfo files through makeinfo..."
make -C $(BUILDDIR)/texinfo info
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
xml:
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
@echo
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
pseudoxml:
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
datatree-0.0.14/docs/README.md 0000664 0000000 0000000 00000000447 14552576503 0015615 0 ustar 00root root 0000000 0000000 # README - docs
## Build the documentation locally
```bash
cd docs # From project's root
make clean
rm -rf source/generated # remove autodoc artefacts, that are not removed by `make clean`
make html
```
## Access the documentation locally
Open `docs/_build/html/index.html` in a web browser
datatree-0.0.14/docs/make.bat 0000664 0000000 0000000 00000014503 14552576503 0015741 0 ustar 00root root 0000000 0000000 @ECHO OFF
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set BUILDDIR=_build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
set I18NSPHINXOPTS=%SPHINXOPTS% .
if NOT "%PAPER%" == "" (
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)
if "%1" == "" goto help
if "%1" == "help" (
:help
echo.Please use `make ^` where ^ is one of
echo. html to make standalone HTML files
echo. dirhtml to make HTML files named index.html in directories
echo. singlehtml to make a single large HTML file
echo. pickle to make pickle files
echo. json to make JSON files
echo. htmlhelp to make HTML files and a HTML help project
echo. qthelp to make HTML files and a qthelp project
echo. devhelp to make HTML files and a Devhelp project
echo. epub to make an epub
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
echo. text to make text files
echo. man to make manual pages
echo. texinfo to make Texinfo files
echo. gettext to make PO message catalogs
echo. changes to make an overview over all changed/added/deprecated items
echo. xml to make Docutils-native XML files
echo. pseudoxml to make pseudoxml-XML files for display purposes
echo. linkcheck to check all external links for integrity
echo. doctest to run all doctests embedded in the documentation if enabled
goto end
)
if "%1" == "clean" (
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
del /q /s %BUILDDIR%\*
goto end
)
%SPHINXBUILD% 2> nul
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
if "%1" == "html" (
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
goto end
)
if "%1" == "dirhtml" (
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
goto end
)
if "%1" == "singlehtml" (
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
goto end
)
if "%1" == "pickle" (
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the pickle files.
goto end
)
if "%1" == "json" (
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the JSON files.
goto end
)
if "%1" == "htmlhelp" (
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
goto end
)
if "%1" == "qthelp" (
%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
echo.^> qcollectiongenerator %BUILDDIR%\qthelp\complexity.qhcp
echo.To view the help file:
echo.^> assistant -collectionFile %BUILDDIR%\qthelp\complexity.ghc
goto end
)
if "%1" == "devhelp" (
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished.
goto end
)
if "%1" == "epub" (
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The epub file is in %BUILDDIR%/epub.
goto end
)
if "%1" == "latex" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
echo.
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "latexpdf" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
cd %BUILDDIR%/latex
make all-pdf
cd %BUILDDIR%/..
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "latexpdfja" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
cd %BUILDDIR%/latex
make all-pdf-ja
cd %BUILDDIR%/..
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "text" (
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The text files are in %BUILDDIR%/text.
goto end
)
if "%1" == "man" (
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The manual pages are in %BUILDDIR%/man.
goto end
)
if "%1" == "texinfo" (
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
goto end
)
if "%1" == "gettext" (
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
goto end
)
if "%1" == "changes" (
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
if errorlevel 1 exit /b 1
echo.
echo.The overview file is in %BUILDDIR%/changes.
goto end
)
if "%1" == "linkcheck" (
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
if errorlevel 1 exit /b 1
echo.
echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
goto end
)
if "%1" == "doctest" (
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
if errorlevel 1 exit /b 1
echo.
echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
goto end
)
if "%1" == "xml" (
%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The XML files are in %BUILDDIR%/xml.
goto end
)
if "%1" == "pseudoxml" (
%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
goto end
)
:end
datatree-0.0.14/docs/source/ 0000775 0000000 0000000 00000000000 14552576503 0015631 5 ustar 00root root 0000000 0000000 datatree-0.0.14/docs/source/api.rst 0000664 0000000 0000000 00000014116 14552576503 0017137 0 ustar 00root root 0000000 0000000 .. currentmodule:: datatree
#############
API reference
#############
DataTree
========
Creating a DataTree
-------------------
Methods of creating a datatree.
.. autosummary::
:toctree: generated/
DataTree
DataTree.from_dict
Tree Attributes
---------------
Attributes relating to the recursive tree-like structure of a ``DataTree``.
.. autosummary::
:toctree: generated/
DataTree.parent
DataTree.children
DataTree.name
DataTree.path
DataTree.root
DataTree.is_root
DataTree.is_leaf
DataTree.leaves
DataTree.level
DataTree.depth
DataTree.width
DataTree.subtree
DataTree.descendants
DataTree.siblings
DataTree.lineage
DataTree.parents
DataTree.ancestors
DataTree.groups
Data Contents
-------------
Interface to the data objects (optionally) stored inside a single ``DataTree`` node.
This interface echoes that of ``xarray.Dataset``.
.. autosummary::
:toctree: generated/
DataTree.dims
DataTree.sizes
DataTree.data_vars
DataTree.coords
DataTree.attrs
DataTree.encoding
DataTree.indexes
DataTree.nbytes
DataTree.ds
DataTree.to_dataset
DataTree.has_data
DataTree.has_attrs
DataTree.is_empty
DataTree.is_hollow
Dictionary Interface
--------------------
``DataTree`` objects also have a dict-like interface mapping keys to either ``xarray.DataArray``s or to child ``DataTree`` nodes.
.. autosummary::
:toctree: generated/
DataTree.__getitem__
DataTree.__setitem__
DataTree.__delitem__
DataTree.update
DataTree.get
DataTree.items
DataTree.keys
DataTree.values
Tree Manipulation
-----------------
For manipulating, traversing, navigating, or mapping over the tree structure.
.. autosummary::
:toctree: generated/
DataTree.orphan
DataTree.same_tree
DataTree.relative_to
DataTree.iter_lineage
DataTree.find_common_ancestor
DataTree.map_over_subtree
map_over_subtree
DataTree.pipe
DataTree.match
DataTree.filter
Pathlib-like Interface
----------------------
``DataTree`` objects deliberately echo some of the API of `pathlib.PurePath`.
.. autosummary::
:toctree: generated/
DataTree.name
DataTree.parent
DataTree.parents
DataTree.relative_to
Missing:
..
``DataTree.glob``
``DataTree.joinpath``
``DataTree.with_name``
``DataTree.walk``
``DataTree.rename``
``DataTree.replace``
DataTree Contents
-----------------
Manipulate the contents of all nodes in a tree simultaneously.
.. autosummary::
:toctree: generated/
DataTree.copy
DataTree.assign_coords
DataTree.merge
DataTree.rename
DataTree.rename_vars
DataTree.rename_dims
DataTree.swap_dims
DataTree.expand_dims
DataTree.drop_vars
DataTree.drop_dims
DataTree.set_coords
DataTree.reset_coords
DataTree Node Contents
----------------------
Manipulate the contents of a single DataTree node.
.. autosummary::
:toctree: generated/
DataTree.assign
DataTree.drop_nodes
Comparisons
===========
Compare one ``DataTree`` object to another.
.. autosummary::
:toctree: generated/
DataTree.isomorphic
DataTree.equals
DataTree.identical
Indexing
========
Index into all nodes in the subtree simultaneously.
.. autosummary::
:toctree: generated/
DataTree.isel
DataTree.sel
DataTree.drop_sel
DataTree.drop_isel
DataTree.head
DataTree.tail
DataTree.thin
DataTree.squeeze
DataTree.interp
DataTree.interp_like
DataTree.reindex
DataTree.reindex_like
DataTree.set_index
DataTree.reset_index
DataTree.reorder_levels
DataTree.query
..
Missing:
``DataTree.loc``
Missing Value Handling
======================
.. autosummary::
:toctree: generated/
DataTree.isnull
DataTree.notnull
DataTree.combine_first
DataTree.dropna
DataTree.fillna
DataTree.ffill
DataTree.bfill
DataTree.interpolate_na
DataTree.where
DataTree.isin
Computation
===========
Apply a computation to the data in all nodes in the subtree simultaneously.
.. autosummary::
:toctree: generated/
DataTree.map
DataTree.reduce
DataTree.diff
DataTree.quantile
DataTree.differentiate
DataTree.integrate
DataTree.map_blocks
DataTree.polyfit
DataTree.curvefit
Aggregation
===========
Aggregate data in all nodes in the subtree simultaneously.
.. autosummary::
:toctree: generated/
DataTree.all
DataTree.any
DataTree.argmax
DataTree.argmin
DataTree.idxmax
DataTree.idxmin
DataTree.max
DataTree.min
DataTree.mean
DataTree.median
DataTree.prod
DataTree.sum
DataTree.std
DataTree.var
DataTree.cumsum
DataTree.cumprod
ndarray methods
===============
Methods copied from :py:class:`numpy.ndarray` objects, here applying to the data in all nodes in the subtree.
.. autosummary::
:toctree: generated/
DataTree.argsort
DataTree.astype
DataTree.clip
DataTree.conj
DataTree.conjugate
DataTree.round
DataTree.rank
Reshaping and reorganising
==========================
Reshape or reorganise the data in all nodes in the subtree.
.. autosummary::
:toctree: generated/
DataTree.transpose
DataTree.stack
DataTree.unstack
DataTree.shift
DataTree.roll
DataTree.pad
DataTree.sortby
DataTree.broadcast_like
Plotting
========
I/O
===
Open a datatree from an on-disk store or serialize the tree.
.. autosummary::
:toctree: generated/
open_datatree
DataTree.to_dict
DataTree.to_netcdf
DataTree.to_zarr
..
Missing:
``open_mfdatatree``
Tutorial
========
Testing
=======
Test that two DataTree objects are similar.
.. autosummary::
:toctree: generated/
testing.assert_isomorphic
testing.assert_equal
testing.assert_identical
Exceptions
==========
Exceptions raised when manipulating trees.
.. autosummary::
:toctree: generated/
TreeIsomorphismError
InvalidTreeError
NotFoundInTreeError
Advanced API
============
Relatively advanced API for users or developers looking to understand the internals, or extend functionality.
.. autosummary::
:toctree: generated/
DataTree.variables
register_datatree_accessor
..
Missing:
``DataTree.set_close``
datatree-0.0.14/docs/source/conf.py 0000664 0000000 0000000 00000031606 14552576503 0017136 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# flake8: noqa
# Ignoring F401: imported but unused
# complexity documentation build configuration file, created by
# sphinx-quickstart on Tue Jul 9 22:26:36 2013.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import inspect
import os
import sys
import sphinx_autosummary_accessors
import datatree
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
# sys.path.insert(0, os.path.abspath('.'))
cwd = os.getcwd()
parent = os.path.dirname(cwd)
sys.path.insert(0, parent)
# -- General configuration -----------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.viewcode",
"sphinx.ext.linkcode",
"sphinx.ext.autosummary",
"sphinx.ext.intersphinx",
"sphinx.ext.extlinks",
"sphinx.ext.napoleon",
"sphinx_copybutton",
"sphinxext.opengraph",
"sphinx_autosummary_accessors",
"IPython.sphinxext.ipython_console_highlighting",
"IPython.sphinxext.ipython_directive",
"nbsphinx",
"sphinxcontrib.srclinks",
]
extlinks = {
"issue": ("https://github.com/xarray-contrib/datatree/issues/%s", "GH#%s"),
"pull": ("https://github.com/xarray-contrib/datatree/pull/%s", "GH#%s"),
}
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates", sphinx_autosummary_accessors.templates_path]
# Generate the API documentation when building
autosummary_generate = True
# Napoleon configurations
napoleon_google_docstring = False
napoleon_numpy_docstring = True
napoleon_use_param = False
napoleon_use_rtype = False
napoleon_preprocess_types = True
napoleon_type_aliases = {
# general terms
"sequence": ":term:`sequence`",
"iterable": ":term:`iterable`",
"callable": ":py:func:`callable`",
"dict_like": ":term:`dict-like `",
"dict-like": ":term:`dict-like `",
"path-like": ":term:`path-like `",
"mapping": ":term:`mapping`",
"file-like": ":term:`file-like `",
# special terms
# "same type as caller": "*same type as caller*", # does not work, yet
# "same type as values": "*same type as values*", # does not work, yet
# stdlib type aliases
"MutableMapping": "~collections.abc.MutableMapping",
"sys.stdout": ":obj:`sys.stdout`",
"timedelta": "~datetime.timedelta",
"string": ":class:`string `",
# numpy terms
"array_like": ":term:`array_like`",
"array-like": ":term:`array-like `",
"scalar": ":term:`scalar`",
"array": ":term:`array`",
"hashable": ":term:`hashable `",
# matplotlib terms
"color-like": ":py:func:`color-like `",
"matplotlib colormap name": ":doc:`matplotlib colormap name `",
"matplotlib axes object": ":py:class:`matplotlib axes object `",
"colormap": ":py:class:`colormap `",
# objects without namespace: xarray
"DataArray": "~xarray.DataArray",
"Dataset": "~xarray.Dataset",
"Variable": "~xarray.Variable",
"DatasetGroupBy": "~xarray.core.groupby.DatasetGroupBy",
"DataArrayGroupBy": "~xarray.core.groupby.DataArrayGroupBy",
# objects without namespace: numpy
"ndarray": "~numpy.ndarray",
"MaskedArray": "~numpy.ma.MaskedArray",
"dtype": "~numpy.dtype",
"ComplexWarning": "~numpy.ComplexWarning",
# objects without namespace: pandas
"Index": "~pandas.Index",
"MultiIndex": "~pandas.MultiIndex",
"CategoricalIndex": "~pandas.CategoricalIndex",
"TimedeltaIndex": "~pandas.TimedeltaIndex",
"DatetimeIndex": "~pandas.DatetimeIndex",
"Series": "~pandas.Series",
"DataFrame": "~pandas.DataFrame",
"Categorical": "~pandas.Categorical",
"Path": "~~pathlib.Path",
# objects with abbreviated namespace (from pandas)
"pd.Index": "~pandas.Index",
"pd.NaT": "~pandas.NaT",
}
# The suffix of source filenames.
source_suffix = ".rst"
# The encoding of source files.
# source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = "index"
# General information about the project.
project = "Datatree"
copyright = "2021 onwards, Tom Nicholas and its Contributors"
author = "Tom Nicholas"
html_show_sourcelink = True
srclink_project = "https://github.com/xarray-contrib/datatree"
srclink_branch = "main"
srclink_src_path = "docs/source"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = datatree.__version__
# The full version, including alpha/beta/rc tags.
release = datatree.__version__
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
# language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
# today = ''
# Else, today_fmt is used as the format for a strftime call.
# today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ["_build"]
# The reST default role (used for this markup: `text`) to use for all documents.
# default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
# add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
# add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
# show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built documents.
# keep_warnings = False
# -- Intersphinx links ---------------------------------------------------------
intersphinx_mapping = {
"python": ("https://docs.python.org/3.8/", None),
"numpy": ("https://numpy.org/doc/stable", None),
"xarray": ("https://xarray.pydata.org/en/stable/", None),
}
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = "sphinx_book_theme"
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
html_theme_options = {
"repository_url": "https://github.com/xarray-contrib/datatree",
"repository_branch": "main",
"path_to_docs": "docs/source",
"use_repository_button": True,
"use_issues_button": True,
"use_edit_page_button": True,
}
# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# " v documentation".
# html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
# html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
# html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# html_favicon = None
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
# html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
# html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
# html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
# html_additional_pages = {}
# If false, no module index is generated.
# html_domain_indices = True
# If false, no index is generated.
# html_use_index = True
# If true, the index is split into individual pages for each letter.
# html_split_index = False
# If true, links to the reST sources are added to the pages.
# html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
# html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
# html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
# html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None
# Output file base name for HTML help builder.
htmlhelp_basename = "datatree_doc"
# -- Options for LaTeX output --------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
# 'preamble': '',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
("index", "datatree.tex", "Datatree Documentation", author, "manual")
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
# latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
# latex_use_parts = False
# If true, show page references after internal links.
# latex_show_pagerefs = False
# If true, show URL addresses after external links.
# latex_show_urls = False
# Documents to append as an appendix to all manuals.
# latex_appendices = []
# If false, no module index is generated.
# latex_domain_indices = True
# -- Options for manual page output --------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [("index", "datatree", "Datatree Documentation", [author], 1)]
# If true, show URL addresses after external links.
# man_show_urls = False
# -- Options for Texinfo output ------------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(
"index",
"datatree",
"Datatree Documentation",
author,
"datatree",
"Tree-like hierarchical data structure for xarray.",
"Miscellaneous",
)
]
# Documents to append as an appendix to all manuals.
# texinfo_appendices = []
# If false, no module index is generated.
# texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
# texinfo_show_urls = 'footnote'
# If true, do not generate a @detailmenu in the "Top" node's menu.
# texinfo_no_detailmenu = False
# based on numpy doc/source/conf.py
def linkcode_resolve(domain, info):
"""
Determine the URL corresponding to Python object
"""
if domain != "py":
return None
modname = info["module"]
fullname = info["fullname"]
submod = sys.modules.get(modname)
if submod is None:
return None
obj = submod
for part in fullname.split("."):
try:
obj = getattr(obj, part)
except AttributeError:
return None
try:
fn = inspect.getsourcefile(inspect.unwrap(obj))
except TypeError:
fn = None
if not fn:
return None
try:
source, lineno = inspect.getsourcelines(obj)
except OSError:
lineno = None
if lineno:
linespec = f"#L{lineno}-L{lineno + len(source) - 1}"
else:
linespec = ""
fn = os.path.relpath(fn, start=os.path.dirname(datatree.__file__))
if "+" in datatree.__version__:
return f"https://github.com/xarray-contrib/datatree/blob/main/datatree/{fn}{linespec}"
else:
return (
f"https://github.com/xarray-contrib/datatree/blob/"
f"v{datatree.__version__}/datatree/{fn}{linespec}"
)
datatree-0.0.14/docs/source/contributing.rst 0000664 0000000 0000000 00000010227 14552576503 0021074 0 ustar 00root root 0000000 0000000 ========================
Contributing to Datatree
========================
Contributions are highly welcomed and appreciated. Every little help counts,
so do not hesitate!
.. contents:: Contribution links
:depth: 2
.. _submitfeedback:
Feature requests and feedback
-----------------------------
Do you like Datatree? Share some love on Twitter or in your blog posts!
We'd also like to hear about your propositions and suggestions. Feel free to
`submit them as issues `_ and:
* Explain in detail how they should work.
* Keep the scope as narrow as possible. This will make it easier to implement.
.. _reportbugs:
Report bugs
-----------
Report bugs for Datatree in the `issue tracker `_.
If you are reporting a bug, please include:
* Your operating system name and version.
* Any details about your local setup that might be helpful in troubleshooting,
specifically the Python interpreter version, installed libraries, and Datatree
version.
* Detailed steps to reproduce the bug.
If you can write a demonstration test that currently fails but should pass
(xfail), that is a very useful commit to make as well, even if you cannot
fix the bug itself.
.. _fixbugs:
Fix bugs
--------
Look through the `GitHub issues for bugs `_.
Talk to developers to find out how you can fix specific bugs.
Write documentation
-------------------
Datatree could always use more documentation. What exactly is needed?
* More complementary documentation. Have you perhaps found something unclear?
* Docstrings. There can never be too many of them.
* Blog posts, articles and such -- they're all very appreciated.
You can also edit documentation files directly in the GitHub web interface,
without using a local copy. This can be convenient for small fixes.
To build the documentation locally, you first need to install the following
tools:
- `Sphinx `__
- `sphinx_rtd_theme `__
- `sphinx-autosummary-accessors `__
You can then build the documentation with the following commands::
$ cd docs
$ make html
The built documentation should be available in the ``docs/_build/`` folder.
.. _`pull requests`:
.. _pull-requests:
Preparing Pull Requests
-----------------------
#. Fork the
`Datatree GitHub repository `__. It's
fine to use ``Datatree`` as your fork repository name because it will live
under your user.
#. Clone your fork locally using `git `_ and create a branch::
$ git clone git@github.com:{YOUR_GITHUB_USERNAME}/Datatree.git
$ cd Datatree
# now, to fix a bug or add feature create your own branch off "master":
$ git checkout -b your-bugfix-feature-branch-name master
#. Install `pre-commit `_ and its hook on the Datatree repo::
$ pip install --user pre-commit
$ pre-commit install
Afterwards ``pre-commit`` will run whenever you commit.
https://pre-commit.com/ is a framework for managing and maintaining multi-language pre-commit hooks
to ensure code-style and code formatting is consistent.
#. Install dependencies into a new conda environment::
$ conda env update -f ci/environment.yml
#. Run all the tests
Now running tests is as simple as issuing this command::
$ conda activate datatree-dev
$ pytest --junitxml=test-reports/junit.xml --cov=./ --verbose
This command will run tests via the "pytest" tool.
#. You can now edit your local working copy and run the tests again as necessary. Please follow PEP-8 for naming.
When committing, ``pre-commit`` will re-format the files if necessary.
#. Commit and push once your tests pass and you are happy with your change(s)::
$ git commit -a -m ""
$ git push -u
#. Finally, submit a pull request through the GitHub website using this data::
head-fork: YOUR_GITHUB_USERNAME/Datatree
compare: your-branch-name
base-fork: TomNicholas/datatree
base: master
datatree-0.0.14/docs/source/data-structures.rst 0000664 0000000 0000000 00000017166 14552576503 0021530 0 ustar 00root root 0000000 0000000 .. currentmodule:: datatree
.. _data structures:
Data Structures
===============
.. ipython:: python
:suppress:
import numpy as np
import pandas as pd
import xarray as xr
import datatree
np.random.seed(123456)
np.set_printoptions(threshold=10)
%xmode minimal
.. note::
This page builds on the information given in xarray's main page on
`data structures `_, so it is suggested that you
are familiar with those first.
DataTree
--------
:py:class:`DataTree` is xarray's highest-level data structure, able to organise heterogeneous data which
could not be stored inside a single :py:class:`Dataset` object. This includes representing the recursive structure of multiple
`groups`_ within a netCDF file or `Zarr Store`_.
.. _groups: https://www.unidata.ucar.edu/software/netcdf/workshops/2011/groups-types/GroupsIntro.html
.. _Zarr Store: https://zarr.readthedocs.io/en/stable/tutorial.html#groups
Each ``DataTree`` object (or "node") contains the same data that a single ``xarray.Dataset`` would (i.e. ``DataArray`` objects
stored under hashable keys), and so has the same key properties:
- ``dims``: a dictionary mapping of dimension names to lengths, for the variables in this node,
- ``data_vars``: a dict-like container of DataArrays corresponding to variables in this node,
- ``coords``: another dict-like container of DataArrays, corresponding to coordinate variables in this node,
- ``attrs``: dict to hold arbitrary metadata relevant to data in this node.
A single ``DataTree`` object acts much like a single ``Dataset`` object, and has a similar set of dict-like methods
defined upon it. However, ``DataTree``'s can also contain other ``DataTree`` objects, so they can be thought of as nested dict-like
containers of both ``xarray.DataArray``'s and ``DataTree``'s.
A single datatree object is known as a "node", and its position relative to other nodes is defined by two more key
properties:
- ``children``: An ordered dictionary mapping from names to other ``DataTree`` objects, known as its "child nodes".
- ``parent``: The single ``DataTree`` object whose children this datatree is a member of, known as its "parent node".
Each child automatically knows about its parent node, and a node without a parent is known as a "root" node
(represented by the ``parent`` attribute pointing to ``None``).
Nodes can have multiple children, but as each child node has at most one parent, there can only ever be one root node in a given tree.
The overall structure is technically a `connected acyclic undirected rooted graph`, otherwise known as a
`"Tree" `_.
.. note::
Technically a ``DataTree`` with more than one child node forms an `"Ordered Tree" `_,
because the children are stored in an Ordered Dictionary. However, this distinction only really matters for a few
edge cases involving operations on multiple trees simultaneously, and can safely be ignored by most users.
``DataTree`` objects can also optionally have a ``name`` as well as ``attrs``, just like a ``DataArray``.
Again these are not normally used unless explicitly accessed by the user.
.. _creating a datatree:
Creating a DataTree
~~~~~~~~~~~~~~~~~~~
One way to create a ``DataTree`` from scratch is to create each node individually,
specifying the nodes' relationship to one another as you create each one.
The ``DataTree`` constructor takes:
- ``data``: The data that will be stored in this node, represented by a single ``xarray.Dataset``, or a named ``xarray.DataArray``.
- ``parent``: The parent node (if there is one), given as a ``DataTree`` object.
- ``children``: The various child nodes (if there are any), given as a mapping from string keys to ``DataTree`` objects.
- ``name``: A string to use as the name of this node.
Let's make a single datatree node with some example data in it:
.. ipython:: python
from datatree import DataTree
ds1 = xr.Dataset({"foo": "orange"})
dt = DataTree(name="root", data=ds1) # create root node
dt
At this point our node is also the root node, as every tree has a root node.
We can add a second node to this tree either by referring to the first node in the constructor of the second:
.. ipython:: python
ds2 = xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])})
# add a child by referring to the parent node
node2 = DataTree(name="a", parent=dt, data=ds2)
or by dynamically updating the attributes of one node to refer to another:
.. ipython:: python
# add a second child by first creating a new node ...
ds3 = xr.Dataset({"zed": np.NaN})
node3 = DataTree(name="b", data=ds3)
# ... then updating its .parent property
node3.parent = dt
Our tree now has three nodes within it:
.. ipython:: python
dt
It is at tree construction time that consistency checks are enforced. For instance, if we try to create a `cycle` the constructor will raise an error:
.. ipython:: python
:okexcept:
dt.parent = node3
Alternatively you can also create a ``DataTree`` object from
- An ``xarray.Dataset`` using ``Dataset.to_node()`` (not yet implemented),
- A dictionary mapping directory-like paths to either ``DataTree`` nodes or data, using :py:meth:`DataTree.from_dict()`,
- A netCDF or Zarr file on disk with :py:func:`open_datatree()`. See :ref:`reading and writing files `.
DataTree Contents
~~~~~~~~~~~~~~~~~
Like ``xarray.Dataset``, ``DataTree`` implements the python mapping interface, but with values given by either ``xarray.DataArray`` objects or other ``DataTree`` objects.
.. ipython:: python
dt["a"]
dt["foo"]
Iterating over keys will iterate over both the names of variables and child nodes.
We can also access all the data in a single node through a dataset-like view
.. ipython:: python
dt["a"].ds
This demonstrates the fact that the data in any one node is equivalent to the contents of a single ``xarray.Dataset`` object.
The ``DataTree.ds`` property returns an immutable view, but we can instead extract the node's data contents as a new (and mutable)
``xarray.Dataset`` object via :py:meth:`DataTree.to_dataset()`:
.. ipython:: python
dt["a"].to_dataset()
Like with ``Dataset``, you can access the data and coordinate variables of a node separately via the ``data_vars`` and ``coords`` attributes:
.. ipython:: python
dt["a"].data_vars
dt["a"].coords
Dictionary-like methods
~~~~~~~~~~~~~~~~~~~~~~~
We can update a datatree in-place using Python's standard dictionary syntax, similar to how we can for Dataset objects.
For example, to create this example datatree from scratch, we could have written:
# TODO update this example using ``.coords`` and ``.data_vars`` as setters,
.. ipython:: python
dt = DataTree(name="root")
dt["foo"] = "orange"
dt["a"] = DataTree(data=xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])}))
dt["a/b/zed"] = np.NaN
dt
To change the variables in a node of a ``DataTree``, you can use all the standard dictionary
methods, including ``values``, ``items``, ``__delitem__``, ``get`` and
:py:meth:`DataTree.update`.
Note that assigning a ``DataArray`` object to a ``DataTree`` variable using ``__setitem__`` or ``update`` will
:ref:`automatically align ` the array(s) to the original node's indexes.
If you copy a ``DataTree`` using the :py:func:`copy` function or the :py:meth:`DataTree.copy` method it will copy the subtree,
meaning that node and children below it, but no parents above it.
Like for ``Dataset``, this copy is shallow by default, but you can copy all the underlying data arrays by calling ``dt.copy(deep=True)``.
datatree-0.0.14/docs/source/hierarchical-data.rst 0000664 0000000 0000000 00000054206 14552576503 0021717 0 ustar 00root root 0000000 0000000 .. currentmodule:: datatree
.. _hierarchical-data:
Working With Hierarchical Data
==============================
.. ipython:: python
:suppress:
import numpy as np
import pandas as pd
import xarray as xr
from datatree import DataTree
np.random.seed(123456)
np.set_printoptions(threshold=10)
%xmode minimal
Why Hierarchical Data?
----------------------
Many real-world datasets are composed of multiple differing components,
and it can often be useful to think of these in terms of a hierarchy of related groups of data.
Examples of data which one might want to organise in a grouped or hierarchical manner include:
- Simulation data at multiple resolutions,
- Observational data about the same system but from multiple different types of sensors,
- Mixed experimental and theoretical data,
- A systematic study recording the same experiment but with different parameters,
- Heterogeneous data, such as demographic and meteorological data,
or even any combination of the above.
Often datasets like this cannot easily fit into a single :py:class:`xarray.Dataset` object,
or are more usefully thought of as groups of related ``xarray.Dataset`` objects.
For this purpose we provide the :py:class:`DataTree` class.
This page explains in detail how to understand and use the different features of the :py:class:`DataTree` class for your own hierarchical data needs.
.. _node relationships:
Node Relationships
------------------
.. _creating a family tree:
Creating a Family Tree
~~~~~~~~~~~~~~~~~~~~~~
The three main ways of creating a ``DataTree`` object are described briefly in :ref:`creating a datatree`.
Here we go into more detail about how to create a tree node-by-node, using a famous family tree from the Simpsons cartoon as an example.
Let's start by defining nodes representing the two siblings, Bart and Lisa Simpson:
.. ipython:: python
bart = DataTree(name="Bart")
lisa = DataTree(name="Lisa")
Each of these node objects knows their own :py:class:`~DataTree.name`, but they currently have no relationship to one another.
We can connect them by creating another node representing a common parent, Homer Simpson:
.. ipython:: python
homer = DataTree(name="Homer", children={"Bart": bart, "Lisa": lisa})
Here we set the children of Homer in the node's constructor.
We now have a small family tree
.. ipython:: python
homer
where we can see how these individual Simpson family members are related to one another.
The nodes representing Bart and Lisa are now connected - we can confirm their sibling rivalry by examining the :py:class:`~DataTree.siblings` property:
.. ipython:: python
list(bart.siblings)
But oops, we forgot Homer's third child, Maggie! Let's add her by updating Homer's :py:class:`~DataTree.children` property to include her:
.. ipython:: python
maggie = DataTree(name="Maggie")
homer.children = {"Bart": bart, "Lisa": lisa, "Maggie": maggie}
homer
Let's check that Maggie knows who her Dad is:
.. ipython:: python
maggie.parent.name
That's good - updating the properties of our nodes does not break the internal consistency of our tree, as changes of parentage are automatically reflected on both nodes.
These children obviously have another parent, Marge Simpson, but ``DataTree`` nodes can only have a maximum of one parent.
Genealogical `family trees are not even technically trees `_ in the mathematical sense -
the fact that distant relatives can mate makes it a directed acyclic graph.
Trees of ``DataTree`` objects cannot represent this.
Homer is currently listed as having no parent (the so-called "root node" of this tree), but we can update his :py:class:`~DataTree.parent` property:
.. ipython:: python
abe = DataTree(name="Abe")
homer.parent = abe
Abe is now the "root" of this tree, which we can see by examining the :py:class:`~DataTree.root` property of any node in the tree
.. ipython:: python
maggie.root.name
We can see the whole tree by printing Abe's node or just part of the tree by printing Homer's node:
.. ipython:: python
abe
homer
We can see that Homer is aware of his parentage, and we say that Homer and his children form a "subtree" of the larger Simpson family tree.
In episode 28, Abe Simpson reveals that he had another son, Herbert "Herb" Simpson.
We can add Herbert to the family tree without displacing Homer by :py:meth:`~DataTree.assign`-ing another child to Abe:
.. ipython:: python
herbert = DataTree(name="Herb")
abe.assign({"Herbert": herbert})
.. note::
This example shows a minor subtlety - the returned tree has Homer's brother listed as ``"Herbert"``,
but the original node was named "Herb". Not only are names overridden when stored as keys like this,
but the new node is a copy, so that the original node that was referenced is unchanged (i.e. ``herbert.name == "Herb"`` still).
In other words, nodes are copied into trees, not inserted into them.
This is intentional, and mirrors the behaviour when storing named ``xarray.DataArray`` objects inside datasets.
Certain manipulations of our tree are forbidden, if they would create an inconsistent result.
In episode 51 of the show Futurama, Philip J. Fry travels back in time and accidentally becomes his own Grandfather.
If we try similar time-travelling hijinks with Homer, we get a :py:class:`InvalidTreeError` raised:
.. ipython:: python
:okexcept:
abe.parent = homer
.. _evolutionary tree:
Ancestry in an Evolutionary Tree
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Let's use a different example of a tree to discuss more complex relationships between nodes - the phylogenetic tree, or tree of life.
.. ipython:: python
vertebrates = DataTree.from_dict(
name="Vertebrae",
d={
"/Sharks": None,
"/Bony Skeleton/Ray-finned Fish": None,
"/Bony Skeleton/Four Limbs/Amphibians": None,
"/Bony Skeleton/Four Limbs/Amniotic Egg/Hair/Primates": None,
"/Bony Skeleton/Four Limbs/Amniotic Egg/Hair/Rodents & Rabbits": None,
"/Bony Skeleton/Four Limbs/Amniotic Egg/Two Fenestrae/Dinosaurs": None,
"/Bony Skeleton/Four Limbs/Amniotic Egg/Two Fenestrae/Birds": None,
},
)
primates = vertebrates["/Bony Skeleton/Four Limbs/Amniotic Egg/Hair/Primates"]
dinosaurs = vertebrates[
"/Bony Skeleton/Four Limbs/Amniotic Egg/Two Fenestrae/Dinosaurs"
]
We have used the :py:meth:`~DataTree.from_dict` constructor method as an alternate way to quickly create a whole tree,
and :ref:`filesystem paths` (to be explained shortly) to select two nodes of interest.
.. ipython:: python
vertebrates
This tree shows various families of species, grouped by their common features (making it technically a `"Cladogram" `_,
rather than an evolutionary tree).
Here both the species and the features used to group them are represented by ``DataTree`` node objects - there is no distinction in types of node.
We can however get a list of only the nodes we used to represent species by using the fact that all those nodes have no children - they are "leaf nodes".
We can check if a node is a leaf with :py:meth:`~DataTree.is_leaf`, and get a list of all leaves with the :py:class:`~DataTree.leaves` property:
.. ipython:: python
primates.is_leaf
[node.name for node in vertebrates.leaves]
Pretending that this is a true evolutionary tree for a moment, we can find the features of the evolutionary ancestors (so-called "ancestor" nodes),
the distinguishing feature of the common ancestor of all vertebrate life (the root node),
and even the distinguishing feature of the common ancestor of any two species (the common ancestor of two nodes):
.. ipython:: python
[node.name for node in primates.ancestors]
primates.root.name
primates.find_common_ancestor(dinosaurs).name
We can only find a common ancestor between two nodes that lie in the same tree.
If we try to find the common evolutionary ancestor between primates and an Alien species that has no relationship to Earth's evolutionary tree,
an error will be raised.
.. ipython:: python
:okexcept:
alien = DataTree(name="Xenomorph")
primates.find_common_ancestor(alien)
.. _navigating trees:
Navigating Trees
----------------
There are various ways to access the different nodes in a tree.
Properties
~~~~~~~~~~
We can navigate trees using the :py:class:`~DataTree.parent` and :py:class:`~DataTree.children` properties of each node, for example:
.. ipython:: python
lisa.parent.children["Bart"].name
but there are also more convenient ways to access nodes.
Dictionary-like interface
~~~~~~~~~~~~~~~~~~~~~~~~~
Children are stored on each node as a key-value mapping from name to child node.
They can be accessed and altered via the :py:class:`~DataTree.__getitem__` and :py:class:`~DataTree.__setitem__` syntax.
In general :py:class:`~DataTree.DataTree` objects support almost the entire set of dict-like methods,
including :py:meth:`~DataTree.keys`, :py:class:`~DataTree.values`, :py:class:`~DataTree.items`,
:py:meth:`~DataTree.__delitem__` and :py:meth:`~DataTree.update`.
.. ipython:: python
vertebrates["Bony Skeleton"]["Ray-finned Fish"]
Note that the dict-like interface combines access to child ``DataTree`` nodes and stored ``DataArrays``,
so if we have a node that contains both children and data, calling :py:meth:`~DataTree.keys` will list both names of child nodes and
names of data variables:
.. ipython:: python
dt = DataTree(
data=xr.Dataset({"foo": 0, "bar": 1}),
children={"a": DataTree(), "b": DataTree()},
)
print(dt)
list(dt.keys())
This also means that the names of variables and of child nodes must be different to one another.
Attribute-like access
~~~~~~~~~~~~~~~~~~~~~
You can also select both variables and child nodes through dot indexing
.. ipython:: python
dt.foo
dt.a
.. _filesystem paths:
Filesystem-like Paths
~~~~~~~~~~~~~~~~~~~~~
Hierarchical trees can be thought of as analogous to file systems.
Each node is like a directory, and each directory can contain both more sub-directories and data.
.. note::
You can even make the filesystem analogy concrete by using :py:func:`~DataTree.open_mfdatatree` or :py:func:`~DataTree.save_mfdatatree` # TODO not yet implemented - see GH issue 51
Datatree objects support a syntax inspired by unix-like filesystems,
where the "path" to a node is specified by the keys of each intermediate node in sequence,
separated by forward slashes.
This is an extension of the conventional dictionary ``__getitem__`` syntax to allow navigation across multiple levels of the tree.
Like with filepaths, paths within the tree can either be relative to the current node, e.g.
.. ipython:: python
abe["Homer/Bart"].name
abe["./Homer/Bart"].name # alternative syntax
or relative to the root node.
A path specified from the root (as opposed to being specified relative to an arbitrary node in the tree) is sometimes also referred to as a
`"fully qualified name" `_,
or as an "absolute path".
The root node is referred to by ``"/"``, so the path from the root node to its grand-child would be ``"/child/grandchild"``, e.g.
.. ipython:: python
# absolute path will start from root node
lisa["/Homer/Bart"].name
Relative paths between nodes also support the ``"../"`` syntax to mean the parent of the current node.
We can use this with ``__setitem__`` to add a missing entry to our evolutionary tree, but add it relative to a more familiar node of interest:
.. ipython:: python
primates["../../Two Fenestrae/Crocodiles"] = DataTree()
print(vertebrates)
Given two nodes in a tree, we can also find their relative path:
.. ipython:: python
bart.relative_to(lisa)
You can use this filepath feature to build a nested tree from a dictionary of filesystem-like paths and corresponding ``xarray.Dataset`` objects in a single step.
If we have a dictionary where each key is a valid path, and each value is either valid data or ``None``,
we can construct a complex tree quickly using the alternative constructor :py:meth:`DataTree.from_dict()`:
.. ipython:: python
d = {
"/": xr.Dataset({"foo": "orange"}),
"/a": xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])}),
"/a/b": xr.Dataset({"zed": np.NaN}),
"a/c/d": None,
}
dt = DataTree.from_dict(d)
dt
.. note::
Notice that using the path-like syntax will also create any intermediate empty nodes necessary to reach the end of the specified path
(i.e. the node labelled `"c"` in this case.)
This is to help avoid lots of redundant entries when creating deeply-nested trees using :py:meth:`DataTree.from_dict`.
.. _iterating over trees:
Iterating over trees
~~~~~~~~~~~~~~~~~~~~
You can iterate over every node in a tree using the subtree :py:class:`~DataTree.subtree` property.
This returns an iterable of nodes, which yields them in depth-first order.
.. ipython:: python
for node in vertebrates.subtree:
print(node.path)
A very useful pattern is to use :py:class:`~DataTree.subtree` in conjunction with the :py:class:`~DataTree.path` property to manipulate the nodes however you wish,
then rebuild a new tree using :py:meth:`DataTree.from_dict()`.
For example, we could keep only the nodes containing data by looping over all nodes,
checking if they contain any data using :py:class:`~DataTree.has_data`,
then rebuilding a new tree using only the paths of those nodes:
.. ipython:: python
non_empty_nodes = {node.path: node.ds for node in dt.subtree if node.has_data}
DataTree.from_dict(non_empty_nodes)
You can see this tree is similar to the ``dt`` object above, except that it is missing the empty nodes ``a/c`` and ``a/c/d``.
(If you want to keep the name of the root node, you will need to add the ``name`` kwarg to :py:class:`from_dict`, i.e. ``DataTree.from_dict(non_empty_nodes, name=dt.root.name)``.)
.. _manipulating trees:
Manipulating Trees
------------------
Subsetting Tree Nodes
~~~~~~~~~~~~~~~~~~~~~
We can subset our tree to select only nodes of interest in various ways.
Similarly to on a real filesystem, matching nodes by common patterns in their paths is often useful.
We can use :py:meth:`DataTree.match` for this:
.. ipython:: python
dt = DataTree.from_dict(
{
"/a/A": None,
"/a/B": None,
"/b/A": None,
"/b/B": None,
}
)
result = dt.match("*/B")
result
We can also subset trees by the contents of the nodes.
:py:meth:`DataTree.filter` retains only the nodes of a tree that meet a certain condition.
For example, we could recreate the Simpson's family tree with the ages of each individual, then filter for only the adults:
First lets recreate the tree but with an `age` data variable in every node:
.. ipython:: python
simpsons = DataTree.from_dict(
d={
"/": xr.Dataset({"age": 83}),
"/Herbert": xr.Dataset({"age": 40}),
"/Homer": xr.Dataset({"age": 39}),
"/Homer/Bart": xr.Dataset({"age": 10}),
"/Homer/Lisa": xr.Dataset({"age": 8}),
"/Homer/Maggie": xr.Dataset({"age": 1}),
},
name="Abe",
)
simpsons
Now let's filter out the minors:
.. ipython:: python
simpsons.filter(lambda node: node["age"] > 18)
The result is a new tree, containing only the nodes matching the condition.
(Yes, under the hood :py:meth:`~DataTree.filter` is just syntactic sugar for the pattern we showed you in :ref:`iterating over trees` !)
.. _Tree Contents:
Tree Contents
-------------
Hollow Trees
~~~~~~~~~~~~
A concept that can sometimes be useful is that of a "Hollow Tree", which means a tree with data stored only at the leaf nodes.
This is useful because certain tree manipulation operations only make sense for hollow trees.
You can check if a tree is a hollow tree by using the :py:class:`~DataTree.is_hollow` property.
We can see that the Simpson's family is not hollow because the data variable ``"age"`` is present at some nodes which
have children (i.e. Abe and Homer).
.. ipython:: python
simpsons.is_hollow
.. _tree computation:
Computation
-----------
`DataTree` objects are also useful for performing computations, not just for organizing data.
Operations and Methods on Trees
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To show how applying operations across a whole tree at once can be useful,
let's first create an example scientific dataset.
.. ipython:: python
def time_stamps(n_samples, T):
"""Create an array of evenly-spaced time stamps"""
return xr.DataArray(
data=np.linspace(0, 2 * np.pi * T, n_samples), dims=["time"]
)
def signal_generator(t, f, A, phase):
"""Generate an example electrical-like waveform"""
return A * np.sin(f * t.data + phase)
time_stamps1 = time_stamps(n_samples=15, T=1.5)
time_stamps2 = time_stamps(n_samples=10, T=1.0)
voltages = DataTree.from_dict(
{
"/oscilloscope1": xr.Dataset(
{
"potential": (
"time",
signal_generator(time_stamps1, f=2, A=1.2, phase=0.5),
),
"current": (
"time",
signal_generator(time_stamps1, f=2, A=1.2, phase=1),
),
},
coords={"time": time_stamps1},
),
"/oscilloscope2": xr.Dataset(
{
"potential": (
"time",
signal_generator(time_stamps2, f=1.6, A=1.6, phase=0.2),
),
"current": (
"time",
signal_generator(time_stamps2, f=1.6, A=1.6, phase=0.7),
),
},
coords={"time": time_stamps2},
),
}
)
voltages
Most xarray computation methods also exist as methods on datatree objects,
so you can for example take the mean value of these two timeseries at once:
.. ipython:: python
voltages.mean(dim="time")
This works by mapping the standard :py:meth:`xarray.Dataset.mean()` method over the dataset stored in each node of the
tree one-by-one.
The arguments passed to the method are used for every node, so the values of the arguments you pass might be valid for one node and invalid for another
.. ipython:: python
:okexcept:
voltages.isel(time=12)
Notice that the error raised helpfully indicates which node of the tree the operation failed on.
Arithmetic Methods on Trees
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Arithmetic methods are also implemented, so you can e.g. add a scalar to every dataset in the tree at once.
For example, we can advance the timeline of the Simpsons by a decade just by
.. ipython:: python
simpsons + 10
See that the same change (fast-forwarding by adding 10 years to the age of each character) has been applied to every node.
Mapping Custom Functions Over Trees
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
You can map custom computation over each node in a tree using :py:meth:`DataTree.map_over_subtree`.
You can map any function, so long as it takes `xarray.Dataset` objects as one (or more) of the input arguments,
and returns one (or more) xarray datasets.
.. note::
Functions passed to :py:func:`map_over_subtree` cannot alter nodes in-place.
Instead they must return new `xarray.Dataset` objects.
For example, we can define a function to calculate the Root Mean Square of a timeseries
.. ipython:: python
def rms(signal):
return np.sqrt(np.mean(signal**2))
Then calculate the RMS value of these signals:
.. ipython:: python
voltages.map_over_subtree(rms)
.. _multiple trees:
We can also use the :py:func:`map_over_subtree` decorator to promote a function which accepts datasets into one which
accepts datatrees.
Operating on Multiple Trees
---------------------------
The examples so far have involved mapping functions or methods over the nodes of a single tree,
but we can generalize this to mapping functions over multiple trees at once.
Comparing Trees for Isomorphism
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For it to make sense to map a single non-unary function over the nodes of multiple trees at once,
each tree needs to have the same structure. Specifically two trees can only be considered similar, or "isomorphic",
if they have the same number of nodes, and each corresponding node has the same number of children.
We can check if any two trees are isomorphic using the :py:meth:`DataTree.isomorphic` method.
.. ipython:: python
:okexcept:
dt1 = DataTree.from_dict({"a": None, "a/b": None})
dt2 = DataTree.from_dict({"a": None})
dt1.isomorphic(dt2)
dt3 = DataTree.from_dict({"a": None, "b": None})
dt1.isomorphic(dt3)
dt4 = DataTree.from_dict({"A": None, "A/B": xr.Dataset({"foo": 1})})
dt1.isomorphic(dt4)
If the trees are not isomorphic a :py:class:`~TreeIsomorphismError` will be raised.
Notice that corresponding tree nodes do not need to have the same name or contain the same data in order to be considered isomorphic.
Arithmetic Between Multiple Trees
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Arithmetic operations like multiplication are binary operations, so as long as we have two isomorphic trees,
we can do arithmetic between them.
.. ipython:: python
currents = DataTree.from_dict(
{
"/oscilloscope1": xr.Dataset(
{
"current": (
"time",
signal_generator(time_stamps1, f=2, A=1.2, phase=1),
),
},
coords={"time": time_stamps1},
),
"/oscilloscope2": xr.Dataset(
{
"current": (
"time",
signal_generator(time_stamps2, f=1.6, A=1.6, phase=0.7),
),
},
coords={"time": time_stamps2},
),
}
)
currents
currents.isomorphic(voltages)
We could use this feature to quickly calculate the electrical power in our signal, P=IV.
.. ipython:: python
power = currents * voltages
power
datatree-0.0.14/docs/source/index.rst 0000664 0000000 0000000 00000005514 14552576503 0017477 0 ustar 00root root 0000000 0000000 .. currentmodule:: datatree
Datatree
========
**Datatree is a prototype implementation of a tree-like hierarchical data structure for xarray.**
Why Datatree?
~~~~~~~~~~~~~
Datatree was born after the xarray team recognised a `need for a new hierarchical data structure `_,
that was more flexible than a single :py:class:`xarray.Dataset` object.
The initial motivation was to represent netCDF files / Zarr stores with multiple nested groups in a single in-memory object,
but :py:class:`~datatree.DataTree` objects have many other uses.
You might want to use datatree for:
- Organising many related datasets, e.g. results of the same experiment with different parameters, or simulations of the same system using different models,
- Analysing similar data at multiple resolutions simultaneously, such as when doing a convergence study,
- Comparing heterogenous but related data, such as experimental and theoretical data,
- I/O with nested data formats such as netCDF / Zarr groups.
Development Roadmap
~~~~~~~~~~~~~~~~~~~
Datatree currently lives in a separate repository to the main xarray package.
This allows the datatree developers to make changes to it, experiment, and improve it faster.
Eventually we plan to fully integrate datatree upstream into xarray's main codebase, at which point the `github.com/xarray-contrib/datatree `_ repository will be archived.
This should not cause much disruption to code that depends on datatree - you will likely only have to change the import line (i.e. from ``from datatree import DataTree`` to ``from xarray import DataTree``).
However, until this full integration occurs, datatree's API should not be considered to have the same `level of stability as xarray's `_.
User Feedback
~~~~~~~~~~~~~
We really really really want to hear your opinions on datatree!
At this point in development, user feedback is critical to help us create something that will suit everyone's needs.
Please raise any thoughts, issues, suggestions or bugs, no matter how small or large, on the `github issue tracker `_.
.. toctree::
:maxdepth: 2
:caption: Documentation Contents
Installation
Quick Overview
Tutorial
Data Model
Hierarchical Data
Reading and Writing Files
API Reference
Terminology
Contributing Guide
What's New
GitHub repository
Feedback
--------
If you encounter any errors, problems with **Datatree**, or have any suggestions, please open an issue
on `GitHub `_.
datatree-0.0.14/docs/source/installation.rst 0000664 0000000 0000000 00000002150 14552576503 0021062 0 ustar 00root root 0000000 0000000 .. currentmodule:: datatree
============
Installation
============
Datatree can be installed in three ways:
Using the `conda `__ package manager that comes with the
Anaconda/Miniconda distribution:
.. code:: bash
$ conda install xarray-datatree --channel conda-forge
Using the `pip `__ package manager:
.. code:: bash
$ python -m pip install xarray-datatree
To install a development version from source:
.. code:: bash
$ git clone https://github.com/xarray-contrib/datatree
$ cd datatree
$ python -m pip install -e .
You will just need xarray as a required dependency, with netcdf4, zarr, and h5netcdf as optional dependencies to allow file I/O.
.. note::
Datatree is very much still in the early stages of development. There may be functions that are present but whose
internals are not yet implemented, or significant changes to the API in future.
That said, if you try it out and find some behaviour that looks like a bug to you, please report it on the
`issue tracker `_!
datatree-0.0.14/docs/source/io.rst 0000664 0000000 0000000 00000004237 14552576503 0017000 0 ustar 00root root 0000000 0000000 .. currentmodule:: datatree
.. _io:
Reading and Writing Files
=========================
.. note::
This page builds on the information given in xarray's main page on
`reading and writing files `_,
so it is suggested that you are familiar with those first.
netCDF
------
Groups
~~~~~~
Whilst netCDF groups can only be loaded individually as Dataset objects, a whole file of many nested groups can be loaded
as a single :py:class:`DataTree` object.
To open a whole netCDF file as a tree of groups use the :py:func:`open_datatree` function.
To save a DataTree object as a netCDF file containing many groups, use the :py:meth:`DataTree.to_netcdf` method.
.. _netcdf.group.warning:
.. warning::
``DataTree`` objects do not follow the exact same data model as netCDF files, which means that perfect round-tripping
is not always possible.
In particular in the netCDF data model dimensions are entities that can exist regardless of whether any variable possesses them.
This is in contrast to `xarray's data model `_
(and hence :ref:`datatree's data model `) in which the dimensions of a (Dataset/Tree)
object are simply the set of dimensions present across all variables in that dataset.
This means that if a netCDF file contains dimensions but no variables which possess those dimensions,
these dimensions will not be present when that file is opened as a DataTree object.
Saving this DataTree object to file will therefore not preserve these "unused" dimensions.
Zarr
----
Groups
~~~~~~
Nested groups in zarr stores can be represented by loading the store as a :py:class:`DataTree` object, similarly to netCDF.
To open a whole zarr store as a tree of groups use the :py:func:`open_datatree` function.
To save a DataTree object as a zarr store containing many groups, use the :py:meth:`DataTree.to_zarr()` method.
.. note::
Note that perfect round-tripping should always be possible with a zarr store (:ref:`unlike for netCDF files `),
as zarr does not support "unused" dimensions.
datatree-0.0.14/docs/source/quick-overview.rst 0000664 0000000 0000000 00000006176 14552576503 0021355 0 ustar 00root root 0000000 0000000 .. currentmodule:: datatree
##############
Quick overview
##############
DataTrees
---------
:py:class:`DataTree` is a tree-like container of :py:class:`xarray.DataArray` objects, organised into multiple mutually alignable groups.
You can think of it like a (recursive) ``dict`` of :py:class:`xarray.Dataset` objects.
Let's first make some example xarray datasets (following on from xarray's
`quick overview `_ page):
.. ipython:: python
import numpy as np
import xarray as xr
data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]})
ds = xr.Dataset(dict(foo=data, bar=("x", [1, 2]), baz=np.pi))
ds
ds2 = ds.interp(coords={"x": [10, 12, 14, 16, 18, 20]})
ds2
ds3 = xr.Dataset(
dict(people=["alice", "bob"], heights=("people", [1.57, 1.82])),
coords={"species": "human"},
)
ds3
Now we'll put this data into a multi-group tree:
.. ipython:: python
from datatree import DataTree
dt = DataTree.from_dict({"simulation/coarse": ds, "simulation/fine": ds2, "/": ds3})
dt
This creates a datatree with various groups. We have one root group, containing information about individual people.
(This root group can be named, but here is unnamed, so is referred to with ``"/"``, same as the root of a unix-like filesystem.)
The root group then has one subgroup ``simulation``, which contains no data itself but does contain another two subgroups,
named ``fine`` and ``coarse``.
The (sub-)sub-groups ``fine`` and ``coarse`` contain two very similar datasets.
They both have an ``"x"`` dimension, but the dimension is of different lengths in each group, which makes the data in each group unalignable.
In the root group we placed some completely unrelated information, showing how we can use a tree to store heterogeneous data.
The constraints on each group are therefore the same as the constraints on dataarrays within a single dataset.
We created the sub-groups using a filesystem-like syntax, and accessing groups works the same way.
We can access individual dataarrays in a similar fashion
.. ipython:: python
dt["simulation/coarse/foo"]
and we can also pull out the data in a particular group as a ``Dataset`` object using ``.ds``:
.. ipython:: python
dt["simulation/coarse"].ds
Operations map over subtrees, so we can take a mean over the ``x`` dimension of both the ``fine`` and ``coarse`` groups just by
.. ipython:: python
avg = dt["simulation"].mean(dim="x")
avg
Here the ``"x"`` dimension used is always the one local to that sub-group.
You can do almost everything you can do with ``Dataset`` objects with ``DataTree`` objects
(including indexing and arithmetic), as operations will be mapped over every sub-group in the tree.
This allows you to work with multiple groups of non-alignable variables at once.
.. note::
If all of your variables are mutually alignable
(i.e. they live on the same grid, such that every common dimension name maps to the same length),
then you probably don't need :py:class:`DataTree`, and should consider just sticking with ``xarray.Dataset``.
datatree-0.0.14/docs/source/terminology.rst 0000664 0000000 0000000 00000003460 14552576503 0020736 0 ustar 00root root 0000000 0000000 .. currentmodule:: datatree
.. _terminology:
This page extends `xarray's page on terminology `_.
Terminology
===========
.. glossary::
DataTree
A tree-like collection of ``Dataset`` objects. A *tree* is made up of one or more *nodes*,
each of which can store the same information as a single ``Dataset`` (accessed via `.ds`).
This data is stored in the same way as in a ``Dataset``, i.e. in the form of data variables
(see **Variable** in the `corresponding xarray terminology page `_),
dimensions, coordinates, and attributes.
The nodes in a tree are linked to one another, and each node is its own instance of a ``DataTree`` object.
Each node can have zero or more *children* (stored in a dictionary-like manner under their corresponding *names*),
and those child nodes can themselves have children.
If a node is a child of another node, that other node is said to be its *parent*. Nodes can have a maximum of one parent,
and if a node has no parent it is said to be the *root* node of that *tree*.
Subtree
A section of a *tree*, consisting of a *node* along with all the child nodes below it
(and the child nodes below them, i.e. all so-called *descendant* nodes).
Excludes the parent node and all nodes above.
Group
Another word for a subtree, reflecting how the hierarchical structure of a ``DataTree`` allows for grouping related data together.
Analogous to a single
`netCDF group `_ or
`Zarr group `_.
datatree-0.0.14/docs/source/tutorial.rst 0000664 0000000 0000000 00000000106 14552576503 0020223 0 ustar 00root root 0000000 0000000 .. currentmodule:: datatree
========
Tutorial
========
Coming soon!
datatree-0.0.14/docs/source/whats-new.rst 0000664 0000000 0000000 00000034334 14552576503 0020307 0 ustar 00root root 0000000 0000000 .. currentmodule:: datatree
What's New
==========
.. ipython:: python
:suppress:
import numpy as np
import pandas as pd
import xarray as xray
import xarray
import xarray as xr
import datatree
np.random.seed(123456)
.. _whats-new.v0.0.14:
v0.0.14 (unreleased)
--------------------
New Features
~~~~~~~~~~~~
Breaking changes
~~~~~~~~~~~~~~~~
- Renamed `DataTree.lineage` to `DataTree.parents` to match `pathlib` vocabulary
(:issue:`283`, :pull:`286`)
- Minimum required version of xarray is now 2023.12.0, i.e. the latest version.
This is required to prevent recent changes to xarray's internals from breaking datatree.
(:issue:`293`, :pull:`294`)
By `Tom Nicholas `_.
- Change default write mode of :py:meth:`DataTree.to_zarr` to ``'w-'`` to match ``xarray``
default and prevent accidental directory overwrites. (:issue:`274`, :pull:`275`)
By `Sam Levang `_.
Deprecations
~~~~~~~~~~~~
- Renamed `DataTree.lineage` to `DataTree.parents` to match `pathlib` vocabulary
(:issue:`283`, :pull:`286`). `lineage` is now deprecated and use of `parents` is encouraged.
By `Etienne Schalk `_.
Bug fixes
~~~~~~~~~
- Keep attributes on nodes containing no data in :py:func:`map_over_subtree`. (:issue:`278`, :pull:`279`)
By `Sam Levang `_.
Documentation
~~~~~~~~~~~~~
- Use ``napoleon`` instead of ``numpydoc`` to align with xarray documentation
(:issue:`284`, :pull:`298`).
By `Etienne Schalk `_.
Internal Changes
~~~~~~~~~~~~~~~~
.. _whats-new.v0.0.13:
v0.0.13 (27/10/2023)
--------------------
New Features
~~~~~~~~~~~~
- New :py:meth:`DataTree.match` method for glob-like pattern matching of node paths. (:pull:`267`)
By `Tom Nicholas `_.
- New :py:meth:`DataTree.is_hollow` property for checking if data is only contained at the leaf nodes. (:pull:`272`)
By `Tom Nicholas `_.
- Indicate which node caused the problem if error encountered while applying user function using :py:func:`map_over_subtree`
(:issue:`190`, :pull:`264`). Only works when using python 3.11 or later.
By `Tom Nicholas `_.
Breaking changes
~~~~~~~~~~~~~~~~
- Nodes containing only attributes but no data are now ignored by :py:func:`map_over_subtree` (:issue:`262`, :pull:`263`)
By `Tom Nicholas `_.
- Disallow altering of given dataset inside function called by :py:func:`map_over_subtree` (:pull:`269`, reverts part of :pull:`194`).
By `Tom Nicholas `_.
Bug fixes
~~~~~~~~~
- Fix unittests on i386. (:pull:`249`)
By `Antonio Valentino `_.
- Ensure nodepath class is compatible with python 3.12 (:pull:`260`)
By `Max Grover `_.
Documentation
~~~~~~~~~~~~~
- Added new sections to page on ``Working with Hierarchical Data`` (:pull:`180`)
By `Tom Nicholas `_.
Internal Changes
~~~~~~~~~~~~~~~~
* No longer use the deprecated `distutils` package.
.. _whats-new.v0.0.12:
v0.0.12 (03/07/2023)
--------------------
New Features
~~~~~~~~~~~~
- Added a :py:func:`DataTree.level`, :py:func:`DataTree.depth`, and :py:func:`DataTree.width` property (:pull:`208`).
By `Tom Nicholas `_.
- Allow dot-style (or "attribute-like") access to child nodes and variables, with ipython autocomplete. (:issue:`189`, :pull:`98`)
By `Tom Nicholas `_.
Breaking changes
~~~~~~~~~~~~~~~~
Deprecations
~~~~~~~~~~~~
- Dropped support for python 3.8 (:issue:`212`, :pull:`214`)
By `Tom Nicholas `_.
Bug fixes
~~~~~~~~~
- Allow for altering of given dataset inside function called by :py:func:`map_over_subtree` (:issue:`188`, :pull:`194`).
By `Tom Nicholas `_.
- copy subtrees without creating ancestor nodes (:pull:`201`)
By `Justus Magin `_.
Documentation
~~~~~~~~~~~~~
Internal Changes
~~~~~~~~~~~~~~~~
.. _whats-new.v0.0.11:
v0.0.11 (01/09/2023)
--------------------
Big update with entirely new pages in the docs,
new methods (``.drop_nodes``, ``.filter``, ``.leaves``, ``.descendants``), and bug fixes!
New Features
~~~~~~~~~~~~
- Added a :py:meth:`DataTree.drop_nodes` method (:issue:`161`, :pull:`175`).
By `Tom Nicholas `_.
- New, more specific exception types for tree-related errors (:pull:`169`).
By `Tom Nicholas `_.
- Added a new :py:meth:`DataTree.descendants` property (:pull:`170`).
By `Tom Nicholas `_.
- Added a :py:meth:`DataTree.leaves` property (:pull:`177`).
By `Tom Nicholas `_.
- Added a :py:meth:`DataTree.filter` method (:pull:`184`).
By `Tom Nicholas `_.
Breaking changes
~~~~~~~~~~~~~~~~
- :py:meth:`DataTree.copy` copy method now only copies the subtree, not the parent nodes (:pull:`171`).
By `Tom Nicholas `_.
- Grafting a subtree onto another tree now leaves name of original subtree object unchanged (:issue:`116`, :pull:`172`, :pull:`178`).
By `Tom Nicholas `_.
- Changed the :py:meth:`DataTree.assign` method to just work on the local node (:pull:`181`).
By `Tom Nicholas `_.
Deprecations
~~~~~~~~~~~~
Bug fixes
~~~~~~~~~
- Fix bug with :py:meth:`DataTree.relative_to` method (:issue:`133`, :pull:`160`).
By `Tom Nicholas `_.
- Fix links to API docs in all documentation (:pull:`183`).
By `Tom Nicholas `_.
Documentation
~~~~~~~~~~~~~
- Changed docs theme to match xarray's main documentation. (:pull:`173`)
By `Tom Nicholas `_.
- Added ``Terminology`` page. (:pull:`174`)
By `Tom Nicholas `_.
- Added page on ``Working with Hierarchical Data`` (:pull:`179`)
By `Tom Nicholas `_.
- Added context content to ``Index`` page (:pull:`182`)
By `Tom Nicholas `_.
- Updated the README (:pull:`187`)
By `Tom Nicholas `_.
Internal Changes
~~~~~~~~~~~~~~~~
.. _whats-new.v0.0.10:
v0.0.10 (12/07/2022)
--------------------
Adds accessors and a `.pipe()` method.
New Features
~~~~~~~~~~~~
- Add the ability to register accessors on ``DataTree`` objects, by using ``register_datatree_accessor``. (:pull:`144`)
By `Tom Nicholas `_.
- Allow method chaining with a new :py:meth:`DataTree.pipe` method (:issue:`151`, :pull:`156`).
By `Justus Magin `_.
Breaking changes
~~~~~~~~~~~~~~~~
Deprecations
~~~~~~~~~~~~
Bug fixes
~~~~~~~~~
- Allow ``DataTree`` objects as values in :py:meth:`DataTree.from_dict` (:pull:`159`).
By `Justus Magin `_.
Documentation
~~~~~~~~~~~~~
- Added ``Reading and Writing Files`` page. (:pull:`158`)
By `Tom Nicholas `_.
Internal Changes
~~~~~~~~~~~~~~~~
- Avoid reading from same file twice with fsspec3 (:pull:`130`)
By `William Roberts `_.
.. _whats-new.v0.0.9:
v0.0.9 (07/14/2022)
-------------------
New Features
~~~~~~~~~~~~
Breaking changes
~~~~~~~~~~~~~~~~
Deprecations
~~~~~~~~~~~~
Bug fixes
~~~~~~~~~
Documentation
~~~~~~~~~~~~~
- Switch docs theme (:pull:`123`).
By `JuliusBusecke `_.
Internal Changes
~~~~~~~~~~~~~~~~
.. _whats-new.v0.0.7:
v0.0.7 (07/11/2022)
-------------------
New Features
~~~~~~~~~~~~
- Improve the HTML repr by adding tree-style lines connecting groups and sub-groups (:pull:`109`).
By `Benjamin Woods `_.
Breaking changes
~~~~~~~~~~~~~~~~
- The ``DataTree.ds`` attribute now returns a view onto an immutable Dataset-like object, instead of an actual instance
of ``xarray.Dataset``. This may break existing ``isinstance`` checks or ``assert`` comparisons. (:pull:`99`)
By `Tom Nicholas `_.
Deprecations
~~~~~~~~~~~~
Bug fixes
~~~~~~~~~
- Modifying the contents of a ``DataTree`` object via the ``DataTree.ds`` attribute is now forbidden, which prevents
any possibility of the contents of a ``DataTree`` object and its ``.ds`` attribute diverging. (:issue:`38`, :pull:`99`)
By `Tom Nicholas `_.
- Fixed a bug so that names of children now always match keys under which parents store them (:pull:`99`).
By `Tom Nicholas `_.
Documentation
~~~~~~~~~~~~~
- Added ``Data Structures`` page describing the internal structure of a ``DataTree`` object, and its relation to
``xarray.Dataset`` objects. (:pull:`103`)
By `Tom Nicholas `_.
- API page updated with all the methods that are copied from ``xarray.Dataset``. (:pull:`41`)
By `Tom Nicholas `_.
Internal Changes
~~~~~~~~~~~~~~~~
- Refactored ``DataTree`` class to store a set of ``xarray.Variable`` objects instead of a single ``xarray.Dataset``.
This approach means that the ``DataTree`` class now effectively copies and extends the internal structure of
``xarray.Dataset``. (:pull:`41`)
By `Tom Nicholas `_.
- Refactored to use intermediate ``NamedNode`` class, separating implementation of methods requiring a ``name``
attribute from those not requiring it.
By `Tom Nicholas `_.
- Made ``testing.test_datatree.create_test_datatree`` into a pytest fixture (:pull:`107`).
By `Benjamin Woods `_.
.. _whats-new.v0.0.6:
v0.0.6 (06/03/2022)
-------------------
Various small bug fixes, in preparation for more significant changes in the next version.
Bug fixes
~~~~~~~~~
- Fixed bug with checking that assigning parent or new children did not create a loop in the tree (:pull:`105`)
By `Tom Nicholas `_.
- Do not call ``__exit__`` on Zarr store when opening (:pull:`90`)
By `Matt McCormick `_.
- Fix netCDF encoding for compression (:pull:`95`)
By `Joe Hamman `_.
- Added validity checking for node names (:pull:`106`)
By `Tom Nicholas `_.
.. _whats-new.v0.0.5:
v0.0.5 (05/05/2022)
-------------------
- Major refactor of internals, moving from the ``DataTree.children`` attribute being a ``Tuple[DataTree]`` to being a
``OrderedDict[str, DataTree]``. This was necessary in order to integrate better with xarray's dictionary-like API,
solve several issues, simplify the code internally, remove dependencies, and enable new features. (:pull:`76`)
By `Tom Nicholas `_.
New Features
~~~~~~~~~~~~
- Syntax for accessing nodes now supports file-like paths, including parent nodes via ``"../"``, relative paths, the
root node via ``"/"``, and the current node via ``"."``. (Internally it actually uses ``pathlib`` now.)
By `Tom Nicholas `_.
- New path-like API methods, such as ``.relative_to``, ``.find_common_ancestor``, and ``.same_tree``.
- Some new dictionary-like methods, such as ``DataTree.get`` and ``DataTree.update``. (:pull:`76`)
By `Tom Nicholas `_.
- New HTML repr, which will automatically display in a jupyter notebook. (:pull:`78`)
By `Tom Nicholas `_.
- New delitem method so you can delete nodes. (:pull:`88`)
By `Tom Nicholas `_.
- New ``to_dict`` method. (:pull:`82`)
By `Tom Nicholas `_.
Breaking changes
~~~~~~~~~~~~~~~~
- Node names are now optional, which means that the root of the tree can be unnamed. This has knock-on effects for
a lot of the API.
- The ``__init__`` signature for ``DataTree`` has changed, so that ``name`` is now an optional kwarg.
- Files will now be loaded as a slightly different tree, because the root group no longer needs to be given a default
name.
- Removed tag-like access to nodes.
- Removes the option to delete all data in a node by assigning None to the node (in favour of deleting data by replacing
the node's ``.ds`` attribute with an empty Dataset), or to create a new empty node in the same way (in favour of
assigning an empty DataTree object instead).
- Removes the ability to create a new node by assigning a ``Dataset`` object to ``DataTree.__setitem__``.
- Several other minor API changes such as ``.pathstr`` -> ``.path``, and ``from_dict``'s dictionary argument now being
required. (:pull:`76`)
By `Tom Nicholas `_.
Deprecations
~~~~~~~~~~~~
- No longer depends on the anytree library (:pull:`76`)
By `Tom Nicholas `_.
Bug fixes
~~~~~~~~~
- Fixed indentation issue with the string repr (:pull:`86`)
By `Tom Nicholas `_.
Documentation
~~~~~~~~~~~~~
- Quick-overview page updated to match change in path syntax (:pull:`76`)
By `Tom Nicholas `_.
Internal Changes
~~~~~~~~~~~~~~~~
- Basically every file was changed in some way to accommodate (:pull:`76`).
- No longer need the utility functions for string manipulation that were defined in ``utils.py``.
- A considerable amount of code copied over from the internals of anytree (e.g. in ``render.py`` and ``iterators.py``).
The Apache license for anytree has now been bundled with datatree. (:pull:`76`).
By `Tom Nicholas `_.
.. _whats-new.v0.0.4:
v0.0.4 (31/03/2022)
-------------------
- Ensure you get the pretty tree-like string representation by default in ipython (:pull:`73`).
By `Tom Nicholas `_.
- Now available on conda-forge (as xarray-datatree)! (:pull:`71`)
By `Anderson Banihirwe `_.
- Allow for python 3.8 (:pull:`70`).
By `Don Setiawan `_.
.. _whats-new.v0.0.3:
v0.0.3 (30/03/2022)
-------------------
- First released version available on PyPI (as xarray-datatree)!
datatree-0.0.14/pyproject.toml 0000664 0000000 0000000 00000002755 14552576503 0016326 0 ustar 00root root 0000000 0000000 [project]
name = "xarray-datatree"
description = "Hierarchical tree-like data structures for xarray"
readme = "README.md"
authors = [
{name = "Thomas Nicholas", email = "thomas.nicholas@columbia.edu"}
]
license = {text = "Apache-2"}
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
]
requires-python = ">=3.9"
dependencies = [
"xarray >=2023.12.0",
"packaging",
]
dynamic = ["version"]
[project.urls]
Home = "https://github.com/xarray-contrib/datatree"
Documentation = "https://xarray-datatree.readthedocs.io/en/stable/"
[build-system]
requires = [
"setuptools>=61.0.0",
"wheel",
"setuptools_scm[toml]>=7.0",
"check-manifest"
]
[tool.setuptools_scm]
write_to = "datatree/_version.py"
write_to_template = '''
# Do not change! Do not track in version control!
__version__ = "{version}"
'''
[tool.setuptools.packages.find]
exclude = ["docs", "tests", "tests.*", "docs.*"]
[tool.setuptools.package-data]
datatree = ["py.typed"]
[tool.isort]
profile = "black"
skip_gitignore = true
float_to_top = true
default_section = "THIRDPARTY"
known_first_party = "datatree"
[mypy]
files = "datatree/**/*.py"
show_error_codes = true
datatree-0.0.14/readthedocs.yml 0000664 0000000 0000000 00000000157 14552576503 0016414 0 ustar 00root root 0000000 0000000 version: 2
conda:
environment: ci/doc.yml
build:
os: 'ubuntu-20.04'
tools:
python: 'mambaforge-4.10'