././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1699387091.342064 h5netcdf-1.3.0/0000755000175100001770000000000014522513323012620 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/.pre-commit-config.yaml0000644000175100001770000000134014522513274017104 0ustar00runnerdockerrepos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: check-docstring-first - id: check-toml - id: check-yaml - id: debug-statements - id: mixed-line-ending - repo: https://github.com/charliermarsh/ruff-pre-commit rev: 'v0.0.291' hooks: - id: ruff args: [ "--fix" ] - repo: https://github.com/psf/black rev: 23.9.1 hooks: - id: black - repo: https://github.com/adamchainz/blacken-docs rev: "1.16.0" hooks: - id: blacken-docs additional_dependencies: - black==23.9.1 ci: autoupdate_commit_msg: 'MNT: update pre-commit hooks' ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/AUTHORS.txt0000644000175100001770000000042014522513274014507 0ustar00runnerdockerThe h5netcdf developers: * Stephan Hoyer (main h5netcdf author) * Kai Mühlbauer (current maintainer) * Developers that have contributed to the h5netcdf repository: ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/CHANGELOG.rst0000644000175100001770000002370514522513274014655 0ustar00runnerdockerChange Log ---------- Version 1.3.0 (November 7th, 2023): - Add ros3 support by checking `driver`-kwarg. By `Ezequiel Cimadevilla Alvarez `_ - Code and CI maintenance. By `Mark Harfouche `_ and `Kai Mühlbauer `_. Version 1.2.0 (June 2nd, 2023): - Remove h5py2 compatibility code, remove h5py2 CI runs, mention NEP29 as upstream dependency support strategy. By `Kai Mühlbauer `_ and `Mark Harfouche `_. - Update to pyproject.toml-only build process, adapt CI, use `ruff` for linting, add .pre-commit-config.yaml. By `Kai Mühlbauer `_. - Maintenance CI (use setup-micromamba), fix hsds, fix tests, fix license. By `Kai Mühlbauer `_. - Raise early with h5py-error. By `Kai Mühlbauer `_. - Add simple test to ensure that the shape is stored in the coordinates. By `Mark Harfouche `_. Version 1.1.0 (November 23rd, 2022): - Rework adding _FillValue-attribute, add tests. By `Kai Mühlbauer `_. - Add special add_phony method for creating phony dimensions, add test. By `Kai Mühlbauer `_. - Rewrite _unlabeled_dimension_mix (labeled/unlabeled), add tests. By `Kai Mühlbauer `_. - Add default netcdf fillvalues, pad only if necessary, adapt tests. By `Kai Mühlbauer `_. - Fix regression in padding algorithm, add test. By `Kai Mühlbauer `_. - Set ``track_order=True`` by default in created files if h5py 3.7.0 or greater is detected to help compatibility with netCDF4-c programs. By `Mark Harfouche `_. Version 1.0.2 (August 2nd, 2022): - Adapt boolean indexing as h5py 3.7.0 started supporting it. By `Kai Mühlbauer `_. - Fix several tests to work with new h5py 3.7.0. By `Mark Harfouche `_ and `Kai Mühlbauer `_. Version 1.0.1 (June 27th, 2022): - Fix failing tests when using netCDF4 4.9.0. Reported and patch submitted by `Bas Couwenberg `_. Version 1.0.0 (March 31st, 2022): - Add HSDS pytest-fixture, make tests work with h5ypd. By `Aleksandar Jelenak `_. - Remove `_NCProperties` from existing file if writing invalid netcdf features. Warn users if `.nc` file extension is used writing invalid netcdf features. By `Kai Mühlbauer `_. - Remove deprecated code (eg. remove deprecated code (eg. handling mode, chunking_heuristics, decode_vlen_strings), adapt LICENSE/AUTHOR.txt, prepare repository for release 1.0. By `Kai Mühlbauer `_. Version 0.15.0 (March 18th, 2022): - Add documentation to ``h5netcdf``, merging current available documentation available as ``.rst``-files, in the repo-wiki and new API-docs into one document using ``sphinx-doc`` and ``sphinx-book-theme``. By `Kai Mühlbauer `_. Version 0.14.1 (March 2nd, 2022): - Directly return non-string ``Empty``-type attributes as empty numpy-ndarray. By `Kai Mühlbauer `_. Version 0.14.0 (February 25, 2022): - Add ``chunking_heuristic`` keyword and custom heuristic ``chunking_heuristic="h5netcdf"`` with better handling of unlimited dimensions. By `Dion Häfner `_. - Return group name instead of full group path for legacy API. By `Kai Mühlbauer `_. - Add ``endian`` keyword argument ``legacyapi.Dataset.createVariable``. By `Kai Mühlbauer `_. - Resize Dimensions when writing to variables (legacy API only), return padded arrays. By `Kai Mühlbauer `_. - Allow 1D boolean indexers in legacy API. By `Kai Mühlbauer `_. - Revert order tracking by default to avoid a bug in ``h5py`` (Closes Issue #136). By `Mark Harfouche `_. - Implement Dimension-class. By `Kai Mühlbauer `_. - Return items from 0-dim and one-element 1-dim array attributes. Return multi-element attributes as lists. Return string attributes as Python strings decoded from their respective encoding (`utf-8`, `ascii`). By `Kai Mühlbauer `_. Version 0.13.0 (January 12, 2022): - Assign dimensions at creation time, instead of at sync/flush (file-close). By `Kai Mühlbauer `_. - Create/attach dimension scales on the fly, instead of at sync/flush (file-close). By `Kai Mühlbauer `_. - Ensure order tracking is true for newly created netcdf4 files as required by the netcdf4 standard. This enables files created by h5netcdf to be appended to by netCDF4 library users (Closes Issue #128). By `Mark Harfouche `_. Version 0.12.0 (December 20, 2021): - Added ``FutureWarning`` to use ``mode='r'`` as default when opening files. By `Ryan Grout `_. - Moved handling of ``_nc4_non_coord_`` to ``h5netcdf.BaseVariable``. By `Kai Mühlbauer `_. - Write ``_NCProperties`` when overwriting existing files. By `Kai Mühlbauer `_. - Create/Attach dimension scales on append (``mode="r+"``) By `Kai Mühlbauer `_. - Create/Attach/Detach dimension scales only if necessary. By `Kai Mühlbauer `_. - Switch warning into error when using invalid netCDF features. By `Kai Mühlbauer `_. - Avoid circular references to objects referencing h5py objects. By `Tom Augspurger `_. Version 0.11.0 (April 20, 2021): - Included ``h5pyd.Dataset`` objects as netCDF variables. By `Aleksandar Jelenak `_. - Added automatic PyPI upload on creation of github release. - Moved Changelog to CHANGELOG.rst. - Updated ``decode_vlen_strings`` ``FutureWarning``. - Support for ``h5py.Empty`` strings. By `Kai Mühlbauer `_. Version 0.10.0 (February 11, 2021): - Replaced ``decode_strings`` with ``decode_vlen_strings``. By `Kai Mühlbauer `_. Version 0.9.0 (February 7, 2021): - Special thanks to `Kai Mühlbauer `_ for stepping up as a co-maintainer! - Support for ``decode_strings``, to restore old behavior with h5py 3. By `Kai Mühlbauer `_. Version 0.8.1 (July 17, 2020): - Fix h5py deprecation warning in test suite. Version 0.8.0 (February 4, 2020): - Support for reading Datasets with missing dimension scales. By `Kai Mühlbauer `_. - Fixed a bug where ``Datatype`` objects were treated as ``Datasets``. - Fixed several issues with upstream deprecations. Version 0.7.4 (June 1, 2019): - Fixed a flakey test on Python 2.7 and 3.4. Version 0.7.3 (May 20, 2019): - Fixed another bug that could result in reusing dimension IDs, when modifying existing files. Version 0.7.1 (Mar 16, 2019): - Fixed a bug where h5netcdf could write invalid netCDF files with reused dimension IDs when dimensions are written in multiple groups. netCDF-C 4.6.2 will crash when reading these files, but you can still read these files with older versions of the netcdf library (or h5netcdf). - Updated to use version 2 of ``_NCProperties`` attribute. Version 0.7 (Feb 26, 2019): - Support for reading and writing file-like objects (requires h5py 2.9 or newer). By `Scott Henderson `_. Version 0.6.2 (Aug 19, 2018): - Fixed a bug that prevented creating variables with the same name as previously created dimensions in reopened files. Version 0.6.1 (Jun 8, 2018): - Compression with arbitrary filters no longer triggers warnings about invalid netCDF files, because this is now `supported by netCDF `__. Version 0.6 (Jun 7, 2018): - Support for reading and writing data to remote HDF5 files via the HDF5 REST API using the ``h5pyd`` package. Any file "path" starting with either ``http://``, ``https://``, or ``hdf5://`` will automatically trigger the use of this package. By `Aleksandar Jelenak `_. Version 0.5.1 (Apr 11, 2018): - Bug fix for files with an unlimited dimension with no associated variables. By `Aleksandar Jelenak `_. Version 0.5 (Oct 17, 2017): - Support for creating unlimited dimensions. By `Lion Krischer `_. Version 0.4.3 (Oct 10, 2017): - Fix test suite failure with recent versions of netCDF4-Python. Version 0.4.2 (Sep 12, 2017): - Raise ``AttributeError`` rather than ``KeyError`` when attributes are not found using the legacy API. This fixes an issue that prevented writing to h5netcdf with dask. Version 0.4.1 (Sep 6, 2017): - Include tests in source distribution on pypi. Version 0.4 (Aug 30, 2017): - Add ``invalid_netcdf`` argument. Warnings are now issued by default when writing an invalid NetCDF file. See the "Invalid netCDF files" section of the README for full details. Version 0.3.1 (Sep 2, 2016): - Fix garbage collection issue. - Add missing ``.flush()`` method for groups. - Allow creating dimensions of size 0. Version 0.3.0 (Aug 7, 2016): - Datasets are now loaded lazily. This should increase performance when opening files with a large number of groups and/or variables. - Support for writing arrays of variable length unicode strings with ``dtype=str`` via the legacy API. - h5netcdf now writes the ``_NCProperties`` attribute for identifying netCDF4 files. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/LICENSE0000644000175100001770000000274114522513274013636 0ustar00runnerdockerCopyright (c) 2015, h5netcdf developers All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1699387091.342064 h5netcdf-1.3.0/PKG-INFO0000644000175100001770000003211714522513323013721 0ustar00runnerdockerMetadata-Version: 2.1 Name: h5netcdf Version: 1.3.0 Summary: netCDF4 via h5py Author-email: Stephan Hoyer , Kai Mühlbauer Maintainer-email: h5netcdf developers License: Copyright (c) 2015, h5netcdf developers All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Project-URL: homepage, https://h5netcdf.org Project-URL: documentation, https://h5netcdf.org Project-URL: repository, https://github.com/h5netcdf/h5netcdf Project-URL: changelog, https://github.com/h5netcdf/h5netcdf/blob/main/CHANGELOG.rst Classifier: Development Status :: 5 - Production/Stable Classifier: License :: OSI Approved :: BSD License Classifier: Operating System :: OS Independent Classifier: Intended Audience :: Science/Research Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Topic :: Scientific/Engineering Requires-Python: >=3.9 Description-Content-Type: text/x-rst License-File: LICENSE License-File: AUTHORS.txt Requires-Dist: h5py Requires-Dist: packaging Provides-Extra: test Requires-Dist: netCDF4; extra == "test" Requires-Dist: pytest; extra == "test" h5netcdf ======== .. image:: https://github.com/h5netcdf/h5netcdf/workflows/CI/badge.svg :target: https://github.com/h5netcdf/h5netcdf/actions .. image:: https://badge.fury.io/py/h5netcdf.svg :target: https://pypi.org/project/h5netcdf/ .. image:: https://github.com/h5netcdf/h5netcdf/actions/workflows/pages/pages-build-deployment/badge.svg?branch=gh-pages :target: https://h5netcdf.github.io/h5netcdf/ A Python interface for the `netCDF4`_ file-format that reads and writes local or remote HDF5 files directly via `h5py`_ or `h5pyd`_, without relying on the Unidata netCDF library. .. _netCDF4: https://docs.unidata.ucar.edu/netcdf-c/current/file_format_specifications.html#netcdf_4_spec .. _h5py: https://www.h5py.org/ .. _h5pyd: https://github.com/HDFGroup/h5pyd .. why-h5netcdf Why h5netcdf? ------------- - It has one less binary dependency (netCDF C). If you already have h5py installed, reading netCDF4 with h5netcdf may be much easier than installing netCDF4-Python. - We've seen occasional reports of better performance with h5py than netCDF4-python, though in many cases performance is identical. For `one workflow`_, h5netcdf was reported to be almost **4x faster** than `netCDF4-python`_. - Anecdotally, HDF5 users seem to be unexcited about switching to netCDF -- hopefully this will convince them that netCDF4 is actually quite sane! - Finally, side-stepping the netCDF C library (and Cython bindings to it) gives us an easier way to identify the source of performance issues and bugs in the netCDF libraries/specification. .. _one workflow: https://github.com/Unidata/netcdf4-python/issues/390#issuecomment-93864839 .. _xarray: https://github.com/pydata/xarray/ Install ------- Ensure you have a recent version of h5py installed (I recommend using `conda`_ or the community effort `conda-forge`_). At least version 3.0 is required. Then:: $ pip install h5netcdf Or if you are already using conda:: $ conda install h5netcdf Note: From version 1.2. h5netcdf tries to align with a `nep29`_-like support policy with regard to it's upstream dependencies. .. _conda: https://conda.io/ .. _conda-forge: https://conda-forge.org/ .. _nep29: https://numpy.org/neps/nep-0029-deprecation_policy.html Usage ----- h5netcdf has two APIs, a new API and a legacy API. Both interfaces currently reproduce most of the features of the netCDF interface, with the notable exception of support for operations that rename or delete existing objects. We simply haven't gotten around to implementing this yet. Patches would be very welcome. New API ~~~~~~~ The new API supports direct hierarchical access of variables and groups. Its design is an adaptation of h5py to the netCDF data model. For example: .. code-block:: python import h5netcdf import numpy as np with h5netcdf.File('mydata.nc', 'w') as f: # set dimensions with a dictionary f.dimensions = {'x': 5} # and update them with a dict-like interface # f.dimensions['x'] = 5 # f.dimensions.update({'x': 5}) v = f.create_variable('hello', ('x',), float) v[:] = np.ones(5) # you don't need to create groups first # you also don't need to create dimensions first if you supply data # with the new variable v = f.create_variable('/grouped/data', ('y',), data=np.arange(10)) # access and modify attributes with a dict-like interface v.attrs['foo'] = 'bar' # you can access variables and groups directly using a hierarchical # keys like h5py print(f['/grouped/data']) # add an unlimited dimension f.dimensions['z'] = None # explicitly resize a dimension and all variables using it f.resize_dimension('z', 3) Notes: - Automatic resizing of unlimited dimensions with array indexing is not available. - Dimensions need to be manually resized with ``Group.resize_dimension(dimension, size)``. - Arrays are returned padded with ``fillvalue`` (taken from underlying hdf5 dataset) up to current size of variable's dimensions. The behaviour is equivalent to netCDF4-python's ``Dataset.set_auto_mask(False)``. Legacy API ~~~~~~~~~~ The legacy API is designed for compatibility with `netCDF4-python`_. To use it, import ``h5netcdf.legacyapi``: .. _netCDF4-python: https://github.com/Unidata/netcdf4-python .. code-block:: python import h5netcdf.legacyapi as netCDF4 # everything here would also work with this instead: # import netCDF4 import numpy as np with netCDF4.Dataset('mydata.nc', 'w') as ds: ds.createDimension('x', 5) v = ds.createVariable('hello', float, ('x',)) v[:] = np.ones(5) g = ds.createGroup('grouped') g.createDimension('y', 10) g.createVariable('data', 'i8', ('y',)) v = g['data'] v[:] = np.arange(10) v.foo = 'bar' print(ds.groups['grouped'].variables['data']) The legacy API is designed to be easy to try-out for netCDF4-python users, but it is not an exact match. Here is an incomplete list of functionality we don't include: - Utility functions ``chartostring``, ``num2date``, etc., that are not directly necessary for writing netCDF files. - h5netcdf variables do not support automatic masking or scaling (e.g., of values matching the ``_FillValue`` attribute). We prefer to leave this functionality to client libraries (e.g., `xarray`_), which can implement their exact desired scaling behavior. Nevertheless arrays are returned padded with ``fillvalue`` (taken from underlying hdf5 dataset) up to current size of variable's dimensions. The behaviour is equivalent to netCDF4-python's ``Dataset.set_auto_mask(False)``. .. _invalid netcdf: Invalid netCDF files ~~~~~~~~~~~~~~~~~~~~ h5py implements some features that do not (yet) result in valid netCDF files: - Data types: - Booleans - Complex values - Non-string variable length types - Enum types - Reference types - Arbitrary filters: - Scale-offset filters By default [#]_, h5netcdf will not allow writing files using any of these features, as files with such features are not readable by other netCDF tools. However, these are still valid HDF5 files. If you don't care about netCDF compatibility, you can use these features by setting ``invalid_netcdf=True`` when creating a file: .. code-block:: python # avoid the .nc extension for non-netcdf files f = h5netcdf.File('mydata.h5', invalid_netcdf=True) ... # works with the legacy API, too, though compression options are not exposed ds = h5netcdf.legacyapi.Dataset('mydata.h5', invalid_netcdf=True) ... In such cases the `_NCProperties` attribute will not be saved to the file or be removed from an existing file. A warning will be issued if the file has `.nc`-extension. .. rubric:: Footnotes .. [#] h5netcdf we will raise ``h5netcdf.CompatibilityError``. Decoding variable length strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ h5py 3.0 introduced `new behavior`_ for handling variable length string. Instead of being automatically decoded with UTF-8 into NumPy arrays of ``str``, they are required as arrays of ``bytes``. The legacy API preserves the old behavior of h5py (which matches netCDF4), and automatically decodes strings. The new API matches h5py behavior. Explicitly set ``decode_vlen_strings=True`` in the ``h5netcdf.File`` constructor to opt-in to automatic decoding. .. _new behavior: https://docs.h5py.org/en/stable/strings.html .. _phony dims: Datasets with missing dimension scales ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ By default [#]_ h5netcdf raises a ``ValueError`` if variables with no dimension scale associated with one of their axes are accessed. You can set ``phony_dims='sort'`` when opening a file to let h5netcdf invent phony dimensions according to `netCDF`_ behaviour. .. code-block:: python # mimic netCDF-behaviour for non-netcdf files f = h5netcdf.File('mydata.h5', mode='r', phony_dims='sort') ... Note, that this iterates once over the whole group-hierarchy. This has affects on performance in case you rely on laziness of group access. You can set ``phony_dims='access'`` instead to defer phony dimension creation to group access time. The created phony dimension naming will differ from `netCDF`_ behaviour. .. code-block:: python f = h5netcdf.File('mydata.h5', mode='r', phony_dims='access') ... .. rubric:: Footnotes .. [#] Keyword default setting ``phony_dims=None`` for backwards compatibility. .. _netCDF: https://docs.unidata.ucar.edu/netcdf-c/current/interoperability_hdf5.html Track Order ~~~~~~~~~~~ As of h5netcdf 1.1.0, if h5py 3.7.0 or greater is detected, the ``track_order`` parameter is set to ``True`` enabling `order tracking`_ for newly created netCDF4 files. This helps ensure that files created with the h5netcdf library can be modified by the netCDF4-c and netCDF4-python implementation used in other software stacks. Since this change should be transparent to most users, it was made without deprecation. Since track_order is set at creation time, any dataset that was created with ``track_order=False`` (h5netcdf version 1.0.2 and older except for 0.13.0) will continue to opened with order tracker disabled. The following describes the behavior of h5netcdf with respect to order tracking for a few key versions: - Version 0.12.0 and earlier, the ``track_order`` parameter`order was missing and thus order tracking was implicitely set to ``False``. - Version 0.13.0 enabled order tracking by setting the parameter ``track_order`` to ``True`` by default without deprecation. - Versions 0.13.1 to 1.0.2 set ``track_order`` to ``False`` due to a bug in a core dependency of h5netcdf, h5py `upstream bug`_ which was resolved in h5py 3.7.0 with the help of the h5netcdf team. - In version 1.1.0, if h5py 3.7.0 or above is detected, the ``track_order`` parameter is set to ``True`` by default. .. _order tracking: https://docs.unidata.ucar.edu/netcdf-c/current/file_format_specifications.html#creation_order .. _upstream bug: https://github.com/h5netcdf/h5netcdf/issues/136 .. _[*]: https://github.com/h5netcdf/h5netcdf/issues/128 .. changelog Changelog --------- `Changelog`_ .. _Changelog: https://github.com/h5netcdf/h5netcdf/blob/main/CHANGELOG.rst .. license License ------- `3-clause BSD`_ .. _3-clause BSD: https://github.com/h5netcdf/h5netcdf/blob/main/LICENSE ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/README.rst0000644000175100001770000002431714522513274014323 0ustar00runnerdockerh5netcdf ======== .. image:: https://github.com/h5netcdf/h5netcdf/workflows/CI/badge.svg :target: https://github.com/h5netcdf/h5netcdf/actions .. image:: https://badge.fury.io/py/h5netcdf.svg :target: https://pypi.org/project/h5netcdf/ .. image:: https://github.com/h5netcdf/h5netcdf/actions/workflows/pages/pages-build-deployment/badge.svg?branch=gh-pages :target: https://h5netcdf.github.io/h5netcdf/ A Python interface for the `netCDF4`_ file-format that reads and writes local or remote HDF5 files directly via `h5py`_ or `h5pyd`_, without relying on the Unidata netCDF library. .. _netCDF4: https://docs.unidata.ucar.edu/netcdf-c/current/file_format_specifications.html#netcdf_4_spec .. _h5py: https://www.h5py.org/ .. _h5pyd: https://github.com/HDFGroup/h5pyd .. why-h5netcdf Why h5netcdf? ------------- - It has one less binary dependency (netCDF C). If you already have h5py installed, reading netCDF4 with h5netcdf may be much easier than installing netCDF4-Python. - We've seen occasional reports of better performance with h5py than netCDF4-python, though in many cases performance is identical. For `one workflow`_, h5netcdf was reported to be almost **4x faster** than `netCDF4-python`_. - Anecdotally, HDF5 users seem to be unexcited about switching to netCDF -- hopefully this will convince them that netCDF4 is actually quite sane! - Finally, side-stepping the netCDF C library (and Cython bindings to it) gives us an easier way to identify the source of performance issues and bugs in the netCDF libraries/specification. .. _one workflow: https://github.com/Unidata/netcdf4-python/issues/390#issuecomment-93864839 .. _xarray: https://github.com/pydata/xarray/ Install ------- Ensure you have a recent version of h5py installed (I recommend using `conda`_ or the community effort `conda-forge`_). At least version 3.0 is required. Then:: $ pip install h5netcdf Or if you are already using conda:: $ conda install h5netcdf Note: From version 1.2. h5netcdf tries to align with a `nep29`_-like support policy with regard to it's upstream dependencies. .. _conda: https://conda.io/ .. _conda-forge: https://conda-forge.org/ .. _nep29: https://numpy.org/neps/nep-0029-deprecation_policy.html Usage ----- h5netcdf has two APIs, a new API and a legacy API. Both interfaces currently reproduce most of the features of the netCDF interface, with the notable exception of support for operations that rename or delete existing objects. We simply haven't gotten around to implementing this yet. Patches would be very welcome. New API ~~~~~~~ The new API supports direct hierarchical access of variables and groups. Its design is an adaptation of h5py to the netCDF data model. For example: .. code-block:: python import h5netcdf import numpy as np with h5netcdf.File('mydata.nc', 'w') as f: # set dimensions with a dictionary f.dimensions = {'x': 5} # and update them with a dict-like interface # f.dimensions['x'] = 5 # f.dimensions.update({'x': 5}) v = f.create_variable('hello', ('x',), float) v[:] = np.ones(5) # you don't need to create groups first # you also don't need to create dimensions first if you supply data # with the new variable v = f.create_variable('/grouped/data', ('y',), data=np.arange(10)) # access and modify attributes with a dict-like interface v.attrs['foo'] = 'bar' # you can access variables and groups directly using a hierarchical # keys like h5py print(f['/grouped/data']) # add an unlimited dimension f.dimensions['z'] = None # explicitly resize a dimension and all variables using it f.resize_dimension('z', 3) Notes: - Automatic resizing of unlimited dimensions with array indexing is not available. - Dimensions need to be manually resized with ``Group.resize_dimension(dimension, size)``. - Arrays are returned padded with ``fillvalue`` (taken from underlying hdf5 dataset) up to current size of variable's dimensions. The behaviour is equivalent to netCDF4-python's ``Dataset.set_auto_mask(False)``. Legacy API ~~~~~~~~~~ The legacy API is designed for compatibility with `netCDF4-python`_. To use it, import ``h5netcdf.legacyapi``: .. _netCDF4-python: https://github.com/Unidata/netcdf4-python .. code-block:: python import h5netcdf.legacyapi as netCDF4 # everything here would also work with this instead: # import netCDF4 import numpy as np with netCDF4.Dataset('mydata.nc', 'w') as ds: ds.createDimension('x', 5) v = ds.createVariable('hello', float, ('x',)) v[:] = np.ones(5) g = ds.createGroup('grouped') g.createDimension('y', 10) g.createVariable('data', 'i8', ('y',)) v = g['data'] v[:] = np.arange(10) v.foo = 'bar' print(ds.groups['grouped'].variables['data']) The legacy API is designed to be easy to try-out for netCDF4-python users, but it is not an exact match. Here is an incomplete list of functionality we don't include: - Utility functions ``chartostring``, ``num2date``, etc., that are not directly necessary for writing netCDF files. - h5netcdf variables do not support automatic masking or scaling (e.g., of values matching the ``_FillValue`` attribute). We prefer to leave this functionality to client libraries (e.g., `xarray`_), which can implement their exact desired scaling behavior. Nevertheless arrays are returned padded with ``fillvalue`` (taken from underlying hdf5 dataset) up to current size of variable's dimensions. The behaviour is equivalent to netCDF4-python's ``Dataset.set_auto_mask(False)``. .. _invalid netcdf: Invalid netCDF files ~~~~~~~~~~~~~~~~~~~~ h5py implements some features that do not (yet) result in valid netCDF files: - Data types: - Booleans - Complex values - Non-string variable length types - Enum types - Reference types - Arbitrary filters: - Scale-offset filters By default [#]_, h5netcdf will not allow writing files using any of these features, as files with such features are not readable by other netCDF tools. However, these are still valid HDF5 files. If you don't care about netCDF compatibility, you can use these features by setting ``invalid_netcdf=True`` when creating a file: .. code-block:: python # avoid the .nc extension for non-netcdf files f = h5netcdf.File('mydata.h5', invalid_netcdf=True) ... # works with the legacy API, too, though compression options are not exposed ds = h5netcdf.legacyapi.Dataset('mydata.h5', invalid_netcdf=True) ... In such cases the `_NCProperties` attribute will not be saved to the file or be removed from an existing file. A warning will be issued if the file has `.nc`-extension. .. rubric:: Footnotes .. [#] h5netcdf we will raise ``h5netcdf.CompatibilityError``. Decoding variable length strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ h5py 3.0 introduced `new behavior`_ for handling variable length string. Instead of being automatically decoded with UTF-8 into NumPy arrays of ``str``, they are required as arrays of ``bytes``. The legacy API preserves the old behavior of h5py (which matches netCDF4), and automatically decodes strings. The new API matches h5py behavior. Explicitly set ``decode_vlen_strings=True`` in the ``h5netcdf.File`` constructor to opt-in to automatic decoding. .. _new behavior: https://docs.h5py.org/en/stable/strings.html .. _phony dims: Datasets with missing dimension scales ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ By default [#]_ h5netcdf raises a ``ValueError`` if variables with no dimension scale associated with one of their axes are accessed. You can set ``phony_dims='sort'`` when opening a file to let h5netcdf invent phony dimensions according to `netCDF`_ behaviour. .. code-block:: python # mimic netCDF-behaviour for non-netcdf files f = h5netcdf.File('mydata.h5', mode='r', phony_dims='sort') ... Note, that this iterates once over the whole group-hierarchy. This has affects on performance in case you rely on laziness of group access. You can set ``phony_dims='access'`` instead to defer phony dimension creation to group access time. The created phony dimension naming will differ from `netCDF`_ behaviour. .. code-block:: python f = h5netcdf.File('mydata.h5', mode='r', phony_dims='access') ... .. rubric:: Footnotes .. [#] Keyword default setting ``phony_dims=None`` for backwards compatibility. .. _netCDF: https://docs.unidata.ucar.edu/netcdf-c/current/interoperability_hdf5.html Track Order ~~~~~~~~~~~ As of h5netcdf 1.1.0, if h5py 3.7.0 or greater is detected, the ``track_order`` parameter is set to ``True`` enabling `order tracking`_ for newly created netCDF4 files. This helps ensure that files created with the h5netcdf library can be modified by the netCDF4-c and netCDF4-python implementation used in other software stacks. Since this change should be transparent to most users, it was made without deprecation. Since track_order is set at creation time, any dataset that was created with ``track_order=False`` (h5netcdf version 1.0.2 and older except for 0.13.0) will continue to opened with order tracker disabled. The following describes the behavior of h5netcdf with respect to order tracking for a few key versions: - Version 0.12.0 and earlier, the ``track_order`` parameter`order was missing and thus order tracking was implicitely set to ``False``. - Version 0.13.0 enabled order tracking by setting the parameter ``track_order`` to ``True`` by default without deprecation. - Versions 0.13.1 to 1.0.2 set ``track_order`` to ``False`` due to a bug in a core dependency of h5netcdf, h5py `upstream bug`_ which was resolved in h5py 3.7.0 with the help of the h5netcdf team. - In version 1.1.0, if h5py 3.7.0 or above is detected, the ``track_order`` parameter is set to ``True`` by default. .. _order tracking: https://docs.unidata.ucar.edu/netcdf-c/current/file_format_specifications.html#creation_order .. _upstream bug: https://github.com/h5netcdf/h5netcdf/issues/136 .. _[*]: https://github.com/h5netcdf/h5netcdf/issues/128 .. changelog Changelog --------- `Changelog`_ .. _Changelog: https://github.com/h5netcdf/h5netcdf/blob/main/CHANGELOG.rst .. license License ------- `3-clause BSD`_ .. _3-clause BSD: https://github.com/h5netcdf/h5netcdf/blob/main/LICENSE ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1699387091.3380637 h5netcdf-1.3.0/doc/0000755000175100001770000000000014522513323013365 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/doc/Makefile0000644000175100001770000000120214522513274015025 0ustar00runnerdocker# Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = $(CURDIR) BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/doc/api.rst0000644000175100001770000000047514522513274014703 0ustar00runnerdocker.. currentmodule:: h5netcdf .. _api: ################# New API reference ################# This page provides an auto-generated summary of h5netcdf's new API. .. autosummary:: :toctree: generated/ File File.create_group File.create_variable File.resize_dimension Group Dimension Variable ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/doc/changelog.rst0000644000175100001770000000003714522513274016053 0ustar00runnerdocker.. include:: ../CHANGELOG.rst ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/doc/conf.py0000644000175100001770000000765014522513274014701 0ustar00runnerdocker# Configuration file for the Sphinx documentation builder. # # This file only contains a selection of the most common options. For a full # list see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html # -- Path setup -------------------------------------------------------------- import datetime # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys sys.path.insert(0, os.path.abspath(".")) # -- Project information ----------------------------------------------------- # The full version, including alpha/beta/rc tags. import h5netcdf release = h5netcdf.__version__ project = "h5netcdf" copyright = "2015-%s, h5netcdf developers" % datetime.datetime.now().year language = "en" # -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.coverage", "sphinx.ext.extlinks", "sphinx.ext.intersphinx", "sphinx.ext.napoleon", "sphinx.ext.mathjax", "sphinx.ext.todo", "sphinx.ext.autosectionlabel", "sphinx.ext.githubpages", ] extlinks = { "issue": ("https://github.com/h5netcdf/h5netcdf/issues/%s", "GH%s"), "pull": ("https://github.com/h5netcdf/h5netcdf/pull/%s", "PR%s"), } # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = [] autosummary_generate = True autoclass_content = "class" autodoc_default_options = { "members": True, "undoc-members": False, "inherited-members": False, } # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = "sphinx_book_theme" html_title = f"h5netcdf - {release}" html_baseurl = "https://h5netcdf.org" html_context = { "github_user": "h5netcdf", "github_repo": "h5netcdf", "github_version": "main", "doc_path": "doc", } html_theme_options = { "show_toc_level": 2, "use_download_button": True, "repository_url": "https://github.com/h5netcdf/h5netcdf", "repository_branch": "main", "path_to_docs": "doc", "use_edit_page_button": True, "use_issues_button": True, "use_repository_button": True, "use_download_button": True, } intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), "numpy": ("https://numpy.org/doc/stable/", None), } # -- Napoleon settings for docstring processing ------------------------------- napoleon_google_docstring = False napoleon_numpy_docstring = True napoleon_include_special_with_doc = False napoleon_use_param = False napoleon_use_rtype = False napoleon_preprocess_types = True napoleon_type_aliases = { "scalar": ":term:`scalar`", "sequence": ":term:`sequence`", "callable": ":py:func:`callable`", "file-like": ":term:`file-like `", "array-like": ":term:`array-like `", "Path": "~~pathlib.Path", } # handle release substition url = "https://github.com/h5netcdf" # get version version_tuple = h5netcdf._version.version_tuple # is release? if len(version_tuple) == 3: gh_tree_name = f"v{h5netcdf._version.version}" else: # extract git revision gh_tree_name = version_tuple[-1].split(".")[0][1:] rel = "`{0} <{1}/h5netcdf/tree/{2}>`__".format(release, url, gh_tree_name) rst_epilog = "" rst_epilog += f""" .. |release| replace:: {rel} """ ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/doc/devguide.rst0000644000175100001770000001223214522513274015720 0ustar00runnerdockerDevelopers Guide ================ Team ---- - `Kai Mühlbauer `_ - `Stephan Hoyer `_ Contributors ------------ - `Aleksandar Jelenak `_ - `Bas Couwenberg `_. - `Brett Naul `_ - `Dion Häfner `_ - `Drew Parsons `_ - `Frédéric Laliberté `_ - `Ghislain Vaillant `_ - `Lion Krischer `_ - `Mark Harfouche `_ - `Martin Raspaud `_ - `Pierre Augier `_ - `Ryan Grout `_ - `Scott Henderson `_ - `Tom Augspurger `_ If you are interested to contribute, just let us know by creating an issue or pull request on github. Contribution Guidelines ----------------------- - New features and changes should be added via Pull Requests from forks for contributors as well as maintainers. - Pull Requests should have at least one approval (once the maintainer count has increased). - Self merges without approval are allowed for repository maintenance, hotfixes and if the code changes do not affect functionality. - Directly pushing to the repository main branch should only be used as a last resort. - Releases should be introduced via Pull Request and approved. Exception: Patch release after hotfix. Continuous Integration ---------------------- ``h5netcdf`` uses GitHub Actions for Continuous Integration (CI). On every ``push`` to a repository branch or a PullRequest branch several checks are performed: - Lint and style checks (``ruff``, ``black``) - Unit tests with latest ``h5py3`` (Python 3.9, 3.10, 3.11) facilitating GitHub Ubuntu worker - Documentation build, artifacts are made available to download - On release, source-tarball and universal wheel is uploaded to PyPI and documentation is made available on `h5netcdf GitHub Pages`_ .. _h5netcdf GitHub Pages: https://h5netcdf.github.io/h5netcdf Documentation ------------- The documentation, located in ``doc``-folder, can be created using ``sphinx-doc`` and the ``sphinx-book_theme``:: $ cd doc $ make html The rendered documentation is then available in the subfolder ``_build``. Due to the history several documents, eg. `README.rst`_ and `CHANGELOG.rst`_, are located in the project's root folder. They are linked into the documentation via ``.. include``-directive. Links and cross-references originating from these files should be hardcoded to maintain operation also in non-rendered format. .. _README.rst: https://github.com/h5netcdf/h5netcdf/blob/main/README.rst .. _CHANGELOG.rst: https://github.com/h5netcdf/h5netcdf/blob/main/CHANGELOG.rst Release Workflow ---------------- 1. Create release commit (can be done per PullRequest for more visibility) * versioning is done via `setuptools_scm` * update CHANGELOG.rst if necessary * add/update sections to README.rst (or documentation) if necessary * check all needed dependencies are listed in setup.py 2. Create release * draft `new github release`_ * tag version (eg `v1.2.0`) `@ Target: main` * set release title (eg. `release 1.2.0`) * add release description (eg. `bugfix-release`), tbd. This will start the CI workflow once again. The workflow creates `sdist` and universal `wheel` and uploads it to PyPI. .. _new github release: https://github.com/h5netcdf/h5netcdf/releases/new References ---------- This section contains links to material how ``netCDF4`` facilitates ``HDF5``. Some valuable links on dimension scales: - `HDF5 Dimension Scales`_ - `HDF5 Dimension Scales Part 2`_ - `HDF5 Dimension Scales Part 3`_ - `NetCDF-4 Dimensions and HDF5 Dimension Scales`_ - `NetCDF-4 use of dimension scales`_ Other resources - `NetCDF-4 performance`_ - `String **NULLTERM** vs. **NULLPAD**`_ netCDF4-python quirks: - ``_Netcdf4Dimid`` gets attached to all data variables if a 2D coordinate variable is created and any variable is written/file is reopened for append, see `issue 1104`_ - unlimited variable dimensions are reported as current size of the dimension scale, even if the variable's underlying ``DATASPACE`` dimension is smaller (eg. 0) .. _HDF5 Dimension Scales: https://www.unidata.ucar.edu/blogs/developer/en/entry/dimensions_scales .. _HDF5 Dimension Scales Part 2: https://www.unidata.ucar.edu/blogs/developer/en/entry/dimension_scale2 .. _HDF5 Dimension Scales Part 3: https://www.unidata.ucar.edu/blogs/developer/en/entry/dimension_scales_part_3 .. _NetCDF-4 Dimensions and HDF5 Dimension Scales: https://www.unidata.ucar.edu/blogs/developer/en/entry/netcdf4_shared_dimensions .. _NetCDF-4 use of dimension scales: https://www.unidata.ucar.edu/blogs/developer/en/entry/netcdf4_use_of_dimension_scales .. _NetCDF-4 performance: https://www.researchgate.net/publication/330347054_2A5_NETCDF-4_PERFORMANCE_IMPROVEMENTS_OPENING_COMPLEX_DATA_FILES .. _String **NULLTERM** vs. **NULLPAD**: https://github.com/PyTables/PyTables/issues/264 .. _issue 1104: https://github.com/Unidata/netcdf4-python/issues/1104 ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/doc/feature.rst0000644000175100001770000000650714522513274015567 0ustar00runnerdockerlegacyapi vs new API feature comparison ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In general both API should be comparable in handling netCDF4 files. The :ref:`legacyapi ` is more in line with `netCDF4-python`_ , whereas the :ref:`new API ` aligns to `h5py`_. Still, there are some differences which are outlined in the following table. .. _netCDF4-python: https://unidata.github.io/netcdf4-python/ .. _h5py: https://www.h5py.org/ .. include:: +---------------------+-------------------------+----------------+------------------+ | feature | legacyapi | new api | type | +=====================+=========================+================+==================+ | 1D boolean indexer | |check| | |check| | Variable/Dataset | +---------------------+-------------------------+----------------+------------------+ | resize on write | |check| | |cross| | Dimension | | | | | Variable/Dataset | +---------------------+-------------------------+----------------+------------------+ | resize dimension | only current dimension | dimension and | Dimension | | | | all connected | Variable/Dataset | | | | variables | | +---------------------+-------------------------+----------------+------------------+ | group name | name only | full path | Group | +---------------------+-------------------------+----------------+------------------+ | phony_dims | kwarg | kwarg | Dimension | +---------------------+-------------------------+----------------+------------------+ | decode_vlen_strings | |check| | kwarg | Variable/Dataset | +---------------------+-------------------------+----------------+------------------+ | chunk sizes | ``h5netcdf``-style | kwarg | Variable/Dataset | +---------------------+-------------------------+----------------+------------------+ | dimension ``.size`` | max size dimension | size dimension | Dimension | | | and connected variables | | | +---------------------+-------------------------+----------------+------------------+ | | | | Attribute | | valid netcdf | kwarg | kwarg | Variable/Dataset | +---------------------+-------------------------+----------------+------------------+ | ``h5py.Empty`` | | | | | string attrs | ``b""`` | ``b""`` | Attribute | +---------------------+-------------------------+----------------+------------------+ | endian | |check| | |cross| | Variable/Dataset | +---------------------+-------------------------+----------------+------------------+ | track order | |cross| | |cross| | File/Group | | | | | Dataset | +---------------------+-------------------------+----------------+------------------+ ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/doc/index.rst0000644000175100001770000000443714522513274015243 0ustar00runnerdocker.. h5netcdf documentation master file, created by sphinx-quickstart on Sat Mar 5 16:30:15 2022. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. h5netcdf: A Python interface for the netCDF4 file-format based on h5py ====================================================================== :Release: |release| :Date: |today| **h5netcdf** is an open source project and Python package that provides an interface for the `netCDF4 file-format`_ that reads and writes local or remote HDF5 files directly via `h5py`_ or `h5pyd`_, without relying on the Unidata netCDF library. .. toctree:: :maxdepth: 2 :hidden: :caption: For users Overview Feature comparison New API Reference Legacy API Reference .. toctree:: :maxdepth: 2 :hidden: :caption: For developers Developers Guide Changelog .. toctree:: :maxdepth: 1 :hidden: :caption: Community GitHub issues .. include:: ../README.rst :start-after: .. why-h5netcdf :end-before: .. changelog History ------- The project was started in early 2015. The first commit was made on 7th of April in 2015 by Stephan Hoyer. The first `official` ``h5netcdf`` announcement was made by Stephan on the `xarray issue tracker`_ only one day later. The library evolved constantly over the years (fixing bugs and adding enhancements) and gained contributions from 15 other :ref:`contributors` so far. The library is widely used, especially as backend within `xarray`_. Early 2020 Kai Mühlbauer started to add contributions and after some time he volunteered to help in maintaining ``h5netcdf``. Two years later in January 2022 Stephan handed the project-lead over to Kai. ``h5netcdf`` version 1.0 was released on 31st of March 2022. .. _netCDF4 file-format: https://docs.unidata.ucar.edu/netcdf-c/current/file_format_specifications.html#netcdf_4_spec .. _h5py: https://www.h5py.org/ .. _h5pyd: https://github.com/HDFGroup/h5pyd .. _xarray issue tracker: https://github.com/pydata/xarray/issues/23#issuecomment-90780331 .. include:: ../README.rst :start-after: .. license Indices and tables ------------------ * :ref:`genindex` * :ref:`modindex` * :ref:`search` ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/doc/legacyapi.rst0000644000175100001770000000054214522513274016063 0ustar00runnerdocker.. currentmodule:: h5netcdf.legacyapi .. _legacyapi: #################### Legacy API reference #################### This page provides an auto-generated summary of h5netcdf's legacy API. .. autosummary:: :toctree: generated/ Dataset Dataset.createGroup Dataset.createDimension Dataset.createVariable Group Dimension Variable ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1699387091.3380637 h5netcdf-1.3.0/h5netcdf/0000755000175100001770000000000014522513323014320 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/h5netcdf/__init__.py0000644000175100001770000000070714522513274016442 0ustar00runnerdocker""" h5netcdf ======== A Python library for the netCDF4 file-format that directly reads and writes HDF5 files via h5py, without using the Unidata netCDF library. """ try: from ._version import version as __version__ except Exception: # Local copy or not installed with setuptools. # Disable minimum version checks on downstream libraries. __version__ = "999" from .core import CompatibilityError, Dimension, File, Group, Variable # noqa ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387091.0 h5netcdf-1.3.0/h5netcdf/_version.py0000644000175100001770000000063314522513323016520 0ustar00runnerdocker# file generated by setuptools_scm # don't change, don't track in version control TYPE_CHECKING = False if TYPE_CHECKING: from typing import Tuple, Union VERSION_TUPLE = Tuple[Union[int, str], ...] else: VERSION_TUPLE = object version: str __version__: str __version_tuple__: VERSION_TUPLE version_tuple: VERSION_TUPLE __version__ = version = '1.3.0' __version_tuple__ = version_tuple = (1, 3, 0) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/h5netcdf/attrs.py0000644000175100001770000000677514522513274016053 0ustar00runnerdockerfrom collections.abc import MutableMapping import numpy as np _HIDDEN_ATTRS = frozenset( [ "REFERENCE_LIST", "CLASS", "DIMENSION_LIST", "NAME", "_Netcdf4Dimid", "_Netcdf4Coordinates", "_nc3_strict", "_NCProperties", ] ) class Attributes(MutableMapping): def __init__(self, h5attrs, check_dtype, h5py_pckg): self._h5attrs = h5attrs self._check_dtype = check_dtype self._h5py = h5py_pckg def __getitem__(self, key): if key in _HIDDEN_ATTRS: raise KeyError(key) # get original attribute via h5py low level api # see https://github.com/h5py/h5py/issues/2045 if self._h5py.__name__ == "h5py": attr = self._h5attrs.get_id(key) else: attr = self._h5attrs[key] # handle Empty types if isinstance(self._h5attrs[key], self._h5py.Empty): # see https://github.com/h5netcdf/h5netcdf/issues/94 for details string_info = self._h5py.check_string_dtype(self._h5attrs[key].dtype) if string_info and string_info.length == 1: return b"" # see https://github.com/h5netcdf/h5netcdf/issues/154 for details else: return np.array([], dtype=attr.dtype) output = self._h5attrs[key] # string decoding subtleties # vlen strings are already decoded -> only decode fixed length strings # see https://github.com/h5netcdf/h5netcdf/issues/116 # netcdf4-python returns string arrays as lists, we do as well if self._h5py.__name__ == "h5py": string_info = self._h5py.check_string_dtype(attr.dtype) if string_info is not None: # do not decode "S1"-type char arrays, as they are actually wanted as bytes # see https://github.com/Unidata/netcdf4-python/issues/271 if string_info.length is not None and string_info.length > 1: encoding = string_info.encoding if np.isscalar(output): output = output.decode(encoding, "surrogateescape") else: output = [ b.decode(encoding, "surrogateescape") for b in output.flat ] else: # transform string array to list if not np.isscalar(output): output = output.tolist() # return item if single element list/array # see https://github.com/h5netcdf/h5netcdf/issues/116 if not np.isscalar(output) and len(output) == 1: return output[0] return output def __setitem__(self, key, value): if key in _HIDDEN_ATTRS: raise AttributeError("cannot write attribute with reserved name %r" % key) if hasattr(value, "dtype"): dtype = value.dtype else: dtype = np.asarray(value).dtype self._check_dtype(dtype) self._h5attrs[key] = value def __delitem__(self, key): del self._h5attrs[key] def __iter__(self): for key in self._h5attrs: if key not in _HIDDEN_ATTRS: yield key def __len__(self): hidden_count = sum(1 if attr in self._h5attrs else 0 for attr in _HIDDEN_ATTRS) return len(self._h5attrs) - hidden_count def __repr__(self): return "\n".join(["%r" % type(self)] + [f"{k}: {v!r}" for k, v in self.items()]) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/h5netcdf/core.py0000644000175100001770000013111614522513274015632 0ustar00runnerdocker# For details on how netCDF4 builds on HDF5: # http://www.unidata.ucar.edu/software/netcdf/docs/file_format_specifications.html#netcdf_4_spec import os.path import warnings import weakref from collections import ChainMap, Counter, OrderedDict, defaultdict from collections.abc import Mapping import h5py import numpy as np from packaging import version from . import __version__ from .attrs import Attributes from .dimensions import Dimension, Dimensions from .utils import Frozen try: import h5pyd except ImportError: no_h5pyd = True else: no_h5pyd = False NOT_A_VARIABLE = b"This is a netCDF dimension but not a netCDF variable." def _join_h5paths(parent_path, child_path): return "/".join([parent_path.rstrip("/"), child_path.lstrip("/")]) def _name_from_dimension(dim): # First value in a dimension is the actual dimension scale # which we'll use to extract the name. return dim[0].name.split("/")[-1] class CompatibilityError(Exception): """Raised when using features that are not part of the NetCDF4 API.""" def _invalid_netcdf_feature(feature, allow): if not allow: msg = ( f"{feature} are not a supported NetCDF feature, and are not allowed by " "h5netcdf unless invalid_netcdf=True." ) raise CompatibilityError(msg) def _transform_1d_boolean_indexers(key): """Find and transform 1D boolean indexers to int""" # return key, if not iterable try: key = [ np.asanyarray(k).nonzero()[0] if isinstance(k, (np.ndarray, list)) and type(k[0]) in (bool, np.bool_) else k for k in key ] except TypeError: return key return tuple(key) def _expanded_indexer(key, ndim): """Expand indexing key to tuple with length equal the number of dimensions.""" # ToDo: restructure this routine to gain more performance # short circuit, if we have only slice if key is tuple and all(isinstance(k, slice) for k in key): return key # always return tuple and force colons to slices key = np.index_exp[key] # dimensions len_key = len(key) # find Ellipsis ellipsis = [i for i, k in enumerate(key) if k is Ellipsis] if len(ellipsis) > 1: raise IndexError( f"an index can only have a single ellipsis ('...'), {len(ellipsis)} given" ) else: # expand Ellipsis wherever it is len_key -= len(ellipsis) res_dim_cnt = ndim - len_key res_dims = res_dim_cnt * (slice(None),) ellipsis = ellipsis[0] if ellipsis else None # check for correct dimensionality if ndim and res_dim_cnt < 0: raise IndexError( f"too many indices for array: array is {ndim}-dimensional, but {len_key} were indexed" ) # convert remaining integer indices to slices key = tuple([slice(k, k + 1) if isinstance(k, int) else k for k in key]) # slices to build resulting key k1 = slice(ellipsis) k2 = slice(len_key, None) if ellipsis is None else slice(ellipsis + 1, None) return key[k1] + res_dims + key[k2] class BaseVariable: def __init__(self, parent, name, dimensions=None): self._parent_ref = weakref.ref(parent) self._root_ref = weakref.ref(parent._root) self._h5path = _join_h5paths(parent.name, name) self._dimensions = dimensions self._initialized = True @property def _parent(self): return self._parent_ref() @property def _root(self): return self._root_ref() @property def _h5ds(self): # Always refer to the root file and store not h5py object # subclasses: return self._root._h5file[self._h5path] @property def name(self): """Return variable name.""" # fix name if _nc4_non_coord_ return self._h5ds.name.replace("_nc4_non_coord_", "") def _lookup_dimensions(self): attrs = self._h5ds.attrs # coordinate variable and dimension, eg. 1D ("time") or 2D string variable if ( "_Netcdf4Coordinates" in attrs and attrs.get("CLASS", None) == b"DIMENSION_SCALE" ): order_dim = { value._dimid: key for key, value in self._parent._all_dimensions.items() } return tuple( order_dim[coord_id] for coord_id in attrs["_Netcdf4Coordinates"] ) # normal variable carrying DIMENSION_LIST # extract hdf5 file references and get objects name if "DIMENSION_LIST" in attrs: # check if malformed variable and raise if _unlabeled_dimension_mix(self._h5ds) == "labeled": # If a dimension has attached more than one scale for some reason, then # take the last one. This is in line with netcdf-c and netcdf4-python. return tuple( self._root._h5file[ref[-1]].name.split("/")[-1] for ref in list(self._h5ds.attrs.get("DIMENSION_LIST", [])) ) # need to use the h5ds name here to distinguish from collision dimensions child_name = self._h5ds.name.split("/")[-1] if child_name in self._parent._all_dimensions: return (child_name,) dims = [] phony_dims = defaultdict(int) for axis, dim in enumerate(self._h5ds.dims): if len(dim): name = _name_from_dimension(dim) else: # if unlabeled dimensions are found if self._root._phony_dims_mode is None: raise ValueError( f"variable {self.name!r} has no dimension scale " f"associated with axis {axis}. \n" f"Use phony_dims='sort' for sorted naming or " f"phony_dims='access' for per access naming." ) else: # get current dimension dimsize = self._h5ds.shape[axis] # get dimension names dim_names = [ d.name # for phony dims we need to look only in the current group for d in self._parent._all_dimensions.maps[0].values() if d.size == dimsize ] # extract wanted dimension name name = dim_names[phony_dims[dimsize]].split("/")[-1] phony_dims[dimsize] += 1 dims.append(name) return tuple(dims) def _attach_dim_scales(self): """Attach dimension scales""" for n, dim in enumerate(self.dimensions): # find and attach dimensions also in parent groups self._h5ds.dims[n].attach_scale(self._parent._all_dimensions[dim]._h5ds) def _attach_coords(self): dims = self.dimensions # find dimensions also in parent groups coord_ids = np.array( [self._parent._all_dimensions[d]._dimid for d in dims], "int32", ) if len(coord_ids) > 1: self._h5ds.attrs["_Netcdf4Coordinates"] = coord_ids def _ensure_dim_id(self): """Set _Netcdf4Dimid""" # set _Netcdf4Dimid, use id of first dimension # netCDF4 does this when the first variable's data is written if self.dimensions and not self._h5ds.attrs.get("_Netcdf4Dimid", False): dim = self._parent._all_h5groups[self.dimensions[0]] if "_Netcdf4Dimid" in dim.attrs: self._h5ds.attrs["_Netcdf4Dimid"] = dim.attrs["_Netcdf4Dimid"] def _maybe_resize_dimensions(self, key, value): """Resize according to given (expanded) key with respect to variable dimensions""" new_shape = () v = None for i, dim in enumerate(self.dimensions): # is unlimited dimensions (check in all dimensions) if self._parent._all_dimensions[dim].isunlimited(): if key[i].stop is None: # if stop is None, get dimensions from value, # they must match with variable dimension if v is None: v = np.asarray(value) if v.ndim == self.ndim: new_max = max(v.shape[i], self._h5ds.shape[i]) elif v.ndim == 0: # for scalars we take the current dimension size (check in all dimensions new_max = self._parent._all_dimensions[dim].size else: raise IndexError("shape of data does not conform to slice") else: new_max = max(key[i].stop, self._h5ds.shape[i]) # resize unlimited dimension if needed but no other variables # this is in line with `netcdf4-python` which only resizes # the dimension and this variable if self._parent._all_dimensions[dim].size < new_max: self._parent.resize_dimension(dim, new_max) new_shape += (new_max,) else: new_shape += (self._parent._all_dimensions[dim].size,) # increase variable size if shape is changing if self._h5ds.shape != new_shape: self._h5ds.resize(new_shape) @property def dimensions(self): """Return variable dimension names.""" if self._dimensions is None: self._dimensions = self._lookup_dimensions() return self._dimensions @property def shape(self): """Return current sizes of all variable dimensions.""" # return actual dimensions sizes, this is in line with netcdf4-python return tuple([self._parent._all_dimensions[d].size for d in self.dimensions]) @property def ndim(self): """Return number variable dimensions""" return len(self.shape) def __len__(self): return self.shape[0] @property def dtype(self): """Return NumPy dtype object giving the variable’s type.""" return self._h5ds.dtype def _get_padding(self, key): """Return padding if needed, defaults to False.""" padding = False if self.dtype != str and self.dtype.kind in ["f", "i", "u"]: key0 = _expanded_indexer(key, self.ndim) key0 = _transform_1d_boolean_indexers(key0) # extract max shape of key vs hdf5-shape h5ds_shape = self._h5ds.shape shape = self.shape # check for ndarray and list # see https://github.com/pydata/xarray/issues/7154 # first get maximum index max_index = [ max(k) + 1 if isinstance(k, (np.ndarray, list)) else k.stop for k in key0 ] # second convert to max shape max_shape = tuple( [ shape[i] if k is None else max(h5ds_shape[i], k) for i, k in enumerate(max_index) ] ) # check if hdf5 dataset dimensions are smaller than # their respective netcdf dimensions sdiff = [d0 - d1 for d0, d1 in zip(max_shape, h5ds_shape)] # create padding only if hdf5 dataset is smaller than netcdf dimension if sum(sdiff): padding = [(0, s) for s in sdiff] return padding def __array__(self, *args, **kwargs): return self._h5ds.__array__(*args, **kwargs) def __getitem__(self, key): from .legacyapi import Dataset if isinstance(self._parent._root, Dataset): # this is only for legacyapi # fix boolean indexing for affected versions # https://github.com/h5py/h5py/pull/2079 # https://github.com/h5netcdf/h5netcdf/pull/125/ h5py_version = version.parse(h5py.__version__) if version.parse("3.0.0") <= h5py_version < version.parse("3.7.0"): key = _transform_1d_boolean_indexers(key) if getattr(self._root, "decode_vlen_strings", False): string_info = self._root._h5py.check_string_dtype(self._h5ds.dtype) if string_info and string_info.length is None: return self._h5ds.asstr()[key] # get padding padding = self._get_padding(key) # apply padding with fillvalue (both api) if padding: fv = self.dtype.type(self._h5ds.fillvalue) return np.pad( self._h5ds, pad_width=padding, mode="constant", constant_values=fv, )[key] return self._h5ds[key] def __setitem__(self, key, value): from .legacyapi import Dataset if isinstance(self._parent._root, Dataset): # resize on write only for legacyapi key = _expanded_indexer(key, self.ndim) key = _transform_1d_boolean_indexers(key) # resize on write only for legacy API self._maybe_resize_dimensions(key, value) self._h5ds[key] = value @property def attrs(self): """Return variable attributes.""" return Attributes( self._h5ds.attrs, self._root._check_valid_netcdf_dtype, self._root._h5py ) _cls_name = "h5netcdf.Variable" def __repr__(self): if self._parent._root._closed: return "" % self._cls_name header = "<{} {!r}: dimensions {}, shape {}, dtype {}>".format( self._cls_name, self.name, self.dimensions, self.shape, self.dtype, ) return "\n".join( [header] + ["Attributes:"] + [f" {k}: {v!r}" for k, v in self.attrs.items()] ) class Variable(BaseVariable): @property def chunks(self): return self._h5ds.chunks @property def compression(self): return self._h5ds.compression @property def compression_opts(self): return self._h5ds.compression_opts @property def fletcher32(self): return self._h5ds.fletcher32 @property def shuffle(self): return self._h5ds.shuffle class _LazyObjectLookup(Mapping): def __init__(self, parent, object_cls): self._parent_ref = weakref.ref(parent) self._object_cls = object_cls self._objects = OrderedDict() @property def _parent(self): return self._parent_ref() def __setitem__(self, name, obj): self._objects[name] = obj def add(self, name): self._objects[name] = None def __iter__(self): for name in self._objects: # fix variable name for variable which clashes with dim name yield name.replace("_nc4_non_coord_", "") def __len__(self): return len(self._objects) def __getitem__(self, key): # check for _nc4_non_coord_ variable if key not in self._objects and "_nc4_non_coord_" + key in self._objects: key = "_nc4_non_coord_" + key if self._objects[key] is not None: return self._objects[key] else: self._objects[key] = self._object_cls(self._parent, key) return self._objects[key] def _netcdf_dimension_but_not_variable(h5py_dataset): return NOT_A_VARIABLE in h5py_dataset.attrs.get("NAME", b"") def _unlabeled_dimension_mix(h5py_dataset): # check if dataset has dims and get it dimlist = getattr(h5py_dataset, "dims", []) if not dimlist: status = "nodim" else: dimset = set([len(j) for j in dimlist]) # either all dimensions have exactly one scale # or all dimensions have no scale if dimset ^ {0} == set(): status = "unlabeled" elif dimset & {0}: name = h5py_dataset.name.split("/")[-1] raise ValueError( f"malformed variable {name} has mixing of labeled and " "unlabeled dimensions." ) else: status = "labeled" return status class Group(Mapping): _variable_cls = Variable _dimension_cls = Dimension @property def _group_cls(self): return Group def __init__(self, parent, name): """Create netCDF4 group. Groups are containers by which the netCDF4 (HDF5) files are organized. Each group is like a Dataset itself. """ self._parent_ref = weakref.ref(parent) self._root_ref = weakref.ref(parent._root) self._h5path = _join_h5paths(parent._h5path, name) self._dimensions = Dimensions(self) # this map keeps track of all dimensions if parent is self: self._all_dimensions = ChainMap(self._dimensions) else: self._all_dimensions = parent._all_dimensions.new_child(self._dimensions) self._all_h5groups = parent._all_h5groups.new_child(self._h5group) self._variables = _LazyObjectLookup(self, self._variable_cls) self._groups = _LazyObjectLookup(self, self._group_cls) # initialize phony dimension counter if self._root._phony_dims_mode is not None: phony_dims = Counter() for k, v in self._h5group.items(): if isinstance(v, self._root._h5py.Group): # add to the groups collection if this is a h5py(d) Group # instance self._groups.add(k) else: if v.attrs.get("CLASS") == b"DIMENSION_SCALE": # add dimension and retrieve size self._dimensions.add(k) else: if self._root._phony_dims_mode is not None: # check if malformed variable and raise if _unlabeled_dimension_mix(v) == "unlabeled": # if unscaled variable, get phony dimensions phony_dims |= Counter(v.shape) if not _netcdf_dimension_but_not_variable(v): if isinstance(v, self._root._h5py.Dataset): self._variables.add(k) # iterate over found phony dimensions and create them if self._root._phony_dims_mode is not None: # retrieve labeled dims count from already acquired dimensions labeled_dims = Counter( [d._maxsize for d in self._dimensions.values() if not d._phony] ) for size, cnt in phony_dims.items(): # only create missing dimensions for pcnt in range(labeled_dims[size], cnt): name = self._root._phony_dim_count # for sort mode, we need to add precalculated max_dim_id + 1 if self._root._phony_dims_mode == "sort": name += self._root._max_dim_id + 1 name = f"phony_dim_{name}" self._dimensions.add_phony(name, size) self._initialized = True @property def _root(self): return self._root_ref() @property def _parent(self): return self._parent_ref() @property def _h5group(self): # Always refer to the root file and store not h5py object # subclasses: return self._root._h5file[self._h5path] @property def _track_order(self): if self._root._h5py.__name__ == "h5pyd": return False # TODO: make a suggestion to upstream to create a property # for files to get if they track the order # As of version 3.6.0 this property did not exist from h5py.h5p import CRT_ORDER_INDEXED, CRT_ORDER_TRACKED gcpl = self._h5group.id.get_create_plist() attr_creation_order = gcpl.get_attr_creation_order() order_tracked = bool(attr_creation_order & CRT_ORDER_TRACKED) order_indexed = bool(attr_creation_order & CRT_ORDER_INDEXED) return order_tracked and order_indexed @property def name(self): from .legacyapi import Dataset name = self._h5group.name # get group name only instead of full path for legacyapi if isinstance(self._parent._root, Dataset) and len(name) > 1: name = name.split("/")[-1] return name @property def dimensions(self): return self._dimensions @dimensions.setter def dimensions(self, value): for k, v in self._all_dimensions.maps[0].items(): if k in value: if v != value[k]: raise ValueError("cannot modify existing dimension %r" % k) else: raise ValueError( "new dimensions do not include existing dimension %r" % k ) self._dimensions.update(value) def _create_child_group(self, name): if name in self: raise ValueError("unable to create group %r (name already exists)" % name) kwargs = {} if self._root._h5py.__name__ == "h5py": kwargs.update(track_order=self._track_order) self._h5group.create_group(name, **kwargs) self._groups[name] = self._group_cls(self, name) return self._groups[name] def _require_child_group(self, name): try: return self._groups[name] except KeyError: return self._create_child_group(name) def create_group(self, name): """Create NetCDF4 group. Parameters ---------- name : str Name of new group. """ if name.startswith("/"): return self._root.create_group(name[1:]) keys = name.split("/") group = self for k in keys[:-1]: group = group._require_child_group(k) return group._create_child_group(keys[-1]) def _create_child_variable( self, name, dimensions, dtype, data, fillvalue, chunks, chunking_heuristic, **kwargs, ): if name in self: raise ValueError( "unable to create variable %r " "(name already exists)" % name ) if data is not None: data = np.asarray(data) if dtype is None: dtype = data.dtype if dtype == np.bool_: # never warn since h5netcdf has always errored here _invalid_netcdf_feature( "boolean dtypes", self._root.invalid_netcdf, ) else: self._root._check_valid_netcdf_dtype(dtype) if "scaleoffset" in kwargs: _invalid_netcdf_feature( "scale-offset filters", self._root.invalid_netcdf, ) # maybe create new dimensions depending on data if data is not None: for d, s in zip(dimensions, data.shape): # create new dimensions only ever if # - they are not known via parent-groups # - they are given in dimensions # - it's not a coordinate variable, they will get special handling later if d not in self._all_dimensions and d in dimensions and d is not name: # calls _create_dimension self.dimensions[d] = s # coordinate variable need_dim_adding = False if dimensions: for dim in dimensions: if name not in self._all_dimensions and name == dim: need_dim_adding = True # variable <-> dimension name clash if name in self._dimensions and ( name not in dimensions or (len(dimensions) > 1 and dimensions[0] != name) ): h5name = "_nc4_non_coord_" + name else: h5name = name # get shape from all dimensions shape = tuple(self._all_dimensions[d].size for d in dimensions) maxshape = tuple(self._all_dimensions[d]._maxsize for d in dimensions if d) # If it is passed directly it will change the default compression # settings. if shape != maxshape: kwargs["maxshape"] = maxshape has_unsized_dims = 0 in shape if has_unsized_dims and chunks in {None, True}: if chunking_heuristic in [None, "h5netcdf"]: chunks = _get_default_chunksizes(shape, dtype) elif chunking_heuristic == "h5py": # do nothing -> h5py will handle chunks internally pass else: raise ValueError( "got unrecognized value %s for chunking_heuristic argument " '(has to be "h5py" or "h5netcdf")' % chunking_heuristic ) # Clear dummy HDF5 datasets with this name that were created for a # dimension scale without a corresponding variable. # Keep the references, to re-attach later refs = None if h5name in self._dimensions and h5name in self._h5group: refs = self._dimensions[name]._scale_refs self._dimensions[name]._detach_scale() del self._h5group[name] if self._root._h5py.__name__ == "h5py": kwargs.update(dict(track_order=self._parent._track_order)) # handling default fillvalues for legacyapi # see https://github.com/h5netcdf/h5netcdf/issues/182 from .legacyapi import Dataset, _get_default_fillvalue fillval = fillvalue if fillvalue is None and isinstance(self._parent._root, Dataset): fillval = _get_default_fillvalue(dtype) # create hdf5 variable self._h5group.create_dataset( h5name, shape, dtype=dtype, data=data, chunks=chunks, fillvalue=fillval, **kwargs, ) # create variable class instance variable = self._variable_cls(self, h5name, dimensions) self._variables[h5name] = variable # need to put coordinate variable into dimensions if need_dim_adding: self._dimensions.add(name) # Re-create dim-scale and re-attach references to coordinate variable. if name in self._all_dimensions and h5name in self._h5group: self._all_dimensions[name]._create_scale() if refs is not None: self._all_dimensions[name]._attach_scale(refs) # In case of data variables attach dim_scales and coords. if name in self.variables and h5name not in self._dimensions: variable._attach_dim_scales() variable._attach_coords() # This is a bit of a hack, netCDF4 attaches _Netcdf4Dimid to every variable # when a variable is first written to, after variable creation. # Here we just attach it to every variable on creation. # Todo: get this consistent with netcdf-c/netcdf4-python variable._ensure_dim_id() if fillvalue is not None: # trying to create correct type of fillvalue if variable.dtype is str: value = fillvalue else: string_info = self._root._h5py.check_string_dtype(variable.dtype) if ( string_info and string_info.length is not None and string_info.length > 1 ): value = fillvalue else: value = variable.dtype.type(fillvalue) variable.attrs._h5attrs["_FillValue"] = value return variable def create_variable( self, name, dimensions=(), dtype=None, data=None, fillvalue=None, chunks=None, chunking_heuristic=None, **kwargs, ): """Creates a new variable. Parameters ---------- name : str Name of the new variable. If given as a path, intermediate groups will be created, if not existent. dimensions : tuple Tuple containing dimension name strings. Defaults to empty tuple, effectively creating a scalar variable. dtype : numpy.dtype, str, optional Dataype of the new variable. Defaults to None. fillvalue : scalar, optional Specify fillvalue for uninitialized parts of the variable. Defaults to ``None``. chunks : tuple, optional Tuple of integers specifying the chunksizes of each variable dimension. chunking_heuristic : str, optional Specify auto-chunking approach. Can be either of ``h5py`` or ``h5netcdf``. Defaults to ``h5netcdf``. Discussion on ``h5netcdf`` chunking can be found in (:issue:`52`) and (:pull:`127`). compression : str, optional Compression filter to apply, defaults to ``gzip`` compression_opts : int Parameter for compression filter. For ``compression="gzip"`` Integer from 1 to 9 specifying the compression level. Defaults to 4. fletcher32 : bool If ``True``, HDF5 Fletcher32 checksum algorithm is applied. Defaults to ``False``. shuffle : bool, optional If ``True``, HDF5 shuffle filter will be applied. Defaults to ``True``. Note ---- Please refer to ``h5py`` `documentation`_ for further parameters via keyword arguments. Any parameterizations which do not adhere to netCDF4 standard will only work on files created with ``invalid_netcdf=True``, .. _documentation: https://docs.h5py.org/en/stable/high/dataset.html#creating-datasets Returns ------- var : h5netcdf.Variable Variable class instance """ # if root-variable if name.startswith("/"): # handling default fillvalues for legacyapi # see https://github.com/h5netcdf/h5netcdf/issues/182 from .legacyapi import Dataset, _get_default_fillvalue if fillvalue is None and isinstance(self._parent._root, Dataset): fillvalue = _get_default_fillvalue(dtype) return self._root.create_variable( name[1:], dimensions, dtype, data, fillvalue, chunks, chunking_heuristic, **kwargs, ) # else split groups and iterate child groups keys = name.split("/") if not keys[-1]: raise ValueError("name parameter cannot be an empty string") group = self for k in keys[:-1]: group = group._require_child_group(k) return group._create_child_variable( keys[-1], dimensions, dtype, data, fillvalue, chunks, chunking_heuristic, **kwargs, ) def _get_child(self, key): try: return self.variables[key] except KeyError: return self.groups[key] def __getitem__(self, key): if key.startswith("/"): return self._root[key[1:]] keys = key.split("/") item = self for k in keys: item = item._get_child(k) return item def __iter__(self): for name in self.groups: yield name for name in self.variables: yield name def __len__(self): return len(self.variables) + len(self.groups) @property def parent(self): return self._parent def flush(self): self._root.flush() sync = flush @property def groups(self): return Frozen(self._groups) @property def variables(self): return Frozen(self._variables) @property def dims(self): return Frozen(self._dimensions) @property def attrs(self): return Attributes( self._h5group.attrs, self._root._check_valid_netcdf_dtype, self._root._h5py ) _cls_name = "h5netcdf.Group" def _repr_body(self): return ( ["Dimensions:"] + [ " {}: {}".format( k, f"Unlimited (current: {self._dimensions[k].size})" if v is None else v, ) for k, v in self.dimensions.items() ] + ["Groups:"] + [f" {g}" for g in self.groups] + ["Variables:"] + [ f" {k}: {v.dimensions!r} {v.dtype}" for k, v in self.variables.items() ] + ["Attributes:"] + [f" {k}: {v!r}" for k, v in self.attrs.items()] ) def __repr__(self): if self._root._closed: return f"" header = f"<{self._cls_name} {self.name!r} ({len(self)} members)>" return "\n".join([header] + self._repr_body()) def resize_dimension(self, dim, size): """Resize a dimension to a certain size. It will pad with the underlying HDF5 data sets' fill values (usually zero) where necessary. """ self._dimensions[dim]._resize(size) class File(Group): def __init__(self, path, mode="r", invalid_netcdf=False, phony_dims=None, **kwargs): """NetCDF4 file constructor. Parameters ---------- path: path-like Location of the netCDF4 file to be accessed. mode: "r", "r+", "a", "w" A valid file access mode. Defaults to "r". invalid_netcdf: bool Allow writing netCDF4 with data types and attributes that would otherwise not generate netCDF4 files that can be read by other applications. See :ref:`invalid netcdf` for more details. phony_dims: 'sort', 'access' See :ref:`phony dims` for more details. track_order: bool Corresponds to the h5py.File `track_order` parameter. Unless specified, the library will choose a default that enhances compatibility with netCDF4-c. If h5py version 3.7.0 or greater is installed, this parameter will be set to True by default. track_order is required to be true to for netCDF4-c libraries to append to a file. If an older version of h5py is detected, this parameter will be set to False by default to work around a bug in h5py limiting the number of attributes for a given variable. **kwargs: Additional keyword arguments to be passed to the ``h5py.File`` constructor. Notes ----- In h5netcdf version 0.12.0 and earlier, order tracking was disabled in HDF5 file. As this is a requirement for the current netCDF4 standard, it has been enabled without deprecation as of version 0.13.0 (:issue:`128`). Datasets created with h5netcdf version 0.12.0 that are opened with newer versions of h5netcdf will continue to disable order tracker. """ # 2022/01/09 # netCDF4 wants the track_order parameter to be true # through this might be getting relaxed in a more recent version of the # standard # https://github.com/Unidata/netcdf-c/issues/2054 # https://github.com/h5netcdf/h5netcdf/issues/128 # h5py versions less than 3.7.0 had a bug that limited the number of # attributes when track_order was set to true by default. # However, setting track_order to True helps with compatibility # with netcdf4-c and generally, keeping track of how things were added # to the dataset. # https://github.com/h5netcdf/h5netcdf/issues/136#issuecomment-1017457067 track_order_default = version.parse(h5py.__version__) >= version.parse("3.7.0") track_order = kwargs.pop("track_order", track_order_default) self.decode_vlen_strings = kwargs.pop("decode_vlen_strings", None) try: if isinstance(path, str): if ( path.startswith(("http://", "https://", "hdf5://")) and "driver" not in kwargs ): if no_h5pyd: raise ImportError( "No module named 'h5pyd'. h5pyd is required for " f"opening urls: {path}" ) try: with h5pyd.File(path, "r", **kwargs) as f: # noqa pass self._preexisting_file = True except OSError: self._preexisting_file = False self._h5py = h5pyd self._h5file = self._h5py.File( path, mode, track_order=track_order, **kwargs ) else: self._preexisting_file = os.path.exists(path) and mode != "w" self._h5py = h5py self._h5file = self._h5py.File( path, mode, track_order=track_order, **kwargs ) else: # file-like object self._preexisting_file = mode in {"r", "r+", "a"} self._h5py = h5py self._h5file = self._h5py.File( path, mode, track_order=track_order, **kwargs ) except Exception: self._closed = True raise else: self._closed = False self._mode = mode self._writable = mode != "r" self._root_ref = weakref.ref(self) self._h5path = "/" self.invalid_netcdf = invalid_netcdf # phony dimension handling self._phony_dims_mode = phony_dims if phony_dims is not None: self._phony_dim_count = 0 if phony_dims not in ["sort", "access"]: raise ValueError( f"unknown value {phony_dims!r} for phony_dims\n" "Use phony_dims='sort' for sorted naming, " "phony_dims='access' for per access naming." ) # string decoding if "legacy" in self._cls_name: if self.decode_vlen_strings is not None: msg = ( "'decode_vlen_strings' keyword argument is not allowed in h5netcdf " "legacy API." ) raise TypeError(msg) self.decode_vlen_strings = True else: if self.decode_vlen_strings is None: self.decode_vlen_strings = False self._max_dim_id = -1 # This maps keeps track of all HDF5 datasets corresponding to this group. self._all_h5groups = ChainMap(self._h5group) super().__init__(self, self._h5path) # get maximum dimension id and count of labeled dimensions if self._writable: self._max_dim_id = self._get_maximum_dimension_id() # initialize all groups to detect/create phony dimensions # mimics netcdf-c style naming if phony_dims == "sort": self._determine_phony_dimensions() def _get_maximum_dimension_id(self): dimids = [] def _dimids(name, obj): if obj.attrs.get("CLASS", None) == b"DIMENSION_SCALE": dimids.append(obj.attrs.get("_Netcdf4Dimid", -1)) self._h5file.visititems(_dimids) return max(dimids) if dimids else -1 def _determine_phony_dimensions(self): def create_phony_dimensions(grp): for name in grp.groups: create_phony_dimensions(grp[name]) create_phony_dimensions(self) def _check_valid_netcdf_dtype(self, dtype): dtype = np.dtype(dtype) if dtype == bool: description = "boolean" elif dtype == complex: description = "complex" elif h5py.check_dtype(enum=dtype) is not None: description = "enum" elif h5py.check_dtype(ref=dtype) is not None: description = "reference" elif h5py.check_dtype(vlen=dtype) not in {None, str, bytes}: description = "non-string variable length" else: description = None if description is not None: _invalid_netcdf_feature( f"{description} dtypes", self.invalid_netcdf, ) @property def mode(self): return self._h5file.mode @property def filename(self): return self._h5file.filename @property def parent(self): return None @property def _root(self): return self def flush(self): if self._writable: # only write `_NCProperties` in newly created files if not self._preexisting_file and not self.invalid_netcdf: _NC_PROPERTIES = "version=2,h5netcdf={},hdf5={},{}={}".format( __version__, self._h5py.version.hdf5_version, self._h5py.__name__, self._h5py.__version__, ) self.attrs._h5attrs["_NCProperties"] = np.array( _NC_PROPERTIES, dtype=h5py.string_dtype( encoding="ascii", length=len(_NC_PROPERTIES) ), ) if self.invalid_netcdf: # see https://github.com/h5netcdf/h5netcdf/issues/165 # warn user if .nc file extension is used for invalid netcdf features if os.path.splitext(self.filename)[1] == ".nc": msg = ( f"You are writing invalid netcdf features to file " f"`{self.filename}`. The file will thus be not conforming " f"to NetCDF-4 standard and might not be readable by other " f"netcdf tools. Consider using a different extension." ) warnings.warn(msg, UserWarning, stacklevel=2) # remove _NCProperties if invalid_netcdf if exists if "_NCProperties" in self.attrs._h5attrs: del self.attrs._h5attrs["_NCProperties"] sync = flush def close(self): if not self._closed: self.flush() self._h5file.close() self._closed = True __del__ = close def __enter__(self): return self def __exit__(self, type, value, traceback): self.close() _cls_name = "h5netcdf.File" def __repr__(self): if self._closed: return "" % self._cls_name header = "<{} {!r} (mode {})>".format( self._cls_name, self.filename.split("/")[-1], self.mode, ) return "\n".join([header] + self._repr_body()) def _get_default_chunksizes(dimsizes, dtype): # This is a modified version of h5py's default chunking heuristic # https://github.com/h5py/h5py/blob/aa31f03bef99e5807d1d6381e36233325d944279/h5py/_hl/filters.py#L334-L389 # (published under BSD-3-Clause, included at licenses/H5PY_LICENSE.txt) # See also https://github.com/h5py/h5py/issues/2029 for context. CHUNK_BASE = 16 * 1024 # Multiplier by which chunks are adjusted CHUNK_MIN = 8 * 1024 # Soft lower limit (8k) CHUNK_MAX = 1024 * 1024 # Hard upper limit (1M) type_size = np.dtype(dtype).itemsize is_unlimited = np.array([x == 0 for x in dimsizes]) # For unlimited dimensions start with a guess of 1024 chunks = np.array([x if x != 0 else 1024 for x in dimsizes], dtype="=f8") ndims = len(dimsizes) if ndims == 0: raise ValueError("Chunks not allowed for scalar datasets.") if not np.all(np.isfinite(chunks)): raise ValueError("Illegal value in chunk tuple") # Determine the optimal chunk size in bytes using a PyTables expression. # This is kept as a float. dset_size = np.prod(chunks[~is_unlimited]) * type_size target_size = CHUNK_BASE * (2 ** np.log10(dset_size / (1024 * 1024))) if target_size > CHUNK_MAX: target_size = CHUNK_MAX elif target_size < CHUNK_MIN: target_size = CHUNK_MIN i = 0 while True: # Repeatedly loop over the axes, dividing them by 2. # Start by reducing unlimited axes first. # Stop when: # 1a. We're smaller than the target chunk size, OR # 1b. We're within 50% of the target chunk size, AND # 2. The chunk is smaller than the maximum chunk size idx = i % ndims chunk_bytes = np.prod(chunks) * type_size done = ( chunk_bytes < target_size or abs(chunk_bytes - target_size) / target_size < 0.5 ) and chunk_bytes < CHUNK_MAX if done: break if np.prod(chunks) == 1: break # Element size larger than CHUNK_MAX nelem_unlim = np.prod(chunks[is_unlimited]) if nelem_unlim == 1 or is_unlimited[idx]: chunks[idx] = np.ceil(chunks[idx] / 2.0) i += 1 return tuple(int(x) for x in chunks) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/h5netcdf/dimensions.py0000644000175100001770000001722214522513274017053 0ustar00runnerdockerimport weakref from collections import OrderedDict from collections.abc import MutableMapping import h5py import numpy as np class Dimensions(MutableMapping): def __init__(self, group): self._group_ref = weakref.ref(group) self._objects = OrderedDict() @property def _group(self): return self._group_ref() def __getitem__(self, name): return self._objects[name] def __setitem__(self, name, size): # creating new dimensions if not self._group._root._writable: raise RuntimeError("H5NetCDF: Write to read only") if name in self._objects: raise ValueError("dimension %r already exists" % name) self._objects[name] = Dimension(self._group, name, size, create_h5ds=True) def add_phony(self, name, size): self._objects[name] = Dimension( self._group, name, size, create_h5ds=False, phony=True ) def add(self, name): # adding dimensions which are already created in the file self._objects[name] = Dimension(self._group, name) def __delitem__(self, key): raise NotImplementedError("cannot yet delete dimensions") def __iter__(self): yield from self._objects def __len__(self): return len(self._objects) def __repr__(self): if self._group._root._closed: return "" return "" % ", ".join( f"{k}={v!r}" for k, v in self._objects.items() ) def _join_h5paths(parent_path, child_path): return "/".join([parent_path.rstrip("/"), child_path.lstrip("/")]) class Dimension: def __init__(self, parent, name, size=None, create_h5ds=False, phony=False): """NetCDF4 Dimension constructor. Parameters ---------- parent: h5netcdf.Group Parent group. name: str Name of the dimension. size : int Size of the Netcdf4 Dimension. Defaults to None (unlimited). create_h5ds : bool For internal use only. phony : bool For internal use only. """ self._parent_ref = weakref.ref(parent) self._phony = phony self._root_ref = weakref.ref(parent._root) self._h5path = _join_h5paths(parent.name, name) self._name = name self._size = 0 if size is None else size if self._phony: self._root._phony_dim_count += 1 else: self._root._max_dim_id += 1 self._dimensionid = self._root._max_dim_id if parent._root._writable and create_h5ds and not self._phony: self._create_scale() self._initialized = True @property def _root(self): return self._root_ref() @property def _parent(self): return self._parent_ref() @property def name(self): """Return dimension name.""" if self._phony: return self._name return self._h5ds.name.split("/")[-1] @property def size(self): """Return dimension size.""" size = len(self) if self.isunlimited(): # return actual dimensions sizes, this is in line with netcdf4-python # get sizes from all connected variables and calculate max # because netcdf unlimited dimensions can be any length # but connected variables dimensions can have a certain larger length. reflist = self._h5ds.attrs.get("REFERENCE_LIST", None) if reflist is not None: for ref, axis in reflist: var = self._parent._h5group["/"][ref] size = max(var.shape[axis], size) return size def group(self): """Return parent group.""" return self._parent def isunlimited(self): """Return ``True`` if dimension is unlimited, otherwise ``False``.""" if self._phony: return False return self._h5ds.maxshape == (None,) @property def _h5ds(self): if self._phony: return None return self._root._h5file[self._h5path] @property def _isscale(self): return h5py.h5ds.is_scale(self._h5ds.id) @property def _dimid(self): if self._phony: return False return self._h5ds.attrs.get("_Netcdf4Dimid", self._dimensionid) def _resize(self, size): from .legacyapi import Dataset if not self.isunlimited(): raise ValueError( "Dimension '%s' is not unlimited and thus cannot be resized." % self.name ) self._h5ds.resize((size,)) # resize all referenced datasets for new API if not isinstance(self._root, Dataset): refs = self._scale_refs if refs: for var, dim in refs: self._parent._all_h5groups[var].resize(size, dim) @property def _scale_refs(self): """Return dimension scale references""" return list(self._h5ds.attrs.get("REFERENCE_LIST", [])) def _create_scale(self): """Create dimension scale for this dimension""" if self._name not in self._parent._h5group: kwargs = {} if self._size is None or self._size == 0: kwargs["maxshape"] = (None,) if self._root._h5py.__name__ == "h5py": kwargs.update(dict(track_order=self._parent._track_order)) self._parent._h5group.create_dataset( name=self._name, shape=(self._size,), dtype=">f4", **kwargs, ) self._h5ds.attrs["_Netcdf4Dimid"] = np.array(self._dimid, dtype=np.int32) if len(self._h5ds.shape) > 1: dims = self._parent._variables[self._name].dimensions coord_ids = np.array( [self._parent._dimensions[d]._dimid for d in dims], "int32" ) self._h5ds.attrs["_Netcdf4Coordinates"] = coord_ids # need special handling for size in case of scalar and tuple size = self._size if not size: size = 1 if isinstance(size, tuple): size = size[0] dimlen = bytes(f"{size:10}", "ascii") NOT_A_VARIABLE = b"This is a netCDF dimension but not a netCDF variable." scale_name = ( self.name if self.name in self._parent._variables else NOT_A_VARIABLE + dimlen ) # don't re-create scales if they already exist. if not self._root._h5py.h5ds.is_scale(self._h5ds.id): self._h5ds.make_scale(scale_name) def _attach_scale(self, refs): """Attach dimension scale to references""" for var, dim in refs: self._parent._all_h5groups[var].dims[dim].attach_scale(self._h5ds) def _detach_scale(self): """Detach dimension scale from all references""" refs = self._scale_refs if refs: for var, dim in refs: self._parent._all_h5groups[var].dims[dim].detach_scale(self._h5ds) @property def _maxsize(self): return None if self.isunlimited() else self.size def __len__(self): if self._phony: return self._size return self._h5ds.shape[0] _cls_name = "h5netcdf.Dimension" def __repr__(self): if not self._phony and self._parent._root._closed: return "" % self._cls_name special = "" if self._phony: special += " (phony_dim)" if self.isunlimited(): special += " (unlimited)" header = f"<{self._cls_name} {self.name!r}: size {self.size}{special}>" return "\n".join([header]) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/h5netcdf/legacyapi.py0000644000175100001770000001570514522513274016645 0ustar00runnerdockerimport sys import h5py import numpy as np from . import core #: default netcdf fillvalues default_fillvals = { "S1": "\x00", "i1": -127, "u1": 255, "i2": -32767, "u2": 65535, "i4": -2147483647, "u4": 4294967295, "i8": -9223372036854775806, "u8": 18446744073709551614, "f4": 9.969209968386869e36, "f8": 9.969209968386869e36, } def _get_default_fillvalue(dtype): kind = np.dtype(dtype).kind fillvalue = None if kind in ["u", "i", "f"]: size = np.dtype(dtype).itemsize fillvalue = default_fillvals[f"{kind}{size}"] return fillvalue def _check_return_dtype_endianess(endian="native"): little_endian = sys.byteorder == "little" endianess = "=" if endian == "little": endianess = little_endian and endianess or "<" elif endian == "big": endianess = not little_endian and endianess or ">" elif endian == "native": pass else: raise ValueError( "'endian' keyword argument must be 'little','big' or 'native', got '%s'" % endian ) return endianess class HasAttributesMixin: _initialized = False def getncattr(self, name): """Retrieve a netCDF4 attribute.""" return self.attrs[name] def setncattr(self, name, value): """Set a netCDF4 attribute.""" self.attrs[name] = value def ncattrs(self): """Return netCDF4 attribute names.""" return list(self.attrs) def __getattr__(self, name): try: return self.attrs[name] except KeyError: raise AttributeError( f"NetCDF: attribute {type(self).__name__}:{name} not found" ) def __setattr__(self, name, value): if self._initialized and name not in self.__dict__: self.attrs[name] = value else: object.__setattr__(self, name, value) class Variable(core.BaseVariable, HasAttributesMixin): _cls_name = "h5netcdf.legacyapi.Variable" def chunking(self): """Return variable chunking information. The chunksize is returned as a sequence with the size for each dimension. If the dataset is defined to be contiguous (no chunking) the word 'contiguous' is returned. """ chunks = self._h5ds.chunks if chunks is None: return "contiguous" else: return chunks def filters(self): """Return HDF5 filter parameters dictionary.""" complevel = self._h5ds.compression_opts return { "complevel": 0 if complevel is None else complevel, "fletcher32": self._h5ds.fletcher32, "shuffle": self._h5ds.shuffle, "zlib": self._h5ds.compression == "gzip", } @property def dtype(self): """Return netCDF4.Variable datatype.""" dt = self._h5ds.dtype if h5py.check_dtype(vlen=dt) is str: return str return dt class Group(core.Group, HasAttributesMixin): _cls_name = "h5netcdf.legacyapi.Group" _variable_cls = Variable @property def _group_cls(self): return Group createGroup = core.Group.create_group def createDimension(self, name, size): """Creates a new dimension with given name and size. Parameters ---------- name : str Dimension name size : int, None size must be a positive integer or None (unlimited). Specifying size=0 results in an unlimited dimension too. Returns ------- dim : h5netcdf.legacyapi.Dimension Dimension class instance. """ self._dimensions[name] = size return self._dimensions[name] def createVariable( self, varname, datatype, dimensions=(), zlib=False, complevel=4, shuffle=True, fletcher32=False, chunksizes=None, fill_value=None, endian="native", ): """Creates a new variable. Parameters ---------- varname : str Name of the new variable. If given as a path, intermediate groups will be created, if not existent. datatype : numpy.dtype, str Dataype of the new variable dimensions : tuple Tuple containing dimension name strings. Defaults to empty tuple, effectively creating a scalar variable. zlib : bool, optional If ``True``, variable data will be gzip compressed. complevel : int, optional Integer between 1 and 9 defining compression level. Defaults to 4. Ignored if ``zlib=False``. shuffle : bool, optional If ``True``, HDF5 shuffle filter will be applied. Defaults to ``True``. Ignored if ``zlib=False``. fletcher32 : bool, optional If ``True``, HDF5 Fletcher32 checksum algorithm is applied. Defaults to ``False``. chunksizes : tuple, optional Tuple of integers specifying the chunksizes of each variable dimension. Discussion on ``h5netcdf`` chunksizes can be found in (:issue:`52`) and (:pull:`127`). fill_value : scalar, optional Specify ``_FillValue`` for uninitialized parts of the variable. Defaults to ``None``. endian : str, optional Control on-disk storage format. Can be any of ``little``, ``big`` or ``native`` (default). Returns ------- var : h5netcdf.legacyapi.Variable Variable class instance """ if len(dimensions) == 0: # it's a scalar # rip off chunk and filter options for consistency with netCDF4-python chunksizes = None zlib = False fletcher32 = False shuffle = False if datatype is str: datatype = h5py.special_dtype(vlen=str) kwds = {} if zlib: # only add compression related keyword arguments if relevant (h5py # chokes otherwise) kwds["compression"] = "gzip" kwds["compression_opts"] = complevel kwds["shuffle"] = shuffle # control endian-ess endianess = _check_return_dtype_endianess(endian) # needs swapping? if endianess != "=": # transform to numpy dtype and swap endianess dtype = np.dtype(datatype) if dtype.byteorder != "|": datatype = dtype.newbyteorder("S") # closer to netCDF4 chunking behavior kwds["chunking_heuristic"] = "h5netcdf" return super().create_variable( varname, dimensions, dtype=datatype, fletcher32=fletcher32, chunks=chunksizes, fillvalue=fill_value, **kwds, ) class Dimension(core.Dimension): _cls_name = "h5netcdf.legacyapi.Dimensions" class Dataset(core.File, Group, HasAttributesMixin): _cls_name = "h5netcdf.legacyapi.Dataset" ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1699387091.3380637 h5netcdf-1.3.0/h5netcdf/tests/0000755000175100001770000000000014522513323015462 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/h5netcdf/tests/conftest.py0000644000175100001770000001165214522513274017673 0ustar00runnerdockerimport os import sys import tempfile from pathlib import Path from shutil import rmtree import pytest try: from h5pyd._apps.hstouch import main as hstouch from hsds.hsds_app import HsdsApp with_reqd_pkgs = True except ImportError: with_reqd_pkgs = False def set_hsds_root(): """Make required HSDS root directory.""" hsds_root = Path(os.environ["ROOT_DIR"]) / os.environ["BUCKET_NAME"] / "home" if hsds_root.exists(): rmtree(hsds_root) old_sysargv = sys.argv sys.argv = [""] sys.argv.extend(["-e", os.environ["HS_ENDPOINT"]]) sys.argv.extend(["-u", "admin"]) sys.argv.extend(["-p", "admin"]) sys.argv.extend(["--bucket", os.environ["BUCKET_NAME"]]) sys.argv.append("/home/") hstouch() sys.argv = [""] sys.argv.extend(["-e", os.environ["HS_ENDPOINT"]]) sys.argv.extend(["-u", "admin"]) sys.argv.extend(["-p", "admin"]) sys.argv.extend(["--bucket", os.environ["BUCKET_NAME"]]) sys.argv.extend(["-o", os.environ["HS_USERNAME"]]) sys.argv.append(f'/home/{os.environ["HS_USERNAME"]}/') hstouch() sys.argv = old_sysargv @pytest.fixture(scope="session") def hsds_up(): """Provide HDF Highly Scalabale Data Service (HSDS) for h5pyd testing.""" if with_reqd_pkgs: root_dir = Path(tempfile.mkdtemp(prefix="tmp-hsds-root-")) os.environ["BUCKET_NAME"] = "data" (root_dir / os.getenv("BUCKET_NAME")).mkdir(parents=True, exist_ok=True) os.environ["ROOT_DIR"] = str(root_dir) os.environ["HS_USERNAME"] = "h5netcdf-pytest" os.environ["HS_PASSWORD"] = "TestEarlyTestEverything" config = """allow_noauth: true auth_expiration: -1 default_public: False aws_access_key_id: xxx aws_secret_access_key: xxx aws_iam_role: hsds_role aws_region: us-east-1 hsds_endpoint: http://hsds.hdf.test aws_s3_gateway: null aws_dynamodb_gateway: null aws_dynamodb_users_table: null azure_connection_string: null azure_resource_id: null azure_storage_account: null azure_resource_group: null root_dir: null password_salt: null bucket_name: hsdstest head_port: 5100 head_ram: 512m dn_port: 6101 dn_ram: 3g sn_port: 5101 sn_ram: 1g rangeget_port: 6900 rangeget_ram: 2g target_sn_count: 0 target_dn_count: 0 log_level: INFO log_timestamps: false log_prefix: null max_tcp_connections: 100 head_sleep_time: 10 node_sleep_time: 10 async_sleep_time: 10 s3_sync_interval: 1 s3_sync_task_timeout: 10 store_read_timeout: 1 store_read_sleep_interval: 0.1 max_pending_write_requests: 20 flush_sleep_interval: 1 max_chunks_per_request: 1000 min_chunk_size: 1m max_chunk_size: 4m max_request_size: 100m max_chunks_per_folder: 0 max_task_count: 100 max_tasks_per_node_per_request: 16 aio_max_pool_connections: 64 metadata_mem_cache_size: 128m metadata_mem_cache_expire: 3600 chunk_mem_cache_size: 128m chunk_mem_cache_expire: 3600 data_cache_size: 128m data_cache_max_req_size: 128k data_cache_expire_time: 3600 data_cache_page_size: 4m data_cache_max_concurrent_read: 16 timeout: 30 password_file: /config/passwd.txt groups_file: /config/groups.txt server_name: Highly Scalable Data Service (HSDS) greeting: Welcome to HSDS! about: HSDS is a webservice for HDF data top_level_domains: [] cors_domain: "*" admin_user: admin admin_group: null openid_provider: azure openid_url: null openid_audience: null openid_claims: unique_name,appid,roles chaos_die: 0 standalone_app: false blosc_nthreads: 2 http_compression: false http_max_url_length: 512 k8s_app_label: hsds k8s_namespace: null restart_policy: on-failure domain_req_max_objects_limit: 500 """ tmp_dir = Path(tempfile.mkdtemp(prefix="tmp-hsds-")) config_file = tmp_dir / "config.yml" config_file.write_text(config) passwd_file = tmp_dir / "passwd.txt" passwd_file.write_text( f'admin:admin\n{os.environ["HS_USERNAME"]}:{os.environ["HS_PASSWORD"]}\n' ) log_file = str(tmp_dir / "hsds.log") tmp_dir = str(tmp_dir) if sys.platform == "darwin": # macOS temp directory paths can be very long and break low-level # socket comms code... socket_dir = "/tmp/hsds" else: socket_dir = tmp_dir try: hsds = HsdsApp( username=os.environ["HS_USERNAME"], password=os.environ["HS_PASSWORD"], password_file=str(passwd_file), log_level=os.getenv("LOG_LEVEL", "DEBUG"), logfile=log_file, socket_dir=socket_dir, config_dir=tmp_dir, dn_count=2, ) hsds.run() is_up = hsds.ready if is_up: os.environ["HS_ENDPOINT"] = hsds.endpoint set_hsds_root() except Exception: is_up = False yield is_up hsds.stop() rmtree(tmp_dir, ignore_errors=True) rmtree(socket_dir, ignore_errors=True) rmtree(root_dir, ignore_errors=True) else: yield False ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/h5netcdf/tests/pytest.ini0000644000175100001770000000015314522513274017517 0ustar00runnerdocker[pytest] filterwarnings = ignore:Using h5py's default chunking with unlimited dimensions:FutureWarning ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/h5netcdf/tests/test_h5netcdf.py0000644000175100001770000024502214522513274020605 0ustar00runnerdockerimport gc import io import random import re import string import tempfile from os import environ as env import h5py import netCDF4 import numpy as np import pytest from packaging import version from pytest import raises import h5netcdf from h5netcdf import legacyapi from h5netcdf.core import NOT_A_VARIABLE, CompatibilityError try: import h5pyd without_h5pyd = False except ImportError: without_h5pyd = True remote_h5 = ("http:", "hdf5:") @pytest.fixture def tmp_local_netcdf(tmpdir): return str(tmpdir.join("testfile.nc")) @pytest.fixture(params=["testfile.nc", "hdf5://testfile"]) def tmp_local_or_remote_netcdf(request, tmpdir, hsds_up): if request.param.startswith(remote_h5): if without_h5pyd: pytest.skip("h5pyd package not available") elif not hsds_up: pytest.skip("HSDS service not running") rnd = "".join(random.choice(string.ascii_uppercase) for _ in range(5)) return ( "hdf5://" + "home" + "/" + env["HS_USERNAME"] + "/" + "testfile" + rnd + ".nc" ) else: return str(tmpdir.join(request.param)) @pytest.fixture(params=[True, False]) def decode_vlen_strings(request): return dict(decode_vlen_strings=request.param) @pytest.fixture(params=[netCDF4, legacyapi]) def netcdf_write_module(request): return request.param def get_hdf5_module(resource): """Return the correct h5py module based on the input resource.""" if isinstance(resource, str) and resource.startswith(remote_h5): return h5pyd else: return h5py def string_to_char(arr): """Like nc4.stringtochar, but faster and more flexible.""" # ensure the array is contiguous arr = np.array(arr, copy=False, order="C") kind = arr.dtype.kind if kind not in ["U", "S"]: raise ValueError("argument must be a string") return arr.reshape(arr.shape + (1,)).view(kind + "1") def array_equal(a, b): a, b = map(np.array, (a[...], b[...])) if a.shape != b.shape: return False try: return np.allclose(a, b) except TypeError: return (a == b).all() _char_array = string_to_char(np.array(["a", "b", "c", "foo", "bar", "baz"], dtype="S")) _string_array = np.array( [["foobar0", "foobar1", "foobar3"], ["foofoofoo", "foofoobar", "foobarbar"]] ) _vlen_string = "foo" def is_h5py_char_working(tmp_netcdf, name): h5 = get_hdf5_module(tmp_netcdf) # https://github.com/Unidata/netcdf-c/issues/298 with h5.File(tmp_netcdf, "r") as ds: v = ds[name] try: assert array_equal(v, _char_array) return True except Exception as e: if re.match("^Can't read data", e.args[0]): return False else: raise def write_legacy_netcdf(tmp_netcdf, write_module): ds = write_module.Dataset(tmp_netcdf, "w") ds.setncattr("global", 42) ds.other_attr = "yes" ds.createDimension("x", 4) ds.createDimension("y", 5) ds.createDimension("z", 6) ds.createDimension("empty", 0) ds.createDimension("string3", 3) ds.createDimension("unlimited", None) v = ds.createVariable("foo", float, ("x", "y"), chunksizes=(4, 5), zlib=True) v[...] = 1 v.setncattr("units", "meters") v = ds.createVariable("y", int, ("y",), fill_value=-1) v[:4] = np.arange(4) v = ds.createVariable("z", "S1", ("z", "string3"), fill_value=b"X") v[...] = _char_array v = ds.createVariable("scalar", np.float32, ()) v[...] = 2.0 # test creating a scalar with compression option (with should be ignored) v = ds.createVariable("intscalar", np.int64, (), zlib=6, fill_value=None) v[...] = 2 v = ds.createVariable("foo_unlimited", float, ("x", "unlimited")) v[...] = 1 with raises((h5netcdf.CompatibilityError, TypeError)): ds.createVariable("boolean", np.bool_, ("x")) g = ds.createGroup("subgroup") v = g.createVariable("subvar", np.int32, ("x",)) v[...] = np.arange(4.0) g.createDimension("y", 10) g.createVariable("y_var", float, ("y",)) ds.createDimension("mismatched_dim", 1) ds.createVariable("mismatched_dim", int, ()) v = ds.createVariable("var_len_str", str, ("x")) v[0] = "foo" ds.close() def write_h5netcdf(tmp_netcdf): ds = h5netcdf.File(tmp_netcdf, "w") ds.attrs["global"] = 42 ds.attrs["other_attr"] = "yes" ds.dimensions = {"x": 4, "y": 5, "z": 6, "empty": 0, "unlimited": None} v = ds.create_variable( "foo", ("x", "y"), float, chunks=(4, 5), compression="gzip", shuffle=True ) v[...] = 1 v.attrs["units"] = "meters" remote_file = isinstance(tmp_netcdf, str) and tmp_netcdf.startswith(remote_h5) if not remote_file: v = ds.create_variable("y", ("y",), int, fillvalue=-1) v[:4] = np.arange(4) v = ds.create_variable("z", ("z", "string3"), data=_char_array, fillvalue=b"X") v = ds.create_variable("scalar", data=np.float32(2.0)) v = ds.create_variable("intscalar", data=np.int64(2)) v = ds.create_variable("foo_unlimited", ("x", "unlimited"), float) v[...] = 1 with raises((h5netcdf.CompatibilityError, TypeError)): ds.create_variable("boolean", data=True) g = ds.create_group("subgroup") v = g.create_variable("subvar", ("x",), np.int32) v[...] = np.arange(4.0) with raises(AttributeError): v.attrs["_Netcdf4Dimid"] = -1 g.dimensions["y"] = 10 g.create_variable("y_var", ("y",), float) g.flush() ds.dimensions["mismatched_dim"] = 1 ds.create_variable("mismatched_dim", dtype=int) ds.flush() dt = h5py.special_dtype(vlen=str) v = ds.create_variable("var_len_str", ("x",), dtype=dt) v[0] = _vlen_string ds.close() def read_legacy_netcdf(tmp_netcdf, read_module, write_module): ds = read_module.Dataset(tmp_netcdf, "r") assert ds.ncattrs() == ["global", "other_attr"] assert ds.getncattr("global") == 42 if write_module is not netCDF4: # skip for now: https://github.com/Unidata/netcdf4-python/issues/388 assert ds.other_attr == "yes" with pytest.raises(AttributeError): ds.does_not_exist assert set(ds.dimensions) == set( ["x", "y", "z", "empty", "string3", "mismatched_dim", "unlimited"] ) assert set(ds.variables) == set( [ "foo", "y", "z", "intscalar", "scalar", "var_len_str", "mismatched_dim", "foo_unlimited", ] ) assert set(ds.groups) == set(["subgroup"]) assert ds.parent is None v = ds.variables["foo"] assert array_equal(v, np.ones((4, 5))) assert v.dtype == float assert v.dimensions == ("x", "y") assert v.ndim == 2 assert v.ncattrs() == ["units"] if write_module is not netCDF4: assert v.getncattr("units") == "meters" assert tuple(v.chunking()) == (4, 5) # check for dict items separately # see https://github.com/h5netcdf/h5netcdf/issues/171 filters = v.filters() assert filters["complevel"] == 4 assert filters["fletcher32"] is False assert filters["shuffle"] is True assert filters["zlib"] is True v = ds.variables["y"] assert array_equal(v, np.r_[np.arange(4), [-1]]) assert v.dtype == int assert v.dimensions == ("y",) assert v.ndim == 1 assert v.ncattrs() == ["_FillValue"] assert v.getncattr("_FillValue") == -1 assert v.chunking() == "contiguous" # check for dict items separately # see https://github.com/h5netcdf/h5netcdf/issues/171 filters = v.filters() assert filters["complevel"] == 0 assert filters["fletcher32"] is False assert filters["shuffle"] is False assert filters["zlib"] is False ds.close() # Check the behavior if h5py. Cannot expect h5netcdf to overcome these # errors: if is_h5py_char_working(tmp_netcdf, "z"): ds = read_module.Dataset(tmp_netcdf, "r") v = ds.variables["z"] assert array_equal(v, _char_array) assert v.dtype == "S1" assert v.ndim == 2 assert v.dimensions == ("z", "string3") assert v.ncattrs() == ["_FillValue"] assert v.getncattr("_FillValue") == b"X" else: ds = read_module.Dataset(tmp_netcdf, "r") v = ds.variables["scalar"] assert array_equal(v, np.array(2.0)) assert v.dtype == "float32" assert v.ndim == 0 assert v.dimensions == () assert v.ncattrs() == [] v = ds.variables["intscalar"] assert array_equal(v, np.array(2)) assert v.dtype == "int64" assert v.ndim == 0 assert v.dimensions == () assert v.ncattrs() == [] v = ds.variables["var_len_str"] assert v.dtype == str assert v[0] == _vlen_string v = ds.groups["subgroup"].variables["subvar"] assert ds.groups["subgroup"].parent is ds assert array_equal(v, np.arange(4.0)) assert v.dtype == "int32" assert v.ndim == 1 assert v.dimensions == ("x",) assert v.ncattrs() == [] v = ds.groups["subgroup"].variables["y_var"] assert v.shape == (10,) assert "y" in ds.groups["subgroup"].dimensions ds.close() def read_h5netcdf(tmp_netcdf, write_module, decode_vlen_strings): remote_file = isinstance(tmp_netcdf, str) and tmp_netcdf.startswith(remote_h5) ds = h5netcdf.File(tmp_netcdf, "r", **decode_vlen_strings) assert ds.name == "/" assert list(ds.attrs) == ["global", "other_attr"] assert ds.attrs["global"] == 42 if write_module is not netCDF4: # skip for now: https://github.com/Unidata/netcdf4-python/issues/388 assert ds.attrs["other_attr"] == "yes" assert set(ds.dimensions) == set( ["x", "y", "z", "empty", "string3", "mismatched_dim", "unlimited"] ) variables = set( [ "foo", "z", "intscalar", "scalar", "var_len_str", "mismatched_dim", "foo_unlimited", ] ) # fix current failure of hsds/h5pyd if not remote_file: variables |= set(["y"]) assert set(ds.variables) == variables assert set(ds.groups) == set(["subgroup"]) assert ds.parent is None v = ds["foo"] assert v.name == "/foo" assert array_equal(v, np.ones((4, 5))) assert v.dtype == float assert v.dimensions == ("x", "y") assert v.ndim == 2 assert list(v.attrs) == ["units"] if write_module is not netCDF4: assert v.attrs["units"] == "meters" assert v.chunks == (4, 5) assert v.compression == "gzip" assert v.compression_opts == 4 assert not v.fletcher32 assert v.shuffle # fix current failure of hsds/h5pyd if not remote_file: v = ds["y"] assert array_equal(v, np.r_[np.arange(4), [-1]]) assert v.dtype == int assert v.dimensions == ("y",) assert v.ndim == 1 assert list(v.attrs) == ["_FillValue"] assert v.attrs["_FillValue"] == -1 if not remote_file: assert v.chunks is None assert v.compression is None assert v.compression_opts is None assert not v.fletcher32 assert not v.shuffle ds.close() if is_h5py_char_working(tmp_netcdf, "z"): ds = h5netcdf.File(tmp_netcdf, "r") v = ds["z"] assert array_equal(v, _char_array) assert v.dtype == "S1" assert v.ndim == 2 assert v.dimensions == ("z", "string3") assert list(v.attrs) == ["_FillValue"] assert v.attrs["_FillValue"] == b"X" else: ds = h5netcdf.File(tmp_netcdf, "r", **decode_vlen_strings) v = ds["scalar"] assert array_equal(v, np.array(2.0)) assert v.dtype == "float32" assert v.ndim == 0 assert v.dimensions == () assert list(v.attrs) == [] v = ds.variables["intscalar"] assert array_equal(v, np.array(2)) assert v.dtype == "int64" assert v.ndim == 0 assert v.dimensions == () assert list(v.attrs) == [] v = ds["var_len_str"] assert h5py.check_dtype(vlen=v.dtype) == str if getattr(ds, "decode_vlen_strings", True): assert v[0] == _vlen_string else: assert v[0] == _vlen_string.encode("utf_8") v = ds["/subgroup/subvar"] assert v is ds["subgroup"]["subvar"] assert v is ds["subgroup/subvar"] assert v is ds["subgroup"]["/subgroup/subvar"] assert v.name == "/subgroup/subvar" assert ds["subgroup"].name == "/subgroup" assert ds["subgroup"].parent is ds assert array_equal(v, np.arange(4.0)) assert v.dtype == "int32" assert v.ndim == 1 assert v.dimensions == ("x",) assert list(v.attrs) == [] assert ds["/subgroup/y_var"].shape == (10,) assert ds["/subgroup"].dimensions["y"].size == 10 ds.close() def roundtrip_legacy_netcdf(tmp_netcdf, read_module, write_module): write_legacy_netcdf(tmp_netcdf, write_module) read_legacy_netcdf(tmp_netcdf, read_module, write_module) def test_write_legacyapi_read_netCDF4(tmp_local_netcdf): roundtrip_legacy_netcdf(tmp_local_netcdf, netCDF4, legacyapi) def test_roundtrip_h5netcdf_legacyapi(tmp_local_netcdf): roundtrip_legacy_netcdf(tmp_local_netcdf, legacyapi, legacyapi) def test_write_netCDF4_read_legacyapi(tmp_local_netcdf): roundtrip_legacy_netcdf(tmp_local_netcdf, legacyapi, netCDF4) def test_write_h5netcdf_read_legacyapi(tmp_local_netcdf): write_h5netcdf(tmp_local_netcdf) read_legacy_netcdf(tmp_local_netcdf, legacyapi, h5netcdf) def test_write_h5netcdf_read_netCDF4(tmp_local_netcdf): write_h5netcdf(tmp_local_netcdf) read_legacy_netcdf(tmp_local_netcdf, netCDF4, h5netcdf) def test_roundtrip_h5netcdf(tmp_local_or_remote_netcdf, decode_vlen_strings): write_h5netcdf(tmp_local_or_remote_netcdf) read_h5netcdf(tmp_local_or_remote_netcdf, h5netcdf, decode_vlen_strings) def test_write_netCDF4_read_h5netcdf(tmp_local_netcdf, decode_vlen_strings): write_legacy_netcdf(tmp_local_netcdf, netCDF4) read_h5netcdf(tmp_local_netcdf, netCDF4, decode_vlen_strings) def test_write_legacyapi_read_h5netcdf(tmp_local_netcdf, decode_vlen_strings): write_legacy_netcdf(tmp_local_netcdf, legacyapi) read_h5netcdf(tmp_local_netcdf, legacyapi, decode_vlen_strings) def test_fileobj(decode_vlen_strings): fileobj = tempfile.TemporaryFile() write_h5netcdf(fileobj) read_h5netcdf(fileobj, h5netcdf, decode_vlen_strings) fileobj = io.BytesIO() write_h5netcdf(fileobj) read_h5netcdf(fileobj, h5netcdf, decode_vlen_strings) def test_repr(tmp_local_or_remote_netcdf): write_h5netcdf(tmp_local_or_remote_netcdf) f = h5netcdf.File(tmp_local_or_remote_netcdf, "a") assert "h5netcdf.File" in repr(f) assert "subgroup" in repr(f) assert "foo" in repr(f) assert "other_attr" in repr(f) assert "h5netcdf.attrs.Attributes" in repr(f.attrs) assert "global" in repr(f.attrs) d = f.dimensions assert "h5netcdf.Dimensions" in repr(d) assert "x=" in repr(d) g = f["subgroup"] assert "h5netcdf.Group" in repr(g) assert "subvar" in repr(g) v = f["foo"] assert "h5netcdf.Variable" in repr(v) assert "float" in repr(v) assert "units" in repr(v) f.dimensions["temp"] = None assert "temp: " in repr(f) f.resize_dimension("temp", 5) assert "temp: " in repr(f) f.close() assert "Closed" in repr(f) assert "Closed" in repr(d) assert "Closed" in repr(g) assert "Closed" in repr(v) def test_attrs_api(tmp_local_or_remote_netcdf): h5 = get_hdf5_module(tmp_local_or_remote_netcdf) with h5netcdf.File(tmp_local_or_remote_netcdf, "w") as ds: ds.attrs["conventions"] = "CF" ds.attrs["empty_string"] = h5.Empty(dtype=np.dtype("|S1")) ds.dimensions["x"] = 1 v = ds.create_variable("x", ("x",), "i4") v.attrs.update({"units": "meters", "foo": "bar"}) assert ds._closed with h5netcdf.File(tmp_local_or_remote_netcdf, "r") as ds: assert len(ds.attrs) == 2 assert dict(ds.attrs) == {"conventions": "CF", "empty_string": b""} assert list(ds.attrs) == ["conventions", "empty_string"] assert dict(ds["x"].attrs) == {"units": "meters", "foo": "bar"} assert len(ds["x"].attrs) == 2 assert sorted(ds["x"].attrs) == ["foo", "units"] def test_shape_is_tied_to_coordinate(tmp_local_or_remote_netcdf): with h5netcdf.legacyapi.Dataset( tmp_local_or_remote_netcdf, "w", ) as ds: ds.createDimension("x", size=None) ds.createVariable("xvar", int, ("x",)) ds["xvar"][:5] = np.arange(5) assert ds["xvar"].shape == (5,) ds.createVariable("yvar", int, ("x",)) ds["yvar"][:10] = np.arange(10) assert ds["yvar"].shape == (10,) # The shape of the xvar should change too # this is in order to be in line with the behavior # of netCDF4-c assert ds["xvar"].shape == (10,) def test_optional_netcdf4_attrs(tmp_local_or_remote_netcdf): h5 = get_hdf5_module(tmp_local_or_remote_netcdf) with h5.File(tmp_local_or_remote_netcdf, "w") as f: foo_data = np.arange(50).reshape(5, 10) f.create_dataset("foo", data=foo_data) f.create_dataset("x", data=np.arange(5)) f.create_dataset("y", data=np.arange(10)) f["x"].make_scale() f["y"].make_scale() f["foo"].dims[0].attach_scale(f["x"]) f["foo"].dims[1].attach_scale(f["y"]) with h5netcdf.File(tmp_local_or_remote_netcdf, "r") as ds: assert ds["foo"].dimensions == ("x", "y") assert ds.dimensions.keys() == {"x", "y"} assert ds.dimensions["x"].size == 5 assert ds.dimensions["y"].size == 10 assert array_equal(ds["foo"], foo_data) def test_error_handling(tmp_local_or_remote_netcdf): with h5netcdf.File(tmp_local_or_remote_netcdf, "w") as ds: ds.dimensions["x"] = 1 with raises(ValueError): ds.dimensions["x"] = 2 with raises(ValueError): ds.dimensions = {"x": 2} with raises(ValueError): ds.dimensions = {"y": 3} ds.create_variable("x", ("x",), dtype=float) with raises(ValueError): ds.create_variable("x", ("x",), dtype=float) with raises(ValueError): ds.create_variable("y/", ("x",), dtype=float) ds.create_group("subgroup") with raises(ValueError): ds.create_group("subgroup") def test_decode_string_error(tmp_local_or_remote_netcdf): write_h5netcdf(tmp_local_or_remote_netcdf) with pytest.raises(TypeError): with h5netcdf.legacyapi.Dataset( tmp_local_or_remote_netcdf, "r", decode_vlen_strings=True ) as ds: assert ds.name == "/" def create_invalid_netcdf_data(): foo_data = np.arange(125).reshape(5, 5, 5) bar_data = np.arange(625).reshape(25, 5, 5) var = {"foo1": foo_data, "foo2": bar_data, "foo3": foo_data, "foo4": bar_data} var2 = {"x": 5, "y": 5, "z": 5, "x1": 25, "y1": 5, "z1": 5} return var, var2 def check_invalid_netcdf4(var, i): pdim = "phony_dim_{}" assert var["foo1"].dimensions[0] == pdim.format(i * 4) assert var["foo1"].dimensions[1] == pdim.format(1 + i * 4) assert var["foo1"].dimensions[2] == pdim.format(2 + i * 4) assert var["foo2"].dimensions[0] == pdim.format(3 + i * 4) assert var["foo2"].dimensions[1] == pdim.format(0 + i * 4) assert var["foo2"].dimensions[2] == pdim.format(1 + i * 4) assert var["foo3"].dimensions[0] == pdim.format(i * 4) assert var["foo3"].dimensions[1] == pdim.format(1 + i * 4) assert var["foo3"].dimensions[2] == pdim.format(2 + i * 4) assert var["foo4"].dimensions[0] == pdim.format(3 + i * 4) assert var["foo4"].dimensions[1] == pdim.format(i * 4) assert var["foo4"].dimensions[2] == pdim.format(1 + i * 4) assert var["x"].dimensions[0] == pdim.format(i * 4) assert var["y"].dimensions[0] == pdim.format(i * 4) assert var["z"].dimensions[0] == pdim.format(i * 4) assert var["x1"].dimensions[0] == pdim.format(3 + i * 4) assert var["y1"].dimensions[0] == pdim.format(i * 4) assert var["z1"].dimensions[0] == pdim.format(i * 4) def test_invalid_netcdf4(tmp_local_or_remote_netcdf): if tmp_local_or_remote_netcdf.startswith(remote_h5): pytest.skip("netCDF4 package does not work with remote HDF5 files") h5 = get_hdf5_module(tmp_local_or_remote_netcdf) with h5.File(tmp_local_or_remote_netcdf, "w") as f: var, var2 = create_invalid_netcdf_data() grps = ["bar", "baz"] for grp in grps: fx = f.create_group(grp) for k, v in var.items(): fx.create_dataset(k, data=v) for k, v in var2.items(): fx.create_dataset(k, data=np.arange(v)) with h5netcdf.File(tmp_local_or_remote_netcdf, "r", phony_dims="sort") as dsr: for i, grp in enumerate(grps): var = dsr[grp].variables check_invalid_netcdf4(var, i) with h5netcdf.File(tmp_local_or_remote_netcdf, "r", phony_dims="access") as dsr: for i, grp in enumerate(grps): var = dsr[grp].variables check_invalid_netcdf4(var, i) if not tmp_local_or_remote_netcdf.startswith(remote_h5): # netcdf4 package does not work with remote HDF5 files with netCDF4.Dataset(tmp_local_or_remote_netcdf, "r") as dsr: for i, grp in enumerate(grps): var = dsr[grp].variables check_invalid_netcdf4(var, i) with h5netcdf.File(tmp_local_or_remote_netcdf, "r") as ds: with raises(ValueError): ds["bar"].variables["foo1"].dimensions with raises(ValueError): with h5netcdf.File(tmp_local_or_remote_netcdf, "r", phony_dims="srt") as ds: pass def test_fake_phony_dims(tmp_local_or_remote_netcdf): # tests writing of dimension with phony naming scheme # see https://github.com/h5netcdf/h5netcdf/issues/178 with h5netcdf.File(tmp_local_or_remote_netcdf, mode="w") as ds: ds.dimensions["phony_dim_0"] = 3 def check_invalid_netcdf4_mixed(var, i): pdim = f"phony_dim_{i}" assert var["foo1"].dimensions[0] == "y1" assert var["foo1"].dimensions[1] == "z1" assert var["foo1"].dimensions[2] == pdim assert var["foo2"].dimensions[0] == "x1" assert var["foo2"].dimensions[1] == "y1" assert var["foo2"].dimensions[2] == "z1" assert var["foo3"].dimensions[0] == "y1" assert var["foo3"].dimensions[1] == "z1" assert var["foo3"].dimensions[2] == pdim assert var["foo4"].dimensions[0] == "x1" assert var["foo4"].dimensions[1] == "y1" assert var["foo4"].dimensions[2] == "z1" assert var["x"].dimensions[0] == "y1" assert var["y"].dimensions[0] == "y1" assert var["z"].dimensions[0] == "y1" assert var["x1"].dimensions[0] == "x1" assert var["y1"].dimensions[0] == "y1" assert var["z1"].dimensions[0] == "z1" def test_invalid_netcdf4_mixed(tmp_local_or_remote_netcdf): if tmp_local_or_remote_netcdf.startswith(remote_h5): pytest.skip("netCDF4 package does not work with remote HDF5 files") h5 = get_hdf5_module(tmp_local_or_remote_netcdf) with h5.File(tmp_local_or_remote_netcdf, "w") as f: var, var2 = create_invalid_netcdf_data() for k, v in var.items(): f.create_dataset(k, data=v) for k, v in var2.items(): f.create_dataset(k, data=np.arange(v)) f["x1"].make_scale() f["y1"].make_scale() f["z1"].make_scale() f["foo2"].dims[0].attach_scale(f["x1"]) f["foo2"].dims[1].attach_scale(f["y1"]) f["foo2"].dims[2].attach_scale(f["z1"]) with h5netcdf.File(tmp_local_or_remote_netcdf, "r", phony_dims="sort") as ds: var = ds.variables check_invalid_netcdf4_mixed(var, 3) with h5netcdf.File(tmp_local_or_remote_netcdf, "r", phony_dims="access") as ds: var = ds.variables check_invalid_netcdf4_mixed(var, 0) if not tmp_local_or_remote_netcdf.startswith(remote_h5): # netcdf4 package does not work with remote HDF5 files with netCDF4.Dataset(tmp_local_or_remote_netcdf, "r") as ds: var = ds.variables check_invalid_netcdf4_mixed(var, 3) with h5netcdf.File(tmp_local_or_remote_netcdf, "r") as ds: with raises(ValueError): ds.variables["foo1"].dimensions def test_invalid_netcdf_malformed_dimension_scales(tmp_local_or_remote_netcdf): h5 = get_hdf5_module(tmp_local_or_remote_netcdf) with h5.File(tmp_local_or_remote_netcdf, "w") as f: foo_data = np.arange(125).reshape(5, 5, 5) f.create_dataset("foo1", data=foo_data) f.create_dataset("x", data=np.arange(5)) f.create_dataset("y", data=np.arange(5)) f.create_dataset("z", data=np.arange(5)) f["x"].make_scale() f["y"].make_scale() f["z"].make_scale() f["foo1"].dims[0].attach_scale(f["x"]) with raises(ValueError): with h5netcdf.File(tmp_local_or_remote_netcdf, "r") as ds: assert ds print(ds) with raises(ValueError): with h5netcdf.File(tmp_local_or_remote_netcdf, "r", phony_dims="sort") as ds: assert ds print(ds) def test_hierarchical_access_auto_create(tmp_local_or_remote_netcdf): ds = h5netcdf.File(tmp_local_or_remote_netcdf, "w") ds.create_variable("/foo/bar", data=1) g = ds.create_group("foo/baz") g.create_variable("/foo/hello", data=2) assert set(ds) == set(["foo"]) assert set(ds["foo"]) == set(["bar", "baz", "hello"]) ds.close() ds = h5netcdf.File(tmp_local_or_remote_netcdf, "r") assert set(ds) == set(["foo"]) assert set(ds["foo"]) == set(["bar", "baz", "hello"]) ds.close() def test_Netcdf4Dimid(tmp_local_netcdf): # regression test for https://github.com/h5netcdf/h5netcdf/issues/53 with h5netcdf.File(tmp_local_netcdf, "w") as f: f.dimensions["x"] = 1 g = f.create_group("foo") g.dimensions["x"] = 2 g.dimensions["y"] = 3 with h5py.File(tmp_local_netcdf, "r") as f: # all dimension IDs should be present exactly once dim_ids = {f[name].attrs["_Netcdf4Dimid"] for name in ["x", "foo/x", "foo/y"]} assert dim_ids == {0, 1, 2} def test_reading_str_array_from_netCDF4(tmp_local_netcdf, decode_vlen_strings): # This tests reading string variables created by netCDF4 with netCDF4.Dataset(tmp_local_netcdf, "w") as ds: ds.createDimension("foo1", _string_array.shape[0]) ds.createDimension("foo2", _string_array.shape[1]) ds.createVariable("bar", str, ("foo1", "foo2")) ds.variables["bar"][:] = _string_array ds = h5netcdf.File(tmp_local_netcdf, "r", **decode_vlen_strings) v = ds.variables["bar"] if getattr(ds, "decode_vlen_strings", True): assert array_equal(v, _string_array) else: assert array_equal(v, np.char.encode(_string_array)) ds.close() def test_nc_properties_new(tmp_local_or_remote_netcdf): with h5netcdf.File(tmp_local_or_remote_netcdf, "w"): pass h5 = get_hdf5_module(tmp_local_or_remote_netcdf) with h5.File(tmp_local_or_remote_netcdf, "r") as f: assert b"h5netcdf" in f.attrs["_NCProperties"] def test_failed_read_open_and_clean_delete(tmpdir): # A file that does not exist but is opened for # reading should only raise an IOError and # no AttributeError at garbage collection. path = str(tmpdir.join("this_file_does_not_exist.nc")) try: with h5netcdf.File(path, "r") as ds: assert ds except OSError: pass # Look at garbage collection: # A simple gc.collect() does not raise an exception. # Must seek the File object and imitate its del command # by forcing it to close. obj_list = gc.get_objects() for obj in obj_list: try: is_h5netcdf_File = isinstance(obj, h5netcdf.File) except AttributeError: is_h5netcdf_File = False if is_h5netcdf_File: obj.close() def test_create_variable_matching_saved_dimension(tmp_local_or_remote_netcdf): h5 = get_hdf5_module(tmp_local_or_remote_netcdf) # if h5 is not h5py: # pytest.xfail("https://github.com/h5netcdf/h5netcdf/issues/48") with h5netcdf.File(tmp_local_or_remote_netcdf, "w") as f: f.dimensions["x"] = 2 f.create_variable("y", data=[1, 2], dimensions=("x",)) with h5.File(tmp_local_or_remote_netcdf, "r") as f: dimlen = f"{f['y'].dims[0].values()[0].size:10}" assert f["y"].dims[0].keys() == [NOT_A_VARIABLE.decode("ascii") + dimlen] with h5netcdf.File(tmp_local_or_remote_netcdf, "a") as f: f.create_variable("x", data=[0, 1], dimensions=("x",)) with h5.File(tmp_local_or_remote_netcdf, "r") as f: assert f["y"].dims[0].keys() == ["x"] def test_invalid_netcdf_error(tmp_local_or_remote_netcdf): if tmp_local_or_remote_netcdf.startswith(remote_h5): pytest.skip("Remote HDF5 does not yet support LZF compression") with h5netcdf.File(tmp_local_or_remote_netcdf, "w", invalid_netcdf=False) as f: # valid f.create_variable( "lzf_compressed", data=[1], dimensions=("x"), compression="lzf" ) # invalid with pytest.raises(h5netcdf.CompatibilityError): f.create_variable("complex", data=1j) with pytest.raises(h5netcdf.CompatibilityError): f.attrs["complex_attr"] = 1j with pytest.raises(h5netcdf.CompatibilityError): f.create_variable("scaleoffset", data=[1], dimensions=("x",), scaleoffset=0) def test_invalid_netcdf_okay(tmp_local_or_remote_netcdf): if tmp_local_or_remote_netcdf.startswith(remote_h5): pytest.skip("h5pyd does not support NumPy complex dtype yet") with pytest.warns(UserWarning, match="invalid netcdf features"): with h5netcdf.File(tmp_local_or_remote_netcdf, "w", invalid_netcdf=True) as f: f.create_variable( "lzf_compressed", data=[1], dimensions=("x"), compression="lzf" ) f.create_variable("complex", data=1j) f.attrs["complex_attr"] = 1j f.create_variable("scaleoffset", data=[1], dimensions=("x",), scaleoffset=0) with h5netcdf.File(tmp_local_or_remote_netcdf, "r") as f: np.testing.assert_equal(f["lzf_compressed"][:], [1]) assert f["complex"][...] == 1j assert f.attrs["complex_attr"] == 1j np.testing.assert_equal(f["scaleoffset"][:], [1]) h5 = get_hdf5_module(tmp_local_or_remote_netcdf) with h5.File(tmp_local_or_remote_netcdf, "r") as f: assert "_NCProperties" not in f.attrs def test_invalid_netcdf_overwrite_valid(tmp_local_netcdf): # https://github.com/h5netcdf/h5netcdf/issues/165 with netCDF4.Dataset(tmp_local_netcdf, mode="w"): pass with pytest.warns(UserWarning): with h5netcdf.File(tmp_local_netcdf, "a", invalid_netcdf=True) as f: f.create_variable( "lzf_compressed", data=[1], dimensions=("x"), compression="lzf" ) f.create_variable("complex", data=1j) f.attrs["complex_attr"] = 1j f.create_variable("scaleoffset", data=[1], dimensions=("x",), scaleoffset=0) with h5netcdf.File(tmp_local_netcdf, "r") as f: np.testing.assert_equal(f["lzf_compressed"][:], [1]) assert f["complex"][...] == 1j assert f.attrs["complex_attr"] == 1j np.testing.assert_equal(f["scaleoffset"][:], [1]) h5 = get_hdf5_module(tmp_local_netcdf) with h5.File(tmp_local_netcdf, "r") as f: assert "_NCProperties" not in f.attrs def test_reopen_file_different_dimension_sizes(tmp_local_netcdf): # regression test for https://github.com/h5netcdf/h5netcdf/issues/55 with h5netcdf.File(tmp_local_netcdf, "w") as f: f.create_variable("/one/foo", data=[1], dimensions=("x",)) with h5netcdf.File(tmp_local_netcdf, "a") as f: f.create_variable("/two/foo", data=[1, 2], dimensions=("x",)) with netCDF4.Dataset(tmp_local_netcdf, "r") as f: assert f.groups["one"].variables["foo"][...].shape == (1,) def test_invalid_then_valid_no_ncproperties(tmp_local_or_remote_netcdf): with pytest.warns(UserWarning, match="invalid netcdf features"): with h5netcdf.File(tmp_local_or_remote_netcdf, "w", invalid_netcdf=True): pass with h5netcdf.File(tmp_local_or_remote_netcdf, "a"): pass h5 = get_hdf5_module(tmp_local_or_remote_netcdf) with h5.File(tmp_local_or_remote_netcdf, "r") as f: # still not a valid netcdf file assert "_NCProperties" not in f.attrs def test_creating_and_resizing_unlimited_dimensions(tmp_local_or_remote_netcdf): with h5netcdf.File(tmp_local_or_remote_netcdf, "w") as f: f.dimensions["x"] = None f.dimensions["y"] = 15 f.dimensions["z"] = None f.resize_dimension("z", 20) with pytest.raises(ValueError) as e: f.resize_dimension("y", 20) assert e.value.args[0] == ( "Dimension 'y' is not unlimited and thus cannot be resized." ) h5 = get_hdf5_module(tmp_local_or_remote_netcdf) # Assert some behavior observed by using the C netCDF bindings. with h5.File(tmp_local_or_remote_netcdf, "r") as f: assert f["x"].shape == (0,) assert f["x"].maxshape == (None,) assert f["y"].shape == (15,) assert f["y"].maxshape == (15,) assert f["z"].shape == (20,) assert f["z"].maxshape == (None,) def test_creating_variables_with_unlimited_dimensions(tmp_local_or_remote_netcdf): with h5netcdf.File(tmp_local_or_remote_netcdf, "w") as f: f.dimensions["x"] = None f.dimensions["y"] = 2 # Creating a variable without data will initialize an array with zero # length. f.create_variable("dummy", dimensions=("x", "y"), dtype=np.int64) assert f.variables["dummy"].shape == (0, 2) assert f.variables["dummy"]._h5ds.maxshape == (None, 2) # Trying to create a variable while the current size of the dimension # is still zero will fail. with pytest.raises(ValueError) as e: f.create_variable( "dummy2", data=np.array([[1, 2], [3, 4]]), dimensions=("x", "y") ) assert e.value.args[0] == "Shape tuple is incompatible with data" # Creating a coordinate variable f.create_variable("x", dimensions=("x",), dtype=np.int64) # Resize data. assert f.variables["dummy"].shape == (0, 2) f.resize_dimension("x", 3) # This will also force a resize of the existing variables and it will # be padded with zeros. assert f.dimensions["x"].size == 3 np.testing.assert_allclose(f.variables["dummy"], np.zeros((3, 2))) # Creating another variable with no data will now also take the shape # of the current dimensions. f.create_variable("dummy3", dimensions=("x", "y"), dtype=np.int64) assert f.variables["dummy3"].shape == (3, 2) assert f.variables["dummy3"]._h5ds.maxshape == (None, 2) np.testing.assert_allclose(f.variables["dummy3"], np.zeros((3, 2))) # Writing to a variable with an unlimited dimension raises if tmp_local_or_remote_netcdf.startswith(remote_h5): # We don't expect any errors. This is effectively a void context manager expected_errors = memoryview(b"") else: expected_errors = pytest.raises(TypeError) with expected_errors as e: f.variables["dummy3"][:] = np.ones((5, 2)) if not tmp_local_or_remote_netcdf.startswith(remote_h5): assert e.value.args[0] == "Can't broadcast (5, 2) -> (3, 2)" assert f.variables["dummy3"].shape == (3, 2) assert f.variables["dummy3"]._h5ds.maxshape == (None, 2) assert f["x"].shape == (3,) assert f.dimensions["x"].size == 3 if tmp_local_or_remote_netcdf.startswith(remote_h5): # h5pyd writes the data, but does not expand the dimensions np.testing.assert_allclose(f.variables["dummy3"], np.ones((3, 2))) else: # original data is kept for h5py np.testing.assert_allclose(f.variables["dummy3"], np.zeros((3, 2))) # Close and read again to also test correct parsing of unlimited # dimensions. with h5netcdf.File(tmp_local_or_remote_netcdf, "r") as f: assert f.dimensions["x"].isunlimited() assert f.dimensions["x"].size == 3 assert f._h5file["x"].maxshape == (None,) assert f._h5file["x"].shape == (3,) assert f.dimensions["y"].size == 2 assert f._h5file["y"].maxshape == (2,) assert f._h5file["y"].shape == (2,) def test_writing_to_an_unlimited_dimension(tmp_local_or_remote_netcdf): with h5netcdf.File(tmp_local_or_remote_netcdf, "w") as f: # Two dimensions, only one is unlimited. f.dimensions["x"] = None f.dimensions["y"] = 3 f.dimensions["z"] = None # Cannot create it without first resizing it. with pytest.raises(ValueError) as e: f.create_variable( "dummy1", data=np.array([[1, 2, 3]]), dimensions=("x", "y") ) assert e.value.args[0] == "Shape tuple is incompatible with data" # Without data. f.create_variable("dummy1", dimensions=("x", "y"), dtype=np.int64) f.create_variable("dummy2", dimensions=("x", "y"), dtype=np.int64) f.create_variable("dummy3", dimensions=("x", "y"), dtype=np.int64) f.create_variable("dummyX", dimensions=("x", "y", "z"), dtype=np.int64) g = f.create_group("test") g.create_variable("dummy4", dimensions=("y", "x", "x"), dtype=np.int64) g.create_variable("dummy5", dimensions=("y", "y"), dtype=np.int64) assert f.variables["dummy1"].shape == (0, 3) assert f.variables["dummy2"].shape == (0, 3) assert f.variables["dummy3"].shape == (0, 3) assert f.variables["dummyX"].shape == (0, 3, 0) assert g.variables["dummy4"].shape == (3, 0, 0) assert g.variables["dummy5"].shape == (3, 3) # resize dimensions and all connected variables f.resize_dimension("x", 2) assert f.variables["dummy1"].shape == (2, 3) assert f.variables["dummy2"].shape == (2, 3) assert f.variables["dummy3"].shape == (2, 3) assert f.variables["dummyX"].shape == (2, 3, 0) assert g.variables["dummy4"].shape == (3, 2, 2) assert g.variables["dummy5"].shape == (3, 3) # broadcast writing if tmp_local_or_remote_netcdf.startswith(remote_h5): expected_errors = pytest.raises(OSError) else: # We don't expect any errors. This is effectively a void context manager expected_errors = memoryview(b"") with expected_errors as e: f.variables["dummy3"][...] = [[1, 2, 3]] np.testing.assert_allclose(f.variables["dummy3"], [[1, 2, 3], [1, 2, 3]]) if tmp_local_or_remote_netcdf.startswith(remote_h5): assert "Got asyncio.IncompleteReadError" in e.value.args[0] def test_c_api_can_read_unlimited_dimensions(tmp_local_netcdf): with h5netcdf.File(tmp_local_netcdf, "w") as f: # Three dimensions, only one is limited. f.dimensions["x"] = None f.dimensions["y"] = 3 f.dimensions["z"] = None f.create_variable("dummy1", dimensions=("x", "y"), dtype=np.int64) f.create_variable("dummy2", dimensions=("y", "x", "x"), dtype=np.int64) g = f.create_group("test") g.create_variable("dummy3", dimensions=("y", "y"), dtype=np.int64) g.create_variable("dummy4", dimensions=("z", "z"), dtype=np.int64) f.resize_dimension("x", 2) with netCDF4.Dataset(tmp_local_netcdf, "r") as f: assert f.dimensions["x"].size == 2 assert f.dimensions["x"].isunlimited() is True assert f.dimensions["y"].size == 3 assert f.dimensions["y"].isunlimited() is False assert f.dimensions["z"].size == 0 assert f.dimensions["z"].isunlimited() is True assert f.variables["dummy1"].shape == (2, 3) assert f.variables["dummy2"].shape == (3, 2, 2) g = f.groups["test"] assert g.variables["dummy3"].shape == (3, 3) assert g.variables["dummy4"].shape == (0, 0) def test_reading_unlimited_dimensions_created_with_c_api(tmp_local_netcdf): with netCDF4.Dataset(tmp_local_netcdf, "w") as f: f.createDimension("x", None) f.createDimension("y", 3) f.createDimension("z", None) dummy1 = f.createVariable("dummy1", float, ("x", "y")) f.createVariable("dummy2", float, ("y", "x", "x")) g = f.createGroup("test") g.createVariable("dummy3", float, ("y", "y")) g.createVariable("dummy4", float, ("z", "z")) # Assign something to trigger a resize. dummy1[:] = [[1, 2, 3], [4, 5, 6]] # Create another variable with same dimensions f.createVariable("dummy5", float, ("x", "y")) with h5netcdf.File(tmp_local_netcdf, "r") as f: assert f.dimensions["x"].isunlimited() assert f.dimensions["y"].size == 3 assert f.dimensions["z"].isunlimited() # This is parsed correctly due to h5netcdf's init trickery. assert f.dimensions["x"].size == 2 assert f.dimensions["y"].size == 3 assert f.dimensions["z"].size == 0 # But the actual data-set and arrays are not correct. # assert f["dummy1"].shape == (2, 3) # XXX: This array has some data with dimension x - netcdf does not # appear to keep dimensions consistent. # With https://github.com/h5netcdf/h5netcdf/pull/103 h5netcdf will # return a padded array assert f["dummy2"].shape == (3, 2, 2) f.groups["test"]["dummy3"].shape == (3, 3) f.groups["test"]["dummy4"].shape == (0, 0) assert f["dummy5"].shape == (2, 3) def test_reading_unused_unlimited_dimension(tmp_local_or_remote_netcdf): """Test reading a file with unused dimension of unlimited size""" with h5netcdf.File(tmp_local_or_remote_netcdf, "w") as f: f.dimensions = {"x": None} f.resize_dimension("x", 5) assert f.dimensions["x"].isunlimited() assert f.dimensions["x"].size == 5 def test_reading_special_datatype_created_with_c_api(tmp_local_netcdf): """Test reading a file with unsupported Datatype""" with netCDF4.Dataset(tmp_local_netcdf, "w") as f: complex128 = np.dtype([("real", np.float64), ("imag", np.float64)]) f.createCompoundType(complex128, "complex128") with h5netcdf.File(tmp_local_netcdf, "r") as f: pass def test_nc4_non_coord(tmp_local_netcdf): # Here we generate a few variables and coordinates # The default should be to track the order of creation # Thus, on reopening the file, the order in which # the variables are listed should be maintained # y -- refers to the coordinate y # _nc4_non_coord_y -- refers to the data y with h5netcdf.File(tmp_local_netcdf, "w") as f: f.dimensions = {"x": None, "y": 2} f.create_variable("test", dimensions=("x",), dtype=np.int64) f.create_variable("y", dimensions=("x",), dtype=np.int64) with h5netcdf.File(tmp_local_netcdf, "r") as f: assert list(f.dimensions) == ["x", "y"] assert f.dimensions["x"].size == 0 assert f.dimensions["x"].isunlimited() assert f.dimensions["y"].size == 2 if version.parse(h5py.__version__) >= version.parse("3.7.0"): assert list(f.variables) == ["test", "y"] assert list(f._h5group.keys()) == ["x", "y", "test", "_nc4_non_coord_y"] with h5netcdf.File(tmp_local_netcdf, "w") as f: f.dimensions = {"x": None, "y": 2} f.create_variable("y", dimensions=("x",), dtype=np.int64) f.create_variable("test", dimensions=("x",), dtype=np.int64) with h5netcdf.File(tmp_local_netcdf, "r") as f: assert list(f.dimensions) == ["x", "y"] assert f.dimensions["x"].size == 0 assert f.dimensions["x"].isunlimited() assert f.dimensions["y"].size == 2 if version.parse(h5py.__version__) >= version.parse("3.7.0"): assert list(f.variables) == ["y", "test"] assert list(f._h5group.keys()) == ["x", "y", "_nc4_non_coord_y", "test"] def test_overwrite_existing_file(tmp_local_netcdf): # create file with _NCProperties attribute with netCDF4.Dataset(tmp_local_netcdf, "w") as ds: ds.createDimension("x", 10) # check attribute with h5netcdf.File(tmp_local_netcdf, "r") as ds: assert ds.attrs._h5attrs.get("_NCProperties", False) # overwrite file with legacyapi with legacyapi.Dataset(tmp_local_netcdf, "w") as ds: ds.createDimension("x", 10) # check attribute with h5netcdf.File(tmp_local_netcdf, "r") as ds: assert ds.attrs._h5attrs.get("_NCProperties", False) # overwrite file with new api with h5netcdf.File(tmp_local_netcdf, "w") as ds: ds.dimensions["x"] = 10 # check attribute with h5netcdf.File(tmp_local_netcdf, "r") as ds: assert ds.attrs._h5attrs.get("_NCProperties", False) def test_scales_on_append(tmp_local_netcdf): # create file with _NCProperties attribute with netCDF4.Dataset(tmp_local_netcdf, "w") as ds: ds.createDimension("x", 10) # append file with netCDF4 with netCDF4.Dataset(tmp_local_netcdf, "r+") as ds: ds.createVariable("test", "i4", ("x",)) # check scales with h5netcdf.File(tmp_local_netcdf, "r") as ds: assert ds.variables["test"].attrs._h5attrs.get("DIMENSION_LIST", False) # append file with legacyapi with legacyapi.Dataset(tmp_local_netcdf, "r+") as ds: ds.createVariable("test1", "i4", ("x",)) # check scales with h5netcdf.File(tmp_local_netcdf, "r") as ds: assert ds.variables["test1"].attrs._h5attrs.get("DIMENSION_LIST", False) def create_attach_scales(filename, append_module): # create file with netCDF4 with netCDF4.Dataset(filename, "w") as ds: ds.createDimension("x", 0) ds.createDimension("y", 1) ds.createVariable("test", "i4", ("x",)) ds.variables["test"] = np.ones((10,)) # append file with netCDF4 with append_module.Dataset(filename, "a") as ds: ds.createVariable("test1", "i4", ("x",)) ds.createVariable("y", "i4", ("x", "y")) # check scales with h5netcdf.File(filename, "r") as ds: refs = ds._h5group["x"].attrs.get("REFERENCE_LIST", False) assert len(refs) == 3 for (ref, dim), name in zip(refs, ["/test", "/test1", "/_nc4_non_coord_y"]): assert dim == 0 assert ds._root._h5file[ref].name == name def test_create_attach_scales_netcdf4(tmp_local_netcdf): create_attach_scales(tmp_local_netcdf, netCDF4) def test_create_attach_scales_legacyapi(tmp_local_netcdf): create_attach_scales(tmp_local_netcdf, legacyapi) def test_detach_scale(tmp_local_netcdf): with h5netcdf.File(tmp_local_netcdf, "w") as ds: ds.dimensions["x"] = 2 ds.dimensions["y"] = 2 with h5netcdf.File(tmp_local_netcdf, "a") as ds: ds.create_variable("test", dimensions=("x",), dtype=np.int64) with h5netcdf.File(tmp_local_netcdf, "r") as ds: refs = ds._h5group["x"].attrs.get("REFERENCE_LIST", False) assert len(refs) == 1 for (ref, dim), name in zip(refs, ["/test"]): assert dim == 0 assert ds._root._h5file[ref].name == name with h5netcdf.File(tmp_local_netcdf, "a") as ds: ds.dimensions["x"]._detach_scale() with h5netcdf.File(tmp_local_netcdf, "r") as ds: refs = ds._h5group["x"].attrs.get("REFERENCE_LIST", False) assert not refs def test_is_scale(tmp_local_netcdf): with legacyapi.Dataset(tmp_local_netcdf, "w") as ds: ds.createDimension("x", 10) with legacyapi.Dataset(tmp_local_netcdf, "r") as ds: assert ds.dimensions["x"]._isscale def test_get_dim_scale_refs(tmp_local_netcdf): with legacyapi.Dataset(tmp_local_netcdf, "w") as ds: ds.createDimension("x", 10) ds.createVariable("test0", "i8", ("x",)) ds.createVariable("test1", "i8", ("x",)) with legacyapi.Dataset(tmp_local_netcdf, "r") as ds: refs = ds.dimensions["x"]._scale_refs assert ds._h5file[refs[0][0]] == ds["test0"]._h5ds assert ds._h5file[refs[1][0]] == ds["test1"]._h5ds def create_netcdf_dimensions(ds, idx): # dimension and variable setup is adapted from the blogpost at # https://www.unidata.ucar.edu/blogs/developer/en/entry/netcdf4_shared_dimensions g = ds.createGroup("dimtest" + str(idx)) g.createDimension("time", 0) # time g.createDimension("nvec", 5 + idx) # nvec g.createDimension("sample", 2 + idx) # sample g.createDimension("ship", 3 + idx) # ship g.createDimension("ship_strlen", 10) # ship_strlen g.createDimension("collide", 7 + idx) # collide time = g.createVariable("time", "f8", ("time",)) data = g.createVariable("data", "i8", ("ship", "sample", "time", "nvec")) collide = g.createVariable("collide", "i8", ("nvec",)) non_collide = g.createVariable("non_collide", "i8", ("nvec",)) ship = g.createVariable("ship", "S1", ("ship", "ship_strlen")) sample = g.createVariable("sample", "i8", ("time", "sample")) time[:] = np.arange(10 + idx) data[:] = np.ones((3 + idx, 2 + idx, 10 + idx, 5 + idx)) * 12.0 collide[...] = np.arange(5 + idx) non_collide[...] = np.arange(5 + idx) + 10 sample[0 : 2 + idx, : 2 + idx] = np.ones((2 + idx, 2 + idx)) ship[0] = list("Skiff ") def create_h5netcdf_dimensions(ds, idx): # dimension and variable setup is adapted from the blogpost at # https://www.unidata.ucar.edu/blogs/developer/en/entry/netcdf4_shared_dimensions g = ds.create_group("dimtest" + str(idx)) g.dimensions["time"] = 0 # time g.dimensions["nvec"] = 5 + idx # nvec g.dimensions["sample"] = 2 + idx # sample g.dimensions["ship"] = 3 + idx # ship g.dimensions["ship_strlen"] = 10 # ship_strlen g.dimensions["collide"] = 7 + idx # collide g.create_variable("time", dimensions=("time",), dtype=np.float64) g.create_variable( "data", dimensions=("ship", "sample", "time", "nvec"), dtype=np.int64 ) g.create_variable("collide", dimensions=("nvec",), dtype=np.int64) g.create_variable("non_collide", dimensions=("nvec",), dtype=np.int64) g.create_variable("sample", dimensions=("time", "sample"), dtype=np.int64) g.create_variable("ship", dimensions=("ship", "ship_strlen"), dtype="S1") g.resize_dimension("time", 10 + idx) g.variables["time"][:] = np.arange(10 + idx) g.variables["data"][:] = np.ones((3 + idx, 2 + idx, 10 + idx, 5 + idx)) * 12.0 g.variables["collide"][...] = np.arange(5 + idx) g.variables["non_collide"][...] = np.arange(5 + idx) + 10 g.variables["sample"][0 : 2 + idx, : 2 + idx] = np.ones((2 + idx, 2 + idx)) g.variables["ship"][0] = list("Skiff ") def check_netcdf_dimensions(tmp_netcdf, write_module, read_module): if read_module in [legacyapi, netCDF4]: opener = read_module.Dataset else: opener = h5netcdf.File with opener(tmp_netcdf, "r") as ds: for i, grp in enumerate(["dimtest0", "dimtest1"]): g = ds.groups[grp] assert set(g.dimensions) == { "collide", "ship_strlen", "time", "nvec", "ship", "sample", } if read_module in [legacyapi, h5netcdf]: assert g.dimensions["time"].isunlimited() assert g.dimensions["time"].size == 10 + i assert not g.dimensions["nvec"].isunlimited() assert g.dimensions["nvec"].size == 5 + i assert not g.dimensions["sample"].isunlimited() assert g.dimensions["sample"].size == 2 + i assert not g.dimensions["collide"].isunlimited() assert g.dimensions["collide"].size == 7 + i assert not g.dimensions["ship"].isunlimited() assert g.dimensions["ship"].size == 3 + i assert not g.dimensions["ship_strlen"].isunlimited() assert g.dimensions["ship_strlen"].size == 10 else: assert g.dimensions["time"].isunlimited() assert g.dimensions["time"].size == 10 + i assert not g.dimensions["nvec"].isunlimited() assert g.dimensions["nvec"].size == 5 + i assert not g.dimensions["sample"].isunlimited() assert g.dimensions["sample"].size == 2 + i assert not g.dimensions["ship"].isunlimited() assert g.dimensions["ship"].size == 3 + i assert not g.dimensions["ship_strlen"].isunlimited() assert g.dimensions["ship_strlen"].size == 10 assert not g.dimensions["collide"].isunlimited() assert g.dimensions["collide"].size == 7 + i assert set(g.variables) == { "data", "collide", "non_collide", "time", "sample", "ship", } assert g.variables["time"].shape == (10 + i,) assert g.variables["data"].shape == (3 + i, 2 + i, 10 + i, 5 + i) assert g.variables["collide"].shape == (5 + i,) assert g.variables["non_collide"].shape == (5 + i,) assert g.variables["sample"].shape == (10 + i, 2 + i) assert g.variables["ship"].shape == (3 + i, 10) def write_dimensions(tmp_netcdf, write_module): if write_module in [legacyapi, netCDF4]: with write_module.Dataset(tmp_netcdf, "w") as ds: create_netcdf_dimensions(ds, 0) create_netcdf_dimensions(ds, 1) else: with write_module.File(tmp_netcdf, "w") as ds: create_h5netcdf_dimensions(ds, 0) create_h5netcdf_dimensions(ds, 1) @pytest.fixture( params=[ [netCDF4, netCDF4], [legacyapi, legacyapi], [h5netcdf, h5netcdf], [legacyapi, netCDF4], [netCDF4, legacyapi], [h5netcdf, netCDF4], [netCDF4, h5netcdf], [legacyapi, h5netcdf], [h5netcdf, legacyapi], ] ) def read_write_matrix(request): print("write module:", request.param[0].__name__) print("read_module:", request.param[1].__name__) return request.param def test_dimensions(tmp_local_netcdf, read_write_matrix): write_dimensions(tmp_local_netcdf, read_write_matrix[0]) check_netcdf_dimensions( tmp_local_netcdf, read_write_matrix[0], read_write_matrix[1] ) def test_no_circular_references(tmp_local_netcdf): # https://github.com/h5py/h5py/issues/2019 with h5netcdf.File(tmp_local_netcdf, "w") as ds: ds.dimensions["x"] = 2 ds.dimensions["y"] = 2 gc.collect() with h5netcdf.File(tmp_local_netcdf, "r") as ds: refs = gc.get_referrers(ds) for ref in refs: print(ref) assert len(refs) == 1 def test_expanded_variables_netcdf4(tmp_local_netcdf, netcdf_write_module): # partially reimplemented due to performance reason in edge cases # https://github.com/h5netcdf/h5netcdf/issues/182 with netcdf_write_module.Dataset(tmp_local_netcdf, "w") as ds: f = ds.createGroup("test") f.createDimension("x", None) f.createDimension("y", 3) dummy1 = f.createVariable("dummy1", float, ("x", "y")) dummy2 = f.createVariable("dummy2", float, ("x", "y")) dummy3 = f.createVariable("dummy3", float, ("x", "y")) dummy4 = f.createVariable("dummy4", float, ("x", "y")) dummy1[:] = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] dummy2[1, :] = [4, 5, 6] dummy3[0:2, :] = [[1, 2, 3], [4, 5, 6]] # don't mask, since h5netcdf doesn't do masking if netcdf_write_module == netCDF4: ds.set_auto_mask(False) res1 = dummy1[:] res2 = dummy2[:] res3 = dummy3[:] res4 = dummy4[:] with netCDF4.Dataset(tmp_local_netcdf, "r") as ds: # don't mask, since h5netcdf doesn't do masking if netcdf_write_module == netCDF4: ds.set_auto_mask(False) f = ds["test"] np.testing.assert_allclose(f.variables["dummy1"][:], res1) np.testing.assert_allclose(f.variables["dummy1"][1, :], [4.0, 5.0, 6.0]) np.testing.assert_allclose(f.variables["dummy1"][1:2, :], [[4.0, 5.0, 6.0]]) assert f.variables["dummy1"].shape == (3, 3) np.testing.assert_allclose(f.variables["dummy2"][:], res2) np.testing.assert_allclose(f.variables["dummy2"][1, :], [4.0, 5.0, 6.0]) np.testing.assert_allclose(f.variables["dummy2"][1:2, :], [[4.0, 5.0, 6.0]]) assert f.variables["dummy2"].shape == (3, 3) np.testing.assert_allclose(f.variables["dummy3"][:], res3) np.testing.assert_allclose(f.variables["dummy3"][1, :], [4.0, 5.0, 6.0]) np.testing.assert_allclose(f.variables["dummy3"][1:2, :], [[4.0, 5.0, 6.0]]) assert f.variables["dummy3"].shape == (3, 3) np.testing.assert_allclose(f.variables["dummy4"][:], res4) assert f.variables["dummy4"].shape == (3, 3) with legacyapi.Dataset(tmp_local_netcdf, "r") as ds: f = ds["test"] np.testing.assert_allclose(f.variables["dummy1"][:], res1) np.testing.assert_allclose(f.variables["dummy1"][1, :], [4.0, 5.0, 6.0]) np.testing.assert_allclose(f.variables["dummy1"][1:2, :], [[4.0, 5.0, 6.0]]) np.testing.assert_allclose(f.variables["dummy1"]._h5ds[1, :], [4.0, 5.0, 6.0]) np.testing.assert_allclose( f.variables["dummy1"]._h5ds[1:2, :], [[4.0, 5.0, 6.0]] ) assert f.variables["dummy1"].shape == (3, 3) assert f.variables["dummy1"]._h5ds.shape == (3, 3) np.testing.assert_allclose(f.variables["dummy2"][:], res2) np.testing.assert_allclose(f.variables["dummy2"][1, :], [4.0, 5.0, 6.0]) np.testing.assert_allclose(f.variables["dummy2"][1:2, :], [[4.0, 5.0, 6.0]]) assert f.variables["dummy2"].shape == (3, 3) assert f.variables["dummy2"]._h5ds.shape == (2, 3) np.testing.assert_allclose(f.variables["dummy3"][:], res3) np.testing.assert_allclose(f.variables["dummy3"][1, :], [4.0, 5.0, 6.0]) np.testing.assert_allclose(f.variables["dummy3"][1:2, :], [[4.0, 5.0, 6.0]]) assert f.variables["dummy3"].shape == (3, 3) assert f.variables["dummy3"]._h5ds.shape == (2, 3) np.testing.assert_allclose(f.variables["dummy4"][:], res4) assert f.variables["dummy4"].shape == (3, 3) assert f.variables["dummy4"]._h5ds.shape == (0, 3) with h5netcdf.File(tmp_local_netcdf, "r") as ds: f = ds["test"] np.testing.assert_allclose(f.variables["dummy1"][:], res1) np.testing.assert_allclose(f.variables["dummy1"][:, :], res1) np.testing.assert_allclose(f.variables["dummy1"][1, :], [4.0, 5.0, 6.0]) np.testing.assert_allclose(f.variables["dummy1"][1:2, :], [[4.0, 5.0, 6.0]]) assert f.variables["dummy1"].shape == (3, 3) assert f.variables["dummy1"]._h5ds.shape == (3, 3) np.testing.assert_allclose(f.variables["dummy2"][:], res2) np.testing.assert_allclose(f.variables["dummy2"][1, :], [4.0, 5.0, 6.0]) np.testing.assert_allclose(f.variables["dummy2"][1:2, :], [[4.0, 5.0, 6.0]]) assert f.variables["dummy2"].shape == (3, 3) assert f.variables["dummy2"]._h5ds.shape == (2, 3) np.testing.assert_allclose(f.variables["dummy3"][:], res3) np.testing.assert_allclose(f.variables["dummy3"][1, :], [4.0, 5.0, 6.0]) np.testing.assert_allclose(f.variables["dummy3"][1:2, :], [[4.0, 5.0, 6.0]]) assert f.variables["dummy3"].shape == (3, 3) assert f.variables["dummy3"]._h5ds.shape == (2, 3) np.testing.assert_allclose(f.variables["dummy4"][:], res4) assert f.variables["dummy4"].shape == (3, 3) assert f.variables["dummy4"]._h5ds.shape == (0, 3) # https://github.com/h5netcdf/h5netcdf/issues/136 @pytest.mark.skipif( version.parse(h5py.__version__) < version.parse("3.7.0"), reason="h5py<3.7.0 bug with track_order prevents editing with netCDF4", ) def test_creation_with_h5netcdf_edit_with_netcdf4(tmp_local_netcdf): # In version 0.12.0, the wrong file creation attributes were used # making netcdf4 unable to open files created by h5netcdf # https://github.com/h5netcdf/h5netcdf/issues/128 with h5netcdf.File(tmp_local_netcdf, "w") as the_file: the_file.dimensions = {"x": 5} variable = the_file.create_variable("hello", ("x",), float) variable[...] = 5 with netCDF4.Dataset(tmp_local_netcdf, mode="a") as the_file: variable = the_file["hello"] np.testing.assert_array_equal(variable[...].data, 5) # Edit an existing variable variable[:3] = 2 # Create a new variable variable = the_file.createVariable("goodbye", float, ("x",)) variable[...] = 10 with h5netcdf.File(tmp_local_netcdf, "a") as the_file: # Ensure edited variable is consistent with the expected data variable = the_file["hello"] np.testing.assert_array_equal(variable[...].data, [2, 2, 2, 5, 5]) # Ensure new variable is accessible variable = the_file["goodbye"] np.testing.assert_array_equal(variable[...].data, 10) def test_track_order_specification(tmp_local_netcdf): # While netcdf4-c has historically only allowed track_order to be True # There doesn't seem to be a good reason for this # https://github.com/Unidata/netcdf-c/issues/2054 historically, h5netcdf # has not specified this parameter (leaving it implicitely as False) # We want to make sure we allow both here with h5netcdf.File(tmp_local_netcdf, "w", track_order=False): pass with h5netcdf.File(tmp_local_netcdf, "w", track_order=True): pass # This should always work with the default file opening settings # https://github.com/h5netcdf/h5netcdf/issues/136#issuecomment-1017457067 def test_more_than_7_attr_creation(tmp_local_netcdf): with h5netcdf.File(tmp_local_netcdf, "w") as h5file: for i in range(100): h5file.attrs[f"key{i}"] = i h5file.attrs[f"key{i}"] = 0 # Add a test that is supposed to fail in relation to issue #136 # We choose to monitor when h5py will have fixed their issue in our test suite # to enhance maintainability # https://github.com/h5netcdf/h5netcdf/issues/136#issuecomment-1017457067 @pytest.mark.parametrize("track_order", [False, True]) def test_more_than_7_attr_creation_track_order(tmp_local_netcdf, track_order): h5py_version = version.parse(h5py.__version__) if track_order and h5py_version < version.parse("3.7.0"): expected_errors = pytest.raises(KeyError) else: # We don't expect any errors. This is effectively a void context manager expected_errors = memoryview(b"") with h5netcdf.File(tmp_local_netcdf, "w", track_order=track_order) as h5file: with expected_errors: for i in range(100): h5file.attrs[f"key{i}"] = i h5file.attrs[f"key{i}"] = 0 def test_group_names(tmp_local_netcdf): # https://github.com/h5netcdf/h5netcdf/issues/68 with netCDF4.Dataset(tmp_local_netcdf, mode="w") as ds: for i in range(10): ds = ds.createGroup(f"group{i:02d}") with netCDF4.Dataset(tmp_local_netcdf, "r") as ds: assert ds.name == "/" name = "" for i in range(10): name = "/".join([name, f"group{i:02d}"]) assert ds[name].name == name.split("/")[-1] with legacyapi.Dataset(tmp_local_netcdf, "r") as ds: assert ds.name == "/" name = "" for i in range(10): name = "/".join([name, f"group{i:02d}"]) assert ds[name].name == name.split("/")[-1] with h5netcdf.File(tmp_local_netcdf, "r") as ds: assert ds.name == "/" name = "" for i in range(10): name = "/".join([name, f"group{i:02d}"]) assert ds[name].name == name def test_legacyapi_endianess(tmp_local_netcdf): # https://github.com/h5netcdf/h5netcdf/issues/15 big = legacyapi._check_return_dtype_endianess("big") little = legacyapi._check_return_dtype_endianess("little") native = legacyapi._check_return_dtype_endianess("native") with legacyapi.Dataset(tmp_local_netcdf, "w") as ds: ds.createDimension("x", 4) # test creating variable using endian keyword argument v = ds.createVariable("big", int, ("x"), endian="big") v[...] = 65533 v = ds.createVariable("little", int, ("x"), endian="little") v[...] = 65533 v = ds.createVariable("native", int, ("x"), endian="native") v[...] = 65535 with h5py.File(tmp_local_netcdf, "r") as ds: assert ds["big"].dtype.byteorder == big assert ds["little"].dtype.byteorder == little assert ds["native"].dtype.byteorder == native with h5netcdf.File(tmp_local_netcdf, "r") as ds: assert ds["big"].dtype.byteorder == big assert ds["little"].dtype.byteorder == little assert ds["native"].dtype.byteorder == native with legacyapi.Dataset(tmp_local_netcdf, "r") as ds: assert ds["big"].dtype.byteorder == big assert ds["little"].dtype.byteorder == little assert ds["native"].dtype.byteorder == native with netCDF4.Dataset(tmp_local_netcdf, "r") as ds: assert ds["big"].dtype.byteorder == big assert ds["little"].dtype.byteorder == little assert ds["native"].dtype.byteorder == native def test_bool_slicing_length_one_dim(tmp_local_netcdf): # see https://github.com/h5netcdf/h5netcdf/issues/23 with h5netcdf.File(tmp_local_netcdf, "w") as ds: ds.dimensions = {"x": 1, "y": 2} v = ds.create_variable("hello", ("x", "y"), "float") v[:] = np.ones((1, 2)) bool_slice = np.array([1], dtype=bool) # works for legacy API with legacyapi.Dataset(tmp_local_netcdf, "a") as ds: data = ds["hello"][bool_slice, :] np.testing.assert_equal(data, np.ones((1, 2))) ds["hello"][bool_slice, :] = np.zeros((1, 2)) data = ds["hello"][bool_slice, :] np.testing.assert_equal(data, np.zeros((1, 2))) # should raise for h5py >= 3.0.0 and h5py < 3.7.0 # https://github.com/h5py/h5py/pull/2079 # https://github.com/h5netcdf/h5netcdf/pull/125/ with h5netcdf.File(tmp_local_netcdf, "r") as ds: h5py_version = version.parse(h5py.__version__) if version.parse("3.0.0") <= h5py_version < version.parse("3.7.0"): error = "Indexing arrays must have integer dtypes" with pytest.raises(TypeError) as e: ds["hello"][bool_slice, :] assert error == str(e.value) else: ds["hello"][bool_slice, :] def test_fancy_indexing(tmp_local_netcdf): # regression test for https://github.com/pydata/xarray/issues/7154 with h5netcdf.legacyapi.Dataset(tmp_local_netcdf, "w") as ds: ds.createDimension("x", None) ds.createDimension("y", None) ds.createVariable("hello", int, ("x", "y"), fill_value=0) ds["hello"][:5, :10] = np.arange(5 * 10, dtype="int").reshape((5, 10)) ds.createVariable("hello2", int, ("x", "y")) ds["hello2"][:10, :20] = np.arange(10 * 20, dtype="int").reshape((10, 20)) with legacyapi.Dataset(tmp_local_netcdf, "a") as ds: np.testing.assert_array_equal(ds["hello"][1, [7, 8, 9]], [17, 18, 19]) np.testing.assert_array_equal(ds["hello"][1, [9, 10, 11]], [19, 0, 0]) np.testing.assert_array_equal(ds["hello"][1, slice(9, 12)], [19, 0, 0]) np.testing.assert_array_equal(ds["hello"][[2, 3, 4], 1], [21, 31, 41]) np.testing.assert_array_equal(ds["hello"][[4, 5, 6], 1], [41, 0, 0]) np.testing.assert_array_equal(ds["hello"][slice(4, 7), 1], [41, 0, 0]) def test_h5py_chunking(tmp_local_netcdf): with h5netcdf.File(tmp_local_netcdf, "w") as ds: ds.dimensions = {"x": 10, "y": 10, "z": 10, "t": None} v = ds.create_variable( "hello", ("x", "y", "z", "t"), "float", chunking_heuristic="h5py" ) chunks_h5py = v.chunks ds.resize_dimension("t", 4) v = ds.create_variable( "hello3", ("x", "y", "z", "t"), "float", chunking_heuristic="h5py" ) chunks_resized = v.chunks # cases above should be equivalent to a fixed dimension with appropriate size with h5netcdf.File(tmp_local_netcdf, "w") as ds: ds.dimensions = {"x": 10, "y": 10, "z": 10, "t": 1024} v = ds.create_variable( "hello", ("x", "y", "z", "t"), "float", chunks=True, chunking_heuristic="h5py", ) chunks_true = v.chunks with h5netcdf.File(tmp_local_netcdf, "w") as ds: ds.dimensions = {"x": 10, "y": 10, "z": 10, "t": 4} v = ds.create_variable( "hello", ("x", "y", "z", "t"), "float", chunks=True, chunking_heuristic="h5py", ) chunks_true_resized = v.chunks assert chunks_h5py == chunks_true assert chunks_resized == chunks_true_resized def test_h5netcdf_chunking(tmp_local_netcdf): # produces much smaller chunks for unsized dimensions with h5netcdf.File(tmp_local_netcdf, "w") as ds: ds.dimensions = {"x": 10, "y": 10, "z": 10, "t": None} v = ds.create_variable( "hello", ("x", "y", "z", "t"), "float", chunking_heuristic="h5netcdf" ) chunks_h5netcdf = v.chunks assert chunks_h5netcdf == (10, 10, 10, 1) # should produce chunks > 1 for small fixed dims with h5netcdf.File(tmp_local_netcdf, "w") as ds: ds.dimensions = {"x": 10, "t": None} v = ds.create_variable( "hello", ("x", "t"), "float", chunking_heuristic="h5netcdf" ) chunks_h5netcdf = v.chunks assert chunks_h5netcdf == (10, 128) # resized unlimited dimensions should be treated like fixed dims with h5netcdf.File(tmp_local_netcdf, "w") as ds: ds.dimensions = {"x": 10, "y": 10, "z": 10, "t": None} ds.resize_dimension("t", 10) v = ds.create_variable( "hello", ("x", "y", "z", "t"), "float", chunking_heuristic="h5netcdf" ) chunks_h5netcdf = v.chunks assert chunks_h5netcdf == (5, 5, 5, 10) def test_create_invalid_netcdf_catch_error(tmp_local_netcdf): # see https://github.com/h5netcdf/h5netcdf/issues/138 with h5netcdf.File(tmp_local_netcdf, "w") as f: try: f.create_variable("test", ("x", "y"), data=np.ones((10, 10), dtype="bool")) except CompatibilityError: pass assert repr(f.dimensions) == "" def test_dimensions_in_parent_groups(tmpdir): with netCDF4.Dataset(tmpdir.join("test_netcdf.nc"), mode="w") as ds: ds0 = ds for i in range(10): ds = ds.createGroup(f"group{i:02d}") ds0.createDimension("x", 10) ds0.createDimension("y", 20) ds0["group00"].createVariable("test", float, ("x", "y")) var = ds0["group00"].createVariable("x", float, ("x", "y")) var[:] = np.ones((10, 20)) with legacyapi.Dataset(tmpdir.join("test_legacy.nc"), mode="w") as ds: ds0 = ds for i in range(10): ds = ds.createGroup(f"group{i:02d}") ds0.createDimension("x", 10) ds0.createDimension("y", 20) ds0["group00"].createVariable("test", float, ("x", "y")) var = ds0["group00"].createVariable("x", float, ("x", "y")) var[:] = np.ones((10, 20)) with h5netcdf.File(tmpdir.join("test_netcdf.nc"), mode="r") as ds0: with h5netcdf.File(tmpdir.join("test_legacy.nc"), mode="r") as ds1: assert repr(ds0.dimensions["x"]) == repr(ds1.dimensions["x"]) assert repr(ds0.dimensions["y"]) == repr(ds1.dimensions["y"]) assert repr(ds0["group00"]) == repr(ds1["group00"]) assert repr(ds0["group00"]["test"]) == repr(ds1["group00"]["test"]) assert repr(ds0["group00"]["x"]) == repr(ds1["group00"]["x"]) def test_array_attributes(tmp_local_netcdf): with h5netcdf.File(tmp_local_netcdf, "w") as ds: dt = h5py.string_dtype("utf-8") unicode = "unicodé" ds.attrs["unicode"] = unicode ds.attrs["unicode_0dim"] = np.array(unicode, dtype=dt) ds.attrs["unicode_1dim"] = np.array([unicode], dtype=dt) ds.attrs["unicode_arrary"] = np.array([unicode, "foobár"], dtype=dt) ds.attrs["unicode_list"] = [unicode] dt = h5py.string_dtype("ascii") # if dtype is ascii it's irrelevant if the data is provided as bytes or string ascii = "ascii" ds.attrs["ascii"] = ascii ds.attrs["ascii_0dim"] = np.array(ascii, dtype=dt) ds.attrs["ascii_1dim"] = np.array([ascii], dtype=dt) ds.attrs["ascii_array"] = np.array([ascii, "foobar"], dtype=dt) ds.attrs["ascii_list"] = [ascii] ascii = b"ascii" ds.attrs["bytes"] = ascii ds.attrs["bytes_0dim"] = np.array(ascii, dtype=dt) ds.attrs["bytes_1dim"] = np.array([ascii], dtype=dt) ds.attrs["bytes_array"] = np.array([ascii, b"foobar"], dtype=dt) ds.attrs["bytes_list"] = [ascii] dt = h5py.string_dtype("utf-8", 10) # unicode needs to be encoded properly for fixed size string type ds.attrs["unicode_fixed"] = np.array(unicode.encode("utf-8"), dtype=dt) ds.attrs["unicode_fixed_0dim"] = np.array(unicode.encode("utf-8"), dtype=dt) ds.attrs["unicode_fixed_1dim"] = np.array([unicode.encode("utf-8")], dtype=dt) ds.attrs["unicode_fixed_arrary"] = np.array( [unicode.encode("utf-8"), "foobár".encode()], dtype=dt ) dt = h5py.string_dtype("ascii", 10) ascii = "ascii" ds.attrs["ascii_fixed"] = np.array(ascii, dtype=dt) ds.attrs["ascii_fixed_0dim"] = np.array(ascii, dtype=dt) ds.attrs["ascii_fixed_1dim"] = np.array([ascii], dtype=dt) ds.attrs["ascii_fixed_array"] = np.array([ascii, "foobar"], dtype=dt) ascii = b"ascii" ds.attrs["bytes_fixed"] = np.array(ascii, dtype=dt) ds.attrs["bytes_fixed_0dim"] = np.array(ascii, dtype=dt) ds.attrs["bytes_fixed_1dim"] = np.array([ascii], dtype=dt) ds.attrs["bytes_fixed_array"] = np.array([ascii, b"foobar"], dtype=dt) ds.attrs["int"] = 1 ds.attrs["intlist"] = [1] ds.attrs["int_array"] = np.arange(10) ds.attrs["empty_list"] = [] ds.attrs["empty_array"] = np.array([]) with h5netcdf.File(tmp_local_netcdf, mode="r") as ds: assert ds.attrs["unicode"] == unicode assert ds.attrs["unicode_0dim"] == unicode assert ds.attrs["unicode_1dim"] == unicode assert ds.attrs["unicode_arrary"] == [unicode, "foobár"] assert ds.attrs["unicode_list"] == unicode # bytes and strings are received as strings for h5py3 ascii = "ascii" foobar = "foobar" assert ds.attrs["ascii"] == "ascii" assert ds.attrs["ascii_0dim"] == ascii assert ds.attrs["ascii_1dim"] == ascii assert ds.attrs["ascii_array"] == [ascii, foobar] assert ds.attrs["ascii_list"] == "ascii" assert ds.attrs["bytes"] == ascii assert ds.attrs["bytes_0dim"] == ascii assert ds.attrs["bytes_1dim"] == ascii assert ds.attrs["bytes_array"] == [ascii, foobar] assert ds.attrs["bytes_list"] == "ascii" assert ds.attrs["unicode_fixed"] == unicode assert ds.attrs["unicode_fixed_0dim"] == unicode assert ds.attrs["unicode_fixed_1dim"] == unicode assert ds.attrs["unicode_fixed_arrary"] == [unicode, "foobár"] ascii = "ascii" assert ds.attrs["ascii_fixed"] == ascii assert ds.attrs["ascii_fixed_0dim"] == ascii assert ds.attrs["ascii_fixed_1dim"] == ascii assert ds.attrs["ascii_fixed_array"] == [ascii, "foobar"] assert ds.attrs["bytes_fixed"] == ascii assert ds.attrs["bytes_fixed_0dim"] == ascii assert ds.attrs["bytes_fixed_1dim"] == ascii assert ds.attrs["bytes_fixed_array"] == [ascii, "foobar"] assert ds.attrs["int"] == 1 assert ds.attrs["intlist"] == 1 np.testing.assert_equal(ds.attrs["int_array"], np.arange(10)) np.testing.assert_equal(ds.attrs["empty_list"], np.array([])) np.testing.assert_equal(ds.attrs["empty_array"], np.array([])) with legacyapi.Dataset(tmp_local_netcdf, mode="r") as ds: assert ds.unicode == unicode assert ds.unicode_0dim == unicode assert ds.unicode_1dim == unicode assert ds.unicode_arrary == [unicode, "foobár"] assert ds.unicode_list == unicode # bytes and strings are received as strings for h5py3 ascii = "ascii" foobar = "foobar" assert ds.ascii == "ascii" assert ds.ascii_0dim == ascii assert ds.ascii_1dim == ascii assert ds.ascii_array == [ascii, foobar] assert ds.ascii_list == "ascii" assert ds.bytes == ascii assert ds.bytes_0dim == ascii assert ds.bytes_1dim == ascii assert ds.bytes_array == [ascii, foobar] assert ds.bytes_list == "ascii" assert ds.unicode_fixed == unicode assert ds.unicode_fixed_0dim == unicode assert ds.unicode_fixed_1dim == unicode assert ds.unicode_fixed_arrary == [unicode, "foobár"] ascii = "ascii" assert ds.ascii_fixed == ascii assert ds.ascii_fixed_0dim == ascii assert ds.ascii_fixed_1dim == ascii assert ds.ascii_fixed_array == [ascii, "foobar"] assert ds.bytes_fixed == ascii assert ds.bytes_fixed_0dim == ascii assert ds.bytes_fixed_1dim == ascii assert ds.bytes_fixed_array == [ascii, "foobar"] assert ds.int == 1 assert ds.intlist == 1 np.testing.assert_equal(ds.int_array, np.arange(10)) np.testing.assert_equal(ds.attrs["empty_list"], np.array([])) np.testing.assert_equal(ds.attrs["empty_array"], np.array([])) with netCDF4.Dataset(tmp_local_netcdf, mode="r") as ds: assert ds.unicode == unicode assert ds.unicode_0dim == unicode assert ds.unicode_1dim == unicode assert ds.unicode_arrary == [unicode, "foobár"] assert ds.unicode_list == unicode ascii = "ascii" assert ds.ascii == ascii assert ds.ascii_0dim == ascii assert ds.ascii_1dim == ascii assert ds.ascii_array == [ascii, "foobar"] assert ds.ascii_list == ascii assert ds.bytes == ascii assert ds.bytes_0dim == ascii assert ds.bytes_1dim == ascii assert ds.bytes_array == [ascii, "foobar"] assert ds.bytes_list == ascii assert ds.unicode_fixed == unicode assert ds.unicode_fixed_0dim == unicode assert ds.unicode_fixed_1dim == unicode assert ds.unicode_fixed_arrary == [unicode, "foobár"] assert ds.ascii_fixed == ascii assert ds.ascii_fixed_0dim == ascii assert ds.ascii_fixed_1dim == ascii assert ds.ascii_fixed_array == [ascii, "foobar"] assert ds.bytes_fixed == ascii assert ds.bytes_fixed_0dim == ascii assert ds.bytes_fixed_1dim == ascii assert ds.bytes_fixed_array == [ascii, "foobar"] assert ds.int == 1 assert ds.intlist == 1 np.testing.assert_equal(ds.int_array, np.arange(10)) np.testing.assert_equal(ds.empty_list, np.array([])) np.testing.assert_equal(ds.empty_array, np.array([])) @pytest.mark.skipif( version.parse(h5py.__version__) < version.parse("3.7.0"), reason="does not work with h5py < 3.7.0", ) def test_vlen_string_dataset_fillvalue(tmp_local_netcdf, decode_vlen_strings): # check _FillValue for VLEN string datasets # only works for h5py >= 3.7.0 # first with new API with h5netcdf.File(tmp_local_netcdf, "w") as ds: ds.dimensions = {"string": 10} dt0 = h5py.string_dtype() fill_value0 = "bár" ds.create_variable("x0", ("string",), dtype=dt0, fillvalue=fill_value0) dt1 = h5py.string_dtype("ascii") fill_value1 = "bar" ds.create_variable("x1", ("string",), dtype=dt1, fillvalue=fill_value1) # check, if new API can read them with h5netcdf.File(tmp_local_netcdf, "r", **decode_vlen_strings) as ds: decode_vlen = decode_vlen_strings["decode_vlen_strings"] fvalue0 = fill_value0 if decode_vlen else fill_value0.encode("utf-8") fvalue1 = fill_value1 if decode_vlen else fill_value1.encode("utf-8") assert ds["x0"][0] == fvalue0 assert ds["x0"].attrs["_FillValue"] == fill_value0 assert ds["x1"][0] == fvalue1 assert ds["x1"].attrs["_FillValue"] == fill_value1 # check if legacyapi can read them with legacyapi.Dataset(tmp_local_netcdf, "r") as ds: assert ds["x0"][0] == fill_value0 assert ds["x0"]._FillValue == fill_value0 assert ds["x1"][0] == fill_value1 assert ds["x1"]._FillValue == fill_value1 # check if netCDF4-python can read them with netCDF4.Dataset(tmp_local_netcdf, "r") as ds: assert ds["x0"][0] == fill_value0 assert ds["x0"]._FillValue == fill_value0 assert ds["x1"][0] == fill_value1 assert ds["x1"]._FillValue == fill_value1 # second with legacyapi with legacyapi.Dataset(tmp_local_netcdf, "w") as ds: ds.createDimension("string", 10) fill_value0 = "bár" ds.createVariable("x0", str, ("string",), fill_value=fill_value0) fill_value1 = "bar" ds.createVariable("x1", str, ("string",), fill_value=fill_value1) # check if new API can read them with h5netcdf.File(tmp_local_netcdf, "r", **decode_vlen_strings) as ds: decode_vlen = decode_vlen_strings["decode_vlen_strings"] fvalue0 = fill_value0 if decode_vlen else fill_value0.encode("utf-8") fvalue1 = fill_value1 if decode_vlen else fill_value1.encode("utf-8") assert ds["x0"][0] == fvalue0 assert ds["x0"].attrs["_FillValue"] == fill_value0 assert ds["x1"][0] == fvalue1 assert ds["x1"].attrs["_FillValue"] == fill_value1 # check if legacyapi can read them with legacyapi.Dataset(tmp_local_netcdf, "r") as ds: assert ds["x0"][0] == fill_value0 assert ds["x0"]._FillValue == fill_value0 assert ds["x1"][0] == fill_value1 assert ds["x1"]._FillValue == fill_value1 # check if netCDF4-python can read them with netCDF4.Dataset(tmp_local_netcdf, "r") as ds: assert ds["x0"][0] == fill_value0 assert ds["x0"]._FillValue == fill_value0 assert ds["x1"][0] == fill_value1 assert ds["x1"]._FillValue == fill_value1 @pytest.mark.skipif( "ros3" not in h5py.registered_drivers(), reason="ros3 not available" ) def test_ros3(): fname = ( "https://www.unidata.ucar.edu/software/netcdf/examples/OMI-Aura_L2-example.nc" ) f = h5netcdf.File(fname, "r", driver="ros3") assert "Temperature" in list(f) f.close() ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/h5netcdf/utils.py0000644000175100001770000000124214522513274016036 0ustar00runnerdockerfrom collections.abc import Mapping class Frozen(Mapping): """Wrapper around an object implementing the mapping interface to make it immutable. If you really want to modify the mapping, the mutable version is saved under the `_mapping` attribute. """ def __init__(self, mapping): self._mapping = mapping def __getitem__(self, key): return self._mapping[key] def __iter__(self): return iter(self._mapping) def __len__(self): return len(self._mapping) def __contains__(self, key): return key in self._mapping def __repr__(self): return f"{type(self).__name__}({self._mapping!r})" ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1699387091.3380637 h5netcdf-1.3.0/h5netcdf.egg-info/0000755000175100001770000000000014522513323016012 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387091.0 h5netcdf-1.3.0/h5netcdf.egg-info/PKG-INFO0000644000175100001770000003211714522513323017113 0ustar00runnerdockerMetadata-Version: 2.1 Name: h5netcdf Version: 1.3.0 Summary: netCDF4 via h5py Author-email: Stephan Hoyer , Kai Mühlbauer Maintainer-email: h5netcdf developers License: Copyright (c) 2015, h5netcdf developers All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Project-URL: homepage, https://h5netcdf.org Project-URL: documentation, https://h5netcdf.org Project-URL: repository, https://github.com/h5netcdf/h5netcdf Project-URL: changelog, https://github.com/h5netcdf/h5netcdf/blob/main/CHANGELOG.rst Classifier: Development Status :: 5 - Production/Stable Classifier: License :: OSI Approved :: BSD License Classifier: Operating System :: OS Independent Classifier: Intended Audience :: Science/Research Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Topic :: Scientific/Engineering Requires-Python: >=3.9 Description-Content-Type: text/x-rst License-File: LICENSE License-File: AUTHORS.txt Requires-Dist: h5py Requires-Dist: packaging Provides-Extra: test Requires-Dist: netCDF4; extra == "test" Requires-Dist: pytest; extra == "test" h5netcdf ======== .. image:: https://github.com/h5netcdf/h5netcdf/workflows/CI/badge.svg :target: https://github.com/h5netcdf/h5netcdf/actions .. image:: https://badge.fury.io/py/h5netcdf.svg :target: https://pypi.org/project/h5netcdf/ .. image:: https://github.com/h5netcdf/h5netcdf/actions/workflows/pages/pages-build-deployment/badge.svg?branch=gh-pages :target: https://h5netcdf.github.io/h5netcdf/ A Python interface for the `netCDF4`_ file-format that reads and writes local or remote HDF5 files directly via `h5py`_ or `h5pyd`_, without relying on the Unidata netCDF library. .. _netCDF4: https://docs.unidata.ucar.edu/netcdf-c/current/file_format_specifications.html#netcdf_4_spec .. _h5py: https://www.h5py.org/ .. _h5pyd: https://github.com/HDFGroup/h5pyd .. why-h5netcdf Why h5netcdf? ------------- - It has one less binary dependency (netCDF C). If you already have h5py installed, reading netCDF4 with h5netcdf may be much easier than installing netCDF4-Python. - We've seen occasional reports of better performance with h5py than netCDF4-python, though in many cases performance is identical. For `one workflow`_, h5netcdf was reported to be almost **4x faster** than `netCDF4-python`_. - Anecdotally, HDF5 users seem to be unexcited about switching to netCDF -- hopefully this will convince them that netCDF4 is actually quite sane! - Finally, side-stepping the netCDF C library (and Cython bindings to it) gives us an easier way to identify the source of performance issues and bugs in the netCDF libraries/specification. .. _one workflow: https://github.com/Unidata/netcdf4-python/issues/390#issuecomment-93864839 .. _xarray: https://github.com/pydata/xarray/ Install ------- Ensure you have a recent version of h5py installed (I recommend using `conda`_ or the community effort `conda-forge`_). At least version 3.0 is required. Then:: $ pip install h5netcdf Or if you are already using conda:: $ conda install h5netcdf Note: From version 1.2. h5netcdf tries to align with a `nep29`_-like support policy with regard to it's upstream dependencies. .. _conda: https://conda.io/ .. _conda-forge: https://conda-forge.org/ .. _nep29: https://numpy.org/neps/nep-0029-deprecation_policy.html Usage ----- h5netcdf has two APIs, a new API and a legacy API. Both interfaces currently reproduce most of the features of the netCDF interface, with the notable exception of support for operations that rename or delete existing objects. We simply haven't gotten around to implementing this yet. Patches would be very welcome. New API ~~~~~~~ The new API supports direct hierarchical access of variables and groups. Its design is an adaptation of h5py to the netCDF data model. For example: .. code-block:: python import h5netcdf import numpy as np with h5netcdf.File('mydata.nc', 'w') as f: # set dimensions with a dictionary f.dimensions = {'x': 5} # and update them with a dict-like interface # f.dimensions['x'] = 5 # f.dimensions.update({'x': 5}) v = f.create_variable('hello', ('x',), float) v[:] = np.ones(5) # you don't need to create groups first # you also don't need to create dimensions first if you supply data # with the new variable v = f.create_variable('/grouped/data', ('y',), data=np.arange(10)) # access and modify attributes with a dict-like interface v.attrs['foo'] = 'bar' # you can access variables and groups directly using a hierarchical # keys like h5py print(f['/grouped/data']) # add an unlimited dimension f.dimensions['z'] = None # explicitly resize a dimension and all variables using it f.resize_dimension('z', 3) Notes: - Automatic resizing of unlimited dimensions with array indexing is not available. - Dimensions need to be manually resized with ``Group.resize_dimension(dimension, size)``. - Arrays are returned padded with ``fillvalue`` (taken from underlying hdf5 dataset) up to current size of variable's dimensions. The behaviour is equivalent to netCDF4-python's ``Dataset.set_auto_mask(False)``. Legacy API ~~~~~~~~~~ The legacy API is designed for compatibility with `netCDF4-python`_. To use it, import ``h5netcdf.legacyapi``: .. _netCDF4-python: https://github.com/Unidata/netcdf4-python .. code-block:: python import h5netcdf.legacyapi as netCDF4 # everything here would also work with this instead: # import netCDF4 import numpy as np with netCDF4.Dataset('mydata.nc', 'w') as ds: ds.createDimension('x', 5) v = ds.createVariable('hello', float, ('x',)) v[:] = np.ones(5) g = ds.createGroup('grouped') g.createDimension('y', 10) g.createVariable('data', 'i8', ('y',)) v = g['data'] v[:] = np.arange(10) v.foo = 'bar' print(ds.groups['grouped'].variables['data']) The legacy API is designed to be easy to try-out for netCDF4-python users, but it is not an exact match. Here is an incomplete list of functionality we don't include: - Utility functions ``chartostring``, ``num2date``, etc., that are not directly necessary for writing netCDF files. - h5netcdf variables do not support automatic masking or scaling (e.g., of values matching the ``_FillValue`` attribute). We prefer to leave this functionality to client libraries (e.g., `xarray`_), which can implement their exact desired scaling behavior. Nevertheless arrays are returned padded with ``fillvalue`` (taken from underlying hdf5 dataset) up to current size of variable's dimensions. The behaviour is equivalent to netCDF4-python's ``Dataset.set_auto_mask(False)``. .. _invalid netcdf: Invalid netCDF files ~~~~~~~~~~~~~~~~~~~~ h5py implements some features that do not (yet) result in valid netCDF files: - Data types: - Booleans - Complex values - Non-string variable length types - Enum types - Reference types - Arbitrary filters: - Scale-offset filters By default [#]_, h5netcdf will not allow writing files using any of these features, as files with such features are not readable by other netCDF tools. However, these are still valid HDF5 files. If you don't care about netCDF compatibility, you can use these features by setting ``invalid_netcdf=True`` when creating a file: .. code-block:: python # avoid the .nc extension for non-netcdf files f = h5netcdf.File('mydata.h5', invalid_netcdf=True) ... # works with the legacy API, too, though compression options are not exposed ds = h5netcdf.legacyapi.Dataset('mydata.h5', invalid_netcdf=True) ... In such cases the `_NCProperties` attribute will not be saved to the file or be removed from an existing file. A warning will be issued if the file has `.nc`-extension. .. rubric:: Footnotes .. [#] h5netcdf we will raise ``h5netcdf.CompatibilityError``. Decoding variable length strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ h5py 3.0 introduced `new behavior`_ for handling variable length string. Instead of being automatically decoded with UTF-8 into NumPy arrays of ``str``, they are required as arrays of ``bytes``. The legacy API preserves the old behavior of h5py (which matches netCDF4), and automatically decodes strings. The new API matches h5py behavior. Explicitly set ``decode_vlen_strings=True`` in the ``h5netcdf.File`` constructor to opt-in to automatic decoding. .. _new behavior: https://docs.h5py.org/en/stable/strings.html .. _phony dims: Datasets with missing dimension scales ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ By default [#]_ h5netcdf raises a ``ValueError`` if variables with no dimension scale associated with one of their axes are accessed. You can set ``phony_dims='sort'`` when opening a file to let h5netcdf invent phony dimensions according to `netCDF`_ behaviour. .. code-block:: python # mimic netCDF-behaviour for non-netcdf files f = h5netcdf.File('mydata.h5', mode='r', phony_dims='sort') ... Note, that this iterates once over the whole group-hierarchy. This has affects on performance in case you rely on laziness of group access. You can set ``phony_dims='access'`` instead to defer phony dimension creation to group access time. The created phony dimension naming will differ from `netCDF`_ behaviour. .. code-block:: python f = h5netcdf.File('mydata.h5', mode='r', phony_dims='access') ... .. rubric:: Footnotes .. [#] Keyword default setting ``phony_dims=None`` for backwards compatibility. .. _netCDF: https://docs.unidata.ucar.edu/netcdf-c/current/interoperability_hdf5.html Track Order ~~~~~~~~~~~ As of h5netcdf 1.1.0, if h5py 3.7.0 or greater is detected, the ``track_order`` parameter is set to ``True`` enabling `order tracking`_ for newly created netCDF4 files. This helps ensure that files created with the h5netcdf library can be modified by the netCDF4-c and netCDF4-python implementation used in other software stacks. Since this change should be transparent to most users, it was made without deprecation. Since track_order is set at creation time, any dataset that was created with ``track_order=False`` (h5netcdf version 1.0.2 and older except for 0.13.0) will continue to opened with order tracker disabled. The following describes the behavior of h5netcdf with respect to order tracking for a few key versions: - Version 0.12.0 and earlier, the ``track_order`` parameter`order was missing and thus order tracking was implicitely set to ``False``. - Version 0.13.0 enabled order tracking by setting the parameter ``track_order`` to ``True`` by default without deprecation. - Versions 0.13.1 to 1.0.2 set ``track_order`` to ``False`` due to a bug in a core dependency of h5netcdf, h5py `upstream bug`_ which was resolved in h5py 3.7.0 with the help of the h5netcdf team. - In version 1.1.0, if h5py 3.7.0 or above is detected, the ``track_order`` parameter is set to ``True`` by default. .. _order tracking: https://docs.unidata.ucar.edu/netcdf-c/current/file_format_specifications.html#creation_order .. _upstream bug: https://github.com/h5netcdf/h5netcdf/issues/136 .. _[*]: https://github.com/h5netcdf/h5netcdf/issues/128 .. changelog Changelog --------- `Changelog`_ .. _Changelog: https://github.com/h5netcdf/h5netcdf/blob/main/CHANGELOG.rst .. license License ------- `3-clause BSD`_ .. _3-clause BSD: https://github.com/h5netcdf/h5netcdf/blob/main/LICENSE ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387091.0 h5netcdf-1.3.0/h5netcdf.egg-info/SOURCES.txt0000644000175100001770000000117614522513323017703 0ustar00runnerdocker.pre-commit-config.yaml AUTHORS.txt CHANGELOG.rst LICENSE README.rst pyproject.toml doc/Makefile doc/api.rst doc/changelog.rst doc/conf.py doc/devguide.rst doc/feature.rst doc/index.rst doc/legacyapi.rst h5netcdf/__init__.py h5netcdf/_version.py h5netcdf/attrs.py h5netcdf/core.py h5netcdf/dimensions.py h5netcdf/legacyapi.py h5netcdf/utils.py h5netcdf.egg-info/PKG-INFO h5netcdf.egg-info/SOURCES.txt h5netcdf.egg-info/dependency_links.txt h5netcdf.egg-info/requires.txt h5netcdf.egg-info/top_level.txt h5netcdf/tests/conftest.py h5netcdf/tests/pytest.ini h5netcdf/tests/test_h5netcdf.py licenses/H5PY_LICENSE.txt licenses/PSF_LICENSE.txt././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387091.0 h5netcdf-1.3.0/h5netcdf.egg-info/dependency_links.txt0000644000175100001770000000000114522513323022060 0ustar00runnerdocker ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387091.0 h5netcdf-1.3.0/h5netcdf.egg-info/requires.txt0000644000175100001770000000004614522513323020412 0ustar00runnerdockerh5py packaging [test] netCDF4 pytest ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387091.0 h5netcdf-1.3.0/h5netcdf.egg-info/top_level.txt0000644000175100001770000000001114522513323020534 0ustar00runnerdockerh5netcdf ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1699387091.3380637 h5netcdf-1.3.0/licenses/0000755000175100001770000000000014522513323014425 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/licenses/H5PY_LICENSE.txt0000644000175100001770000000276014522513274017067 0ustar00runnerdockerCopyright (c) 2008 Andrew Collette and contributors All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/licenses/PSF_LICENSE.txt0000644000175100001770000003073114522513274016771 0ustar00runnerdockerA. HISTORY OF THE SOFTWARE ========================== Python was created in the early 1990s by Guido van Rossum at Stichting Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands as a successor of a language called ABC. Guido remains Python's principal author, although it includes many contributions from others. In 1995, Guido continued his work on Python at the Corporation for National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) in Reston, Virginia where he released several versions of the software. In May 2000, Guido and the Python core development team moved to BeOpen.com to form the BeOpen PythonLabs team. In October of the same year, the PythonLabs team moved to Digital Creations (now Zope Corporation, see http://www.zope.com). In 2001, the Python Software Foundation (PSF, see http://www.python.org/psf/) was formed, a non-profit organization created specifically to own Python-related Intellectual Property. Zope Corporation is a sponsoring member of the PSF. All Python releases are Open Source (see http://www.opensource.org for the Open Source Definition). Historically, most, but not all, Python releases have also been GPL-compatible; the table below summarizes the various releases. Release Derived Year Owner GPL- from compatible? (1) 0.9.0 thru 1.2 1991-1995 CWI yes 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes 1.6 1.5.2 2000 CNRI no 2.0 1.6 2000 BeOpen.com no 1.6.1 1.6 2001 CNRI yes (2) 2.1 2.0+1.6.1 2001 PSF no 2.0.1 2.0+1.6.1 2001 PSF yes 2.1.1 2.1+2.0.1 2001 PSF yes 2.1.2 2.1.1 2002 PSF yes 2.1.3 2.1.2 2002 PSF yes 2.2 and above 2.1.1 2001-now PSF yes Footnotes: (1) GPL-compatible doesn't mean that we're distributing Python under the GPL. All Python licenses, unlike the GPL, let you distribute a modified version without making your changes open source. The GPL-compatible licenses make it possible to combine Python with other software that is released under the GPL; the others don't. (2) According to Richard Stallman, 1.6.1 is not GPL-compatible, because its license has a choice of law clause. According to CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 is "not incompatible" with the GPL. Thanks to the many outside volunteers who have worked under Guido's direction to make these releases possible. B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON =============================================================== PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 -------------------------------------------- 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("Python") in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Python Software Foundation; All Rights Reserved" are retained in Python alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python. 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between PSF and Licensee. This License Agreement does not grant permission to use PSF trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 ------------------------------------------- BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the Individual or Organization ("Licensee") accessing and otherwise using this software in source or binary form and its associated documentation ("the Software"). 2. Subject to the terms and conditions of this BeOpen Python License Agreement, BeOpen hereby grants Licensee a non-exclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use the Software alone or in any derivative version, provided, however, that the BeOpen Python License is retained in the Software, alone or in any derivative version prepared by Licensee. 3. BeOpen is making the Software available to Licensee on an "AS IS" basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 5. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 6. This License Agreement shall be governed by and interpreted in all respects by the law of the State of California, excluding conflict of law provisions. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between BeOpen and Licensee. This License Agreement does not grant permission to use BeOpen trademarks or trade names in a trademark sense to endorse or promote products or services of Licensee, or any third party. As an exception, the "BeOpen Python" logos available at http://www.pythonlabs.com/logos.html may be used according to the permissions granted on that web page. 7. By copying, installing or otherwise using the software, Licensee agrees to be bound by the terms and conditions of this License Agreement. CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 --------------------------------------- 1. This LICENSE AGREEMENT is between the Corporation for National Research Initiatives, having an office at 1895 Preston White Drive, Reston, VA 20191 ("CNRI"), and the Individual or Organization ("Licensee") accessing and otherwise using Python 1.6.1 software in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, CNRI hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python 1.6.1 alone or in any derivative version, provided, however, that CNRI's License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) 1995-2001 Corporation for National Research Initiatives; All Rights Reserved" are retained in Python 1.6.1 alone or in any derivative version prepared by Licensee. Alternately, in lieu of CNRI's License Agreement, Licensee may substitute the following text (omitting the quotes): "Python 1.6.1 is made available subject to the terms and conditions in CNRI's License Agreement. This Agreement together with Python 1.6.1 may be located on the Internet using the following unique, persistent identifier (known as a handle): 1895.22/1013. This Agreement may also be obtained from a proxy server on the Internet using the following URL: http://hdl.handle.net/1895.22/1013". 3. In the event Licensee prepares a derivative work that is based on or incorporates Python 1.6.1 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python 1.6.1. 4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. This License Agreement shall be governed by the federal intellectual property law of the United States, including without limitation the federal copyright law, and, to the extent such U.S. federal law does not apply, by the law of the Commonwealth of Virginia, excluding Virginia's conflict of law provisions. Notwithstanding the foregoing, with regard to derivative works based on Python 1.6.1 that incorporate non-separable material that was previously distributed under the GNU General Public License (GPL), the law of the Commonwealth of Virginia shall govern this License Agreement only as to issues arising under or with respect to Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between CNRI and Licensee. This License Agreement does not grant permission to use CNRI trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By clicking on the "ACCEPT" button where indicated, or by copying, installing or otherwise using Python 1.6.1, Licensee agrees to be bound by the terms and conditions of this License Agreement. ACCEPT CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 -------------------------------------------------- Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, The Netherlands. All rights reserved. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that copyright notice and this permission notice appear in supporting documentation, and that the name of Stichting Mathematisch Centrum or CWI not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1699387068.0 h5netcdf-1.3.0/pyproject.toml0000644000175100001770000000401714522513274015543 0ustar00runnerdocker[build-system] requires = [ "setuptools>=42", "wheel", "setuptools_scm[toml]>=7.0", ] build-backend = "setuptools.build_meta" [project] name = "h5netcdf" description = "netCDF4 via h5py" requires-python = ">=3.9" license = {file = "LICENSE"} authors = [ { name = "Stephan Hoyer", email = "shoyer@gmail.com" }, { name = "Kai Mühlbauer", email = "kmuehlbauer@wradlib.org" }, ] maintainers = [ { name = "h5netcdf developers", email = "devteam@h5netcdf.org" } ] classifiers=[ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", "Intended Audience :: Science/Research", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering", ] dependencies = ["h5py", "packaging"] dynamic = ["version", "readme"] [project.urls] homepage = "https://h5netcdf.org" documentation = "https://h5netcdf.org" repository = "https://github.com/h5netcdf/h5netcdf" changelog = "https://github.com/h5netcdf/h5netcdf/blob/main/CHANGELOG.rst" [project.optional-dependencies] test = ["netCDF4", "pytest"] [tool.setuptools.dynamic] readme = { file = ["README.rst"]} [tool.setuptools] packages = ["h5netcdf", "h5netcdf.tests"] [tool.setuptools_scm] write_to = "h5netcdf/_version.py" version_scheme = "release-branch-semver" fallback_version = "999" [tool.black] target-version = ["py39"] line-length = 88 [tool.ruff] target-version = "py39" builtins = ["ellipsis"] exclude = [ ".eggs", "doc", ] # E402: module level import not at top of file # E501: line too long - let black worry about that # E731: do not assign a lambda expression, use a def ignore = [ "E402", "E501", "E731", ] select = [ # Pyflakes "F", # Pycodestyle "E", "W", # isort "I", # Pyupgrade "UP", ] [tool.ruff.isort] known-first-party = ["h5netcdf"] ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1699387091.342064 h5netcdf-1.3.0/setup.cfg0000644000175100001770000000004614522513323014441 0ustar00runnerdocker[egg_info] tag_build = tag_date = 0